Changeset 693
- Timestamp: 01/15/14 10:41:27 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/src/org/expeditee/io/WebParser.java
r692 r693 99 99 // MessageBay.displayMessage("Finished loading page"); 100 100 System.out.println("Parsing page!"); 101 JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.resizeTo(800, 800);"); 101 JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.resizeTo(800, 800);" 102 + "document.body.style.width = '1000px'"); 102 103 parsePage(webEngine, frame); 103 104 System.out.println("Parsed page!"); … … 165 166 ); 166 167 167 // Getting an array of all HTML elements in the page 168 Object contentElements = JavaFX.WebEngineExecuteScript.invoke(webEngine, "document.querySelectorAll('body *');"); 169 int contentElementsLength = (Integer) JavaFX.JSObjectGetMember.invoke(contentElements, "length"); 170 171 for (int i = 0; i < contentElementsLength; i++) { 172 // Getting the current HTML element, then making it accessible in JavaScript 173 Element currentElement = (Element) JavaFX.JSObjectGetSlot.invoke(contentElements, i); 174 JavaFX.JSObjectSetMember.invoke(window, "para", currentElement); 175 176 JavaFX.WebEngineExecuteScript.invoke(webEngine, "para.style.wordBreak = 'normal';"); 177 178 // Creating a TreeWalker that is used to loop over all the TextNodes within the current element 179 JavaFX.WebEngineExecuteScript.invoke(webEngine, "var walker = document.createTreeWalker(para, NodeFilter.SHOW_TEXT, null, false);"); 180 181 // Using Javascript to get an array of all the text nodes in the current element. 
Have to loop through twice (once 182 // to build the array and once actually going through the array, otherwise when the textnode is removed from the 183 // document items end up being skipped) 184 Object textNodes = JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 185 + "function getTextNodes(rootNode){" 186 + "var node;" 187 + "var textNodes=[];" 188 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT);" 189 + "while(node=walk.nextNode()) {" 190 + "if((node.textContent.trim().length > 0)) { " 191 + "textNodes.push(node);" 192 + "}" 193 + "}" 194 + "return textNodes;" 195 + "}; " 196 + "getTextNodes(para)"); 197 198 int nodesLength = (Integer) JavaFX.JSObjectGetMember.invoke(textNodes, "length"); 199 200 // Looping through all the text nodes in the current paragraph 201 for (int j = 0; j < nodesLength; j++) { 202 Node currentNode = (Node) JavaFX.JSObjectGetSlot.invoke(textNodes, j); 203 204 // Making the current node accessible in JavaScript 205 JavaFX.JSObjectSetMember.invoke(window, "textNode", currentNode); 206 207 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 208 + "var span = null;" 209 + "var prevSpan = null;" 210 + "var prevPrevSpan = null;" 211 ); 212 213 // Splitting the text node's content into individual words 214 String textContent = ((String) JavaFX.WebEngineExecuteScript.invoke(webEngine, "textNode.textContent")).replaceAll("\\n|\\r", "").replaceAll("\\s+", " "); 215 String[] words = splitIntoWords(textContent); 216 217 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 218 + "var refNode = textNode.nextSibling;" 219 + "var par = textNode.parentElement;" 220 + "textNode.parentElement.removeChild(textNode)"); 221 222 // Adding each word back to the page 223 for (int k = 0; k < words.length; k++) { 224 Object currentWord = words[k]; 225 JavaFX.JSObjectCall.invoke(window, "addToSpan", new Object[] { currentWord }); 226 } 227 228 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 229 + " if (prevPrevSpan !== null && 
prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {" 230 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;" 231 + " par.removeChild(prevSpan);" 232 + " }" 233 ); 168 // Using Javascript to get an array of all the text nodes in the document so they can be wrapped in spans. Have to 169 // loop through twice (once to build the array and once actually going through the array, otherwise when the 170 // textnode is removed from the document items end up being skipped) 171 Object textNodes = JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 172 + "function getTextNodes(rootNode){" 173 + "var node;" 174 + "var textNodes=[];" 175 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT);" 176 + "while(node=walk.nextNode()) {" 177 + "if((node.textContent.trim().length > 0)) { " 178 + "textNodes.push(node);" 179 + "}" 180 + "}" 181 + "return textNodes;" 182 + "}; " 183 + "getTextNodes(document.body)"); 184 185 int nodesLength = (Integer) JavaFX.JSObjectGetMember.invoke(textNodes, "length"); 186 187 // Looping through all the text nodes in the current paragraph 188 for (int j = 0; j < nodesLength; j++) { 189 Node currentNode = (Node) JavaFX.JSObjectGetSlot.invoke(textNodes, j); 190 191 // Making the current node accessible in JavaScript 192 JavaFX.JSObjectSetMember.invoke(window, "textNode", currentNode); 193 194 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 195 + "var span = null;" 196 + "var prevSpan = null;" 197 + "var prevPrevSpan = null;" 198 ); 199 200 // Splitting the text node's content into individual words 201 String textContent = ((String) JavaFX.WebEngineExecuteScript.invoke(webEngine, "textNode.textContent")).replaceAll("\\n|\\r", "").replaceAll("\\s+", " "); 202 String[] words = splitIntoWords(textContent); 203 204 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 205 + "var refNode = textNode.nextSibling;" 206 + "var par = textNode.parentElement;" 207 + 
"textNode.parentElement.removeChild(textNode)"); 208 209 // Adding each word back to the page 210 for (int k = 0; k < words.length; k++) { 211 Object currentWord = words[k]; 212 JavaFX.JSObjectCall.invoke(window, "addToSpan", new Object[] { currentWord }); 234 213 } 235 236 progressBar.set((100 * (i + 1)) / contentElementsLength); 214 215 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 216 + " if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {" 217 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;" 218 + " par.removeChild(prevSpan);" 219 + " }" 220 ); 221 222 progressBar.set((100 * (j + 1)) / nodesLength); 237 223 } 238 224 239 225 // Finding all links within the page, then setting the href attribute of all their descendants to be the same 240 226 // link/URL.
Note: See TracChangeset for help on using the changeset viewer.