Changeset 695
- Timestamp: 01/15/14 14:52:29 (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/org/expeditee/io/WebParser.java
r693 r695 9 9 import java.net.MalformedURLException; 10 10 import java.net.URL; 11 import java.util.ArrayList;12 11 import java.util.Arrays; 13 import java.util.regex.Matcher;14 import java.util.regex.Pattern;15 12 16 13 import javax.imageio.ImageIO; … … 26 23 import org.expeditee.items.Text; 27 24 import org.expeditee.reflection.JavaFX; 28 import org.w3c.dom.Element;29 25 import org.w3c.dom.Node; 30 26 import org.w3c.dom.html.HTMLBodyElement; … … 163 159 + " prevSpan = span;" 164 160 + " }" 165 + "}" 161 + "}" 162 163 + "function splitIntoWords(toSplit) {" 164 + " var words = [];" 165 + " var pattern = /\\s+/g;" 166 + " var words = toSplit.split(pattern);" 167 + "" 168 + " for (var i = 0; i < words.length - 1; i++) {" 169 + " words[i] = words[i] + ' ';" 170 + " }" 171 + " return words;" 172 + "}" 166 173 ); 167 174 … … 181 188 + "return textNodes;" 182 189 + "}; " 183 + "getTextNodes(document.body)"); 184 190 + "getTextNodes(document.body)" 191 ); 192 185 193 int nodesLength = (Integer) JavaFX.JSObjectGetMember.invoke(textNodes, "length"); 186 194 187 // Looping through all the text nodes in the current paragraph195 // Looping through all the text nodes in the document 188 196 for (int j = 0; j < nodesLength; j++) { 189 197 Node currentNode = (Node) JavaFX.JSObjectGetSlot.invoke(textNodes, j); 190 198 191 199 // Making the current node accessible in JavaScript 192 JavaFX.JSObjectSetMember.invoke(window, " textNode", currentNode);193 200 JavaFX.JSObjectSetMember.invoke(window, "currentNode", currentNode); 201 194 202 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 195 + "var span = null;" 196 + "var prevSpan = null;" 197 + "var prevPrevSpan = null;" 198 ); 199 200 // Splitting the text node's content into individual words 201 String textContent = ((String) JavaFX.WebEngineExecuteScript.invoke(webEngine, "textNode.textContent")).replaceAll("\\n|\\r", "").replaceAll("\\s+", " "); 202 String[] words = splitIntoWords(textContent); 203 204 
JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 205 + "var refNode = textNode.nextSibling;" 206 + "var par = textNode.parentElement;" 207 + "textNode.parentElement.removeChild(textNode)"); 208 209 // Adding each word back to the page 210 for (int k = 0; k < words.length; k++) { 211 Object currentWord = words[k]; 212 JavaFX.JSObjectCall.invoke(window, "addToSpan", new Object[] { currentWord }); 213 } 214 215 JavaFX.WebEngineExecuteScript.invoke(webEngine, "" 216 + " if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {" 217 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;" 218 + " par.removeChild(prevSpan);" 219 + " }" 220 ); 221 222 progressBar.set((100 * (j + 1)) / nodesLength); 203 + "var span = null, prevSpan = null, prevPrevSpan = null;" 204 205 // Removing repeated whitespace from the text node's content then splitting it into individual words 206 + "var textContent = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');" 207 + "var words = splitIntoWords(textContent);" 208 209 + "var refNode = currentNode.nextSibling;" 210 + "var par = currentNode.parentElement;" 211 + "currentNode.parentElement.removeChild(currentNode);" 212 213 + "for (var i = 0; i < words.length; i++) {" + " addToSpan(words[i]);" 214 + "}" 215 216 + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {" 217 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;" 218 + " par.removeChild(prevSpan);" 219 + "}" 220 ); 221 222 // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of 223 // the loop below 224 progressBar.set((100 * (j)) / nodesLength); 223 225 } 224 226 … … 517 519 } 518 520 } 521 522 progressBar.set(100); 519 523 520 524 } catch (Exception e) { … … 705 709 pic.getSource().anchor(); 706 710 } 707 708 private static String[] 
splitIntoWords(String toSplit) {709 ArrayList<String> words = new ArrayList<String>();710 Pattern regex = Pattern.compile("\\s+");711 Matcher matcher = regex.matcher(toSplit);712 713 // The index at which the previous word ended714 int prevEndIndex = 0;715 716 String prev = null;717 718 while (matcher.find()) {719 String w = toSplit.substring(prevEndIndex, matcher.start());720 721 if (prev != null) {722 words.add(prev + " ");723 }724 725 prev = w;726 prevEndIndex = matcher.end();727 }728 729 // Adding the final two words730 if (prev != null) {731 words.add(prev + " ");732 }733 734 words.add(toSplit.substring(prevEndIndex));735 736 return words.toArray(new String[words.size()]);737 }738 711 }
Note: See TracChangeset for help on using the changeset viewer.