Context Navigation

← Previous Changeset
Next Changeset →

Changeset 695

Timestamp:

01/15/14 14:52:29 (10 years ago)

Author:

ngw8

Message:

Minor changes to web parser

File:

1 edited

trunk/src/org/expeditee/io/WebParser.java (modified) (6 diffs)

Legend:

(no prefix): Unmodified
+ : Added
- : Removed

trunk/src/org/expeditee/io/WebParser.java

-              r693
+              r695
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 import javax.imageio.ImageIO;
 …
 import org.expeditee.items.Text;
 import org.expeditee.reflection.JavaFX;
-import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.html.HTMLBodyElement;
 …
                                                                                 + "                     prevSpan = span;"
                                                                                 + "             }"
+                                                                                + "}"
+                                                                                + "}"
+                                                                                + "function splitIntoWords(toSplit) {"
+                                                                                + "             var words = [];"
+                                                                                + "             var pattern = /\\s+/g;"
+                                                                                + "             var words = toSplit.split(pattern);"
+                                                                                + ""
+                                                                                + "             for (var i = 0; i < words.length - 1; i++) {"
+                                                                                + "                     words[i] = words[i] + ' ';"
+                                                                                + "             }"
+                                                                                + "             return words;"
+                                                                                + "}"
                                                 );
 …
                                                                 + "return textNodes;"
                                                         + "}; "
+                                                        + "getTextNodes(document.body)");
+                                                        + "getTextNodes(document.body)"
+                                                        );
                                                 int nodesLength = (Integer) JavaFX.JSObjectGetMember.invoke(textNodes, "length");
                                                 // Looping through all the text nodes in the current paragraph
+                                                // Looping through all the text nodes in the document
                                                 for (int j = 0; j < nodesLength; j++) {
                                                         Node currentNode = (Node) JavaFX.JSObjectGetSlot.invoke(textNodes, j);
                                                         // Making the current node accessible in JavaScript
                                                         JavaFX.JSObjectSetMember.invoke(window, "textNode", currentNode);
+                                                        JavaFX.JSObjectSetMember.invoke(window, "currentNode", currentNode);
                                                         JavaFX.WebEngineExecuteScript.invoke(webEngine, ""
+                                                                        + "var span = null;"
+                                                                        + "var prevSpan = null;"
+                                                                        + "var prevPrevSpan = null;"
+                                                                        );
+                                                        // Splitting the text node's content into individual words
+                                                        String textContent = ((String) JavaFX.WebEngineExecuteScript.invoke(webEngine, "textNode.textContent")).replaceAll("\\n|\\r", "").replaceAll("\\s+", " ");
+                                                        String[] words = splitIntoWords(textContent);
+                                                        JavaFX.WebEngineExecuteScript.invoke(webEngine, ""
+                                                                        + "var refNode = textNode.nextSibling;"
+                                                                        + "var par = textNode.parentElement;"
+                                                                        + "textNode.parentElement.removeChild(textNode)");
+                                                        // Adding each word back to the page
+                                                        for (int k = 0; k < words.length; k++) {
+                                                                Object currentWord = words[k];
+                                                                JavaFX.JSObjectCall.invoke(window, "addToSpan", new Object[] { currentWord });
+                                                        }
+                                                        JavaFX.WebEngineExecuteScript.invoke(webEngine, ""
+                                                                        + "                     if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
+                                                                        + "                             prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
+                                                                        + "                             par.removeChild(prevSpan);"
+                                                                        + "                     }"
+                                                                        );
+                                                        progressBar.set((100 * (j + 1)) / nodesLength);
+                                                                        + "var span = null, prevSpan = null, prevPrevSpan = null;"
+                                                                        // Removing repeated whitespace from the text node's content then splitting it into individual words
+                                                                        + "var textContent  = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');"
+                                                                        + "var words = splitIntoWords(textContent);"
+                                                                        + "var refNode = currentNode.nextSibling;"
+                                                                        + "var par = currentNode.parentElement;"
+                                                                        + "currentNode.parentElement.removeChild(currentNode);"
+                                                                        + "for (var i = 0; i < words.length; i++) {" + "                addToSpan(words[i]);"
+                                                                        + "}"
+                                                                        + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
+                                                                        + "             prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
+                                                                        + "             par.removeChild(prevSpan);"
+                                                                        + "}"
+);
+                                                        // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of
+                                                        // the loop below
+                                                        progressBar.set((100 * (j)) / nodesLength);
+                                                }
 …
+                                                        }
+                                                }
+                                                progressBar.set(100);
                                         } catch (Exception e) {
 …
                 pic.getSource().anchor();
+        }
-        private static String[] splitIntoWords(String toSplit) {
-                ArrayList<String> words = new ArrayList<String>();
-                Pattern regex = Pattern.compile("\\s+");
-                Matcher matcher = regex.matcher(toSplit);
-                // The index at which the previous word ended
-                int prevEndIndex = 0;
-                String prev = null;
-                while (matcher.find()) {
-                        String w = toSplit.substring(prevEndIndex, matcher.start());
-                        if (prev != null) {
-                                words.add(prev + " ");
+                        }
-                        prev = w;
-                        prevEndIndex = matcher.end();
+                 }
-                // Adding the final two words
-                if (prev != null) {
-                        words.add(prev + " ");
+                }
-                words.add(toSplit.substring(prevEndIndex));
-                return words.toArray(new String[words.size()]);
+         }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 695

Legend:

trunk/src/org/expeditee/io/WebParser.java

Download in other formats: