Context Navigation

← Previous Change
Next Change →

Changeset 672 for trunk

Timestamp:

01/09/14 11:59:53 (10 years ago)

Author:

ngw8

Message:

Webparser improvements regarding how whitespace is dealt with

File:

: 1 edited

trunk/src/org/expeditee/io/WebParser.java (modified) (13 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/src/org/expeditee/io/WebParser.java

-              r662
+              r672
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import javax.imageio.ImageIO;
 …
                                                                                 + "function addToSpan(text) {"
                                                                                 + "             span = document.createElement('wordSpan');"
                                                                                 + "             span.textContent = text + ' ';"
+                                                                                + "             span.textContent = text;"
                                                                                 + "             textNode.parentElement.insertBefore(span, textNode);"
                                                                                 + "             if (prevSpan !== null && span.getBoundingClientRect().top > prevSpan.getBoundingClientRect().top) {"
 …
                                                 // Getting an array of HTML elements from the page that will be checked for 'content' (i.e. will be modified to be
                                                 // properly wrapped in Expeditee)
                                                 Object contentElements = JavaFX.WebEngineExecuteScript.invoke(webEngine, "document.querySelectorAll('p,li');");
+                                                Object contentElements = JavaFX.WebEngineExecuteScript.invoke(webEngine, "document.querySelectorAll('body *');");
                                                 int contentElementsLength = (Integer) JavaFX.JSObjectGetMember.invoke(contentElements, "length");
 …
                                                                 // Looping through all the text nodes in the current paragraph
                                                                 while ((textNode = (Node) JavaFX.WebEngineExecuteScript.invoke(webEngine, "walker.nextNode()")) != null) {
                                                                         // Making the current node accesable in JavaScript
+                                                                        // Making the current node accessible in JavaScript
                                                                         JavaFX.JSObjectSetMember.invoke(window, "textNode", textNode);
 …
                                                                         // Splitting the text node's content into individual words
                                                                         Object words = JavaFX.WebEngineExecuteScript.invoke(webEngine, "textNode.textContent.split(/\\s+/);");
                                                                         int wordsLength = (Integer) JavaFX.JSObjectGetMember.invoke(words, "length");
+                                                                        String textContent = (String) JavaFX.WebEngineExecuteScript.invoke(webEngine, "textNode.textContent");
+                                                                        String[] words = splitIntoWords(textContent);
                                                                         // Clearing all text from the current text node (but not removing it, as it is needed as a reference
 …
                                                                         // Adding each word back to the page
                                                                         for (int j = 0; j < wordsLength; j++) {
                                                                                 Object currentWord = JavaFX.JSObjectGetSlot.invoke(words, j);
+                                                                        for (int j = 0; j < words.length; j++) {
+                                                                                Object currentWord = words[j];
                                                                                 JavaFX.JSObjectCall.invoke(window, "addToSpan", new Object[] { currentWord });
+                                                                        }
 …
                                                                                 + Float.valueOf(JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.pageYOffset").toString());
                                                                 float width = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "width").toString()) + 5;
+                                                                float width = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "width").toString());
                                                                 float height = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "height").toString());
 …
+                                                                                }
                                                                                 float lineSpacingInt = -1;
+                                                                                float lineHeightInt = -1;
                                                                                 try {
                                                                                         lineSpacingInt = (Float.parseFloat(lineHeight.substring(0, lineHeight.length() - 2)));
+                                                                                        lineHeightInt = (Float.parseFloat(lineHeight.substring(0, lineHeight.length() - 2)));
                                                                                 } catch (NumberFormatException nfe) {
                                                                                         // Use default value as set above
 …
                                                                                 Text t;
                                                                                 String textContent = currentNode.getTextContent().replaceAll("[^\\S\\n]+", " ").trim();
+                                                                                String textContent = currentNode.getTextContent(); // .replaceAll("[^\\S\\n]+", " ").trim();
                                                                                 if (textTransform.equals("uppercase")) {
 …
+                                                                                }
                                                                                 t = frame.addText((int) x, (int) y, textContent, null);
+                                                                                t = frame.addText(Math.round(x), Math.round(y), textContent, null);
                                                                                 t.setColor(rgbStringToColor(color));
 …
                                                                                 t.setLetterSpacing(letterSpacingFloat);
+                                                                                // Removing any spacing between lines allowing t.getLineHeight() to be used to get the actual height
+                                                                                // of just the characters (i.e. distance from ascenders to descenders)
                                                                                 t.setSpacing(0);
+                                                                                t.setSpacing(lineSpacingInt - t.getLineHeight());
+                                                                                t.setSpacing(lineHeightInt - t.getLineHeight());
                                                                                 if (align.equals("left")) {
 …
+                                                                                }
                                                                                 // Font size divided by 2 is added to the item width to give a little breathing room
                                                                                 t.setWidth(Math.round(width + (t.getSize() / 2)));
+                                                                                // Font size is added to the item width to give a little breathing room
+                                                                                t.setWidth(Math.round(width + (t.getSize())));
                                                                         } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
 …
                 pic.getSource().anchor();
+        }
+        private static String[] splitIntoWords(String toSplit) {
+                ArrayList<String> words = new ArrayList<String>();
+                Pattern regex = Pattern.compile("\\s+");
+                Matcher matcher = regex.matcher(toSplit);
+                // The index at which the previous word ended
+                int prevEndIndex = 0;
+                String prev = null;
+                while (matcher.find()) {
+                        String w = toSplit.substring(prevEndIndex, matcher.start());
+                        System.out.println(toSplit.substring(0, 0));
+                        if (prev != null) {
+                                words.add(prev + " ");
+                        }
+                        prev = w;
+                        prevEndIndex = matcher.end();
+                 }
+                // Adding the final two words
+                if (prev != null) {
+                        words.add(prev + " ");
+                }
+                words.add(toSplit.substring(prevEndIndex));
+                return words.toArray(new String[words.size()]);
+         }
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 672 for trunk

Legend:

trunk/src/org/expeditee/io/WebParser.java

Download in other formats: