source: trunk/src/org/expeditee/io/WebParser.java@ 753

Last change on this file since 753 was 753, checked in by jts21, 10 years ago

Can't cast from Object to boolean, changed to Boolean

File size: 38.9 KB
Line 
1package org.expeditee.io;
2
3import java.awt.Color;
4import java.awt.Font;
5import java.awt.Graphics;
6import java.awt.image.BufferedImage;
7import java.io.File;
8import java.io.IOException;
9import java.lang.reflect.InvocationTargetException;
10import java.net.HttpURLConnection;
11import java.net.MalformedURLException;
12import java.net.URL;
13import java.util.Arrays;
14
15/*
16 * JavaFX is not on the default java classpath until Java 8 (but is still included with Java 7), so your IDE will probably complain that the imports below can't be resolved.
17 * In Eclipse, hitting 'Proceed' when told 'Errors exist in project' should allow you to run Expeditee without any issues (although the JFX Browser widget will not display),
18 * or you can just exclude JfxBrowser, WebParser and JfxbrowserActions from the build path.
19 *
20 * If you are using Ant to build/run, 'ant build' will try to build with JavaFX jar added to the classpath.
21 * If this fails, 'ant build-nojfx' will build with the JfxBrowser, WebParser and JfxbrowserActions excluded from the build path.
22 */
23
24import javafx.animation.AnimationTimer;
25import javafx.application.Platform;
26import javafx.beans.value.ChangeListener;
27import javafx.beans.value.ObservableValue;
28import javafx.concurrent.Worker.State;
29import javafx.scene.web.WebEngine;
30
31import javax.imageio.ImageIO;
32import javax.swing.JComponent;
33
34import netscape.javascript.JSObject;
35
36import org.expeditee.gui.Frame;
37import org.expeditee.gui.FrameCreator;
38import org.expeditee.gui.FrameIO;
39import org.expeditee.gui.FrameUtils;
40import org.expeditee.gui.MessageBay;
41import org.expeditee.gui.MessageBay.Progress;
42import org.expeditee.items.ItemUtils;
43import org.expeditee.items.Justification;
44import org.expeditee.items.Picture;
45import org.expeditee.items.Text;
46import org.w3c.dom.Node;
47import org.w3c.dom.html.HTMLBodyElement;
48
49/**
50 * Methods to convert webpages to Expeditee frames
51 *
52 * @author ngw8
53 * @author jts21
54 */
55public class WebParser {
56
57
58 /**
59 * Loads a webpage and renders it as Expeditee frame(s)
60 *
61 * @param URL
62 * Page to load
63 * @param frame
64 * The Expeditee frame to output the converted page to
65 */
66 public static void parseURL(final String URL, final Frame frame) {
67 try {
68 Platform.runLater(new Runnable() {
69 @Override
70 public void run() {
71 try {
72 WebEngine webEngine = new WebEngine(URL);
73 loadPage(webEngine, frame);
74 } catch (Exception e) {
75 e.printStackTrace();
76 }
77 }
78 });
79 } catch (Exception e) {
80 e.printStackTrace();
81 }
82 }
83
84 protected static void loadPage(final WebEngine webEngine, final Frame frame) throws Exception {
85 webEngine.getLoadWorker().stateProperty().addListener(new ChangeListener<State>() {
86
87 @Override
88 public void changed(ObservableValue<? extends State> ov, State oldState, State newState) {
89
90 switch (newState) {
91 case READY: // READY
92 // MessageBay.displayMessage("WebEngine ready");
93 break;
94 case SCHEDULED: // SCHEDULED
95 // MessageBay.displayMessage("Scheduled page load");
96 break;
97 case RUNNING: // RUNNING
98 System.out.println("Loading page!");
99 // MessageBay.displayMessage("WebEngine running");
100 break;
101 case SUCCEEDED: // SUCCEEDED
102 // MessageBay.displayMessage("Finished loading page");
103 System.out.println("Parsing page!");
104 webEngine.executeScript("window.resizeTo(800, 800);"
105 + "document.body.style.width = '1000px'");
106 parsePage(webEngine, frame);
107 System.out.println("Parsed page!");
108 break;
109 case CANCELLED: // CANCELLED
110 MessageBay.displayMessage("Cancelled loading page");
111 break;
112 case FAILED: // FAILED
113 MessageBay.displayMessage("Failed to load page");
114 break;
115 }
116 }
117 });
118 }
119
120 /**
121 * Converts a loaded page to Expeditee frame(s)
122 *
123 * @param webEngine
124 * The JavaFX WebEngine in which the page to be converted is loaded
125 * @param frame
126 * The Expeditee frame to output the converted page to
127 */
128 public static void parsePage(final WebEngine webEngine, final Frame frame) {
129 try {
130 Platform.runLater(new Runnable() {
131 @Override
132 public void run() {
133 try {
134 Progress progressBar = MessageBay.displayProgress("Converting web page");
135
136 Node doc = (Node) webEngine.executeScript("document.body");
137
138 JSObject window = (JSObject) webEngine.executeScript("window");
139
140 frame.setBackgroundColor(rgbStringToColor((String) ((JSObject) (window.call("getComputedStyle", new Object[] { doc }))).call("getPropertyValue",
141 new Object[] { "background-color" })));
142
143 // Functions to be used later in JavaScript
144 webEngine.executeScript(""
145 + "function addToSpan(text) {"
146 + " span = document.createElement('wordSpan');"
147 + " span.textContent = text;"
148 + " par.insertBefore(span, refNode);"
149 // Checking if the current word is on a new line (i.e. lower than the previous word)
150 + " if (prevSpan !== null && span.getBoundingClientRect().top > prevSpan.getBoundingClientRect().top) {"
151 // If it is, prepend a new line character to it. The new line characters doesn't affect the rendered HTML
152 + " span.textContent = '\\n' + span.textContent;"
153
154 // Checking if the previous word is horizontally aligned with the one before it.
155 // If it is, merge the text of the two spans
156 + " if ( prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
157 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
158 + " par.removeChild(prevSpan);"
159 + " } else {"
160 + " prevPrevSpan = prevSpan;"
161 + " }"
162 + " prevSpan = span;"
163 + " } else if ( prevSpan !== null) {"
164 // Word is on the same line as the previous one, so merge the second into the span of the first
165 + " prevSpan.textContent = prevSpan.textContent + span.textContent;"
166 + " par.removeChild(span);"
167 + " } else {"
168 + " prevSpan = span;"
169 + " }"
170 + "}"
171
172 + "function splitIntoWords(toSplit) {"
173 + " var words = [];"
174 + " var pattern = /\\s+/g;"
175 + " var words = toSplit.split(pattern);"
176 + ""
177 + " for (var i = 0; i < words.length - 1; i++) {"
178 + " words[i] = words[i] + ' ';"
179 + " }"
180 + " return words;"
181 + "}"
182 );
183
184 // Using Javascript to get an array of all the text nodes in the document so they can be wrapped in spans. Have to
185 // loop through twice (once to build the array and once actually going through the array, otherwise when the
186 // textnode is removed from the document items end up being skipped)
187 JSObject textNodes = (JSObject) webEngine.executeScript(""
188 + "function getTextNodes(rootNode){"
189 + "var node;"
190 + "var textNodes=[];"
191 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT);"
192 + "while(node=walk.nextNode()) {"
193 + "if((node.textContent.trim().length > 0)) { "
194 + "textNodes.push(node);"
195 + "}"
196 + "}"
197 + "return textNodes;"
198 + "}; "
199 + "getTextNodes(document.body)"
200 );
201
202 int nodesLength = (Integer) textNodes.getMember("length");
203
204 // Looping through all the text nodes in the document
205 for (int j = 0; j < nodesLength; j++) {
206 Node currentNode = (Node) textNodes.getSlot(j);
207
208 // Making the current node accessible in JavaScript
209 window.setMember("currentNode", currentNode);
210
211 webEngine.executeScript(""
212 + "var span = null, prevSpan = null, prevPrevSpan = null;"
213
214 // Removing repeated whitespace from the text node's content then splitting it into individual words
215 + "var textContent = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');"
216 + "var words = splitIntoWords(textContent);"
217
218 + "var refNode = currentNode.nextSibling;"
219 + "var par = currentNode.parentElement;"
220 + "currentNode.parentElement.removeChild(currentNode);"
221
222 + "for (var i = 0; i < words.length; i++) {"
223 + " addToSpan(words[i]);"
224 + "}"
225
226 + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
227 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
228 + " par.removeChild(prevSpan);"
229 + "}"
230 );
231
232 // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of
233 // the addPageToFrame loop below
234 progressBar.set((100 * (j)) / nodesLength);
235 }
236
237 // Finding all links within the page, then setting the href attribute of all their descendants to be the same
238 // link/URL.
239 // This is needed because there is no apparent and efficient way to check if an element is a child of a link when
240 // running through the document when added each element to Expeditee
241 webEngine.executeScript(""
242 + "var anchors = document.getElementsByTagName('a');"
243 + ""
244 + "for (var i = 0; i < anchors.length; i++) {"
245 + "var currentAnchor = anchors.item(i);"
246 + "var anchorDescendants = currentAnchor.querySelectorAll('*');"
247 + "for (var j = 0; j < anchorDescendants.length; j++) {"
248 + "anchorDescendants.item(j).href = currentAnchor.href;"
249 + "}"
250 + "}"
251 );
252
253 WebParser.addPageToFrame(doc, window, webEngine, frame);
254
255 progressBar.set(100);
256
257 } catch (Exception e) {
258 e.printStackTrace();
259 }
260 System.out.println("Parsed frame");
261 FrameUtils.Parse(frame);
262 frame.setChanged(true);
263 FrameIO.SaveFrame(frame);
264 }
265 });
266 } catch (Exception e) {
267 e.printStackTrace();
268 }
269 }
270
271 /**
272 * Converts a loaded page to Expeditee frame(s)
273 *
274 * @param webEngine
275 * The JavaFX WebEngine in which the page to be converted is loaded
276 * @param frame
277 * The Expeditee frame to output the converted page to
278 */
279 public static void parsePageSimple(final WebEngine webEngine, final Object webView, final JComponent jfxPanel, final Frame frame) {
280 try {
281 final Object notifier = new Object();
282 final MutableBool bottomReached = new MutableBool(false);
283
284 final Progress progressBar = MessageBay.displayProgress("Converting web page");
285
286 AnimationTimer timer = new AnimationTimer() {
287
288 int frameCount = 0;
289 Frame frameToAddTo = frame;
290
291 @Override
292 public void handle(long arg0) {
293 // Must wait 2 frames before taking a snapshot of the webview, otherwise JavaFX won't have redrawn
294 if (frameCount++ > 1) {
295 frameCount = 0;
296 this.stop();
297
298 BufferedImage image = new BufferedImage(jfxPanel.getWidth(), jfxPanel.getHeight(), BufferedImage.TYPE_INT_ARGB);
299
300 Graphics graphics = image.createGraphics();
301
302 // Drawing the JfxPanel (containing the webview) to the image
303 jfxPanel.paint(graphics);
304
305 try {
306 int hashcode = Arrays.hashCode(image.getData().getPixels(0, 0, image.getWidth(), image.getHeight(), (int[]) null));
307
308 File out = new File(FrameIO.IMAGES_PATH + "webpage-" + Integer.toHexString(hashcode) + ".png");
309 out.mkdirs();
310 ImageIO.write(image, "png", out);
311
312 Text link = new Text("Next");
313 link.setPosition(500, 20);
314 frameToAddTo.addItem(link);
315
316 FrameIO.SaveFrame(frameToAddTo);
317
318 frameToAddTo = FrameIO.CreateFrame(frame.getFramesetName(), Integer.toHexString(hashcode), null);
319
320 link.setLink(frameToAddTo.getName());
321
322 // Adding the image
323 frameToAddTo.addText(0, 0, "@i: " + out.getName(), null);
324
325 // Button to go to the next page
326 Text nextButton = (Text) FrameCreator.createButton("Next", null, null, 10F, 10F);
327 nextButton.setID(frameToAddTo.getNextItemID());
328 nextButton.addAction("next");
329 frameToAddTo.addItem(nextButton);
330
331 FrameIO.SaveFrame(frameToAddTo);
332 System.out.println("C");
333
334 } catch (IOException e) {
335 e.printStackTrace();
336 }
337
338
339 graphics.dispose();
340 image.flush();
341
342 synchronized (notifier) {
343 notifier.notify();
344 }
345
346 try {
347 Platform.runLater(new Runnable() {
348 @Override
349 public void run() {
350 try {
351 HTMLBodyElement doc = (HTMLBodyElement) webEngine.executeScript("document.body");
352
353 JSObject window = (JSObject) webEngine.executeScript("window");
354
355 System.out.println("adding");
356 WebParser.addPageToFrame(doc, window, webEngine, frameToAddTo);
357 } catch (Exception ex) {
358 ex.printStackTrace();
359 }
360 }
361 });
362 } catch (Exception ex) {
363 ex.printStackTrace();
364 }
365 }
366 }
367 };
368
369 Platform.runLater(new Runnable() {
370 @Override
371 public void run() {
372 try {
373
374 webEngine.executeScript(""
375 // Initializing the counter used when scrolling the page
376 + "var scrollCounter = 0;"
377
378 // Setting all text to be hidden
379 + "var css = document.createElement('style');"
380 + "css.type = 'text/css';"
381 + "var style = 'WordSpan { visibility: hidden }';"
382 + "css.appendChild(document.createTextNode(style));"
383 + "document.getElementsByTagName('head')[0].appendChild(css);");
384
385 HTMLBodyElement doc = (HTMLBodyElement) webEngine.executeScript("document.body");
386
387 JSObject window = (JSObject) webEngine.executeScript("window");
388
389 frame.setBackgroundColor(rgbStringToColor((String) ((JSObject) (window.call("getComputedStyle", new Object[] { doc }))).call("getPropertyValue",
390 new Object[] { "background-color" })));
391
392 // Functions to be used later in JavaScript
393 webEngine.executeScript(""
394 + "function addToSpan(text) {"
395 + " span = document.createElement('wordSpan');"
396 + " span.textContent = text;"
397 + " par.insertBefore(span, refNode);"
398 + " if (prevSpan !== null && span.getBoundingClientRect().top > prevSpan.getBoundingClientRect().top) {"
399 + " span.textContent = '\\n' + span.textContent;"
400 + " if ( prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
401 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
402 + " par.removeChild(prevSpan);"
403 + " } else {"
404 + " prevPrevSpan = prevSpan;"
405 + " }"
406 + " prevSpan = span;"
407 + " } else if ( prevSpan !== null) {"
408 + " prevSpan.textContent = prevSpan.textContent + span.textContent;"
409 + " par.removeChild(span);"
410 + " } else {"
411 + " prevSpan = span;"
412 + " }"
413 + "}"
414
415 + "function splitIntoWords(toSplit) {"
416 + " var words = [];"
417 + " var pattern = /\\s+/g;"
418 + " var words = toSplit.split(pattern);"
419 + ""
420 + " for (var i = 0; i < words.length - 1; i++) {"
421 + " words[i] = words[i] + ' ';"
422 + " }"
423 + " return words;"
424 + "}"
425 );
426
427 // Using Javascript to get an array of all the text nodes in the document so they can be wrapped in spans. Have to
428 // loop through twice (once to build the array and once actually going through the array, otherwise when the
429 // textnode is removed from the document items end up being skipped)
430 JSObject textNodes = (JSObject) webEngine.executeScript(""
431 + "function getTextNodes(rootNode){"
432 + "var node;"
433 + "var textNodes=[];"
434 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT);"
435 + "while(node=walk.nextNode()) {"
436 + "if((node.textContent.trim().length > 0)) { "
437 + "textNodes.push(node);"
438 + "}"
439 + "}"
440 + "return textNodes;"
441 + "}; "
442 + "getTextNodes(document.body)"
443 );
444
445 int nodesLength = (Integer) textNodes.getMember("length");
446
447 // Looping through all the text nodes in the document
448 for (int j = 0; j < nodesLength; j++) {
449 Node currentNode = (Node) textNodes.getSlot(j);
450
451 // Making the current node accessible in JavaScript
452 window.setMember("currentNode", currentNode);
453
454 webEngine.executeScript(""
455 + "var span = null, prevSpan = null, prevPrevSpan = null;"
456
457 // Removing repeated whitespace from the text node's content then splitting it into individual words
458 + "var textContent = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');"
459 + "var words = splitIntoWords(textContent);"
460
461 + "var refNode = currentNode.nextSibling;"
462 + "var par = currentNode.parentElement;"
463 + "currentNode.parentElement.removeChild(currentNode);"
464
465 + "for (var i = 0; i < words.length; i++) {"
466 + " addToSpan(words[i]);"
467 + "}"
468
469 + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
470 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
471 + " par.removeChild(prevSpan);"
472 + "}"
473 );
474
475 // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of
476 // the addPageToFrame loop below
477 progressBar.set((100 * (j)) / nodesLength);
478 }
479
480 // Finding all links within the page, then setting the href attribute of all their descendants to be the same
481 // link/URL.
482 // This is needed because there is no apparent and efficient way to check if an element is a child of a link when
483 // running through the document when added each element to Expeditee
484 webEngine.executeScript(""
485 + "var anchors = document.getElementsByTagName('a');"
486 + ""
487 + "for (var i = 0; i < anchors.length; i++) {"
488 + "var currentAnchor = anchors.item(i);"
489 + "var anchorDescendants = currentAnchor.querySelectorAll('*');"
490 + "for (var j = 0; j < anchorDescendants.length; j++) {"
491 + "anchorDescendants.item(j).href = currentAnchor.href;"
492 + "}"
493 + "}"
494 );
495
496 } catch (Exception ex) {
497 ex.printStackTrace();
498 }
499
500 synchronized (notifier) {
501 notifier.notify();
502 }
503 }
504 });
505
506 synchronized (notifier) {
507 try {
508 // Waiting for the JavaFX thread to finish
509 notifier.wait();
510 } catch (InterruptedException e) {
511 // TODO Auto-generated catch block
512 e.printStackTrace();
513 }
514 }
515
516 while (!bottomReached.getValue()) {
517 Platform.runLater(new Runnable() {
518 @Override
519 public void run() {
520 try {
521 // Scrolling down the page
522 webEngine.executeScript(""
523 + "window.scrollTo(0, scrollCounter * window.innerHeight);"
524 + "scrollCounter = scrollCounter+1;");
525
526 System.out.println('B');
527
528 bottomReached.setValue((Boolean) webEngine.executeScript("(window.pageYOffset + window.innerHeight >= document.documentElement.scrollHeight)"));
529
530 synchronized (notifier) {
531 notifier.notify();
532 }
533
534 } catch (Exception e) {
535 e.printStackTrace();
536 }
537 }
538 });
539
540 synchronized (notifier) {
541 try {
542 // Waiting for the JavaFX thread to finish
543 notifier.wait();
544 } catch (InterruptedException e) {
545 // TODO Auto-generated catch block
546 e.printStackTrace();
547 }
548 }
549
550 timer.start();
551
552 synchronized (notifier) {
553 try {
554 // Waiting for the timer thread to finish before looping again
555 notifier.wait();
556 } catch (InterruptedException e) {
557 // TODO Auto-generated catch block
558 e.printStackTrace();
559 }
560 }
561
562 }
563
564 } catch (Exception ex) {
565 ex.printStackTrace();
566 }
567
568
569
570 }
571
572 /**
573 * @param rgbString
574 * string in the format <i>rgb(x,x,x)</i> or <i>rgba(x,x,x,x)</i>
575 * @return A Color object that should match the rgb string passed int. Returns null if alpha is 0
576 */
577 private static Color rgbStringToColor(String rgbString) {
578
579 if (rgbString == null) {
580 return null;
581 }
582
583 // Splitting the string into 'rgb' and 'x, x, x'
584 String[] tmpStrings = rgbString.split("\\(|\\)");
585
586 // Splitting up the RGB(A) components into an array
587 tmpStrings = tmpStrings[1].split(",");
588
589 int[] components = new int[4];
590 Arrays.fill(components, 255);
591
592 for (int i = 0; i < tmpStrings.length; i++) {
593 Float d = Float.parseFloat(tmpStrings[i].trim());
594
595 components[i] = Math.round(d);
596 }
597
598 if (components[3] > 0) {
599 return new Color(components[0], components[1], components[2], components[3]);
600 } else {
601 return null;
602 }
603 }
604
605 /**
606 * @param rootElement
607 * Element that will be converted (including all sub-elements)
608 * @param backgroundColor
609 * String to be used as the background color of this element when added. In the format "rgb(x,x,x)" or "rgba(x,x,x,x)"
610 * @param window
611 * 'window' from Javascript
612 * @param webEngine
613 * Web engine that the page is loaded in
614 * @param frame
615 * Expeditee frame to add the converted page to
616 * @throws IllegalArgumentException
617 * @throws IllegalAccessException
618 */
619 private static void addPageToFrame(Node rootElement, JSObject window, WebEngine webEngine, Frame frame) throws InvocationTargetException, IllegalAccessException,
620 IllegalArgumentException {
621
622 Node currentNode = rootElement;
623
624 if (currentNode.getNodeType() == Node.TEXT_NODE || currentNode.getNodeType() == Node.ELEMENT_NODE) {
625
626 JSObject style;
627 JSObject bounds;
628
629 if (currentNode.getNodeType() == Node.TEXT_NODE) {
630 // CSS style for the element
631 style = (JSObject) window.call("getComputedStyle", new Object[] { currentNode.getParentNode() });
632
633 // Getting a rectangle that represents the area and position of the element
634 bounds = (JSObject) ((JSObject) currentNode.getParentNode()).call("getBoundingClientRect", new Object[] {});
635 } else {
636 style = (JSObject) window.call("getComputedStyle", new Object[] { currentNode });
637
638 bounds = (JSObject) ((JSObject) currentNode).call("getBoundingClientRect", new Object[] {});
639 }
640
641 // Bounding rectangle position is relative to the current view, so scroll position must be added to x/y
642 // TODO: This doesn't check if an element or any of its parent elements have position:fixed set - the only
643 // way to check seems to be to walking through the element's parents until the document root is reached
644 float x = Float.valueOf(bounds.getMember("left").toString()) + Float.valueOf(webEngine.executeScript("window.pageXOffset").toString());
645 float y = Float.valueOf(bounds.getMember("top").toString()) + Float.valueOf(webEngine.executeScript("window.pageYOffset").toString());
646
647 float width = Float.valueOf(bounds.getMember("width").toString());
648 float height = Float.valueOf(bounds.getMember("height").toString());
649
650 // Checking if the element is actually visible on the page
651 if (WebParser.elementVisible(x, y, width, height, style)) {
652
653 // Filtering the node type, starting with text nodes
654 if (currentNode.getNodeType() == Node.TEXT_NODE) {
655
656 String fontSize = ((String) style.call("getPropertyValue", new Object[] { "font-size" }));
657
658 // Trimming off the units (always px) from the font size
659 fontSize = fontSize.substring(0, fontSize.length() - 2);
660
661 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
662 String color = (String) style.call("getPropertyValue", new Object[] { "color" });
663
664 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
665 String bgColorString = (String) style.call("getPropertyValue", new Object[] { "background-color" });
666
667 String align = (String) style.call("getPropertyValue", new Object[] { "text-align" });
668
669 // Returns comma-separated list of typefaces
670 String typeface = (String) style.call("getPropertyValue", new Object[] { "font-family" });
671
672 String[] typefaces = typeface.split(", |,");
673
674 String weight = (String) style.call("getPropertyValue", new Object[] { "font-weight" });
675
676 String fontStyle = (String) style.call("getPropertyValue", new Object[] { "font-style" });
677
678 // Returns "normal" or a value in pixels (e.g. "10px")
679 String letterSpacing = (String) style.call("getPropertyValue", new Object[] { "letter-spacing" });
680
681 // Returns a value in pixels (e.g. "10px")
682 String lineHeight = (String) style.call("getPropertyValue", new Object[] { "line-height" });
683
684 String textTransform = (String) style.call("getPropertyValue", new Object[] { "text-transform" });
685
686 String linkUrl = (String) ((JSObject) currentNode.getParentNode()).getMember("href");
687
688 Boolean fontFound = false;
689 Font font = new Font(null);
690
691 // Looping through all font-families listed in the element's CSS until one that is installed is
692 // found, or the end of the list is reached, in which case the default font is used
693 for (int j = 0; j < typefaces.length && !fontFound; j++) {
694 if (typefaces[j].toLowerCase().equals("sans-serif")) {
695 typefaces[j] = "Arial Unicode MS";
696 } else if (typefaces[j].toLowerCase().equals("serif")) {
697 typefaces[j] = "Times New Roman";
698 } else if ((typefaces[j].toLowerCase().equals("arial"))) {
699 // Have to use Arial Unicode, otherwise unicode characters display incorrectly
700 typefaces[j] = "Arial Unicode MS";
701 }
702
703 // Regex will remove any inverted commas surrounding multi-word typeface names
704 font = new Font(typefaces[j].replaceAll("^'|'$", ""), Font.PLAIN, 12);
705
706 // If the font isn't found, Java just uses Font.DIALOG, so this check checks whether the font was found
707 if (!(font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase()))) {
708 fontFound = true;
709 }
710 }
711
712 if (font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase())) {
713 font = new Font("Times New Roman", Font.PLAIN, 12);
714 }
715
716 String fontStyleComplete = "";
717
718 int weightInt = 0;
719
720 try {
721 weightInt = Integer.parseInt(weight);
722 } catch (NumberFormatException nfe) {
723 // Use default value as set above
724 }
725
726 // checking if font is bold - i.e. 'bold', 'bolder' or weight over 500
727 if (weight.toLowerCase().startsWith("bold") || weightInt > 500) {
728 fontStyleComplete = fontStyleComplete.concat("bold");
729 }
730
731 if (fontStyle.toLowerCase().equals("italic") || fontStyle.toLowerCase().equals("oblique")) {
732 fontStyleComplete = fontStyleComplete.concat("italic");
733 }
734
735 float fontSizeFloat = 12;
736
737 try {
738 fontSizeFloat = Float.valueOf(fontSize);
739 } catch (NumberFormatException nfe) {
740 // Use default value as set above
741 }
742
743 float letterSpacingFloat = -0.008f;
744
745 try {
746 letterSpacingFloat = (Integer.parseInt(letterSpacing.substring(0, letterSpacing.length() - 2)) / (fontSizeFloat));
747 } catch (NumberFormatException nfe) {
748 // Use default value as set above
749 }
750
751 float lineHeightInt = -1;
752
753 try {
754 lineHeightInt = (Float.parseFloat(lineHeight.substring(0, lineHeight.length() - 2)));
755 } catch (NumberFormatException nfe) {
756 // Use default value as set above
757 }
758
759 Text t;
760
761 String textContent = currentNode.getTextContent().replaceAll("[^\\S\\n]+", " ");
762 textContent = textContent.replaceAll("^(\\s)(\\n|\\r)", "");
763
764 if (textTransform.equals("uppercase")) {
765 textContent = textContent.toUpperCase();
766 } else if (textTransform.equals("lowercase")) {
767 textContent = textContent.toUpperCase();
768 }
769
770 // Adding the text to the frame. Expeditee text seems to be positioned relative to the baseline of the first line, so
771 // the font size has to be added to the y-position
772 t = frame.addText(Math.round(x), Math.round(y + fontSizeFloat), textContent, null);
773
774 t.setColor(rgbStringToColor(color));
775 t.setBackgroundColor(rgbStringToColor(bgColorString));
776 t.setFont(font);
777 t.setSize(fontSizeFloat);
778 t.setFontStyle(fontStyleComplete);
779 t.setLetterSpacing(letterSpacingFloat);
780
781 // Removing any spacing between lines allowing t.getLineHeight() to be used to get the actual height
782 // of just the characters (i.e. distance from ascenders to descenders)
783 t.setSpacing(0);
784
785 t.setSpacing(lineHeightInt - t.getLineHeight());
786
787 if (align.equals("left")) {
788 t.setJustification(Justification.left);
789 } else if (align.equals("right")) {
790 t.setJustification(Justification.right);
791 } else if (align.equals("center")) {
792 t.setJustification(Justification.center);
793 } else if (align.equals("justify")) {
794 t.setJustification(Justification.full);
795 }
796
797 // Font size is added to the item width to give a little breathing room
798 t.setWidth(Math.round(width + (t.getSize())));
799
800 if (!linkUrl.equals("undefined")) {
801 t.setAction("gotourl " + linkUrl);
802 t.setActionMark(false);
803 }
804
805 } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
806
807 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
808 String bgColorString = (String) style.call("getPropertyValue", new Object[] { "background-color" });
809
810 Color bgColor = rgbStringToColor(bgColorString);
811
812 // If the element has a background color then add it (to Expeditee) as a rectangle with that background color
813 if (bgColor != null) {
814 System.out.println("bg");
815 frame.addRectangle(Math.round(x), Math.round(y), Math.round(width), Math.round(height), 0, null, bgColor);
816 }
817
818 // background image, returns in format "url(protocol://absolute/path/to/img.extension)" for images,
819 // may also return gradients, data, etc. (not handled yet). Only need to add bg image on
820 // 'ELEMENT_NODE' (and not 'TEXT_NODE' otherwise there would be double-ups
821 String bgImage = (String) style.call("getPropertyValue", new Object[] { "background-image" });
822
823 String linkUrl = (String) ((JSObject) currentNode).getMember("href");
824
825 if (bgImage.startsWith("url(")) {
826 bgImage = bgImage.substring(4, bgImage.length() - 1);
827
828 String bgSize = ((String) style.call("getPropertyValue", new Object[] { "background-size" })).toLowerCase();
829 String bgRepeat = ((String) style.call("getPropertyValue", new Object[] { "background-repeat" })).toLowerCase();
830
831 // Returns "[x]px [y]px", "[x]% [y]%", "[x]px [y]%" or "[x]% [y]px"
832 String bgPosition = ((String) style.call("getPropertyValue", new Object[] { "background-position" })).toLowerCase();
833
834 String[] bgOffsetCoords = bgPosition.split(" ");
835
836 int bgOffsetX = 0, bgOffsetY = 0;
837
838 float originXPercent = 0, originYPercent = 0;
839
840 int cropStartX, cropStartY, cropEndX, cropEndY;
841
842 // Converting the x and y offset values to integers (and from % to px if needed)
843 if (bgOffsetCoords[0].endsWith("%")) {
844 bgOffsetX = (int) ((Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1)) / 100.0) * width);
845 originXPercent = (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1))) / 100f;
846 } else if (bgOffsetCoords[0].endsWith("px")) {
847 bgOffsetX = (int) (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 2)));
848 }
849
850 if (bgOffsetCoords[1].endsWith("%")) {
851 bgOffsetY = (int) ((Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1)) / 100.0) * height);
852 originYPercent = (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1))) / 100f;
853 } else if (bgOffsetCoords[1].endsWith("px")) {
854 bgOffsetY = (int) (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 2)));
855 }
856
857 // Converting from an offset to crop coords
858 cropStartX = -1 * bgOffsetX;
859 cropEndX = (int) (cropStartX + width);
860
861 cropStartY = -1 * bgOffsetY;
862 cropEndY = (int) (cropStartY + height);
863
864 int bgWidth = -1;
865
866 if (bgSize.equals("cover")) {
867 bgWidth = (int) width;
868 } else if (bgSize.equals("contain")) {
869 // TODO: actually compute the appropriate width
870 bgWidth = (int) width;
871 } else if (bgSize.equals("auto")) {
872 bgWidth = -1;
873 } else {
874 bgSize = bgSize.split(" ")[0];
875
876 if (bgSize.endsWith("%")) {
877 bgWidth = (int) ((Integer.parseInt(bgSize.replaceAll("\\D", "")) / 100.0) * width);
878 } else if (bgSize.endsWith("px")) {
879 bgWidth = Integer.parseInt(bgSize.replaceAll("\\D", ""));
880 }
881 }
882
883 try {
884 WebParser.addImageFromUrl(bgImage, linkUrl, frame, x, y, bgWidth, cropStartX, cropStartY, cropEndX, cropEndY, bgRepeat, originXPercent, originYPercent);
885 } catch (MalformedURLException mue) {
886 // probably a 'data:' url, not supported yet
887 mue.printStackTrace();
888 } catch (IOException e) {
889 // TODO Auto-generated catch block
890 e.printStackTrace();
891 }
892 }
893
894 String imgSrc;
895
896 if (currentNode.getNodeName().toLowerCase().equals("img") && (imgSrc = ((JSObject) currentNode).getMember("src").toString()) != null) {
897 try {
898 WebParser.addImageFromUrl(imgSrc, linkUrl, frame, x, y, (int) width, null, null, null, null, null, 0, 0);
899 } catch (MalformedURLException mue) {
900 // probably a 'data:' url, not supported yet
901 mue.printStackTrace();
902 } catch (IOException e) {
903 // TODO Auto-generated catch block
904 e.printStackTrace();
905 }
906 }
907 }
908 }
909
910 Node childNode = currentNode.getFirstChild();
911
912 while (childNode != null) {
913 addPageToFrame(childNode, window, webEngine, frame);
914 childNode = childNode.getNextSibling();
915 }
916 }
917 }
918
919 private static boolean elementVisible(float x, float y, float width, float height, JSObject style) {
920 try {
921 if (width <= 0 || height <= 0 || x + width <= 0 || y + height <= 0 || ((String) style.call("getPropertyValue", new Object[] { "visibility" })).equals("hidden")
922 || ((String) style.call("getPropertyValue", new Object[] { "display" })).equals("none")) {
923 return false;
924 } else {
925 return true;
926 }
927 } catch (Exception e) {
928 e.printStackTrace();
929 return false;
930 }
931 }
932
933 /**
934 * @param imgSrc
935 * URL of the image to add
936 * @param linkUrl
937 * Absolute URL that the image should link to when clicked
938 * @param frame
939 * Frame to add the image to
940 * @param x
941 * X-coordinate at which the image should be placed on the frame
942 * @param y
943 * Y-coordinate at which the image should be placed on the frame
944 * @param width
945 * Width of the image once added to the frame. Negative 1 (-1) will cause the actual width of the image file to be used
946 *
947 * @param cropStartX
948 * X-coordinate at which to start crop, or null for no crop
949 * @param cropStartY
950 * Y-coordinate at which to start crop, or null for no crop
951 * @param cropEndX
952 * X-coordinate at which to end the crop, or null for no crop
953 * @param cropEndY
954 * Y-coordinate at which to end the crop, or null for no crop
955 *
956 * @param repeat
957 * String determining how the image should be tiled/repeated. Valid strings are: <i>no-repeat</i>, <i>repeat-x</i>, or
958 * <i>repeat-y</i>. All other values (including null) will cause the image to repeat in both directions
959 *
960 * @param originXPercent
961 * Percentage into the image to use as the x coordinate of the image's origin point
962 * @param originYPercent
963 * Percentage into the image to use as the y coordinate of the image's origin point
964 *
965 * @throws MalformedURLException
966 * @throws IOException
967 */
968 private static void addImageFromUrl(String imgSrc, String linkUrl, final Frame frame, float x, float y, int width, Integer cropStartX, Integer cropStartY, Integer cropEndX, Integer cropEndY, String repeat,
969 float originXPercent, float originYPercent)
970 throws MalformedURLException,
971 IOException {
972
973 URL imgUrl = new URL(imgSrc);
974
975 HttpURLConnection connection = (HttpURLConnection) (imgUrl.openConnection());
976
977 // Spoofing a widely accepted User Agent, since some sites refuse to serve non-webbrowser clients
978 connection.setRequestProperty("User-Agent", "Mozilla/5.0");
979
980 BufferedImage img = ImageIO.read(connection.getInputStream());
981
982 int hashcode = Arrays.hashCode(img.getData().getPixels(0, 0, img.getWidth(), img.getHeight(), (int[]) null));
983 File out = new File(FrameIO.IMAGES_PATH + Integer.toHexString(hashcode) + ".png");
984 out.mkdirs();
985 ImageIO.write(img, "png", out);
986
987 if (cropEndX == null || cropStartX == null || cropEndY == null || cropStartY == null) {
988 cropStartX = 0;
989 cropStartY = 0;
990 cropEndX = img.getWidth();
991 cropEndY = img.getHeight();
992 } else if (cropStartX < 0) {
993 cropEndX = cropEndX - cropStartX;
994 x = x + Math.abs(cropStartX);
995 cropStartX = 0;
996 }
997
998 if (cropStartY < 0) {
999 cropEndY = cropEndY - cropStartY;
1000 y = y + Math.abs(cropStartY);
1001 cropStartY = 0;
1002 }
1003
1004 if (width < 0) {
1005 width = img.getWidth();
1006 }
1007
1008 if (repeat != null) {
1009 if (repeat.equals("no-repeat")) {
1010 int tmpCropEndY = (int) (cropStartY + ((float) width / img.getWidth()) * img.getHeight());
1011 int tmpCropEndX = cropStartX + width;
1012
1013 cropEndX = (cropEndX < tmpCropEndX) ? cropEndX : tmpCropEndX;
1014 cropEndY = (cropEndY < tmpCropEndY) ? cropEndY : tmpCropEndY;
1015 } else if (repeat.equals("repeat-x")) {
1016 int tmpCropEndY = (int) (cropStartY + ((float) width / img.getWidth()) * img.getHeight());
1017 cropEndY = (cropEndY < tmpCropEndY) ? cropEndY : tmpCropEndY;
1018 } else if (repeat.equals("repeat-y")) {
1019 int tmpCropEndX = cropStartX + width;
1020 cropEndX = (cropEndX < tmpCropEndX) ? cropEndX : tmpCropEndX;
1021 }
1022 }
1023
1024 if (originXPercent > 0) {
1025 int actualWidth = cropEndX - cropStartX;
1026
1027 int originXPixels = Math.round(originXPercent * actualWidth);
1028
1029 x = x - originXPixels;
1030
1031 cropStartX = (int) (cropStartX + (width - actualWidth) * originXPercent);
1032 cropEndX = (int) (cropEndX + (width - actualWidth) * originXPercent);
1033 }
1034
1035 if (originYPercent > 0) {
1036 int height = (int) ((img.getHeight() / (float) img.getWidth()) * width);
1037 int actualHeight = (cropEndY - cropStartY);
1038 int originYPixels = Math.round(originYPercent * actualHeight);
1039
1040 y = y - originYPixels;
1041
1042 cropStartY = (int) (cropStartY + (height - actualHeight) * originYPercent);
1043 cropEndY = (int) (cropEndY + (height - actualHeight) * originYPercent);
1044 }
1045
1046 Text text = new Text("@i: " + out.getName() + " " + width);
1047 text.setPosition(x, y);
1048
1049 Picture pic = ItemUtils.CreatePicture(text, frame);
1050
1051 float invScale = 1 / pic.getScale();
1052
1053 pic.setCrop((int)(cropStartX * invScale), (int)(cropStartY * invScale), (int)(cropEndX * invScale), (int)(cropEndY * invScale));
1054
1055 if (linkUrl != null && !linkUrl.equals("undefined")) {
1056 pic.setAction("goto " + linkUrl);
1057 pic.setActionMark(false);
1058 }
1059
1060 frame.addItem(pic);
1061 pic.anchor();
1062 pic.getSource().anchor();
1063 }
1064
1065 private static class MutableBool {
1066 private boolean value;
1067
1068 public MutableBool(boolean value) {
1069 this.value = value;
1070 }
1071
1072 public boolean getValue() {
1073 return value;
1074 }
1075
1076 public void setValue(boolean value) {
1077 this.value = value;
1078 }
1079 }
1080}
Note: See TracBrowser for help on using the repository browser.