source: trunk/src/org/expeditee/io/WebParser.java@ 753

Last change on this file since 753 was 753, checked in by jts21, 10 years ago

Can't cast from Object to boolean, changed to Boolean

File size: 38.9 KB
RevLine 
[566]1package org.expeditee.io;
2
[568]3import java.awt.Color;
4import java.awt.Font;
[733]5import java.awt.Graphics;
[576]6import java.awt.image.BufferedImage;
7import java.io.File;
8import java.io.IOException;
[701]9import java.lang.reflect.InvocationTargetException;
[576]10import java.net.HttpURLConnection;
11import java.net.MalformedURLException;
12import java.net.URL;
[568]13import java.util.Arrays;
[566]14
[748]15/*
16 * JavaFX is not on the default java classpath until Java 8 (but is still included with Java 7), so your IDE will probably complain that the imports below can't be resolved.
17 * In Eclipse, hitting 'Proceed' when told 'Errors exist in project' should allow you to run Expeditee without any issues (although the JFX Browser widget will not display),
18 * or you can just exclude JfxBrowser, WebParser and JfxbrowserActions from the build path.
19 *
20 * If you are using Ant to build/run, 'ant build' will try to build with JavaFX jar added to the classpath.
21 * If this fails, 'ant build-nojfx' will build with the JfxBrowser, WebParser and JfxbrowserActions excluded from the build path.
22 */
23
24import javafx.animation.AnimationTimer;
25import javafx.application.Platform;
26import javafx.beans.value.ChangeListener;
27import javafx.beans.value.ObservableValue;
28import javafx.concurrent.Worker.State;
29import javafx.scene.web.WebEngine;
30
[576]31import javax.imageio.ImageIO;
[733]32import javax.swing.JComponent;
[576]33
[748]34import netscape.javascript.JSObject;
35
[568]36import org.expeditee.gui.Frame;
[733]37import org.expeditee.gui.FrameCreator;
[576]38import org.expeditee.gui.FrameIO;
[601]39import org.expeditee.gui.FrameUtils;
[566]40import org.expeditee.gui.MessageBay;
[690]41import org.expeditee.gui.MessageBay.Progress;
[576]42import org.expeditee.items.ItemUtils;
[568]43import org.expeditee.items.Justification;
[576]44import org.expeditee.items.Picture;
[568]45import org.expeditee.items.Text;
46import org.w3c.dom.Node;
47import org.w3c.dom.html.HTMLBodyElement;
[566]48
[594]49/**
50 * Methods to convert webpages to Expeditee frames
51 *
52 * @author ngw8
53 * @author jts21
54 */
[566]55public class WebParser {
56
57
[568]58 /**
59 * Loads a webpage and renders it as Expeditee frame(s)
60 *
61 * @param URL
62 * Page to load
63 * @param frame
64 * The Expeditee frame to output the converted page to
65 */
[577]66 public static void parseURL(final String URL, final Frame frame) {
[566]67 try {
[748]68 Platform.runLater(new Runnable() {
[577]69 @Override
70 public void run() {
71 try {
[748]72 WebEngine webEngine = new WebEngine(URL);
[577]73 loadPage(webEngine, frame);
74 } catch (Exception e) {
75 e.printStackTrace();
76 }
77 }
78 });
[566]79 } catch (Exception e) {
80 e.printStackTrace();
81 }
82 }
[577]83
[748]84 protected static void loadPage(final WebEngine webEngine, final Frame frame) throws Exception {
85 webEngine.getLoadWorker().stateProperty().addListener(new ChangeListener<State>() {
86
87 @Override
88 public void changed(ObservableValue<? extends State> ov, State oldState, State newState) {
89
90 switch (newState) {
91 case READY: // READY
92 // MessageBay.displayMessage("WebEngine ready");
93 break;
94 case SCHEDULED: // SCHEDULED
95 // MessageBay.displayMessage("Scheduled page load");
96 break;
97 case RUNNING: // RUNNING
98 System.out.println("Loading page!");
99 // MessageBay.displayMessage("WebEngine running");
100 break;
101 case SUCCEEDED: // SUCCEEDED
102 // MessageBay.displayMessage("Finished loading page");
103 System.out.println("Parsing page!");
104 webEngine.executeScript("window.resizeTo(800, 800);"
105 + "document.body.style.width = '1000px'");
106 parsePage(webEngine, frame);
107 System.out.println("Parsed page!");
108 break;
109 case CANCELLED: // CANCELLED
110 MessageBay.displayMessage("Cancelled loading page");
111 break;
112 case FAILED: // FAILED
113 MessageBay.displayMessage("Failed to load page");
114 break;
115 }
116 }
117 });
[577]118 }
119
[568]120 /**
121 * Converts a loaded page to Expeditee frame(s)
122 *
123 * @param webEngine
124 * The JavaFX WebEngine in which the page to be converted is loaded
125 * @param frame
126 * The Expeditee frame to output the converted page to
127 */
[748]128 public static void parsePage(final WebEngine webEngine, final Frame frame) {
[568]129 try {
[748]130 Platform.runLater(new Runnable() {
[568]131 @Override
132 public void run() {
133 try {
[690]134 Progress progressBar = MessageBay.displayProgress("Converting web page");
135
[748]136 Node doc = (Node) webEngine.executeScript("document.body");
[568]137
[748]138 JSObject window = (JSObject) webEngine.executeScript("window");
[691]139
[748]140 frame.setBackgroundColor(rgbStringToColor((String) ((JSObject) (window.call("getComputedStyle", new Object[] { doc }))).call("getPropertyValue",
[568]141 new Object[] { "background-color" })));
[628]142
[659]143 // Functions to be used later in JavaScript
[748]144 webEngine.executeScript(""
[628]145 + "function addToSpan(text) {"
146 + " span = document.createElement('wordSpan');"
[672]147 + " span.textContent = text;"
[688]148 + " par.insertBefore(span, refNode);"
[748]149 // Checking if the current word is on a new line (i.e. lower than the previous word)
[630]150 + " if (prevSpan !== null && span.getBoundingClientRect().top > prevSpan.getBoundingClientRect().top) {"
[748]151 // If it is, prepend a new line character to it. The new line characters doesn't affect the rendered HTML
[628]152 + " span.textContent = '\\n' + span.textContent;"
[748]153
154 // Checking if the previous word is horizontally aligned with the one before it.
155 // If it is, merge the text of the two spans
[659]156 + " if ( prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
[688]157 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
158 + " par.removeChild(prevSpan);"
[628]159 + " } else {"
[688]160 + " prevPrevSpan = prevSpan;"
161 + " }"
[628]162 + " prevSpan = span;"
[659]163 + " } else if ( prevSpan !== null) {"
[748]164 // Word is on the same line as the previous one, so merge the second into the span of the first
[688]165 + " prevSpan.textContent = prevSpan.textContent + span.textContent;"
166 + " par.removeChild(span);"
167 + " } else {"
168 + " prevSpan = span;"
[628]169 + " }"
[695]170 + "}"
171
172 + "function splitIntoWords(toSplit) {"
173 + " var words = [];"
174 + " var pattern = /\\s+/g;"
175 + " var words = toSplit.split(pattern);"
176 + ""
177 + " for (var i = 0; i < words.length - 1; i++) {"
178 + " words[i] = words[i] + ' ';"
179 + " }"
180 + " return words;"
181 + "}"
[628]182 );
183
[693]184 // Using Javascript to get an array of all the text nodes in the document so they can be wrapped in spans. Have to
185 // loop through twice (once to build the array and once actually going through the array, otherwise when the
186 // textnode is removed from the document items end up being skipped)
[748]187 JSObject textNodes = (JSObject) webEngine.executeScript(""
[693]188 + "function getTextNodes(rootNode){"
189 + "var node;"
190 + "var textNodes=[];"
191 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT);"
192 + "while(node=walk.nextNode()) {"
193 + "if((node.textContent.trim().length > 0)) { "
194 + "textNodes.push(node);"
195 + "}"
196 + "}"
197 + "return textNodes;"
198 + "}; "
[695]199 + "getTextNodes(document.body)"
200 );
201
[748]202 int nodesLength = (Integer) textNodes.getMember("length");
[659]203
[695]204 // Looping through all the text nodes in the document
[693]205 for (int j = 0; j < nodesLength; j++) {
[748]206 Node currentNode = (Node) textNodes.getSlot(j);
[659]207
[693]208 // Making the current node accessible in JavaScript
[748]209 window.setMember("currentNode", currentNode);
[695]210
[748]211 webEngine.executeScript(""
[695]212 + "var span = null, prevSpan = null, prevPrevSpan = null;"
213
214 // Removing repeated whitespace from the text node's content then splitting it into individual words
215 + "var textContent = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');"
216 + "var words = splitIntoWords(textContent);"
217
218 + "var refNode = currentNode.nextSibling;"
219 + "var par = currentNode.parentElement;"
220 + "currentNode.parentElement.removeChild(currentNode);"
221
[733]222 + "for (var i = 0; i < words.length; i++) {"
223 + " addToSpan(words[i]);"
[695]224 + "}"
225
226 + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
227 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
228 + " par.removeChild(prevSpan);"
229 + "}"
[733]230 );
[688]231
[695]232 // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of
[701]233 // the addPageToFrame loop below
[695]234 progressBar.set((100 * (j)) / nodesLength);
[693]235 }
[688]236
[691]237 // Finding all links within the page, then setting the href attribute of all their descendants to be the same
238 // link/URL.
239 // This is needed because there is no apparent and efficient way to check if an element is a child of a link when
240 // running through the document when added each element to Expeditee
[748]241 webEngine.executeScript(""
[691]242 + "var anchors = document.getElementsByTagName('a');"
243 + ""
244 + "for (var i = 0; i < anchors.length; i++) {"
245 + "var currentAnchor = anchors.item(i);"
246 + "var anchorDescendants = currentAnchor.querySelectorAll('*');"
247 + "for (var j = 0; j < anchorDescendants.length; j++) {"
248 + "anchorDescendants.item(j).href = currentAnchor.href;"
249 + "}"
250 + "}"
251 );
[568]252
[713]253 WebParser.addPageToFrame(doc, window, webEngine, frame);
[568]254
[701]255 progressBar.set(100);
[628]256
[701]257 } catch (Exception e) {
258 e.printStackTrace();
259 }
260 System.out.println("Parsed frame");
261 FrameUtils.Parse(frame);
262 frame.setChanged(true);
263 FrameIO.SaveFrame(frame);
264 }
265 });
266 } catch (Exception e) {
267 e.printStackTrace();
268 }
269 }
[568]270
[701]271 /**
[733]272 * Converts a loaded page to Expeditee frame(s)
273 *
274 * @param webEngine
275 * The JavaFX WebEngine in which the page to be converted is loaded
276 * @param frame
277 * The Expeditee frame to output the converted page to
278 */
[748]279 public static void parsePageSimple(final WebEngine webEngine, final Object webView, final JComponent jfxPanel, final Frame frame) {
[733]280 try {
281 final Object notifier = new Object();
[748]282 final MutableBool bottomReached = new MutableBool(false);
[733]283
[748]284 final Progress progressBar = MessageBay.displayProgress("Converting web page");
285
286 AnimationTimer timer = new AnimationTimer() {
287
288 int frameCount = 0;
289 Frame frameToAddTo = frame;
290
[733]291 @Override
[748]292 public void handle(long arg0) {
293 // Must wait 2 frames before taking a snapshot of the webview, otherwise JavaFX won't have redrawn
294 if (frameCount++ > 1) {
295 frameCount = 0;
296 this.stop();
297
298 BufferedImage image = new BufferedImage(jfxPanel.getWidth(), jfxPanel.getHeight(), BufferedImage.TYPE_INT_ARGB);
299
300 Graphics graphics = image.createGraphics();
301
302 // Drawing the JfxPanel (containing the webview) to the image
303 jfxPanel.paint(graphics);
304
305 try {
306 int hashcode = Arrays.hashCode(image.getData().getPixels(0, 0, image.getWidth(), image.getHeight(), (int[]) null));
307
308 File out = new File(FrameIO.IMAGES_PATH + "webpage-" + Integer.toHexString(hashcode) + ".png");
309 out.mkdirs();
310 ImageIO.write(image, "png", out);
311
312 Text link = new Text("Next");
313 link.setPosition(500, 20);
314 frameToAddTo.addItem(link);
315
316 FrameIO.SaveFrame(frameToAddTo);
317
318 frameToAddTo = FrameIO.CreateFrame(frame.getFramesetName(), Integer.toHexString(hashcode), null);
319
320 link.setLink(frameToAddTo.getName());
321
322 // Adding the image
323 frameToAddTo.addText(0, 0, "@i: " + out.getName(), null);
324
325 // Button to go to the next page
326 Text nextButton = (Text) FrameCreator.createButton("Next", null, null, 10F, 10F);
327 nextButton.setID(frameToAddTo.getNextItemID());
328 nextButton.addAction("next");
329 frameToAddTo.addItem(nextButton);
330
331 FrameIO.SaveFrame(frameToAddTo);
332 System.out.println("C");
333
334 } catch (IOException e) {
335 e.printStackTrace();
336 }
337
338
339 graphics.dispose();
340 image.flush();
341
342 synchronized (notifier) {
343 notifier.notify();
344 }
345
346 try {
347 Platform.runLater(new Runnable() {
348 @Override
349 public void run() {
350 try {
351 HTMLBodyElement doc = (HTMLBodyElement) webEngine.executeScript("document.body");
352
353 JSObject window = (JSObject) webEngine.executeScript("window");
354
355 System.out.println("adding");
356 WebParser.addPageToFrame(doc, window, webEngine, frameToAddTo);
357 } catch (Exception ex) {
358 ex.printStackTrace();
359 }
360 }
361 });
362 } catch (Exception ex) {
363 ex.printStackTrace();
364 }
365 }
366 }
367 };
368
369 Platform.runLater(new Runnable() {
370 @Override
[733]371 public void run() {
372 try {
[748]373
374 webEngine.executeScript(""
375 // Initializing the counter used when scrolling the page
376 + "var scrollCounter = 0;"
377
378 // Setting all text to be hidden
379 + "var css = document.createElement('style');"
380 + "css.type = 'text/css';"
381 + "var style = 'WordSpan { visibility: hidden }';"
382 + "css.appendChild(document.createTextNode(style));"
383 + "document.getElementsByTagName('head')[0].appendChild(css);");
384
385 HTMLBodyElement doc = (HTMLBodyElement) webEngine.executeScript("document.body");
386
387 JSObject window = (JSObject) webEngine.executeScript("window");
388
389 frame.setBackgroundColor(rgbStringToColor((String) ((JSObject) (window.call("getComputedStyle", new Object[] { doc }))).call("getPropertyValue",
390 new Object[] { "background-color" })));
391
392 // Functions to be used later in JavaScript
393 webEngine.executeScript(""
394 + "function addToSpan(text) {"
395 + " span = document.createElement('wordSpan');"
396 + " span.textContent = text;"
397 + " par.insertBefore(span, refNode);"
398 + " if (prevSpan !== null && span.getBoundingClientRect().top > prevSpan.getBoundingClientRect().top) {"
399 + " span.textContent = '\\n' + span.textContent;"
400 + " if ( prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
401 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
402 + " par.removeChild(prevSpan);"
403 + " } else {"
404 + " prevPrevSpan = prevSpan;"
405 + " }"
406 + " prevSpan = span;"
407 + " } else if ( prevSpan !== null) {"
408 + " prevSpan.textContent = prevSpan.textContent + span.textContent;"
409 + " par.removeChild(span);"
410 + " } else {"
411 + " prevSpan = span;"
412 + " }"
413 + "}"
414
415 + "function splitIntoWords(toSplit) {"
416 + " var words = [];"
417 + " var pattern = /\\s+/g;"
418 + " var words = toSplit.split(pattern);"
419 + ""
420 + " for (var i = 0; i < words.length - 1; i++) {"
421 + " words[i] = words[i] + ' ';"
422 + " }"
423 + " return words;"
424 + "}"
425 );
426
427 // Using Javascript to get an array of all the text nodes in the document so they can be wrapped in spans. Have to
428 // loop through twice (once to build the array and once actually going through the array, otherwise when the
429 // textnode is removed from the document items end up being skipped)
430 JSObject textNodes = (JSObject) webEngine.executeScript(""
431 + "function getTextNodes(rootNode){"
432 + "var node;"
433 + "var textNodes=[];"
434 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT);"
435 + "while(node=walk.nextNode()) {"
436 + "if((node.textContent.trim().length > 0)) { "
437 + "textNodes.push(node);"
438 + "}"
439 + "}"
440 + "return textNodes;"
441 + "}; "
442 + "getTextNodes(document.body)"
443 );
444
445 int nodesLength = (Integer) textNodes.getMember("length");
446
447 // Looping through all the text nodes in the document
448 for (int j = 0; j < nodesLength; j++) {
449 Node currentNode = (Node) textNodes.getSlot(j);
450
451 // Making the current node accessible in JavaScript
452 window.setMember("currentNode", currentNode);
453
454 webEngine.executeScript(""
455 + "var span = null, prevSpan = null, prevPrevSpan = null;"
456
457 // Removing repeated whitespace from the text node's content then splitting it into individual words
458 + "var textContent = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');"
459 + "var words = splitIntoWords(textContent);"
460
461 + "var refNode = currentNode.nextSibling;"
462 + "var par = currentNode.parentElement;"
463 + "currentNode.parentElement.removeChild(currentNode);"
464
465 + "for (var i = 0; i < words.length; i++) {"
466 + " addToSpan(words[i]);"
467 + "}"
468
469 + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
470 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
471 + " par.removeChild(prevSpan);"
472 + "}"
473 );
474
475 // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of
476 // the addPageToFrame loop below
477 progressBar.set((100 * (j)) / nodesLength);
478 }
479
480 // Finding all links within the page, then setting the href attribute of all their descendants to be the same
481 // link/URL.
482 // This is needed because there is no apparent and efficient way to check if an element is a child of a link when
483 // running through the document when added each element to Expeditee
484 webEngine.executeScript(""
485 + "var anchors = document.getElementsByTagName('a');"
486 + ""
487 + "for (var i = 0; i < anchors.length; i++) {"
488 + "var currentAnchor = anchors.item(i);"
489 + "var anchorDescendants = currentAnchor.querySelectorAll('*');"
490 + "for (var j = 0; j < anchorDescendants.length; j++) {"
491 + "anchorDescendants.item(j).href = currentAnchor.href;"
492 + "}"
493 + "}"
494 );
495
[733]496 } catch (Exception ex) {
497 ex.printStackTrace();
498 }
499
500 synchronized (notifier) {
501 notifier.notify();
502 }
503 }
504 });
505
506 synchronized (notifier) {
507 try {
508 // Waiting for the JavaFX thread to finish
509 notifier.wait();
510 } catch (InterruptedException e) {
511 // TODO Auto-generated catch block
512 e.printStackTrace();
513 }
514 }
515
[748]516 while (!bottomReached.getValue()) {
517 Platform.runLater(new Runnable() {
[733]518 @Override
519 public void run() {
520 try {
521 // Scrolling down the page
[748]522 webEngine.executeScript(""
[733]523 + "window.scrollTo(0, scrollCounter * window.innerHeight);"
524 + "scrollCounter = scrollCounter+1;");
525
[748]526 System.out.println('B');
527
[753]528 bottomReached.setValue((Boolean) webEngine.executeScript("(window.pageYOffset + window.innerHeight >= document.documentElement.scrollHeight)"));
[748]529
[733]530 synchronized (notifier) {
531 notifier.notify();
532 }
533
534 } catch (Exception e) {
535 e.printStackTrace();
536 }
537 }
538 });
539
540 synchronized (notifier) {
541 try {
542 // Waiting for the JavaFX thread to finish
543 notifier.wait();
544 } catch (InterruptedException e) {
545 // TODO Auto-generated catch block
546 e.printStackTrace();
547 }
548 }
549
[748]550 timer.start();
[733]551
552 synchronized (notifier) {
[748]553 try {
554 // Waiting for the timer thread to finish before looping again
555 notifier.wait();
556 } catch (InterruptedException e) {
557 // TODO Auto-generated catch block
558 e.printStackTrace();
559 }
[733]560 }
561
562 }
563
564 } catch (Exception ex) {
565 ex.printStackTrace();
566 }
567
568
569
570 }
571
572 /**
[701]573 * @param rgbString
574 * string in the format <i>rgb(x,x,x)</i> or <i>rgba(x,x,x,x)</i>
575 * @return A Color object that should match the rgb string passed int. Returns null if alpha is 0
576 */
577 private static Color rgbStringToColor(String rgbString) {
[568]578
[701]579 if (rgbString == null) {
580 return null;
581 }
[568]582
[701]583 // Splitting the string into 'rgb' and 'x, x, x'
584 String[] tmpStrings = rgbString.split("\\(|\\)");
[568]585
[701]586 // Splitting up the RGB(A) components into an array
587 tmpStrings = tmpStrings[1].split(",");
[568]588
[701]589 int[] components = new int[4];
590 Arrays.fill(components, 255);
591
592 for (int i = 0; i < tmpStrings.length; i++) {
593 Float d = Float.parseFloat(tmpStrings[i].trim());
594
595 components[i] = Math.round(d);
596 }
597
598 if (components[3] > 0) {
599 return new Color(components[0], components[1], components[2], components[3]);
600 } else {
601 return null;
602 }
603 }
[576]604
[701]605 /**
606 * @param rootElement
607 * Element that will be converted (including all sub-elements)
608 * @param backgroundColor
609 * String to be used as the background color of this element when added. In the format "rgb(x,x,x)" or "rgba(x,x,x,x)"
610 * @param window
611 * 'window' from Javascript
612 * @param webEngine
613 * Web engine that the page is loaded in
614 * @param frame
615 * Expeditee frame to add the converted page to
616 * @throws IllegalArgumentException
617 * @throws IllegalAccessException
618 */
[748]619 private static void addPageToFrame(Node rootElement, JSObject window, WebEngine webEngine, Frame frame) throws InvocationTargetException, IllegalAccessException,
[701]620 IllegalArgumentException {
621
622 Node currentNode = rootElement;
[568]623
[701]624 if (currentNode.getNodeType() == Node.TEXT_NODE || currentNode.getNodeType() == Node.ELEMENT_NODE) {
625
[748]626 JSObject style;
627 JSObject bounds;
[568]628
[701]629 if (currentNode.getNodeType() == Node.TEXT_NODE) {
630 // CSS style for the element
[748]631 style = (JSObject) window.call("getComputedStyle", new Object[] { currentNode.getParentNode() });
[568]632
[701]633 // Getting a rectangle that represents the area and position of the element
[748]634 bounds = (JSObject) ((JSObject) currentNode.getParentNode()).call("getBoundingClientRect", new Object[] {});
[701]635 } else {
[748]636 style = (JSObject) window.call("getComputedStyle", new Object[] { currentNode });
[659]637
[748]638 bounds = (JSObject) ((JSObject) currentNode).call("getBoundingClientRect", new Object[] {});
[701]639 }
640
641 // Bounding rectangle position is relative to the current view, so scroll position must be added to x/y
642 // TODO: This doesn't check if an element or any of its parent elements have position:fixed set - the only
643 // way to check seems to be to walking through the element's parents until the document root is reached
[748]644 float x = Float.valueOf(bounds.getMember("left").toString()) + Float.valueOf(webEngine.executeScript("window.pageXOffset").toString());
645 float y = Float.valueOf(bounds.getMember("top").toString()) + Float.valueOf(webEngine.executeScript("window.pageYOffset").toString());
[630]646
[748]647 float width = Float.valueOf(bounds.getMember("width").toString());
648 float height = Float.valueOf(bounds.getMember("height").toString());
[691]649
[701]650 // Checking if the element is actually visible on the page
651 if (WebParser.elementVisible(x, y, width, height, style)) {
[594]652
[701]653 // Filtering the node type, starting with text nodes
654 if (currentNode.getNodeType() == Node.TEXT_NODE) {
[748]655
656 String fontSize = ((String) style.call("getPropertyValue", new Object[] { "font-size" }));
[568]657
[701]658 // Trimming off the units (always px) from the font size
659 fontSize = fontSize.substring(0, fontSize.length() - 2);
660
661 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
[748]662 String color = (String) style.call("getPropertyValue", new Object[] { "color" });
[701]663
[713]664 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
[748]665 String bgColorString = (String) style.call("getPropertyValue", new Object[] { "background-color" });
[713]666
[748]667 String align = (String) style.call("getPropertyValue", new Object[] { "text-align" });
[701]668
669 // Returns comma-separated list of typefaces
[748]670 String typeface = (String) style.call("getPropertyValue", new Object[] { "font-family" });
[568]671
[701]672 String[] typefaces = typeface.split(", |,");
673
[748]674 String weight = (String) style.call("getPropertyValue", new Object[] { "font-weight" });
[701]675
[748]676 String fontStyle = (String) style.call("getPropertyValue", new Object[] { "font-style" });
[576]677
[701]678 // Returns "normal" or a value in pixels (e.g. "10px")
[748]679 String letterSpacing = (String) style.call("getPropertyValue", new Object[] { "letter-spacing" });
[576]680
[701]681 // Returns a value in pixels (e.g. "10px")
[748]682 String lineHeight = (String) style.call("getPropertyValue", new Object[] { "line-height" });
[594]683
[748]684 String textTransform = (String) style.call("getPropertyValue", new Object[] { "text-transform" });
[594]685
[748]686 String linkUrl = (String) ((JSObject) currentNode.getParentNode()).getMember("href");
[659]687
[701]688 Boolean fontFound = false;
689 Font font = new Font(null);
[659]690
[701]691 // Looping through all font-families listed in the element's CSS until one that is installed is
692 // found, or the end of the list is reached, in which case the default font is used
693 for (int j = 0; j < typefaces.length && !fontFound; j++) {
694 if (typefaces[j].toLowerCase().equals("sans-serif")) {
695 typefaces[j] = "Arial Unicode MS";
696 } else if (typefaces[j].toLowerCase().equals("serif")) {
697 typefaces[j] = "Times New Roman";
698 } else if ((typefaces[j].toLowerCase().equals("arial"))) {
699 // Have to use Arial Unicode, otherwise unicode characters display incorrectly
700 typefaces[j] = "Arial Unicode MS";
701 }
702
703 // Regex will remove any inverted commas surrounding multi-word typeface names
704 font = new Font(typefaces[j].replaceAll("^'|'$", ""), Font.PLAIN, 12);
705
706 // If the font isn't found, Java just uses Font.DIALOG, so this check checks whether the font was found
707 if (!(font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase()))) {
708 fontFound = true;
709 }
710 }
[659]711
[701]712 if (font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase())) {
713 font = new Font("Times New Roman", Font.PLAIN, 12);
714 }
[628]715
[701]716 String fontStyleComplete = "";
[688]717
[701]718 int weightInt = 0;
[628]719
[701]720 try {
721 weightInt = Integer.parseInt(weight);
722 } catch (NumberFormatException nfe) {
723 // Use default value as set above
724 }
[628]725
[701]726 // checking if font is bold - i.e. 'bold', 'bolder' or weight over 500
727 if (weight.toLowerCase().startsWith("bold") || weightInt > 500) {
728 fontStyleComplete = fontStyleComplete.concat("bold");
729 }
[576]730
[701]731 if (fontStyle.toLowerCase().equals("italic") || fontStyle.toLowerCase().equals("oblique")) {
732 fontStyleComplete = fontStyleComplete.concat("italic");
733 }
[659]734
[701]735 float fontSizeFloat = 12;
[672]736
[701]737 try {
738 fontSizeFloat = Float.valueOf(fontSize);
739 } catch (NumberFormatException nfe) {
740 // Use default value as set above
741 }
[576]742
[701]743 float letterSpacingFloat = -0.008f;
[576]744
[701]745 try {
746 letterSpacingFloat = (Integer.parseInt(letterSpacing.substring(0, letterSpacing.length() - 2)) / (fontSizeFloat));
747 } catch (NumberFormatException nfe) {
748 // Use default value as set above
749 }
[691]750
[701]751 float lineHeightInt = -1;
752
753 try {
754 lineHeightInt = (Float.parseFloat(lineHeight.substring(0, lineHeight.length() - 2)));
755 } catch (NumberFormatException nfe) {
756 // Use default value as set above
757 }
[576]758
[701]759 Text t;
[600]760
[701]761 String textContent = currentNode.getTextContent().replaceAll("[^\\S\\n]+", " ");
762 textContent = textContent.replaceAll("^(\\s)(\\n|\\r)", "");
[600]763
[701]764 if (textTransform.equals("uppercase")) {
765 textContent = textContent.toUpperCase();
766 } else if (textTransform.equals("lowercase")) {
767 textContent = textContent.toUpperCase();
768 }
[600]769
[713]770 // Adding the text to the frame. Expeditee text seems to be positioned relative to the baseline of the first line, so
771 // the font size has to be added to the y-position
772 t = frame.addText(Math.round(x), Math.round(y + fontSizeFloat), textContent, null);
[600]773
[701]774 t.setColor(rgbStringToColor(color));
[713]775 t.setBackgroundColor(rgbStringToColor(bgColorString));
[701]776 t.setFont(font);
777 t.setSize(fontSizeFloat);
778 t.setFontStyle(fontStyleComplete);
779 t.setLetterSpacing(letterSpacingFloat);
[600]780
[701]781 // Removing any spacing between lines allowing t.getLineHeight() to be used to get the actual height
782 // of just the characters (i.e. distance from ascenders to descenders)
783 t.setSpacing(0);
[600]784
[701]785 t.setSpacing(lineHeightInt - t.getLineHeight());
[600]786
[701]787 if (align.equals("left")) {
788 t.setJustification(Justification.left);
789 } else if (align.equals("right")) {
790 t.setJustification(Justification.right);
791 } else if (align.equals("center")) {
792 t.setJustification(Justification.center);
793 } else if (align.equals("justify")) {
794 t.setJustification(Justification.full);
795 }
[638]796
[701]797 // Font size is added to the item width to give a little breathing room
798 t.setWidth(Math.round(width + (t.getSize())));
[600]799
[701]800 if (!linkUrl.equals("undefined")) {
801 t.setAction("gotourl " + linkUrl);
802 t.setActionMark(false);
803 }
[600]804
[701]805 } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
[600]806
[713]807 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
[748]808 String bgColorString = (String) style.call("getPropertyValue", new Object[] { "background-color" });
[713]809
810 Color bgColor = rgbStringToColor(bgColorString);
811
812 // If the element has a background color then add it (to Expeditee) as a rectangle with that background color
813 if (bgColor != null) {
814 System.out.println("bg");
815 frame.addRectangle(Math.round(x), Math.round(y), Math.round(width), Math.round(height), 0, null, bgColor);
816 }
817
[701]818 // background image, returns in format "url(protocol://absolute/path/to/img.extension)" for images,
819 // may also return gradients, data, etc. (not handled yet). Only need to add bg image on
820 // 'ELEMENT_NODE' (and not 'TEXT_NODE' otherwise there would be double-ups
[748]821 String bgImage = (String) style.call("getPropertyValue", new Object[] { "background-image" });
[701]822
[748]823 String linkUrl = (String) ((JSObject) currentNode).getMember("href");
[600]824
[701]825 if (bgImage.startsWith("url(")) {
826 bgImage = bgImage.substring(4, bgImage.length() - 1);
[600]827
[748]828 String bgSize = ((String) style.call("getPropertyValue", new Object[] { "background-size" })).toLowerCase();
829 String bgRepeat = ((String) style.call("getPropertyValue", new Object[] { "background-repeat" })).toLowerCase();
[600]830
[701]831 // Returns "[x]px [y]px", "[x]% [y]%", "[x]px [y]%" or "[x]% [y]px"
[748]832 String bgPosition = ((String) style.call("getPropertyValue", new Object[] { "background-position" })).toLowerCase();
[600]833
[701]834 String[] bgOffsetCoords = bgPosition.split(" ");
[600]835
[701]836 int bgOffsetX = 0, bgOffsetY = 0;
[576]837
[701]838 float originXPercent = 0, originYPercent = 0;
839
840 int cropStartX, cropStartY, cropEndX, cropEndY;
841
842 // Converting the x and y offset values to integers (and from % to px if needed)
843 if (bgOffsetCoords[0].endsWith("%")) {
844 bgOffsetX = (int) ((Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1)) / 100.0) * width);
845 originXPercent = (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1))) / 100f;
846 } else if (bgOffsetCoords[0].endsWith("px")) {
847 bgOffsetX = (int) (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 2)));
848 }
849
850 if (bgOffsetCoords[1].endsWith("%")) {
851 bgOffsetY = (int) ((Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1)) / 100.0) * height);
852 originYPercent = (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1))) / 100f;
853 } else if (bgOffsetCoords[1].endsWith("px")) {
854 bgOffsetY = (int) (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 2)));
855 }
856
857 // Converting from an offset to crop coords
858 cropStartX = -1 * bgOffsetX;
859 cropEndX = (int) (cropStartX + width);
860
861 cropStartY = -1 * bgOffsetY;
862 cropEndY = (int) (cropStartY + height);
863
864 int bgWidth = -1;
865
866 if (bgSize.equals("cover")) {
867 bgWidth = (int) width;
868 } else if (bgSize.equals("contain")) {
869 // TODO: actually compute the appropriate width
870 bgWidth = (int) width;
871 } else if (bgSize.equals("auto")) {
872 bgWidth = -1;
873 } else {
874 bgSize = bgSize.split(" ")[0];
875
876 if (bgSize.endsWith("%")) {
877 bgWidth = (int) ((Integer.parseInt(bgSize.replaceAll("\\D", "")) / 100.0) * width);
878 } else if (bgSize.endsWith("px")) {
879 bgWidth = Integer.parseInt(bgSize.replaceAll("\\D", ""));
[568]880 }
881 }
882
[701]883 try {
884 WebParser.addImageFromUrl(bgImage, linkUrl, frame, x, y, bgWidth, cropStartX, cropStartY, cropEndX, cropEndY, bgRepeat, originXPercent, originYPercent);
885 } catch (MalformedURLException mue) {
886 // probably a 'data:' url, not supported yet
887 mue.printStackTrace();
888 } catch (IOException e) {
889 // TODO Auto-generated catch block
890 e.printStackTrace();
891 }
[568]892 }
[566]893
[701]894 String imgSrc;
[568]895
[748]896 if (currentNode.getNodeName().toLowerCase().equals("img") && (imgSrc = ((JSObject) currentNode).getMember("src").toString()) != null) {
[701]897 try {
898 WebParser.addImageFromUrl(imgSrc, linkUrl, frame, x, y, (int) width, null, null, null, null, null, 0, 0);
899 } catch (MalformedURLException mue) {
900 // probably a 'data:' url, not supported yet
901 mue.printStackTrace();
902 } catch (IOException e) {
903 // TODO Auto-generated catch block
904 e.printStackTrace();
905 }
906 }
907 }
908 }
[568]909
[701]910 Node childNode = currentNode.getFirstChild();
[568]911
[701]912 while (childNode != null) {
[713]913 addPageToFrame(childNode, window, webEngine, frame);
[701]914 childNode = childNode.getNextSibling();
915 }
[568]916 }
917 }
918
[748]919 private static boolean elementVisible(float x, float y, float width, float height, JSObject style) {
[576]920 try {
[748]921 if (width <= 0 || height <= 0 || x + width <= 0 || y + height <= 0 || ((String) style.call("getPropertyValue", new Object[] { "visibility" })).equals("hidden")
922 || ((String) style.call("getPropertyValue", new Object[] { "display" })).equals("none")) {
[576]923 return false;
924 } else {
925 return true;
926 }
927 } catch (Exception e) {
928 e.printStackTrace();
929 return false;
930 }
931 }
932
933 /**
934 * @param imgSrc
935 * URL of the image to add
[692]936 * @param linkUrl
937 * Absolute URL that the image should link to when clicked
[576]938 * @param frame
939 * Frame to add the image to
940 * @param x
941 * X-coordinate at which the image should be placed on the frame
942 * @param y
943 * Y-coordinate at which the image should be placed on the frame
944 * @param width
[600]945 * Width of the image once added to the frame. Negative 1 (-1) will cause the actual width of the image file to be used
[594]946 *
947 * @param cropStartX
[600]948 * X-coordinate at which to start crop, or null for no crop
[594]949 * @param cropStartY
[600]950 * Y-coordinate at which to start crop, or null for no crop
[594]951 * @param cropEndX
[600]952 * X-coordinate at which to end the crop, or null for no crop
[594]953 * @param cropEndY
[600]954 * Y-coordinate at which to end the crop, or null for no crop
[638]955 *
956 * @param repeat
957 * String determining how the image should be tiled/repeated. Valid strings are: <i>no-repeat</i>, <i>repeat-x</i>, or
958 * <i>repeat-y</i>. All other values (including null) will cause the image to repeat in both directions
[644]959 *
960 * @param originXPercent
961 * Percentage into the image to use as the x coordinate of the image's origin point
962 * @param originYPercent
963 * Percentage into the image to use as the y coordinate of the image's origin point
964 *
[576]965 * @throws MalformedURLException
966 * @throws IOException
967 */
[691]968 private static void addImageFromUrl(String imgSrc, String linkUrl, final Frame frame, float x, float y, int width, Integer cropStartX, Integer cropStartY, Integer cropEndX, Integer cropEndY, String repeat,
[644]969 float originXPercent, float originYPercent)
[594]970 throws MalformedURLException,
[576]971 IOException {
[594]972
[576]973 URL imgUrl = new URL(imgSrc);
974
975 HttpURLConnection connection = (HttpURLConnection) (imgUrl.openConnection());
976
977 // Spoofing a widely accepted User Agent, since some sites refuse to serve non-webbrowser clients
978 connection.setRequestProperty("User-Agent", "Mozilla/5.0");
979
980 BufferedImage img = ImageIO.read(connection.getInputStream());
981
982 int hashcode = Arrays.hashCode(img.getData().getPixels(0, 0, img.getWidth(), img.getHeight(), (int[]) null));
983 File out = new File(FrameIO.IMAGES_PATH + Integer.toHexString(hashcode) + ".png");
984 out.mkdirs();
985 ImageIO.write(img, "png", out);
[630]986
[603]987 if (cropEndX == null || cropStartX == null || cropEndY == null || cropStartY == null) {
988 cropStartX = 0;
989 cropStartY = 0;
990 cropEndX = img.getWidth();
991 cropEndY = img.getHeight();
[630]992 } else if (cropStartX < 0) {
993 cropEndX = cropEndX - cropStartX;
994 x = x + Math.abs(cropStartX);
995 cropStartX = 0;
[603]996 }
[576]997
[630]998 if (cropStartY < 0) {
999 cropEndY = cropEndY - cropStartY;
1000 y = y + Math.abs(cropStartY);
1001 cropStartY = 0;
1002 }
1003
[600]1004 if (width < 0) {
[603]1005 width = img.getWidth();
[576]1006 }
[630]1007
1008 if (repeat != null) {
1009 if (repeat.equals("no-repeat")) {
1010 int tmpCropEndY = (int) (cropStartY + ((float) width / img.getWidth()) * img.getHeight());
1011 int tmpCropEndX = cropStartX + width;
1012
1013 cropEndX = (cropEndX < tmpCropEndX) ? cropEndX : tmpCropEndX;
1014 cropEndY = (cropEndY < tmpCropEndY) ? cropEndY : tmpCropEndY;
1015 } else if (repeat.equals("repeat-x")) {
1016 int tmpCropEndY = (int) (cropStartY + ((float) width / img.getWidth()) * img.getHeight());
1017 cropEndY = (cropEndY < tmpCropEndY) ? cropEndY : tmpCropEndY;
1018 } else if (repeat.equals("repeat-y")) {
1019 int tmpCropEndX = cropStartX + width;
1020 cropEndX = (cropEndX < tmpCropEndX) ? cropEndX : tmpCropEndX;
1021 }
1022 }
1023
[644]1024 if (originXPercent > 0) {
1025 int actualWidth = cropEndX - cropStartX;
[638]1026
[644]1027 int originXPixels = Math.round(originXPercent * actualWidth);
1028
1029 x = x - originXPixels;
1030
1031 cropStartX = (int) (cropStartX + (width - actualWidth) * originXPercent);
1032 cropEndX = (int) (cropEndX + (width - actualWidth) * originXPercent);
[638]1033 }
1034
[644]1035 if (originYPercent > 0) {
[638]1036 int height = (int) ((img.getHeight() / (float) img.getWidth()) * width);
1037 int actualHeight = (cropEndY - cropStartY);
[644]1038 int originYPixels = Math.round(originYPercent * actualHeight);
[638]1039
[644]1040 y = y - originYPixels;
1041
1042 cropStartY = (int) (cropStartY + (height - actualHeight) * originYPercent);
1043 cropEndY = (int) (cropEndY + (height - actualHeight) * originYPercent);
[638]1044 }
1045
[610]1046 Text text = new Text("@i: " + out.getName() + " " + width);
[603]1047 text.setPosition(x, y);
1048
1049 Picture pic = ItemUtils.CreatePicture(text, frame);
[601]1050
[606]1051 float invScale = 1 / pic.getScale();
[630]1052
[606]1053 pic.setCrop((int)(cropStartX * invScale), (int)(cropStartY * invScale), (int)(cropEndX * invScale), (int)(cropEndY * invScale));
[691]1054
1055 if (linkUrl != null && !linkUrl.equals("undefined")) {
1056 pic.setAction("goto " + linkUrl);
1057 pic.setActionMark(false);
1058 }
[630]1059
[603]1060 frame.addItem(pic);
1061 pic.anchor();
1062 pic.getSource().anchor();
[576]1063 }
[733]1064
1065 private static class MutableBool {
1066 private boolean value;
1067
1068 public MutableBool(boolean value) {
1069 this.value = value;
1070 }
1071
1072 public boolean getValue() {
1073 return value;
1074 }
1075
1076 public void setValue(boolean value) {
1077 this.value = value;
1078 }
1079 }
[566]1080}
Note: See TracBrowser for help on using the repository browser.