source: trunk/src/org/expeditee/io/WebParser.java@ 919

Last change on this file since 919 was 919, checked in by jts21, 10 years ago

Added license headers to all files, added full GPL3 license file, moved license header generator script to dev/bin/scripts

File size: 57.2 KB
Line 
1/**
2 * WebParser.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://expeditee.org
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19package org.expeditee.io;
20
21import java.awt.Color;
22import java.awt.Font;
23import java.awt.image.BufferedImage;
24import java.io.File;
25import java.io.IOException;
26import java.lang.reflect.InvocationTargetException;
27import java.net.HttpURLConnection;
28import java.net.MalformedURLException;
29import java.net.URL;
30import java.text.SimpleDateFormat;
31/*
32 * JavaFX is not on the default java classpath until Java 8 (but is still included with Java 7), so your IDE will probably complain that the imports below can't be resolved.
33 * In Eclipse hitting 'Proceed' when told 'Errors exist in project' should allow you to run Expeditee without any issues (although the JFX Browser widget will not display),
34 * or you can just exclude JfxBrowser, WebParser and JfxbrowserActions from the build path.
35 *
36 * If you are using Ant to build/run, 'ant build' will try to build with JavaFX jar added to the classpath.
37 * If this fails, 'ant build-nojfx' will build with the JfxBrowser, WebParser and JfxbrowserActions excluded from the build path.
38 */
39import java.util.Arrays;
40import java.util.Date;
41
42import javafx.animation.AnimationTimer;
43import javafx.application.Platform;
44import javafx.beans.value.ChangeListener;
45import javafx.beans.value.ObservableValue;
46import javafx.concurrent.Worker.State;
47import javafx.embed.swing.SwingFXUtils;
48import javafx.scene.SnapshotParameters;
49import javafx.scene.image.WritableImage;
50import javafx.scene.web.WebEngine;
51import javafx.scene.web.WebView;
52
53import javax.imageio.ImageIO;
54
55import netscape.javascript.JSObject;
56
57import org.expeditee.gui.DisplayIO;
58import org.expeditee.gui.Frame;
59import org.expeditee.gui.FrameGraphics;
60import org.expeditee.gui.FrameIO;
61import org.expeditee.gui.FrameMouseActions;
62import org.expeditee.gui.FrameUtils;
63import org.expeditee.gui.MessageBay;
64import org.expeditee.gui.MessageBay.Progress;
65import org.expeditee.items.ItemUtils;
66import org.expeditee.items.Justification;
67import org.expeditee.items.Picture;
68import org.expeditee.items.Text;
69import org.expeditee.items.widgets.JfxBrowser;
70import org.w3c.dom.Node;
71
72/**
73 * Methods to convert webpages to Expeditee frames
74 *
75 * @author ngw8
76 * @author jts21
77 */
78public class WebParser {
79
80
81 /**
82 * Loads a webpage and renders it as Expeditee frame(s)
83 *
84 * @param URL
85 * Page to load
86 * @param frame
87 * The Expeditee frame to output the converted page to
88 */
89 public static void parseURL(final String URL, final Frame frame) {
90 try {
91 Platform.runLater(new Runnable() {
92 @Override
93 public void run() {
94 try {
95 WebEngine webEngine = new WebEngine(URL);
96 loadPage(webEngine, frame);
97 } catch (Exception e) {
98 e.printStackTrace();
99 }
100 }
101 });
102 } catch (Exception e) {
103 e.printStackTrace();
104 }
105 }
106
107 protected static void loadPage(final WebEngine webEngine, final Frame frame) throws Exception {
108 webEngine.getLoadWorker().stateProperty().addListener(new ChangeListener<State>() {
109
110 @Override
111 public void changed(ObservableValue<? extends State> ov, State oldState, State newState) {
112
113 switch (newState) {
114 case READY: // READY
115 // MessageBay.displayMessage("WebEngine ready");
116 break;
117 case SCHEDULED: // SCHEDULED
118 // MessageBay.displayMessage("Scheduled page load");
119 break;
120 case RUNNING: // RUNNING
121 System.out.println("Loading page!");
122 // MessageBay.displayMessage("WebEngine running");
123 break;
124 case SUCCEEDED: // SUCCEEDED
125 // MessageBay.displayMessage("Finished loading page");
126 System.out.println("Parsing page!");
127 webEngine.executeScript("window.resizeTo(800, 800);"
128 + "document.body.style.width = '1000px'");
129 parsePage(webEngine, frame);
130 System.out.println("Parsed page!");
131 break;
132 case CANCELLED: // CANCELLED
133 MessageBay.displayMessage("Cancelled loading page");
134 break;
135 case FAILED: // FAILED
136 MessageBay.displayMessage("Failed to load page");
137 break;
138 }
139 }
140 });
141 }
142
143 /**
144 * Converts a loaded page to Expeditee frame(s)
145 *
146 * @param webEngine
147 * The JavaFX WebEngine in which the page to be converted is loaded
148 * @param frame
149 * The Expeditee frame to output the converted page to
150 */
151 public static void parsePage(final WebEngine webEngine, final Frame frame) {
152 try {
153 Platform.runLater(new Runnable() {
154 @Override
155 public void run() {
156 try {
157 Progress progressBar = MessageBay.displayProgress("Converting web page");
158
159 Node doc = (Node) webEngine.executeScript("document.body");
160
161 JSObject window = (JSObject) webEngine.executeScript("window");
162
163 frame.setBackgroundColor(rgbStringToColor((String) ((JSObject) (window.call("getComputedStyle", new Object[] { doc }))).call("getPropertyValue",
164 new Object[] { "background-color" })));
165
166 // Functions to be used later in JavaScript
167 webEngine.executeScript(""
168 + "function addToSpan(text) {"
169 + " span = document.createElement('wordSpan');"
170 + " span.textContent = text;"
171 + " par.insertBefore(span, refNode);"
172 // Checking if the current word is on a new line (i.e. lower than the previous word)
173 + " if (prevSpan !== null && span.getBoundingClientRect().top > prevSpan.getBoundingClientRect().top) {"
174 // If it is, prepend a new line character to it. The new line characters doesn't affect the rendered HTML
175 + " span.textContent = '\\n' + span.textContent;"
176
177 // Checking if the previous word is horizontally aligned with the one before it.
178 // If it is, merge the text of the two spans
179 + " if ( prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
180 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
181 + " par.removeChild(prevSpan);"
182 + " } else {"
183 + " prevPrevSpan = prevSpan;"
184 + " }"
185 + " prevSpan = span;"
186 + " } else if ( prevSpan !== null) {"
187 // Word is on the same line as the previous one, so merge the second into the span of the first
188 + " prevSpan.textContent = prevSpan.textContent + span.textContent;"
189 + " par.removeChild(span);"
190 + " } else {"
191 + " prevSpan = span;"
192 + " }"
193 + "}"
194
195 + "function splitIntoWords(toSplit) {"
196 + " var words = [];"
197 + " var pattern = /\\s+/g;"
198 + " var words = toSplit.split(pattern);"
199 + ""
200 + " for (var i = 0; i < words.length - 1; i++) {"
201 + " words[i] = words[i] + ' ';"
202 + " }"
203 + " return words;"
204 + "}"
205 );
206
207 // Using Javascript to get an array of all the text nodes in the document so they can be wrapped in spans. Have to
208 // loop through twice (once to build the array and once actually going through the array, otherwise when the
209 // textnode is removed from the document items end up being skipped)
210 JSObject textNodes = (JSObject) webEngine.executeScript(""
211 + "function getTextNodes(rootNode){"
212 + "var node;"
213 + "var textNodes=[];"
214 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT);"
215 + "while(node=walk.nextNode()) {"
216 + "if((node.textContent.trim().length > 0)) { "
217 + "textNodes.push(node);"
218 + "}"
219 + "}"
220 + "return textNodes;"
221 + "}; "
222 + "getTextNodes(document.body)"
223 );
224
225 int nodesLength = (Integer) textNodes.getMember("length");
226
227 // Looping through all the text nodes in the document
228 for (int j = 0; j < nodesLength; j++) {
229 Node currentNode = (Node) textNodes.getSlot(j);
230
231 // Making the current node accessible in JavaScript
232 window.setMember("currentNode", currentNode);
233
234 webEngine.executeScript(""
235 + "var span = null, prevSpan = null, prevPrevSpan = null;"
236
237 // Removing repeated whitespace from the text node's content then splitting it into individual words
238 + "var textContent = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');"
239 + "var words = splitIntoWords(textContent);"
240
241 + "var refNode = currentNode.nextSibling;"
242 + "var par = currentNode.parentElement;"
243 + "currentNode.parentElement.removeChild(currentNode);"
244
245 + "for (var i = 0; i < words.length; i++) {"
246 + " addToSpan(words[i]);"
247 + "}"
248
249 + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
250 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
251 + " par.removeChild(prevSpan);"
252 + "}"
253 );
254
255 // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of
256 // the addPageToFrame loop below
257 progressBar.set((100 * (j)) / nodesLength);
258 }
259
260 // Finding all links within the page, then setting the href attribute of all their descendants to be the same
261 // link/URL.
262 // This is needed because there is no apparent and efficient way to check if an element is a child of a link when
263 // running through the document when added each element to Expeditee
264 webEngine.executeScript(""
265 + "var anchors = document.getElementsByTagName('a');"
266 + ""
267 + "for (var i = 0; i < anchors.length; i++) {"
268 + "var currentAnchor = anchors.item(i);"
269 + "var anchorDescendants = currentAnchor.querySelectorAll('*');"
270 + "for (var j = 0; j < anchorDescendants.length; j++) {"
271 + "anchorDescendants.item(j).href = currentAnchor.href;"
272 + "}"
273 + "}"
274 );
275
276 WebParser.addPageToFrame(doc, window, webEngine, frame);
277
278 progressBar.set(100);
279
280 } catch (Exception e) {
281 e.printStackTrace();
282 }
283 System.out.println("Parsed frame");
284 FrameUtils.Parse(frame);
285 frame.setChanged(true);
286 FrameIO.SaveFrame(frame);
287 }
288 });
289 } catch (Exception e) {
290 e.printStackTrace();
291 }
292 }
293
294 /**
295 * Converts a loaded page to Expeditee frame(s)
296 *
297 * @param webEngine
298 * The JavaFX WebEngine in which the page to be converted is loaded
299 * @param frame
300 * The Expeditee frame to output the converted page to
301 */
302 public static void parsePageSimple(final JfxBrowser browserWidget, final WebEngine webEngine, final WebView webView, final Frame frame) {
303 try {
304
305 final int verticalScrollPerPage = (int) (FrameGraphics.getMaxFrameSize().getHeight() * 0.85);
306 final int horizontalScrollPerPage = (int) (FrameGraphics.getMaxFrameSize().getWidth() * 0.85);
307
308 Platform.runLater(new Runnable() {
309
310 @Override
311 public void run() {
312 browserWidget.setOverlayVisible(true);
313
314 // Webview area is set to slightly larger than the size of a converted page, to give some overlap between each page
315 browserWidget.setWebViewSize(horizontalScrollPerPage * 1.1, verticalScrollPerPage * 1.1);
316 browserWidget.setScrollbarsVisible(false);
317 }
318 });
319
320 final Object notifier = new Object();
321
322 final MutableInt verticalCount = new MutableInt(0);
323 final MutableInt horizontalCount = new MutableInt(0);
324
325 final MutableInt pagesVertical = new MutableInt(1);
326 final MutableInt pagesHorizontal = new MutableInt(1);
327
328 final String pageTitle;
329
330 if (webEngine.getTitle() != null) {
331 pageTitle = webEngine.getTitle();
332 } else {
333 pageTitle = "Untitled Page";
334 }
335
336 final Progress progressBar = MessageBay.displayProgress("Converting web page");
337
338 final Frame frameset = FrameIO.CreateNewFrameset(FrameIO.ConvertToValidFramesetName((new SimpleDateFormat("yy-MM-dd-HH-mm-ss").format(new Date())) + pageTitle));
339
340 frameset.setTitle(pageTitle);
341 frameset.getTitleItem().setSize(14);
342
343 WebParser.addButton("Return to original frame", frame.getName(), null, 200, frameset, null, 0f, 10f, null);
344
345 Text link = DisplayIO.getCurrentFrame().addText(FrameMouseActions.getX(), FrameMouseActions.getY(), pageTitle, null);
346 link.setLink(frameset.getName());
347
348 FrameMouseActions.pickup(link);
349
350 // Timer that fires every time JFX is redrawn. After a few redraws, the handle method of this takes a screenshot of the page,
351 // adds it to the frame, then adds the text on top
352 AnimationTimer timer = new AnimationTimer() {
353
354 int frameCount = 0;
355
356 Frame frameToAddTo = frameset;
357 int thumbWidth = 100;
358
359 @Override
360 public void handle(long arg0) {
361 // Must wait 2 frames before taking a snapshot of the webview, otherwise JavaFX won't have redrawn
362 if (frameCount++ > 1) {
363 frameCount = 0;
364 this.stop();
365
366 verticalCount.setValue(verticalCount.getValue() + 1);
367
368 frameToAddTo = FrameIO.CreateFrame(frameToAddTo.getFramesetName(), pageTitle, null);
369 frameToAddTo.removeAllItems(frameToAddTo.getItems());
370
371 try {
372 // removing the CSS that hides the text (otherwise the text would not pass the visibility check that is run on
373 // it before adding it to the frame)
374 webEngine.executeScript("cssHide.innerHTML = '';");
375
376 JSObject window = (JSObject) webEngine.executeScript("window");
377
378 int visibleWidth = (Integer) webEngine.executeScript("window.innerWidth");
379 int visibleHeight = (Integer) webEngine.executeScript("window.innerHeight");
380
381 WebParser.addTextToFrame(visibleWidth, visibleHeight, window, webEngine, frameToAddTo);
382
383 FrameIO.SaveFrame(frameToAddTo);
384 } catch (Exception ex) {
385 ex.printStackTrace();
386 }
387
388 webEngine.executeScript(""
389 // Setting all text to be hidden before taking the screenshot
390 + "cssHide.appendChild(document.createTextNode(wordSpanHiddenStyle));");
391
392 WritableImage img = new WritableImage((int)webView.getWidth(), (int)webView.getHeight());
393
394 webView.snapshot(new SnapshotParameters(), img);
395
396 // Getting a BufferedImage from the JavaFX image
397 BufferedImage image = SwingFXUtils.fromFXImage(img, null);
398
399 try {
400 int hashcode = Arrays.hashCode(image.getData().getPixels(0, 0, image.getWidth(), image.getHeight(), (int[]) null));
401
402 File out = new File(FrameIO.IMAGES_PATH + "webpage-" + Integer.toHexString(hashcode) + ".png");
403 out.mkdirs();
404 ImageIO.write(image, "png", out);
405
406 // Adding the image to the frame
407 frameToAddTo.addText(0, 0, "@i: " + out.getName(), null);
408
409 // Adding thumbnail to the overview page
410 Text thumb = frameset.addText((int) (thumbWidth * 1.1 * horizontalCount.getValue()) + 10,
411 (int) ((((float) thumbWidth / image.getWidth()) * image.getHeight()) * 1.1 * verticalCount.getValue()),
412 "@i: " + out.getName() + " " + thumbWidth,
413 null);
414
415 thumb.setLink(frameToAddTo.getName());
416 thumb.setBorderColor(Color.lightGray);
417 thumb.setThickness(1);
418
419 // Button to go to the next frame/page
420 WebParser.addButton("Next", null, "next", 70, frameToAddTo, null, 0f, 10f, null);
421
422 // Button to go to the previous frame/page
423 if (verticalCount.getValue() > 1 || horizontalCount.getValue() > 0) {
424 WebParser.addButton("Previous", null, "previous", 70, frameToAddTo, null, 85f, 10f, null);
425 }
426
427 // Button to return to the index/overview page
428 WebParser.addButton("Index", frameset.getName(), null, 70, frameToAddTo, null, null, 10f, 5f);
429
430 FrameIO.SaveFrame(frameToAddTo);
431 FrameIO.SaveFrame(frameset);
432
433 } catch (IOException e) {
434 e.printStackTrace();
435 }
436
437 image.flush();
438
439 synchronized (notifier) {
440 // Notifying that the timer has finished
441 notifier.notify();
442 }
443 }
444 }
445 };
446
447 Platform.runLater(new Runnable() {
448 @Override
449 public void run() {
450 try {
451 JSObject window = (JSObject) webEngine.executeScript("window");
452
453 webEngine.executeScript(""
454 // Initializing the counter used when scrolling the page
455 + "var scrollCounter = 0;"
456 + "var scrollCounterHorizontal = 0;"
457
458 // Storing the current scroll position
459 + "var originalScrollX = window.pageXOffset;"
460 + "var originalScrollY = window.pageYOffset;");
461
462 window.setMember("horizontalScrollPerPage", horizontalScrollPerPage);
463 window.setMember("verticalScrollPerPage", verticalScrollPerPage);
464
465
466
467 // The scrollPerPage will always be less than the page's height, due to the overlap being added/allowed for between pages,
468 // but if the webpage fits in a single converted page, there's no need for any overlap, so just use 1 as the number of pages
469 if((Boolean) webEngine.executeScript("document.documentElement.scrollHeight > window.innerHeight")) {
470 pagesVertical.setValue((int) Math.ceil((Integer) webEngine.executeScript("document.documentElement.scrollHeight") / (float) verticalScrollPerPage));
471 }
472
473 if((Boolean) webEngine.executeScript("document.documentElement.scrollWidth > window.innerWidth")) {
474 pagesHorizontal.setValue((int) Math.ceil((Integer) webEngine.executeScript("document.documentElement.scrollWidth") / (float) horizontalScrollPerPage));
475 }
476
477 System.out.println(webEngine.executeScript("document.documentElement.scrollWidth") + "/" + horizontalScrollPerPage);
478 System.out.println(pagesVertical.getValue() + "x" + pagesHorizontal.getValue());
479
480 // Setting up the element that contains the CSS to hide all text. Also hiding readability mode buttons.
481 // This is wiped before the text is converted, then re-added before taking the screenshot
482 webEngine.executeScript(""
483 + "var cssHide = document.createElement('style');"
484 + "cssHide.type = 'text/css';"
485 + "var wordSpanHiddenStyle = 'WordSpan, #readOverlay #readTools { visibility: hidden !important;}';"
486 + "cssHide.appendChild(document.createTextNode(wordSpanHiddenStyle));"
487 + "document.getElementsByTagName('head')[0].appendChild(cssHide);"
488 );
489
490 // Replacing line breaks in all <pre> tags with <br> tags, otherwise they are lost during the conversion
491 webEngine.executeScript(""
492 + "var pres = document.getElementsByTagName ('pre');"
493 + "for(var i = 0; i < pres.length; i++){"
494 + " pres[i].innerHTML = pres[i].innerHTML.replace(/\\n|\\r/g, '<br />');"
495 + "}");
496
497 // Functions to be used later in JavaScript
498 webEngine.executeScript(""
499 + "function addToSpan(text) {"
500 + " span = document.createElement('wordSpan');"
501 + " span.textContent = text;"
502 + " par.insertBefore(span, refNode);"
503 + " if (prevSpan !== null && span.getBoundingClientRect().top > prevSpan.getBoundingClientRect().top) {"
504 + " span.textContent = '\\n' + span.textContent;"
505 + " if ( prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
506 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
507 + " par.removeChild(prevSpan);"
508 + " } else {"
509 + " prevPrevSpan = prevSpan;"
510 + " }"
511 + " prevSpan = span;"
512 + " } else if ( prevSpan !== null) {"
513 + " prevSpan.textContent = prevSpan.textContent + span.textContent;"
514 + " par.removeChild(span);"
515 + " } else {"
516 + " prevSpan = span;"
517 + " }"
518 + "}"
519
520 + "function splitIntoWords(toSplit) {"
521 + " var words = [];"
522 + " var pattern = /\\s+/g;"
523 + " var words = toSplit.split(pattern);"
524 + ""
525 + " for (var i = 0; i < words.length - 1; i++) {"
526 + " words[i] = words[i] + ' ';"
527 + " }"
528 + " return words;"
529 + "}"
530 );
531
532 // Using Javascript to get an array of all the text nodes in the document so they can be wrapped in spans. Have to
533 // loop through twice (once here to build the array and once later actually going through the array, otherwise when the
534 // textnode is removed from the document items end up being skipped)
535 webEngine.executeScript(""
536 + "var node;"
537 + "var textNodes=[];"
538 + "var walk = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT);"
539 );
540
541 while(webEngine.executeScript("node=walk.nextNode()") != null && browserWidget.isParserRunning()) {
542
543 webEngine.executeScript(""
544 + "if((node.textContent.trim().length > 0)) { "
545 + "textNodes.push(node);"
546 + "}"
547 );
548 }
549
550 JSObject textNodes = (JSObject) webEngine.executeScript("textNodes");
551
552 int nodesLength = (Integer) textNodes.getMember("length");
553
554 // Looping through all the text nodes in the document
555 for (int j = 0; j < nodesLength && browserWidget.isParserRunning(); j++) {
556 Node currentNode = (Node) textNodes.getSlot(j);
557
558 // Making the current node accessible in JavaScript
559 window.setMember("currentNode", currentNode);
560
561 webEngine.executeScript(""
562 + "var span = null, prevSpan = null, prevPrevSpan = null;"
563
564 // Removing repeated whitespace from the text node's content then splitting it into individual words
565 + "var textContent = currentNode.textContent.replace(/\\n|\\r/g, '').replace(/\\s+/g, ' ');"
566 + "var words = splitIntoWords(textContent);"
567
568 + "var refNode = currentNode.nextSibling;"
569 + "var par = currentNode.parentElement;"
570 + "currentNode.parentElement.removeChild(currentNode);"
571
572 + "for (var i = 0; i < words.length; i++) {"
573 + " addToSpan(words[i]);"
574 + "}"
575
576 + "if (prevPrevSpan !== null && prevPrevSpan.getBoundingClientRect().left == prevSpan.getBoundingClientRect().left) {"
577 + " prevPrevSpan.textContent = prevPrevSpan.textContent + prevSpan.textContent;"
578 + " par.removeChild(prevSpan);"
579 + "}"
580 );
581
582 // Will never reach 100% here, as the processing is not quite finished - progress is set to 100% at the end of
583 // the addPageToFrame loop below
584 try {
585 progressBar.set((50 * (j + 1)) / nodesLength);
586 } catch (Exception e) {
587 // Seems to be a bug somewhere along the line when updating the progressbar, so am catching any exception
588 // thrown here to avoid it stuffing up the rest of the parsing
589 e.printStackTrace();
590 }
591 }
592
593 // Finding all links within the page, then setting the href attribute of all their descendants to be the same
594 // link/URL.
595 // This is needed because there is no apparent and efficient way to check if an element is a child of a link when
596 // running through the document when added each element to Expeditee
597 webEngine.executeScript(""
598 + "var anchors = document.getElementsByTagName('a');"
599 + ""
600 + "for (var i = 0; i < anchors.length; i++) {"
601 + "var currentAnchor = anchors.item(i);"
602 + "var anchorDescendants = currentAnchor.querySelectorAll('*');"
603 + "for (var j = 0; j < anchorDescendants.length; j++) {"
604 + "anchorDescendants.item(j).href = currentAnchor.href;"
605 + "}"
606 + "}"
607 );
608
609 } catch (Exception ex) {
610 ex.printStackTrace();
611 }
612
613 synchronized (notifier) {
614 notifier.notify();
615 }
616 }
617 });
618
619 synchronized (notifier) {
620 try {
621 // Waiting for the page setup (splitting into spans) to finish
622 notifier.wait();
623 } catch (InterruptedException e) {
624 // TODO Auto-generated catch block
625 e.printStackTrace();
626 }
627 }
628
629 // Loop that scrolls the page horizontally
630 for(int i = 0; i < pagesHorizontal.getValue() && browserWidget.isParserRunning(); i++) {
631
632 Platform.runLater(new Runnable() {
633 @Override
634 public void run() {
635 try {
636 // Scrolling down the page
637 webEngine.executeScript(""
638 + "scrollCounter = 0;"
639 + "window.scrollTo(scrollCounterHorizontal * horizontalScrollPerPage, 0);"
640 + "scrollCounterHorizontal = scrollCounterHorizontal+1;");
641
642 } catch (Exception e) {
643 e.printStackTrace();
644 }
645 }
646 });
647
648 // Loop that scrolls the page vertically (for each horizontal scroll position)
649 for(int j = 0; j < pagesVertical.getValue() && browserWidget.isParserRunning(); j++) {
650
651 try {
652 progressBar.set((int) (50 + ((float)(j+1)/(pagesVertical.getValue() * pagesHorizontal.getValue()) + ((float)(i) / pagesHorizontal.getValue())) * 50));
653 } catch (Exception e) {
654 e.printStackTrace();
655 }
656
657 Platform.runLater(new Runnable() {
658 @Override
659 public void run() {
660 try {
661 // Scrolling down the page
662 webEngine.executeScript(""
663 + "window.scrollTo(window.pageXOffset, scrollCounter * verticalScrollPerPage);"
664 + "scrollCounter = scrollCounter+1;");
665
666 synchronized (notifier) {
667 notifier.notify();
668 }
669
670 } catch (Exception e) {
671 e.printStackTrace();
672 }
673 }
674 });
675
676 synchronized (notifier) {
677 try {
678 // Waiting for the page to be scrolled
679 notifier.wait();
680 } catch (InterruptedException e) {
681 // TODO Auto-generated catch block
682 e.printStackTrace();
683 }
684 }
685
686 timer.start();
687
688 synchronized (notifier) {
689 try {
690 // Waiting for the timer thread to finish before looping again
691 notifier.wait();
692 } catch (InterruptedException e) {
693 // TODO Auto-generated catch block
694 e.printStackTrace();
695 }
696 }
697
698 }
699
700 horizontalCount.setValue(horizontalCount.getValue() + 1);
701 verticalCount.setValue(0);
702 }
703
704 if(browserWidget.isParserRunning()) {
705 progressBar.set(100);
706 } else {
707 MessageBay.displayMessage("Web page conversion cancelled");
708 }
709
710 browserWidget.parserFinished();
711
712 Platform.runLater(new Runnable() {
713 @Override
714 public void run() {
715 // Scrolling to the original position on the page
716 webEngine.executeScript("window.scrollTo(originalScrollX, originalScrollY)");
717 // Reloading the page once the parsing is done - only realistic way to reset (i.e. remove all the added WordSpan tags)
718 // the page
719 webEngine.reload();
720 }
721 });
722
723 } catch (Exception ex) {
724 ex.printStackTrace();
725 }
726
727 Platform.runLater(new Runnable() {
728
729 @Override
730 public void run() {
731 browserWidget.setOverlayVisible(false);
732 browserWidget.rebindWebViewSize();
733 browserWidget.setScrollbarsVisible(true);
734 }
735 });
736
737 }
738
739 /**
740 * @param rgbString
741 * string in the format <i>rgb(x,x,x)</i> or <i>rgba(x,x,x,x)</i>
742 * @return A Color object that should match the rgb string passed int. Returns null if alpha is 0
743 */
744 private static Color rgbStringToColor(String rgbString) {
745
746 if (rgbString == null) {
747 return null;
748 }
749
750 // Splitting the string into 'rgb' and 'x, x, x'
751 String[] tmpStrings = rgbString.split("\\(|\\)");
752
753 // Splitting up the RGB(A) components into an array
754 tmpStrings = tmpStrings[1].split(",");
755
756 int[] components = new int[4];
757 Arrays.fill(components, 255);
758
759 for (int i = 0; i < tmpStrings.length; i++) {
760 Float d = Float.parseFloat(tmpStrings[i].trim());
761
762 components[i] = Math.round(d);
763 }
764
765 if (components[3] > 0) {
766 return new Color(components[0], components[1], components[2], components[3]);
767 } else {
768 return null;
769 }
770 }
771
772 /**
773 * @param rootElement
774 * Element that will be converted (including all sub-elements)
775 * @param backgroundColor
776 * String to be used as the background color of this element when added. In the format "rgb(x,x,x)" or "rgba(x,x,x,x)"
777 * @param window
778 * 'window' from Javascript
779 * @param webEngine
780 * Web engine that the page is loaded in
781 * @param frame
782 * Expeditee frame to add the converted page to
783 * @throws IllegalArgumentException
784 * @throws IllegalAccessException
785 */
786 private static void addPageToFrame(Node rootElement, JSObject window, WebEngine webEngine, Frame frame) throws InvocationTargetException, IllegalAccessException,
787 IllegalArgumentException {
788
789 Node currentNode = rootElement;
790
791 if (currentNode.getNodeType() == Node.TEXT_NODE || currentNode.getNodeType() == Node.ELEMENT_NODE) {
792
793 JSObject style;
794 JSObject bounds;
795
796 if (currentNode.getNodeType() == Node.TEXT_NODE) {
797 // CSS style for the element
798 style = (JSObject) window.call("getComputedStyle", new Object[] { currentNode.getParentNode() });
799
800 // Getting a rectangle that represents the area and position of the element
801 bounds = (JSObject) ((JSObject) currentNode.getParentNode()).call("getBoundingClientRect", new Object[] {});
802 } else {
803 style = (JSObject) window.call("getComputedStyle", new Object[] { currentNode });
804
805 bounds = (JSObject) ((JSObject) currentNode).call("getBoundingClientRect", new Object[] {});
806 }
807
808 // Bounding rectangle position is relative to the current view, so scroll position must be added to x/y
809 // TODO: This doesn't check if an element or any of its parent elements have position:fixed set - the only
810 // way to check seems to be to walking through the element's parents until the document root is reached
811 float x = Float.valueOf(bounds.getMember("left").toString()) + Float.valueOf(webEngine.executeScript("window.pageXOffset").toString());
812 float y = Float.valueOf(bounds.getMember("top").toString()) + Float.valueOf(webEngine.executeScript("window.pageYOffset").toString());
813
814 float width = Float.valueOf(bounds.getMember("width").toString());
815 float height = Float.valueOf(bounds.getMember("height").toString());
816
817 // Checking if the element is actually visible on the page
818 if (WebParser.elementVisible(x, y, width, height, style)) {
819
820 // Filtering the node type, starting with text nodes
821 if (currentNode.getNodeType() == Node.TEXT_NODE) {
822
823 String fontSize = ((String) style.call("getPropertyValue", new Object[] { "font-size" }));
824
825 // Trimming off the units (always px) from the font size
826 fontSize = fontSize.substring(0, fontSize.length() - 2);
827
828 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
829 String color = (String) style.call("getPropertyValue", new Object[] { "color" });
830
831 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
832 String bgColorString = (String) style.call("getPropertyValue", new Object[] { "background-color" });
833
834 String align = (String) style.call("getPropertyValue", new Object[] { "text-align" });
835
836 // Returns comma-separated list of typefaces
837 String typeface = (String) style.call("getPropertyValue", new Object[] { "font-family" });
838
839 String[] typefaces = typeface.split(", |,");
840
841 String weight = (String) style.call("getPropertyValue", new Object[] { "font-weight" });
842
843 String fontStyle = (String) style.call("getPropertyValue", new Object[] { "font-style" });
844
845 // Returns "normal" or a value in pixels (e.g. "10px")
846 String letterSpacing = (String) style.call("getPropertyValue", new Object[] { "letter-spacing" });
847
848 // Returns a value in pixels (e.g. "10px")
849 String lineHeight = (String) style.call("getPropertyValue", new Object[] { "line-height" });
850
851 String textTransform = (String) style.call("getPropertyValue", new Object[] { "text-transform" });
852
853 String linkUrl = (String) ((JSObject) currentNode.getParentNode()).getMember("href");
854
855 Boolean fontFound = false;
856 Font font = new Font(null);
857
858 // Looping through all font-families listed in the element's CSS until one that is installed is
859 // found, or the end of the list is reached, in which case the default font is used
860 for (int j = 0; j < typefaces.length && !fontFound; j++) {
861 if (typefaces[j].toLowerCase().equals("sans-serif")) {
862 typefaces[j] = "Arial Unicode MS";
863 } else if (typefaces[j].toLowerCase().equals("serif")) {
864 typefaces[j] = "Times New Roman";
865 } else if ((typefaces[j].toLowerCase().equals("arial"))) {
866 // Have to use Arial Unicode, otherwise unicode characters display incorrectly
867 typefaces[j] = "Arial Unicode MS";
868 }
869
870 // Regex will remove any inverted commas surrounding multi-word typeface names
871 font = new Font(typefaces[j].replaceAll("^'|'$", ""), Font.PLAIN, 12);
872
873 // If the font isn't found, Java just uses Font.DIALOG, so this check checks whether the font was found
874 if (!(font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase()))) {
875 fontFound = true;
876 }
877 }
878
879 if (font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase())) {
880 font = new Font("Times New Roman", Font.PLAIN, 12);
881 }
882
883 String fontStyleComplete = "";
884
885 int weightInt = 0;
886
887 try {
888 weightInt = Integer.parseInt(weight);
889 } catch (NumberFormatException nfe) {
890 // Use default value as set above
891 }
892
893 // checking if font is bold - i.e. 'bold', 'bolder' or weight over 500
894 if (weight.toLowerCase().startsWith("bold") || weightInt > 500) {
895 fontStyleComplete = fontStyleComplete.concat("bold");
896 }
897
898 if (fontStyle.toLowerCase().equals("italic") || fontStyle.toLowerCase().equals("oblique")) {
899 fontStyleComplete = fontStyleComplete.concat("italic");
900 }
901
902 float fontSizeFloat = 12;
903
904 try {
905 fontSizeFloat = Float.valueOf(fontSize);
906 } catch (NumberFormatException nfe) {
907 // Use default value as set above
908 }
909
910 float letterSpacingFloat = -0.008f;
911
912 try {
913 letterSpacingFloat = (Integer.parseInt(letterSpacing.substring(0, letterSpacing.length() - 2)) / (fontSizeFloat));
914 } catch (NumberFormatException nfe) {
915 // Use default value as set above
916 }
917
918 float lineHeightInt = -1;
919
920 try {
921 lineHeightInt = (Float.parseFloat(lineHeight.substring(0, lineHeight.length() - 2)));
922 } catch (NumberFormatException nfe) {
923 // Use default value as set above
924 }
925
926 Text t;
927
928 String textContent = currentNode.getTextContent().replaceAll("[^\\S\\n]+", " ");
929 textContent = textContent.replaceAll("^(\\s)(\\n|\\r)", "");
930
931 if (textTransform.equals("uppercase")) {
932 textContent = textContent.toUpperCase();
933 } else if (textTransform.equals("lowercase")) {
934 textContent = textContent.toUpperCase();
935 }
936
937 // Adding the text to the frame. Expeditee text seems to be positioned relative to the baseline of the first line, so
938 // the font size has to be added to the y-position
939 t = frame.addText(Math.round(x), Math.round(y + fontSizeFloat), textContent, null);
940
941 t.setColor(rgbStringToColor(color));
942 t.setBackgroundColor(rgbStringToColor(bgColorString));
943 t.setFont(font);
944 t.setSize(fontSizeFloat);
945 t.setFontStyle(fontStyleComplete);
946 t.setLetterSpacing(letterSpacingFloat);
947
948 // Removing any spacing between lines allowing t.getLineHeight() to be used to get the actual height
949 // of just the characters (i.e. distance from ascenders to descenders)
950 t.setSpacing(0);
951
952 t.setSpacing(lineHeightInt - t.getLineHeight());
953
954 if (align.equals("left")) {
955 t.setJustification(Justification.left);
956 } else if (align.equals("right")) {
957 t.setJustification(Justification.right);
958 } else if (align.equals("center")) {
959 t.setJustification(Justification.center);
960 } else if (align.equals("justify")) {
961 t.setJustification(Justification.full);
962 }
963
964 // Font size is added to the item width to give a little breathing room
965 t.setWidth(Math.round(width + (t.getSize())));
966
967 if (!linkUrl.equals("undefined")) {
968 t.setAction("gotourl " + linkUrl);
969 t.setActionMark(false);
970 }
971
972 } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
973
974 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
975 String bgColorString = (String) style.call("getPropertyValue", new Object[] { "background-color" });
976
977 Color bgColor = rgbStringToColor(bgColorString);
978
979 // If the element has a background color then add it (to Expeditee) as a rectangle with that background color
980 if (bgColor != null) {
981 frame.addRectangle(Math.round(x), Math.round(y), Math.round(width), Math.round(height), 0, null, bgColor);
982 }
983
984 String linkUrl = (String) ((JSObject) currentNode).getMember("href");
985
986 // background image, returns in format "url(protocol://absolute/path/to/img.extension)" for images,
987 // may also return gradients, data, etc. (not handled yet). Only need to add bg image on
988 // 'ELEMENT_NODE' (and not 'TEXT_NODE' otherwise there would be double-ups
989 if (((String) style.call("getPropertyValue", new Object[] { "background-image" })).startsWith("url(")) {
990
991 try {
992 WebParser.addBackgroundImageFromNode(currentNode, style, frame, linkUrl, x, y, width, height);
993 } catch (MalformedURLException mue) {
994 // probably a 'data:' url, not supported yet
995 mue.printStackTrace();
996 } catch (IOException e) {
997 // TODO Auto-generated catch block
998 e.printStackTrace();
999 }
1000 }
1001
1002 String imgSrc;
1003
1004 if (currentNode.getNodeName().toLowerCase().equals("img") && (imgSrc = ((JSObject) currentNode).getMember("src").toString()) != null) {
1005 try {
1006 WebParser.addImageFromUrl(imgSrc, linkUrl, frame, x, y, (int) width, null, null, null, null, null, 0, 0);
1007 } catch (MalformedURLException mue) {
1008 // probably a 'data:' url, not supported yet
1009 mue.printStackTrace();
1010 } catch (IOException e) {
1011 // TODO Auto-generated catch block
1012 e.printStackTrace();
1013 }
1014 }
1015 }
1016 }
1017
1018 Node childNode = currentNode.getFirstChild();
1019
1020 while (childNode != null) {
1021 addPageToFrame(childNode, window, webEngine, frame);
1022 childNode = childNode.getNextSibling();
1023 }
1024 }
1025 }
1026
1027 private static boolean elementVisible(float x, float y, float width, float height, JSObject style) {
1028 if (width <= 0 || height <= 0 || x + width <= 0 || y + height <= 0 || ((String) style.call("getPropertyValue", new Object[] { "visibility" })).equals("hidden")
1029 || ((String) style.call("getPropertyValue", new Object[] { "display" })).equals("none")) {
1030 return false;
1031 } else {
1032 return true;
1033 }
1034 }
1035
1036 /**
1037 * @param imgSrc
1038 * URL of the image to add
1039 * @param linkUrl
1040 * Absolute URL that the image should link to when clicked
1041 * @param frame
1042 * Frame to add the image to
1043 * @param x
1044 * X-coordinate at which the image should be placed on the frame
1045 * @param y
1046 * Y-coordinate at which the image should be placed on the frame
1047 * @param width
1048 * Width of the image once added to the frame. Negative 1 (-1) will cause the actual width of the image file to be used
1049 *
1050 * @param cropStartX
1051 * X-coordinate at which to start crop, or null for no crop
1052 * @param cropStartY
1053 * Y-coordinate at which to start crop, or null for no crop
1054 * @param cropEndX
1055 * X-coordinate at which to end the crop, or null for no crop
1056 * @param cropEndY
1057 * Y-coordinate at which to end the crop, or null for no crop
1058 *
1059 * @param repeat
1060 * String determining how the image should be tiled/repeated. Valid strings are: <i>no-repeat</i>, <i>repeat-x</i>, or
1061 * <i>repeat-y</i>. All other values (including null) will cause the image to repeat in both directions
1062 *
1063 * @param originXPercent
1064 * Percentage into the image to use as the x coordinate of the image's origin point
1065 * @param originYPercent
1066 * Percentage into the image to use as the y coordinate of the image's origin point
1067 *
1068 * @throws MalformedURLException
1069 * @throws IOException
1070 */
1071 public static Picture getImageFromUrl(String imgSrc, String linkUrl, final Frame frame, float x, float y, int width,
1072 Integer cropStartX, Integer cropStartY, Integer cropEndX, Integer cropEndY, String repeat, float originXPercent, float originYPercent)
1073 throws IOException {
1074
1075 URL imgUrl = new URL(imgSrc);
1076
1077 HttpURLConnection connection = (HttpURLConnection) (imgUrl.openConnection());
1078
1079 // Spoofing a widely accepted User Agent, since some sites refuse to serve non-webbrowser clients
1080 connection.setRequestProperty("User-Agent", "Mozilla/5.0");
1081
1082 BufferedImage img = ImageIO.read(connection.getInputStream());
1083
1084 int hashcode = Arrays.hashCode(img.getData().getPixels(0, 0, img.getWidth(), img.getHeight(), (int[]) null));
1085 File out = new File(FrameIO.IMAGES_PATH + Integer.toHexString(hashcode) + ".png");
1086 out.mkdirs();
1087 ImageIO.write(img, "png", out);
1088
1089 if (repeat == null && cropEndX == null && cropStartX == null && cropEndY == null && cropStartY == null) {
1090 repeat = "no-repeat";
1091 }
1092
1093 if (cropEndX == null || cropStartX == null || cropEndY == null || cropStartY == null) {
1094 cropStartX = 0;
1095 cropStartY = 0;
1096 cropEndX = img.getWidth();
1097 cropEndY = img.getHeight();
1098 } else if (cropStartX < 0) {
1099 cropEndX = cropEndX - cropStartX;
1100 x = x + Math.abs(cropStartX);
1101 cropStartX = 0;
1102 }
1103
1104 if (cropStartY < 0) {
1105 cropEndY = cropEndY - cropStartY;
1106 y = y + Math.abs(cropStartY);
1107 cropStartY = 0;
1108 }
1109
1110 if (width < 0) {
1111 width = img.getWidth();
1112 }
1113
1114 if (repeat != null) {
1115 if (repeat.equals("no-repeat")) {
1116 int tmpCropEndY = (int) (cropStartY + ((float) width / img.getWidth()) * img.getHeight());
1117 int tmpCropEndX = cropStartX + width;
1118
1119 cropEndX = (cropEndX < tmpCropEndX) ? cropEndX : tmpCropEndX;
1120 cropEndY = (cropEndY < tmpCropEndY) ? cropEndY : tmpCropEndY;
1121 } else if (repeat.equals("repeat-x")) {
1122 int tmpCropEndY = (int) (cropStartY + ((float) width / img.getWidth()) * img.getHeight());
1123 cropEndY = (cropEndY < tmpCropEndY) ? cropEndY : tmpCropEndY;
1124 } else if (repeat.equals("repeat-y")) {
1125 int tmpCropEndX = cropStartX + width;
1126 cropEndX = (cropEndX < tmpCropEndX) ? cropEndX : tmpCropEndX;
1127 }
1128 }
1129
1130 if (originXPercent > 0) {
1131 int actualWidth = cropEndX - cropStartX;
1132
1133 int originXPixels = Math.round(originXPercent * actualWidth);
1134
1135 x = x - originXPixels;
1136
1137 cropStartX = (int) (cropStartX + (width - actualWidth) * originXPercent);
1138 cropEndX = (int) (cropEndX + (width - actualWidth) * originXPercent);
1139 }
1140
1141 if (originYPercent > 0) {
1142 int height = (int) ((img.getHeight() / (float) img.getWidth()) * width);
1143 int actualHeight = (cropEndY - cropStartY);
1144 int originYPixels = Math.round(originYPercent * actualHeight);
1145
1146 y = y - originYPixels;
1147
1148 cropStartY = (int) (cropStartY + (height - actualHeight) * originYPercent);
1149 cropEndY = (int) (cropEndY + (height - actualHeight) * originYPercent);
1150 }
1151
1152 Text text = new Text("@i: " + out.getName() + " " + width);
1153 text.setPosition(x, y);
1154
1155 Picture pic = ItemUtils.CreatePicture(text, frame);
1156
1157 float invScale = 1 / pic.getScale();
1158
1159 pic.setCrop((int)(cropStartX * invScale), (int)(cropStartY * invScale), (int)(cropEndX * invScale), (int)(cropEndY * invScale));
1160
1161 if (linkUrl != null && !linkUrl.equals("undefined")) {
1162 pic.setAction("goto " + linkUrl);
1163 pic.setActionMark(false);
1164 }
1165
1166 return pic;
1167 }
1168
1169 private static void addImageFromUrl(String imgSrc, String linkUrl, final Frame frame, float x, float y, int width, Integer cropStartX, Integer cropStartY, Integer cropEndX, Integer cropEndY, String repeat,
1170 float originXPercent, float originYPercent)
1171 throws IOException {
1172 Picture pic = getImageFromUrl(imgSrc, linkUrl, frame, x, y, width, cropStartX, cropStartY, cropEndX, cropEndY, repeat, originXPercent, originYPercent);
1173 frame.addItem(pic);
1174 pic.anchor();
1175 pic.getSource().anchor();
1176 }
1177
1178 public static Picture getBackgroundImageFromNode(Node node, JSObject style, final Frame frame, String linkUrl, float x, float y, float width, float height) throws IOException {
1179
1180
1181 String bgImage = (String) style.call("getPropertyValue", new Object[] { "background-image" });
1182 bgImage = bgImage.substring(4, bgImage.length() - 1);
1183
1184 String bgSize = ((String) style.call("getPropertyValue", new Object[] { "background-size" })).toLowerCase();
1185 String bgRepeat = ((String) style.call("getPropertyValue", new Object[] { "background-repeat" })).toLowerCase();
1186
1187 // Returns "[x]px [y]px", "[x]% [y]%", "[x]px [y]%" or "[x]% [y]px"
1188 String bgPosition = ((String) style.call("getPropertyValue", new Object[] { "background-position" })).toLowerCase();
1189
1190 String[] bgOffsetCoords = bgPosition.split(" ");
1191
1192 int bgOffsetX = 0, bgOffsetY = 0;
1193
1194 float originXPercent = 0, originYPercent = 0;
1195
1196 int cropStartX, cropStartY, cropEndX, cropEndY;
1197
1198 // Converting the x and y offset values to integers (and from % to px if needed)
1199 if (bgOffsetCoords[0].endsWith("%")) {
1200 bgOffsetX = (int) ((Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1)) / 100.0) * width);
1201 originXPercent = (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1))) / 100f;
1202 } else if (bgOffsetCoords[0].endsWith("px")) {
1203 bgOffsetX = (int) (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 2)));
1204 }
1205
1206 if (bgOffsetCoords[1].endsWith("%")) {
1207 bgOffsetY = (int) ((Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1)) / 100.0) * height);
1208 originYPercent = (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1))) / 100f;
1209 } else if (bgOffsetCoords[1].endsWith("px")) {
1210 bgOffsetY = (int) (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 2)));
1211 }
1212
1213 // Converting from an offset to crop coords
1214 cropStartX = -1 * bgOffsetX;
1215 cropEndX = (int) (cropStartX + width);
1216
1217 cropStartY = -1 * bgOffsetY;
1218 cropEndY = (int) (cropStartY + height);
1219
1220 int bgWidth = -1;
1221
1222 if (bgSize.equals("cover")) {
1223 bgWidth = (int) width;
1224 } else if (bgSize.equals("contain")) {
1225 // TODO: actually compute the appropriate width
1226 bgWidth = (int) width;
1227 } else if (bgSize.equals("auto")) {
1228 bgWidth = -1;
1229 } else {
1230 bgSize = bgSize.split(" ")[0];
1231
1232 if (bgSize.endsWith("%")) {
1233 bgWidth = (int) ((Integer.parseInt(bgSize.replaceAll("\\D", "")) / 100.0) * width);
1234 } else if (bgSize.endsWith("px")) {
1235 bgWidth = Integer.parseInt(bgSize.replaceAll("\\D", ""));
1236 }
1237 }
1238
1239 return getImageFromUrl(bgImage, linkUrl, frame, x, y, bgWidth, cropStartX, cropStartY, cropEndX, cropEndY, bgRepeat, originXPercent, originYPercent);
1240 }
1241
1242 private static void addBackgroundImageFromNode(Node node, JSObject style, final Frame frame, String linkUrl, float x, float y, float width, float height) throws IOException {
1243 Picture pic = getBackgroundImageFromNode(node, style, frame, linkUrl, x, y, width, height);
1244 frame.addItem(pic);
1245 pic.anchor();
1246 pic.getSource().anchor();
1247 }
1248
1249 /**
1250 * @param rootElement
1251 * Element that will be converted (including all sub-elements)
1252 * @param backgroundColor
1253 * String to be used as the background color of this element when added. In the format "rgb(x,x,x)" or "rgba(x,x,x,x)"
1254 * @param window
1255 * 'window' from Javascript
1256 * @param webEngine
1257 * Web engine that the page is loaded in
1258 * @param frame
1259 * Expeditee frame to add the converted page to
1260 * @throws IllegalArgumentException
1261 * @throws IllegalAccessException
1262 */
1263 private static void addTextToFrame(int visibleWidth, int visibleHeight, JSObject window, WebEngine webEngine, Frame frame) throws InvocationTargetException,
1264 IllegalAccessException, IllegalArgumentException {
1265
1266 webEngine.executeScript("var walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null, false);");
1267
1268 Node currentNode;
1269
1270 while ((currentNode = (Node) webEngine.executeScript("walker.nextNode()")) != null) {
1271 JSObject style;
1272 JSObject bounds;
1273
1274 // CSS style for the element
1275 style = (JSObject) window.call("getComputedStyle", new Object[] { currentNode.getParentNode() });
1276
1277 // Getting a rectangle that represents the area and position of the element
1278 bounds = (JSObject) ((JSObject) currentNode.getParentNode()).call("getBoundingClientRect", new Object[] {});
1279
1280 // TODO: This doesn't check if an element or any of its parent elements have position:fixed set - the only way to check seems to
1281 // be to walking through the element's parents until the document root is reached (or a recursive function)
1282 float x = Float.valueOf(bounds.getMember("left").toString());
1283 float y = Float.valueOf(bounds.getMember("top").toString());
1284
1285 float width = Float.valueOf(bounds.getMember("width").toString());
1286 float height = Float.valueOf(bounds.getMember("height").toString());
1287
1288 // Checking if the element is actually visible on the page
1289 if (width > 0 && height > 0 && x + width > 0 && y + height > 0 && x <= visibleWidth && y <= visibleHeight
1290 && !(((String) style.call("getPropertyValue", new Object[] { "display" })).equals("none"))
1291 && !(((String) style.call("getPropertyValue", new Object[] { "visibility" })).equals("hidden"))) {
1292
1293 String fontSize = ((String) style.call("getPropertyValue", new Object[] { "font-size" }));
1294
1295 // Trimming off the units (always px) from the font size
1296 fontSize = fontSize.substring(0, fontSize.length() - 2);
1297
1298 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
1299 String color = (String) style.call("getPropertyValue", new Object[] { "color" });
1300
1301 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
1302 String bgColorString = (String) style.call("getPropertyValue", new Object[] { "background-color" });
1303
1304 String align = (String) style.call("getPropertyValue", new Object[] { "text-align" });
1305
1306 // Returns comma-separated list of typefaces
1307 String typeface = (String) style.call("getPropertyValue", new Object[] { "font-family" });
1308
1309 String[] typefaces = typeface.split(", |,");
1310
1311 String weight = (String) style.call("getPropertyValue", new Object[] { "font-weight" });
1312
1313 String fontStyle = (String) style.call("getPropertyValue", new Object[] { "font-style" });
1314
1315 // Returns "normal" or a value in pixels (e.g. "10px")
1316 String letterSpacing = (String) style.call("getPropertyValue", new Object[] { "letter-spacing" });
1317
1318 // Returns a value in pixels (e.g. "10px")
1319 String lineHeight = (String) style.call("getPropertyValue", new Object[] { "line-height" });
1320
1321 String textTransform = (String) style.call("getPropertyValue", new Object[] { "text-transform" });
1322
1323 String linkUrl = (String) ((JSObject) currentNode.getParentNode()).getMember("href");
1324
1325 Boolean fontFound = false;
1326 Font font = new Font(null);
1327
1328 // Looping through all font-families listed in the element's CSS until one that is installed is
1329 // found, or the end of the list is reached, in which case the default font is used
1330 for (int j = 0; j < typefaces.length && !fontFound; j++) {
1331 if (typefaces[j].toLowerCase().equals("sans-serif")) {
1332 typefaces[j] = "SansSerif";
1333 } else if ((typefaces[j].toLowerCase().equals("arial"))) {
1334 // Have to use Arial Unicode, otherwise unicode characters display incorrectly
1335 // It seems that not all systems have this font (including some Windows machines),
1336 // but as long as the website has a general font type specified (e.g. "font-family: Arial, Sans-Serif"),
1337 // there should be no noticeable difference.
1338 typefaces[j] = "Arial Unicode MS";
1339 } else if ((typefaces[j].toLowerCase().equals("monospace"))) {
1340 typefaces[j] = "monospaced";
1341 }
1342
1343 // Regex will remove any inverted commas surrounding multi-word typeface names
1344 font = new Font(typefaces[j].replaceAll("^'|'$", ""), Font.PLAIN, 12);
1345
1346 // If the font isn't found, Java just uses Font.DIALOG, so this check checks whether the font was found
1347 if (!(font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase()))) {
1348 fontFound = true;
1349 }
1350 }
1351
1352 if (font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase())) {
1353 font = new Font("Times New Roman", Font.PLAIN, 12);
1354 }
1355
1356 String fontStyleComplete = "";
1357
1358 int weightInt = 0;
1359
1360 try {
1361 weightInt = Integer.parseInt(weight);
1362 } catch (NumberFormatException nfe) {
1363 // Use default value as set above
1364 }
1365
1366 // checking if font is bold - i.e. 'bold', 'bolder' or weight over 500
1367 if (weight.toLowerCase().startsWith("bold") || weightInt > 500) {
1368 fontStyleComplete = fontStyleComplete.concat("bold");
1369 }
1370
1371 if (fontStyle.toLowerCase().equals("italic") || fontStyle.toLowerCase().equals("oblique")) {
1372 fontStyleComplete = fontStyleComplete.concat("italic");
1373 }
1374
1375 float fontSizeFloat = 12;
1376
1377 try {
1378 fontSizeFloat = Float.valueOf(fontSize);
1379 } catch (NumberFormatException nfe) {
1380 // Use default value as set above
1381 }
1382
1383 float letterSpacingFloat = -0.008f;
1384
1385 try {
1386 letterSpacingFloat = (Integer.parseInt(letterSpacing.substring(0, letterSpacing.length() - 2)) / (fontSizeFloat));
1387 } catch (NumberFormatException nfe) {
1388 // Use default value as set above
1389 }
1390
1391 float lineHeightInt = -1;
1392
1393 try {
1394 lineHeightInt = (Float.parseFloat(lineHeight.substring(0, lineHeight.length() - 2)));
1395 } catch (NumberFormatException nfe) {
1396 // Use default value as set above
1397 }
1398
1399 Text t;
1400
1401 String textContent = currentNode.getTextContent().replaceAll("[^\\S\\n]+", " ");
1402 textContent = textContent.replaceAll("^(\\s)(\\n|\\r)", "");
1403
1404 if (textTransform.equals("uppercase")) {
1405 textContent = textContent.toUpperCase();
1406 } else if (textTransform.equals("lowercase")) {
1407 textContent = textContent.toUpperCase();
1408 }
1409
1410 // Adding the text to the frame. Expeditee text seems to be positioned relative to the baseline of the first line, so
1411 // the font size has to be added to the y-position
1412 t = frame.addText(Math.round(x), Math.round(y + fontSizeFloat), textContent, null);
1413
1414 t.setColor(rgbStringToColor(color));
1415 t.setBackgroundColor(rgbStringToColor(bgColorString));
1416 t.setFont(font);
1417 t.setSize(fontSizeFloat);
1418 t.setFontStyle(fontStyleComplete);
1419 t.setLetterSpacing(letterSpacingFloat);
1420
1421 // Removing any spacing between lines allowing t.getLineHeight() to be used to get the actual height
1422 // of just the characters (i.e. distance from ascenders to descenders)
1423 t.setSpacing(0);
1424
1425 t.setSpacing(lineHeightInt - t.getLineHeight());
1426
1427 if (align.equals("left")) {
1428 t.setJustification(Justification.left);
1429 } else if (align.equals("right")) {
1430 t.setJustification(Justification.right);
1431 } else if (align.equals("center")) {
1432 t.setJustification(Justification.center);
1433 } else if (align.equals("justify")) {
1434 t.setJustification(Justification.full);
1435 }
1436
1437 // Font size is added to the item width to give a little breathing room
1438 t.setWidth(Math.round(width + (t.getSize())));
1439
1440 if (!linkUrl.equals("undefined")) {
1441 t.setAction("createFrameWithBrowser " + linkUrl);
1442 t.setActionMark(false);
1443 }
1444 }
1445 }
1446 }
1447
1448 /**
1449 * Used by the web parser to add Next, Previous, etc. buttons to the converted pages
1450 *
1451 * @param text
1452 * text to display on the button
1453 * @param link
1454 * Frame that the button will link to
1455 * @param action
1456 * Action to run when button is clicked
1457 * @param width
1458 * Width of the button
1459 * @param toAddTo
1460 * Frame to add the button to
1461 * @param anchorTop
1462 * @param anchorRight
1463 * @param anchorBottom
1464 * @param anchorLeft
1465 */
1466 private static void addButton(String text, String link, String action, int width, Frame toAddTo, Float anchorTop, Float anchorRight, Float anchorBottom, Float anchorLeft) {
1467 // Button to go to the next frame/page
1468 Text button = new Text(text);
1469
1470 button.setLink(link);
1471 button.addAction(action);
1472 button.setBorderColor(new Color(0.7f, 0.7f, 0.7f));
1473 button.setBackgroundColor(new Color(0.9f, 0.9f, 0.9f));
1474 button.setThickness(1);
1475 button.setLinkMark(false);
1476 button.setActionMark(false);
1477 button.setFamily("Roboto Condensed Light");
1478 button.setJustification(Justification.center);
1479 button.setWidth(width);
1480
1481 if (anchorTop != null) {
1482 button.setAnchorTop(anchorTop);
1483 }
1484
1485 if (anchorRight != null) {
1486 button.setAnchorRight(anchorRight);
1487 }
1488
1489 if (anchorBottom != null) {
1490 button.setAnchorBottom(anchorBottom);
1491 }
1492
1493 if (anchorLeft != null) {
1494 button.setAnchorLeft(anchorLeft);
1495 }
1496
1497 button.setID(toAddTo.getNextItemID());
1498 toAddTo.addItem(button);
1499
1500 }
1501
1502
1503 private static class MutableBool {
1504 private boolean value;
1505
1506 public MutableBool(boolean value) {
1507 this.value = value;
1508 }
1509
1510 public boolean getValue() {
1511 return value;
1512 }
1513
1514 public void setValue(boolean value) {
1515 this.value = value;
1516 }
1517 }
1518
1519 private static class MutableInt {
1520 private int value;
1521
1522 public MutableInt(int value) {
1523 this.value = value;
1524 }
1525
1526 public int getValue() {
1527 return value;
1528 }
1529
1530 public void setValue(int value) {
1531 this.value = value;
1532 }
1533 }
1534}
Note: See TracBrowser for help on using the repository browser.