source: trunk/src/org/expeditee/io/WebParser.java@ 600

Last change on this file since 600 was 600, checked in by ngw8, 11 years ago

Added support for the CSS 'background-size' property to WebParser

File size: 18.7 KB
Line 
1package org.expeditee.io;
2
3import java.awt.Color;
4import java.awt.Font;
5import java.awt.Point;
6import java.awt.image.BufferedImage;
7import java.io.File;
8import java.io.IOException;
9import java.net.HttpURLConnection;
10import java.net.MalformedURLException;
11import java.net.URL;
12import java.util.Arrays;
13
14import javax.imageio.ImageIO;
15
16import org.expeditee.gui.Frame;
17import org.expeditee.gui.FrameIO;
18import org.expeditee.gui.MessageBay;
19import org.expeditee.items.ItemUtils;
20import org.expeditee.items.Justification;
21import org.expeditee.items.Picture;
22import org.expeditee.items.Text;
23import org.expeditee.reflection.JavaFX;
24import org.w3c.dom.Node;
25import org.w3c.dom.html.HTMLBodyElement;
26
27/**
28 * Methods to convert webpages to Expeditee frames
29 *
30 * @author ngw8
31 * @author jts21
32 */
33public class WebParser {
34
35
36 /**
37 * Loads a webpage and renders it as Expeditee frame(s)
38 *
39 * @param URL
40 * Page to load
41 * @param frame
42 * The Expeditee frame to output the converted page to
43 */
44 public static void parseURL(final String URL, final Frame frame) {
45 try {
46 JavaFX.PlatformRunLater.invoke(null, new Runnable() {
47 @Override
48 public void run() {
49 try {
50 Object webEngine = JavaFX.WebEngineConstructor.newInstance(URL);
51 loadPage(webEngine, frame);
52 } catch (Exception e) {
53 e.printStackTrace();
54 }
55 }
56 });
57 } catch (Exception e) {
58 e.printStackTrace();
59 }
60 }
61
62 protected static void loadPage(final Object webEngine, final Frame frame) throws Exception {
63 JavaFX.ReadOnlyObjectPropertyAddListener.invoke(JavaFX.WorkerStateProperty.invoke(JavaFX.WebEngineGetLoadWorker
64 .invoke(webEngine)), java.lang.reflect.Proxy.newProxyInstance(
65 JavaFX.ChangeListener.getClassLoader(), new java.lang.Class[] { JavaFX.ChangeListener },
66 new java.lang.reflect.InvocationHandler() {
67 @Override
68 public Object invoke(Object proxy, java.lang.reflect.Method method, Object[] args)
69 throws java.lang.Throwable {
70 String method_name = method.getName();
71 // Class<?>[] classes = method.getParameterTypes();
72 // public void changed(ObservableValue ov, State oldState, State newState)
73 if (method_name.equals("changed")) {
74 // changed takes 3 args
75 if (args == null || args.length != 3) {
76 return null;
77 }
78 // args[0] is the ObservableValue
79 // args[2] is the new State
80 if (args[2].getClass() == JavaFX.State) {
81 int id = JavaFX.StateConstants.indexOf(args[2]);
82 switch (id) {
83 case 0: // READY
84 // MessageBay.displayMessage("WebEngine ready");
85 break;
86 case 1: // SCHEDULED
87 // MessageBay.displayMessage("Scheduled page load");
88 break;
89 case 2: // RUNNING
90 System.out.println("Loading page!");
91 // MessageBay.displayMessage("WebEngine running");
92 break;
93 case 3: // SUCCEEDED
94 // MessageBay.displayMessage("Finished loading page");
95 System.out.println("Parsing page!");
96 JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.resizeTo(800, 800)");
97 parsePage(webEngine, frame);
98 System.out.println("Parsed page!");
99 break;
100 case 4: // CANCELLED
101 MessageBay.displayMessage("Cancelled loading page");
102 break;
103 case 5: // FAILED
104 MessageBay.displayMessage("Failed to load page");
105 break;
106 }
107 }
108 System.out.println("\n");
109 }
110 return null;
111 }
112 }));
113 }
114
115 /**
116 * Converts a loaded page to Expeditee frame(s)
117 *
118 * @param webEngine
119 * The JavaFX WebEngine in which the page to be converted is loaded
120 * @param frame
121 * The Expeditee frame to output the converted page to
122 */
123 public static void parsePage(final Object webEngine, final Frame frame) {
124 try {
125 JavaFX.PlatformRunLater.invoke(null, new Runnable() {
126 @Override
127 public void run() {
128 try {
129 HTMLBodyElement doc = (HTMLBodyElement) JavaFX.WebEngineExecuteScript.invoke(webEngine, "document.body");
130
131 Object window = JavaFX.WebEngineExecuteScript.invoke(webEngine, "window");
132
133 frame.setBackgroundColor(rgbStringToColor((String) JavaFX.JSObjectCall.invoke(JavaFX.JSObjectCall.invoke(window, "getComputedStyle", new Object[] { doc }), "getPropertyValue",
134 new Object[] { "background-color" })));
135
136 // Using Javascript to get an array of all the nodes in the document
137 Object nodes = JavaFX.WebEngineExecuteScript.invoke(webEngine,
138 "function getTextNodes(rootNode){"
139 + "var node;" + "var textNodes=[];"
140 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_ALL);"
141 + "while(node=walk.nextNode()) {"
142 + " textNodes.push(node);" + "}"
143 + "return textNodes;"
144 + "}; "
145 + "getTextNodes(document.body)");
146
147 int nodesLength = (Integer) JavaFX.JSObjectGetMember.invoke(nodes, "length");
148
149 for (int i = 0; i < nodesLength; i++) {
150 Node currentNode = (Node) JavaFX.JSObjectGetSlot.invoke(nodes, i);
151
152 if (currentNode.getNodeType() == Node.TEXT_NODE || currentNode.getNodeType() == Node.ELEMENT_NODE) {
153
154 System.out.println(i + "/" + nodesLength + " : " + currentNode);
155
156 Object style;
157 Object bounds;
158
159 if (currentNode.getNodeType() == Node.TEXT_NODE) {
160 // CSS style for the element
161 style = JavaFX.JSObjectCall.invoke(window, "getComputedStyle", new Object[] { currentNode.getParentNode() });
162
163 // Getting a rectangle that represents the area and position of the element
164 bounds = JavaFX.JSObjectCall.invoke(currentNode.getParentNode(), "getBoundingClientRect", new Object[] {});
165 } else {
166 style = JavaFX.JSObjectCall.invoke(window, "getComputedStyle", new Object[] { currentNode });
167
168 bounds = JavaFX.JSObjectCall.invoke(currentNode, "getBoundingClientRect", new Object[] {});
169 }
170
171 // Bounding rectangle position is relative to the current view, so scroll position must be added to x/y
172 float x = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "left").toString())
173 + Float.valueOf(JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.pageXOffset").toString());
174 float y = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "top").toString())
175 + Float.valueOf(JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.pageYOffset").toString());
176
177 float width = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "width").toString());
178 float height = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "height").toString());
179
180 // Checking if the element is actually visible on the page
181 if (WebParser.elementVisible(x, y, width, height, style)) {
182
183 // Filtering the node type, starting with text nodes
184 if (currentNode.getNodeType() == Node.TEXT_NODE) {
185 String fontSize = ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-size" }));
186
187 // Trimming off the units (always px) from the font size
188 fontSize = fontSize.substring(0, fontSize.length() - 2);
189
190 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
191 String color = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "color" });
192
193 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
194 String bgColor = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-color" });
195
196 String align = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "text-align" });
197
198 // Returns comma-separated list of typefaces
199 String typeface = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-family" });
200 String[] typefaces = typeface.split(", |,");
201
202 String weight = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-weight" });
203
204 String fontStyle = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-style" });
205
206 // Returns "normal" or a value in pixels (e.g. "10px")
207 String letterSpacing = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "letter-spacing" });
208
209 Boolean fontFound = false;
210 Font font = new Font(null);
211
212 // Looping through all font-families listed in the element's CSS until one that is installed is
213 // found, or the end of the list is reached, in which case the default font is used
214 for (int j = 0; j < typefaces.length && !fontFound; j++) {
215
216 // Regex will remove any inverted commas surrounding multi-word typeface names
217 font = new Font(typefaces[j].replaceAll("^'|'$", ""), Font.PLAIN, 12);
218
219 if (!(font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase()))) {
220 fontFound = true;
221 }
222 }
223
224 String fontStyleComplete = "";
225
226 int weightInt = 0;
227
228 try {
229 weightInt = Integer.parseInt(weight);
230 } catch (NumberFormatException nfe) {
231 weightInt = 0;
232 }
233
234 // checking if font is bold - i.e. 'bold', 'bolder' or weight over 500
235 if (weight.toLowerCase().startsWith("bold") || weightInt > 500) {
236 fontStyleComplete = fontStyleComplete.concat("bold");
237 }
238
239 if (fontStyle.toLowerCase().equals("italic") || fontStyle.toLowerCase().equals("oblique")) {
240 fontStyleComplete = fontStyleComplete.concat("italic");
241 }
242
243 int letterSpacingInt = 0;
244
245 try {
246 letterSpacingInt = (int) (Integer.parseInt(letterSpacing.substring(0, letterSpacing.length() - 2)) / Float.valueOf(fontSize));
247 } catch (NumberFormatException nfe) {
248 letterSpacingInt = 0;
249 }
250
251 Text t;
252
253 t = frame.addText((int) x, (int) y, currentNode.getTextContent().replaceAll("\\s+", " ").trim(), null);
254
255 t.setColor(rgbStringToColor(color));
256 t.setBackgroundColor(rgbStringToColor(bgColor));
257 t.setFont(font);
258 t.setSize(Float.valueOf(fontSize));
259 t.setFontStyle(fontStyleComplete);
260 t.setLetterSpacing(letterSpacingInt);
261
262 if (align.equals("left")) {
263 t.setJustification(Justification.left);
264 } else if (align.equals("right")) {
265 t.setJustification(Justification.right);
266 } else if (align.equals("center")) {
267 t.setJustification(Justification.center);
268 } else if (align.equals("justify")) {
269 t.setJustification(Justification.full);
270 }
271
272 // Font size divided by 2 is added to the item width to give a little breathing room
273 t.setWidth(Math.round(width + (t.getSize() / 2)));
274
275 } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
276
277 // background image, returns in format "url(protocol://absolute/path/to/img.extension)" for images,
278 // may also return gradients, data, etc. (not handled yet). Only need to add bg image on
279 // 'ELEMENT_NODE' (and not 'TEXT_NODE' otherwise there would be double-ups
280 String bgImage = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-image" });
281
282 if (bgImage.startsWith("url(")) {
283
284 bgImage = bgImage.substring(4, bgImage.length() - 1);
285
286 String bgSize = ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-size" })).toLowerCase();
287
288 // Returns "[x]px [y]px", "[x]% [y]%", "[x]px [y]%" or "[x]% [y]px"
289 String bgPosition = ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-position" })).toLowerCase();
290
291 String[] bgOffsetCoords = bgPosition.split(" ");
292
293 int bgOffsetX = 0, bgOffsetY = 0;
294
295 int cropStartX, cropStartY, cropEndX, cropEndY;
296
297 // Converting the x and y offset values to integers (and from % to px if needed)
298 if (bgOffsetCoords[0].endsWith("%")) {
299 bgOffsetX = (int) ((Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1)) / 100.0) * width);
300 } else if (bgOffsetCoords[0].endsWith("px")) {
301 bgOffsetX = (int) (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 2)));
302 }
303
304 if (bgOffsetCoords[1].endsWith("%")) {
305 bgOffsetY = (int) ((Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1)) / 100.0) * height);
306 } else if (bgOffsetCoords[1].endsWith("px")) {
307 bgOffsetY = (int) (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 2)));
308 }
309
310 // Converting from an offset to crop coords
311 cropStartX = -1 * bgOffsetX;
312 cropEndX = (int) (cropStartX + width);
313
314 cropStartY = -1 * bgOffsetY;
315 cropEndY = (int) (cropStartY + height);
316
317 int bgWidth = -1;
318
319 if (bgSize.equals("cover")) {
320 bgWidth = (int) width;
321 } else if (bgSize.equals("contain")) {
322 // TODO: actually compute the appropriate width
323 bgWidth = (int) width;
324 } else if (bgSize.equals("auto")) {
325 bgWidth = -1;
326 } else {
327 bgSize = bgSize.split(" ")[0];
328
329 if (bgSize.endsWith("%")) {
330 bgWidth = (int) ((Integer.parseInt(bgSize.replaceAll("\\D", "")) / 100.0) * width);
331 } else if (bgSize.endsWith("px")) {
332 bgWidth = Integer.parseInt(bgSize.replaceAll("\\D", ""));
333 }
334 }
335
336 try {
337 WebParser.addImageFromUrl(bgImage, frame, x, y, bgWidth, cropStartX, cropStartY, cropEndX, cropEndY);
338 } catch (MalformedURLException mue) {
339 // probably a 'data:' url, not supported yet
340 mue.printStackTrace();
341 }
342 }
343
344 String imgSrc;
345
346 if (currentNode.getNodeName().toLowerCase().equals("img") && (imgSrc = JavaFX.JSObjectGetMember.invoke(currentNode, "src").toString()) != null) {
347 try {
348 WebParser.addImageFromUrl(imgSrc, frame, x, y, (int) width, null, null, null, null);
349 } catch (MalformedURLException mue) {
350 // probably a 'data:' url, not supported yet
351 mue.printStackTrace();
352 }
353 }
354 }
355 }
356 }
357 }
358
359 } catch (Exception e) {
360 e.printStackTrace();
361 }
362 System.out.println("Parsed frame");
363 FrameIO.SaveFrame(frame);
364 }
365 });
366 } catch (Exception e) {
367 e.printStackTrace();
368 }
369 }
370
371 /**
372 * @param rgbString
373 * string in the format <i>rgb(x,x,x)</i> or <i>rgba(x,x,x,x)</i>
374 * @return A Color object that should match the rgb string passed int. Returns null if alpha is 0
375 */
376 private static Color rgbStringToColor(String rgbString) {
377 // Splitting the string into 'rgb' and 'x, x, x'
378 String[] tmpStrings = rgbString.split("\\(|\\)");
379
380 // Splitting up the RGB(A) components into an array
381 tmpStrings = tmpStrings[1].split(",");
382
383 int[] components = new int[4];
384 Arrays.fill(components, 255);
385
386 for (int i = 0; i < tmpStrings.length; i++) {
387 Float d = Float.parseFloat(tmpStrings[i].trim());
388
389 components[i] = Math.round(d);
390 }
391
392 if (components[3] > 0) {
393 return new Color(components[0], components[1], components[2], components[3]);
394 } else {
395 return null;
396 }
397 }
398
399 private static boolean elementVisible(float x, float y, float width, float height, Object style) {
400 try {
401 if (width <= 0 || height <= 0 || x + width <= 0 || y + height <= 0 || ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "visibility" })).equals("hidden")
402 || ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "display" })).equals("none")) {
403 return false;
404 } else {
405 return true;
406 }
407 } catch (Exception e) {
408 e.printStackTrace();
409 return false;
410 }
411 }
412
413 /**
414 * @param imgSrc
415 * URL of the image to add
416 * @param frame
417 * Frame to add the image to
418 * @param x
419 * X-coordinate at which the image should be placed on the frame
420 * @param y
421 * Y-coordinate at which the image should be placed on the frame
422 * @param width
423 * Width of the image once added to the frame. Negative 1 (-1) will cause the actual width of the image file to be used
424 *
425 * @param cropStartX
426 * X-coordinate at which to start crop, or null for no crop
427 * @param cropStartY
428 * Y-coordinate at which to start crop, or null for no crop
429 * @param cropEndX
430 * X-coordinate at which to end the crop, or null for no crop
431 * @param cropEndY
432 * Y-coordinate at which to end the crop, or null for no crop
433 * @throws MalformedURLException
434 * @throws IOException
435 */
436 private static void addImageFromUrl(String imgSrc, final Frame frame, float x, float y, int width, Integer cropStartX, Integer cropStartY, Integer cropEndX, Integer cropEndY)
437 throws MalformedURLException,
438 IOException {
439
440 URL imgUrl = new URL(imgSrc);
441
442 HttpURLConnection connection = (HttpURLConnection) (imgUrl.openConnection());
443
444 // Spoofing a widely accepted User Agent, since some sites refuse to serve non-webbrowser clients
445 connection.setRequestProperty("User-Agent", "Mozilla/5.0");
446
447 BufferedImage img = ImageIO.read(connection.getInputStream());
448
449 int hashcode = Arrays.hashCode(img.getData().getPixels(0, 0, img.getWidth(), img.getHeight(), (int[]) null));
450 File out = new File(FrameIO.IMAGES_PATH + Integer.toHexString(hashcode) + ".png");
451 out.mkdirs();
452 ImageIO.write(img, "png", out);
453
454 if (cropEndX == null || cropStartX == null || cropEndY == null || cropStartY == null) {
455 cropStartX = 0;
456 cropStartY = 0;
457 cropEndX = img.getWidth();
458 cropEndY = img.getHeight();
459 }
460
461 if (width < 0) {
462 width = img.getWidth();
463 }
464
465 Picture pic = new Picture(out.getName(), frame);
466 pic.setWidth(width);
467
468 // Have to divide the crop coords by the image scale, since Expeditee seems to always crop then scale
469 pic.setCropStart(new Point((int) (cropStartX / pic.getScale()), (int) (cropStartY / pic.getScale())));
470 pic.setCropEnd(new Point((int) (cropEndX / pic.getScale()), (int) (cropEndY / pic.getScale())));
471 pic.setPosition(x, y);
472 frame.addItem(pic);
473 }
474}
Note: See TracBrowser for help on using the repository browser.