source: trunk/src/org/expeditee/io/WebParser.java@ 594

Last change on this file since 594 was 594, checked in by ngw8, 11 years ago

Added conversion support for CSS bg images that have been 'cropped' via bg-positioning, still need to make it work with Jeremy's save format.
Added tracking/letter spacing support to Expeditee text.

File size: 18.0 KB
Line 
1package org.expeditee.io;
2
3import java.awt.Color;
4import java.awt.Font;
5import java.awt.image.BufferedImage;
6import java.io.File;
7import java.io.IOException;
8import java.net.HttpURLConnection;
9import java.net.MalformedURLException;
10import java.net.URL;
11import java.util.Arrays;
12
13import javax.imageio.ImageIO;
14
15import org.expeditee.gui.Frame;
16import org.expeditee.gui.FrameIO;
17import org.expeditee.gui.MessageBay;
18import org.expeditee.items.ItemUtils;
19import org.expeditee.items.Justification;
20import org.expeditee.items.Picture;
21import org.expeditee.items.Text;
22import org.expeditee.reflection.JavaFX;
23import org.w3c.dom.Node;
24import org.w3c.dom.html.HTMLBodyElement;
25
26/**
27 * Methods to convert webpages to Expeditee frames
28 *
29 * @author ngw8
30 * @author jts21
31 */
32public class WebParser {
33
34
35 /**
36 * Loads a webpage and renders it as Expeditee frame(s)
37 *
38 * @param URL
39 * Page to load
40 * @param frame
41 * The Expeditee frame to output the converted page to
42 */
43 public static void parseURL(final String URL, final Frame frame) {
44 try {
45 JavaFX.PlatformRunLater.invoke(null, new Runnable() {
46 @Override
47 public void run() {
48 try {
49 Object webEngine = JavaFX.WebEngineConstructor.newInstance(URL);
50 loadPage(webEngine, frame);
51 } catch (Exception e) {
52 e.printStackTrace();
53 }
54 }
55 });
56 } catch (Exception e) {
57 e.printStackTrace();
58 }
59 }
60
61 protected static void loadPage(final Object webEngine, final Frame frame) throws Exception {
62 JavaFX.ReadOnlyObjectPropertyAddListener.invoke(JavaFX.WorkerStateProperty.invoke(JavaFX.WebEngineGetLoadWorker
63 .invoke(webEngine)), java.lang.reflect.Proxy.newProxyInstance(
64 JavaFX.ChangeListener.getClassLoader(), new java.lang.Class[] { JavaFX.ChangeListener },
65 new java.lang.reflect.InvocationHandler() {
66 @Override
67 public Object invoke(Object proxy, java.lang.reflect.Method method, Object[] args)
68 throws java.lang.Throwable {
69 String method_name = method.getName();
70 // Class<?>[] classes = method.getParameterTypes();
71 // public void changed(ObservableValue ov, State oldState, State newState)
72 if (method_name.equals("changed")) {
73 // changed takes 3 args
74 if (args == null || args.length != 3) {
75 return null;
76 }
77 // args[0] is the ObservableValue
78 // args[2] is the new State
79 if (args[2].getClass() == JavaFX.State) {
80 int id = JavaFX.StateConstants.indexOf(args[2]);
81 switch (id) {
82 case 0: // READY
83 // MessageBay.displayMessage("WebEngine ready");
84 break;
85 case 1: // SCHEDULED
86 // MessageBay.displayMessage("Scheduled page load");
87 break;
88 case 2: // RUNNING
89 System.out.println("Loading page!");
90 // MessageBay.displayMessage("WebEngine running");
91 break;
92 case 3: // SUCCEEDED
93 // MessageBay.displayMessage("Finished loading page");
94 System.out.println("Parsing page!");
95 JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.resizeTo(800, 800)");
96 parsePage(webEngine, frame);
97 System.out.println("Parsed page!");
98 break;
99 case 4: // CANCELLED
100 MessageBay.displayMessage("Cancelled loading page");
101 break;
102 case 5: // FAILED
103 MessageBay.displayMessage("Failed to load page");
104 break;
105 }
106 }
107 System.out.println("\n");
108 }
109 return null;
110 }
111 }));
112 }
113
114 /**
115 * Converts a loaded page to Expeditee frame(s)
116 *
117 * @param webEngine
118 * The JavaFX WebEngine in which the page to be converted is loaded
119 * @param frame
120 * The Expeditee frame to output the converted page to
121 */
122 public static void parsePage(final Object webEngine, final Frame frame) {
123 try {
124 JavaFX.PlatformRunLater.invoke(null, new Runnable() {
125 @Override
126 public void run() {
127 try {
128 HTMLBodyElement doc = (HTMLBodyElement) JavaFX.WebEngineExecuteScript.invoke(webEngine, "document.body");
129
130 Object window = JavaFX.WebEngineExecuteScript.invoke(webEngine, "window");
131
132 frame.setBackgroundColor(rgbStringToColor((String) JavaFX.JSObjectCall.invoke(JavaFX.JSObjectCall.invoke(window, "getComputedStyle", new Object[] { doc }), "getPropertyValue",
133 new Object[] { "background-color" })));
134
135 // Using Javascript to get an array of all the nodes in the document
136 Object nodes = JavaFX.WebEngineExecuteScript.invoke(webEngine,
137 "function getTextNodes(rootNode){"
138 + "var node;" + "var textNodes=[];"
139 + "var walk = document.createTreeWalker(rootNode, NodeFilter.SHOW_ALL);"
140 + "while(node=walk.nextNode()) {"
141 + " textNodes.push(node);" + "}"
142 + "return textNodes;"
143 + "}; "
144 + "getTextNodes(document.body)");
145
146 int nodesLength = (Integer) JavaFX.JSObjectGetMember.invoke(nodes, "length");
147
148 for (int i = 0; i < nodesLength; i++) {
149 Node currentNode = (Node) JavaFX.JSObjectGetSlot.invoke(nodes, i);
150
151 if (currentNode.getNodeType() == Node.TEXT_NODE || currentNode.getNodeType() == Node.ELEMENT_NODE) {
152
153 System.out.println(i + "/" + nodesLength + " : " + currentNode);
154
155 Object style;
156 Object bounds;
157
158 if (currentNode.getNodeType() == Node.TEXT_NODE) {
159 // CSS style for the element
160 style = JavaFX.JSObjectCall.invoke(window, "getComputedStyle", new Object[] { currentNode.getParentNode() });
161
162 // Getting a rectangle that represents the area and position of the element
163 bounds = JavaFX.JSObjectCall.invoke(currentNode.getParentNode(), "getBoundingClientRect", new Object[] {});
164 } else {
165 style = JavaFX.JSObjectCall.invoke(window, "getComputedStyle", new Object[] { currentNode });
166
167 bounds = JavaFX.JSObjectCall.invoke(currentNode, "getBoundingClientRect", new Object[] {});
168 }
169
170 // Bounding rectangle position is relative to the current view, so scroll position must be added to x/y
171 float x = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "left").toString())
172 + Float.valueOf(JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.pageXOffset").toString());
173 float y = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "top").toString())
174 + Float.valueOf(JavaFX.WebEngineExecuteScript.invoke(webEngine, "window.pageYOffset").toString());
175
176 float width = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "width").toString());
177 float height = Float.valueOf(JavaFX.JSObjectGetMember.invoke(bounds, "height").toString());
178
179 // Checking if the element is actually visible on the page
180 if (WebParser.elementVisible(x, y, width, height, style)) {
181
182 // background image, returns in format "url(protocol://absolute/path/to/img.extension)" for images, may
183 // also return gradients, data, etc. (not handled yet)
184 String bgImage = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-image" });
185
186 if (bgImage.startsWith("url(")) {
187
188 bgImage = bgImage.substring(4, bgImage.length() - 1);
189
190 String bgSize = ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-size" })).toLowerCase();
191
192 // Returns "[x]px [y]px", "[x]% [y]%", "[x]px [y]%" or "[x]% [y]px"
193 String bgPosition = ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-position" })).toLowerCase();
194
195 String[] bgOffsetCoords = bgPosition.split(" ");
196
197 int bgOffsetX = 0, bgOffsetY = 0;
198
199 int cropStartX, cropStartY, cropEndX, cropEndY;
200
201 // Converting the x and y offset values to integers (and from % to px if needed)
202 if (bgOffsetCoords[0].endsWith("%")) {
203 bgOffsetX = (int) ((Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 1)) / 100.0) * width);
204 } else if (bgOffsetCoords[0].endsWith("px")) {
205 bgOffsetX = (int) (Integer.valueOf(bgOffsetCoords[0].substring(0, bgOffsetCoords[0].length() - 2)));
206 }
207
208 if (bgOffsetCoords[1].endsWith("%")) {
209 bgOffsetY = (int) ((Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 1)) / 100.0) * height);
210 } else if (bgOffsetCoords[1].endsWith("px")) {
211 bgOffsetY = (int) (Integer.valueOf(bgOffsetCoords[1].substring(0, bgOffsetCoords[1].length() - 2)));
212 }
213
214 // Converting from an offset to crop coords
215 cropStartX = -1 * bgOffsetX;
216 cropEndX = (int) (cropStartX + width);
217
218 cropStartY = -1 * bgOffsetY;
219 cropEndY = (int) (cropStartY + height);
220
221 int bgWidth = -1;
222
223 if (bgSize.equals("cover")) {
224 bgWidth = (int) width;
225 } else if (bgSize.equals("contain")) {
226 // TODO: actually compute the appropriate width
227 bgWidth = (int) width;
228 } else if (bgSize.equals("auto")) {
229 bgWidth = -1;
230 } else {
231 bgWidth = Integer.parseInt(bgSize.split(" ")[0].replaceAll("\\D", ""));
232 }
233
234 try {
235 WebParser.addImageFromUrl(bgImage, frame, x, y, bgWidth, cropStartX, cropStartY, cropEndX, cropEndY);
236 } catch (MalformedURLException mue) {
237 // probably a 'data:' url, not supported yet
238 mue.printStackTrace();
239 }
240 }
241
242 // Filtering the node type, starting with text nodes
243 if (currentNode.getNodeType() == Node.TEXT_NODE) {
244 String fontSize = ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-size" }));
245
246 // Trimming off the units (always px) from the font size
247 fontSize = fontSize.substring(0, fontSize.length() - 2);
248
249 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
250 String color = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "color" });
251
252 // Always returns in format "rgb(x,x,x)" or "rgba(x,x,x,x)"
253 String bgColor = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "background-color" });
254
255 String align = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "text-align" });
256
257 // Returns comma-separated list of typefaces
258 String typeface = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-family" });
259 String[] typefaces = typeface.split(", |,");
260
261 String weight = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-weight" });
262
263 String fontStyle = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "font-style" });
264
265 // Returns "normal" or a value in pixels (e.g. "10px")
266 String letterSpacing = (String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "letter-spacing" });
267
268 Boolean fontFound = false;
269 Font font = new Font(null);
270
271 // Looping through all font-families listed in the element's CSS until one that is installed is
272 // found, or the end of the list is reached, in which case the default font is used
273 for (int j = 0; j < typefaces.length && !fontFound; j++) {
274
275 // Regex will remove any inverted commas surrounding multi-word typeface names
276 font = new Font(typefaces[j].replaceAll("^'|'$", ""), Font.PLAIN, 12);
277
278 if (!(font.getFamily().toLowerCase().equals(Font.DIALOG.toLowerCase()))) {
279 fontFound = true;
280 }
281 }
282
283 String fontStyleComplete = "";
284
285 int weightInt = 0;
286
287 try {
288 weightInt = Integer.parseInt(weight);
289 } catch (NumberFormatException nfe) {
290 weightInt = 0;
291 }
292
293 // checking if font is bold - i.e. 'bold', 'bolder' or weight over 500
294 if (weight.toLowerCase().startsWith("bold") || weightInt > 500) {
295 fontStyleComplete = fontStyleComplete.concat("bold");
296 }
297
298 if (fontStyle.toLowerCase().equals("italic") || fontStyle.toLowerCase().equals("oblique")) {
299 fontStyleComplete = fontStyleComplete.concat("italic");
300 }
301
302 int letterSpacingInt = 0;
303
304 try {
305 letterSpacingInt = (int) (Integer.parseInt(letterSpacing.substring(0, letterSpacing.length() - 2)) / Float.valueOf(fontSize));
306 } catch (NumberFormatException nfe) {
307 letterSpacingInt = 0;
308 }
309
310 Text t;
311
312 t = frame.addText((int) x, (int) y, currentNode.getTextContent().replaceAll("\\s+", " ").trim(), null);
313
314 t.setColor(rgbStringToColor(color));
315 t.setBackgroundColor(rgbStringToColor(bgColor));
316 t.setFont(font);
317 t.setSize(Float.valueOf(fontSize));
318 t.setFontStyle(fontStyleComplete);
319 t.setLetterSpacing(letterSpacingInt);
320
321 if (align.equals("left")) {
322 t.setJustification(Justification.left);
323 } else if (align.equals("right")) {
324 t.setJustification(Justification.right);
325 } else if (align.equals("center")) {
326 t.setJustification(Justification.center);
327 } else if (align.equals("justify")) {
328 t.setJustification(Justification.full);
329 }
330
331 // Font size divided by 2 is added to the item width to give a little breathing room
332 t.setWidth(Math.round(width + (t.getSize() / 2)));
333
334 } else if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
335
336 String imgSrc;
337
338 if (currentNode.getNodeName().toLowerCase().equals("img") && (imgSrc = JavaFX.JSObjectGetMember.invoke(currentNode, "src").toString()) != null) {
339 try {
340 WebParser.addImageFromUrl(imgSrc, frame, x, y, (int) width, null, null, null, null);
341 } catch (MalformedURLException mue) {
342 // probably a 'data:' url, not supported yet
343 mue.printStackTrace();
344 }
345 }
346 }
347 }
348 }
349 }
350
351 } catch (Exception e) {
352 e.printStackTrace();
353 }
354 System.out.println("Parsed frame");
355 FrameIO.SaveFrame(frame);
356 }
357 });
358 } catch (Exception e) {
359 e.printStackTrace();
360 }
361 }
362
363 /**
364 * @param rgbString
365 * string in the format <i>rgb(x,x,x)</i> or <i>rgba(x,x,x,x)</i>
366 * @return A Color object that should match the rgb string passed int. Returns null if alpha is 0
367 */
368 private static Color rgbStringToColor(String rgbString) {
369 // Splitting the string into 'rgb' and 'x, x, x'
370 String[] tmpStrings = rgbString.split("\\(|\\)");
371
372 // Splitting up the RGB(A) components into an array
373 tmpStrings = tmpStrings[1].split(",");
374
375 int[] components = new int[4];
376 Arrays.fill(components, 255);
377
378 for (int i = 0; i < tmpStrings.length; i++) {
379 Float d = Float.parseFloat(tmpStrings[i].trim());
380
381 components[i] = Math.round(d);
382 }
383
384 if (components[3] > 0) {
385 return new Color(components[0], components[1], components[2], components[3]);
386 } else {
387 return null;
388 }
389 }
390
391 private static boolean elementVisible(float x, float y, float width, float height, Object style) {
392 try {
393 if (width <= 0 || height <= 0 || x + width <= 0 || y + height <= 0 || ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "visibility" })).equals("hidden")
394 || ((String) JavaFX.JSObjectCall.invoke(style, "getPropertyValue", new Object[] { "display" })).equals("none")) {
395 return false;
396 } else {
397 return true;
398 }
399 } catch (Exception e) {
400 e.printStackTrace();
401 return false;
402 }
403 }
404
405 /**
406 * @param imgSrc
407 * URL of the image to add
408 * @param frame
409 * Frame to add the image to
410 * @param x
411 * X-coordinate at which the image should be placed on the frame
412 * @param y
413 * Y-coordinate at which the image should be placed on the frame
414 * @param width
415 * Width of the image once added to the frame, negative 1 (-1) will cause the actual width of the image file to be used
416 *
417 * @param cropStartX
418 * @param cropStartY
419 * @param cropEndX
420 * @param cropEndY
421 * @throws MalformedURLException
422 * @throws IOException
423 */
424 private static void addImageFromUrl(String imgSrc, final Frame frame, float x, float y, int width, Integer cropStartX, Integer cropStartY, Integer cropEndX, Integer cropEndY)
425 throws MalformedURLException,
426 IOException {
427
428 URL imgUrl = new URL(imgSrc);
429
430 HttpURLConnection connection = (HttpURLConnection) (imgUrl.openConnection());
431
432 // Spoofing a widely accepted User Agent, since some sites refuse to serve non-webbrowser clients
433 connection.setRequestProperty("User-Agent", "Mozilla/5.0");
434
435 BufferedImage img = ImageIO.read(connection.getInputStream());
436
437 int hashcode = Arrays.hashCode(img.getData().getPixels(0, 0, img.getWidth(), img.getHeight(), (int[]) null));
438 File out = new File(FrameIO.IMAGES_PATH + Integer.toHexString(hashcode) + ".png");
439 out.mkdirs();
440 ImageIO.write(img, "png", out);
441
442 if (cropEndX != null && cropStartX != null && cropEndY != null && cropStartY != null) {
443 width = cropEndX - cropStartX;
444 } else {
445 cropStartX = 0;
446 cropStartY = 0;
447 cropEndX = img.getWidth();
448 cropEndY = img.getHeight();
449
450 if (width < 0) {
451 width = img.getWidth();
452 }
453 }
454
455 Text item = new Text(ItemUtils.GetTag(ItemUtils.TAG_IMAGE) + ":" + out.getPath() + " " + ((width >= 0) ? width : img.getWidth()) + " " + cropStartX + " " + cropStartY
456 + " " + cropEndX + " " + cropEndY);
457
458 item.setPosition(x, y);
459
460 Picture pic = ItemUtils.CreatePicture(item, frame);
461 pic.setPosition(x, y);
462 frame.addItem(pic);
463 }
464}
Note: See TracBrowser for help on using the repository browser.