1 | package org.expeditee.stats;
|
---|
2 |
|
---|
3 | import java.io.BufferedReader;
|
---|
4 | import java.io.FileNotFoundException;
|
---|
5 | import java.io.FileReader;
|
---|
6 | import java.io.IOException;
|
---|
7 | import java.util.HashSet;
|
---|
8 | import java.util.Set;
|
---|
9 |
|
---|
10 | import org.expeditee.gui.AttributeValuePair;
|
---|
11 | import org.expeditee.gui.FrameIO;
|
---|
12 | import org.expeditee.gui.MessageBay;
|
---|
13 | import org.expeditee.gui.UserSettings;
|
---|
14 | import org.expeditee.io.Conversion;
|
---|
15 |
|
---|
16 | public class DocumentStatsFast extends Stats {
|
---|
17 | protected int _treeFrames = 0;
|
---|
18 |
|
---|
19 | protected int _characters = 0;
|
---|
20 |
|
---|
21 | protected int _words = 0;
|
---|
22 |
|
---|
23 | protected int _textItems = 0;
|
---|
24 |
|
---|
25 | protected int _sentances = 0;
|
---|
26 |
|
---|
27 | protected String _name = null;
|
---|
28 |
|
---|
29 | protected String _title = null;
|
---|
30 |
|
---|
31 | public static int wordCount(String paragraph) {
|
---|
32 | return paragraph.trim().split("\\s+").length + 1;
|
---|
33 | }
|
---|
34 |
|
---|
35 | public DocumentStatsFast(String topFrame, String title) {
|
---|
36 | this(topFrame, new HashSet<String>());
|
---|
37 | _title = title;
|
---|
38 | }
|
---|
39 |
|
---|
40 | public DocumentStatsFast(String topFrame, Set<String> visited) {
|
---|
41 | _name = topFrame;
|
---|
42 | String lowerName = _name.toLowerCase();
|
---|
43 |
|
---|
44 | if (visited.contains(lowerName)) {
|
---|
45 | return;
|
---|
46 | }
|
---|
47 |
|
---|
48 | visited.add(_name.toLowerCase());
|
---|
49 | MessageBay.overwriteMessage("Computed: " + _name);
|
---|
50 |
|
---|
51 | // Initialise variables with the data for this frames comet
|
---|
52 | _words = 0;
|
---|
53 | _characters = 0;
|
---|
54 | _textItems = 0;
|
---|
55 | _sentances = 0;
|
---|
56 | _treeFrames = 1;
|
---|
57 |
|
---|
58 | String fullPath = null;
|
---|
59 | for (String possiblePath : UserSettings.FrameDirs) {
|
---|
60 | fullPath = FrameIO.getFrameFullPathName(possiblePath, _name);
|
---|
61 | if (fullPath != null)
|
---|
62 | break;
|
---|
63 | }
|
---|
64 |
|
---|
65 | // If the frame was not located return null
|
---|
66 | if (fullPath == null)
|
---|
67 | return;
|
---|
68 |
|
---|
69 | String frameset = Conversion.getFramesetName(_name);
|
---|
70 |
|
---|
71 | // Open the file and search the text items
|
---|
72 | try {
|
---|
73 | BufferedReader reader = new BufferedReader(new FileReader(fullPath));
|
---|
74 | String next;
|
---|
75 | StringBuffer sb = new StringBuffer();
|
---|
76 | String link = null;
|
---|
77 | boolean ignore = false;
|
---|
78 | while (reader.ready() && ((next = reader.readLine()) != null)) {
|
---|
79 | if (next.length() == 0) {
|
---|
80 | // Ignore annotations
|
---|
81 | if (ignore) {
|
---|
82 | ignore = false;
|
---|
83 | link = null;
|
---|
84 | continue;
|
---|
85 | }
|
---|
86 |
|
---|
87 | // Ignore non text items
|
---|
88 | if (sb.length() == 0) {
|
---|
89 | link = null;
|
---|
90 | continue;
|
---|
91 | }
|
---|
92 |
|
---|
93 | if (link == null) {
|
---|
94 | // remove the last newLine... not absolutely needed
|
---|
95 | String text = sb.substring(0, sb.length() - 1);
|
---|
96 | _textItems++;
|
---|
97 | _characters += text.length();
|
---|
98 | _words += text.split("\\s+").length;
|
---|
99 | _sentances += text.split("\\.+").length;
|
---|
100 | } else {
|
---|
101 | DocumentStatsFast childItemStats = new DocumentStatsFast(
|
---|
102 | link, visited);
|
---|
103 | _characters += childItemStats._characters;
|
---|
104 | _words += childItemStats._words;
|
---|
105 | _textItems += childItemStats._textItems;
|
---|
106 | _sentances += childItemStats._sentances;
|
---|
107 | _treeFrames += childItemStats._treeFrames;
|
---|
108 | }
|
---|
109 | // Reinit the item variables
|
---|
110 | link = null;
|
---|
111 | sb = new StringBuffer();
|
---|
112 | } else if (ignore) {
|
---|
113 | continue;
|
---|
114 | } else if (next.startsWith("T")) {
|
---|
115 | String text = next.substring(2).trim();
|
---|
116 | // Ignore the rest of annotation items...
|
---|
117 | if (text.length() > 0
|
---|
118 | && text.charAt(0) == AttributeValuePair.ANNOTATION_CHAR) {
|
---|
119 | ignore = true;
|
---|
120 | continue;
|
---|
121 | }
|
---|
122 | sb.append(text).append('\n');
|
---|
123 | } else if (next.startsWith("F")) {
|
---|
124 | link = next.substring(2);
|
---|
125 | // Convert number only links
|
---|
126 | if (Character.isDigit(link.charAt(0)))
|
---|
127 | link = frameset + link;
|
---|
128 | }
|
---|
129 | }
|
---|
130 | } catch (FileNotFoundException e) {
|
---|
131 | e.printStackTrace();
|
---|
132 | } catch (IOException e) {
|
---|
133 | e.printStackTrace();
|
---|
134 | }
|
---|
135 | }
|
---|
136 |
|
---|
137 | @Override
|
---|
138 | public String toString() {
|
---|
139 | StringBuffer sb = new StringBuffer();
|
---|
140 | sb.append(SessionStats.getDate());
|
---|
141 | sb.append("DocStats: ").append(_name).append('\n');
|
---|
142 | sb.append("Title: ").append(_title).append('\n');
|
---|
143 | sb.append("Frames: ").append(_treeFrames).append('\n');
|
---|
144 | sb.append("TextItems: ").append(_textItems).append('\n');
|
---|
145 | sb.append("Sentances: ").append(_sentances).append('\n');
|
---|
146 | sb.append("Words: ").append(_words).append('\n');
|
---|
147 | sb.append("Chars: ").append(_characters);
|
---|
148 | return sb.toString();
|
---|
149 | }
|
---|
150 | }
|
---|