source: trunk/src/org/expeditee/stats/DocumentStatsFast.java@ 1244

Last change on this file since 1244 was 1244, checked in by davidb, 5 years ago

After the change to have resources-public and resources-private, some changes were needed to support running Expeditee for a single user; the other main change is to allow FrameDirs to specify relative directory paths, to help when Expeditee is run on the cloud -- similar work still needs to occur for ImageDir and AudioDir; some other minor changes were also made.

File size: 4.8 KB
Line 
1/**
2 * DocumentStatsFast.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://expeditee.org
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19package org.expeditee.stats;
20
21import java.io.BufferedReader;
22import java.io.FileNotFoundException;
23import java.io.FileReader;
24import java.io.IOException;
25import java.util.HashSet;
26import java.util.Set;
27
28import org.expeditee.gui.AttributeValuePair;
29import org.expeditee.gui.FrameIO;
30import org.expeditee.gui.MessageBay;
31import org.expeditee.io.Conversion;
32import org.expeditee.settings.folders.FolderSettings;
33
34public class DocumentStatsFast extends Stats {
35 protected int _treeFrames = 0;
36
37 protected int _characters = 0;
38
39 protected int _words = 0;
40
41 protected int _textItems = 0;
42
43 protected int _sentences = 0;
44
45 protected String _name = null;
46
47 protected String _title = null;
48
49 public static int wordCount(String paragraph) {
50 return paragraph.trim().split("\\s+").length + 1;
51 }
52
/**
 * Computes the statistics for the document starting at the given frame,
 * using a fresh (empty) set of visited frames, and records its title.
 *
 * @param topFrame
 *            name of the frame the document starts at
 * @param title
 *            title reported by {@link #toString()}
 */
public DocumentStatsFast(String topFrame, String title) {
	this(topFrame, new HashSet<String>());
	this._title = title;
}
57
58 public DocumentStatsFast(String topFrame, Set<String> visited) {
59 _name = topFrame;
60 String lowerName = _name.toLowerCase();
61
62 if (visited.contains(lowerName)) {
63 return;
64 }
65
66 visited.add(_name.toLowerCase());
67 MessageBay.overwriteMessage("Computed: " + _name);
68
69 // Initialise variables with the data for this frames comet
70 _words = 0;
71 _characters = 0;
72 _textItems = 0;
73 _sentences = 0;
74 _treeFrames = 1;
75
76 String fullPath = null;
77 for (String possiblePath : FolderSettings.FrameDirs.getAbsoluteDirs()) {
78 fullPath = FrameIO.getFrameFullPathName(possiblePath, _name);
79 if (fullPath != null)
80 break;
81 }
82
83 // If the frame was not located return null
84 if (fullPath == null)
85 return;
86
87 String frameset = Conversion.getFramesetName(_name);
88
89 // Open the file and search the text items
90 try {
91 BufferedReader reader = new BufferedReader(new FileReader(fullPath));
92 String next;
93 StringBuffer sb = new StringBuffer();
94 String link = null;
95 boolean ignore = false;
96 while (reader.ready() && ((next = reader.readLine()) != null)) {
97 if (next.length() == 0) {
98 // Ignore annotations
99 if (ignore) {
100 ignore = false;
101 link = null;
102 continue;
103 }
104
105 // Ignore non text items
106 if (sb.length() == 0) {
107 link = null;
108 continue;
109 }
110
111 if (link == null) {
112 // remove the last newLine... not absolutely needed
113 String text = sb.substring(0, sb.length() - 1);
114 _textItems++;
115 _characters += text.length();
116 _words += text.split("\\s+").length;
117 _sentences += text.split("\\.+").length;
118 } else {
119 DocumentStatsFast childItemStats = new DocumentStatsFast(
120 link, visited);
121 _characters += childItemStats._characters;
122 _words += childItemStats._words;
123 _textItems += childItemStats._textItems;
124 _sentences += childItemStats._sentences;
125 _treeFrames += childItemStats._treeFrames;
126 }
127 // Reinit the item variables
128 link = null;
129 sb = new StringBuffer();
130 } else if (ignore) {
131 continue;
132 } else if (next.startsWith("T")) {
133 String text = next.substring(2).trim();
134 // Ignore the rest of annotation items...
135 if (text.length() > 0
136 && text.charAt(0) == AttributeValuePair.ANNOTATION_CHAR) {
137 ignore = true;
138 continue;
139 }
140 sb.append(text).append('\n');
141 } else if (next.startsWith("F")) {
142 link = next.substring(2);
143 // Convert number only links
144 if (Character.isDigit(link.charAt(0)))
145 link = frameset + link;
146 }
147 }
148 } catch (FileNotFoundException e) {
149 e.printStackTrace();
150 } catch (IOException e) {
151 e.printStackTrace();
152 }
153 }
154
155 @Override
156 public String toString() {
157 StringBuffer sb = new StringBuffer();
158 sb.append(SessionStats.getDate());
159 sb.append("DocStats: ").append(_name).append('\n');
160 sb.append("Title: ").append(_title).append('\n');
161 sb.append("Frames: ").append(_treeFrames).append('\n');
162 sb.append("TextItems: ").append(_textItems).append('\n');
163 sb.append("Sentences: ").append(_sentences).append('\n');
164 sb.append("Words: ").append(_words).append('\n');
165 sb.append("Chars: ").append(_characters);
166 return sb.toString();
167 }
168}
Note: See TracBrowser for help on using the repository browser.