source: trunk/src/org/expeditee/stats/DocumentStatsFast.java@ 919

Last change on this file since 919 was 919, checked in by jts21, 10 years ago

Added license headers to all files, added full GPL3 license file, moved license header generator script to dev/bin/scripts

File size: 4.8 KB
Line 
1/**
2 * DocumentStatsFast.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://expeditee.org
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19package org.expeditee.stats;
20
21import java.io.BufferedReader;
22import java.io.FileNotFoundException;
23import java.io.FileReader;
24import java.io.IOException;
25import java.util.HashSet;
26import java.util.Set;
27
28import org.expeditee.gui.AttributeValuePair;
29import org.expeditee.gui.FrameIO;
30import org.expeditee.gui.MessageBay;
31import org.expeditee.io.Conversion;
32import org.expeditee.settings.folders.FolderSettings;
33
34public class DocumentStatsFast extends Stats {
35 protected int _treeFrames = 0;
36
37 protected int _characters = 0;
38
39 protected int _words = 0;
40
41 protected int _textItems = 0;
42
43 protected int _sentences = 0;
44
45 protected String _name = null;
46
47 protected String _title = null;
48
49 public static int wordCount(String paragraph) {
50 return paragraph.trim().split("\\s+").length + 1;
51 }
52
53 public DocumentStatsFast(String topFrame, String title) {
54 this(topFrame, new HashSet<String>());
55 _title = title;
56 }
57
58 public DocumentStatsFast(String topFrame, Set<String> visited) {
59 _name = topFrame;
60 String lowerName = _name.toLowerCase();
61
62 if (visited.contains(lowerName)) {
63 return;
64 }
65
66 visited.add(_name.toLowerCase());
67 MessageBay.overwriteMessage("Computed: " + _name);
68
69 // Initialise variables with the data for this frames comet
70 _words = 0;
71 _characters = 0;
72 _textItems = 0;
73 _sentences = 0;
74 _treeFrames = 1;
75
76 String fullPath = null;
77 for (String possiblePath : FolderSettings.FrameDirs.get()) {
78 fullPath = FrameIO.getFrameFullPathName(possiblePath, _name);
79 if (fullPath != null)
80 break;
81 }
82
83 // If the frame was not located return null
84 if (fullPath == null)
85 return;
86
87 String frameset = Conversion.getFramesetName(_name);
88
89 // Open the file and search the text items
90 try {
91 BufferedReader reader = new BufferedReader(new FileReader(fullPath));
92 String next;
93 StringBuffer sb = new StringBuffer();
94 String link = null;
95 boolean ignore = false;
96 while (reader.ready() && ((next = reader.readLine()) != null)) {
97 if (next.length() == 0) {
98 // Ignore annotations
99 if (ignore) {
100 ignore = false;
101 link = null;
102 continue;
103 }
104
105 // Ignore non text items
106 if (sb.length() == 0) {
107 link = null;
108 continue;
109 }
110
111 if (link == null) {
112 // remove the last newLine... not absolutely needed
113 String text = sb.substring(0, sb.length() - 1);
114 _textItems++;
115 _characters += text.length();
116 _words += text.split("\\s+").length;
117 _sentences += text.split("\\.+").length;
118 } else {
119 DocumentStatsFast childItemStats = new DocumentStatsFast(
120 link, visited);
121 _characters += childItemStats._characters;
122 _words += childItemStats._words;
123 _textItems += childItemStats._textItems;
124 _sentences += childItemStats._sentences;
125 _treeFrames += childItemStats._treeFrames;
126 }
127 // Reinit the item variables
128 link = null;
129 sb = new StringBuffer();
130 } else if (ignore) {
131 continue;
132 } else if (next.startsWith("T")) {
133 String text = next.substring(2).trim();
134 // Ignore the rest of annotation items...
135 if (text.length() > 0
136 && text.charAt(0) == AttributeValuePair.ANNOTATION_CHAR) {
137 ignore = true;
138 continue;
139 }
140 sb.append(text).append('\n');
141 } else if (next.startsWith("F")) {
142 link = next.substring(2);
143 // Convert number only links
144 if (Character.isDigit(link.charAt(0)))
145 link = frameset + link;
146 }
147 }
148 } catch (FileNotFoundException e) {
149 e.printStackTrace();
150 } catch (IOException e) {
151 e.printStackTrace();
152 }
153 }
154
155 @Override
156 public String toString() {
157 StringBuffer sb = new StringBuffer();
158 sb.append(SessionStats.getDate());
159 sb.append("DocStats: ").append(_name).append('\n');
160 sb.append("Title: ").append(_title).append('\n');
161 sb.append("Frames: ").append(_treeFrames).append('\n');
162 sb.append("TextItems: ").append(_textItems).append('\n');
163 sb.append("Sentences: ").append(_sentences).append('\n');
164 sb.append("Words: ").append(_words).append('\n');
165 sb.append("Chars: ").append(_characters);
166 return sb.toString();
167 }
168}
Note: See TracBrowser for help on using the repository browser.