/**
 *    DocumentStats.java
 *    Copyright (C) 2010 New Zealand Digital Library, http://expeditee.org
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
18 |
|
---|
19 | package org.expeditee.stats;
|
---|
20 |
|
---|
21 | import java.util.HashSet;
|
---|
22 | import java.util.Set;
|
---|
23 |
|
---|
24 | import org.expeditee.gui.Frame;
|
---|
25 | import org.expeditee.gui.FrameIO;
|
---|
26 | import org.expeditee.gui.MessageBay;
|
---|
27 | import org.expeditee.items.Text;
|
---|
28 |
|
---|
29 | public class DocumentStats extends CometStats {
|
---|
30 | protected int _treeFrames = 0;
|
---|
31 |
|
---|
32 | protected int _characters = 0;
|
---|
33 |
|
---|
34 | protected int _words = 0;
|
---|
35 |
|
---|
36 | protected int _textItems = 0;
|
---|
37 |
|
---|
38 | protected int _sentences = 0;
|
---|
39 |
|
---|
40 | public static int wordCount(String paragraph) {
|
---|
41 | return paragraph.trim().split("\\s+").length + 1;
|
---|
42 | }
|
---|
43 |
|
---|
44 |
|
---|
45 | public DocumentStats(Frame topFrame) {
|
---|
46 | this(topFrame, new HashSet<String>());
|
---|
47 | }
|
---|
48 |
|
---|
49 | public DocumentStats(Frame topFrame, Set<String> visited) {
|
---|
50 | super(topFrame);
|
---|
51 | visited.add(_name.toLowerCase());
|
---|
52 | MessageBay.overwriteMessage("Computed: " + _name);
|
---|
53 |
|
---|
54 | // Initialise variables with the data for this frames comet
|
---|
55 | _characters = 0;
|
---|
56 | _words = 0;
|
---|
57 | _textItems = 0;
|
---|
58 | _sentences = 0;
|
---|
59 | _treeFrames = 1;
|
---|
60 |
|
---|
61 | // Now get all add all the trees for linked items
|
---|
62 | for (Text i : topFrame.getBodyTextItems(false)) {
|
---|
63 | _textItems++;
|
---|
64 | String text = i.getText().trim();
|
---|
65 | _words += text.split("\\s+").length;
|
---|
66 | _sentences += text.split("\\.+").length;
|
---|
67 | _characters += text.length();
|
---|
68 |
|
---|
69 | String link = i.getAbsoluteLink();
|
---|
70 | if (link == null)
|
---|
71 | continue;
|
---|
72 | // Stop infinite loops by not visiting nodes we have already visited
|
---|
73 | if (visited.contains(link.toLowerCase())) {
|
---|
74 | continue;
|
---|
75 | }
|
---|
76 | Frame childFrame = FrameIO.LoadFrame(i.getAbsoluteLink());
|
---|
77 | if (childFrame == null)
|
---|
78 | continue;
|
---|
79 |
|
---|
80 | DocumentStats childItemStats = new DocumentStats(childFrame,
|
---|
81 | visited);
|
---|
82 | _words += childItemStats._words;
|
---|
83 | _characters += childItemStats._characters;
|
---|
84 | _textItems += childItemStats._textItems;
|
---|
85 | _sentences += childItemStats._sentences;
|
---|
86 | _treeFrames += childItemStats._treeFrames;
|
---|
87 | }
|
---|
88 | }
|
---|
89 |
|
---|
90 | @Override
|
---|
91 | public String toString() {
|
---|
92 | StringBuffer sb = new StringBuffer();
|
---|
93 | sb.append(SessionStats.getDate());
|
---|
94 | sb.append("DocStats: ").append(_name).append('\n');
|
---|
95 | sb.append("Title: ").append(_title).append('\n');
|
---|
96 | sb.append("Frames: ").append(_treeFrames).append('\n');
|
---|
97 | sb.append("TextItems: ").append(_textItems).append('\n');
|
---|
98 | sb.append("Sentences: ").append(_sentences).append('\n');
|
---|
99 | sb.append("Words: ").append(_words).append('\n');
|
---|
100 | sb.append("Chars: ").append(_characters);
|
---|
101 | return sb.toString();
|
---|
102 | }
|
---|
103 | }
|
---|