1 | package org.expeditee.greenstone;
|
---|
2 |
|
---|
3 | import java.io.BufferedReader;
|
---|
4 | import java.io.IOException;
|
---|
5 | import java.io.InputStreamReader;
|
---|
6 | import java.io.PrintWriter;
|
---|
7 | import java.io.StringReader;
|
---|
8 | import java.net.Socket;
|
---|
9 | import java.net.UnknownHostException;
|
---|
10 | import java.util.ArrayList;
|
---|
11 | import java.util.Collections;
|
---|
12 | import java.util.HashMap;
|
---|
13 | import java.util.Iterator;
|
---|
14 | import java.util.List;
|
---|
15 | import java.util.ListIterator;
|
---|
16 | import java.util.Map;
|
---|
17 | import java.util.Set;
|
---|
18 |
|
---|
19 | import org.apache.xerces.parsers.DOMParser;
|
---|
20 | import org.w3c.dom.Document;
|
---|
21 | import org.w3c.dom.NamedNodeMap;
|
---|
22 | import org.w3c.dom.Node;
|
---|
23 | import org.w3c.dom.NodeList;
|
---|
24 | import org.xml.sax.InputSource;
|
---|
25 | import org.xml.sax.SAXException;
|
---|
26 |
|
---|
27 | /**
|
---|
28 | * This class provides a simple API for communicating with a Greenstone 3 server
|
---|
29 | * using SOAP.
|
---|
30 | * <p>
|
---|
31 | * Greenstone 3 does not yet 'properly' implement SOAP-based web services. We
|
---|
32 | * would like to use a Greenstone WSDL (Web Services Definition Language) file
|
---|
33 | * and a higher level SOAP Client interface. But we can't. To get around this,
|
---|
34 | * this API uses a simple socket connection to the Greenstone 3 server, and
|
---|
35 | * sends SOAP requests as strings (XML documents). This works but isn't elegant.
|
---|
36 | * The server responds with a string representing an XML document.
|
---|
37 | * <p>
|
---|
38 | * The server's hostname and port are hard-coded. <b>Do not modify them.</b>
|
---|
39 | * <p>
|
---|
40 | * The Greenstone collection to use is <i>hcibib</i>, and this is also
|
---|
41 | * hard-coded. <b>Do not modify this.</b>
|
---|
42 | * <p>
|
---|
43 | * This collection can be accessed from a web browser at <a
|
---|
44 | * href="http://delaware.resnet.scms.waikato.ac.nz:8111/greenstone3/library?a=p&sa=about&c=hcibib">
|
---|
45 | * this location</a>.
|
---|
46 | */
|
---|
47 | public class Greenstone3Connection {
|
---|
48 | /** an ordered list of {@link Query} objects */
|
---|
49 | private List queryList;
|
---|
50 |
|
---|
51 | /**
|
---|
52 | * a HashMap of {@link ResultDocument} objects with document IDs as the
|
---|
53 | * keys. All the results returned in this session.
|
---|
54 | */
|
---|
55 | private Map allResults;
|
---|
56 |
|
---|
57 | /**
|
---|
58 | * a HashMap keyed on the keywords found for all documents returned in this
|
---|
59 | * session. Each item in the map is itself a HashMap, keyed on document IDs
|
---|
60 | * with each item being NULL.
|
---|
61 | */
|
---|
62 | private Map allKeywords;
|
---|
63 |
|
---|
64 | /**
|
---|
65 | * a HashMap keyed on the author names found for all documents returned in
|
---|
66 | * this session. Each item in the map is itself a HashMap, keyed on document
|
---|
67 | * IDs with each item being NULL.
|
---|
68 | */
|
---|
69 | private Map allAuthors;
|
---|
70 |
|
---|
71 | /**
|
---|
72 | * a HashMap keyed on the publication dates found for all documents returned
|
---|
73 | * in this session. Each item in the map is itself a HashMap, keyed on
|
---|
74 | * document IDs with each item being NULL.
|
---|
75 | */
|
---|
76 | private Map allDates;
|
---|
77 |
|
---|
78 | /**
|
---|
79 | * a HashMap keyed on the journal names found for all documents returned in
|
---|
80 | * this session. Each item in the map is itself a HashMap, keyed on document
|
---|
81 | * IDs with each item being NULL.
|
---|
82 | */
|
---|
83 | private Map allJournals;
|
---|
84 |
|
---|
85 | /**
|
---|
86 | * a HashMap keyed on the book titles found for all documents returned in
|
---|
87 | * this session. Each item in the map is itself a HashMap, keyed on document
|
---|
88 | * IDs with each item being NULL.
|
---|
89 | */
|
---|
90 | private Map allBooktitles;
|
---|
91 |
|
---|
92 | /** the <i>hostname</i> where the Greenstone 3 server is running */
|
---|
93 | private String hostname;
|
---|
94 |
|
---|
95 | /** the <i>port</i> on which the Greenstone 3 server is running */
|
---|
96 | private int port;
|
---|
97 |
|
---|
98 | /** for communication with the server */
|
---|
99 | private Socket socket = null;
|
---|
100 |
|
---|
101 | /** for writing the SOAP request strings to the server socket */
|
---|
102 | private PrintWriter toGSDL = null;
|
---|
103 |
|
---|
104 | /** for reading the SOAP response strings from the server socket */
|
---|
105 | private BufferedReader fromGSDL = null;
|
---|
106 |
|
---|
107 | /** string that starts every SOAP request */
|
---|
108 | private String SOAPrequestHeader;
|
---|
109 |
|
---|
110 | /** acts as a template for every SOAP request string */
|
---|
111 | private String SOAPrequestMessage = "<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><soapenv:Body><message><request lang='en' to='hcibib/PROCESSNAME' type='PROCESSTYPE'>REQUESTBODY</request></message></soapenv:Body></soapenv:Envelope>";
|
---|
112 |
|
---|
/**
 * Selects one of the two hard-coded Greenstone 3 servers and creates the
 * empty per-session data structures. No network connection is opened here.
 * <p>
 * A client application using this API will normally only create one
 * instance of this class, for example
 *
 * <pre>
 * Greenstone3Connection gsdl = new Greenstone3Connection(0);
 * </pre>
 *
 * The constructor initialises the following <b>private</b> variables...
 * <ul>
 * <li>the <i>hostname</i> and <i>port</i> of the Greenstone 3 server</li>
 * <li>the HTTP header text that precedes every SOAP request</li>
 * <li><i>queryList</i>, an ordered list of {@link Query} objects</li>
 * <li><i>allResults</i>, a map of {@link ResultDocument} objects keyed on
 * document ID</li>
 * <li><i>allKeywords</i>, <i>allAuthors</i>, <i>allDates</i>,
 * <i>allJournals</i> and <i>allBooktitles</i>, each a map from a metadata
 * value to a HashMap keyed on the IDs of the documents with that value</li>
 * </ul>
 * All of these collections are wrapped with the synchronized wrappers from
 * {@link Collections}.
 *
 * @param location
 *            selects the server: <code>0</code> uses
 *            <code>comp537.cs.waikato.ac.nz</code> on port 80, any other
 *            value uses <code>130.217.220.10</code> on port 8111
 */
public Greenstone3Connection(int location) {
    if (location == 0) {
        this.hostname = "comp537.cs.waikato.ac.nz";
        this.port = 80;
    } else {
        this.hostname = "130.217.220.10";
        this.port = 8111;
    }
    // Derive the Host header from the fields chosen above so the two can
    // never disagree; the resulting text is the same as the former literals.
    this.SOAPrequestHeader = "POST /greenstone3/services/localsite HTTP/1.1\nHost: "
            + this.hostname + ":" + this.port
            + "\nSOAPAction: hcibib/PROCESSNAME\nContent-Type: text/xml;charset=utf-8\nContent-Length: ";

    this.queryList = Collections.synchronizedList(new ArrayList());
    this.allResults = Collections.synchronizedMap(new HashMap());
    this.allKeywords = Collections.synchronizedMap(new HashMap());
    this.allAuthors = Collections.synchronizedMap(new HashMap());
    this.allDates = Collections.synchronizedMap(new HashMap());
    this.allJournals = Collections.synchronizedMap(new HashMap());
    this.allBooktitles = Collections.synchronizedMap(new HashMap());
}
|
---|
165 |
|
---|
166 | public Map getSessionResults() {
|
---|
167 | return this.allResults;
|
---|
168 | };
|
---|
169 |
|
---|
170 | /**
|
---|
171 | * Print a string representation of the list of queries issued in this
|
---|
172 | * session.
|
---|
173 | */
|
---|
174 | public void dumpQueryList() {
|
---|
175 | ListIterator iter = queryList.listIterator();
|
---|
176 | while (iter.hasNext()) {
|
---|
177 | Query query = (Query) iter.next();
|
---|
178 | System.out.println(query.toString());
|
---|
179 | }
|
---|
180 | }
|
---|
181 |
|
---|
182 | /**
|
---|
183 | * Print a string representation of the Booktitles occuring for all query
|
---|
184 | * results in this session. For each booktitle print the IDs of the
|
---|
185 | * documents with that booktitle.
|
---|
186 | */
|
---|
187 | public void dumpAllBooktitles() {
|
---|
188 | Set keys = allBooktitles.keySet();
|
---|
189 | Iterator iter = keys.iterator();
|
---|
190 | while (iter.hasNext()) {
|
---|
191 | String booktitle = (String) iter.next();
|
---|
192 | HashMap docMap = (HashMap) allBooktitles.get(booktitle);
|
---|
193 | System.out.println(booktitle);
|
---|
194 | System.out.println(docMap.keySet().toString());
|
---|
195 | }
|
---|
196 | }
|
---|
197 |
|
---|
198 | /**
|
---|
199 | * Print a string representation of the Journals occuring for all query
|
---|
200 | * results in this session. For each journal print the IDs of the documents
|
---|
201 | * with that journal.
|
---|
202 | */
|
---|
203 | public void dumpAllJournals() {
|
---|
204 | Set keys = allJournals.keySet();
|
---|
205 | Iterator iter = keys.iterator();
|
---|
206 | while (iter.hasNext()) {
|
---|
207 | String journal = (String) iter.next();
|
---|
208 | HashMap docMap = (HashMap) allJournals.get(journal);
|
---|
209 | System.out.println(journal);
|
---|
210 | System.out.println(docMap.keySet().toString());
|
---|
211 | }
|
---|
212 | }
|
---|
213 |
|
---|
214 | /**
|
---|
215 | * Print a string representation of the Dates occuring for all query results
|
---|
216 | * in this session. For each date print the IDs of the documents with that
|
---|
217 | * date.
|
---|
218 | */
|
---|
219 | public void dumpAllDates() {
|
---|
220 | Set keys = allDates.keySet();
|
---|
221 | Iterator iter = keys.iterator();
|
---|
222 | while (iter.hasNext()) {
|
---|
223 | String date = (String) iter.next();
|
---|
224 | HashMap docMap = (HashMap) allDates.get(date);
|
---|
225 | System.out.println(date);
|
---|
226 | System.out.println(docMap.keySet().toString());
|
---|
227 | }
|
---|
228 | }
|
---|
229 |
|
---|
230 | /**
|
---|
231 | * Print a string representation of the Authors occuring for all query
|
---|
232 | * results in this session. For each author print the IDs of the documents
|
---|
233 | * with that author.
|
---|
234 | */
|
---|
235 | public void dumpAllAuthors() {
|
---|
236 | Set keys = allAuthors.keySet();
|
---|
237 | Iterator iter = keys.iterator();
|
---|
238 | while (iter.hasNext()) {
|
---|
239 | String author = (String) iter.next();
|
---|
240 | HashMap docMap = (HashMap) allAuthors.get(author);
|
---|
241 | System.out.println(author);
|
---|
242 | System.out.println(docMap.keySet().toString());
|
---|
243 | }
|
---|
244 | }
|
---|
245 |
|
---|
246 | /**
|
---|
247 | * Print a string representation of the Keywords occuring for all query
|
---|
248 | * results in this session. For each keyword print the IDs of the documents
|
---|
249 | * with that keyword.
|
---|
250 | */
|
---|
251 | public void dumpAllKeywords() {
|
---|
252 | Set keys = allKeywords.keySet();
|
---|
253 | Iterator iter = keys.iterator();
|
---|
254 | while (iter.hasNext()) {
|
---|
255 | String keyword = (String) iter.next();
|
---|
256 | HashMap docMap = (HashMap) allKeywords.get(keyword);
|
---|
257 | System.out.println(keyword);
|
---|
258 | System.out.println(docMap.keySet().toString());
|
---|
259 | }
|
---|
260 | }
|
---|
261 |
|
---|
262 | /**
|
---|
263 | * Print a string representation of all the result documents returned by
|
---|
264 | * queries in this session.
|
---|
265 | */
|
---|
266 | public void dumpAllResults() {
|
---|
267 | Set keys = allResults.keySet();
|
---|
268 | Iterator iter = keys.iterator();
|
---|
269 |
|
---|
270 | while (iter.hasNext()) {
|
---|
271 | String docID = (String) iter.next();
|
---|
272 | ResultDocument resultDocument = (ResultDocument) allResults
|
---|
273 | .get(docID);
|
---|
274 | System.out.println("____________" + docID + " ___________");
|
---|
275 | System.out.println(resultDocument.toString());
|
---|
276 | }
|
---|
277 | }
|
---|
278 |
|
---|
279 | /**
|
---|
280 | * Print all the result documents IDs returned by queries in this session,
|
---|
281 | * along with their titles.
|
---|
282 | */
|
---|
283 | public void dumpAllTitles() {
|
---|
284 | Set keys = allResults.keySet();
|
---|
285 | Iterator iter = keys.iterator();
|
---|
286 | while (iter.hasNext()) {
|
---|
287 | String docID = (String) iter.next();
|
---|
288 | ResultDocument resultDocument = (ResultDocument) allResults
|
---|
289 | .get(docID);
|
---|
290 | System.out.println(docID + "\t" + resultDocument.getTitle());
|
---|
291 | }
|
---|
292 | }
|
---|
293 |
|
---|
294 | /**
|
---|
295 | * Provides the {@link ResultDocument} object for the document with the
|
---|
296 | * given ID
|
---|
297 | *
|
---|
298 | * @param docID
|
---|
299 | * is a document identifier, in the form returned by the server
|
---|
300 | * and available from a {@link QueryOutcome}
|
---|
301 | * @return the {@link ResultDocument} object reflecting the state of the
|
---|
302 | * result document at the time that this method was called. The
|
---|
303 | * state can change as more metadata is retrieved for the document
|
---|
304 | * and the document is returned by further queries.
|
---|
305 | */
|
---|
306 | public ResultDocument getDocument(String docID) {
|
---|
307 | return (ResultDocument) allResults.get(docID);
|
---|
308 | }
|
---|
309 |
|
---|
310 | /**
|
---|
311 | * Implements the actual communication with the server. <b>You can not call
|
---|
312 | * this method directly from your client code.</b>
|
---|
313 | * <p>
|
---|
314 | * Throws an exception and exits if the hosthame is not known or the
|
---|
315 | * connection can't be established.
|
---|
316 | * <p>
|
---|
317 | *
|
---|
318 | * @param request
|
---|
319 | * an already well formed string that contains the appropriate
|
---|
320 | * HTTP headers and a SOAP message (in XML form) that will ask
|
---|
321 | * the server for some information.
|
---|
322 | * @return a string containing a SOAP message (an XML document) that the
|
---|
323 | * server returned in response to the request
|
---|
324 | */
|
---|
325 | private String doRequest(String request) {
|
---|
326 | // System.err.println("Connecting to " + hostname + " on port " + port);
|
---|
327 | try {
|
---|
328 | try {
|
---|
329 | socket = new Socket(hostname, port);
|
---|
330 | } catch (SecurityException se) {
|
---|
331 | System.err.println("Security exception : " + se);
|
---|
332 | System.exit(1);
|
---|
333 | }
|
---|
334 | toGSDL = new PrintWriter(socket.getOutputStream(), true);
|
---|
335 | fromGSDL = new BufferedReader(new InputStreamReader(socket
|
---|
336 | .getInputStream()));
|
---|
337 | } catch (UnknownHostException e) {
|
---|
338 | System.err.println("Don't know about GSDL host: " + hostname);
|
---|
339 | System.exit(1);
|
---|
340 | } catch (IOException e) {
|
---|
341 | System.err.println("IO exception : " + e);
|
---|
342 | System.exit(1);
|
---|
343 | }
|
---|
344 |
|
---|
345 | String result = null;
|
---|
346 | toGSDL.println(request);
|
---|
347 | // System.err.println("Issued request to " + hostname + " on port " +
|
---|
348 | // port);
|
---|
349 | try {
|
---|
350 | String terminator = "Envelope>";
|
---|
351 | String response = "";
|
---|
352 |
|
---|
353 | char c;
|
---|
354 | do {
|
---|
355 | c = (char) fromGSDL.read();
|
---|
356 | response = response + c;
|
---|
357 | } while (!response.endsWith(terminator));
|
---|
358 | toGSDL.close();
|
---|
359 | fromGSDL.close();
|
---|
360 | socket.close();
|
---|
361 |
|
---|
362 | int start = response.indexOf("<?xml");
|
---|
363 | result = response.substring(start);
|
---|
364 | // System.out.println(result);
|
---|
365 | int a = result.indexOf('\n');
|
---|
366 | int b = result.indexOf('\n', a + 1);
|
---|
367 | while (a != -1 && b != -1) {
|
---|
368 | // System.out.println(a + " " +b);
|
---|
369 | result = result.substring(0, a - 1) + result.substring(b + 1);
|
---|
370 | a = result.indexOf('\n');
|
---|
371 | b = result.indexOf('\n', a + 1);
|
---|
372 | }
|
---|
373 | } catch (IOException e) {
|
---|
374 | System.err.println(e);
|
---|
375 | System.exit(1);
|
---|
376 | }
|
---|
377 | return result;
|
---|
378 | }
|
---|
379 |
|
---|
380 | /**
|
---|
381 | * Produces a SOAP request string, sends it to the server, gets and
|
---|
382 | * processes the response updating the appropriate data structures. Uses the
|
---|
383 | * settings represented in the provided argument to produce a SOAP request
|
---|
384 | * string. The string is sent to the server using the {@link doRequest}
|
---|
385 | * method. The returned XML document is processed and the information
|
---|
386 | * therein is used to store information about the returned documents and
|
---|
387 | * this query.
|
---|
388 | * <p>
|
---|
389 | * This method updates the {@link queryList} and {@link allResults} data
|
---|
390 | * <p>
|
---|
391 | *
|
---|
392 | * @param query
|
---|
393 | * a {@link Query} object that must be constructed and passed to
|
---|
394 | * this method by the calling client application
|
---|
395 | * @return a {@link QueryOutcome} object that stores information about the
|
---|
396 | * server's response
|
---|
397 | *
|
---|
398 | */
|
---|
399 | public QueryOutcome issueQueryToServer(Query query) {
|
---|
400 | QueryOutcome queryOutcome = new QueryOutcome();
|
---|
401 | String result = null;
|
---|
402 | String requestBody = "<paramList><param name='maxDocs' value='MAXDOCS'/><param name='level' value='Sec'/><param name ='index' value='INDEX'/><param name='matchMode' value='MATCHMODE'/><param name='query' value='QUERY'/><param name='case' value='CASE'/><param name='sortBy' value='SORTBY'/><param name='stem' value='STEM'/><param name='firstDoc' value='FIRSTDOC'/><param name='lastDoc' value='LASTDOC'/></paramList>";
|
---|
403 | requestBody = requestBody.replaceFirst("MAXDOCS", query
|
---|
404 | .getMaxDocsToReturn());
|
---|
405 | requestBody = requestBody.replaceFirst("INDEX", query.getIndex());
|
---|
406 | requestBody = requestBody.replaceFirst("MATCHMODE", query
|
---|
407 | .getMatchMode());
|
---|
408 | requestBody = requestBody.replaceFirst("QUERY", query.getQueryText());
|
---|
409 | requestBody = requestBody.replaceFirst("CASE", query.getCasefolding());
|
---|
410 | requestBody = requestBody.replaceFirst("SORTBY", query.getSortBy());
|
---|
411 | requestBody = requestBody.replaceFirst("STEM", query.getStemming());
|
---|
412 | requestBody = requestBody.replaceFirst("FIRSTDOC", query.getFirstDoc());
|
---|
413 | requestBody = requestBody.replaceFirst("LASTDOC", query.getLastDoc());
|
---|
414 | String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
|
---|
415 | "TextQuery");
|
---|
416 | request = request.replaceFirst("PROCESSTYPE", "process");
|
---|
417 | request = request.replaceFirst("REQUESTBODY", requestBody);
|
---|
418 | request = SOAPrequestHeader.replaceFirst("PROCESSNAME", "TextQuery")
|
---|
419 | + request.length() + "\n\n" + request;
|
---|
420 |
|
---|
421 | int firstDoc = java.lang.Integer.parseInt(query.getFirstDoc());
|
---|
422 |
|
---|
423 | result = doRequest(request);
|
---|
424 | // System.out.println("\n\n" + result + "\n");
|
---|
425 | StringReader sr = new StringReader(result);
|
---|
426 | InputSource is = new InputSource(sr);
|
---|
427 | DOMParser p = new DOMParser();
|
---|
428 | try {
|
---|
429 | p.parse(is);
|
---|
430 | } catch (SAXException se) {
|
---|
431 | System.err.println(se);
|
---|
432 | } catch (IOException ioe) {
|
---|
433 | System.err.println(ioe);
|
---|
434 | }
|
---|
435 | Document d = p.getDocument();
|
---|
436 | NodeList metadataList = d.getElementsByTagName("metadata");
|
---|
437 | for (int i = 0; i < metadataList.getLength(); i++) {
|
---|
438 | Node n = metadataList.item(i);
|
---|
439 | NamedNodeMap nnm = n.getAttributes();
|
---|
440 | Node att = nnm.getNamedItem("name");
|
---|
441 | if (att.getNodeValue().compareTo("numDocsMatched") == 0) {
|
---|
442 | queryOutcome.setHowManyDocsMatched(n.getFirstChild()
|
---|
443 | .getNodeValue());
|
---|
444 | } else if (att.getNodeValue().compareTo("numDocsReturned") == 0) {
|
---|
445 | queryOutcome.setHowManyDocsReturned(n.getFirstChild()
|
---|
446 | .getNodeValue());
|
---|
447 | }
|
---|
448 | }
|
---|
449 |
|
---|
450 | NodeList documentList = d.getElementsByTagName("documentNode");
|
---|
451 | for (int i = 0; i < documentList.getLength(); i++) {
|
---|
452 | Node n = documentList.item(i);
|
---|
453 | NamedNodeMap nnm = n.getAttributes();
|
---|
454 | Node nid = nnm.getNamedItem("nodeID");
|
---|
455 | Node nscore = nnm.getNamedItem("rank");
|
---|
456 | String docID = nid.getFirstChild().getNodeValue();
|
---|
457 | queryOutcome.addResult(docID, firstDoc + i, nscore.getFirstChild()
|
---|
458 | .getNodeValue());
|
---|
459 | }
|
---|
460 | query.addQueryOutcome(queryOutcome);
|
---|
461 | Query q = (Query) query.clone();
|
---|
462 | queryList.add(q);
|
---|
463 |
|
---|
464 | for (int i = 0; i < documentList.getLength(); i++) {
|
---|
465 | Node n = documentList.item(i);
|
---|
466 | NamedNodeMap nnm = n.getAttributes();
|
---|
467 | Node nid = nnm.getNamedItem("nodeID");
|
---|
468 | Node nscore = nnm.getNamedItem("rank");
|
---|
469 | String docID = nid.getFirstChild().getNodeValue();
|
---|
470 |
|
---|
471 | QueryContext queryContext = new QueryContext(firstDoc + i, nscore
|
---|
472 | .getFirstChild().getNodeValue(), q);
|
---|
473 | if (allResults.containsKey(docID)) {
|
---|
474 | ResultDocument resultDocument = (ResultDocument) allResults
|
---|
475 | .get(docID);
|
---|
476 | resultDocument.incrementFrequencyReturned();
|
---|
477 | resultDocument.addQueryContext(queryContext);
|
---|
478 | allResults.put(docID, resultDocument);
|
---|
479 | } else {
|
---|
480 | ResultDocument resultDocument = new ResultDocument();
|
---|
481 | resultDocument.addQueryContext(queryContext);
|
---|
482 | allResults.put(docID, resultDocument);
|
---|
483 | }
|
---|
484 | }
|
---|
485 | return queryOutcome;
|
---|
486 | }
|
---|
487 |
|
---|
488 | /**
|
---|
489 | * Produces a SOAP request string, sends it to the server, gets and
|
---|
490 | * processes the response updating the appropriate data structures. Given a
|
---|
491 | * document identifier and the name of a metadata item, this method produces
|
---|
492 | * a SOAP request string. The string is sent to the server using the
|
---|
493 | * {@link doRequest} method.
|
---|
494 | * <p>
|
---|
495 | * The request is simply for the values of the given metadata item of the
|
---|
496 | * given document. <b>If the metadata item for the given document has
|
---|
497 | * already been retrieved from the server, the server is NOT contacted
|
---|
498 | * again.</b>
|
---|
499 | * <p>
|
---|
500 | * The returned XML document is processed. The {@link ResultDocument} object
|
---|
501 | * for the document in question is updated with the returned metadata
|
---|
502 | * information, and the {@link allResults} data is consequently updated.
|
---|
503 | * <p>
|
---|
504 | * If the requested metadata is one of Keywords, Authors, Dates, Journals,
|
---|
505 | * Booktitles then the appropriate data structure is updated.
|
---|
506 | * <p>
|
---|
507 | * The method does not return a value. Private data structures are updated
|
---|
508 | * instead. The calling client application should proceed to access document
|
---|
509 | * metadata using the provided methods.
|
---|
510 | * <p>
|
---|
511 | *
|
---|
512 | * @param docID
|
---|
513 | * is a document identifier, in the form returned by the server
|
---|
514 | * and available from a {@link QueryOutcome}
|
---|
515 | * @param metadata
|
---|
516 | * is the metadata field whose value is to be retrieved. Valid
|
---|
517 | * values are
|
---|
518 | * <ul>
|
---|
519 | * <li>Title</li>
|
---|
520 | * <li>Creator (the authors)</li>
|
---|
521 | * <li>Journal</li>
|
---|
522 | * <li>Booktitle</li>
|
---|
523 | * <li>Volume</li>
|
---|
524 | * <li>Number</li>
|
---|
525 | * <li>Editor</li>
|
---|
526 | * <li>Pages</li>
|
---|
527 | * <li>Publisher</li>
|
---|
528 | * <li>Date</li>
|
---|
529 | * <li>Keywords</li>
|
---|
530 | * <li>Abstract</li>
|
---|
531 | * </ul>
|
---|
532 | */
|
---|
533 | public void getDocumentMetadataFromServer(String docID, String metadata) {
|
---|
534 | ResultDocument resultDocument = (ResultDocument) allResults.get(docID);
|
---|
535 | if (resultDocument.metadataExists(metadata)) {
|
---|
536 | return;
|
---|
537 | }
|
---|
538 |
|
---|
539 | String result = null;
|
---|
540 | String requestBody = "<paramList><param name='metadata' value='METADATAFIELD'/></paramList><documentNodeList><documentNode nodeID='DOCIDVALUE'/></documentNodeList>";
|
---|
541 | requestBody = requestBody.replaceFirst("METADATAFIELD", metadata);
|
---|
542 | requestBody = requestBody.replaceFirst("DOCIDVALUE", docID);
|
---|
543 | String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
|
---|
544 | "DocumentMetadataRetrieve");
|
---|
545 | request = request.replaceFirst("PROCESSTYPE", "process");
|
---|
546 | request = request.replaceFirst("REQUESTBODY", requestBody);
|
---|
547 |
|
---|
548 | request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
|
---|
549 | "DocumentMetadataRetrieve")
|
---|
550 | + request.length() + "\n\n" + request;
|
---|
551 |
|
---|
552 | result = doRequest(request);
|
---|
553 | StringReader sr = new StringReader(result);
|
---|
554 | InputSource is = new InputSource(sr);
|
---|
555 | DOMParser p = new DOMParser();
|
---|
556 | try {
|
---|
557 | p.parse(is);
|
---|
558 | } catch (SAXException se) {
|
---|
559 | System.err.println(se);
|
---|
560 | } catch (IOException ioe) {
|
---|
561 | System.err.println(ioe);
|
---|
562 | }
|
---|
563 | Document d = p.getDocument();
|
---|
564 | NodeList metadataList = d.getElementsByTagName("metadata");
|
---|
565 | String metadataval = null;
|
---|
566 | if (metadataList.getLength() > 0) {
|
---|
567 | Node n = metadataList.item(0);
|
---|
568 | metadataval = n.getFirstChild().getNodeValue();
|
---|
569 |
|
---|
570 | if (metadata.compareTo("Keywords") == 0) {
|
---|
571 | String[] keywords = metadataval.split(",");
|
---|
572 | for (int i = 0; i < keywords.length; i++) {
|
---|
573 | String s = keywords[i].trim().toLowerCase();
|
---|
574 | resultDocument.addKeyword(s);
|
---|
575 | if (allKeywords.containsKey(s)) {
|
---|
576 | HashMap docMap = (HashMap) allKeywords.get(s);
|
---|
577 | docMap.put(docID, null);
|
---|
578 | allKeywords.put(s, docMap);
|
---|
579 | } else {
|
---|
580 | HashMap docMap = new HashMap();
|
---|
581 | docMap.put(docID, null);
|
---|
582 | allKeywords.put(s, docMap);
|
---|
583 | }
|
---|
584 | }
|
---|
585 | } else if (metadata.compareTo("Creator") == 0) {
|
---|
586 | String[] authors = metadataval.split("(,)|( and )");
|
---|
587 | // System.err.println(metadataval);
|
---|
588 | for (int i = 0; i < authors.length; i++) {
|
---|
589 | authors[i] = authors[i].trim().toLowerCase();
|
---|
590 | }
|
---|
591 |
|
---|
592 | boolean containsExtraName = authors.length % 2 != 0;
|
---|
593 |
|
---|
594 | for (int i = 0; i + 1 < authors.length; i = i + 2) {
|
---|
595 | String s = authors[i] + ", " + authors[i + 1];
|
---|
596 |
|
---|
597 | //Handle names with jr. in them
|
---|
598 | if (containsExtraName) {
|
---|
599 | if (i + 2 < authors.length
|
---|
600 | && authors[i + 2].contains("jr")) {
|
---|
601 | s += " " + authors[i + 2];
|
---|
602 | i++;
|
---|
603 | }
|
---|
604 | }
|
---|
605 |
|
---|
606 | s = s.replaceAll("[.]", "");
|
---|
607 | // System.err.println(s);
|
---|
608 | resultDocument.addAuthor(s);
|
---|
609 | if (allAuthors.containsKey(s)) {
|
---|
610 | HashMap docMap = (HashMap) allAuthors.get(s);
|
---|
611 | docMap.put(docID, null);
|
---|
612 | allAuthors.put(s, docMap);
|
---|
613 | } else {
|
---|
614 | HashMap docMap = new HashMap();
|
---|
615 | docMap.put(docID, null);
|
---|
616 | allAuthors.put(s, docMap);
|
---|
617 | }
|
---|
618 | }
|
---|
619 | } else if (metadata.compareTo("Title") == 0) {
|
---|
620 | resultDocument.setTitle(metadataval);
|
---|
621 | } else if (metadata.compareTo("Booktitle") == 0) {
|
---|
622 | resultDocument.setBooktitle(metadataval);
|
---|
623 | if (allBooktitles.containsKey(metadataval)) {
|
---|
624 | HashMap docMap = (HashMap) allBooktitles.get(metadataval);
|
---|
625 | docMap.put(docID, null);
|
---|
626 | allBooktitles.put(metadataval, docMap);
|
---|
627 | } else {
|
---|
628 | HashMap docMap = new HashMap();
|
---|
629 | docMap.put(docID, null);
|
---|
630 | allBooktitles.put(metadataval, docMap);
|
---|
631 | }
|
---|
632 | } else if (metadata.compareTo("Date") == 0) {
|
---|
633 | resultDocument.setDate(metadataval.replaceAll("[^0-9]", ""));
|
---|
634 | if (allDates.containsKey(metadataval)) {
|
---|
635 | HashMap docMap = (HashMap) allDates.get(metadataval);
|
---|
636 | docMap.put(docID, null);
|
---|
637 | allDates.put(metadataval, docMap);
|
---|
638 | } else {
|
---|
639 | HashMap docMap = new HashMap();
|
---|
640 | docMap.put(docID, null);
|
---|
641 | allDates.put(metadataval, docMap);
|
---|
642 | }
|
---|
643 | } else if (metadata.compareTo("Pages") == 0) {
|
---|
644 | resultDocument.setPages(metadataval);
|
---|
645 | } else if (metadata.compareTo("Journal") == 0) {
|
---|
646 | resultDocument.setJournal(metadataval);
|
---|
647 | if (allJournals.containsKey(metadataval)) {
|
---|
648 | HashMap docMap = (HashMap) allJournals.get(metadataval);
|
---|
649 | docMap.put(docID, null);
|
---|
650 | allJournals.put(metadataval, docMap);
|
---|
651 | } else {
|
---|
652 | HashMap docMap = new HashMap();
|
---|
653 | docMap.put(docID, null);
|
---|
654 | allJournals.put(metadataval, docMap);
|
---|
655 | }
|
---|
656 | } else if (metadata.compareTo("Volume") == 0) {
|
---|
657 | resultDocument.setVolume(metadataval);
|
---|
658 | } else if (metadata.compareTo("Number") == 0) {
|
---|
659 | resultDocument.setNumber(metadataval);
|
---|
660 | } else if (metadata.compareTo("Abstract") == 0) {
|
---|
661 | resultDocument.setAbstract(metadataval);
|
---|
662 | } else if (metadata.compareTo("Editor") == 0) {
|
---|
663 | resultDocument.setEditor(metadataval);
|
---|
664 | } else if (metadata.compareTo("Publisher") == 0) {
|
---|
665 | resultDocument.setPublisher(metadataval);
|
---|
666 | }
|
---|
667 |
|
---|
668 | }
|
---|
669 | allResults.put(docID, resultDocument);
|
---|
670 | }
|
---|
671 |
|
---|
672 | public String getClassifierNodeName(String nodeID) {
|
---|
673 | String result = null;
|
---|
674 | String requestBody = "<paramList><param name='metadata' value='Title'/></paramList><classifierNodeList><classifierNode nodeID='NODEID'/></classifierNodeList>";
|
---|
675 | requestBody = requestBody.replaceFirst("NODEID", nodeID);
|
---|
676 | String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
|
---|
677 | "ClassifierBrowseMetadataRetrieve");
|
---|
678 | request = request.replaceFirst("PROCESSTYPE", "process");
|
---|
679 | request = request.replaceFirst("REQUESTBODY", requestBody);
|
---|
680 |
|
---|
681 | request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
|
---|
682 | "ClassifierBrowseMetadataRetrieve")
|
---|
683 | + request.length() + "\n\n" + request;
|
---|
684 |
|
---|
685 | // System.err.println(request);
|
---|
686 | result = doRequest(request);
|
---|
687 | // System.err.println(result);
|
---|
688 |
|
---|
689 | StringReader sr = new StringReader(result);
|
---|
690 | InputSource is = new InputSource(sr);
|
---|
691 | DOMParser p = new DOMParser();
|
---|
692 | try {
|
---|
693 | p.parse(is);
|
---|
694 | } catch (SAXException se) {
|
---|
695 | System.err.println(se);
|
---|
696 | } catch (IOException ioe) {
|
---|
697 | System.err.println(ioe);
|
---|
698 | }
|
---|
699 |
|
---|
700 | String returnName = null;
|
---|
701 |
|
---|
702 | Document d = p.getDocument();
|
---|
703 | NodeList metadataList = d.getElementsByTagName("metadata");
|
---|
704 | for (int i = 0; i < metadataList.getLength(); i++) {
|
---|
705 | Node n = metadataList.item(i);
|
---|
706 | NamedNodeMap nnm = n.getAttributes();
|
---|
707 | Node att = nnm.getNamedItem("name");
|
---|
708 | if (att.getNodeValue().compareTo("Title") == 0) {
|
---|
709 | returnName = n.getFirstChild().getNodeValue();
|
---|
710 | }
|
---|
711 | }
|
---|
712 | return returnName;
|
---|
713 | }
|
---|
714 |
|
---|
715 | public void getClassifierNodes(String rootNode) {
|
---|
716 | String result = null;
|
---|
717 | String requestBody = "<paramList><param name='structure' value='children'/></paramList><classifierNodeList><classifierNode nodeID='CLASSIFIER'/></classifierNodeList>";
|
---|
718 | requestBody = requestBody.replaceFirst("CLASSIFIER", rootNode);
|
---|
719 | String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
|
---|
720 | "ClassifierBrowse");
|
---|
721 | request = request.replaceFirst("PROCESSTYPE", "process");
|
---|
722 | request = request.replaceFirst("REQUESTBODY", requestBody);
|
---|
723 |
|
---|
724 | request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
|
---|
725 | "ClassifierBrowse")
|
---|
726 | + request.length() + "\n\n" + request;
|
---|
727 |
|
---|
728 | System.err.println(getClassifierNodeName(rootNode));
|
---|
729 | // System.err.print(rootNode + "#");
|
---|
730 |
|
---|
731 | // System.err.println(request);
|
---|
732 | result = doRequest(request);
|
---|
733 | // System.err.println(result);
|
---|
734 |
|
---|
735 | StringReader sr = new StringReader(result);
|
---|
736 | InputSource is = new InputSource(sr);
|
---|
737 | DOMParser p = new DOMParser();
|
---|
738 | try {
|
---|
739 | p.parse(is);
|
---|
740 | } catch (SAXException se) {
|
---|
741 | System.err.println(se);
|
---|
742 | } catch (IOException ioe) {
|
---|
743 | System.err.println(ioe);
|
---|
744 | }
|
---|
745 | Document d = p.getDocument();
|
---|
746 |
|
---|
747 | NodeList childList = d.getElementsByTagName("classifierNode");
|
---|
748 | NodeList documentList = d.getElementsByTagName("documentNode");
|
---|
749 | // System.err.println("\td " + documentList.getLength());
|
---|
750 | // System.err.println("\tc " + childList.getLength());
|
---|
751 |
|
---|
752 | if (childList.getLength() > 0) {
|
---|
753 | for (int i = 0; i < childList.getLength(); i++) {
|
---|
754 | Node n = childList.item(i);
|
---|
755 | NamedNodeMap nnm = n.getAttributes();
|
---|
756 | Node nid = nnm.getNamedItem("nodeID");
|
---|
757 | String nodeID = nid.getFirstChild().getNodeValue();
|
---|
758 |
|
---|
759 | // System.err.println("\tchild : " + nodeID);
|
---|
760 |
|
---|
761 | if (nodeID.compareTo(rootNode) != 0
|
---|
762 | && nodeID.compareTo("2.6.22") != 0) {
|
---|
763 | // System.err.println("\t" + nodeID);
|
---|
764 | getClassifierNodes(nodeID);
|
---|
765 | }
|
---|
766 | }
|
---|
767 | }
|
---|
768 | if (documentList.getLength() > 0)
|
---|
769 | System.out.println(getClassifierNodeName(rootNode) + "#"
|
---|
770 | + documentList.getLength());
|
---|
771 | }
|
---|
772 | }
|
---|