source: trunk/src/org/expeditee/greenstone/Greenstone3Connection.java@ 1443

Last change on this file since 1443 was 919, checked in by jts21, 10 years ago

Added license headers to all files, added full GPL3 license file, moved license header generator script to dev/bin/scripts

File size: 28.1 KB
Line 
1/**
2 * Greenstone3Connection.java
3 * Copyright (C) 2010 New Zealand Digital Library, http://expeditee.org
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19package org.expeditee.greenstone;
20
21import java.io.BufferedReader;
22import java.io.IOException;
23import java.io.InputStreamReader;
24import java.io.PrintWriter;
25import java.io.StringReader;
26import java.net.Socket;
27import java.net.UnknownHostException;
28import java.util.ArrayList;
29import java.util.Collections;
30import java.util.HashMap;
31import java.util.HashSet;
32import java.util.Iterator;
33import java.util.List;
34import java.util.ListIterator;
35import java.util.Map;
36import java.util.Set;
37
38import org.apache.xerces.parsers.DOMParser;
39import org.w3c.dom.Document;
40import org.w3c.dom.NamedNodeMap;
41import org.w3c.dom.Node;
42import org.w3c.dom.NodeList;
43import org.xml.sax.InputSource;
44import org.xml.sax.SAXException;
45
46/**
47 * This class provides a simple API for communicating with a Greenstone 3 server
48 * using SOAP.
49 * <p>
50 * Greenstone 3 does not yet 'properly' implement SOAP-based web services. We
51 * would like to use a Greenstone WSDL (Web Services Definition Language) file
52 * and a higher level SOAP Client interface. But we can't. To get around this,
53 * this API uses a simple socket connection to the Greenstone 3 server, and
54 * sends SOAP requests as strings (XML documents). This works but isn't elegant.
55 * The server responds with a string representing an XML document.
56 * <p>
57 * The server's hostname and port are hard-coded. <b>Do not modify them.</b>
58 * <p>
59 * The Greenstone collection to use is <i>hcibib</i>, and this is also
60 * hard-coded. <b>Do not modify this.</b>
61 * <p>
62 * This collection can be accessed from a web browser at <a
63 * href="http://delaware.resnet.scms.waikato.ac.nz:8111/greenstone3/library?a=p&sa=about&c=hcibib">
64 * this location</a>.
65 */
66public class Greenstone3Connection {
67 /** an ordered list of {@link Query} objects */
68 private List<Query> queryList;
69
70 /**
71 * a HashMap of {@link ResultDocument} objects with document IDs as the
72 * keys. All the results returned in this session.
73 */
74 private Map<String, ResultDocument> allResults;
75
76 /**
77 * a HashMap keyed on the keywords found for all documents returned in this
78 * session. Each item in the map is itself a HashMap, keyed on document IDs
79 * with each item being NULL.
80 */
81 private Map<String, Set<String>> allKeywords;
82
83 /**
84 * a set of authors names
85 */
86 private Map<String, Set<String>> allAuthors;
87
88 /**
89 * a HashMap keyed on the publication dates found for all documents returned
90 * in this session. Each item in the map is itself a HashMap, keyed on
91 * document IDs with each item being NULL.
92 */
93 private Map<String, Set<String>> allDates;
94
95 /**
96 * a HashMap keyed on the journal names found for all documents returned in
97 * this session. Each item in the map is itself a HashMap, keyed on document
98 * IDs with each item being NULL.
99 */
100 private Map<String, Set<String>> allJournals;
101
102 /**
103 * a HashMap keyed on the book titles found for all documents returned in
104 * this session. Each item in the map is itself a HashMap, keyed on document
105 * IDs with each item being NULL.
106 */
107 private Map<String, Set<String>> allBooktitles;
108
109 /** the <i>hostname</i> where the Greenstone 3 server is running */
110 private String hostname;
111
112 /** the <i>port</i> on which the Greenstone 3 server is running */
113 private int port;
114
115 /** for communication with the server */
116 private Socket socket = null;
117
118 /** for writing the SOAP request strings to the server socket */
119 private PrintWriter toGSDL = null;
120
121 /** for reading the SOAP response strings from the server socket */
122 private BufferedReader fromGSDL = null;
123
124 /** string that starts every SOAP request */
125 private String SOAPrequestHeader;
126
127 /** acts as a template for every SOAP request string */
128 private String SOAPrequestMessage = "<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><soapenv:Body><message><request lang='en' to='hcibib/PROCESSNAME' type='PROCESSTYPE'>REQUESTBODY</request></message></soapenv:Body></soapenv:Envelope>";
129
/**
 * Creates a connection object for one of the two known Greenstone 3
 * servers and initialises the (empty) per-session data structures.
 * <p>
 * A client application using this API will normally only create one
 * instance of this class, with something like this
 *
 * <pre>
 * Greenstone3Connection gsdl = new Greenstone3Connection(0);
 * </pre>
 *
 * The constructor does not contact the server. It initialises the
 * following <b>private</b> variables...
 * <ul>
 * <li>the <i>hostname</i> where the Greenstone 3 server is running</li>
 * <li>the <i>port</i> on which the Greenstone 3 server is running</li>
 * <li>the HTTP header used to start every SOAP request</li>
 * <li><i>queryList</i> an ordered list of {@link Query} objects</li>
 * <li><i>allResults</i> a map of {@link ResultDocument} objects with
 * document IDs as the keys. All the results returned in this session.</li>
 * <li><i>allKeywords</i>, <i>allAuthors</i>, <i>allDates</i>,
 * <i>allJournals</i> and <i>allBooktitles</i>: maps keyed on the keywords,
 * author names, publication dates, journal names and book titles found for
 * the documents returned in this session. Each value is the set of IDs of
 * the documents having that key.</li>
 * </ul>
 * All of these collections are wrapped with the synchronized wrappers of
 * {@link Collections}.
 *
 * @param location
 *            selects the server: <code>0</code> for
 *            <code>comp537.cs.waikato.ac.nz</code> port 80, any other
 *            value for <code>130.217.220.10</code> port 8111
 */
public Greenstone3Connection(int location) {
    if (location == 0) {
        this.hostname = "comp537.cs.waikato.ac.nz";
        this.port = 80;
    } else {
        this.hostname = "130.217.220.10";
        this.port = 8111;
    }
    // The Host header is derived from the endpoint chosen above so the two
    // can never disagree; the resulting text is the same as the two
    // hand-written header literals it replaces.
    this.SOAPrequestHeader = "POST /greenstone3/services/localsite HTTP/1.1\nHost: "
            + this.hostname + ":" + this.port
            + "\nSOAPAction: hcibib/PROCESSNAME\nContent-Type: text/xml;charset=utf-8\nContent-Length: ";

    this.queryList = Collections.synchronizedList(new ArrayList<Query>());
    this.allResults = Collections
            .synchronizedMap(new HashMap<String, ResultDocument>());
    this.allKeywords = Collections
            .synchronizedMap(new HashMap<String, Set<String>>());
    this.allAuthors = Collections
            .synchronizedMap(new HashMap<String, Set<String>>());
    this.allDates = Collections
            .synchronizedMap(new HashMap<String, Set<String>>());
    this.allJournals = Collections
            .synchronizedMap(new HashMap<String, Set<String>>());
    this.allBooktitles = Collections
            .synchronizedMap(new HashMap<String, Set<String>>());
}
184
185 public Map<String, ResultDocument> getSessionResults() {
186 return this.allResults;
187 };
188
189 /**
190 * Print a string representation of the list of queries issued in this
191 * session.
192 */
193 public void dumpQueryList() {
194 ListIterator iter = queryList.listIterator();
195 while (iter.hasNext()) {
196 Query query = (Query) iter.next();
197 System.out.println(query.toString());
198 }
199 }
200
201 /**
202 * Print a string representation of the Booktitles occuring for all query
203 * results in this session. For each booktitle print the IDs of the
204 * documents with that booktitle.
205 */
206 public void dumpAllBooktitles() {
207 Set keys = allBooktitles.keySet();
208 Iterator iter = keys.iterator();
209 while (iter.hasNext()) {
210 String booktitle = (String) iter.next();
211 HashMap docMap = (HashMap) allBooktitles.get(booktitle);
212 System.out.println(booktitle);
213 System.out.println(docMap.keySet().toString());
214 }
215 }
216
217 /**
218 * Print a string representation of the Journals occuring for all query
219 * results in this session. For each journal print the IDs of the documents
220 * with that journal.
221 */
222 public void dumpAllJournals() {
223 Set keys = allJournals.keySet();
224 Iterator iter = keys.iterator();
225 while (iter.hasNext()) {
226 String journal = (String) iter.next();
227 HashMap docMap = (HashMap) allJournals.get(journal);
228 System.out.println(journal);
229 System.out.println(docMap.keySet().toString());
230 }
231 }
232
233 /**
234 * Print a string representation of the Dates occuring for all query results
235 * in this session. For each date print the IDs of the documents with that
236 * date.
237 */
238 public void dumpAllDates() {
239 Set keys = allDates.keySet();
240 Iterator iter = keys.iterator();
241 while (iter.hasNext()) {
242 String date = (String) iter.next();
243 HashMap docMap = (HashMap) allDates.get(date);
244 System.out.println(date);
245 System.out.println(docMap.keySet().toString());
246 }
247 }
248
249 /**
250 * Print a string representation of the Authors occuring for all query
251 * results in this session. For each author print the IDs of the documents
252 * with that author.
253 */
254 public void dumpAllAuthors() {
255 Set keys = allAuthors.keySet();
256 Iterator iter = keys.iterator();
257 while (iter.hasNext()) {
258 String author = (String) iter.next();
259 HashMap docMap = (HashMap) allAuthors.get(author);
260 System.out.println(author);
261 System.out.println(docMap.keySet().toString());
262 }
263 }
264
265 /**
266 * Print a string representation of the Keywords occuring for all query
267 * results in this session. For each keyword print the IDs of the documents
268 * with that keyword.
269 */
270 public void dumpAllKeywords() {
271 Set keys = allKeywords.keySet();
272 Iterator iter = keys.iterator();
273 while (iter.hasNext()) {
274 String keyword = (String) iter.next();
275 HashMap docMap = (HashMap) allKeywords.get(keyword);
276 System.out.println(keyword);
277 System.out.println(docMap.keySet().toString());
278 }
279 }
280
281 /**
282 * Print a string representation of all the result documents returned by
283 * queries in this session.
284 */
285 public void dumpAllResults() {
286 Set keys = allResults.keySet();
287 Iterator iter = keys.iterator();
288
289 while (iter.hasNext()) {
290 String docID = (String) iter.next();
291 ResultDocument resultDocument = allResults.get(docID);
292 System.out.println("____________" + docID + " ___________");
293 System.out.println(resultDocument.toString());
294 }
295 }
296
297 /**
298 * Print all the result documents IDs returned by queries in this session,
299 * along with their titles.
300 */
301 public void dumpAllTitles() {
302 Set keys = allResults.keySet();
303 Iterator iter = keys.iterator();
304 while (iter.hasNext()) {
305 String docID = (String) iter.next();
306 ResultDocument resultDocument = allResults.get(docID);
307 System.out.println(docID + "\t" + resultDocument.getTitle());
308 }
309 }
310
311 /**
312 * Provides the {@link ResultDocument} object for the document with the
313 * given ID
314 *
315 * @param docID
316 * is a document identifier, in the form returned by the server
317 * and available from a {@link QueryOutcome}
318 * @return the {@link ResultDocument} object reflecting the state of the
319 * result document at the time that this method was called. The
320 * state can change as more metadata is retrieved for the document
321 * and the document is returned by further queries.
322 */
323 public ResultDocument getDocument(String docID) {
324 return allResults.get(docID);
325 }
326
327 /**
328 * Implements the actual communication with the server. <b>You can not call
329 * this method directly from your client code.</b>
330 * <p>
331 * Throws an exception and exits if the hosthame is not known or the
332 * connection can't be established.
333 * <p>
334 *
335 * @param request
336 * an already well formed string that contains the appropriate
337 * HTTP headers and a SOAP message (in XML form) that will ask
338 * the server for some information.
339 * @return a string containing a SOAP message (an XML document) that the
340 * server returned in response to the request
341 */
342 private String doRequest(String request) {
343 // System.err.println("Connecting to " + hostname + " on port " + port);
344 try {
345 try {
346 socket = new Socket(hostname, port);
347 } catch (SecurityException se) {
348 System.err.println("Security exception : " + se);
349 System.exit(1);
350 }
351 toGSDL = new PrintWriter(socket.getOutputStream(), true);
352 fromGSDL = new BufferedReader(new InputStreamReader(socket
353 .getInputStream()));
354 } catch (UnknownHostException e) {
355 System.err.println("Don't know about GSDL host: " + hostname);
356 System.exit(1);
357 } catch (IOException e) {
358 System.err.println("IO exception : " + e);
359 System.exit(1);
360 }
361
362 String result = null;
363 toGSDL.println(request);
364 // System.err.println("Issued request to " + hostname + " on port " +
365 // port);
366 try {
367 String terminator = "Envelope>";
368 String response = "";
369
370 char c;
371 do {
372 c = (char) fromGSDL.read();
373 response = response + c;
374 } while (!response.endsWith(terminator));
375 toGSDL.close();
376 fromGSDL.close();
377 socket.close();
378
379 int start = response.indexOf("<?xml");
380 result = response.substring(start);
381 // System.out.println(result);
382 int a = result.indexOf('\n');
383 int b = result.indexOf('\n', a + 1);
384 while (a != -1 && b != -1) {
385 // System.out.println(a + " " +b);
386 result = result.substring(0, a - 1) + result.substring(b + 1);
387 a = result.indexOf('\n');
388 b = result.indexOf('\n', a + 1);
389 }
390 } catch (IOException e) {
391 System.err.println(e);
392 System.exit(1);
393 }
394 return result;
395 }
396
397 /**
398 * Produces a SOAP request string, sends it to the server, gets and
399 * processes the response updating the appropriate data structures. Uses the
400 * settings represented in the provided argument to produce a SOAP request
401 * string. The string is sent to the server using the {@link doRequest}
402 * method. The returned XML document is processed and the information
403 * therein is used to store information about the returned documents and
404 * this query.
405 * <p>
406 * This method updates the {@link queryList} and {@link allResults} data
407 * <p>
408 *
409 * @param query
410 * a {@link Query} object that must be constructed and passed to
411 * this method by the calling client application
412 * @return a {@link QueryOutcome} object that stores information about the
413 * server's response
414 *
415 */
416 public QueryOutcome issueQueryToServer(Query query) {
417 QueryOutcome queryOutcome = new QueryOutcome();
418 String result = null;
419 String requestBody = "<paramList><param name='maxDocs' value='MAXDOCS'/><param name='level' value='Sec'/><param name ='index' value='INDEX'/><param name='matchMode' value='MATCHMODE'/><param name='query' value='QUERY'/><param name='case' value='CASE'/><param name='sortBy' value='SORTBY'/><param name='stem' value='STEM'/><param name='firstDoc' value='FIRSTDOC'/><param name='lastDoc' value='LASTDOC'/></paramList>";
420 requestBody = requestBody.replaceFirst("MAXDOCS", query
421 .getMaxDocsToReturn());
422 requestBody = requestBody.replaceFirst("INDEX", query.getIndex());
423 requestBody = requestBody.replaceFirst("MATCHMODE", query
424 .getMatchMode());
425 requestBody = requestBody.replaceFirst("QUERY", query.getQueryText());
426 requestBody = requestBody.replaceFirst("CASE", query.getCasefolding());
427 requestBody = requestBody.replaceFirst("SORTBY", query.getSortBy());
428 requestBody = requestBody.replaceFirst("STEM", query.getStemming());
429 requestBody = requestBody.replaceFirst("FIRSTDOC", query.getFirstDoc());
430 requestBody = requestBody.replaceFirst("LASTDOC", query.getLastDoc());
431 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
432 "TextQuery");
433 request = request.replaceFirst("PROCESSTYPE", "process");
434 request = request.replaceFirst("REQUESTBODY", requestBody);
435 request = SOAPrequestHeader.replaceFirst("PROCESSNAME", "TextQuery")
436 + request.length() + "\n\n" + request;
437
438 int firstDoc = java.lang.Integer.parseInt(query.getFirstDoc());
439
440 result = doRequest(request);
441 // System.out.println("\n\n" + result + "\n");
442 StringReader sr = new StringReader(result);
443 InputSource is = new InputSource(sr);
444
445 DOMParser p = new DOMParser();
446 try {
447 p.parse(is);
448 } catch (SAXException se) {
449 System.err.println(se);
450 } catch (IOException ioe) {
451 System.err.println(ioe);
452 }
453 Document d = p.getDocument();
454 NodeList metadataList = d.getElementsByTagName("metadata");
455 for (int i = 0; i < metadataList.getLength(); i++) {
456 Node n = metadataList.item(i);
457 NamedNodeMap nnm = n.getAttributes();
458 Node att = nnm.getNamedItem("name");
459 if (att.getNodeValue().compareTo("numDocsMatched") == 0) {
460 queryOutcome.setHowManyDocsMatched(n.getFirstChild()
461 .getNodeValue());
462 } else if (att.getNodeValue().compareTo("numDocsReturned") == 0) {
463 queryOutcome.setHowManyDocsReturned(n.getFirstChild()
464 .getNodeValue());
465 }
466 }
467
468 NodeList documentList = d.getElementsByTagName("documentNode");
469 for (int i = 0; i < documentList.getLength(); i++) {
470 Node n = documentList.item(i);
471 NamedNodeMap nnm = n.getAttributes();
472 Node nid = nnm.getNamedItem("nodeID");
473 Node nscore = nnm.getNamedItem("rank");
474 String docID = nid.getFirstChild().getNodeValue();
475 queryOutcome.addResult(docID, firstDoc + i, nscore.getFirstChild()
476 .getNodeValue());
477 }
478 query.addQueryOutcome(queryOutcome);
479 Query q = (Query) query.clone();
480 queryList.add(q);
481
482 for (int i = 0; i < documentList.getLength(); i++) {
483 Node n = documentList.item(i);
484 NamedNodeMap nnm = n.getAttributes();
485 Node nid = nnm.getNamedItem("nodeID");
486 Node nscore = nnm.getNamedItem("rank");
487 String docID = nid.getFirstChild().getNodeValue();
488
489 QueryContext queryContext = new QueryContext(firstDoc + i, nscore
490 .getFirstChild().getNodeValue(), q);
491 if (allResults.containsKey(docID)) {
492 ResultDocument resultDocument = allResults.get(docID);
493 resultDocument.incrementFrequencyReturned();
494 resultDocument.addQueryContext(queryContext);
495 allResults.put(docID, resultDocument);
496 } else {
497 ResultDocument resultDocument = new ResultDocument();
498 resultDocument.addQueryContext(queryContext);
499 allResults.put(docID, resultDocument);
500 }
501 }
502 return queryOutcome;
503 }
504
505 /**
506 * Produces a SOAP request string, sends it to the server, gets and
507 * processes the response updating the appropriate data structures. Given a
508 * document identifier and the name of a metadata item, this method produces
509 * a SOAP request string. The string is sent to the server using the
510 * {@link doRequest} method.
511 * <p>
512 * The request is simply for the values of the given metadata item of the
513 * given document. <b>If the metadata item for the given document has
514 * already been retrieved from the server, the server is NOT contacted
515 * again.</b>
516 * <p>
517 * The returned XML document is processed. The {@link ResultDocument} object
518 * for the document in question is updated with the returned metadata
519 * information, and the {@link allResults} data is consequently updated.
520 * <p>
521 * If the requested metadata is one of Keywords, Authors, Dates, Journals,
522 * Booktitles then the appropriate data structure is updated.
523 * <p>
524 * The method does not return a value. Private data structures are updated
525 * instead. The calling client application should proceed to access document
526 * metadata using the provided methods.
527 * <p>
528 *
529 * @param docID
530 * is a document identifier, in the form returned by the server
531 * and available from a {@link QueryOutcome}
532 * @param metadata
533 * is the metadata field whose value is to be retrieved. Valid
534 * values are
535 * <ul>
536 * <li>Title</li>
537 * <li>Creator (the authors)</li>
538 * <li>Journal</li>
539 * <li>Booktitle</li>
540 * <li>Volume</li>
541 * <li>Number</li>
542 * <li>Editor</li>
543 * <li>Pages</li>
544 * <li>Publisher</li>
545 * <li>Date</li>
546 * <li>Keywords</li>
547 * <li>Abstract</li>
548 * </ul>
549 */
550 public void getDocumentMetadataFromServer(String docID, String metadata) {
551 ResultDocument resultDocument = allResults.get(docID);
552 if (resultDocument.metadataExists(metadata)) {
553 return;
554 }
555
556 String result = null;
557 String requestBody = "<paramList><param name='metadata' value='METADATAFIELD'/></paramList><documentNodeList><documentNode nodeID='DOCIDVALUE'/></documentNodeList>";
558 requestBody = requestBody.replaceFirst("METADATAFIELD", metadata);
559 requestBody = requestBody.replaceFirst("DOCIDVALUE", docID);
560 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
561 "DocumentMetadataRetrieve");
562 request = request.replaceFirst("PROCESSTYPE", "process");
563 request = request.replaceFirst("REQUESTBODY", requestBody);
564
565 request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
566 "DocumentMetadataRetrieve")
567 + request.length() + "\n\n" + request;
568
569 result = doRequest(request);
570 StringReader sr = new StringReader(result);
571 InputSource is = new InputSource(sr);
572 DOMParser p = new DOMParser();
573 try {
574 p.parse(is);
575 } catch (SAXException se) {
576 System.err.println(se);
577 } catch (IOException ioe) {
578 System.err.println(ioe);
579 }
580 Document d = p.getDocument();
581 NodeList metadataList = d.getElementsByTagName("metadata");
582 String metadataval = null;
583 if (metadataList.getLength() > 0) {
584 Node n = metadataList.item(0);
585 metadataval = n.getFirstChild().getNodeValue();
586
587 if (metadata.compareTo("Keywords") == 0) {
588 String[] keywords = metadataval.split(",");
589 for (int i = 0; i < keywords.length; i++) {
590 String s = keywords[i].trim().toLowerCase();
591 resultDocument.addKeyword(s);
592 Set<String> docSet = allKeywords.get(metadataval);
593 if (docSet == null) {
594 docSet = new HashSet<String>();
595 }
596 docSet.add(docID);
597 allKeywords.put(metadataval, docSet);
598 }
599 } else if (metadata.compareTo("Creator") == 0) {
600 String[] authors = metadataval.split("(,)|( and )");
601 // System.err.println(metadataval);
602 for (int i = 0; i < authors.length; i++) {
603 authors[i] = authors[i].trim().toLowerCase();
604 }
605
606 boolean containsExtraName = authors.length % 2 != 0;
607
608 for (int i = 0; i + 1 < authors.length; i = i + 2) {
609 String s = authors[i] + ", " + authors[i + 1];
610
611 // Handle names with jr. in them
612 if (containsExtraName) {
613 if (i + 2 < authors.length
614 && authors[i + 2].contains("jr")) {
615 s += " " + authors[i + 2];
616 i++;
617 }
618 }
619
620 s = s.replaceAll("[.]", "");
621 // System.err.println(s);
622 resultDocument.addAuthor(s);
623
624 Set<String> docSet = allAuthors.get(s);
625 if (docSet == null) {
626 docSet = new HashSet<String>();
627 }
628 docSet.add(docID);
629 allAuthors.put(s, docSet);
630 }
631 } else if (metadata.compareTo("Title") == 0) {
632 resultDocument.setTitle(metadataval);
633 } else if (metadata.compareTo("Booktitle") == 0) {
634 resultDocument.setBooktitle(metadataval);
635
636 Set<String> docSet = allBooktitles.get(metadataval);
637 if (docSet == null) {
638 docSet = new HashSet<String>();
639 }
640 docSet.add(docID);
641 allBooktitles.put(metadataval, docSet);
642 } else if (metadata.compareTo("Date") == 0) {
643 resultDocument.setDate(metadataval.replaceAll("[^0-9]", ""));
644 Set<String> docSet = allDates.get(metadataval);
645 if (docSet == null) {
646 docSet = new HashSet<String>();
647 }
648 docSet.add(docID);
649 allDates.put(metadataval, docSet);
650 } else if (metadata.compareTo("Pages") == 0) {
651 resultDocument.setPages(metadataval);
652 } else if (metadata.compareTo("Journal") == 0) {
653 resultDocument.setJournal(metadataval);
654 Set<String> docSet = allJournals.get(metadataval);
655 if (docSet == null) {
656 docSet = new HashSet<String>();
657 }
658 docSet.add(docID);
659 allJournals.put(metadataval, docSet);
660 } else if (metadata.compareTo("Volume") == 0) {
661 resultDocument.setVolume(metadataval);
662 } else if (metadata.compareTo("Number") == 0) {
663 resultDocument.setNumber(metadataval);
664 } else if (metadata.compareTo("Abstract") == 0) {
665 resultDocument.setAbstract(metadataval);
666 } else if (metadata.compareTo("Editor") == 0) {
667 resultDocument.setEditor(metadataval);
668 } else if (metadata.compareTo("Publisher") == 0) {
669 resultDocument.setPublisher(metadataval);
670 }
671
672 }
673 allResults.put(docID, resultDocument);
674 }
675
676 public String getClassifierNodeName(String nodeID) {
677 String result = null;
678 String requestBody = "<paramList><param name='metadata' value='Title'/></paramList><classifierNodeList><classifierNode nodeID='NODEID'/></classifierNodeList>";
679 requestBody = requestBody.replaceFirst("NODEID", nodeID);
680 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
681 "ClassifierBrowseMetadataRetrieve");
682 request = request.replaceFirst("PROCESSTYPE", "process");
683 request = request.replaceFirst("REQUESTBODY", requestBody);
684
685 request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
686 "ClassifierBrowseMetadataRetrieve")
687 + request.length() + "\n\n" + request;
688
689 // System.err.println(request);
690 result = doRequest(request);
691 // System.err.println(result);
692
693 StringReader sr = new StringReader(result);
694 InputSource is = new InputSource(sr);
695 DOMParser p = new DOMParser();
696 try {
697 p.parse(is);
698 } catch (SAXException se) {
699 System.err.println(se);
700 } catch (IOException ioe) {
701 System.err.println(ioe);
702 }
703
704 String returnName = null;
705
706 Document d = p.getDocument();
707 // Document d = null;
708 NodeList metadataList = d.getElementsByTagName("metadata");
709 for (int i = 0; i < metadataList.getLength(); i++) {
710 Node n = metadataList.item(i);
711 NamedNodeMap nnm = n.getAttributes();
712 Node att = nnm.getNamedItem("name");
713 if (att.getNodeValue().compareTo("Title") == 0) {
714 returnName = n.getFirstChild().getNodeValue();
715 }
716 }
717 return returnName;
718 }
719
720 public void getClassifierNodes(String rootNode) {
721 String result = null;
722 String requestBody = "<paramList><param name='structure' value='children'/></paramList><classifierNodeList><classifierNode nodeID='CLASSIFIER'/></classifierNodeList>";
723 requestBody = requestBody.replaceFirst("CLASSIFIER", rootNode);
724 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
725 "ClassifierBrowse");
726 request = request.replaceFirst("PROCESSTYPE", "process");
727 request = request.replaceFirst("REQUESTBODY", requestBody);
728
729 request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
730 "ClassifierBrowse")
731 + request.length() + "\n\n" + request;
732
733 System.err.println(getClassifierNodeName(rootNode));
734 // System.err.print(rootNode + "#");
735
736 // System.err.println(request);
737 result = doRequest(request);
738 // System.err.println(result);
739
740 StringReader sr = new StringReader(result);
741 InputSource is = new InputSource(sr);
742 DOMParser p = new DOMParser();
743 try {
744 p.parse(is);
745 } catch (SAXException se) {
746 System.err.println(se);
747 } catch (IOException ioe) {
748 System.err.println(ioe);
749 }
750 Document d = p.getDocument();
751
752 NodeList childList = d.getElementsByTagName("classifierNode");
753 NodeList documentList = d.getElementsByTagName("documentNode");
754 // System.err.println("\td " + documentList.getLength());
755 // System.err.println("\tc " + childList.getLength());
756
757 if (childList.getLength() > 0) {
758 for (int i = 0; i < childList.getLength(); i++) {
759 Node n = childList.item(i);
760 NamedNodeMap nnm = n.getAttributes();
761 Node nid = nnm.getNamedItem("nodeID");
762 String nodeID = nid.getFirstChild().getNodeValue();
763
764 // System.err.println("\tchild : " + nodeID);
765
766 if (nodeID.compareTo(rootNode) != 0
767 && nodeID.compareTo("2.6.22") != 0) {
768 // System.err.println("\t" + nodeID);
769 getClassifierNodes(nodeID);
770 }
771 }
772 }
773 if (documentList.getLength() > 0)
774 System.out.println(getClassifierNodeName(rootNode) + "#"
775 + documentList.getLength());
776 }
777}
Note: See TracBrowser for help on using the repository browser.