source: trunk/src/org/expeditee/greenstone/Greenstone3Connection.java@ 312

Last change on this file since 312 was 312, checked in by ra33, 16 years ago

Added search agents for HCI bib tec

File size: 27.8 KB
Line 
1package org.expeditee.greenstone;
2
3import java.io.BufferedReader;
4import java.io.IOException;
5import java.io.InputStreamReader;
6import java.io.PrintWriter;
7import java.io.StringReader;
8import java.net.Socket;
9import java.net.UnknownHostException;
10import java.util.ArrayList;
11import java.util.Collections;
12import java.util.HashMap;
13import java.util.Iterator;
14import java.util.List;
15import java.util.ListIterator;
16import java.util.Map;
17import java.util.Set;
18
19import org.apache.xerces.parsers.DOMParser;
20import org.w3c.dom.Document;
21import org.w3c.dom.NamedNodeMap;
22import org.w3c.dom.Node;
23import org.w3c.dom.NodeList;
24import org.xml.sax.InputSource;
25import org.xml.sax.SAXException;
26
/**
 * This class provides a simple API for communicating with a Greenstone 3 server
 * using SOAP.
 * <p>
 * Greenstone 3 does not yet 'properly' implement SOAP-based web services. We
 * would like to use a Greenstone WSDL (Web Services Description Language) file
 * and a higher level SOAP client interface, but we can't. To get around this,
 * this API uses a simple socket connection to the Greenstone 3 server, and
 * sends SOAP requests as strings (XML documents). This works but isn't elegant.
 * The server responds with a string representing an XML document.
 * <p>
 * The server's hostname and port are hard-coded; the constructor argument
 * selects between the two known servers. <b>Do not modify them.</b>
 * <p>
 * The Greenstone collection to use is <i>hcibib</i>, and this is also
 * hard-coded. <b>Do not modify this.</b>
 * <p>
 * This collection can be accessed from a web browser at <a
 * href="http://delaware.resnet.scms.waikato.ac.nz:8111/greenstone3/library?a=p&amp;sa=about&amp;c=hcibib">
 * this location</a>.
 */
47public class Greenstone3Connection {
48 /** an ordered list of {@link Query} objects */
49 private List queryList;
50
51 /**
52 * a HashMap of {@link ResultDocument} objects with document IDs as the
53 * keys. All the results returned in this session.
54 */
55 private Map allResults;
56
57 /**
58 * a HashMap keyed on the keywords found for all documents returned in this
59 * session. Each item in the map is itself a HashMap, keyed on document IDs
60 * with each item being NULL.
61 */
62 private Map allKeywords;
63
64 /**
65 * a HashMap keyed on the author names found for all documents returned in
66 * this session. Each item in the map is itself a HashMap, keyed on document
67 * IDs with each item being NULL.
68 */
69 private Map allAuthors;
70
71 /**
72 * a HashMap keyed on the publication dates found for all documents returned
73 * in this session. Each item in the map is itself a HashMap, keyed on
74 * document IDs with each item being NULL.
75 */
76 private Map allDates;
77
78 /**
79 * a HashMap keyed on the journal names found for all documents returned in
80 * this session. Each item in the map is itself a HashMap, keyed on document
81 * IDs with each item being NULL.
82 */
83 private Map allJournals;
84
85 /**
86 * a HashMap keyed on the book titles found for all documents returned in
87 * this session. Each item in the map is itself a HashMap, keyed on document
88 * IDs with each item being NULL.
89 */
90 private Map allBooktitles;
91
92 /** the <i>hostname</i> where the Greenstone 3 server is running */
93 private String hostname;
94
95 /** the <i>port</i> on which the Greenstone 3 server is running */
96 private int port;
97
98 /** for communication with the server */
99 private Socket socket = null;
100
101 /** for writing the SOAP request strings to the server socket */
102 private PrintWriter toGSDL = null;
103
104 /** for reading the SOAP response strings from the server socket */
105 private BufferedReader fromGSDL = null;
106
107 /** string that starts every SOAP request */
108 private String SOAPrequestHeader;
109
110 /** acts as a template for every SOAP request string */
111 private String SOAPrequestMessage = "<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><soapenv:Body><message><request lang='en' to='hcibib/PROCESSNAME' type='PROCESSTYPE'>REQUESTBODY</request></message></soapenv:Body></soapenv:Envelope>";
112
/**
 * Creates a connection helper bound to one of the two known Greenstone 3
 * servers and sets up the (initially empty) session data structures. No
 * network connection is opened here.
 * <p>
 * A client application using this API will normally only create one
 * instance of this class, with something like this
 *
 * <pre>
 * Greenstone3Connection gsdl = new Greenstone3Connection(0);
 * </pre>
 *
 * The constructor initialises the following <b>private</b> variables...
 * <ul>
 * <li>the <i>hostname</i> and <i>port</i> of the Greenstone 3 server</li>
 * <li>the HTTP header text that starts every SOAP request</li>
 * <li><i>queryList</i> an ordered list of {@link Query} objects</li>
 * <li><i>allResults</i> a map of {@link ResultDocument} objects with
 * document IDs as the keys. All the results returned in this session.</li>
 * <li><i>allKeywords</i>, <i>allAuthors</i>, <i>allDates</i>,
 * <i>allJournals</i> and <i>allBooktitles</i>: maps keyed on the keywords,
 * author names, publication dates, journal names and book titles found for
 * the documents returned in this session. Each item in such a map is itself
 * a HashMap, keyed on document IDs with each item being NULL.</li>
 * </ul>
 * All of these collections are wrapped with the synchronized wrappers from
 * {@link Collections}.
 *
 * @param location
 *            selects the server: <code>0</code> means
 *            <code>comp537.cs.waikato.ac.nz</code> on port 80, any other
 *            value means <code>130.217.220.10</code> on port 8111
 */
public Greenstone3Connection(int location) {
    if (location == 0) {
        this.hostname = "comp537.cs.waikato.ac.nz";
        this.port = 80;
    } else {
        this.hostname = "130.217.220.10";
        this.port = 8111;
    }
    // Build the header from the endpoint chosen above so the Host line can
    // never disagree with the address the socket actually connects to.
    this.SOAPrequestHeader = "POST /greenstone3/services/localsite HTTP/1.1\nHost: "
            + this.hostname
            + ":"
            + this.port
            + "\nSOAPAction: hcibib/PROCESSNAME\nContent-Type: text/xml;charset=utf-8\nContent-Length: ";

    this.queryList = Collections.synchronizedList(new ArrayList());
    this.allResults = Collections.synchronizedMap(new HashMap());
    this.allKeywords = Collections.synchronizedMap(new HashMap());
    this.allAuthors = Collections.synchronizedMap(new HashMap());
    this.allDates = Collections.synchronizedMap(new HashMap());
    this.allJournals = Collections.synchronizedMap(new HashMap());
    this.allBooktitles = Collections.synchronizedMap(new HashMap());
}
165
166 public Map getSessionResults() {
167 return this.allResults;
168 };
169
170 /**
171 * Print a string representation of the list of queries issued in this
172 * session.
173 */
174 public void dumpQueryList() {
175 ListIterator iter = queryList.listIterator();
176 while (iter.hasNext()) {
177 Query query = (Query) iter.next();
178 System.out.println(query.toString());
179 }
180 }
181
182 /**
183 * Print a string representation of the Booktitles occuring for all query
184 * results in this session. For each booktitle print the IDs of the
185 * documents with that booktitle.
186 */
187 public void dumpAllBooktitles() {
188 Set keys = allBooktitles.keySet();
189 Iterator iter = keys.iterator();
190 while (iter.hasNext()) {
191 String booktitle = (String) iter.next();
192 HashMap docMap = (HashMap) allBooktitles.get(booktitle);
193 System.out.println(booktitle);
194 System.out.println(docMap.keySet().toString());
195 }
196 }
197
198 /**
199 * Print a string representation of the Journals occuring for all query
200 * results in this session. For each journal print the IDs of the documents
201 * with that journal.
202 */
203 public void dumpAllJournals() {
204 Set keys = allJournals.keySet();
205 Iterator iter = keys.iterator();
206 while (iter.hasNext()) {
207 String journal = (String) iter.next();
208 HashMap docMap = (HashMap) allJournals.get(journal);
209 System.out.println(journal);
210 System.out.println(docMap.keySet().toString());
211 }
212 }
213
214 /**
215 * Print a string representation of the Dates occuring for all query results
216 * in this session. For each date print the IDs of the documents with that
217 * date.
218 */
219 public void dumpAllDates() {
220 Set keys = allDates.keySet();
221 Iterator iter = keys.iterator();
222 while (iter.hasNext()) {
223 String date = (String) iter.next();
224 HashMap docMap = (HashMap) allDates.get(date);
225 System.out.println(date);
226 System.out.println(docMap.keySet().toString());
227 }
228 }
229
230 /**
231 * Print a string representation of the Authors occuring for all query
232 * results in this session. For each author print the IDs of the documents
233 * with that author.
234 */
235 public void dumpAllAuthors() {
236 Set keys = allAuthors.keySet();
237 Iterator iter = keys.iterator();
238 while (iter.hasNext()) {
239 String author = (String) iter.next();
240 HashMap docMap = (HashMap) allAuthors.get(author);
241 System.out.println(author);
242 System.out.println(docMap.keySet().toString());
243 }
244 }
245
246 /**
247 * Print a string representation of the Keywords occuring for all query
248 * results in this session. For each keyword print the IDs of the documents
249 * with that keyword.
250 */
251 public void dumpAllKeywords() {
252 Set keys = allKeywords.keySet();
253 Iterator iter = keys.iterator();
254 while (iter.hasNext()) {
255 String keyword = (String) iter.next();
256 HashMap docMap = (HashMap) allKeywords.get(keyword);
257 System.out.println(keyword);
258 System.out.println(docMap.keySet().toString());
259 }
260 }
261
262 /**
263 * Print a string representation of all the result documents returned by
264 * queries in this session.
265 */
266 public void dumpAllResults() {
267 Set keys = allResults.keySet();
268 Iterator iter = keys.iterator();
269
270 while (iter.hasNext()) {
271 String docID = (String) iter.next();
272 ResultDocument resultDocument = (ResultDocument) allResults
273 .get(docID);
274 System.out.println("____________" + docID + " ___________");
275 System.out.println(resultDocument.toString());
276 }
277 }
278
279 /**
280 * Print all the result documents IDs returned by queries in this session,
281 * along with their titles.
282 */
283 public void dumpAllTitles() {
284 Set keys = allResults.keySet();
285 Iterator iter = keys.iterator();
286 while (iter.hasNext()) {
287 String docID = (String) iter.next();
288 ResultDocument resultDocument = (ResultDocument) allResults
289 .get(docID);
290 System.out.println(docID + "\t" + resultDocument.getTitle());
291 }
292 }
293
294 /**
295 * Provides the {@link ResultDocument} object for the document with the
296 * given ID
297 *
298 * @param docID
299 * is a document identifier, in the form returned by the server
300 * and available from a {@link QueryOutcome}
301 * @return the {@link ResultDocument} object reflecting the state of the
302 * result document at the time that this method was called. The
303 * state can change as more metadata is retrieved for the document
304 * and the document is returned by further queries.
305 */
306 public ResultDocument getDocument(String docID) {
307 return (ResultDocument) allResults.get(docID);
308 }
309
310 /**
311 * Implements the actual communication with the server. <b>You can not call
312 * this method directly from your client code.</b>
313 * <p>
314 * Throws an exception and exits if the hosthame is not known or the
315 * connection can't be established.
316 * <p>
317 *
318 * @param request
319 * an already well formed string that contains the appropriate
320 * HTTP headers and a SOAP message (in XML form) that will ask
321 * the server for some information.
322 * @return a string containing a SOAP message (an XML document) that the
323 * server returned in response to the request
324 */
325 private String doRequest(String request) {
326 // System.err.println("Connecting to " + hostname + " on port " + port);
327 try {
328 try {
329 socket = new Socket(hostname, port);
330 } catch (SecurityException se) {
331 System.err.println("Security exception : " + se);
332 System.exit(1);
333 }
334 toGSDL = new PrintWriter(socket.getOutputStream(), true);
335 fromGSDL = new BufferedReader(new InputStreamReader(socket
336 .getInputStream()));
337 } catch (UnknownHostException e) {
338 System.err.println("Don't know about GSDL host: " + hostname);
339 System.exit(1);
340 } catch (IOException e) {
341 System.err.println("IO exception : " + e);
342 System.exit(1);
343 }
344
345 String result = null;
346 toGSDL.println(request);
347 // System.err.println("Issued request to " + hostname + " on port " +
348 // port);
349 try {
350 String terminator = "Envelope>";
351 String response = "";
352
353 char c;
354 do {
355 c = (char) fromGSDL.read();
356 response = response + c;
357 } while (!response.endsWith(terminator));
358 toGSDL.close();
359 fromGSDL.close();
360 socket.close();
361
362 int start = response.indexOf("<?xml");
363 result = response.substring(start);
364 // System.out.println(result);
365 int a = result.indexOf('\n');
366 int b = result.indexOf('\n', a + 1);
367 while (a != -1 && b != -1) {
368 // System.out.println(a + " " +b);
369 result = result.substring(0, a - 1) + result.substring(b + 1);
370 a = result.indexOf('\n');
371 b = result.indexOf('\n', a + 1);
372 }
373 } catch (IOException e) {
374 System.err.println(e);
375 System.exit(1);
376 }
377 return result;
378 }
379
380 /**
381 * Produces a SOAP request string, sends it to the server, gets and
382 * processes the response updating the appropriate data structures. Uses the
383 * settings represented in the provided argument to produce a SOAP request
384 * string. The string is sent to the server using the {@link doRequest}
385 * method. The returned XML document is processed and the information
386 * therein is used to store information about the returned documents and
387 * this query.
388 * <p>
389 * This method updates the {@link queryList} and {@link allResults} data
390 * <p>
391 *
392 * @param query
393 * a {@link Query} object that must be constructed and passed to
394 * this method by the calling client application
395 * @return a {@link QueryOutcome} object that stores information about the
396 * server's response
397 *
398 */
399 public QueryOutcome issueQueryToServer(Query query) {
400 QueryOutcome queryOutcome = new QueryOutcome();
401 String result = null;
402 String requestBody = "<paramList><param name='maxDocs' value='MAXDOCS'/><param name='level' value='Sec'/><param name ='index' value='INDEX'/><param name='matchMode' value='MATCHMODE'/><param name='query' value='QUERY'/><param name='case' value='CASE'/><param name='sortBy' value='SORTBY'/><param name='stem' value='STEM'/><param name='firstDoc' value='FIRSTDOC'/><param name='lastDoc' value='LASTDOC'/></paramList>";
403 requestBody = requestBody.replaceFirst("MAXDOCS", query
404 .getMaxDocsToReturn());
405 requestBody = requestBody.replaceFirst("INDEX", query.getIndex());
406 requestBody = requestBody.replaceFirst("MATCHMODE", query
407 .getMatchMode());
408 requestBody = requestBody.replaceFirst("QUERY", query.getQueryText());
409 requestBody = requestBody.replaceFirst("CASE", query.getCasefolding());
410 requestBody = requestBody.replaceFirst("SORTBY", query.getSortBy());
411 requestBody = requestBody.replaceFirst("STEM", query.getStemming());
412 requestBody = requestBody.replaceFirst("FIRSTDOC", query.getFirstDoc());
413 requestBody = requestBody.replaceFirst("LASTDOC", query.getLastDoc());
414 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
415 "TextQuery");
416 request = request.replaceFirst("PROCESSTYPE", "process");
417 request = request.replaceFirst("REQUESTBODY", requestBody);
418 request = SOAPrequestHeader.replaceFirst("PROCESSNAME", "TextQuery")
419 + request.length() + "\n\n" + request;
420
421 int firstDoc = java.lang.Integer.parseInt(query.getFirstDoc());
422
423 result = doRequest(request);
424 // System.out.println("\n\n" + result + "\n");
425 StringReader sr = new StringReader(result);
426 InputSource is = new InputSource(sr);
427 DOMParser p = new DOMParser();
428 try {
429 p.parse(is);
430 } catch (SAXException se) {
431 System.err.println(se);
432 } catch (IOException ioe) {
433 System.err.println(ioe);
434 }
435 Document d = p.getDocument();
436 NodeList metadataList = d.getElementsByTagName("metadata");
437 for (int i = 0; i < metadataList.getLength(); i++) {
438 Node n = metadataList.item(i);
439 NamedNodeMap nnm = n.getAttributes();
440 Node att = nnm.getNamedItem("name");
441 if (att.getNodeValue().compareTo("numDocsMatched") == 0) {
442 queryOutcome.setHowManyDocsMatched(n.getFirstChild()
443 .getNodeValue());
444 } else if (att.getNodeValue().compareTo("numDocsReturned") == 0) {
445 queryOutcome.setHowManyDocsReturned(n.getFirstChild()
446 .getNodeValue());
447 }
448 }
449
450 NodeList documentList = d.getElementsByTagName("documentNode");
451 for (int i = 0; i < documentList.getLength(); i++) {
452 Node n = documentList.item(i);
453 NamedNodeMap nnm = n.getAttributes();
454 Node nid = nnm.getNamedItem("nodeID");
455 Node nscore = nnm.getNamedItem("rank");
456 String docID = nid.getFirstChild().getNodeValue();
457 queryOutcome.addResult(docID, firstDoc + i, nscore.getFirstChild()
458 .getNodeValue());
459 }
460 query.addQueryOutcome(queryOutcome);
461 Query q = (Query) query.clone();
462 queryList.add(q);
463
464 for (int i = 0; i < documentList.getLength(); i++) {
465 Node n = documentList.item(i);
466 NamedNodeMap nnm = n.getAttributes();
467 Node nid = nnm.getNamedItem("nodeID");
468 Node nscore = nnm.getNamedItem("rank");
469 String docID = nid.getFirstChild().getNodeValue();
470
471 QueryContext queryContext = new QueryContext(firstDoc + i, nscore
472 .getFirstChild().getNodeValue(), q);
473 if (allResults.containsKey(docID)) {
474 ResultDocument resultDocument = (ResultDocument) allResults
475 .get(docID);
476 resultDocument.incrementFrequencyReturned();
477 resultDocument.addQueryContext(queryContext);
478 allResults.put(docID, resultDocument);
479 } else {
480 ResultDocument resultDocument = new ResultDocument();
481 resultDocument.addQueryContext(queryContext);
482 allResults.put(docID, resultDocument);
483 }
484 }
485 return queryOutcome;
486 }
487
488 /**
489 * Produces a SOAP request string, sends it to the server, gets and
490 * processes the response updating the appropriate data structures. Given a
491 * document identifier and the name of a metadata item, this method produces
492 * a SOAP request string. The string is sent to the server using the
493 * {@link doRequest} method.
494 * <p>
495 * The request is simply for the values of the given metadata item of the
496 * given document. <b>If the metadata item for the given document has
497 * already been retrieved from the server, the server is NOT contacted
498 * again.</b>
499 * <p>
500 * The returned XML document is processed. The {@link ResultDocument} object
501 * for the document in question is updated with the returned metadata
502 * information, and the {@link allResults} data is consequently updated.
503 * <p>
504 * If the requested metadata is one of Keywords, Authors, Dates, Journals,
505 * Booktitles then the appropriate data structure is updated.
506 * <p>
507 * The method does not return a value. Private data structures are updated
508 * instead. The calling client application should proceed to access document
509 * metadata using the provided methods.
510 * <p>
511 *
512 * @param docID
513 * is a document identifier, in the form returned by the server
514 * and available from a {@link QueryOutcome}
515 * @param metadata
516 * is the metadata field whose value is to be retrieved. Valid
517 * values are
518 * <ul>
519 * <li>Title</li>
520 * <li>Creator (the authors)</li>
521 * <li>Journal</li>
522 * <li>Booktitle</li>
523 * <li>Volume</li>
524 * <li>Number</li>
525 * <li>Editor</li>
526 * <li>Pages</li>
527 * <li>Publisher</li>
528 * <li>Date</li>
529 * <li>Keywords</li>
530 * <li>Abstract</li>
531 * </ul>
532 */
533 public void getDocumentMetadataFromServer(String docID, String metadata) {
534 ResultDocument resultDocument = (ResultDocument) allResults.get(docID);
535 if (resultDocument.metadataExists(metadata)) {
536 return;
537 }
538
539 String result = null;
540 String requestBody = "<paramList><param name='metadata' value='METADATAFIELD'/></paramList><documentNodeList><documentNode nodeID='DOCIDVALUE'/></documentNodeList>";
541 requestBody = requestBody.replaceFirst("METADATAFIELD", metadata);
542 requestBody = requestBody.replaceFirst("DOCIDVALUE", docID);
543 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
544 "DocumentMetadataRetrieve");
545 request = request.replaceFirst("PROCESSTYPE", "process");
546 request = request.replaceFirst("REQUESTBODY", requestBody);
547
548 request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
549 "DocumentMetadataRetrieve")
550 + request.length() + "\n\n" + request;
551
552 result = doRequest(request);
553 StringReader sr = new StringReader(result);
554 InputSource is = new InputSource(sr);
555 DOMParser p = new DOMParser();
556 try {
557 p.parse(is);
558 } catch (SAXException se) {
559 System.err.println(se);
560 } catch (IOException ioe) {
561 System.err.println(ioe);
562 }
563 Document d = p.getDocument();
564 NodeList metadataList = d.getElementsByTagName("metadata");
565 String metadataval = null;
566 if (metadataList.getLength() > 0) {
567 Node n = metadataList.item(0);
568 metadataval = n.getFirstChild().getNodeValue();
569
570 if (metadata.compareTo("Keywords") == 0) {
571 String[] keywords = metadataval.split(",");
572 for (int i = 0; i < keywords.length; i++) {
573 String s = keywords[i].trim().toLowerCase();
574 resultDocument.addKeyword(s);
575 if (allKeywords.containsKey(s)) {
576 HashMap docMap = (HashMap) allKeywords.get(s);
577 docMap.put(docID, null);
578 allKeywords.put(s, docMap);
579 } else {
580 HashMap docMap = new HashMap();
581 docMap.put(docID, null);
582 allKeywords.put(s, docMap);
583 }
584 }
585 } else if (metadata.compareTo("Creator") == 0) {
586 String[] authors = metadataval.split("(,)|( and )");
587 // System.err.println(metadataval);
588 for (int i = 0; i < authors.length; i++) {
589 authors[i] = authors[i].trim().toLowerCase();
590 }
591
592 boolean containsExtraName = authors.length % 2 != 0;
593
594 for (int i = 0; i + 1 < authors.length; i = i + 2) {
595 String s = authors[i] + ", " + authors[i + 1];
596
597 //Handle names with jr. in them
598 if (containsExtraName) {
599 if (i + 2 < authors.length
600 && authors[i + 2].contains("jr")) {
601 s += " " + authors[i + 2];
602 i++;
603 }
604 }
605
606 s = s.replaceAll("[.]", "");
607 // System.err.println(s);
608 resultDocument.addAuthor(s);
609 if (allAuthors.containsKey(s)) {
610 HashMap docMap = (HashMap) allAuthors.get(s);
611 docMap.put(docID, null);
612 allAuthors.put(s, docMap);
613 } else {
614 HashMap docMap = new HashMap();
615 docMap.put(docID, null);
616 allAuthors.put(s, docMap);
617 }
618 }
619 } else if (metadata.compareTo("Title") == 0) {
620 resultDocument.setTitle(metadataval);
621 } else if (metadata.compareTo("Booktitle") == 0) {
622 resultDocument.setBooktitle(metadataval);
623 if (allBooktitles.containsKey(metadataval)) {
624 HashMap docMap = (HashMap) allBooktitles.get(metadataval);
625 docMap.put(docID, null);
626 allBooktitles.put(metadataval, docMap);
627 } else {
628 HashMap docMap = new HashMap();
629 docMap.put(docID, null);
630 allBooktitles.put(metadataval, docMap);
631 }
632 } else if (metadata.compareTo("Date") == 0) {
633 resultDocument.setDate(metadataval.replaceAll("[^0-9]", ""));
634 if (allDates.containsKey(metadataval)) {
635 HashMap docMap = (HashMap) allDates.get(metadataval);
636 docMap.put(docID, null);
637 allDates.put(metadataval, docMap);
638 } else {
639 HashMap docMap = new HashMap();
640 docMap.put(docID, null);
641 allDates.put(metadataval, docMap);
642 }
643 } else if (metadata.compareTo("Pages") == 0) {
644 resultDocument.setPages(metadataval);
645 } else if (metadata.compareTo("Journal") == 0) {
646 resultDocument.setJournal(metadataval);
647 if (allJournals.containsKey(metadataval)) {
648 HashMap docMap = (HashMap) allJournals.get(metadataval);
649 docMap.put(docID, null);
650 allJournals.put(metadataval, docMap);
651 } else {
652 HashMap docMap = new HashMap();
653 docMap.put(docID, null);
654 allJournals.put(metadataval, docMap);
655 }
656 } else if (metadata.compareTo("Volume") == 0) {
657 resultDocument.setVolume(metadataval);
658 } else if (metadata.compareTo("Number") == 0) {
659 resultDocument.setNumber(metadataval);
660 } else if (metadata.compareTo("Abstract") == 0) {
661 resultDocument.setAbstract(metadataval);
662 } else if (metadata.compareTo("Editor") == 0) {
663 resultDocument.setEditor(metadataval);
664 } else if (metadata.compareTo("Publisher") == 0) {
665 resultDocument.setPublisher(metadataval);
666 }
667
668 }
669 allResults.put(docID, resultDocument);
670 }
671
672 public String getClassifierNodeName(String nodeID) {
673 String result = null;
674 String requestBody = "<paramList><param name='metadata' value='Title'/></paramList><classifierNodeList><classifierNode nodeID='NODEID'/></classifierNodeList>";
675 requestBody = requestBody.replaceFirst("NODEID", nodeID);
676 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
677 "ClassifierBrowseMetadataRetrieve");
678 request = request.replaceFirst("PROCESSTYPE", "process");
679 request = request.replaceFirst("REQUESTBODY", requestBody);
680
681 request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
682 "ClassifierBrowseMetadataRetrieve")
683 + request.length() + "\n\n" + request;
684
685 // System.err.println(request);
686 result = doRequest(request);
687 // System.err.println(result);
688
689 StringReader sr = new StringReader(result);
690 InputSource is = new InputSource(sr);
691 DOMParser p = new DOMParser();
692 try {
693 p.parse(is);
694 } catch (SAXException se) {
695 System.err.println(se);
696 } catch (IOException ioe) {
697 System.err.println(ioe);
698 }
699
700 String returnName = null;
701
702 Document d = p.getDocument();
703 NodeList metadataList = d.getElementsByTagName("metadata");
704 for (int i = 0; i < metadataList.getLength(); i++) {
705 Node n = metadataList.item(i);
706 NamedNodeMap nnm = n.getAttributes();
707 Node att = nnm.getNamedItem("name");
708 if (att.getNodeValue().compareTo("Title") == 0) {
709 returnName = n.getFirstChild().getNodeValue();
710 }
711 }
712 return returnName;
713 }
714
715 public void getClassifierNodes(String rootNode) {
716 String result = null;
717 String requestBody = "<paramList><param name='structure' value='children'/></paramList><classifierNodeList><classifierNode nodeID='CLASSIFIER'/></classifierNodeList>";
718 requestBody = requestBody.replaceFirst("CLASSIFIER", rootNode);
719 String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
720 "ClassifierBrowse");
721 request = request.replaceFirst("PROCESSTYPE", "process");
722 request = request.replaceFirst("REQUESTBODY", requestBody);
723
724 request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
725 "ClassifierBrowse")
726 + request.length() + "\n\n" + request;
727
728 System.err.println(getClassifierNodeName(rootNode));
729 // System.err.print(rootNode + "#");
730
731 // System.err.println(request);
732 result = doRequest(request);
733 // System.err.println(result);
734
735 StringReader sr = new StringReader(result);
736 InputSource is = new InputSource(sr);
737 DOMParser p = new DOMParser();
738 try {
739 p.parse(is);
740 } catch (SAXException se) {
741 System.err.println(se);
742 } catch (IOException ioe) {
743 System.err.println(ioe);
744 }
745 Document d = p.getDocument();
746
747 NodeList childList = d.getElementsByTagName("classifierNode");
748 NodeList documentList = d.getElementsByTagName("documentNode");
749 // System.err.println("\td " + documentList.getLength());
750 // System.err.println("\tc " + childList.getLength());
751
752 if (childList.getLength() > 0) {
753 for (int i = 0; i < childList.getLength(); i++) {
754 Node n = childList.item(i);
755 NamedNodeMap nnm = n.getAttributes();
756 Node nid = nnm.getNamedItem("nodeID");
757 String nodeID = nid.getFirstChild().getNodeValue();
758
759 // System.err.println("\tchild : " + nodeID);
760
761 if (nodeID.compareTo(rootNode) != 0
762 && nodeID.compareTo("2.6.22") != 0) {
763 // System.err.println("\t" + nodeID);
764 getClassifierNodes(nodeID);
765 }
766 }
767 }
768 if (documentList.getLength() > 0)
769 System.out.println(getClassifierNodeName(rootNode) + "#"
770 + documentList.getLength());
771 }
772}
Note: See TracBrowser for help on using the repository browser.