Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: trunk/src/org/expeditee/greenstone/Greenstone3Connection.java@ 312

Last change on this file since 312 was 312, checked in by ra33, 16 years ago
Added search agents for HCI bib tec
File size: 27.8 KB

Line
1	package org.expeditee.greenstone;
2
3	import java.io.BufferedReader;
4	import java.io.IOException;
5	import java.io.InputStreamReader;
6	import java.io.PrintWriter;
7	import java.io.StringReader;
8	import java.net.Socket;
9	import java.net.UnknownHostException;
10	import java.util.ArrayList;
11	import java.util.Collections;
12	import java.util.HashMap;
13	import java.util.Iterator;
14	import java.util.List;
15	import java.util.ListIterator;
16	import java.util.Map;
17	import java.util.Set;
18
19	import org.apache.xerces.parsers.DOMParser;
20	import org.w3c.dom.Document;
21	import org.w3c.dom.NamedNodeMap;
22	import org.w3c.dom.Node;
23	import org.w3c.dom.NodeList;
24	import org.xml.sax.InputSource;
25	import org.xml.sax.SAXException;
26
27	/**
28	* This class provides a simple API for communicating with a Greenstone 3 server
29	* using SOAP.
30	* <p>
31	* Greenstone 3 does not yet 'properly' implement SOAP-based web services. We
32	* would like to use a Greenstone WSDL (Web Services Definition Language) file
33	* and a higher level SOAP Client interface. But we can't. To get around this,
34	* this API uses a simple socket connection to the Greenstone 3 server, and
35	* sends SOAP requests as strings (XML documents). This works but isn't elegant.
36	* The server responds with a string representing an XML document.
37	* <p>
38	* The server's hostname and port are hard-coded. <b>Do not modify them.</b>
39	* <p>
40	* The Greenstone collection to use is <i>hcibib</i>, and this is also
41	* hard-coded. <b>Do not modify this.</b>
42	* <p>
43	* This collection can be accessed from a web browser at <a
44	* href="http://delaware.resnet.scms.waikato.ac.nz:8111/greenstone3/library?a=p&sa=about&c=hcibib">
45	* this location</a>.
46	*/
47	public class Greenstone3Connection {
48	/** an ordered list of {@link Query} objects */
49	private List queryList;
50
51	/**
52	* a HashMap of {@link ResultDocument} objects with document IDs as the
53	* keys. All the results returned in this session.
54	*/
55	private Map allResults;
56
57	/**
58	* a HashMap keyed on the keywords found for all documents returned in this
59	* session. Each item in the map is itself a HashMap, keyed on document IDs
60	* with each item being NULL.
61	*/
62	private Map allKeywords;
63
64	/**
65	* a HashMap keyed on the author names found for all documents returned in
66	* this session. Each item in the map is itself a HashMap, keyed on document
67	* IDs with each item being NULL.
68	*/
69	private Map allAuthors;
70
71	/**
72	* a HashMap keyed on the publication dates found for all documents returned
73	* in this session. Each item in the map is itself a HashMap, keyed on
74	* document IDs with each item being NULL.
75	*/
76	private Map allDates;
77
78	/**
79	* a HashMap keyed on the journal names found for all documents returned in
80	* this session. Each item in the map is itself a HashMap, keyed on document
81	* IDs with each item being NULL.
82	*/
83	private Map allJournals;
84
85	/**
86	* a HashMap keyed on the book titles found for all documents returned in
87	* this session. Each item in the map is itself a HashMap, keyed on document
88	* IDs with each item being NULL.
89	*/
90	private Map allBooktitles;
91
92	/** the <i>hostname</i> where the Greenstone 3 server is running */
93	private String hostname;
94
95	/** the <i>port</i> on which the Greenstone 3 server is running */
96	private int port;
97
98	/** for communication with the server */
99	private Socket socket = null;
100
101	/** for writing the SOAP request strings to the server socket */
102	private PrintWriter toGSDL = null;
103
104	/** for reading the SOAP response strings from the server socket */
105	private BufferedReader fromGSDL = null;
106
107	/** string that starts every SOAP request */
108	private String SOAPrequestHeader;
109
110	/** acts as a template for every SOAP request string */
111	private String SOAPrequestMessage = "<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><soapenv:Body><message><request lang='en' to='hcibib/PROCESSNAME' type='PROCESSTYPE'>REQUESTBODY</request></message></soapenv:Body></soapenv:Envelope>";
112
113	/**
114	* A client application using this API will normally only create one
115	* instance of this class.
116	* <p>
117	* Create an instance with something like this
118	*
119	* <pre>
120	* Greenstone3Connection gsdl = new Greenstone3Connection(0);
121	* </pre>
122	*
123	* The constructor initialises the following <b>private</b> variables...
124	* <ul>
125	* <li>the <i>hostname</i> where the Greenstone 3 server is running</li>
126	* <li>the <i>port</i> on which the Greenstone 3 server is running</li>
127	* <li><i>queryList</i> an ordered list of {@link Query} objects</li>
128	* <li><i>allResults</i> a HashMap of {@link ResultDocument} objects with
129	* document IDs as the keys. All the results returned in this session.</li>
130	* <li><i>allKeywords</i> a HashMap keyed on the keywords found for all
131	* documents returned in this session. Each item in the map is itself a
132	* HashMap, keyed on document IDs with each item being NULL.</li>
133	* <li><i>allAuthors</i> a HashMap keyed on the author names found for all
134	* documents returned in this session. Each item in the map is itself a
135	* HashMap, keyed on document IDs with each item being NULL.</li>
136	* <li><i>allDates</i> a HashMap keyed on the publication dates found for
137	* all documents returned in this session. Each item in the map is itself a
138	* HashMap, keyed on document IDs with each item being NULL.</li>
139	* <li><i>allJournals</i> a HashMap keyed on the journal names found for
140	* all documents returned in this session. Each item in the map is itself a
141	* HashMap, keyed on document IDs with each item being NULL.</li>
142	* <li><i>allBooktitles</i> a HashMap keyed on the book titles found for
143	* all documents returned in this session. Each item in the map is itself a
144	* HashMap, keyed on document IDs with each item being NULL.</li>
145	* </ul>
146	*/
147	public Greenstone3Connection(int location) {
148	if (location == 0) {
149	this.hostname = "comp537.cs.waikato.ac.nz";
150	this.port = 80;
151	this.SOAPrequestHeader = "POST /greenstone3/services/localsite HTTP/1.1\nHost: comp537.cs.waikato.ac.nz:80\nSOAPAction: hcibib/PROCESSNAME\nContent-Type: text/xml;charset=utf-8\nContent-Length: ";
152	} else {
153	this.hostname = "130.217.220.10";
154	this.port = 8111;
155	this.SOAPrequestHeader = "POST /greenstone3/services/localsite HTTP/1.1\nHost: 130.217.220.10:8111\nSOAPAction: hcibib/PROCESSNAME\nContent-Type: text/xml;charset=utf-8\nContent-Length: ";
156	}
157	this.queryList = Collections.synchronizedList(new ArrayList());
158	this.allResults = Collections.synchronizedMap(new HashMap());
159	this.allKeywords = Collections.synchronizedMap(new HashMap());
160	this.allAuthors = Collections.synchronizedMap(new HashMap());
161	this.allDates = Collections.synchronizedMap(new HashMap());
162	this.allJournals = Collections.synchronizedMap(new HashMap());
163	this.allBooktitles = Collections.synchronizedMap(new HashMap());
164	}
165
166	public Map getSessionResults() {
167	return this.allResults;
168	};
169
170	/**
171	* Print a string representation of the list of queries issued in this
172	* session.
173	*/
174	public void dumpQueryList() {
175	ListIterator iter = queryList.listIterator();
176	while (iter.hasNext()) {
177	Query query = (Query) iter.next();
178	System.out.println(query.toString());
179	}
180	}
181
182	/**
183	* Print a string representation of the Booktitles occurring for all query
184	* results in this session. For each booktitle print the IDs of the
185	* documents with that booktitle.
186	*/
187	public void dumpAllBooktitles() {
188	Set keys = allBooktitles.keySet();
189	Iterator iter = keys.iterator();
190	while (iter.hasNext()) {
191	String booktitle = (String) iter.next();
192	HashMap docMap = (HashMap) allBooktitles.get(booktitle);
193	System.out.println(booktitle);
194	System.out.println(docMap.keySet().toString());
195	}
196	}
197
198	/**
199	* Print a string representation of the Journals occurring for all query
200	* results in this session. For each journal print the IDs of the documents
201	* with that journal.
202	*/
203	public void dumpAllJournals() {
204	Set keys = allJournals.keySet();
205	Iterator iter = keys.iterator();
206	while (iter.hasNext()) {
207	String journal = (String) iter.next();
208	HashMap docMap = (HashMap) allJournals.get(journal);
209	System.out.println(journal);
210	System.out.println(docMap.keySet().toString());
211	}
212	}
213
214	/**
215	* Print a string representation of the Dates occurring for all query results
216	* in this session. For each date print the IDs of the documents with that
217	* date.
218	*/
219	public void dumpAllDates() {
220	Set keys = allDates.keySet();
221	Iterator iter = keys.iterator();
222	while (iter.hasNext()) {
223	String date = (String) iter.next();
224	HashMap docMap = (HashMap) allDates.get(date);
225	System.out.println(date);
226	System.out.println(docMap.keySet().toString());
227	}
228	}
229
230	/**
231	* Print a string representation of the Authors occurring for all query
232	* results in this session. For each author print the IDs of the documents
233	* with that author.
234	*/
235	public void dumpAllAuthors() {
236	Set keys = allAuthors.keySet();
237	Iterator iter = keys.iterator();
238	while (iter.hasNext()) {
239	String author = (String) iter.next();
240	HashMap docMap = (HashMap) allAuthors.get(author);
241	System.out.println(author);
242	System.out.println(docMap.keySet().toString());
243	}
244	}
245
246	/**
247	* Print a string representation of the Keywords occurring for all query
248	* results in this session. For each keyword print the IDs of the documents
249	* with that keyword.
250	*/
251	public void dumpAllKeywords() {
252	Set keys = allKeywords.keySet();
253	Iterator iter = keys.iterator();
254	while (iter.hasNext()) {
255	String keyword = (String) iter.next();
256	HashMap docMap = (HashMap) allKeywords.get(keyword);
257	System.out.println(keyword);
258	System.out.println(docMap.keySet().toString());
259	}
260	}
261
262	/**
263	* Print a string representation of all the result documents returned by
264	* queries in this session.
265	*/
266	public void dumpAllResults() {
267	Set keys = allResults.keySet();
268	Iterator iter = keys.iterator();
269
270	while (iter.hasNext()) {
271	String docID = (String) iter.next();
272	ResultDocument resultDocument = (ResultDocument) allResults
273	.get(docID);
274	System.out.println("____________" + docID + " ___________");
275	System.out.println(resultDocument.toString());
276	}
277	}
278
279	/**
280	* Print all the result documents IDs returned by queries in this session,
281	* along with their titles.
282	*/
283	public void dumpAllTitles() {
284	Set keys = allResults.keySet();
285	Iterator iter = keys.iterator();
286	while (iter.hasNext()) {
287	String docID = (String) iter.next();
288	ResultDocument resultDocument = (ResultDocument) allResults
289	.get(docID);
290	System.out.println(docID + "\t" + resultDocument.getTitle());
291	}
292	}
293
294	/**
295	* Provides the {@link ResultDocument} object for the document with the
296	* given ID
297	*
298	* @param docID
299	* is a document identifier, in the form returned by the server
300	* and available from a {@link QueryOutcome}
301	* @return the {@link ResultDocument} object reflecting the state of the
302	* result document at the time that this method was called. The
303	* state can change as more metadata is retrieved for the document
304	* and the document is returned by further queries.
305	*/
306	public ResultDocument getDocument(String docID) {
307	return (ResultDocument) allResults.get(docID);
308	}
309
310	/**
311	* Implements the actual communication with the server. <b>You can not call
312	* this method directly from your client code.</b>
313	* <p>
314	* Throws an exception and exits if the hostname is not known or the
315	* connection can't be established.
316	* <p>
317	*
318	* @param request
319	* an already well formed string that contains the appropriate
320	* HTTP headers and a SOAP message (in XML form) that will ask
321	* the server for some information.
322	* @return a string containing a SOAP message (an XML document) that the
323	* server returned in response to the request
324	*/
325	private String doRequest(String request) {
326	// System.err.println("Connecting to " + hostname + " on port " + port);
327	try {
328	try {
329	socket = new Socket(hostname, port);
330	} catch (SecurityException se) {
331	System.err.println("Security exception : " + se);
332	System.exit(1);
333	}
334	toGSDL = new PrintWriter(socket.getOutputStream(), true);
335	fromGSDL = new BufferedReader(new InputStreamReader(socket
336	.getInputStream()));
337	} catch (UnknownHostException e) {
338	System.err.println("Don't know about GSDL host: " + hostname);
339	System.exit(1);
340	} catch (IOException e) {
341	System.err.println("IO exception : " + e);
342	System.exit(1);
343	}
344
345	String result = null;
346	toGSDL.println(request);
347	// System.err.println("Issued request to " + hostname + " on port " +
348	// port);
349	try {
350	String terminator = "Envelope>";
351	String response = "";
352
353	char c;
354	do {
355	c = (char) fromGSDL.read();
356	response = response + c;
357	} while (!response.endsWith(terminator));
358	toGSDL.close();
359	fromGSDL.close();
360	socket.close();
361
362	int start = response.indexOf("<?xml");
363	result = response.substring(start);
364	// System.out.println(result);
365	int a = result.indexOf('\n');
366	int b = result.indexOf('\n', a + 1);
367	while (a != -1 && b != -1) {
368	// System.out.println(a + " " +b);
369	result = result.substring(0, a - 1) + result.substring(b + 1);
370	a = result.indexOf('\n');
371	b = result.indexOf('\n', a + 1);
372	}
373	} catch (IOException e) {
374	System.err.println(e);
375	System.exit(1);
376	}
377	return result;
378	}
379
380	/**
381	* Produces a SOAP request string, sends it to the server, gets and
382	* processes the response updating the appropriate data structures. Uses the
383	* settings represented in the provided argument to produce a SOAP request
384	* string. The string is sent to the server using the {@link doRequest}
385	* method. The returned XML document is processed and the information
386	* therein is used to store information about the returned documents and
387	* this query.
388	* <p>
389	* This method updates the {@link queryList} and {@link allResults} data structures.
390	* <p>
391	*
392	* @param query
393	* a {@link Query} object that must be constructed and passed to
394	* this method by the calling client application
395	* @return a {@link QueryOutcome} object that stores information about the
396	* server's response
397	*
398	*/
399	public QueryOutcome issueQueryToServer(Query query) {
400	QueryOutcome queryOutcome = new QueryOutcome();
401	String result = null;
402	String requestBody = "<paramList><param name='maxDocs' value='MAXDOCS'/><param name='level' value='Sec'/><param name ='index' value='INDEX'/><param name='matchMode' value='MATCHMODE'/><param name='query' value='QUERY'/><param name='case' value='CASE'/><param name='sortBy' value='SORTBY'/><param name='stem' value='STEM'/><param name='firstDoc' value='FIRSTDOC'/><param name='lastDoc' value='LASTDOC'/></paramList>";
403	requestBody = requestBody.replaceFirst("MAXDOCS", query
404	.getMaxDocsToReturn());
405	requestBody = requestBody.replaceFirst("INDEX", query.getIndex());
406	requestBody = requestBody.replaceFirst("MATCHMODE", query
407	.getMatchMode());
408	requestBody = requestBody.replaceFirst("QUERY", query.getQueryText());
409	requestBody = requestBody.replaceFirst("CASE", query.getCasefolding());
410	requestBody = requestBody.replaceFirst("SORTBY", query.getSortBy());
411	requestBody = requestBody.replaceFirst("STEM", query.getStemming());
412	requestBody = requestBody.replaceFirst("FIRSTDOC", query.getFirstDoc());
413	requestBody = requestBody.replaceFirst("LASTDOC", query.getLastDoc());
414	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
415	"TextQuery");
416	request = request.replaceFirst("PROCESSTYPE", "process");
417	request = request.replaceFirst("REQUESTBODY", requestBody);
418	request = SOAPrequestHeader.replaceFirst("PROCESSNAME", "TextQuery")
419	+ request.length() + "\n\n" + request;
420
421	int firstDoc = java.lang.Integer.parseInt(query.getFirstDoc());
422
423	result = doRequest(request);
424	// System.out.println("\n\n" + result + "\n");
425	StringReader sr = new StringReader(result);
426	InputSource is = new InputSource(sr);
427	DOMParser p = new DOMParser();
428	try {
429	p.parse(is);
430	} catch (SAXException se) {
431	System.err.println(se);
432	} catch (IOException ioe) {
433	System.err.println(ioe);
434	}
435	Document d = p.getDocument();
436	NodeList metadataList = d.getElementsByTagName("metadata");
437	for (int i = 0; i < metadataList.getLength(); i++) {
438	Node n = metadataList.item(i);
439	NamedNodeMap nnm = n.getAttributes();
440	Node att = nnm.getNamedItem("name");
441	if (att.getNodeValue().compareTo("numDocsMatched") == 0) {
442	queryOutcome.setHowManyDocsMatched(n.getFirstChild()
443	.getNodeValue());
444	} else if (att.getNodeValue().compareTo("numDocsReturned") == 0) {
445	queryOutcome.setHowManyDocsReturned(n.getFirstChild()
446	.getNodeValue());
447	}
448	}
449
450	NodeList documentList = d.getElementsByTagName("documentNode");
451	for (int i = 0; i < documentList.getLength(); i++) {
452	Node n = documentList.item(i);
453	NamedNodeMap nnm = n.getAttributes();
454	Node nid = nnm.getNamedItem("nodeID");
455	Node nscore = nnm.getNamedItem("rank");
456	String docID = nid.getFirstChild().getNodeValue();
457	queryOutcome.addResult(docID, firstDoc + i, nscore.getFirstChild()
458	.getNodeValue());
459	}
460	query.addQueryOutcome(queryOutcome);
461	Query q = (Query) query.clone();
462	queryList.add(q);
463
464	for (int i = 0; i < documentList.getLength(); i++) {
465	Node n = documentList.item(i);
466	NamedNodeMap nnm = n.getAttributes();
467	Node nid = nnm.getNamedItem("nodeID");
468	Node nscore = nnm.getNamedItem("rank");
469	String docID = nid.getFirstChild().getNodeValue();
470
471	QueryContext queryContext = new QueryContext(firstDoc + i, nscore
472	.getFirstChild().getNodeValue(), q);
473	if (allResults.containsKey(docID)) {
474	ResultDocument resultDocument = (ResultDocument) allResults
475	.get(docID);
476	resultDocument.incrementFrequencyReturned();
477	resultDocument.addQueryContext(queryContext);
478	allResults.put(docID, resultDocument);
479	} else {
480	ResultDocument resultDocument = new ResultDocument();
481	resultDocument.addQueryContext(queryContext);
482	allResults.put(docID, resultDocument);
483	}
484	}
485	return queryOutcome;
486	}
487
488	/**
489	* Produces a SOAP request string, sends it to the server, gets and
490	* processes the response updating the appropriate data structures. Given a
491	* document identifier and the name of a metadata item, this method produces
492	* a SOAP request string. The string is sent to the server using the
493	* {@link doRequest} method.
494	* <p>
495	* The request is simply for the values of the given metadata item of the
496	* given document. <b>If the metadata item for the given document has
497	* already been retrieved from the server, the server is NOT contacted
498	* again.</b>
499	* <p>
500	* The returned XML document is processed. The {@link ResultDocument} object
501	* for the document in question is updated with the returned metadata
502	* information, and the {@link allResults} data is consequently updated.
503	* <p>
504	* If the requested metadata is one of Keywords, Authors, Dates, Journals,
505	* Booktitles then the appropriate data structure is updated.
506	* <p>
507	* The method does not return a value. Private data structures are updated
508	* instead. The calling client application should proceed to access document
509	* metadata using the provided methods.
510	* <p>
511	*
512	* @param docID
513	* is a document identifier, in the form returned by the server
514	* and available from a {@link QueryOutcome}
515	* @param metadata
516	* is the metadata field whose value is to be retrieved. Valid
517	* values are
518	* <ul>
519	* <li>Title</li>
520	* <li>Creator (the authors)</li>
521	* <li>Journal</li>
522	* <li>Booktitle</li>
523	* <li>Volume</li>
524	* <li>Number</li>
525	* <li>Editor</li>
526	* <li>Pages</li>
527	* <li>Publisher</li>
528	* <li>Date</li>
529	* <li>Keywords</li>
530	* <li>Abstract</li>
531	* </ul>
532	*/
533	public void getDocumentMetadataFromServer(String docID, String metadata) {
534	ResultDocument resultDocument = (ResultDocument) allResults.get(docID);
535	if (resultDocument.metadataExists(metadata)) {
536	return;
537	}
538
539	String result = null;
540	String requestBody = "<paramList><param name='metadata' value='METADATAFIELD'/></paramList><documentNodeList><documentNode nodeID='DOCIDVALUE'/></documentNodeList>";
541	requestBody = requestBody.replaceFirst("METADATAFIELD", metadata);
542	requestBody = requestBody.replaceFirst("DOCIDVALUE", docID);
543	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
544	"DocumentMetadataRetrieve");
545	request = request.replaceFirst("PROCESSTYPE", "process");
546	request = request.replaceFirst("REQUESTBODY", requestBody);
547
548	request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
549	"DocumentMetadataRetrieve")
550	+ request.length() + "\n\n" + request;
551
552	result = doRequest(request);
553	StringReader sr = new StringReader(result);
554	InputSource is = new InputSource(sr);
555	DOMParser p = new DOMParser();
556	try {
557	p.parse(is);
558	} catch (SAXException se) {
559	System.err.println(se);
560	} catch (IOException ioe) {
561	System.err.println(ioe);
562	}
563	Document d = p.getDocument();
564	NodeList metadataList = d.getElementsByTagName("metadata");
565	String metadataval = null;
566	if (metadataList.getLength() > 0) {
567	Node n = metadataList.item(0);
568	metadataval = n.getFirstChild().getNodeValue();
569
570	if (metadata.compareTo("Keywords") == 0) {
571	String[] keywords = metadataval.split(",");
572	for (int i = 0; i < keywords.length; i++) {
573	String s = keywords[i].trim().toLowerCase();
574	resultDocument.addKeyword(s);
575	if (allKeywords.containsKey(s)) {
576	HashMap docMap = (HashMap) allKeywords.get(s);
577	docMap.put(docID, null);
578	allKeywords.put(s, docMap);
579	} else {
580	HashMap docMap = new HashMap();
581	docMap.put(docID, null);
582	allKeywords.put(s, docMap);
583	}
584	}
585	} else if (metadata.compareTo("Creator") == 0) {
586	String[] authors = metadataval.split("(,)\|( and )");
587	// System.err.println(metadataval);
588	for (int i = 0; i < authors.length; i++) {
589	authors[i] = authors[i].trim().toLowerCase();
590	}
591
592	boolean containsExtraName = authors.length % 2 != 0;
593
594	for (int i = 0; i + 1 < authors.length; i = i + 2) {
595	String s = authors[i] + ", " + authors[i + 1];
596
597	//Handle names with jr. in them
598	if (containsExtraName) {
599	if (i + 2 < authors.length
600	&& authors[i + 2].contains("jr")) {
601	s += " " + authors[i + 2];
602	i++;
603	}
604	}
605
606	s = s.replaceAll("[.]", "");
607	// System.err.println(s);
608	resultDocument.addAuthor(s);
609	if (allAuthors.containsKey(s)) {
610	HashMap docMap = (HashMap) allAuthors.get(s);
611	docMap.put(docID, null);
612	allAuthors.put(s, docMap);
613	} else {
614	HashMap docMap = new HashMap();
615	docMap.put(docID, null);
616	allAuthors.put(s, docMap);
617	}
618	}
619	} else if (metadata.compareTo("Title") == 0) {
620	resultDocument.setTitle(metadataval);
621	} else if (metadata.compareTo("Booktitle") == 0) {
622	resultDocument.setBooktitle(metadataval);
623	if (allBooktitles.containsKey(metadataval)) {
624	HashMap docMap = (HashMap) allBooktitles.get(metadataval);
625	docMap.put(docID, null);
626	allBooktitles.put(metadataval, docMap);
627	} else {
628	HashMap docMap = new HashMap();
629	docMap.put(docID, null);
630	allBooktitles.put(metadataval, docMap);
631	}
632	} else if (metadata.compareTo("Date") == 0) {
633	resultDocument.setDate(metadataval.replaceAll("[^0-9]", ""));
634	if (allDates.containsKey(metadataval)) {
635	HashMap docMap = (HashMap) allDates.get(metadataval);
636	docMap.put(docID, null);
637	allDates.put(metadataval, docMap);
638	} else {
639	HashMap docMap = new HashMap();
640	docMap.put(docID, null);
641	allDates.put(metadataval, docMap);
642	}
643	} else if (metadata.compareTo("Pages") == 0) {
644	resultDocument.setPages(metadataval);
645	} else if (metadata.compareTo("Journal") == 0) {
646	resultDocument.setJournal(metadataval);
647	if (allJournals.containsKey(metadataval)) {
648	HashMap docMap = (HashMap) allJournals.get(metadataval);
649	docMap.put(docID, null);
650	allJournals.put(metadataval, docMap);
651	} else {
652	HashMap docMap = new HashMap();
653	docMap.put(docID, null);
654	allJournals.put(metadataval, docMap);
655	}
656	} else if (metadata.compareTo("Volume") == 0) {
657	resultDocument.setVolume(metadataval);
658	} else if (metadata.compareTo("Number") == 0) {
659	resultDocument.setNumber(metadataval);
660	} else if (metadata.compareTo("Abstract") == 0) {
661	resultDocument.setAbstract(metadataval);
662	} else if (metadata.compareTo("Editor") == 0) {
663	resultDocument.setEditor(metadataval);
664	} else if (metadata.compareTo("Publisher") == 0) {
665	resultDocument.setPublisher(metadataval);
666	}
667
668	}
669	allResults.put(docID, resultDocument);
670	}
671
672	public String getClassifierNodeName(String nodeID) {
673	String result = null;
674	String requestBody = "<paramList><param name='metadata' value='Title'/></paramList><classifierNodeList><classifierNode nodeID='NODEID'/></classifierNodeList>";
675	requestBody = requestBody.replaceFirst("NODEID", nodeID);
676	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
677	"ClassifierBrowseMetadataRetrieve");
678	request = request.replaceFirst("PROCESSTYPE", "process");
679	request = request.replaceFirst("REQUESTBODY", requestBody);
680
681	request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
682	"ClassifierBrowseMetadataRetrieve")
683	+ request.length() + "\n\n" + request;
684
685	// System.err.println(request);
686	result = doRequest(request);
687	// System.err.println(result);
688
689	StringReader sr = new StringReader(result);
690	InputSource is = new InputSource(sr);
691	DOMParser p = new DOMParser();
692	try {
693	p.parse(is);
694	} catch (SAXException se) {
695	System.err.println(se);
696	} catch (IOException ioe) {
697	System.err.println(ioe);
698	}
699
700	String returnName = null;
701
702	Document d = p.getDocument();
703	NodeList metadataList = d.getElementsByTagName("metadata");
704	for (int i = 0; i < metadataList.getLength(); i++) {
705	Node n = metadataList.item(i);
706	NamedNodeMap nnm = n.getAttributes();
707	Node att = nnm.getNamedItem("name");
708	if (att.getNodeValue().compareTo("Title") == 0) {
709	returnName = n.getFirstChild().getNodeValue();
710	}
711	}
712	return returnName;
713	}
714
715	public void getClassifierNodes(String rootNode) {
716	String result = null;
717	String requestBody = "<paramList><param name='structure' value='children'/></paramList><classifierNodeList><classifierNode nodeID='CLASSIFIER'/></classifierNodeList>";
718	requestBody = requestBody.replaceFirst("CLASSIFIER", rootNode);
719	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
720	"ClassifierBrowse");
721	request = request.replaceFirst("PROCESSTYPE", "process");
722	request = request.replaceFirst("REQUESTBODY", requestBody);
723
724	request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
725	"ClassifierBrowse")
726	+ request.length() + "\n\n" + request;
727
728	System.err.println(getClassifierNodeName(rootNode));
729	// System.err.print(rootNode + "#");
730
731	// System.err.println(request);
732	result = doRequest(request);
733	// System.err.println(result);
734
735	StringReader sr = new StringReader(result);
736	InputSource is = new InputSource(sr);
737	DOMParser p = new DOMParser();
738	try {
739	p.parse(is);
740	} catch (SAXException se) {
741	System.err.println(se);
742	} catch (IOException ioe) {
743	System.err.println(ioe);
744	}
745	Document d = p.getDocument();
746
747	NodeList childList = d.getElementsByTagName("classifierNode");
748	NodeList documentList = d.getElementsByTagName("documentNode");
749	// System.err.println("\td " + documentList.getLength());
750	// System.err.println("\tc " + childList.getLength());
751
752	if (childList.getLength() > 0) {
753	for (int i = 0; i < childList.getLength(); i++) {
754	Node n = childList.item(i);
755	NamedNodeMap nnm = n.getAttributes();
756	Node nid = nnm.getNamedItem("nodeID");
757	String nodeID = nid.getFirstChild().getNodeValue();
758
759	// System.err.println("\tchild : " + nodeID);
760
761	if (nodeID.compareTo(rootNode) != 0
762	&& nodeID.compareTo("2.6.22") != 0) {
763	// System.err.println("\t" + nodeID);
764	getClassifierNodes(nodeID);
765	}
766	}
767	}
768	if (documentList.getLength() > 0)
769	System.out.println(getClassifierNodeName(rootNode) + "#"
770	+ documentList.getLength());
771	}
772	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: