Context Navigation

Greenstone3Connection.java@ 1443

Last change on this file since 1443 was 919, checked in by jts21, 10 years ago
Added license headers to all files, added full GPL3 license file, moved license header generator script to dev/bin/scripts
File size: 28.1 KB

Line
1	/**
2	* Greenstone3Connection.java
3	* Copyright (C) 2010 New Zealand Digital Library, http://expeditee.org
4	*
5	* This program is free software: you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation, either version 3 of the License, or
8	* (at your option) any later version.
9	*
10	* This program is distributed in the hope that it will be useful,
11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	* GNU General Public License for more details.
14	*
15	* You should have received a copy of the GNU General Public License
16	* along with this program. If not, see <http://www.gnu.org/licenses/>.
17	*/
18
19	package org.expeditee.greenstone;
20
21	import java.io.BufferedReader;
22	import java.io.IOException;
23	import java.io.InputStreamReader;
24	import java.io.PrintWriter;
25	import java.io.StringReader;
26	import java.net.Socket;
27	import java.net.UnknownHostException;
28	import java.util.ArrayList;
29	import java.util.Collections;
30	import java.util.HashMap;
31	import java.util.HashSet;
32	import java.util.Iterator;
33	import java.util.List;
34	import java.util.ListIterator;
35	import java.util.Map;
36	import java.util.Set;
37
38	import org.apache.xerces.parsers.DOMParser;
39	import org.w3c.dom.Document;
40	import org.w3c.dom.NamedNodeMap;
41	import org.w3c.dom.Node;
42	import org.w3c.dom.NodeList;
43	import org.xml.sax.InputSource;
44	import org.xml.sax.SAXException;
45
46	/**
47	* This class provides a simple API for communicating with a Greenstone 3 server
48	* using SOAP.
49	* <p>
50	* Greenstone 3 does not yet 'properly' implement SOAP-based web services. We
51	* would like to use a Greenstone WSDL (Web Services Definition Language) file
52	* and a higher level SOAP Client interface. But we can't. To get around this,
53	* this API uses a simple socket connection to the Greenstone 3 server, and
54	* sends SOAP requests as strings (XML documents). This works but isn't elegant.
55	* The server responds with a string representing an XML document.
56	* <p>
57	* The server's hostname and port are hard-coded. <b>Do not modify them.</b>
58	* <p>
59	* The Greenstone collection to use is <i>hcibib</i>, and this is also
60	* hard-coded. <b>Do not modify this.</b>
61	* <p>
62	* This collection can be accessed from a web browser at <a
63	* href="http://delaware.resnet.scms.waikato.ac.nz:8111/greenstone3/library?a=p&sa=about&c=hcibib">
64	* this location</a>.
65	*/
66	public class Greenstone3Connection {
67	/** an ordered list of the {@link Query} objects recorded by this session */
68	private List<Query> queryList;
69
70	/**
71	* a map of {@link ResultDocument} objects with document IDs as the
72	* keys. All the results returned in this session.
73	*/
74	private Map<String, ResultDocument> allResults;
75
76	/**
77	* an index built from the Keywords metadata retrieved for documents in
78	* this session. Each value in the map is the set of IDs of the documents
79	* that were recorded under that key.
80	*/
81	private Map<String, Set<String>> allKeywords;
82
83	/**
84	* an index keyed on author names; each value is a set of document IDs
85	*/
86	private Map<String, Set<String>> allAuthors;
87
88	/**
89	* an index keyed on the publication dates found for documents returned
90	* in this session. Each value in the map is the set of IDs of the
91	* documents that were recorded under that date.
92	*/
93	private Map<String, Set<String>> allDates;
94
95	/**
96	* an index keyed on the journal names found for documents returned in
97	* this session. Each value in the map is the set of IDs of the documents
98	* that were recorded under that journal.
99	*/
100	private Map<String, Set<String>> allJournals;
101
102	/**
103	* an index keyed on the book titles found for documents returned in
104	* this session. Each value in the map is the set of IDs of the documents
105	* that were recorded under that book title.
106	*/
107	private Map<String, Set<String>> allBooktitles;
108
109	/** the <i>hostname</i> where the Greenstone 3 server is running */
110	private String hostname;
111
112	/** the <i>port</i> on which the Greenstone 3 server is running */
113	private int port;
114
115	/** for communication with the server; reassigned on every request */
116	private Socket socket = null;
117
118	/** for writing the SOAP request strings to the server socket */
119	private PrintWriter toGSDL = null;
120
121	/** for reading the SOAP response strings from the server socket */
122	private BufferedReader fromGSDL = null;
123
124	/** HTTP header lines that start every SOAP request, ending at "Content-Length: " */
125	private String SOAPrequestHeader;
126
127	/** template for every SOAP envelope; PROCESSNAME, PROCESSTYPE and REQUESTBODY are placeholders */
128	private String SOAPrequestMessage = "<?xml version='1.0' encoding='UTF-8'?><soapenv:Envelope xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/' xmlns:xsd='http://www.w3.org/2001/XMLSchema' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'><soapenv:Body><message><request lang='en' to='hcibib/PROCESSNAME' type='PROCESSTYPE'>REQUESTBODY</request></message></soapenv:Body></soapenv:Envelope>";
129
/**
 * Selects one of the two hard-coded Greenstone 3 servers and sets up empty
 * session stores. A client application will normally create only one
 * instance of this class, for example
 *
 * <pre>
 * Greenstone3Connection gsdl = new Greenstone3Connection(0);
 * </pre>
 *
 * The constructor only assigns fields; it does not contact the server. It
 * initialises
 * <ul>
 * <li><i>hostname</i>, <i>port</i> and the matching HTTP request header</li>
 * <li><i>queryList</i>, a synchronized list of {@link Query} objects</li>
 * <li><i>allResults</i>, a synchronized map from document ID to
 * {@link ResultDocument}</li>
 * <li><i>allKeywords</i>, <i>allAuthors</i>, <i>allDates</i>,
 * <i>allJournals</i> and <i>allBooktitles</i>, synchronized maps whose
 * values are sets of document IDs</li>
 * </ul>
 *
 * @param location
 *            0 selects comp537.cs.waikato.ac.nz on port 80; any other
 *            value selects 130.217.220.10 on port 8111
 */
public Greenstone3Connection(int location) {
    // Session stores first; they do not depend on which server is chosen.
    queryList = Collections.synchronizedList(new ArrayList<Query>());
    allResults = Collections.synchronizedMap(new HashMap<String, ResultDocument>());
    allKeywords = Collections.synchronizedMap(new HashMap<String, Set<String>>());
    allAuthors = Collections.synchronizedMap(new HashMap<String, Set<String>>());
    allDates = Collections.synchronizedMap(new HashMap<String, Set<String>>());
    allJournals = Collections.synchronizedMap(new HashMap<String, Set<String>>());
    allBooktitles = Collections.synchronizedMap(new HashMap<String, Set<String>>());

    if (location != 0) {
        hostname = "130.217.220.10";
        port = 8111;
        SOAPrequestHeader = "POST /greenstone3/services/localsite HTTP/1.1\nHost: 130.217.220.10:8111\nSOAPAction: hcibib/PROCESSNAME\nContent-Type: text/xml;charset=utf-8\nContent-Length: ";
    } else {
        hostname = "comp537.cs.waikato.ac.nz";
        port = 80;
        SOAPrequestHeader = "POST /greenstone3/services/localsite HTTP/1.1\nHost: comp537.cs.waikato.ac.nz:80\nSOAPAction: hcibib/PROCESSNAME\nContent-Type: text/xml;charset=utf-8\nContent-Length: ";
    }
}
184
185	public Map<String, ResultDocument> getSessionResults() {
186	return this.allResults;
187	};
188
189	/**
190	* Print a string representation of the list of queries issued in this
191	* session.
192	*/
193	public void dumpQueryList() {
194	ListIterator iter = queryList.listIterator();
195	while (iter.hasNext()) {
196	Query query = (Query) iter.next();
197	System.out.println(query.toString());
198	}
199	}
200
201	/**
202	* Print a string representation of the Booktitles occuring for all query
203	* results in this session. For each booktitle print the IDs of the
204	* documents with that booktitle.
205	*/
206	public void dumpAllBooktitles() {
207	Set keys = allBooktitles.keySet();
208	Iterator iter = keys.iterator();
209	while (iter.hasNext()) {
210	String booktitle = (String) iter.next();
211	HashMap docMap = (HashMap) allBooktitles.get(booktitle);
212	System.out.println(booktitle);
213	System.out.println(docMap.keySet().toString());
214	}
215	}
216
217	/**
218	* Print a string representation of the Journals occuring for all query
219	* results in this session. For each journal print the IDs of the documents
220	* with that journal.
221	*/
222	public void dumpAllJournals() {
223	Set keys = allJournals.keySet();
224	Iterator iter = keys.iterator();
225	while (iter.hasNext()) {
226	String journal = (String) iter.next();
227	HashMap docMap = (HashMap) allJournals.get(journal);
228	System.out.println(journal);
229	System.out.println(docMap.keySet().toString());
230	}
231	}
232
233	/**
234	* Print a string representation of the Dates occuring for all query results
235	* in this session. For each date print the IDs of the documents with that
236	* date.
237	*/
238	public void dumpAllDates() {
239	Set keys = allDates.keySet();
240	Iterator iter = keys.iterator();
241	while (iter.hasNext()) {
242	String date = (String) iter.next();
243	HashMap docMap = (HashMap) allDates.get(date);
244	System.out.println(date);
245	System.out.println(docMap.keySet().toString());
246	}
247	}
248
249	/**
250	* Print a string representation of the Authors occuring for all query
251	* results in this session. For each author print the IDs of the documents
252	* with that author.
253	*/
254	public void dumpAllAuthors() {
255	Set keys = allAuthors.keySet();
256	Iterator iter = keys.iterator();
257	while (iter.hasNext()) {
258	String author = (String) iter.next();
259	HashMap docMap = (HashMap) allAuthors.get(author);
260	System.out.println(author);
261	System.out.println(docMap.keySet().toString());
262	}
263	}
264
265	/**
266	* Print a string representation of the Keywords occuring for all query
267	* results in this session. For each keyword print the IDs of the documents
268	* with that keyword.
269	*/
270	public void dumpAllKeywords() {
271	Set keys = allKeywords.keySet();
272	Iterator iter = keys.iterator();
273	while (iter.hasNext()) {
274	String keyword = (String) iter.next();
275	HashMap docMap = (HashMap) allKeywords.get(keyword);
276	System.out.println(keyword);
277	System.out.println(docMap.keySet().toString());
278	}
279	}
280
281	/**
282	* Print a string representation of all the result documents returned by
283	* queries in this session.
284	*/
285	public void dumpAllResults() {
286	Set keys = allResults.keySet();
287	Iterator iter = keys.iterator();
288
289	while (iter.hasNext()) {
290	String docID = (String) iter.next();
291	ResultDocument resultDocument = allResults.get(docID);
292	System.out.println("____________" + docID + " ___________");
293	System.out.println(resultDocument.toString());
294	}
295	}
296
297	/**
298	* Print all the result documents IDs returned by queries in this session,
299	* along with their titles.
300	*/
301	public void dumpAllTitles() {
302	Set keys = allResults.keySet();
303	Iterator iter = keys.iterator();
304	while (iter.hasNext()) {
305	String docID = (String) iter.next();
306	ResultDocument resultDocument = allResults.get(docID);
307	System.out.println(docID + "\t" + resultDocument.getTitle());
308	}
309	}
310
311	/**
312	* Provides the {@link ResultDocument} object for the document with the
313	* given ID
314	*
315	* @param docID
316	* is a document identifier, in the form returned by the server
317	* and available from a {@link QueryOutcome}
318	* @return the {@link ResultDocument} object reflecting the state of the
319	* result document at the time that this method was called. The
320	* state can change as more metadata is retrieved for the document
321	* and the document is returned by further queries.
322	*/
323	public ResultDocument getDocument(String docID) {
324	return allResults.get(docID);
325	}
326
327	/**
328	* Implements the actual communication with the server. <b>You can not call
329	* this method directly from your client code.</b>
330	* <p>
331	* Throws an exception and exits if the hosthame is not known or the
332	* connection can't be established.
333	* <p>
334	*
335	* @param request
336	* an already well formed string that contains the appropriate
337	* HTTP headers and a SOAP message (in XML form) that will ask
338	* the server for some information.
339	* @return a string containing a SOAP message (an XML document) that the
340	* server returned in response to the request
341	*/
342	private String doRequest(String request) {
343	// System.err.println("Connecting to " + hostname + " on port " + port);
344	try {
345	try {
346	socket = new Socket(hostname, port);
347	} catch (SecurityException se) {
348	System.err.println("Security exception : " + se);
349	System.exit(1);
350	}
351	toGSDL = new PrintWriter(socket.getOutputStream(), true);
352	fromGSDL = new BufferedReader(new InputStreamReader(socket
353	.getInputStream()));
354	} catch (UnknownHostException e) {
355	System.err.println("Don't know about GSDL host: " + hostname);
356	System.exit(1);
357	} catch (IOException e) {
358	System.err.println("IO exception : " + e);
359	System.exit(1);
360	}
361
362	String result = null;
363	toGSDL.println(request);
364	// System.err.println("Issued request to " + hostname + " on port " +
365	// port);
366	try {
367	String terminator = "Envelope>";
368	String response = "";
369
370	char c;
371	do {
372	c = (char) fromGSDL.read();
373	response = response + c;
374	} while (!response.endsWith(terminator));
375	toGSDL.close();
376	fromGSDL.close();
377	socket.close();
378
379	int start = response.indexOf("<?xml");
380	result = response.substring(start);
381	// System.out.println(result);
382	int a = result.indexOf('\n');
383	int b = result.indexOf('\n', a + 1);
384	while (a != -1 && b != -1) {
385	// System.out.println(a + " " +b);
386	result = result.substring(0, a - 1) + result.substring(b + 1);
387	a = result.indexOf('\n');
388	b = result.indexOf('\n', a + 1);
389	}
390	} catch (IOException e) {
391	System.err.println(e);
392	System.exit(1);
393	}
394	return result;
395	}
396
397	/**
398	* Produces a SOAP request string, sends it to the server, gets and
399	* processes the response updating the appropriate data structures. Uses the
400	* settings represented in the provided argument to produce a SOAP request
401	* string. The string is sent to the server using the {@link doRequest}
402	* method. The returned XML document is processed and the information
403	* therein is used to store information about the returned documents and
404	* this query.
405	* <p>
406	* This method updates the {@link queryList} and {@link allResults} data
407	* <p>
408	*
409	* @param query
410	* a {@link Query} object that must be constructed and passed to
411	* this method by the calling client application
412	* @return a {@link QueryOutcome} object that stores information about the
413	* server's response
414	*
415	*/
416	public QueryOutcome issueQueryToServer(Query query) {
417	QueryOutcome queryOutcome = new QueryOutcome();
418	String result = null;
419	String requestBody = "<paramList><param name='maxDocs' value='MAXDOCS'/><param name='level' value='Sec'/><param name ='index' value='INDEX'/><param name='matchMode' value='MATCHMODE'/><param name='query' value='QUERY'/><param name='case' value='CASE'/><param name='sortBy' value='SORTBY'/><param name='stem' value='STEM'/><param name='firstDoc' value='FIRSTDOC'/><param name='lastDoc' value='LASTDOC'/></paramList>";
420	requestBody = requestBody.replaceFirst("MAXDOCS", query
421	.getMaxDocsToReturn());
422	requestBody = requestBody.replaceFirst("INDEX", query.getIndex());
423	requestBody = requestBody.replaceFirst("MATCHMODE", query
424	.getMatchMode());
425	requestBody = requestBody.replaceFirst("QUERY", query.getQueryText());
426	requestBody = requestBody.replaceFirst("CASE", query.getCasefolding());
427	requestBody = requestBody.replaceFirst("SORTBY", query.getSortBy());
428	requestBody = requestBody.replaceFirst("STEM", query.getStemming());
429	requestBody = requestBody.replaceFirst("FIRSTDOC", query.getFirstDoc());
430	requestBody = requestBody.replaceFirst("LASTDOC", query.getLastDoc());
431	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
432	"TextQuery");
433	request = request.replaceFirst("PROCESSTYPE", "process");
434	request = request.replaceFirst("REQUESTBODY", requestBody);
435	request = SOAPrequestHeader.replaceFirst("PROCESSNAME", "TextQuery")
436	+ request.length() + "\n\n" + request;
437
438	int firstDoc = java.lang.Integer.parseInt(query.getFirstDoc());
439
440	result = doRequest(request);
441	// System.out.println("\n\n" + result + "\n");
442	StringReader sr = new StringReader(result);
443	InputSource is = new InputSource(sr);
444
445	DOMParser p = new DOMParser();
446	try {
447	p.parse(is);
448	} catch (SAXException se) {
449	System.err.println(se);
450	} catch (IOException ioe) {
451	System.err.println(ioe);
452	}
453	Document d = p.getDocument();
454	NodeList metadataList = d.getElementsByTagName("metadata");
455	for (int i = 0; i < metadataList.getLength(); i++) {
456	Node n = metadataList.item(i);
457	NamedNodeMap nnm = n.getAttributes();
458	Node att = nnm.getNamedItem("name");
459	if (att.getNodeValue().compareTo("numDocsMatched") == 0) {
460	queryOutcome.setHowManyDocsMatched(n.getFirstChild()
461	.getNodeValue());
462	} else if (att.getNodeValue().compareTo("numDocsReturned") == 0) {
463	queryOutcome.setHowManyDocsReturned(n.getFirstChild()
464	.getNodeValue());
465	}
466	}
467
468	NodeList documentList = d.getElementsByTagName("documentNode");
469	for (int i = 0; i < documentList.getLength(); i++) {
470	Node n = documentList.item(i);
471	NamedNodeMap nnm = n.getAttributes();
472	Node nid = nnm.getNamedItem("nodeID");
473	Node nscore = nnm.getNamedItem("rank");
474	String docID = nid.getFirstChild().getNodeValue();
475	queryOutcome.addResult(docID, firstDoc + i, nscore.getFirstChild()
476	.getNodeValue());
477	}
478	query.addQueryOutcome(queryOutcome);
479	Query q = (Query) query.clone();
480	queryList.add(q);
481
482	for (int i = 0; i < documentList.getLength(); i++) {
483	Node n = documentList.item(i);
484	NamedNodeMap nnm = n.getAttributes();
485	Node nid = nnm.getNamedItem("nodeID");
486	Node nscore = nnm.getNamedItem("rank");
487	String docID = nid.getFirstChild().getNodeValue();
488
489	QueryContext queryContext = new QueryContext(firstDoc + i, nscore
490	.getFirstChild().getNodeValue(), q);
491	if (allResults.containsKey(docID)) {
492	ResultDocument resultDocument = allResults.get(docID);
493	resultDocument.incrementFrequencyReturned();
494	resultDocument.addQueryContext(queryContext);
495	allResults.put(docID, resultDocument);
496	} else {
497	ResultDocument resultDocument = new ResultDocument();
498	resultDocument.addQueryContext(queryContext);
499	allResults.put(docID, resultDocument);
500	}
501	}
502	return queryOutcome;
503	}
504
505	/**
506	* Produces a SOAP request string, sends it to the server, gets and
507	* processes the response updating the appropriate data structures. Given a
508	* document identifier and the name of a metadata item, this method produces
509	* a SOAP request string. The string is sent to the server using the
510	* {@link doRequest} method.
511	* <p>
512	* The request is simply for the values of the given metadata item of the
513	* given document. <b>If the metadata item for the given document has
514	* already been retrieved from the server, the server is NOT contacted
515	* again.</b>
516	* <p>
517	* The returned XML document is processed. The {@link ResultDocument} object
518	* for the document in question is updated with the returned metadata
519	* information, and the {@link allResults} data is consequently updated.
520	* <p>
521	* If the requested metadata is one of Keywords, Authors, Dates, Journals,
522	* Booktitles then the appropriate data structure is updated.
523	* <p>
524	* The method does not return a value. Private data structures are updated
525	* instead. The calling client application should proceed to access document
526	* metadata using the provided methods.
527	* <p>
528	*
529	* @param docID
530	* is a document identifier, in the form returned by the server
531	* and available from a {@link QueryOutcome}
532	* @param metadata
533	* is the metadata field whose value is to be retrieved. Valid
534	* values are
535	* <ul>
536	* <li>Title</li>
537	* <li>Creator (the authors)</li>
538	* <li>Journal</li>
539	* <li>Booktitle</li>
540	* <li>Volume</li>
541	* <li>Number</li>
542	* <li>Editor</li>
543	* <li>Pages</li>
544	* <li>Publisher</li>
545	* <li>Date</li>
546	* <li>Keywords</li>
547	* <li>Abstract</li>
548	* </ul>
549	*/
550	public void getDocumentMetadataFromServer(String docID, String metadata) {
551	ResultDocument resultDocument = allResults.get(docID);
552	if (resultDocument.metadataExists(metadata)) {
553	return;
554	}
555
556	String result = null;
557	String requestBody = "<paramList><param name='metadata' value='METADATAFIELD'/></paramList><documentNodeList><documentNode nodeID='DOCIDVALUE'/></documentNodeList>";
558	requestBody = requestBody.replaceFirst("METADATAFIELD", metadata);
559	requestBody = requestBody.replaceFirst("DOCIDVALUE", docID);
560	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
561	"DocumentMetadataRetrieve");
562	request = request.replaceFirst("PROCESSTYPE", "process");
563	request = request.replaceFirst("REQUESTBODY", requestBody);
564
565	request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
566	"DocumentMetadataRetrieve")
567	+ request.length() + "\n\n" + request;
568
569	result = doRequest(request);
570	StringReader sr = new StringReader(result);
571	InputSource is = new InputSource(sr);
572	DOMParser p = new DOMParser();
573	try {
574	p.parse(is);
575	} catch (SAXException se) {
576	System.err.println(se);
577	} catch (IOException ioe) {
578	System.err.println(ioe);
579	}
580	Document d = p.getDocument();
581	NodeList metadataList = d.getElementsByTagName("metadata");
582	String metadataval = null;
583	if (metadataList.getLength() > 0) {
584	Node n = metadataList.item(0);
585	metadataval = n.getFirstChild().getNodeValue();
586
587	if (metadata.compareTo("Keywords") == 0) {
588	String[] keywords = metadataval.split(",");
589	for (int i = 0; i < keywords.length; i++) {
590	String s = keywords[i].trim().toLowerCase();
591	resultDocument.addKeyword(s);
592	Set<String> docSet = allKeywords.get(metadataval);
593	if (docSet == null) {
594	docSet = new HashSet<String>();
595	}
596	docSet.add(docID);
597	allKeywords.put(metadataval, docSet);
598	}
599	} else if (metadata.compareTo("Creator") == 0) {
600	String[] authors = metadataval.split("(,)\|( and )");
601	// System.err.println(metadataval);
602	for (int i = 0; i < authors.length; i++) {
603	authors[i] = authors[i].trim().toLowerCase();
604	}
605
606	boolean containsExtraName = authors.length % 2 != 0;
607
608	for (int i = 0; i + 1 < authors.length; i = i + 2) {
609	String s = authors[i] + ", " + authors[i + 1];
610
611	// Handle names with jr. in them
612	if (containsExtraName) {
613	if (i + 2 < authors.length
614	&& authors[i + 2].contains("jr")) {
615	s += " " + authors[i + 2];
616	i++;
617	}
618	}
619
620	s = s.replaceAll("[.]", "");
621	// System.err.println(s);
622	resultDocument.addAuthor(s);
623
624	Set<String> docSet = allAuthors.get(s);
625	if (docSet == null) {
626	docSet = new HashSet<String>();
627	}
628	docSet.add(docID);
629	allAuthors.put(s, docSet);
630	}
631	} else if (metadata.compareTo("Title") == 0) {
632	resultDocument.setTitle(metadataval);
633	} else if (metadata.compareTo("Booktitle") == 0) {
634	resultDocument.setBooktitle(metadataval);
635
636	Set<String> docSet = allBooktitles.get(metadataval);
637	if (docSet == null) {
638	docSet = new HashSet<String>();
639	}
640	docSet.add(docID);
641	allBooktitles.put(metadataval, docSet);
642	} else if (metadata.compareTo("Date") == 0) {
643	resultDocument.setDate(metadataval.replaceAll("[^0-9]", ""));
644	Set<String> docSet = allDates.get(metadataval);
645	if (docSet == null) {
646	docSet = new HashSet<String>();
647	}
648	docSet.add(docID);
649	allDates.put(metadataval, docSet);
650	} else if (metadata.compareTo("Pages") == 0) {
651	resultDocument.setPages(metadataval);
652	} else if (metadata.compareTo("Journal") == 0) {
653	resultDocument.setJournal(metadataval);
654	Set<String> docSet = allJournals.get(metadataval);
655	if (docSet == null) {
656	docSet = new HashSet<String>();
657	}
658	docSet.add(docID);
659	allJournals.put(metadataval, docSet);
660	} else if (metadata.compareTo("Volume") == 0) {
661	resultDocument.setVolume(metadataval);
662	} else if (metadata.compareTo("Number") == 0) {
663	resultDocument.setNumber(metadataval);
664	} else if (metadata.compareTo("Abstract") == 0) {
665	resultDocument.setAbstract(metadataval);
666	} else if (metadata.compareTo("Editor") == 0) {
667	resultDocument.setEditor(metadataval);
668	} else if (metadata.compareTo("Publisher") == 0) {
669	resultDocument.setPublisher(metadataval);
670	}
671
672	}
673	allResults.put(docID, resultDocument);
674	}
675
676	public String getClassifierNodeName(String nodeID) {
677	String result = null;
678	String requestBody = "<paramList><param name='metadata' value='Title'/></paramList><classifierNodeList><classifierNode nodeID='NODEID'/></classifierNodeList>";
679	requestBody = requestBody.replaceFirst("NODEID", nodeID);
680	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
681	"ClassifierBrowseMetadataRetrieve");
682	request = request.replaceFirst("PROCESSTYPE", "process");
683	request = request.replaceFirst("REQUESTBODY", requestBody);
684
685	request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
686	"ClassifierBrowseMetadataRetrieve")
687	+ request.length() + "\n\n" + request;
688
689	// System.err.println(request);
690	result = doRequest(request);
691	// System.err.println(result);
692
693	StringReader sr = new StringReader(result);
694	InputSource is = new InputSource(sr);
695	DOMParser p = new DOMParser();
696	try {
697	p.parse(is);
698	} catch (SAXException se) {
699	System.err.println(se);
700	} catch (IOException ioe) {
701	System.err.println(ioe);
702	}
703
704	String returnName = null;
705
706	Document d = p.getDocument();
707	// Document d = null;
708	NodeList metadataList = d.getElementsByTagName("metadata");
709	for (int i = 0; i < metadataList.getLength(); i++) {
710	Node n = metadataList.item(i);
711	NamedNodeMap nnm = n.getAttributes();
712	Node att = nnm.getNamedItem("name");
713	if (att.getNodeValue().compareTo("Title") == 0) {
714	returnName = n.getFirstChild().getNodeValue();
715	}
716	}
717	return returnName;
718	}
719
720	public void getClassifierNodes(String rootNode) {
721	String result = null;
722	String requestBody = "<paramList><param name='structure' value='children'/></paramList><classifierNodeList><classifierNode nodeID='CLASSIFIER'/></classifierNodeList>";
723	requestBody = requestBody.replaceFirst("CLASSIFIER", rootNode);
724	String request = SOAPrequestMessage.replaceFirst("PROCESSNAME",
725	"ClassifierBrowse");
726	request = request.replaceFirst("PROCESSTYPE", "process");
727	request = request.replaceFirst("REQUESTBODY", requestBody);
728
729	request = SOAPrequestHeader.replaceFirst("PROCESSNAME",
730	"ClassifierBrowse")
731	+ request.length() + "\n\n" + request;
732
733	System.err.println(getClassifierNodeName(rootNode));
734	// System.err.print(rootNode + "#");
735
736	// System.err.println(request);
737	result = doRequest(request);
738	// System.err.println(result);
739
740	StringReader sr = new StringReader(result);
741	InputSource is = new InputSource(sr);
742	DOMParser p = new DOMParser();
743	try {
744	p.parse(is);
745	} catch (SAXException se) {
746	System.err.println(se);
747	} catch (IOException ioe) {
748	System.err.println(ioe);
749	}
750	Document d = p.getDocument();
751
752	NodeList childList = d.getElementsByTagName("classifierNode");
753	NodeList documentList = d.getElementsByTagName("documentNode");
754	// System.err.println("\td " + documentList.getLength());
755	// System.err.println("\tc " + childList.getLength());
756
757	if (childList.getLength() > 0) {
758	for (int i = 0; i < childList.getLength(); i++) {
759	Node n = childList.item(i);
760	NamedNodeMap nnm = n.getAttributes();
761	Node nid = nnm.getNamedItem("nodeID");
762	String nodeID = nid.getFirstChild().getNodeValue();
763
764	// System.err.println("\tchild : " + nodeID);
765
766	if (nodeID.compareTo(rootNode) != 0
767	&& nodeID.compareTo("2.6.22") != 0) {
768	// System.err.println("\t" + nodeID);
769	getClassifierNodes(nodeID);
770	}
771	}
772	}
773	if (documentList.getLength() > 0)
774	System.out.println(getClassifierNodeName(rootNode) + "#"
775	+ documentList.getLength());
776	}
777	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/org/expeditee/greenstone/Greenstone3Connection.java@ 1443

Download in other formats: