1414import org .w3c .dom .Element ;
1515import org .xml .sax .SAXException ;
1616
17- import java .io .ByteArrayInputStream ;
18- import java .io .ByteArrayOutputStream ;
19- import java .io .IOException ;
20- import java .io .InputStream ;
21- import java .io .InputStreamReader ;
22- import java .io .Reader ;
17+ import java .io .*;
2318import java .net .HttpURLConnection ;
2419import java .net .URL ;
2520import javax .xml .parsers .DocumentBuilder ;
2621import javax .xml .parsers .DocumentBuilderFactory ;
2722import javax .xml .parsers .ParserConfigurationException ;
28- import javax .xml .transform .Result ;
29- import javax .xml .transform .Transformer ;
30- import javax .xml .transform .TransformerException ;
31- import javax .xml .transform .TransformerFactory ;
23+ import javax .xml .transform .*;
3224import javax .xml .transform .dom .DOMSource ;
3325import javax .xml .transform .stream .StreamResult ;
3426
@@ -61,9 +53,13 @@ public final class SruOpener extends DefaultObjectPipe<String, ObjectReceiver<Re
6153 private int maximumRecords = MAXIMUM_RECORDS ;
6254 private int startRecord = START_RECORD ;
6355 private int totalRecords = Integer .MAX_VALUE ;
56+ int numberOfRecords = Integer .MAX_VALUE ;
6457
6558 private boolean stopRetrieving ;
59+ private int recordsRetrieved ;
6660
61+ private String xmlDeclarationTemplate ="<?xml version=\" %s\" encoding=\" %s\" ?>" ;
62+ private String xmlDeclaration ;
6763
6864 /**
6965 * Default constructor
@@ -149,51 +145,83 @@ public void setVersion(final String version) {
149145 @ Override
150146 public void process (final String baseUrl ) {
151147
152- try {
148+ StringBuilder srUrl = new StringBuilder (baseUrl );
149+ if (query != null ) {
150+ srUrl .append ("?query=" ).append (query ).append ("&operation=" ).append (operation ).append ("&recordSchema=" )
151+ .append (recordSchema ).append ("&version=" ).append (version );
152+ } else {
153+ throw new IllegalArgumentException ("Missing mandatory parameter 'query'" );
154+ }
153155
154- StringBuilder srUrl = new StringBuilder (baseUrl );
155- if (query != null ) {
156- srUrl .append ("?query=" ).append (query ).append ("&operation=" ).append (operation ).append ("&recordSchema=" ).append (recordSchema ).append ("&version=" ).append (version );
157- }
158- else {
159- throw new IllegalArgumentException ("Missing mandatory parameter 'query'" );
160- }
161- int numberOfRecords = Integer .MAX_VALUE ;
162- TransformerFactory tf = TransformerFactory .newInstance ();
163- Transformer t = tf .newTransformer ();
164- while (!stopRetrieving && (startRecord < numberOfRecords )) {
165- /* if (totalRecords >0) {
166- yetToRetrieveRecords = totalRecords - retrievedRecords;
167- if (yetToRetrieveRecords < maximumRecords) {
168- maximumRecords = yetToRetrieveRecords;
156+ try {
157+ //get first document and add a starting root tag
158+ Transformer t = TransformerFactory .newInstance ().newTransformer ();
159+ BufferedReader bufferedReader = new BufferedReader (new InputStreamReader (getXmlDocsViaSru (srUrl )));
160+ String line ;
161+ StringBuilder stringBuilder = new StringBuilder (1024 * 1024 );
162+ boolean rootTagAdded = false ;
163+ while ((line = bufferedReader .readLine ()) != null ) {
164+ if (!rootTagAdded ) {
165+ if (line .matches (".*searchRetrieveResponse.*" )) {
166+ stringBuilder .append (xmlDeclaration +"\n " );
167+ stringBuilder .append ("<harvest>\n " );
168+ rootTagAdded = true ;
169169 }
170- }*/
171- ByteArrayInputStream byteArrayInputStream = retrieve (srUrl , startRecord , maximumRecords );
172-
173-
174- DocumentBuilderFactory factory =DocumentBuilderFactory .newInstance ();
175- DocumentBuilder docBuilder = factory .newDocumentBuilder ();
176- Document xmldoc = docBuilder .parse (byteArrayInputStream );
177-
178- Element element = (Element )xmldoc .getElementsByTagName ("numberOfRecords" ).item (0 );
179- numberOfRecords =Integer .parseInt (element .getTextContent ());
180-
181- ByteArrayOutputStream os = new ByteArrayOutputStream ();
182- Result result = new StreamResult (os );
183- t .transform (new DOMSource (xmldoc ), result );
184- ByteArrayInputStream inputStream = new ByteArrayInputStream (os .toByteArray ());
185-
186- getReceiver ().process (
187- new InputStreamReader (inputStream ));
188- tf = TransformerFactory .newInstance ();
189- t = tf .newTransformer ();
190- t .setOutputProperty ("omit-xml-declaration" , "yes" );
191- startRecord = startRecord + maximumRecords ;
170+ }
171+ stringBuilder .append (line +"\n " );
172+ }
173+ getReceiver ().process (new InputStreamReader (new ByteArrayInputStream (stringBuilder .toString ().getBytes ())));
174+ while (!stopRetrieving && recordsRetrieved < totalRecords && (startRecord < numberOfRecords )) {
175+ InputStream inputStream = getXmlDocsViaSru (srUrl );
176+ getReceiver ().process (new InputStreamReader (inputStream ));
192177 }
178+ //close root tag
179+ getReceiver ().process (new InputStreamReader (new ByteArrayInputStream ("</harvest>\n \n " .getBytes ())));
193180 }
194- catch (final IOException | TransformerException | SAXException | ParserConfigurationException e ) {
181+ catch (TransformerConfigurationException | IOException e ) {
195182 throw new MetafactureException (e );
196183 }
184+ }
185+
186+ private InputStream getXmlDocsViaSru (final StringBuilder srUrl ){
187+ try {
188+ ByteArrayInputStream byteArrayInputStream = retrieve (srUrl , startRecord , maximumRecords );
189+ DocumentBuilderFactory factory = DocumentBuilderFactory .newInstance ();
190+ DocumentBuilder docBuilder = factory .newDocumentBuilder ();
191+ Document xmldoc = docBuilder .parse (byteArrayInputStream );
192+
193+ /* Element newRoot = xmldoc.createElement("harvest");
194+ newRoot.appendChild(xmldoc.getFirstChild());
195+ xmldoc.appendChild(newRoot);*/
196+
197+ numberOfRecords =
198+ Integer .parseInt (((Element ) xmldoc .getElementsByTagName ("numberOfRecords" ).item (0 )).getTextContent ());
199+ int recordPosition =
200+ Integer .parseInt (((Element ) xmldoc .getElementsByTagName ("recordPosition" ).item (0 )).getTextContent ());
201+ int nextRecordPosition =
202+ Integer .parseInt (((Element ) xmldoc .getElementsByTagName ("nextRecordPosition" ).item (0 )).getTextContent ());
203+
204+ String xmlEncoding = xmldoc .getXmlEncoding ();
205+ String xmlVersion = xmldoc .getXmlVersion ();
206+ //<?xml version="1.0" encoding="UTF-8"?>
207+ xmlDeclaration =String .format (xmlDeclarationTemplate ,xmldoc .getXmlVersion (),xmldoc .getXmlEncoding ());
208+ recordsRetrieved = recordsRetrieved + nextRecordPosition - recordPosition ;
209+
210+ ByteArrayOutputStream os = new ByteArrayOutputStream ();
211+
212+ Result result = new StreamResult (os );
213+ Transformer t = TransformerFactory .newInstance ().newTransformer ();
214+ t .setOutputProperty ("omit-xml-declaration" , "yes" );
215+ t .transform (new DOMSource (xmldoc ), result );
216+
217+ ByteArrayInputStream inputStream = new ByteArrayInputStream (os .toByteArray ());
218+ startRecord = startRecord + maximumRecords ;
219+ return inputStream ;
220+
221+ } catch (final IOException | TransformerException | SAXException | ParserConfigurationException e ) {
222+ throw new MetafactureException (e );
223+ }
224+
197225
198226 }
199227
@@ -206,14 +234,9 @@ private ByteArrayInputStream retrieve(StringBuilder srUrl, int startRecord, int
206234 connection .setRequestProperty ("User-Agent" , userAgent );
207235 }
208236 InputStream inputStream = getInputStream (connection );
237+
209238 ByteArrayOutputStream outputStream = new ByteArrayOutputStream ();
210239
211- System .out .println ("srUrl=" +srUrl );
212- System .out .println ("startRecord=" +startRecord );
213- System .out .println ("istream.length=" +inputStream .available ());
214- if (inputStream .available () < 768 ){ // we take it that this is a result without a record
215- stopRetrieving = true ;
216- }
217240 inputStream .transferTo (outputStream );
218241 return new ByteArrayInputStream (outputStream .toByteArray ());
219242 }
0 commit comments