Skip to content

Commit 3ba3caa

Browse files
committed
GXflow FullText Search Migration to GitHub
(cherry picked from commit caad503)
1 parent 918e53d commit 3ba3caa

File tree

7 files changed

+392
-0
lines changed

7 files changed

+392
-0
lines changed

gxflowfulltextsearch/pom.xml

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven descriptor for the gxflowfulltextsearch module ("GXflow FullText Search"). -->
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<!-- Inherits from com.genexus:parent; the version is resolved from the revision and changelist properties, which are not defined in this file. -->
<parent>
6+
<groupId>com.genexus</groupId>
7+
<artifactId>parent</artifactId>
8+
<version>${revision}${changelist}</version>
9+
</parent>
10+
11+
<artifactId>gxflowfulltextsearch</artifactId>
12+
<name>GXflow FullText Search</name>
13+
14+
<dependencies>
15+
<!-- None of the module's Java sources import commons-collections4, commons-compress or commons-logging directly; presumably they are runtime requirements of POI / PDFBox (TODO confirm). -->
<dependency>
16+
<groupId>org.apache.commons</groupId>
17+
<artifactId>commons-collections4</artifactId>
18+
<version>4.1</version>
19+
</dependency>
20+
<dependency>
21+
<groupId>org.apache.commons</groupId>
22+
<artifactId>commons-compress</artifactId>
23+
<version>1.27.1</version>
24+
</dependency>
25+
<dependency>
26+
<groupId>commons-logging</groupId>
27+
<artifactId>commons-logging</artifactId>
28+
<version>1.2</version>
29+
</dependency>
30+
<!-- PDF text extraction: Indexer imports org.apache.pdfbox.Loader, PDDocument and PDFTextStripper. fontbox is declared at the same version as pdfbox. -->
<dependency>
31+
<groupId>org.apache.pdfbox</groupId>
32+
<artifactId>fontbox</artifactId>
33+
<version>3.0.3</version>
34+
</dependency>
35+
<dependency>
36+
<groupId>org.apache.pdfbox</groupId>
37+
<artifactId>pdfbox</artifactId>
38+
<version>3.0.3</version>
39+
</dependency>
40+
<!-- Index and search engine used by AnalyzerManager, Indexer and Searcher. NOTE(review): those classes import org.apache.lucene.search.Hits and open indexes by directory path; confirm these APIs are still available before changing this version. -->
<dependency>
41+
<groupId>org.apache.lucene</groupId>
42+
<artifactId>lucene-core</artifactId>
43+
<version>2.2.0</version>
44+
</dependency>
45+
<!-- Word document reading: Indexer imports org.apache.poi.xwpf.usermodel.XWPFDocument. The poi.version property is not defined in this file; presumably it is inherited from the parent POM (TODO confirm). -->
<dependency>
46+
<groupId>org.apache.poi</groupId>
47+
<artifactId>poi</artifactId>
48+
<version>${poi.version}</version>
49+
</dependency>
50+
<dependency>
51+
<groupId>org.apache.poi</groupId>
52+
<artifactId>poi-ooxml</artifactId>
53+
<version>${poi.version}</version>
54+
</dependency>
55+
<!-- NOTE(review): this artifact is pinned to 4.1.2 while poi and poi-ooxml follow the poi.version property; confirm the versions are compatible. -->
<dependency>
56+
<groupId>org.apache.poi</groupId>
57+
<artifactId>poi-ooxml-schemas</artifactId>
58+
<version>4.1.2</version>
59+
</dependency>
60+
</dependencies>
61+
62+
<build>
63+
<!-- Base name of the built artifact file. -->
<finalName>GXflowFullTextSearch</finalName>
64+
<plugins>
65+
<plugin>
66+
<groupId>org.apache.maven.plugins</groupId>
67+
<artifactId>maven-compiler-plugin</artifactId>
68+
<version>3.8.0</version>
69+
<!-- Empty configuration: no compiler options are set here; presumably source/target levels come from the parent POM (TODO confirm). -->
<configuration></configuration>
70+
</plugin>
71+
</plugins>
72+
</build>
73+
</project>
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package com.genexus.CA.search;
2+
3+
import java.util.HashMap;
4+
import org.apache.lucene.analysis.Analyzer;
5+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
6+
7+
public class AnalyzerManager {
8+
private static HashMap hash = new HashMap();
9+
10+
public static Analyzer getAnalyzer(String lang) {
11+
Analyzer analyzer = null;
12+
if (hash.containsKey(lang)) {
13+
analyzer = (Analyzer)hash.get(lang);
14+
} else {
15+
if (lang.equals("spa")) {
16+
analyzer = new StandardAnalyzer();
17+
} else {
18+
analyzer = new StandardAnalyzer();
19+
}
20+
21+
hash.put(lang, analyzer);
22+
}
23+
24+
return (Analyzer)analyzer;
25+
}
26+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package com.genexus.CA.search;
2+
3+
import java.util.HashMap;
4+
5+
public class IndexManager {
6+
private static HashMap hash = new HashMap();
7+
8+
public static void addContent(String dir, String uri, String lang, String title, String summary, byte fromFile, String body, String filePath) {
9+
getIndexer(dir).addContent(uri, lang, title, summary, fromFile, body, filePath);
10+
}
11+
12+
public static void deleteContent(String dir, String uri) {
13+
getIndexer(dir).deleteContent(uri);
14+
}
15+
16+
private static synchronized Indexer getIndexer(String dir) {
17+
Indexer indexer = null;
18+
if (hash.containsKey(dir)) {
19+
indexer = (Indexer)hash.get(dir);
20+
} else {
21+
indexer = new Indexer(dir);
22+
hash.put(dir, indexer);
23+
}
24+
25+
return indexer;
26+
}
27+
}
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
package com.genexus.CA.search;
2+
3+
import java.io.BufferedReader;
4+
import java.io.File;
5+
import java.io.FileInputStream;
6+
import java.io.FileReader;
7+
import java.io.IOException;
8+
import java.util.Iterator;
9+
import java.util.List;
10+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
11+
import org.apache.lucene.document.Document;
12+
import org.apache.lucene.document.Field;
13+
import org.apache.lucene.document.Field.Index;
14+
import org.apache.lucene.document.Field.Store;
15+
import org.apache.lucene.index.IndexReader;
16+
import org.apache.lucene.index.IndexWriter;
17+
import org.apache.lucene.index.Term;
18+
import org.apache.lucene.search.BooleanQuery;
19+
import org.apache.lucene.search.Hits;
20+
import org.apache.lucene.search.IndexSearcher;
21+
import org.apache.lucene.search.TermQuery;
22+
import org.apache.lucene.search.BooleanClause.Occur;
23+
import org.apache.pdfbox.Loader;
24+
import org.apache.pdfbox.pdmodel.PDDocument;
25+
import org.apache.pdfbox.text.PDFTextStripper;
26+
import org.apache.pdfbox.text.PDFTextStripperByArea;
27+
import org.apache.poi.xwpf.usermodel.XWPFDocument;
28+
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
29+
30+
public final class Indexer {
31+
private String indexDirectory = ".";
32+
private static final int IDX = 1;
33+
private static final int DLT = 2;
34+
35+
protected Indexer(String directory) {
36+
this.indexDirectory = directory;
37+
if (!this.indexExists(directory)) {
38+
try {
39+
this.indexDirectory = directory;
40+
IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), true);
41+
writer.close();
42+
} catch (Exception var3) {
43+
Logger.print(var3.toString());
44+
}
45+
}
46+
47+
}
48+
49+
protected void addContent(String uri, String lang, String title, String summary, byte fromFile, String body, String filePath) {
50+
Document doc = null;
51+
doc = new Document();
52+
String content = "";
53+
if (fromFile == 1) {
54+
try {
55+
if (this.isMicrosoftExtension(filePath)) {
56+
FileInputStream file = new FileInputStream(filePath);
57+
XWPFDocument reader = new XWPFDocument(file);
58+
List<XWPFParagraph> data = reader.getParagraphs();
59+
60+
XWPFParagraph p;
61+
for(Iterator var14 = data.iterator(); var14.hasNext(); content = content + p.getText()) {
62+
p = (XWPFParagraph)var14.next();
63+
}
64+
} else if (this.isPdfExtension(filePath)) {
65+
PDDocument document = Loader.loadPDF(new File(filePath));
66+
new PDFTextStripperByArea();
67+
PDFTextStripper tStripper = new PDFTextStripper();
68+
content = content + tStripper.getText(document);
69+
} else if (this.isTxtExtension(filePath)) {
70+
File txt = new File(filePath);
71+
72+
String st;
73+
for(BufferedReader br = new BufferedReader(new FileReader(txt)); (st = br.readLine()) != null; content = content + st) {
74+
}
75+
}
76+
} catch (IOException var16) {
77+
var16.printStackTrace();
78+
}
79+
}
80+
81+
if (doc != null) {
82+
if (this.documentExists(uri, lang)) {
83+
this.indexOperation(2, lang, (Document)null, uri.toLowerCase());
84+
}
85+
86+
doc.add(new Field("uri", uri, Store.YES, Index.UN_TOKENIZED));
87+
doc.add(new Field("content", content, Store.YES, Index.TOKENIZED));
88+
89+
try {
90+
this.indexOperation(1, lang, doc, (String)null);
91+
} catch (Exception var15) {
92+
Logger.print(var15.toString());
93+
}
94+
}
95+
96+
}
97+
98+
protected void deleteContent(String uri) {
99+
try {
100+
this.indexOperation(2, (String)null, (Document)null, uri.toLowerCase());
101+
} catch (Exception var3) {
102+
Logger.print(var3.toString());
103+
}
104+
105+
}
106+
107+
protected synchronized void indexOperation(int op, String lang, Document doc, String uri) {
108+
switch(op) {
109+
case 1:
110+
try {
111+
IndexWriter writer = new IndexWriter(this.getIndexDirectory(), AnalyzerManager.getAnalyzer(lang), false);
112+
writer.addDocument(doc);
113+
writer.optimize();
114+
writer.close();
115+
} catch (Exception var9) {
116+
Logger.print(var9.toString());
117+
}
118+
break;
119+
case 2:
120+
try {
121+
Term term = null;
122+
int docId = 0;
123+
if (lang == null) {
124+
term = new Term("uri", uri);
125+
} else {
126+
docId = this.getDocumentId(uri, lang);
127+
}
128+
129+
IndexReader reader = IndexReader.open(this.getIndexDirectory());
130+
if (lang == null) {
131+
reader.deleteDocuments(term);
132+
} else if (docId != -1) {
133+
reader.deleteDocument(docId);
134+
}
135+
136+
reader.close();
137+
} catch (Exception var8) {
138+
Logger.print(var8.toString());
139+
}
140+
}
141+
142+
}
143+
144+
public String getIndexDirectory() {
145+
return this.indexDirectory;
146+
}
147+
148+
private boolean indexExists(String dir) {
149+
try {
150+
new IndexSearcher(dir);
151+
return true;
152+
} catch (IOException var3) {
153+
return false;
154+
}
155+
}
156+
157+
private boolean documentExists(String uri, String lang) {
158+
boolean value = false;
159+
160+
try {
161+
IndexSearcher searcher = new IndexSearcher(this.indexDirectory);
162+
BooleanQuery query = new BooleanQuery();
163+
query.add(new TermQuery(new Term("uri", uri)), Occur.MUST);
164+
query.add(new TermQuery(new Term("language", lang)), Occur.MUST);
165+
Hits hits = searcher.search(query);
166+
searcher.close();
167+
if (hits.length() > 0) {
168+
value = true;
169+
}
170+
} catch (IOException var7) {
171+
Logger.print(var7.toString());
172+
}
173+
174+
return value;
175+
}
176+
177+
private int getDocumentId(String uri, String lang) {
178+
int value = -1;
179+
180+
try {
181+
IndexSearcher searcher = new IndexSearcher(this.indexDirectory);
182+
BooleanQuery query = new BooleanQuery();
183+
query.add(new TermQuery(new Term("uri", uri)), Occur.MUST);
184+
query.add(new TermQuery(new Term("language", lang)), Occur.MUST);
185+
Hits hits = searcher.search(query);
186+
if (hits.length() > 0) {
187+
value = hits.id(0);
188+
}
189+
190+
searcher.close();
191+
} catch (IOException var7) {
192+
Logger.print(var7.toString());
193+
}
194+
195+
return value;
196+
}
197+
198+
private boolean isMicrosoftExtension(String filePath) {
199+
return filePath.endsWith(".doc") || filePath.endsWith(".docx") || filePath.endsWith(".xls") || filePath.endsWith(".xlsx") || filePath.endsWith(".ppt") || filePath.endsWith(".pptx");
200+
}
201+
202+
private boolean isPdfExtension(String filePath) {
203+
return filePath.endsWith(".pdf");
204+
}
205+
206+
private boolean isTxtExtension(String filePath) {
207+
return filePath.endsWith(".txt") || filePath.endsWith(".html");
208+
}
209+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package com.genexus.CA.search;
2+
3+
/**
 * Minimal logging helper for the search classes: writes tagged messages to standard error.
 */
public class Logger {
    /**
     * Writes {@code str} to standard error on its own line, prefixed with {@code CASearch:}.
     *
     * @param str message to print; {@code null} is printed as the text "null"
     */
    public static void print(String str) {
        final StringBuilder line = new StringBuilder("CASearch:");
        line.append(str);
        System.err.println(line.toString());
    }
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package com.genexus.CA.search;
2+
3+
import org.apache.lucene.document.Document;
4+
import org.apache.lucene.index.Term;
5+
import org.apache.lucene.queryParser.MultiFieldQueryParser;
6+
import org.apache.lucene.search.BooleanQuery;
7+
import org.apache.lucene.search.Hits;
8+
import org.apache.lucene.search.IndexSearcher;
9+
import org.apache.lucene.search.Query;
10+
import org.apache.lucene.search.TermQuery;
11+
import org.apache.lucene.search.BooleanClause.Occur;
12+
13+
public class Searcher {
14+
public static String search(String dir, String lang, String query, int maxResults, int from) {
15+
StringBuffer buff = new StringBuffer();
16+
17+
try {
18+
IndexSearcher searcher = new IndexSearcher(dir);
19+
String[] fields = new String[]{"title", "content"};
20+
Occur[] clauses = new Occur[]{Occur.SHOULD, Occur.SHOULD};
21+
Query q = MultiFieldQueryParser.parse(query, fields, clauses, AnalyzerManager.getAnalyzer(lang));
22+
if (!lang.equals("IND")) {
23+
Query q2 = new TermQuery(new Term("language", lang));
24+
BooleanQuery bq = new BooleanQuery();
25+
bq.add((Query)q, Occur.MUST);
26+
bq.add(q2, Occur.MUST);
27+
q = bq;
28+
}
29+
30+
Hits hits = searcher.search((Query)q);
31+
String time = "";
32+
int max = hits.length();
33+
buff.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
34+
buff.append("<Results hits = '" + max + "' time = '" + time + "'>");
35+
36+
for(int i = 0; i < max; ++i) {
37+
buff.append("<Result>");
38+
Document doc = hits.doc(i);
39+
buff.append("<URI>" + doc.getField("uri").stringValue() + "</URI>");
40+
buff.append("</Result>");
41+
}
42+
} catch (Exception var15) {
43+
Logger.print(var15.toString());
44+
}
45+
46+
buff.append("</Results>");
47+
return buff.toString();
48+
}
49+
}

0 commit comments

Comments
 (0)