GXflow FullText Search Migration to GitHub

LucasLazogue · LucasLazogue · commit 3ba3caaec473 · 2025-11-17T17:49:59.000-03:00
(cherry picked from commit caad503)
diff --git a/gxflowfulltextsearch/pom.xml b/gxflowfulltextsearch/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>com.genexus</groupId>
+        <artifactId>parent</artifactId>
+        <version>${revision}${changelist}</version>
+    </parent>
+
+    <artifactId>gxflowfulltextsearch</artifactId>
+    <name>GXflow FullText Search</name>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-collections4</artifactId>
+            <version>4.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-compress</artifactId>
+            <version>1.27.1</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+            <version>1.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>fontbox</artifactId>
+            <version>3.0.3</version> <!-- same version as pdfbox below; presumably meant to move together - confirm -->
+        </dependency>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>3.0.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-core</artifactId>
+            <version>2.2.0</version> <!-- the search classes import org.apache.lucene.search.Hits; NOTE(review): a Lucene upgrade likely needs code changes - confirm -->
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi</artifactId>
+            <version>${poi.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>${poi.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml-schemas</artifactId>
+            <version>4.1.2</version> <!-- NOTE(review): hard-coded while poi and poi-ooxml use ${poi.version}; confirm the two stay compatible -->
+        </dependency>
+    </dependencies>
+
+    <build>
+        <finalName>GXflowFullTextSearch</finalName> <!-- base name of the built artifact -->
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.0</version>
+                <configuration></configuration> <!-- intentionally empty here; compiler settings presumably come from the parent POM - TODO confirm -->
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/AnalyzerManager.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/AnalyzerManager.java
@@ -0,0 +1,29 @@
+package com.genexus.CA.search;
+
+import java.util.HashMap;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+/** Caches one Lucene {@link Analyzer} per language code so analyzers are reused across calls. */
+public class AnalyzerManager {
+   private static final HashMap<String, Analyzer> hash = new HashMap<String, Analyzer>();
+
+   /**
+    * Returns the analyzer cached for {@code lang}, creating and caching it on first use.
+    *
+    * <p>Synchronized because the cache is a plain {@link HashMap} shared by every caller.
+    *
+    * @param lang language code used as the cache key; null is cached like any other key
+    * @return the analyzer for that language; every language currently gets a StandardAnalyzer
+    */
+   public static synchronized Analyzer getAnalyzer(String lang) {
+      Analyzer analyzer = hash.get(lang);
+      if (analyzer == null) {
+         // No language-specific analyzers yet: every code, "spa" included, gets a StandardAnalyzer.
+         analyzer = new StandardAnalyzer();
+         hash.put(lang, analyzer);
+      }
+
+      return analyzer;
+   }
+}
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/IndexManager.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/IndexManager.java
@@ -0,0 +1,45 @@
+package com.genexus.CA.search;
+
+import java.util.HashMap;
+
+/** Static entry point that keeps one {@link Indexer} per index directory and forwards requests to it. */
+public class IndexManager {
+   private static final HashMap<String, Indexer> hash = new HashMap<String, Indexer>();
+
+   /**
+    * Adds content to the index stored in {@code dir} by delegating to that directory's indexer.
+    *
+    * @param dir index directory
+    * @param uri identifier of the content
+    * @param lang language code of the content
+    * @param title passed through to the indexer
+    * @param summary passed through to the indexer
+    * @param fromFile passed through to the indexer
+    * @param body passed through to the indexer
+    * @param filePath passed through to the indexer
+    */
+   public static void addContent(String dir, String uri, String lang, String title, String summary, byte fromFile, String body, String filePath) {
+      getIndexer(dir).addContent(uri, lang, title, summary, fromFile, body, filePath);
+   }
+
+   /**
+    * Deletes the content identified by {@code uri} from the index stored in {@code dir}.
+    *
+    * @param dir index directory
+    * @param uri identifier of the content to delete
+    */
+   public static void deleteContent(String dir, String uri) {
+      getIndexer(dir).deleteContent(uri);
+   }
+
+   /** Returns the indexer cached for {@code dir}, creating it on first use; synchronized to guard the shared map. */
+   private static synchronized Indexer getIndexer(String dir) {
+      Indexer indexer = hash.get(dir);
+      if (indexer == null) {
+         indexer = new Indexer(dir);
+         hash.put(dir, indexer);
+      }
+
+      return indexer;
+   }
+}
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Indexer.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Indexer.java
@@ -0,0 +1,252 @@
+package com.genexus.CA.search;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.apache.pdfbox.text.PDFTextStripperByArea;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+
+/**
+ * Maintains one Lucene index directory: adds documents whose text may be extracted from Word,
+ * PDF or plain-text files, and deletes documents by URI.
+ */
+public final class Indexer {
+   private String indexDirectory = ".";
+   private static final int IDX = 1;
+   private static final int DLT = 2;
+
+   /**
+    * Binds this indexer to {@code directory}, creating an empty index there when none can be opened.
+    *
+    * @param directory path of the Lucene index directory
+    */
+   protected Indexer(String directory) {
+      this.indexDirectory = directory;
+      if (!this.indexExists(directory)) {
+         try {
+            IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), true);
+            writer.close();
+         } catch (Exception e) {
+            Logger.print(e.toString());
+         }
+      }
+   }
+
+   /**
+    * Indexes a document under {@code uri}. When {@code fromFile} is 1 the text is extracted from
+    * {@code filePath}; otherwise the "content" field is stored empty. Extraction failures are
+    * logged and the document is still indexed with whatever text was read.
+    *
+    * <p>NOTE(review): title, summary and body are accepted but never indexed, and no "language"
+    * field is stored although the duplicate lookup filters on it - confirm this is intended.
+    *
+    * @param uri identifier stored verbatim in the "uri" field
+    * @param lang language code used to pick the analyzer and to look up a previous version
+    * @param title currently unused
+    * @param summary currently unused
+    * @param fromFile 1 to read the content from {@code filePath}
+    * @param body currently unused
+    * @param filePath file to extract text from when {@code fromFile} is 1
+    */
+   protected void addContent(String uri, String lang, String title, String summary, byte fromFile, String body, String filePath) {
+      String content = fromFile == 1 ? this.extractText(filePath) : "";
+      if (this.documentExists(uri, lang)) {
+         // NOTE(review): the lookup uses uri as given but the delete uses its lower-cased form.
+         this.indexOperation(DLT, lang, (Document)null, uri.toLowerCase());
+      }
+
+      Document doc = new Document();
+      doc.add(new Field("uri", uri, Store.YES, Index.UN_TOKENIZED));
+      doc.add(new Field("content", content, Store.YES, Index.TOKENIZED));
+      this.indexOperation(IDX, lang, doc, (String)null);
+   }
+
+   /**
+    * Deletes every document whose "uri" field equals the lower-cased {@code uri}.
+    *
+    * @param uri identifier of the content to delete
+    */
+   protected void deleteContent(String uri) {
+      try {
+         this.indexOperation(DLT, (String)null, (Document)null, uri.toLowerCase());
+      } catch (Exception e) {
+         Logger.print(e.toString());
+      }
+   }
+
+   /**
+    * Runs one write against the index; synchronized so writes through this indexer never overlap.
+    * Failures are logged rather than thrown.
+    *
+    * @param op 1 to add {@code doc}, 2 to delete by {@code uri}; any other value does nothing
+    * @param lang analyzer language for adds; for deletes, null removes every document with {@code uri}
+    * @param doc document to add (adds only)
+    * @param uri exact "uri" value to delete (deletes only)
+    */
+   protected synchronized void indexOperation(int op, String lang, Document doc, String uri) {
+      try {
+         if (op == IDX) {
+            this.addDocument(lang, doc);
+         } else if (op == DLT) {
+            this.removeDocument(lang, uri);
+         }
+      } catch (Exception e) {
+         Logger.print(e.toString());
+      }
+   }
+
+   /** Returns the path of the index directory this indexer works on. */
+   public String getIndexDirectory() {
+      return this.indexDirectory;
+   }
+
+   /** Adds {@code doc} using the analyzer for {@code lang}; the writer is closed even when the add fails. */
+   private void addDocument(String lang, Document doc) throws IOException {
+      IndexWriter writer = new IndexWriter(this.getIndexDirectory(), AnalyzerManager.getAnalyzer(lang), false);
+      try {
+         writer.addDocument(doc);
+         writer.optimize();
+      } finally {
+         writer.close();
+      }
+   }
+
+   /** Deletes by "uri" term when {@code lang} is null, otherwise the first uri + language match. */
+   private void removeDocument(String lang, String uri) throws IOException {
+      int docId = lang == null ? -1 : this.getDocumentId(uri, lang);
+      IndexReader reader = IndexReader.open(this.getIndexDirectory());
+      try {
+         if (lang == null) {
+            reader.deleteDocuments(new Term("uri", uri));
+         } else if (docId != -1) {
+            reader.deleteDocument(docId);
+         }
+      } finally {
+         reader.close();
+      }
+   }
+
+   /**
+    * Extracts the text of {@code filePath} according to its extension. Unsupported extensions
+    * yield an empty string; on failure the error is logged and the text read so far is returned.
+    */
+   private String extractText(String filePath) {
+      StringBuilder text = new StringBuilder();
+      try {
+         if (this.isMicrosoftExtension(filePath)) {
+            try (FileInputStream in = new FileInputStream(filePath); XWPFDocument word = new XWPFDocument(in)) {
+               for (XWPFParagraph paragraph : word.getParagraphs()) {
+                  // The separator keeps the last word of a paragraph from fusing with the next one.
+                  text.append(paragraph.getText()).append('\n');
+               }
+            }
+         } else if (this.isPdfExtension(filePath)) {
+            try (PDDocument pdf = Loader.loadPDF(new File(filePath))) {
+               text.append(new PDFTextStripper().getText(pdf));
+            }
+         } else if (this.isTxtExtension(filePath)) {
+            try (BufferedReader reader = new BufferedReader(new FileReader(new File(filePath)))) {
+               String line;
+               while ((line = reader.readLine()) != null) {
+                  text.append(line).append('\n');
+               }
+            }
+         }
+      } catch (Exception e) {
+         // Broad on purpose: the document parsers may also reject a file with unchecked exceptions.
+         Logger.print(e.toString());
+      }
+      return text.toString();
+   }
+
+   /** Reports whether an index can be opened at {@code dir}; the probe searcher is closed again. */
+   private boolean indexExists(String dir) {
+      IndexSearcher probe;
+      try {
+         probe = new IndexSearcher(dir);
+      } catch (IOException e) {
+         return false;
+      }
+      try {
+         probe.close();
+      } catch (IOException e) {
+         Logger.print(e.toString());
+      }
+      return true;
+   }
+
+   /** Reports whether some document matches both {@code uri} and {@code lang}. */
+   private boolean documentExists(String uri, String lang) {
+      return this.getDocumentId(uri, lang) != -1;
+   }
+
+   /** Returns the id of the first document matching uri and language, or -1 when none is found or the search fails. */
+   private int getDocumentId(String uri, String lang) {
+      int value = -1;
+      try {
+         IndexSearcher searcher = new IndexSearcher(this.indexDirectory);
+         try {
+            BooleanQuery query = new BooleanQuery();
+            query.add(new TermQuery(new Term("uri", uri)), Occur.MUST);
+            query.add(new TermQuery(new Term("language", lang)), Occur.MUST);
+            Hits hits = searcher.search(query);
+            if (hits.length() > 0) {
+               value = hits.id(0);
+            }
+         } finally {
+            searcher.close();
+         }
+      } catch (IOException e) {
+         Logger.print(e.toString());
+      }
+      return value;
+   }
+
+   /** True for the Office extensions that are handed to the Word (XWPF) reader. */
+   private boolean isMicrosoftExtension(String filePath) {
+      // NOTE(review): XWPFDocument reads .docx; confirm the other extensions are meant to be listed here.
+      return this.hasExtension(filePath, ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx");
+   }
+
+   /** True when {@code filePath} ends in .pdf. */
+   private boolean isPdfExtension(String filePath) {
+      return this.hasExtension(filePath, ".pdf");
+   }
+
+   /** True for the extensions that are read line by line as plain text. */
+   private boolean isTxtExtension(String filePath) {
+      return this.hasExtension(filePath, ".txt", ".html");
+   }
+
+   /** Case-insensitive test of whether {@code filePath} ends with any of {@code extensions}. */
+   private boolean hasExtension(String filePath, String... extensions) {
+      for (String extension : extensions) {
+         int start = filePath.length() - extension.length();
+         if (filePath.regionMatches(true, start, extension, 0, extension.length())) {
+            return true;
+         }
+      }
+      return false;
+   }
+}
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Logger.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Logger.java
@@ -0,0 +1,15 @@
+package com.genexus.CA.search;
+
+/** Minimal logging helper for the search classes: writes prefixed messages to standard error. */
+public class Logger {
+   private static final String PREFIX = "CASearch:";
+
+   /**
+    * Prints {@code str} to {@code System.err}, prefixed with "CASearch:".
+    *
+    * @param str message to print; null is printed as "null"
+    */
+   public static void print(String str) {
+      System.err.println(PREFIX + str);
+   }
+}
diff --git a/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Searcher.java b/gxflowfulltextsearch/src/main/java/com/genexus/CA/search/Searcher.java
@@ -0,0 +1,104 @@
+package com.genexus.CA.search;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.MultiFieldQueryParser;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.BooleanClause.Occur;
+
+/** Runs full-text queries against a Lucene index and renders the matching URIs as XML. */
+public class Searcher {
+   /**
+    * Searches the "title" and "content" fields of the index in {@code dir} and returns the hits as
+    * {@code <Results hits = 'n' time = ''><Result><URI>...</URI></Result>...</Results>}.
+    *
+    * <p>Unless {@code lang} is "IND", hits are also required to carry a matching "language" term.
+    * When the search fails the error is logged and an empty, well-formed result set is returned.
+    *
+    * <p>NOTE(review): {@code maxResults} and {@code from} are not applied - every hit is returned.
+    *
+    * @param dir path of the index directory
+    * @param lang language code, or "IND" to search regardless of language
+    * @param query query text handed to the Lucene multi-field query parser
+    * @param maxResults currently unused
+    * @param from currently unused
+    * @return the XML document described above, with URI text XML-escaped
+    */
+   public static String search(String dir, String lang, String query, int maxResults, int from) {
+      StringBuilder buff = new StringBuilder();
+      IndexSearcher searcher = null;
+
+      try {
+         searcher = new IndexSearcher(dir);
+         String[] fields = new String[]{"title", "content"};
+         Occur[] clauses = new Occur[]{Occur.SHOULD, Occur.SHOULD};
+         Query q = MultiFieldQueryParser.parse(query, fields, clauses, AnalyzerManager.getAnalyzer(lang));
+         if (!lang.equals("IND")) {
+            BooleanQuery bq = new BooleanQuery();
+            bq.add(q, Occur.MUST);
+            bq.add(new TermQuery(new Term("language", lang)), Occur.MUST);
+            q = bq;
+         }
+
+         Hits hits = searcher.search(q);
+         int max = hits.length();
+         appendHeader(buff, max);
+
+         for (int i = 0; i < max; ++i) {
+            Document doc = hits.doc(i);
+            String uri = String.valueOf(doc.getField("uri").stringValue());
+            buff.append("<Result>").append("<URI>").append(escapeXml(uri)).append("</URI>").append("</Result>");
+         }
+      } catch (Exception e) {
+         Logger.print(e.toString());
+         // Discard any partial output so the caller always receives a parseable document.
+         buff.setLength(0);
+         appendHeader(buff, 0);
+      } finally {
+         closeQuietly(searcher);
+      }
+
+      buff.append("</Results>");
+      return buff.toString();
+   }
+
+   /** Appends the XML declaration and the opening Results tag carrying {@code hitCount}. */
+   private static void appendHeader(StringBuilder out, int hitCount) {
+      out.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+      out.append("<Results hits = '" + hitCount + "' time = ''>");
+   }
+
+   /** Closes {@code searcher} if it was opened, logging instead of propagating any failure. */
+   private static void closeQuietly(IndexSearcher searcher) {
+      if (searcher == null) {
+         return;
+      }
+      try {
+         searcher.close();
+      } catch (Exception e) {
+         Logger.print(e.toString());
+      }
+   }
+
+   /** Escapes the characters that would otherwise break XML element content. */
+   private static String escapeXml(String text) {
+      StringBuilder out = new StringBuilder(text.length());
+      for (int i = 0; i < text.length(); ++i) {
+         char c = text.charAt(i);
+         if (c == '&') {
+            out.append("&amp;");
+         } else if (c == '<') {
+            out.append("&lt;");
+         } else if (c == '>') {
+            out.append("&gt;");
+         } else {
+            out.append(c);
+         }
+      }
+      return out.toString();
+   }
+}
diff --git a/pom.xml b/pom.xml