From 64a232de99ed4730968a93df2016fdc31782bf3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Fri, 10 Oct 2025 14:03:01 -0700 Subject: [PATCH 1/4] Allow sorting in the API calls by path and date MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernát Gábor --- apiary.apib | 4 ++ .../opengrok/indexer/search/SearchEngine.java | 60 +++++++++++++++++-- .../api/v1/controller/SearchController.java | 11 +++- .../web/api/v1/filter/IncomingFilter.java | 2 +- 4 files changed, 67 insertions(+), 10 deletions(-) diff --git a/apiary.apib b/apiary.apib index 0e9bed9319e..9a7de257a81 100644 --- a/apiary.apib +++ b/apiary.apib @@ -657,6 +657,10 @@ The repository path is relative to source root. + projects (optional, string) - projects to search in + maxresults (optional, string) - maximum number of documents whose hits will be returned (default 1000) + start (optional, string) - start index from which to return results + + sort (optional, string) - sort order for results (default: "relevancy"). Possible values are: + - `relevancy` - by relevancy (Lucene score) + - `fullpath` - by file path + - `lastmodtime` - by last modification date of the file. 
+ Response 200 (application/json) + Body diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java index e12ad247142..0b27ac5525c 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/search/SearchEngine.java @@ -48,6 +48,10 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.util.Version; import org.opengrok.indexer.analysis.AbstractAnalyzer; @@ -66,6 +70,7 @@ import org.opengrok.indexer.util.Statistics; import org.opengrok.indexer.util.TandemPath; import org.opengrok.indexer.web.Prefix; +import org.opengrok.indexer.web.SortOrder; /** * This is an encapsulation of the details on how to search in the index database. @@ -114,6 +119,10 @@ public class SearchEngine { * Holds value of property type. */ private String type; + /** + * Holds value of property sort. + */ + private SortOrder sortOrder; /** * Holds value of property indexDatabase. 
*/ @@ -132,7 +141,7 @@ public class SearchEngine { int cachePages = RuntimeEnvironment.getInstance().getCachePages(); int totalHits = 0; private ScoreDoc[] hits; - private TopScoreDocCollector collector; + private TopDocsCollector collector; private IndexSearcher searcher; boolean allCollected; private final ArrayList searcherList = new ArrayList<>(); @@ -181,6 +190,10 @@ private void searchSingleDatabase(boolean paging) throws IOException { SuperIndexSearcher superIndexSearcher = RuntimeEnvironment.getInstance().getSuperIndexSearcher(""); searcherList.add(superIndexSearcher); searcher = superIndexSearcher; + // If a field-based sort is requested, collect all hits (disable paging optimization) + if (sortOrder != SortOrder.RELEVANCY) { + paging = false; + } searchIndex(superIndexSearcher, paging); } @@ -205,16 +218,33 @@ private void searchMultiDatabase(List projectList, boolean paging) thro } private void searchIndex(IndexSearcher searcher, boolean paging) throws IOException { - collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE); - Statistics stat = new Statistics(); + Sort luceneSort = null; + if (getSortOrder() == SortOrder.LASTMODIFIED) { + luceneSort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true)); + } else if (getSortOrder() == SortOrder.BY_PATH) { + luceneSort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING)); + } + if (luceneSort == null) { + collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE); + } else { + collector = TopFieldCollector.create(luceneSort, hitsPerPage * cachePages, Short.MAX_VALUE); + } searcher.search(query, collector); totalHits = collector.getTotalHits(); + Statistics stat = new Statistics(); stat.report(LOGGER, Level.FINEST, "search via SearchEngine done", "search.latency", new String[]{"category", "engine", "outcome", totalHits > 0 ? 
"success" : "empty"}); - if (!paging && totalHits > 0) { - collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE); - searcher.search(query, collector); + if (luceneSort == null) { + if (!paging && totalHits > 0) { + collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE); + searcher.search(query, collector); + } + } else { + if (!paging && totalHits > 0) { + collector = TopFieldCollector.create(luceneSort, totalHits, Short.MAX_VALUE); + searcher.search(query, collector); + } } hits = collector.topDocs().scoreDocs; StoredFields storedFields = searcher.storedFields(); @@ -645,4 +675,22 @@ public String getType() { public void setType(String fileType) { this.type = fileType; } + + /** + * Getter for property sort. + * + * @return Value of property sortOrder. + */ + public SortOrder getSortOrder() { + return this.sortOrder; + } + + /** + * Setter for property sort. + * + * @param sortOrder New value of property sortOrder. + */ + public void setSortOrder(SortOrder sortOrder) { + this.sortOrder = sortOrder; + } } diff --git a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java index f251c3b8005..08acb2c5089 100644 --- a/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java +++ b/opengrok-web/src/main/java/org/opengrok/web/api/v1/controller/SearchController.java @@ -39,6 +39,7 @@ import org.opengrok.indexer.search.Hit; import org.opengrok.indexer.search.SearchEngine; import org.opengrok.indexer.web.QueryParameters; +import org.opengrok.indexer.web.SortOrder; import org.opengrok.web.PageConfig; import org.opengrok.web.api.v1.filter.CorsEnable; import org.opengrok.web.api.v1.suggester.provider.service.SuggesterService; @@ -58,6 +59,7 @@ public class SearchController { public static final String PATH = "search"; private static final int MAX_RESULTS = 1000; + private static final String DEFAULT_SORT_ORDER 
= "relevancy"; private final SuggesterService suggester; @@ -81,9 +83,10 @@ public SearchResult search( @QueryParam("projects") final List projects, @QueryParam("maxresults") // Akin to QueryParameters.COUNT_PARAM @DefaultValue(MAX_RESULTS + "") final int maxResults, - @QueryParam(QueryParameters.START_PARAM) @DefaultValue(0 + "") final int startDocIndex + @QueryParam(QueryParameters.START_PARAM) @DefaultValue(0 + "") final int startDocIndex, + @QueryParam(QueryParameters.SORT_PARAM) @DefaultValue(DEFAULT_SORT_ORDER) final String sort ) { - try (SearchEngineWrapper engine = new SearchEngineWrapper(full, def, symbol, path, hist, type)) { + try (SearchEngineWrapper engine = new SearchEngineWrapper(full, def, symbol, path, hist, type, SortOrder.get(sort))) { if (!engine.isValid()) { throw new WebApplicationException("Invalid request", Response.Status.BAD_REQUEST); @@ -119,7 +122,8 @@ private SearchEngineWrapper( final String symbol, final String path, final String hist, - final String type + final String type, + final SortOrder sortOrder ) { engine.setFreetext(full); engine.setDefinition(def); @@ -127,6 +131,7 @@ private SearchEngineWrapper( engine.setFile(path); engine.setHistory(hist); engine.setType(type); + engine.setSortOrder(sortOrder); } public List search( diff --git a/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java b/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java index 20848148acb..495d59555cd 100644 --- a/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java +++ b/opengrok-web/src/main/java/org/opengrok/web/api/v1/filter/IncomingFilter.java @@ -66,7 +66,7 @@ public class IncomingFilter implements ContainerRequestFilter, ConfigurationChan /** * Endpoint paths that are exempted from this filter. 
* @see SearchController#search(HttpServletRequest, String, String, String, String, String, String, - * java.util.List, int, int) + * java.util.List, int, int, String) * @see SuggesterController#getSuggestions(org.opengrok.web.api.v1.suggester.model.SuggesterQueryData) * @see SuggesterController#getConfig() */ From f23d77fd2f47f178734f86597abb9d95616311b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Tue, 14 Oct 2025 20:47:16 -0700 Subject: [PATCH 2/4] Add test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernát Gábor --- .../indexer/search/SearchEngineTest.java | 67 +++++++++++++++++++ .../opengrok/indexer/util/TestRepository.java | 50 +++++++++----- 2 files changed, 102 insertions(+), 15 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java index d477ef11295..7ac6c8fe8dc 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java @@ -24,7 +24,9 @@ package org.opengrok.indexer.search; import java.io.File; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.TreeSet; import org.junit.jupiter.api.AfterAll; @@ -36,7 +38,9 @@ import org.opengrok.indexer.util.TestRepository; import org.opengrok.indexer.history.RepositoryFactory; +import org.opengrok.indexer.web.SortOrder; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; @@ -148,6 +152,69 @@ void testGetQuery() throws Exception { instance.getQuery()); } + @Test + void testSortOrderLastModified() { + SearchEngine instance = new SearchEngine(); 
+ instance.setFile("main.c"); + instance.setFreetext("arguments"); + instance.setSortOrder(SortOrder.LASTMODIFIED); + int hitsCount = instance.search(); + List hits = new ArrayList<>(); + instance.results(0, hitsCount, hits); + assertTrue(hits.size() > 1, "Should return at least 2 hits for RELEVANCY sort to check order"); + + List results = new ArrayList<>(); + for (Hit hit : hits) { + results.add(hit.getPath() + "@" + hit.getLineno()); + } + final String[] expectedResults = { + "/teamware/main.c@5", + "/rcs_test/main.c@5", + "/mercurial/main.c@5", + "/git/main.c@5", + "/cvs_test/cvsrepo/main.c@7", + "/bazaar/main.c@5" + }; + + assertArrayEquals(expectedResults, results.toArray()); + + instance.destroy(); + } + + @Test + void testSortOrderByPath() { + SearchEngine instance = new SearchEngine(); + instance.setFile("main.c OR header.h"); + instance.setFreetext("arguments OR stdio"); + instance.setSortOrder(SortOrder.BY_PATH); + int hitsCount = instance.search(); + List hits = new ArrayList<>(); + instance.results(0, hitsCount, hits); + assertTrue(hits.size() > 1, "Should return at least 2 hits for RELEVANCY sort to check order"); + + List results = new ArrayList<>(); + for (Hit hit : hits) { + results.add(hit.getPath() + "@" + hit.getLineno()); + } + final String[] expectedResults = { + "/bazaar/header.h@2", + "/bazaar/main.c@5", + "/cvs_test/cvsrepo/main.c@7", + "/git/header.h@2", + "/git/main.c@5", + "/mercurial/header.h@2", + "/mercurial/main.c@5", + "/rcs_test/header.h@2", + "/rcs_test/main.c@5", + "/teamware/header.h@2", + "/teamware/main.c@5" + }; + + assertArrayEquals(expectedResults, results.toArray()); + + instance.destroy(); + } + /* see https://github.com/oracle/opengrok/issues/2030 @Test void testSearch() { diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 167980d139f..3dd15c04197 100644 --- 
a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -33,6 +33,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -111,25 +112,44 @@ public void create(@NotNull final URL url) throws IOException, URISyntaxExceptio * @throws IOException on error */ public void copyDirectory(Path src, Path dest) throws IOException { + // Create a deterministic order of paths for creation time, so last modified time indexing is stable in tests + // note we cannot use Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES) + // as the original creation time is the user checkout and not different across files + List allPaths; try (Stream stream = Files.walk(src)) { - stream.forEach(sourceFile -> { - if (sourceFile.equals(src)) { - return; + allPaths = stream.filter(p -> !p.equals(src)).sorted().toList(); + } + // Set base time to now, and go ahead in time for each subsequent path by 1 minute + java.time.Instant baseTime = java.time.Instant.now(); + for (int i = 0; i < allPaths.size(); i++) { + Path sourcePath = allPaths.get(i); + Path destRelativePath = getDestinationRelativePath(src, sourcePath); + Path destPath = dest.resolve(destRelativePath); + var fileTime = java.nio.file.attribute.FileTime.from(baseTime.plusSeconds(i * 60L)); + if (Files.isDirectory(sourcePath)) { + if (!Files.exists(destPath)) { + Files.createDirectories(destPath); } try { - Path destRelativePath = getDestinationRelativePath(src, sourceFile); - Path destPath = dest.resolve(destRelativePath); - if (Files.isDirectory(sourceFile)) { - if (!Files.exists(destPath)) { - Files.createDirectory(destPath); - } - return; - } - Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES); - } catch (Exception e) { - throw new RuntimeException(e); + 
Files.setLastModifiedTime(destPath, fileTime); + Files.setAttribute(destPath, "basic:creationTime", fileTime); + } catch (Exception ignored) { + // Not all filesystems support creationTime + } + } else { + // Ensure parent directory exists before copying file + Path parentDir = destPath.getParent(); + if (parentDir != null && !Files.exists(parentDir)) { + Files.createDirectories(parentDir); } - }); + Files.copy(sourcePath, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES); + Files.setLastModifiedTime(destPath, fileTime); + try { + Files.setAttribute(destPath, "basic:creationTime", fileTime); + } catch (Exception ignored) { + // Not all filesystems support creationTime + } + } } } From 389d766ddb0dff99adcaec1eb8b762b227894da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Tue, 21 Oct 2025 06:30:09 -0700 Subject: [PATCH 3/4] Fix apiary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernát Gábor --- apiary.apib | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/apiary.apib b/apiary.apib index 9a7de257a81..26bafcce3e7 100644 --- a/apiary.apib +++ b/apiary.apib @@ -643,7 +643,7 @@ The repository path is relative to source root. + repository - repository path with native path separators (of the machine running the service) starting with path separator for which to return type -## Search [/search{?full,def,symbol,path,hist,type,projects,maxresults,start}] +## Search [/search{?full,def,symbol,path,hist,type,projects,maxresults,start,sort}] ## return search results [GET] @@ -657,10 +657,15 @@ The repository path is relative to source root. + projects (optional, string) - projects to search in + maxresults (optional, string) - maximum number of documents whose hits will be returned (default 1000) + start (optional, string) - start index from which to return results - + sort (optional, string) - sort order for results (default: "relevancy"). 
Possible values are: - - `relevancy` - by relevancy (Lucene score) - - `fullpath` - by file path - - `lastmodtime` - by last modification date of the file. + + sort: relevancy (optional, enum[string]) + + Enum + + relevancy + + fullpath + + lastmodtime + + Description: Sort order for results. Possible values: + - `relevancy`: Sort by Lucene score (most relevant first). + - `fullpath`: Sort by file path (alphabetical). + - `lastmodtime`: Sort by last modification date (newest first). + Response 200 (application/json) + Body From 496bf62c82df435fdbe3d7e6de5d2a397cf2b7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Tue, 21 Oct 2025 06:57:06 -0700 Subject: [PATCH 4/4] Fix test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernát Gábor --- .../indexer/search/SearchEngineTest.java | 6 +++- .../opengrok/indexer/util/TestRepository.java | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java index 7ac6c8fe8dc..1e8e4e0cc92 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/search/SearchEngineTest.java @@ -24,6 +24,8 @@ package org.opengrok.indexer.search; import java.io.File; +import java.net.URL; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -59,7 +61,9 @@ class SearchEngineTest { @BeforeAll static void setUpClass() throws Exception { repository = new TestRepository(); - repository.create(HistoryGuru.class.getResource("/repositories")); + URL url = HistoryGuru.class.getResource("/repositories"); + repository.createEmpty(); + repository.copyDirectoryWithUniqueModifiedTime(Path.of(url.toURI()), Path.of(repository.getSourceRoot())); 
RuntimeEnvironment env = RuntimeEnvironment.getInstance(); env.setSourceRoot(repository.getSourceRoot()); diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 3dd15c04197..ae90992118c 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -112,6 +112,35 @@ public void create(@NotNull final URL url) throws IOException, URISyntaxExceptio * @throws IOException on error */ public void copyDirectory(Path src, Path dest) throws IOException { + try (Stream stream = Files.walk(src)) { + stream.forEach(sourceFile -> { + if (sourceFile.equals(src)) { + return; + } + try { + Path destRelativePath = getDestinationRelativePath(src, sourceFile); + Path destPath = dest.resolve(destRelativePath); + if (Files.isDirectory(sourceFile)) { + if (!Files.exists(destPath)) { + Files.createDirectory(destPath); + } + return; + } + Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } + } + + /** + * Assumes the destination directory exists. + * @param src source directory + * @param dest destination directory + * @throws IOException on error + */ + public void copyDirectoryWithUniqueModifiedTime(Path src, Path dest) throws IOException { // Create a deterministic order of paths for creation time, so last modified time indexing is stable in tests // note we cannot use Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES) // as the original creation time is the user checkout and not different across files