Skip to content

Commit 5000153

Browse files
Copilot and myieye authored
Fix FTS5 syntax errors when searching with punctuation characters (#2085)
* Initial plan
* Add failing tests for punctuation in search and fix FTS query escaping
  Co-authored-by: myieye <12587509+myieye@users.noreply.github.com>
* Improve FTS query escaping to handle punctuation correctly
  Co-authored-by: myieye <12587509+myieye@users.noreply.github.com>
* Address code review feedback - extract constants and improve matching logic
  Co-authored-by: myieye <12587509+myieye@users.noreply.github.com>
* Simplify FTS5 escaping - always wrap user queries in quotes
  Co-authored-by: myieye <12587509+myieye@users.noreply.github.com>
* Fix Non-FTS test cases to use < 3 character search terms
  Co-authored-by: myieye <12587509+myieye@users.noreply.github.com>
* Simplify comments and interpolation
* Fix comment
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: myieye <12587509+myieye@users.noreply.github.com>
Co-authored-by: Tim Haasdyk <tim_haasdyk@sil.org>
1 parent e7033b0 commit 5000153

File tree

2 files changed

+31
-2
lines changed

2 files changed

+31
-2
lines changed

backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,20 @@ public class EntrySearchService(LcmCrdtDbContext dbContext, ILogger<EntrySearchS
2626
{
2727
internal IQueryable<EntrySearchRecord> EntrySearchRecords => dbContext.Set<EntrySearchRecord>();
2828

29-
//ling2db table
29+
//linq2db table
3030
private ITable<EntrySearchRecord> EntrySearchRecordsTable => dbContext.GetTable<EntrySearchRecord>();
3131

3232
public IQueryable<Entry> FilterAndRank(IQueryable<Entry> queryable,
3333
string query,
3434
bool rankResults,
3535
bool orderAscending)
3636
{
37+
var ftsString = ToFts5LiteralString(query);
38+
3739
//starting from EntrySearchRecordsTable rather than queryable otherwise linq2db loses track of the table
3840
var filtered = from searchRecord in EntrySearchRecordsTable
3941
from entry in queryable.InnerJoin(r => r.Id == searchRecord.Id)
40-
where Sql.Ext.SQLite().Match(searchRecord, query) && (entry.LexemeForm.SearchValue(query)
42+
where Sql.Ext.SQLite().Match(searchRecord, ftsString) && (entry.LexemeForm.SearchValue(query)
4143
|| entry.CitationForm.SearchValue(query)
4244
|| entry.Senses.Any(s => s.Gloss.SearchValue(query)))
4345
select new { entry, searchRecord };
@@ -57,6 +59,14 @@ where Sql.Ext.SQLite().Match(searchRecord, query) && (entry.LexemeForm.SearchVal
5759
return filtered.Select(t => t.entry);
5860
}
5961

62+
private static string ToFts5LiteralString(string query)
63+
{
64+
// https://sqlite.org/fts5.html#fts5_strings
65+
// - escape double quotes by doubling them
66+
// - wrap the entire query in quotes
67+
return $"\"{query.Replace("\"", "\"\"")}\"";
68+
}
69+
6070
public bool ValidSearchTerm(string query) => query.Normalize(NormalizationForm.FormC).Length >= 3;
6171

6272
public static string? Best(MultiString ms, WritingSystem[] wss, WritingSystemType type)
@@ -243,6 +253,9 @@ private static EntrySearchRecord ToEntrySearchRecord(Entry entry, WritingSystem[
243253

244254
public IAsyncEnumerable<EntrySearchRecord> Search(string query)
245255
{
256+
// (Currently only used by tests)
257+
// This method is for advanced queries with FTS5 syntax (wildcards, operators, etc.).
258+
// So, we don't use ToFts5LiteralString.
246259
return EntrySearchRecords
247260
.ToLinqToDB()
248261
.Where(e => Sql.Ext.SQLite().Match(e, query))

backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,22 @@ public async Task HeadwordOrder(string searchTerm, string wordsAndGlosses, strin
438438
.Select(e => e.LexemeForm["en"]);
439439
string.Join(",", result).Should().Be(expectedOrder);
440440
}
441+
442+
[Theory]
443+
[InlineData("a;", "a;test")] // Non-FTS search (2 chars total)
444+
[InlineData("abc;", "abc;test")] // FTS search (4 chars total)
445+
[InlineData("a:", "a:test")] // Non-FTS with colon (2 chars total)
446+
[InlineData("abc:", "abc:test")] // FTS with colon (4 chars total)
447+
[InlineData("a\"", "a\"test")] // Non-FTS with quote (2 chars total)
448+
[InlineData("abc\"", "abc\"test")] // FTS with quote (4 chars total)
449+
[InlineData("a'", "a'test")] // Non-FTS with apostrophe (U+0027, 2 chars total)
450+
[InlineData("abc'", "abc'test")] // FTS with apostrophe (U+0027, 4 chars total)
451+
public async Task PunctuationWorks(string searchTerm, string word)
452+
{
453+
await Api.CreateEntry(new Entry { LexemeForm = { ["en"] = word } });
454+
var results = await Api.SearchEntries(searchTerm).Select(e => e.LexemeForm["en"]).ToArrayAsync();
455+
results.Should().Contain(word);
456+
}
441457
}
442458

443459
// A separate class to preserve the readability of the results in the main test class

0 commit comments

Comments
 (0)