Skip to content

Commit 3fc6202

Browse files
доработка поискового движка (добавлен direct index, использующий FrozenDictionary)
1 parent a0a79f0 commit 3fc6202

34 files changed

+1787
-262
lines changed

src/Rsse.Engine.VectorSearch/Algorithms/ExtendedSearchGinDirectOffset.cs

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
5252

5353
foreach (var documentId in idFromGin)
5454
{
55-
if (GinExtended.TryGetExternalDocumentId(documentId, out var externalDocumentId))
55+
if (GinExtended.TryGetOffsetTokenVector(documentId, out _, out var externalDocumentId))
5656
{
5757
const int metric = 1;
5858
metricsCalculator.AppendExtended(metric, searchVector, externalDocumentId, GeneralDirectIndex);
@@ -115,13 +115,7 @@ private void CreateExtendedSearchSpace(TokenVector searchVector, IMetricsCalcula
115115
}
116116
}
117117

118-
if (listExists.Count == 1)
119-
{
120-
AppendMetric2(list, listExists, multi, metricsCalculator, searchVector);
121-
return;
122-
}
123-
124-
do
118+
while (listExists.Count > 1)
125119
{
126120
MergeAlgorithm.FindMin(list, listExists, out var minI0, out var docId0, out var docId1);
127121

@@ -168,7 +162,7 @@ private void CreateExtendedSearchSpace(TokenVector searchVector, IMetricsCalcula
168162
CalculateAndAppendMetric(metricsCalculator, searchVector, docId0, sIndex);
169163
}
170164
}
171-
} while (listExists.Count > 1);
165+
}
172166

173167
if (listExists.Count == 1)
174168
{
@@ -193,7 +187,7 @@ private void AppendMetric1(bool isMulti, TokenVector searchVector, IMetricsCalcu
193187
}
194188
else
195189
{
196-
if (GinExtended.TryGetExternalDocumentId(documentId, out var externalDocumentId))
190+
if (GinExtended.TryGetOffsetTokenVector(documentId, out _, out var externalDocumentId))
197191
{
198192
const int metric = 1;
199193
metricsCalculator.AppendExtended(metric, searchVector, externalDocumentId, GeneralDirectIndex);
@@ -220,7 +214,7 @@ private void AppendMetric2(List<InternalDocumentListEnumerator> list, List<int>
220214
do
221215
{
222216
var documentId = enumerator.Current;
223-
if (GinExtended.TryGetExternalDocumentId(documentId, out var externalDocumentId))
217+
if (GinExtended.TryGetOffsetTokenVector(documentId, out _, out var externalDocumentId))
224218
{
225219
const int metric = 1;
226220
metricsCalculator.AppendExtended(metric, searchVector, externalDocumentId, GeneralDirectIndex);
@@ -232,29 +226,25 @@ private void AppendMetric2(List<InternalDocumentListEnumerator> list, List<int>
232226
private void CalculateAndAppendMetric(IMetricsCalculator metricsCalculator, TokenVector searchVector,
233227
InternalDocumentId documentId, int sIndex)
234228
{
235-
if (GinExtended.TryGetOffsetTokenVector(documentId, out var offsetTokenVector))
229+
if (!GinExtended.TryGetOffsetTokenVector(documentId, out var offsetTokenVector, out var externalDocumentId))
236230
{
237-
var position = -1;
238-
var metric = 0;
231+
return;
232+
}
239233

240-
for (var i = sIndex; i < searchVector.Count; i++)
241-
{
242-
var token = searchVector.ElementAt(i);
234+
var position = -1;
235+
var metric = 0;
243236

244-
if (offsetTokenVector.TryFindNextTokenPosition(token, ref position))
245-
{
246-
metric++;
247-
}
248-
}
237+
for (var i = sIndex; i < searchVector.Count; i++)
238+
{
239+
var token = searchVector.ElementAt(i);
249240

250-
if (position >= 0)
241+
if (offsetTokenVector.TryFindNextTokenPosition(token, ref position))
251242
{
252-
if (GinExtended.TryGetExternalDocumentId(documentId, out var externalDocumentId))
253-
{
254-
metricsCalculator.AppendExtended(metric, searchVector, externalDocumentId, GeneralDirectIndex);
255-
}
243+
metric++;
256244
}
257245
}
246+
247+
metricsCalculator.AppendExtended(metric, searchVector, externalDocumentId, GeneralDirectIndex);
258248
}
259249

260250
private static void SwapAndRemoveAt(List<int> listExists, int i)

src/Rsse.Engine.VectorSearch/Algorithms/ExtendedSearchGinDirectOffsetFilter.cs

Lines changed: 34 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
using RsseEngine.Contracts;
77
using RsseEngine.Dto;
88
using RsseEngine.Indexes;
9-
using RsseEngine.Iterators;
109
using RsseEngine.Pools;
1110
using RsseEngine.Processor;
1211

@@ -42,7 +41,7 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
4241
try
4342
{
4443
if (!RelevanceFilter.FindFilteredDocumentsExtendedMerge(GinExtended, searchVector, idsFromGin,
45-
sortedIds, out var filteredTokensCount))
44+
sortedIds, out var filteredTokensCount, out var minRelevancyCount))
4645
{
4746
return;
4847
}
@@ -59,7 +58,7 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
5958

6059
foreach (var documentId in idFromGin)
6160
{
62-
if (GinExtended.TryGetExternalDocumentId(documentId, out var externalDocumentId))
61+
if (GinExtended.TryGetOffsetTokenVector(documentId, out _, out var externalDocumentId))
6362
{
6463
metricsCalculator.AppendExtended(1, searchVector, externalDocumentId, GeneralDirectIndex);
6564
}
@@ -72,7 +71,7 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
7271
if (cancellationToken.IsCancellationRequested)
7372
throw new OperationCanceledException(nameof(ExtendedSearchGinMerge));
7473

75-
CreateExtendedSearchSpace(searchVector, metricsCalculator, sortedIds, filteredTokensCount);
74+
CreateExtendedSearchSpace(searchVector, metricsCalculator, sortedIds, filteredTokensCount, minRelevancyCount);
7675

7776
break;
7877
}
@@ -92,9 +91,10 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
9291
/// <param name="metricsCalculator"></param>
9392
/// <param name="sortedIds"></param>
9493
/// <param name="filteredTokensCount"></param>
94+
/// <param name="minRelevancyCount">Количество векторов, обеспечивающих релевантность.</param>
9595
/// <returns>Список векторов GIN.</returns>
9696
private void CreateExtendedSearchSpace(TokenVector searchVector, IMetricsCalculator metricsCalculator,
97-
List<InternalDocumentIdList> sortedIds, int filteredTokensCount)
97+
List<InternalDocumentIdList> sortedIds, int filteredTokensCount, int minRelevancyCount)
9898
{
9999
var list = TempStoragePool.ListInternalEnumeratorListsStorage.Get();
100100
var listExists = TempStoragePool.IntListsStorage.Get();
@@ -117,20 +117,14 @@ private void CreateExtendedSearchSpace(TokenVector searchVector, IMetricsCalcula
117117
}
118118
}
119119

120-
if (listExists.Count == 1)
121-
{
122-
AppendMetric2(list, listExists, metricsCalculator, searchVector);
123-
return;
124-
}
125-
126-
do
120+
while (listExists.Count > 1)
127121
{
128122
MergeAlgorithm.FindMin(list, listExists, out var minI0, out var docId0, out var docId1);
129123

130124
START:
131125
if (docId0.Value < docId1.Value)
132126
{
133-
AppendMetric1(searchVector, metricsCalculator, docId0);
127+
CalculateAndAppendMetric(metricsCalculator, searchVector, docId0, minRelevancyCount);
134128

135129
ref var enumeratorI = ref CollectionsMarshal.AsSpan(list)[minI0];
136130
if (!enumeratorI.MoveNext())
@@ -146,34 +140,34 @@ private void CreateExtendedSearchSpace(TokenVector searchVector, IMetricsCalcula
146140
}
147141
else if (docId0 == docId1)
148142
{
149-
var sIndex = int.MaxValue;
150-
151143
for (var i = listExists.Count - 1; i >= 0; i--)
152144
{
153145
var index = listExists[i];
154146

155147
ref var enumeratorI = ref CollectionsMarshal.AsSpan(list)[index];
156148
if (docId0 == enumeratorI.Current)
157149
{
158-
sIndex = Math.Min(sIndex, index);
159150
if (!enumeratorI.MoveNext())
160151
{
161152
SwapAndRemoveAt(listExists, i);
162153
}
163154
}
164155
}
165156

166-
// поиск в векторе extended
167-
if (sIndex < int.MaxValue)
168-
{
169-
CalculateAndAppendMetric(metricsCalculator, searchVector, docId0);
170-
}
157+
CalculateAndAppendMetric(metricsCalculator, searchVector, docId0, minRelevancyCount);
171158
}
172-
} while (listExists.Count > 1);
159+
}
173160

174161
if (listExists.Count == 1)
175162
{
176-
AppendMetric2(list, listExists, metricsCalculator, searchVector);
163+
var index = listExists[0];
164+
var enumerator = list[index];
165+
166+
do
167+
{
168+
var documentId = enumerator.Current;
169+
CalculateAndAppendMetric(metricsCalculator, searchVector, documentId, minRelevancyCount);
170+
} while (enumerator.MoveNext());
177171
}
178172
}
179173
finally
@@ -184,53 +178,35 @@ private void CreateExtendedSearchSpace(TokenVector searchVector, IMetricsCalcula
184178
}
185179
}
186180

187-
private void AppendMetric1(TokenVector searchVector, IMetricsCalculator metricsCalculator,
188-
InternalDocumentId documentId)
189-
{
190-
CalculateAndAppendMetric(metricsCalculator, searchVector, documentId);
191-
}
192-
193-
private void AppendMetric2(List<InternalDocumentListEnumerator> list, List<int> listExists,
194-
IMetricsCalculator metricsCalculator, TokenVector searchVector)
181+
private void CalculateAndAppendMetric(IMetricsCalculator metricsCalculator, TokenVector searchVector,
182+
InternalDocumentId documentId, int minRelevancyCount)
195183
{
196-
var index = listExists[0];
197-
var enumerator = list[index];
198-
199-
do
184+
if (!GinExtended.TryGetOffsetTokenVector(documentId, out var offsetTokenVector, out var externalDocumentId))
200185
{
201-
var documentId = enumerator.Current;
202-
CalculateAndAppendMetric(metricsCalculator, searchVector, documentId);
203-
} while (enumerator.MoveNext());
204-
}
186+
return;
187+
}
205188

206-
private void CalculateAndAppendMetric(IMetricsCalculator metricsCalculator, TokenVector searchVector,
207-
InternalDocumentId documentId)
208-
{
209-
const int sIndex = 0;
189+
var position = -1;
190+
var empty = 0;
210191

211-
if (GinExtended.TryGetOffsetTokenVector(documentId, out var offsetTokenVector))
192+
for (var i = 0; i < searchVector.Count; i++)
212193
{
213-
var position = -1;
214-
var metric = 0;
194+
var token = searchVector.ElementAt(i);
215195

216-
for (var i = sIndex; i < searchVector.Count; i++)
196+
if (!offsetTokenVector.TryFindNextTokenPosition(token, ref position))
217197
{
218-
var token = searchVector.ElementAt(i);
198+
empty++;
219199

220-
if (offsetTokenVector.TryFindNextTokenPosition(token, ref position))
200+
if (empty > searchVector.Count - minRelevancyCount)
221201
{
222-
metric++;
223-
}
224-
}
225-
226-
if (position >= 0)
227-
{
228-
if (GinExtended.TryGetExternalDocumentId(documentId, out var externalDocumentId))
229-
{
230-
metricsCalculator.AppendExtended(metric, searchVector, externalDocumentId, GeneralDirectIndex);
202+
return;
231203
}
232204
}
233205
}
206+
207+
var metric = searchVector.Count - empty;
208+
209+
metricsCalculator.AppendExtended(metric, searchVector, externalDocumentId, GeneralDirectIndex);
234210
}
235211

236212
private static void SwapAndRemoveAt(List<int> listExists, int i)

src/Rsse.Engine.VectorSearch/Algorithms/ExtendedSearchGinFastFilter.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
4040
try
4141
{
4242
if (!RelevanceFilter.FindFilteredDocumentsExtended(GinExtended, searchVector,
43-
idsFromGin, sortedIds, filteredDocuments))
43+
idsFromGin, sortedIds, filteredDocuments, out var minRelevancyCount))
4444
{
4545
return;
4646
}
@@ -80,8 +80,8 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
8080

8181
processedDocuments.Add(documentId);
8282

83-
metricsCalculator.AppendExtendedMetric(searchVector, documentId,
84-
GeneralDirectIndex, searchStartIndex);
83+
metricsCalculator.AppendExtendedRelevancyMetric(searchVector, documentId,
84+
GeneralDirectIndex, minRelevancyCount, searchStartIndex);
8585

8686
if (filteredDocuments.Count == processedDocuments.Count)
8787
{
@@ -105,8 +105,8 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
105105
continue;
106106
}
107107

108-
metricsCalculator.AppendExtendedMetric(searchVector, documentId,
109-
GeneralDirectIndex, searchStartIndex);
108+
metricsCalculator.AppendExtendedRelevancyMetric(searchVector, documentId,
109+
GeneralDirectIndex, minRelevancyCount, searchStartIndex);
110110

111111
if (filteredDocuments.Count == 0)
112112
{

src/Rsse.Engine.VectorSearch/Algorithms/ExtendedSearchGinFilter.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
4040
try
4141
{
4242
if (!RelevanceFilter.FindFilteredDocumentsExtended(GinExtended, searchVector,
43-
idsFromGin, sortedIds, filteredDocuments))
43+
idsFromGin, sortedIds, filteredDocuments, out var minRelevancyCount))
4444
{
4545
return;
4646
}
@@ -51,7 +51,7 @@ public void FindExtended(TokenVector searchVector, IMetricsCalculator metricsCal
5151
// поиск в векторе extended
5252
foreach (var documentId in filteredDocuments)
5353
{
54-
metricsCalculator.AppendExtendedMetric(searchVector, documentId, GeneralDirectIndex);
54+
metricsCalculator.AppendExtendedRelevancyMetric(searchVector, documentId, GeneralDirectIndex, minRelevancyCount);
5555
}
5656
}
5757
finally

0 commit comments

Comments
 (0)