Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -158,4 +158,13 @@ public void toOutput(IndexOutput out) throws IOException {
out.writeVInt(graphNodeIdsToDocIds[ord]);
}
}

/**
 * Returns the number of document-id slots tracked by this map, i.e. the
 * length of the doc-id to graph-node-id lookup table.
 *
 * @return the size of the doc-id side of the mapping
 */
public int size() {
    return this.docIdsToGraphNodeIds.length;
}

/**
 * Renders both direction tables of this mapping for debugging/logging.
 * Output format: {@code GraphNodeIdToDocMap{graphNodeIdsToDocIds=[..], docIdsToGraphNodeIds=[..]}}.
 *
 * @return a human-readable dump of the ordinal-to-doc and doc-to-ordinal arrays
 */
@Override
public String toString() {
    // Build the exact same string as straight concatenation would produce.
    StringBuilder sb = new StringBuilder("GraphNodeIdToDocMap{");
    sb.append("graphNodeIdsToDocIds=").append(Arrays.toString(graphNodeIdsToDocIds));
    sb.append(", docIdsToGraphNodeIds=").append(Arrays.toString(docIdsToGraphNodeIds));
    sb.append('}');
    return sb.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues {
* @param mergeState Merge state containing readers and doc maps
*/
public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
log.info("Start initializing RandomAccessMergedFloatVectorValues for field {} in segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name);
this.totalDocsCount = Math.toIntExact(Arrays.stream(mergeState.maxDocs).asLongStream().sum());
this.fieldInfo = fieldInfo;
this.mergeState = mergeState;
Expand Down Expand Up @@ -689,6 +690,10 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge
}
}

log.info("Total vectors count across all segments for field {}: {}", fieldName, totalVectorsCount);
log.info("Total live vectors count across all segments for field {}: {}", fieldName, totalLiveVectorsCount);
log.info("Base ordinals for each reader: {}", Arrays.toString(baseOrds));

assert (totalVectorsCount <= totalDocsCount) : "Total number of vectors exceeds the total number of documents";
assert (totalLiveVectorsCount <= totalVectorsCount) : "Total number of live vectors exceeds the total number of vectors";
assert (dimension > 0) : "No vectors found for field " + fieldName;
Expand Down Expand Up @@ -840,6 +845,8 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge
this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(graphNodeIdToDocIds);
this.compactOrdToDocMap = new GraphNodeIdToDocMap(compactOrdToDocIds);
log.debug("Created RandomAccessMergedFloatVectorValues with {} total vectors from {} readers", size, readers.length);
log.info("Created RandomAccessMergedFloatVectorValues with {} total vectors from {} readers", size, readers.length);
log.info("End initializing RandomAccessMergedFloatVectorValues for field {} in segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name);

}

Expand Down Expand Up @@ -1113,6 +1120,7 @@ private boolean tryLeadingSegmentMerge() throws IOException {
SIMD_POOL_MERGE.submit(
() -> IntStream.range(leadingGraph.getIdUpperBound(), heapRavv.size()).parallel().forEach(ord -> {
builder.addGraphNode(ord, vv.get().getVector(ord));
log.info("Adding node {} with vector {}", ord, vv.get().getVector(ord));
})
).join();

Expand All @@ -1121,17 +1129,20 @@ private boolean tryLeadingSegmentMerge() throws IOException {
if (!liveGraphNodesPerReader[LEADING_READER_IDX].get(i)) {
// we need to convert from the "mid" to the "heap" ordinal space to avoid errors
builder.markNodeDeleted(midToHeapOrds[i]);
log.info("Node {} marked as deleted", midToHeapOrds[i]);
}
}

builder.cleanup();

graph = (OnHeapGraphIndex) builder.getGraph();
log.info("Graph Structure after getting graph from merged float vector: {}", graph);
}

// Note that the ordinals for the OnDiskGraphIndex will automatically be compacted
// But the OnHeapGraphIndex will not
var finalOrdToDocMap = new GraphNodeIdToDocMap(finalOrdToDocId);
log.info("Final ordinal mapping for graph index: {}", finalOrdToDocMap);
writeField(fieldInfo, heapRavv, null, finalOrdToDocMap, graph);
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -340,9 +340,16 @@ public void testJVectorKnnIndex_mergeEnabled() throws IOException {
final Query filterQuery = new MatchAllDocsQuery();
final IndexSearcher searcher = newSearcher(reader);
KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery);
TopDocs topDocs = searcher.search(knnFloatVectorQuery, k);
assertEquals(k, topDocs.totalHits.value());
Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc);
//TopDocs topDocs = searcher.search(knnFloatVectorQuery, k);
// One thing I have found when using the match all docs is that none of the documents in the
// segment are actually missing.
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 10);
for (int i = 0; i < 10; i++) {
Document doc = reader.storedFields().document(topDocs.scoreDocs[i].doc);
log.info("DocID: {}, Score: {}, id: {}", topDocs.scoreDocs[i].doc, topDocs.scoreDocs[i].score, doc.get("my_doc_id"));
}
//assertEquals(k, topDocs.totalHits.value());
/*Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc);
assertEquals("1", doc.get("my_doc_id"));
Assert.assertEquals(
VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f }),
Expand All @@ -362,7 +369,7 @@ public void testJVectorKnnIndex_mergeEnabled() throws IOException {
VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 3.0f }),
topDocs.scoreDocs[2].score,
0.001f
);
);*/
log.info("successfully completed search tests");
}
}
Expand Down
Loading