Skip to content

Commit b888ef5

Browse files
committed
Refines embedding generation with specific identifier meanings and improved feature formatting
Updates the embedding processor to extract specific source meanings for identifier patterns instead of using generic pattern meanings. It also standardizes the string representation of semantic features within generated text segments to improve the quality of vector embeddings.
1 parent 14f8b5c commit b888ef5

1 file changed

Lines changed: 44 additions & 21 deletions

File tree

src/main/java/dev/ikm/server/cosmos/constellation/charting/EmbeddingChartProcessor.java

Lines changed: 44 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
package dev.ikm.server.cosmos.constellation.charting;
22

3+
import dev.ikm.server.cosmos.calculator.CalculatorService;
34
import java.util.ArrayList;
45
import java.util.HashSet;
56
import java.util.List;
@@ -13,8 +14,10 @@
1314
import dev.ikm.tinkar.common.id.IntIdSet;
1415
import dev.ikm.tinkar.common.service.PrimitiveData;
1516
import dev.ikm.tinkar.coordinate.stamp.calculator.Latest;
17+
import dev.ikm.tinkar.entity.Field;
1618
import dev.ikm.tinkar.entity.PatternEntityVersion;
1719
import dev.ikm.tinkar.entity.SemanticEntityVersion;
20+
import dev.ikm.tinkar.terms.EntityProxy;
1821
import dev.ikm.tinkar.terms.TinkarTermV2;
1922
import dev.langchain4j.data.document.Metadata;
2023
import dev.langchain4j.data.embedding.Embedding;
@@ -34,15 +37,19 @@
3437
@Component
3538
public class EmbeddingChartProcessor implements ChartProcessor {
3639

40+
private final CalculatorService calculatorService;
41+
3742
private record EmbeddingData(TextSegment name, Metadata metadata) {
3843
}
3944

4045
private final EmbeddingStore<TextSegment> embeddingStore;
4146
private final EmbeddingModel embeddingModel;
4247

43-
public EmbeddingChartProcessor(EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel) {
48+
public EmbeddingChartProcessor(EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel,
49+
CalculatorService calculatorService) {
4450
this.embeddingStore = embeddingStore;
4551
this.embeddingModel = embeddingModel;
52+
this.calculatorService = calculatorService;
4653
}
4754

4855
@Override
@@ -81,7 +88,7 @@ private EmbeddingData generateEmbeddingName(int nid, UUID constelationId, Charti
8188
String categoryContext = generateCategory(nid, chartingContext);
8289
String synonymsContext = generateSynonyms(nid, chartingContext);
8390
String descriptionContext = generateDescription(nid, chartingContext);
84-
Set<String> semanticFeaturesContext = generateSemanticFeatures(nid, chartingContext);
91+
String semanticFeaturesContext = generateSemanticFeatures(nid, chartingContext);
8592

8693
if (!conceptContext.isEmpty()) {
8794
nameBuilder.append("Concept: " + conceptContext).append(". ");
@@ -96,10 +103,7 @@ private EmbeddingData generateEmbeddingName(int nid, UUID constelationId, Charti
96103
nameBuilder.append("Description: " + descriptionContext).append(". ");
97104
}
98105
if (!semanticFeaturesContext.isEmpty()) {
99-
nameBuilder.append("Available Semantic Features:");
100-
semanticFeaturesContext.forEach(feature -> {
101-
nameBuilder.append(" " + feature);
102-
});
106+
nameBuilder.append("Available Semantic Features:" + semanticFeaturesContext).append(".");
103107
}
104108

105109
// Create metadata for the embedding - this will help to filter based on
@@ -146,7 +150,7 @@ private String generateDescription(int nid, ChartingContext chartingContext) {
146150
return "";
147151
}
148152

149-
private Set<String> generateSemanticFeatures(int nid, ChartingContext chartingContext) {
153+
private String generateSemanticFeatures(int nid, ChartingContext chartingContext) {
150154
Set<String> semanticFeaturesContext = new HashSet<>();
151155
PrimitiveData.get().forEachSemanticNidForComponent(nid, semanticNid -> {
152156
Latest<SemanticEntityVersion> latestSemanticEntityVersion = chartingContext.chart().stampCalculator()
@@ -155,27 +159,46 @@ private Set<String> generateSemanticFeatures(int nid, ChartingContext chartingCo
155159
SemanticEntityVersion semanticEntityVersion = latestSemanticEntityVersion.get();
156160
Latest<PatternEntityVersion> latestPatternEntityVersion = chartingContext.chart().stampCalculator()
157161
.latest(semanticEntityVersion.patternNid());
158-
if (latestPatternEntityVersion.isPresent() && !patternsToSkip().contains(latestPatternEntityVersion.get().nid())) {
162+
if (latestPatternEntityVersion.isPresent()
163+
&& !patternsToSkip().contains(latestPatternEntityVersion.get().nid())) {
159164
PatternEntityVersion patternEntityVersion = latestPatternEntityVersion.get();
160-
String purpose = chartingContext.chart().languageCalculator()
161-
.getDescriptionTextOrNid(patternEntityVersion.semanticPurposeNid());
162-
semanticFeaturesContext.add(purpose + ", ");
165+
String meaning;
166+
if (patternEntityVersion.nid() == TinkarTermV2.IDENTIFIER_PATTERN.nid()) {
167+
Latest<Field<Object>> latestIdentifierSourceField = chartingContext.chart().stampCalculator()
168+
.getFieldForSemanticWithMeaning(semanticEntityVersion.nid(),
169+
TinkarTermV2.IDENTIFIER_SOURCE.nid());
170+
if (latestIdentifierSourceField.isPresent()) {
171+
EntityProxy value = (EntityProxy) latestIdentifierSourceField.get().value();
172+
meaning = chartingContext.chart().languageCalculator().getDescriptionTextOrNid(value);
173+
} else {
174+
meaning = chartingContext.chart().languageCalculator()
175+
.getDescriptionTextOrNid(patternEntityVersion.semanticMeaningNid());
176+
}
177+
} else {
178+
meaning = chartingContext.chart().languageCalculator()
179+
.getDescriptionTextOrNid(patternEntityVersion.semanticMeaningNid());
180+
}
181+
semanticFeaturesContext.add(meaning);
163182
}
164183
}
165184
});
166-
// return semanticFeaturesBuilder.toString().substring(0,
167-
// semanticFeaturesBuilder.length() - 2);
168-
return semanticFeaturesContext;
185+
186+
// [ ] Implement capturing of semantics of semantics
187+
StringBuilder sb = new StringBuilder();
188+
for (String feature : semanticFeaturesContext) {
189+
sb.append(feature + ", ");
190+
}
191+
return sb.toString().substring(0, sb.length() - 2);
169192
}
170193

171194
private List<Integer> patternsToSkip() {
172-
return List.of(TinkarTermV2.DESCRIPTION_PATTERN.nid(),
173-
TinkarTermV2.EL_PLUS_PLUS_INFERRED_AXIOMS_PATTERN.nid(),
174-
TinkarTermV2.EL_PLUS_PLUS_STATED_AXIOMS_PATTERN.nid(),
175-
TinkarTermV2.STATED_NAVIGATION_PATTERN.nid(),
176-
TinkarTermV2.INFERRED_NAVIGATION_PATTERN.nid(),
177-
TinkarTermV2.OWL_AXIOM_SYNTAX_PATTERN.nid());
178-
}
195+
return List.of(TinkarTermV2.DESCRIPTION_PATTERN.nid(),
196+
TinkarTermV2.EL_PLUS_PLUS_INFERRED_AXIOMS_PATTERN.nid(),
197+
TinkarTermV2.EL_PLUS_PLUS_STATED_AXIOMS_PATTERN.nid(),
198+
TinkarTermV2.STATED_NAVIGATION_PATTERN.nid(),
199+
TinkarTermV2.INFERRED_NAVIGATION_PATTERN.nid(),
200+
TinkarTermV2.OWL_AXIOM_SYNTAX_PATTERN.nid());
201+
}
179202

180203
private void processBatch(List<EmbeddingData> embeddingBatch, ChartingContext chartingContext) {
181204
// Generate embedding values from string names

0 commit comments

Comments
 (0)