Skip to content

Commit 1fd0e23

Browse files
committed
Implements embedding-based charting processor and updates Spring Boot
Provides a full implementation of EmbeddingChartProcessor to transform domain concepts into vector embeddings. The processor generates descriptive text segments including concept definitions, categories, and semantic features, then stores them with associated metadata in a vector store to support retrieval-augmented generation (RAG).
1 parent e626298 commit 1fd0e23

3 files changed

Lines changed: 157 additions & 39 deletions

File tree

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
<protobuf-java.version>4.30.2-r2</protobuf-java.version>
6565

6666
<!-- Spring -->
67-
<spring-boot.version>3.5.5</spring-boot.version>
67+
<spring-boot.version>3.5.11</spring-boot.version>
6868
<spring-data-bom.version>2024.1.5</spring-data-bom.version>
6969
<thymeleaf-layout-dialect.version>3.4.0</thymeleaf-layout-dialect.version>
7070
<htmx-spring-boot-thymeleaf.version>4.0.1</htmx-spring-boot-thymeleaf.version>

src/main/java/dev/ikm/server/cosmos/constellation/ConstellationController.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
package dev.ikm.server.cosmos.constellation;
22

3-
import dev.ikm.server.cosmos.calculator.CalculatorService;
4-
import dev.ikm.server.cosmos.ike.Facade;
5-
import dev.ikm.server.cosmos.observatory.StringToFacade;
6-
import dev.ikm.tinkar.terms.TinkarTermV2;
7-
import io.github.wimdeblauwe.htmx.spring.boot.mvc.HxRequest;
3+
import java.util.Set;
4+
import java.util.UUID;
5+
86
import org.slf4j.Logger;
97
import org.slf4j.LoggerFactory;
10-
import org.springframework.beans.factory.annotation.Autowired;
118
import org.springframework.data.domain.Pageable;
129
import org.springframework.data.web.PageableDefault;
1310
import org.springframework.stereotype.Controller;
@@ -21,8 +18,11 @@
2118
import org.springframework.web.bind.annotation.ResponseBody;
2219
import org.springframework.web.servlet.view.FragmentsRendering;
2320

24-
import java.util.Set;
25-
import java.util.UUID;
21+
import dev.ikm.server.cosmos.calculator.CalculatorService;
22+
import dev.ikm.server.cosmos.ike.Facade;
23+
import dev.ikm.server.cosmos.observatory.StringToFacade;
24+
import dev.ikm.tinkar.terms.TinkarTermV2;
25+
import io.github.wimdeblauwe.htmx.spring.boot.mvc.HxRequest;
2626

2727
@Controller
2828
public class ConstellationController {
Lines changed: 148 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,161 @@
11
package dev.ikm.server.cosmos.constellation.charting;
22

3+
import java.util.ArrayList;
4+
import java.util.List;
5+
import java.util.Map;
6+
import java.util.Optional;
7+
import java.util.UUID;
8+
39
import org.springframework.stereotype.Component;
410

11+
import dev.ikm.tinkar.common.id.IntIdSet;
12+
import dev.ikm.tinkar.common.service.PrimitiveData;
13+
import dev.ikm.tinkar.coordinate.stamp.calculator.Latest;
14+
import dev.ikm.tinkar.entity.PatternEntityVersion;
15+
import dev.ikm.tinkar.entity.SemanticEntityVersion;
16+
import dev.ikm.tinkar.terms.TinkarTermV2;
17+
import dev.langchain4j.data.document.Metadata;
18+
import dev.langchain4j.data.embedding.Embedding;
519
import dev.langchain4j.data.segment.TextSegment;
620
import dev.langchain4j.model.embedding.EmbeddingModel;
721
import dev.langchain4j.store.embedding.EmbeddingStore;
822

23+
/**
 * Template of the text generated for each concept before it is embedded:
 *
24+
* Concept: [Primary Name].
25+
* Category: [Immediate Parent/Is-A].
26+
* Synonyms: [Comma-separated aliases].
27+
* Description: [Primary Definition Semantic].
28+
* Available Semantic Features: [Semantic Name 1] (Provides [Brief
29+
* Purpose/Meaning]), [Semantic Name 2] (Provides [Brief Purpose/Meaning]).
30+
*/
31+
932
@Component
1033
public class EmbeddingChartProcessor implements ChartProcessor {
1134

12-
protected EmbeddingStore<TextSegment> embeddingStore;
13-
protected EmbeddingModel embeddingModel;
14-
15-
@Override
16-
public String getProcessorName() {
17-
return "Embedding Chart Processor";
18-
}
19-
20-
@Override
21-
public void process(ChartingContext chartingContext) {
22-
23-
24-
// final List<TextSegment> names = batch.stream().map(this::generateEmbeddingName).map(TextSegment::from).toList();
25-
// final List<Metadata> metadata = batch.stream().map(row -> Map.of(
26-
// "id", String.valueOf(row.get("id")),
27-
// "constellationId", String.valueOf(row.get("constellationId"))))
28-
// .map(Metadata::from)
29-
// .toList();
30-
// final List<Embedding> embeddings = embeddingModel.embedAll(names).content();
31-
// final List<TextSegment> segments = new ArrayList<>();
32-
33-
// //Create Segments for vector store
34-
// for (int i = 0; i < names.size(); i++) {
35-
// segments.add(TextSegment.from(names.get(i).text(), metadata.get(i)));
36-
// }
37-
// embeddingStore.addAll(embeddings, segments);
38-
39-
40-
chartingContext.progressUpdate().accept(50); // Update progress to 50% as an example
41-
}
35+
private record EmbeddingData(TextSegment name, Metadata metadata) {
36+
}
37+
38+
private final EmbeddingStore<TextSegment> embeddingStore;
39+
private final EmbeddingModel embeddingModel;
40+
41+
/**
 * Creates the processor with the two collaborators it works with.
 *
 * @param embeddingStore store that receives the embeddings and their text segments
 * @param embeddingModel model used to turn text segments into embeddings
 */
public EmbeddingChartProcessor(EmbeddingStore<TextSegment> embeddingStore, EmbeddingModel embeddingModel) {
    this.embeddingModel = embeddingModel;
    this.embeddingStore = embeddingStore;
}
45+
46+
@Override
47+
public String getProcessorName() {
48+
return "Embedding Chart Processor";
49+
}
50+
51+
@Override
52+
public void process(ChartingContext chartingContext) {
53+
List<EmbeddingData> embeddingBatch = new ArrayList<>();
54+
55+
chartingContext.chart().scopes().stream()
56+
.map(scope -> chartingContext.chart().navigationCalculator().kindOf(scope.id().nid()))
57+
.flatMap(intIdSet -> intIdSet.intStream().boxed())
58+
.forEach(nid -> {
59+
EmbeddingData embeddingData = generateEmbeddingName(nid, chartingContext.chart().constellationId(),
60+
chartingContext);
61+
embeddingBatch.add(embeddingData);
62+
if (embeddingBatch.size() == chartingContext.batchSize()) { // Process in batches of 100
63+
processBatch(embeddingBatch, chartingContext);
64+
embeddingBatch.clear();
65+
}
66+
});
67+
68+
// Process any remaining items in the batch
69+
if (!embeddingBatch.isEmpty()) {
70+
processBatch(embeddingBatch, chartingContext);
71+
}
72+
}
73+
74+
private EmbeddingData generateEmbeddingName(int nid, UUID constelationId, ChartingContext chartingContext) {
75+
// Generate a descriptive name for the embedding based on the concept's
76+
// attributes
77+
StringBuilder nameBuilder = new StringBuilder();
78+
nameBuilder.append("Concept: " + generateConcept(nid, chartingContext)).append(". ");
79+
nameBuilder.append("Category: " + generateCategory(nid, chartingContext)).append(". ");
80+
nameBuilder.append("Synonyms: " + generateSynonyms(nid, chartingContext)).append(". ");
81+
nameBuilder.append("Description: " + generateDescription(nid, chartingContext)).append(". ");
82+
nameBuilder.append("Available Semantic Features: " + generateSemanticFeatures(nid, chartingContext))
83+
.append(". ");
84+
// Create metadata for the embedding - this will help to filter based on
85+
// constellation used in prompts
86+
Metadata metadata = Metadata.from(Map.of(
87+
"id", nid,
88+
"constellationId", constelationId.toString()));
89+
return new EmbeddingData(TextSegment.from(nameBuilder.toString()), metadata);
90+
}
91+
92+
private String generateConcept(int nid, ChartingContext chartingContext) {
93+
return chartingContext.chart().languageCalculator().getFullyQualifiedDescriptionTextWithFallbackOrNid(nid);
94+
}
95+
96+
private String generateCategory(int nid, ChartingContext chartingContext) {
97+
StringBuilder categoryBuilder = new StringBuilder();
98+
IntIdSet intIdSet = chartingContext.chart().navigationCalculator().ancestorsOf(nid);
99+
int[] nids = intIdSet.toArray();
100+
101+
for (int i = 0; i < nids.length && i < 4; i++) {
102+
if (i != TinkarTermV2.INTEGRATED_KNOWLEDGE_MANAGEMENT.nid()) {
103+
categoryBuilder.append(
104+
chartingContext.chart().languageCalculator().getDescriptionTextOrNid(nids[i]))
105+
.append(", ");
106+
}
107+
}
108+
109+
return categoryBuilder.toString().substring(0, categoryBuilder.length() - 2); // Remove trailing ", "
110+
}
111+
112+
private String generateSynonyms(int nid, ChartingContext chartingContext) {
113+
Optional<String> regularName = chartingContext.chart().languageCalculator().getRegularDescriptionText(nid);
114+
if (regularName.isPresent()) {
115+
return regularName.get();
116+
}
117+
return "";
118+
}
119+
120+
private String generateDescription(int nid, ChartingContext chartingContext) {
121+
Optional<String> descriptionName = chartingContext.chart().languageCalculator().getDescriptionText(nid);
122+
if (descriptionName.isPresent()) {
123+
return descriptionName.get();
124+
}
125+
return "";
126+
}
127+
128+
private String generateSemanticFeatures(int nid, ChartingContext chartingContext) {
129+
StringBuilder semanticFeaturesBuilder = new StringBuilder();
130+
PrimitiveData.get().forEachSemanticNidForComponent(nid, semanticNid -> {
131+
Latest<SemanticEntityVersion> latestSemanticEntityVersion = chartingContext.chart().stampCalculator()
132+
.latest(semanticNid);
133+
if (latestSemanticEntityVersion.isPresent()) {
134+
SemanticEntityVersion semanticEntityVersion = latestSemanticEntityVersion.get();
135+
Latest<PatternEntityVersion> latestPatternEntityVersion = chartingContext.chart().stampCalculator()
136+
.latest(semanticEntityVersion.patternNid());
137+
if (latestPatternEntityVersion.isPresent()) {
138+
PatternEntityVersion patternEntityVersion = latestPatternEntityVersion.get();
139+
String semanticName = chartingContext.chart().languageCalculator()
140+
.getDescriptionTextOrNid(patternEntityVersion.nid());
141+
String purpose = chartingContext.chart().languageCalculator()
142+
.getDescriptionTextOrNid(patternEntityVersion.semanticPurposeNid());
143+
semanticFeaturesBuilder.append(semanticName + " (Provides " + purpose + "), ");
144+
}
145+
}
146+
});
147+
return semanticFeaturesBuilder.toString().substring(0, semanticFeaturesBuilder.length() - 2);
148+
}
42149

150+
private void processBatch(List<EmbeddingData> embeddingBatch, ChartingContext chartingContext) {
151+
// Generate embeddings values from string names
152+
List<TextSegment> names = embeddingBatch.stream().map(EmbeddingData::name).toList();
153+
List<Embedding> embeddings = embeddingModel.embedAll(names).content();
154+
// Create Segments for vector store with embedding values and metadata
155+
List<TextSegment> segmentsWithMetaData = embeddingBatch.stream()
156+
.map(embeddingData -> TextSegment.from(embeddingData.name().text(), embeddingData.metadata()))
157+
.toList();
158+
embeddingStore.addAll(embeddings, segmentsWithMetaData);
159+
chartingContext.progressUpdate().accept(chartingContext.batchSize());
160+
}
43161
}

0 commit comments

Comments
 (0)