From 6173757560d39436a398d51b2e5ba9935a0eed5a Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Sun, 1 Mar 2026 00:07:54 +0530 Subject: [PATCH 01/15] Add changes for primary and secondary dataformat in composite engine Signed-off-by: Arpit Bandejiya --- .gitignore | 1 + .../ParquetDataFormatPlugin.java | 40 +++- .../engine/DummyDataUtils.java | 1 - .../engine/ParquetExecutionEngine.java | 19 +- .../fields/ArrowFieldRegistry.java | 17 +- .../fields/ArrowSchemaBuilder.java | 58 +++-- .../fields/ParquetField.java | 15 +- .../fields/core/data/BinaryParquetField.java | 11 +- .../fields/core/data/BooleanParquetField.java | 11 +- .../fields/core/data/IpParquetField.java | 11 +- .../fields/core/data/KeywordParquetField.java | 10 +- .../fields/core/data/TextParquetField.java | 10 +- .../core/data/TokenCountParquetField.java | 11 +- .../core/data/date/DateNanosParquetField.java | 11 +- .../core/data/date/DateParquetField.java | 11 +- .../core/data/number/ByteParquetField.java | 11 +- .../core/data/number/DoubleParquetField.java | 11 +- .../core/data/number/FloatParquetField.java | 11 +- .../data/number/HalfFloatParquetField.java | 11 +- .../core/data/number/IntegerParquetField.java | 11 +- .../core/data/number/LongParquetField.java | 11 +- .../core/data/number/ShortParquetField.java | 11 +- .../data/number/UnsignedLongParquetField.java | 11 +- .../fields/core/metadata/IdParquetField.java | 11 +- .../core/metadata/IgnoredParquetField.java | 10 +- .../core/metadata/RoutingParquetField.java | 10 +- .../core/metadata/SizeParquetField.java | 11 +- .../writer/ParquetDocumentInput.java | 32 ++- .../writer/ParquetWriter.java | 14 +- .../vsr/VSRManagerTests.java | 12 +- .../index/mapper/size/SizeFieldMapper.java | 2 +- .../common/settings/IndexScopedSettings.java | 1 + .../org/opensearch/index/IndexSettings.java | 11 + .../index/engine/exec/DataFormat.java | 25 +-- .../index/engine/exec/DocumentInput.java | 14 ++ .../index/engine/exec/EngineRole.java | 15 ++ 
.../engine/exec/FieldAssignmentResolver.java | 111 ++++++++++ .../index/engine/exec/FieldAssignments.java | 62 ++++++ .../index/engine/exec/FieldCapability.java | 24 ++ .../engine/exec/FieldSupportRegistry.java | 84 +++++++ .../engine/exec/IndexingExecutionEngine.java | 3 +- .../opensearch/index/engine/exec/Writer.java | 2 +- .../composite/CompositeDataFormatWriter.java | 14 +- .../composite/CompositeFieldValidator.java | 101 +++++++++ .../CompositeIndexingExecutionEngine.java | 135 ++++++++++-- .../index/engine/exec/coord/Any.java | 8 + .../engine/exec/coord/CompositeEngine.java | 1 + .../engine/exec/lucene/LuceneDataFormat.java | 42 ++++ .../exec/lucene/LuceneDataSourcePlugin.java | 79 +++++++ .../engine/exec/lucene/LuceneIEEngine.java | 158 -------------- .../lucene/engine/LuceneExecutionEngine.java | 131 +++++++++++ .../exec/lucene/fields/LuceneField.java | 37 ++++ .../lucene/fields/LuceneFieldRegistry.java | 71 ++++++ .../lucene/fields/data/DoubleLuceneField.java | 38 ++++ .../fields/data/KeywordLuceneField.java | 65 ++++++ .../lucene/fields/data/LongLuceneField.java | 36 +++ .../lucene/fields/data/TextLuceneField.java | 40 ++++ .../lucene/writer/LuceneDocumentInput.java | 97 +++++++++ .../exec/lucene/writer/LuceneWriter.java | 68 ++++++ .../exec/lucene/writer/LuceneWriterCodec.java | 52 +++++ .../index/engine/exec/text/TextDF.java | 73 ------- .../index/engine/exec/text/TextEngine.java | 205 ------------------ .../index/mapper/FieldNamesFieldMapper.java | 2 +- .../index/mapper/NumberFieldMapper.java | 1 - .../opensearch/index/shard/IndexShard.java | 1 + .../main/java/org/opensearch/node/Node.java | 15 ++ .../opensearch/plugins/DataSourcePlugin.java | 21 +- 67 files changed, 1711 insertions(+), 559 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/EngineRole.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java create mode 100644 
server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/FieldCapability.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/FieldSupportRegistry.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriterCodec.java delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java diff --git 
a/.gitignore b/.gitignore index fd9b9ad386961..7ad6f9da708a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .claude CLAUDE.md .cursor* +.kiro* # intellij files .idea/ diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java index 8be9ec2213c02..170558846a660 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java @@ -23,15 +23,19 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.env.NodeEnvironment; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import com.parquet.parquetdataformat.bridge.RustBridge; import com.parquet.parquetdataformat.engine.ParquetExecutionEngine; +import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.FormatStoreDirectory; import org.opensearch.index.store.GenericStoreDirectory; import org.opensearch.plugins.DataSourcePlugin; -import org.opensearch.index.mapper.MapperService; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.spi.vectorized.DataSourceCodec; import org.opensearch.repositories.RepositoriesService; @@ -41,6 +45,7 @@ import org.opensearch.watcher.ResourceWatcherService; import java.io.IOException; +import java.util.EnumSet; import java.util.HashMap; import java.util.Collection; import java.util.Map; @@ -82,8 +87,16 @@ public class 
ParquetDataFormatPlugin extends Plugin implements DataSourcePlugin @Override @SuppressWarnings("unchecked") - public IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath, IndexSettings indexSettings) { - return (IndexingExecutionEngine) new ParquetExecutionEngine(settings, () -> ArrowSchemaBuilder.getSchema(mapperService), shardPath, indexSettings); + public IndexingExecutionEngine indexingEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimary, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments) { + ParquetExecutionEngine engine = new ParquetExecutionEngine( + settings, + isPrimary, + () -> ArrowSchemaBuilder.getSchema(mapperService, isPrimary), + shardPath, + indexSettings, + fieldAssignments + ); + return (IndexingExecutionEngine) engine; } @Override @@ -109,6 +122,12 @@ public DataFormat getDataFormat() { return new ParquetDataFormat(); } + // In case of Parquet with multi-datasource, it will act as source of truth + @Override + public boolean isPrimary() { + return true; + } + @Override public Optional> getDataSourceCodecs() { Map codecs = new HashMap<>(); @@ -136,6 +155,21 @@ public BlobContainer createBlobContainer(BlobStore blobStore, BlobPath baseBlobP return blobStore.blobContainer(formatPath); } + @Override + public void registerFieldSupport(FieldSupportRegistry registry) { + DataFormat parquet = getDataFormat(); + java.util.Set storeAndDocValues = EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + + // Parquet supports STORE and DOC_VALUES for numeric and keyword types but not INDEX (no inverted index) + String[] supportedTypes = { + "keyword", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", + "date", "date_nanos", "boolean", "ip", "binary", "unsigned_long" + }; + for (String type : supportedTypes) { + registry.register(type, parquet, storeAndDocValues); + } + } + @Override public List> getSettings() { return 
List.of( diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java index 0d6c2519d463a..c3a563b7223cb 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java @@ -3,7 +3,6 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.types.FloatingPointPrecision; import org.opensearch.common.SuppressForbidden; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.mapper.MappedFieldType; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java index b40a3fb4751fd..0083ceb5ce57f 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java @@ -14,7 +14,10 @@ import org.apache.logging.log4j.Logger; import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexSettings; +import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignments; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.engine.exec.Merger; import org.opensearch.index.engine.exec.RefreshInput; @@ -75,19 +78,24 @@ public class ParquetExecutionEngine implements IndexingExecutionEngine 
schema, ShardPath shardPath, - IndexSettings indexSettings + IndexSettings indexSettings, + FieldAssignments fieldAssignments ) { this.schema = schema; this.shardPath = shardPath; this.arrowBufferPool = new ArrowBufferPool(settings); this.indexSettings = indexSettings; this.parquetMerger = new ParquetMergeExecutor(CompactionStrategy.RECORD_BATCH, indexSettings.getIndex().getName()); - + this.isPrimaryEngine = isPrimaryEngine; + this.fieldAssignments = fieldAssignments; // Push current settings to Rust store once on construction, then keep in sync on updates pushSettingsToRust(indexSettings); @@ -143,14 +151,15 @@ public void deleteFiles(Map> filesToDelete) { } @Override - public List supportedFieldTypes() { - return List.of(); + public List supportedFieldTypes(boolean isPrimaryEngine) { + return new java.util.ArrayList<>(ArrowFieldRegistry.getRegisteredFieldNames()); } @Override public Writer createWriter(long writerGeneration) { String fileName = Path.of(shardPath.getDataPath().toString(), getDataFormat().name(), FILE_NAME_PREFIX + "_" + writerGeneration + FILE_NAME_EXT).toString(); - return new ParquetWriter(fileName, schema.get(), writerGeneration, arrowBufferPool, indexSettings); + EngineRole role = isPrimaryEngine ? 
EngineRole.PRIMARY : EngineRole.SECONDARY; + return new ParquetWriter(fileName, schema.get(), writerGeneration, arrowBufferPool, indexSettings, role, fieldAssignments); } @Override diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java index 1a65f7a116623..d0b12f5c35e69 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java @@ -12,7 +12,9 @@ import com.parquet.parquetdataformat.plugins.fields.CoreDataFieldPlugin; import com.parquet.parquetdataformat.plugins.fields.MetadataFieldPlugin; import com.parquet.parquetdataformat.plugins.fields.ParquetFieldPlugin; +import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.mapper.SeqNoFieldMapper; +import org.opensearch.index.shard.IllegalIndexShardStateException; import java.util.Collections; import java.util.Map; @@ -62,7 +64,7 @@ private static void registerCorePlugins() { // Register core data fields registerPlugin(new CoreDataFieldPlugin(), "CoreDataFields"); - // REgister metadata fields + // Register metadata fields registerPlugin(new MetadataFieldPlugin(), "MetadataFields"); } /** @@ -141,6 +143,19 @@ public static ParquetField getParquetField(String fieldType) { return FIELD_REGISTRY.get(fieldType); } + public static ParquetField getParquetFieldAfterMatchingRole(String fieldType, boolean isPrimary) { + ParquetField field = FIELD_REGISTRY.get(fieldType); + if(field == null) return null; + + // in case of primary, field should be either Primary marked or marked for All + if(isPrimary && field.getFieldRole() == EngineRole.SECONDARY) return null; + + // in case of non-primary, field should either be Secondary marked or marked for All + if(!isPrimary && 
field.getFieldRole() == EngineRole.PRIMARY) return null; + + return field; + } + public static class RegistryStats { private final int totalFields; private final Set allFieldTypes; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java index 5430b7fa03101..e71178aa1593f 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java @@ -29,6 +29,10 @@ * Utility class for creating Apache Arrow schemas from OpenSearch mapper services. * This class provides methods to convert OpenSearch field mappings into Arrow schema definitions * that can be used for Parquet data format operations. + * + *

Uses {@link ArrowFieldRegistry} to determine which fields are eligible for the + * engine's role via {@code getParquetFieldAfterMatchingRole()}. For non-primary contexts, + * fields not eligible for the role are excluded from the schema.

*/ public final class ArrowSchemaBuilder { @@ -38,18 +42,18 @@ private ArrowSchemaBuilder() { } /** - * Creates an Apache Arrow Schema from the provided MapperService. - * This method extracts all non-metadata field mappers and converts them to Arrow fields. + * Creates an Apache Arrow Schema from the provided MapperService using the ArrowFieldRegistry. * * @param mapperService the OpenSearch mapper service containing field definitions - * @return a new Schema containing Arrow field definitions for all mapped fields + * @param isPrimary whether this is a primary engine context + * @return a new Schema containing Arrow field definitions for all eligible mapped fields * @throws IllegalArgumentException if mapperService is null - * @throws IllegalStateException if no valid fields are found or if a field type is not supported + * @throws IllegalStateException if no valid fields are found or if a field type is not supported in primary context */ - public static Schema getSchema(final MapperService mapperService) { + public static Schema getSchema(final MapperService mapperService, boolean isPrimary) { Objects.requireNonNull(mapperService, "MapperService cannot be null"); - final List fields = extractFieldsFromMappers(mapperService); + final List fields = extractFieldsFromMappers(mapperService, isPrimary); if (fields.isEmpty()) { throw new IllegalStateException("No valid fields found in mapper service"); @@ -59,12 +63,14 @@ public static Schema getSchema(final MapperService mapperService) { } /** - * Extracts Arrow fields from the mapper service, filtering out metadata fields. + * Extracts Arrow fields from the mapper service, filtering out metadata fields + * and fields not eligible for the engine's role. 
* * @param mapperService the mapper service to extract fields from + * @param isPrimary whether this is a primary engine context * @return a list of Arrow fields */ - private static List extractFieldsFromMappers(final MapperService mapperService) { + private static List extractFieldsFromMappers(final MapperService mapperService, boolean isPrimary) { final List fields = new ArrayList<>(); for (final Mapper mapper : mapperService.documentMapper().mappers()) { @@ -72,12 +78,15 @@ private static List extractFieldsFromMappers(final MapperService mapperSe continue; } - final Field arrowField = createArrowField(mapper); - fields.add(arrowField); + final Field arrowField = createArrowField(mapper, isPrimary); + if (arrowField != null) { + fields.add(arrowField); + } } - fields.add(new Field(CompositeDataFormatWriter.ROW_ID, new LongParquetField().getFieldType(), null)); - fields.add(new Field(SeqNoFieldMapper.PRIMARY_TERM_NAME, new LongParquetField().getFieldType(), null)); + LongParquetField longField = new LongParquetField(); + fields.add(new Field(CompositeDataFormatWriter.ROW_ID, longField.getFieldType(), null)); + fields.add(new Field(SeqNoFieldMapper.PRIMARY_TERM_NAME, longField.getFieldType(), null)); return fields; } @@ -98,20 +107,27 @@ private static boolean notSupportedMetadataField(final Mapper mapper) { } /** - * Creates an Arrow Field from an OpenSearch Mapper. + * Creates an Arrow Field from an OpenSearch Mapper using the ArrowFieldRegistry. + * For non-primary contexts, returns null if the field type has no eligible ParquetField, + * allowing the caller to skip the field. For primary contexts, throws IllegalStateException + * if no ParquetField is found. 
* * @param mapper the mapper to convert - * @return a new Arrow Field - * @throws IllegalStateException if the mapper type is not supported + * @param isPrimary whether this is a primary engine context + * @return a new Arrow Field, or null if the field is not eligible for the role + * @throws IllegalStateException if the mapper type is not supported in primary context */ - private static Field createArrowField(final Mapper mapper) { - final ParquetField parquetField = ArrowFieldRegistry.getParquetField(mapper.typeName()); + private static Field createArrowField(final Mapper mapper, boolean isPrimary) { + final ParquetField parquetField = ArrowFieldRegistry.getParquetFieldAfterMatchingRole(mapper.typeName(), isPrimary); if (parquetField == null) { - throw new IllegalStateException( - String.format("Unsupported field type '%s' for field '%s'", - mapper.typeName(), mapper.name()) - ); + if (isPrimary) { + throw new IllegalStateException( + String.format("Unsupported field type '%s' for field '%s'", + mapper.typeName(), mapper.name()) + ); + } + return null; } return new Field(mapper.name(), parquetField.getFieldType(), null); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java index dc1a7e369d430..e67eead850fc5 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java @@ -11,9 +11,12 @@ import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.mapper.MappedFieldType; import java.util.Objects; +import java.util.Set; /** * 
Abstract base class for all Parquet field implementations that handle the conversion @@ -49,7 +52,7 @@ public abstract class ParquetField { * @throws IllegalArgumentException if any parameter is invalid for this field type * @throws ClassCastException if parseValue cannot be cast to the expected type */ - protected abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue); + protected abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities); /** * Creates and processes a field entry if the field type supports columnar storage. @@ -66,11 +69,15 @@ public abstract class ParquetField { * @param mappedFieldType the OpenSearch field type metadata, must not be null * @param managedVSR the managed vector schema root, must not be null * @param parseValue the parsed field value to be processed, may be null + * @param engineRole the engine role for this format + * @param assignedCapabilities the capabilities this format is responsible for on this field type * @throws IllegalArgumentException if mappedFieldType or managedVSR is null */ public final void createField(final MappedFieldType mappedFieldType, final ManagedVSR managedVSR, - final Object parseValue) { + final Object parseValue, + final EngineRole engineRole, + final Set assignedCapabilities) { Objects.requireNonNull(mappedFieldType, "MappedFieldType cannot be null"); Objects.requireNonNull(managedVSR, "ManagedVSR cannot be null"); @@ -78,7 +85,7 @@ public final void createField(final MappedFieldType mappedFieldType, // TODO: support dynamic mapping update // for now ignore the field if (managedVSR.getVector(mappedFieldType.name()) != null) { - addToGroup(mappedFieldType, managedVSR, parseValue); + addToGroup(mappedFieldType, managedVSR, parseValue, engineRole, assignedCapabilities); } } } @@ -109,6 +116,8 @@ public final void createField(final MappedFieldType mappedFieldType, */ public 
abstract FieldType getFieldType(); + public abstract EngineRole getFieldRole(); + /** * Provides a string representation of this ParquetField for debugging purposes. * The default implementation includes the class name and Arrow type information. diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java index eaa4d5209bfc2..dbac38ae6b5c4 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling binary data types in OpenSearch documents. 
* @@ -40,7 +44,7 @@ public class BinaryParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); varBinaryVector.set(rowCount, (byte[]) parseValue); @@ -55,4 +59,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java index 4b2237bf1aa1f..9ffb9a7c8eb44 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -16,6 +18,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling boolean data types in OpenSearch documents. 
* @@ -41,7 +45,7 @@ public class BooleanParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { BitVector bitVector = (BitVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); bitVector.setSafe(rowIndex, (Boolean) parseValue ? 1 : 0); @@ -56,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java index be16d3154b66a..bb8be28997de2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -18,6 +20,8 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + import java.net.InetAddress; /** @@ -48,7 +52,7 @@ public class IpParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, 
ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); final BytesRef bytesRef = new BytesRef(InetAddressPoint.encode((InetAddress) parseValue)); @@ -64,4 +68,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java index 1814e20891f4e..9f5c1a3b7a965 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; @@ -16,6 +18,7 @@ import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.Set; /** * Parquet field implementation for handling keyword data types in OpenSearch documents. 
@@ -44,7 +47,7 @@ public class KeywordParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -59,4 +62,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java index e4c93aa9f608f..3e3a54dba646b 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +19,7 @@ import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.Set; /** * Parquet field implementation for handling text data types in OpenSearch documents. 
@@ -45,7 +48,7 @@ public class TextParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -60,4 +63,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java index 603189bddc80b..b1d621f93604c 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling token count data types in OpenSearch documents. 
* @@ -43,7 +47,7 @@ public class TokenCountParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); @@ -58,4 +62,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java index 09ca4d50c9fe7..ceeb0b94570ff 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.date; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampNanoVector; @@ -16,6 +18,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling date and timestamp data types in OpenSearch documents. 
* @@ -43,7 +47,7 @@ public class DateNanosParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampNanoVector.setSafe(rowIndex, (long) parseValue); @@ -58,4 +62,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java index 8554314e722a7..04cfbb7190e6d 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.date; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampMilliVector; @@ -16,6 +18,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling date and timestamp data types in OpenSearch documents. 
* @@ -43,7 +47,7 @@ public class DateParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampMilliVector.setSafe(rowIndex, (long) parseValue); @@ -58,4 +62,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java index d9d45faeb3872..662cbc07a4597 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TinyIntVector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling 8-bit signed byte integer data types in OpenSearch documents. 
* @@ -40,7 +44,7 @@ public class ByteParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); tinyIntVector.setSafe(rowCount, (Byte) parseValue); @@ -55,4 +59,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java index ac2b3a6e62927..849fd08ecfcc5 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float8Vector; @@ -16,6 +18,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling double-precision floating-point data types in OpenSearch documents. 
* @@ -42,7 +46,7 @@ public class DoubleParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); float8Vector.setSafe(rowCount, (Double) parseValue); @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java index a516efd2f990f..fd56dd7acb303 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float4Vector; @@ -16,6 +18,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling single-precision floating-point data types in OpenSearch documents. 
* @@ -42,7 +46,7 @@ public class FloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); float4Vector.setSafe(rowCount, (Float) parseValue); @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java index 3019773e6bd42..26eb5e5db6d53 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float2Vector; @@ -16,6 +18,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling half-precision (16-bit) floating-point data types in OpenSearch documents. 
* @@ -42,7 +46,7 @@ public class HalfFloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); float2Vector.setSafe(rowCount, (Short) parseValue); @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java index b11d49b666799..93c95b344ee58 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling 32-bit signed integer data types in OpenSearch documents. 
* @@ -40,7 +44,7 @@ public class IntegerParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); @@ -55,4 +59,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java index 850ac0f004649..8872d7b834c00 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.BigIntVector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling 64-bit signed long integer data types in OpenSearch documents. 
* @@ -41,7 +45,7 @@ public class LongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); bigIntVector.setSafe(rowCount, (Long) parseValue); @@ -56,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java index 07ee5c1b54814..60e8cf0855c72 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.SmallIntVector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling 16-bit signed short integer data types in OpenSearch documents. 
* @@ -41,7 +45,7 @@ public class ShortParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); smallIntVector.setSafe(rowCount, (Short) parseValue); @@ -56,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java index 7f8e407f29092..f5480a42b7871 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.UInt8Vector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling 64-bit unsigned long integer data types in OpenSearch documents. 
* @@ -41,7 +45,7 @@ public class UnsignedLongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); long longValue = ((Number) parseValue).longValue(); @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java index 413a3938836fc..8b1b803b14978 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -16,6 +18,8 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling document ID metadata in OpenSearch documents. 
* @@ -42,7 +46,7 @@ public class IdParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); BytesRef bytesRef = (BytesRef) parseValue; @@ -58,4 +62,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java index c31e3932c2295..a46b7d65b4914 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; @@ -16,6 +18,7 @@ import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.Set; /** * Parquet field implementation for handling ignored field data types in OpenSearch documents. 
@@ -43,7 +46,7 @@ public class IgnoredParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); varCharVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -58,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java index ffacfa1995ed4..3167997fd9398 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; @@ -16,6 +18,7 @@ import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.Set; /** * Parquet field implementation for handling routing metadata in OpenSearch documents. 
@@ -43,7 +46,7 @@ public class RoutingParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { VarCharVector routingVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); routingVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -58,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java index 1367cc7542155..daf8c2ac52296 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java @@ -8,6 +8,8 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; @@ -15,6 +17,8 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.Set; + /** * Parquet field implementation for handling document size metadata in OpenSearch documents. 
* @@ -43,7 +47,7 @@ public class SizeParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); @@ -58,4 +62,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java index 41bb192f55ea3..67d54aca624e7 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -4,12 +4,17 @@ import com.parquet.parquetdataformat.fields.ParquetField; import org.apache.arrow.vector.BigIntVector; import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.engine.exec.WriteResult; import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.vsr.ManagedVSR; import java.io.IOException; +import java.util.Objects; +import java.util.Set; /** * Document input wrapper for Parquet-based document processing. 
@@ -33,9 +38,13 @@ */ public class ParquetDocumentInput implements DocumentInput { private final ManagedVSR managedVSR; + private final EngineRole engineRole; + private final FieldAssignments fieldAssignments; - public ParquetDocumentInput(ManagedVSR managedVSR) { - this.managedVSR = managedVSR; + public ParquetDocumentInput(ManagedVSR managedVSR, EngineRole engineRole, FieldAssignments fieldAssignments) { + this.managedVSR = Objects.requireNonNull(managedVSR, "managedVSR must not be null"); + this.engineRole = Objects.requireNonNull(engineRole, "engineRole must not be null"); + this.fieldAssignments = Objects.requireNonNull(fieldAssignments, "fieldAssignments must not be null"); } @Override @@ -48,15 +57,21 @@ public void addRowIdField(String fieldName, long rowId) { @Override public void addField(MappedFieldType fieldType, Object value) { final String fieldTypeName = fieldType.typeName(); + + // Check if this format should handle this field type at all + if (!fieldAssignments.shouldHandle(fieldTypeName)) { + return; + } + final ParquetField parquetField = ArrowFieldRegistry.getParquetField(fieldTypeName); if (parquetField == null) { - throw new IllegalArgumentException( - String.format("Unsupported field type: %s. 
Field type is not registered in ArrowFieldRegistry.", fieldTypeName) - ); + // Field type not supported by Parquet format — skip silently + return; } - parquetField.createField(fieldType, managedVSR, value); + Set assignedCapabilities = fieldAssignments.getAssignedCapabilities(fieldTypeName); + parquetField.createField(fieldType, managedVSR, value, engineRole, assignedCapabilities); } @Override @@ -71,6 +86,11 @@ public ManagedVSR getFinalInput() { return managedVSR; } + @Override + public EngineRole getEngineRole() { + return engineRole; + } + @Override public WriteResult addToWriter() throws IOException { // Complete the current document by incrementing row count diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java index cc552b809d575..31c3b1c3f00dc 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java @@ -7,6 +7,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignments; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.WriteResult; @@ -45,18 +47,24 @@ public class ParquetWriter implements Writer { private final Schema schema; private final VSRManager vsrManager; private final long writerGeneration; + private final EngineRole engineRole; + private final FieldAssignments fieldAssignments; public ParquetWriter( String file, Schema schema, long writerGeneration, ArrowBufferPool arrowBufferPool, - IndexSettings indexSettings + IndexSettings indexSettings, + EngineRole engineRole, + 
FieldAssignments fieldAssignments ) { this.file = file; this.schema = schema; this.vsrManager = new VSRManager(file, indexSettings.getIndex().getName(), schema, arrowBufferPool); this.writerGeneration = writerGeneration; + this.engineRole = engineRole; + this.fieldAssignments = fieldAssignments; } @Override @@ -87,7 +95,7 @@ public void sync() throws IOException { } @Override - public void close() { + public void close() throws IOException { vsrManager.close(); } @@ -100,6 +108,6 @@ public ParquetDocumentInput newDocumentInput() { } // Get a new ManagedVSR from VSRManager for this document input - return new ParquetDocumentInput(vsrManager.getActiveManagedVSR()); + return new ParquetDocumentInput(vsrManager.getActiveManagedVSR(), engineRole, fieldAssignments); } } diff --git a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java index 3c13cc511eb83..33ba6d5644716 100644 --- a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java +++ b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java @@ -10,31 +10,23 @@ import com.parquet.parquetdataformat.bridge.ArrowExport; import com.parquet.parquetdataformat.bridge.ParquetFileMetadata; -import com.parquet.parquetdataformat.bridge.RustBridge; import com.parquet.parquetdataformat.memory.ArrowBufferPool; import com.parquet.parquetdataformat.writer.ParquetDocumentInput; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.types.Types; +import org.opensearch.index.engine.exec.EngineRole; import 
context.compositeDocumentInput().addField(fieldType(), value);
+ * An empty default means the setting is unset; initialization throws only when the setting is unset while multiple DataSourcePlugins are registered.
+ */ + public static final Setting INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING = Setting.simpleString( + "index.composite.primary_data_format", + "", + Property.IndexScope + ); + private final Index index; private final Version version; private final Logger logger; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java index ef1ad24992256..eac8c9f4ad091 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java @@ -11,7 +11,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; -import org.opensearch.index.engine.exec.text.TextDF; +import org.opensearch.index.engine.exec.lucene.LuceneDataFormat; @ExperimentalApi public interface DataFormat { @@ -23,29 +23,6 @@ public interface DataFormat { void configureStore(); - static class LuceneDataFormat implements DataFormat { - @Override - public Setting dataFormatSettings() { - return null; - } - - @Override - public Setting clusterLeveldataFormatSettings() { - return null; - } - - @Override - public String name() { - return ""; - } - - @Override - public void configureStore() { - - } - } DataFormat LUCENE = new LuceneDataFormat(); - - DataFormat TEXT = new TextDF(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java index 4a3c0fc73f111..fc70c19498df3 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java @@ -12,6 +12,7 @@ import org.opensearch.index.mapper.MappedFieldType; import java.io.IOException; + @ExperimentalApi public interface DocumentInput extends AutoCloseable { @@ -34,4 +35,17 @@ default void 
+ * Returns the {@link EngineRole} for this document input, indicating whether the engine + * acts as the primary, a secondary, or both roles ({@code ALL}) in a composite configuration.
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Resolves which data format handles which capabilities for each field type. + * Uses primary-gets-priority strategy: if the primary format supports a capability + * for a field type, it wins. Secondary formats only get capabilities the primary can't handle. + */ +@ExperimentalApi +public final class FieldAssignmentResolver { + + private FieldAssignmentResolver() {} + + /** + * Resolves field assignments for all mapped fields. + * + * @param registry the field support registry with all format capabilities + * @param roleMap format → engine role mapping + * @param fieldTypes all mapped field types from the mapper service + * @return per-format FieldAssignments + */ + public static Map resolve( + FieldSupportRegistry registry, + Map roleMap, + Iterable fieldTypes + ) { + // Find primary format + DataFormat primaryFormat = null; + for (Map.Entry entry : roleMap.entrySet()) { + if (entry.getValue() == EngineRole.PRIMARY) { + primaryFormat = entry.getKey(); + break; + } + } + + // Build per-format assignment maps + Map>> perFormatMap = new HashMap<>(); + for (DataFormat format : roleMap.keySet()) { + perFormatMap.put(format, new HashMap<>()); + } + + for (MappedFieldType fieldType : fieldTypes) { + String typeName = fieldType.typeName(); + resolveField(registry, roleMap, primaryFormat, perFormatMap, fieldType, typeName); + } + + // Wrap into FieldAssignments + Map result = new HashMap<>(); + for (Map.Entry>> entry : perFormatMap.entrySet()) { + result.put(entry.getKey(), new FieldAssignments(entry.getValue())); + } + return result; + } + + private static void resolveField( + FieldSupportRegistry registry, + Map roleMap, + DataFormat primaryFormat, + Map>> perFormatMap, + MappedFieldType fieldType, + 
String typeName + ) { + // Determine which capabilities are required by the mapping + Set required = EnumSet.noneOf(FieldCapability.class); + if (fieldType.isSearchable()) { + required.add(FieldCapability.INDEX); + } + if (fieldType.hasDocValues()) { + required.add(FieldCapability.DOC_VALUES); + } + if (fieldType.isStored()) { + required.add(FieldCapability.STORE); + } + + // For each required capability, assign to primary if it supports it, else to secondary + for (FieldCapability cap : required) { + if (primaryFormat != null && registry.hasCapability(typeName, primaryFormat, cap)) { + // Primary handles this capability + perFormatMap.get(primaryFormat) + .computeIfAbsent(typeName, k -> EnumSet.noneOf(FieldCapability.class)) + .add(cap); + } else { + // Find a secondary format that supports it + for (Map.Entry entry : roleMap.entrySet()) { + if (entry.getValue() != EngineRole.PRIMARY + && registry.hasCapability(typeName, entry.getKey(), cap)) { + perFormatMap.get(entry.getKey()) + .computeIfAbsent(typeName, k -> EnumSet.noneOf(FieldCapability.class)) + .add(cap); + break; + } + } + } + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java new file mode 100644 index 0000000000000..5acd981b79733 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +/** + * Per-format view of field capability assignments resolved by the composite engine. 
+ * Maps fieldTypeName → Set of FieldCapability that this format is responsible for. + * + *

Used by DocumentInput implementations to decide whether to write a given field. + * If a field type has no entry, this format should skip it entirely. + */ +@ExperimentalApi +public class FieldAssignments { + + /** Sentinel instance that accepts all fields — used in single-format (non-composite) mode. */ + public static final FieldAssignments ACCEPT_ALL = new FieldAssignments(Collections.emptyMap(), true); + + private final Map> assignments; + private final boolean acceptAll; + + public FieldAssignments(Map> assignments) { + this(assignments, false); + } + + private FieldAssignments(Map> assignments, boolean acceptAll) { + this.assignments = assignments; + this.acceptAll = acceptAll; + } + + /** + * Returns true if this format should handle the given field type. + */ + public boolean shouldHandle(String fieldTypeName) { + if (acceptAll) { + return true; + } + return assignments.containsKey(fieldTypeName); + } + + /** + * Returns the assigned capabilities for a field type, or empty set if none. + */ + public Set getAssignedCapabilities(String fieldTypeName) { + if (acceptAll) { + return Collections.emptySet(); + } + Set caps = assignments.get(fieldTypeName); + return caps != null ? Collections.unmodifiableSet(caps) : Collections.emptySet(); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldCapability.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldCapability.java new file mode 100644 index 0000000000000..299d584cfa08e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldCapability.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +/** + * Declares what a data format can do with a given field type. 
+ */ +@ExperimentalApi +public enum FieldCapability { + /** The format can persist raw field values for retrieval (stored fields). */ + STORE, + /** The format can build an inverted index for search (indexed fields). */ + INDEX, + /** The format can store columnar data for sorting and aggregations. */ + DOC_VALUES +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldSupportRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldSupportRegistry.java new file mode 100644 index 0000000000000..f84cdcf381ec0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldSupportRegistry.java @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Central registry tracking which data formats have which capabilities for which field types. + * Keyed by fieldTypeName → DataFormat → Set<FieldCapability>. + */ +@ExperimentalApi +public class FieldSupportRegistry { + + private final Map>> registry = new HashMap<>(); + + /** + * Registers capabilities for a field type and data format. + * Multiple calls for the same (fieldType, format) pair merge capabilities. + */ + public void register(String fieldTypeName, DataFormat format, Set capabilities) { + registry.computeIfAbsent(fieldTypeName, k -> new HashMap<>()) + .merge(format, EnumSet.copyOf(capabilities), (existing, incoming) -> { + existing.addAll(incoming); + return existing; + }); + } + + /** + * Returns the set of capabilities a format has for a field type, or empty set if none. 
+ // TODO: decide whether the Writer needs to know if it is acting as the primary writer.
Writer> createWriter(long writerGeneration) throws IOException; // A writer responsible for data format vended by this engine. diff --git a/server/src/main/java/org/opensearch/index/engine/exec/Writer.java b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java index d0ad4d35b3fc2..a3f908de17210 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/Writer.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java @@ -18,7 +18,7 @@ public interface Writer

> { void sync() throws IOException; - void close(); + void close() throws IOException; P newDocumentInput(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java index c17a3a63c081e..538ec07f302c4 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -8,6 +8,8 @@ package org.opensearch.index.engine.exec.composite; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.util.SetOnce; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; @@ -35,6 +37,7 @@ public class CompositeDataFormatWriter implements Writer, Lock { + private static final Logger logger = LogManager.getLogger(CompositeDataFormatWriter.class); private final List>>> writers; private final Runnable postWrite; private final ReentrantLock lock; @@ -85,7 +88,7 @@ public void sync() throws IOException { } @Override - public void close() { + public void close() throws IOException { for (Map.Entry>> writerPair : writers) { writerPair.getValue().close(); } @@ -180,6 +183,7 @@ public void addRowIdField(String fieldName, long rowId) { @Override public void addField(MappedFieldType fieldType, Object value) { + // Each delegate's addField uses its own FieldAssignments to decide what to write for (DocumentInput input : inputs) { input.addField(fieldType, value); } @@ -188,13 +192,17 @@ public void addField(MappedFieldType fieldType, Object value) { @Override public void setVersion(long version) { this.version = version; - addField(VersionFieldMapper.VersionFieldType.INSTANCE, version); + for (DocumentInput input : inputs) { + input.addField(VersionFieldMapper.VersionFieldType.INSTANCE, version); + } } 
@Override public void setSeqNo(long seqNo) { this.seqNo = seqNo; - addField(SeqNoFieldMapper.SeqNoFieldType.INSTANCE, seqNo); + for (DocumentInput input : inputs) { + input.addField(SeqNoFieldMapper.SeqNoFieldType.INSTANCE, seqNo); + } } @Override diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java new file mode 100644 index 0000000000000..89d607f38aa08 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.composite; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldSupportRegistry; +import org.opensearch.index.mapper.MappedFieldType; + +import java.util.Map; + +/** + * Stateless validator that checks field-to-capability compatibility using the + * {@link FieldSupportRegistry} at index creation or mapping update time. + */ +@ExperimentalApi +public final class CompositeFieldValidator { + + private CompositeFieldValidator() {} + + /** + * Validates that the primary data format has at least one capability + * registered for every mapped field type. + * Throws IllegalArgumentException if any field lacks primary coverage. 
+ */ + public static void validatePrimaryCoverage( + FieldSupportRegistry registry, + Map roleMap, + Iterable fieldTypes + ) { + DataFormat primaryFormat = null; + for (Map.Entry entry : roleMap.entrySet()) { + if (entry.getValue() == EngineRole.PRIMARY) { + primaryFormat = entry.getKey(); + break; + } + } + if (primaryFormat == null) { + return; + } + for (MappedFieldType fieldType : fieldTypes) { + if (!registry.hasAnyCapability(fieldType.typeName(), primaryFormat)) { + throw new IllegalArgumentException( + "Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + + "] has no capabilities registered for primary data format [" + primaryFormat.name() + "]" + ); + } + } + } + + /** + * Validates that every field's enabled mapping properties have at least one + * data format with the corresponding capability: + * isSearchable() → INDEX, hasDocValues() → DOC_VALUES, isStored() → STORE. + * Throws IllegalArgumentException if any property lacks coverage. + */ + public static void validateMappingPropertyCoverage( + FieldSupportRegistry registry, + Iterable fieldTypes + ) { + for (MappedFieldType fieldType : fieldTypes) { + String typeName = fieldType.typeName(); + if (fieldType.isSearchable()) { + checkCapabilityCoverage(registry, fieldType, typeName, FieldCapability.INDEX, "index"); + } + if (fieldType.hasDocValues()) { + checkCapabilityCoverage(registry, fieldType, typeName, FieldCapability.DOC_VALUES, "doc_values"); + } + if (fieldType.isStored()) { + checkCapabilityCoverage(registry, fieldType, typeName, FieldCapability.STORE, "store"); + } + } + } + + private static void checkCapabilityCoverage( + FieldSupportRegistry registry, + MappedFieldType fieldType, + String typeName, + FieldCapability requiredCapability, + String propertyName + ) { + for (DataFormat format : registry.allFormats()) { + if (registry.hasCapability(typeName, format, requiredCapability)) { + return; + } + } + throw new IllegalArgumentException( + "Field [" + fieldType.name() + "] of 
type [" + typeName + + "] requires [" + requiredCapability + "] capability (mapping property [" + propertyName + + "]=true) but no data format provides it" + ); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java index 5ce88dea6bd67..89a39298bb801 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -8,6 +8,9 @@ package org.opensearch.index.engine.exec.composite; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.coord.Segment; import java.util.Collections; @@ -17,6 +20,10 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignmentResolver; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.engine.exec.Merger; @@ -26,7 +33,6 @@ import org.opensearch.index.engine.exec.coord.Any; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; import org.opensearch.index.engine.exec.coord.CompositeDataFormatWriterPool; -import org.opensearch.index.engine.exec.text.TextEngine; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.DataSourcePlugin; @@ -45,8 +51,13 @@ public class CompositeIndexingExecutionEngine implements 
IndexingExecutionEngine private final Any dataFormat; private final AtomicLong writerGeneration; private final List> delegates = new ArrayList<>(); + private final FieldSupportRegistry fieldSupportRegistry; + private final Map roleMap; + + private static final Logger logger = LogManager.getLogger(CompositeIndexingExecutionEngine.class); public CompositeIndexingExecutionEngine( + EngineConfig engineConfig, MapperService mapperService, PluginsService pluginsService, ShardPath shardPath, @@ -54,26 +65,116 @@ public CompositeIndexingExecutionEngine( IndexSettings indexSettings ) { this.writerGeneration = new AtomicLong(initialWriterGeneration); + List dataSourcePlugins = pluginsService.filterPlugins(DataSourcePlugin.class) + .stream().toList(); + if (dataSourcePlugins.isEmpty()) throw new IllegalStateException("No data formats found, can't initialise Engine"); + + boolean singlePlugin = dataSourcePlugins.size() == 1; + + // Setting-based role resolution + String primaryDataFormatName = indexSettings.getValue(IndexSettings.INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING); + this.roleMap = resolveRoles(primaryDataFormatName, dataSourcePlugins, singlePlugin); + + // Build FieldSupportRegistry from plugin registrations + this.fieldSupportRegistry = new FieldSupportRegistry(); + for (DataSourcePlugin plugin : dataSourcePlugins) { + plugin.registerFieldSupport(fieldSupportRegistry); + } + + // Validate field capabilities if composite (multiple plugins) + if (!singlePlugin) { + CompositeFieldValidator.validatePrimaryCoverage(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); + CompositeFieldValidator.validateMappingPropertyCoverage(fieldSupportRegistry, mapperService.fieldTypes()); + } + + // Resolve field assignments: which format handles which capability for each field type + Map fieldAssignmentsMap; + if (singlePlugin) { + fieldAssignmentsMap = Map.of(dataSourcePlugins.get(0).getDataFormat(), FieldAssignments.ACCEPT_ALL); + } else { + fieldAssignmentsMap = 
FieldAssignmentResolver.resolve(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); + } + + // Determine primary format from role map + DataFormat primaryDataFormat = roleMap.entrySet().stream() + .filter(e -> e.getValue() == EngineRole.PRIMARY) + .map(Map.Entry::getKey) + .findFirst() + .orElseThrow(); + List dataFormats = new ArrayList<>(); - try { - DataSourcePlugin plugin = pluginsService.filterPlugins(DataSourcePlugin.class) - .stream() - .findAny() - .orElseThrow(() -> new IllegalArgumentException("dataformat [" + DataFormat.TEXT + "] is not registered.")); + for (DataSourcePlugin plugin : dataSourcePlugins) { dataFormats.add(plugin.getDataFormat()); - delegates.add(plugin.indexingEngine(mapperService, shardPath, indexSettings)); - } catch (NullPointerException e) { - delegates.add(new TextEngine()); - } - this.dataFormat = new Any(dataFormats, dataFormats.getFirst()); - this.dataFormatWriterPool = - new CompositeDataFormatWriterPool( - () -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), - LinkedList::new, - Runtime.getRuntime().availableProcessors() + boolean isPrimary = roleMap.get(plugin.getDataFormat()) == EngineRole.PRIMARY; + FieldAssignments assignments = fieldAssignmentsMap.getOrDefault( + plugin.getDataFormat(), FieldAssignments.ACCEPT_ALL + ); + IndexingExecutionEngine indexingEngine = plugin.indexingEngine( + engineConfig, mapperService, isPrimary, shardPath, indexSettings, assignments ); + delegates.add(indexingEngine); + } + + this.dataFormat = new Any(dataFormats, primaryDataFormat); + + logger.debug("Registered dataformats: {}", this.dataFormat); + this.dataFormatWriterPool = new CompositeDataFormatWriterPool( + () -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), + LinkedList::new, + Runtime.getRuntime().availableProcessors() + ); + } + + /** + * Pure function: resolves engine roles from the primary data format setting. + * Single plugin → always PRIMARY regardless of setting. 
+ * Valid setting → matching format is PRIMARY, others SECONDARY. + * Unknown format name → IllegalArgumentException. + * Empty setting with multiple plugins → IllegalArgumentException. + */ + static Map resolveRoles( + String primaryDataFormatName, + List plugins, + boolean singlePlugin + ) { + Map roles = new HashMap<>(); + if (singlePlugin) { + roles.put(plugins.get(0).getDataFormat(), EngineRole.PRIMARY); + return roles; + } + if (primaryDataFormatName != null && !primaryDataFormatName.isEmpty()) { + boolean found = false; + for (DataSourcePlugin plugin : plugins) { + if (plugin.getDataFormat().name().equals(primaryDataFormatName)) { + roles.put(plugin.getDataFormat(), EngineRole.PRIMARY); + found = true; + } else { + roles.put(plugin.getDataFormat(), EngineRole.SECONDARY); + } + } + if (!found) { + throw new IllegalArgumentException( + "Unrecognized primary data format [" + primaryDataFormatName + "]. Available: " + + plugins.stream().map(p -> p.getDataFormat().name()).toList() + ); + } + return roles; + } + throw new IllegalArgumentException( + "index.composite.primary_data_format is required when multiple data formats are registered. 
Available: " + + plugins.stream().map(p -> p.getDataFormat().name()).toList() + ); + } + + public FieldSupportRegistry getFieldSupportRegistry() { + return fieldSupportRegistry; + } + + public Map getRoleMap() { + return Collections.unmodifiableMap(roleMap); } + @Override public Any getDataFormat() { return dataFormat; @@ -104,7 +205,7 @@ public long getCurrentWriterGeneration() { } @Override - public List supportedFieldTypes() { + public List supportedFieldTypes(boolean isPrimaryEngine) { throw new UnsupportedOperationException(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java index aa51849b5dbd1..e97e5fc0bcae9 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java @@ -54,4 +54,12 @@ public void configureStore() { dataFormat.configureStore(); } } + + @Override + public String toString() { + return "Any{" + + "dataFormats=" + dataFormats + + ", primaryDataFormat=" + primaryDataFormat + + '}'; + } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java index 2553e48f3e156..13ee98d5a4802 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java @@ -319,6 +319,7 @@ public void onFailure(String reason, Exception ex) { logger.debug("While initialising Composite Engine - lst commit generation : " + lastCommittedWriterGeneration.get()); this.engine = new CompositeIndexingExecutionEngine( + engineConfig, mapperService, pluginsService, shardPath, diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java 
new file mode 100644 index 0000000000000..bd56febd5ed9b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene; + +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.exec.DataFormat; + +public class LuceneDataFormat implements DataFormat { + + private final String LUCENE_DATA_FORMAT = "LuceneDataFormat"; + @Override + public Setting dataFormatSettings() { + return null; + } + + @Override + public Setting clusterLeveldataFormatSettings() { + return null; + } + + @Override + public String name() { + return "Lucene"; + } + + @Override + public void configureStore() { + + } + + @Override + public String toString() { + return LUCENE_DATA_FORMAT; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java new file mode 100644 index 0000000000000..7972abf3b3e24 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java @@ -0,0 +1,79 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene; + +import org.opensearch.common.blobstore.BlobContainer; +import org.opensearch.common.blobstore.BlobPath; +import org.opensearch.common.blobstore.BlobStore; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineConfig; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldSupportRegistry; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.lucene.engine.LuceneExecutionEngine; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.store.FormatStoreDirectory; +import org.opensearch.plugins.DataSourcePlugin; + +import org.opensearch.plugins.Plugin; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Set; + +public class LuceneDataSourcePlugin extends Plugin implements DataSourcePlugin { + + @Override + @SuppressWarnings("unchecked") + public IndexingExecutionEngine indexingEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimary, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments) { + return (IndexingExecutionEngine) new LuceneExecutionEngine(engineConfig, mapperService, isPrimary, shardPath, indexSettings, fieldAssignments); + } + + @Override + public FormatStoreDirectory createFormatStoreDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException { + return null; + } + + @Override + public BlobContainer createBlobContainer(BlobStore blobStore, BlobPath blobPath) throws IOException { + return null; + } + + @Override + public DataFormat getDataFormat() { + return new LuceneDataFormat(); + } + + @Override + public void registerFieldSupport(FieldSupportRegistry registry) { + DataFormat lucene = 
getDataFormat(); + Set allCaps = EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + Set storeAndIndex = EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX); + + // Most field types support all three capabilities + String[] fullSupportTypes = { + "keyword", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", + "date", "date_nanos", "boolean", "ip", "geo_point", "geo_shape", "binary", "integer_range", + "float_range", "long_range", "double_range", "date_range", "ip_range", "completion", + "search_as_you_type", "token_count", "murmur3", "flat_object", "unsigned_long" + }; + for (String type : fullSupportTypes) { + registry.register(type, lucene, allCaps); + } + + // text fields support STORE and INDEX but not DOC_VALUES + registry.register("text", lucene, storeAndIndex); + registry.register("match_only_text", lucene, storeAndIndex); + registry.register("annotated_text", lucene, storeAndIndex); + } + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java deleted file mode 100644 index aae78e4b6983e..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.engine.exec.lucene; - -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.util.BytesRef; -import org.opensearch.index.engine.InternalEngine; -import org.opensearch.index.engine.exec.DataFormat; -import org.opensearch.index.engine.exec.DocumentInput; -import org.opensearch.index.engine.exec.FileInfos; -import org.opensearch.index.engine.exec.FlushIn; -import org.opensearch.index.engine.exec.IndexingExecutionEngine; -import org.opensearch.index.engine.exec.Merger; -import org.opensearch.index.engine.exec.RefreshInput; -import org.opensearch.index.engine.exec.RefreshResult; -import org.opensearch.index.engine.exec.WriteResult; -import org.opensearch.index.engine.exec.Writer; -import org.opensearch.index.engine.exec.coord.CatalogSnapshot; -import org.opensearch.index.mapper.KeywordFieldMapper; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.ParseContext; - -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -public class LuceneIEEngine implements IndexingExecutionEngine { - - private final InternalEngine internalEngine; - - public LuceneIEEngine(InternalEngine internalEngine) { - this.internalEngine = internalEngine; - } - - @Override - public List supportedFieldTypes() { - return List.of(); - } - - @Override - public Writer> createWriter(long writerGeneration) throws IOException { - return new LuceneWriter(internalEngine.indexWriter, writerGeneration); - } - - @Override - public void loadWriterFiles(CatalogSnapshot catalogSnapshot) { - - } - - @Override - public void deleteFiles(Map> filesToDelete) throws IOException { - - } - - @Override - public Merger getMerger() { - throw new UnsupportedOperationException(); - } - - @Override - public RefreshResult refresh(RefreshInput refreshInput) throws IOException { - 
internalEngine.refresh(refreshInput.getClass().getName()); - return null; - } - - @Override - public DataFormat getDataFormat() { - return DataFormat.LUCENE; - } - - @Override - public void close() throws IOException { - - } - - public static class LuceneDocumentInput implements DocumentInput { - - private final ParseContext.Document doc; - private final IndexWriter writer; - - public LuceneDocumentInput(ParseContext.Document doc, IndexWriter w) { - this.doc = doc; - this.writer = w; - } - - @Override - public void addRowIdField(String fieldName, long rowId) { - doc.add(new NumericDocValuesField(fieldName, rowId)); - } - - @Override - public void addField(MappedFieldType fieldType, Object value) { - doc.add(new KeywordFieldMapper.KeywordField("f1", new BytesRef("good_field"), null)); - } - - @Override - public ParseContext.Document getFinalInput() { - return doc; - } - - @Override - public WriteResult addToWriter() throws IOException { - writer.addDocument(doc); - return null; - } - - @Override - public void close() throws Exception { - // no-op, reuse writer - } - } - - public static class LuceneWriter implements Writer { - - private final IndexWriter writer; - private final long writerGeneration; - - public LuceneWriter(IndexWriter writer, long writerGeneration) { - this.writer = writer; - this.writerGeneration = writerGeneration; - } - - @Override - public WriteResult addDoc(LuceneDocumentInput d) throws IOException { - writer.addDocument(d.doc); - return null; - } - - @Override - public FileInfos flush(FlushIn flushIn) throws IOException { - writer.flush(); - return null; - } - - @Override - public void sync() throws IOException { - writer.flush(); - } - - @Override - public void close() { - // no-op - } - - @Override - public LuceneDocumentInput newDocumentInput() { - return new LuceneDocumentInput(new ParseContext.Document(), writer); - } - } -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java 
b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java new file mode 100644 index 0000000000000..74f87c343a308 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java @@ -0,0 +1,131 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.engine; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NIOFSDirectory; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineConfig; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.Merger; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.lucene.LuceneDataFormat; +import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; +import org.opensearch.index.engine.exec.lucene.writer.LuceneWriter; +import org.opensearch.index.engine.exec.lucene.writer.LuceneWriterCodec; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import 
java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import static org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter.ROW_ID; + +public class LuceneExecutionEngine implements IndexingExecutionEngine { + + private final MapperService mapperService; + private final ShardPath shardPath; + private final DataFormat dataFormat; + private final EngineConfig engineConfig; + private static final Logger logger = LogManager.getLogger(LuceneExecutionEngine.class); + private final boolean isPrimaryEngine; + private final FieldAssignments fieldAssignments; + + public LuceneExecutionEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimaryEngine, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments) { + this.engineConfig = engineConfig; + this.mapperService = mapperService; + this.dataFormat = DataFormat.LUCENE; + this.isPrimaryEngine = isPrimaryEngine; + this.shardPath = shardPath; + this.fieldAssignments = fieldAssignments; + + // TODO: Add check for Lucene being the primary engine and MapperService has an unknown field, currently + // in POC it's only a secondary engine so we don't need to have all fields in this. + } + + @Override + public List supportedFieldTypes(boolean isPrimaryEngine) { + // Delegate to the static LuceneFieldRegistry — each registered field type is supported + return new ArrayList<>(LuceneFieldRegistry.getRegisteredFieldNames()); + } + + @Override + public Writer> createWriter(long writerGeneration) throws IOException { + Path directoryPath = Files.createTempDirectory(Long.toString(System.nanoTime())); // TODO:: Is this the right name? + EngineRole role = isPrimaryEngine ? 
EngineRole.PRIMARY : EngineRole.SECONDARY; + return new LuceneWriter(directoryPath, createWriter(directoryPath, writerGeneration), writerGeneration, role, fieldAssignments); + + } + + private IndexWriter createWriter(Path directoryPath, long writerGeneration) { + try { + IndexWriterConfig indexWriterConfig = getIndexWriterConfig(writerGeneration, this.engineConfig); + Directory directory = NIOFSDirectory.open(directoryPath); + return new IndexWriter(directory, indexWriterConfig); + } catch (IOException e) { + throw new RuntimeException("Failed to create lucene writer", e); + } + } + + private IndexWriterConfig getIndexWriterConfig(long writerGeneration, EngineConfig engineConfig) { + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(); + indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); + indexWriterConfig.setIndexSort(new Sort(new SortField(ROW_ID, SortField.Type.LONG))); + indexWriterConfig.setCodec(new LuceneWriterCodec(engineConfig.getCodec().getName(), engineConfig.getCodec(), writerGeneration)); + return indexWriterConfig; + } + + @Override + public Merger getMerger() { + return null; + } + + @Override + public RefreshResult refresh(RefreshInput refreshInput) throws IOException { + return null; + } + + @Override + public DataFormat getDataFormat() { + return null; + } + + @Override + public void loadWriterFiles(CatalogSnapshot catalogSnapshot) throws IOException { + + } + + @Override + public void deleteFiles(Map> filesToDelete) throws IOException { + + } + + @Override + public void close() throws IOException { + + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java new file mode 100644 index 0000000000000..73bbf2d3a230b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * 
The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields; + +import org.apache.lucene.document.Field; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.FieldNamesFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; +import org.opensearch.index.mapper.ParseContext.Document; + +import java.util.Set; + +public abstract class LuceneField { + + public abstract void createField(MappedFieldType mappedFieldType, Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities); + + protected final void createFieldNamesField(MappedFieldType mappedFieldType, Document document, ParseContext context) { + assert !mappedFieldType.hasDocValues() : "_field_names should only be used when doc_values are turned off"; + FieldNamesFieldMapper.FieldNamesFieldType fieldNamesFieldType = + context.docMapper().metadataMapper(FieldNamesFieldMapper.class).fieldType(); + if (fieldNamesFieldType != null && fieldNamesFieldType.isEnabled()) { + for (String fieldName : FieldNamesFieldMapper.extractFieldNames(mappedFieldType.name())) { + document.add(new Field(FieldNamesFieldMapper.NAME, fieldName, FieldNamesFieldMapper.Defaults.FIELD_TYPE)); + } + } + } + + public abstract EngineRole getFieldRole(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java new file mode 100644 index 0000000000000..deb060802c4d3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors 
require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields; + +import org.opensearch.index.engine.exec.lucene.fields.data.DoubleLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.KeywordLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.LongLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.TextLuceneField; +import org.opensearch.index.mapper.BooleanFieldMapper; +import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.IdFieldMapper; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.SeqNoFieldMapper; +import org.opensearch.index.mapper.TextFieldMapper; +import org.opensearch.index.mapper.VersionFieldMapper; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class LuceneFieldRegistry { + + /** + * All registered field mappings (thread-safe, mutable) + */ + private static final Map FIELD_REGISTRY = new ConcurrentHashMap<>(); + + // Static initialization block to populate the field registry + static { + initialize(); + } + + // Private constructor to prevent instantiation of utility class + private LuceneFieldRegistry() { + throw new UnsupportedOperationException("Registry class should not be instantiated"); + } + + /** + * Initialize the registry with all available plugins. + * This method should be called during node startup after all plugins are loaded. 
+ */ + public static synchronized void initialize() { + FIELD_REGISTRY.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.DOUBLE.typeName(), new DoubleLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.LONG.typeName(), new LongLuceneField()); + + FIELD_REGISTRY.put(TextFieldMapper.CONTENT_TYPE, new TextLuceneField()); + + } + + /** + * Returns the LuceneField implementation for the specified OpenSearch field type, or null if not found. + */ + public static LuceneField getLuceneField(String fieldType) { + return FIELD_REGISTRY.get(fieldType); + } + + /** + * Returns all registered field type names. + */ + public static java.util.Set getRegisteredFieldNames() { + return java.util.Collections.unmodifiableSet(FIELD_REGISTRY.keySet()); + } + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java new file mode 100644 index 0000000000000..42ac1bd475cf3 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.Set; + +public class DoubleLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + // Only storing the SortedNumericDocValuesField, assuming this is for the ROW_ID field + document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.doubleToSortableLong(value.doubleValue()))); + } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java new file mode 100644 index 0000000000000..d7189bbc45fb5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.Set; + +public class KeywordLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + String value = (String) parseValue; + KeywordFieldMapper.KeywordFieldType keywordFieldType = (KeywordFieldMapper.KeywordFieldType) mappedFieldType; + + // Convert to utf8 only once before feeding postings/dv/stored fields + final BytesRef binaryValue = new BytesRef(value); + + FieldType fieldType = getFieldType(keywordFieldType); + + if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { + Field field = new KeywordFieldMapper.KeywordField(mappedFieldType.name(), binaryValue, fieldType); + document.add(field); + + if (keywordFieldType.hasDocValues() == false && fieldType.omitNorms()) { + createFieldNamesField(mappedFieldType, document, null); // FIXME(review): null ParseContext will NPE — createFieldNamesField dereferences context.docMapper(); thread a real context through createField + } + } + + if (keywordFieldType.hasDocValues()) { + document.add(new SortedSetDocValuesField(mappedFieldType.name(), binaryValue)); + } + } + + private FieldType getFieldType(KeywordFieldMapper.KeywordFieldType keywordFieldType) { + FieldType fieldType = new FieldType(); + fieldType.setTokenized(false); + fieldType.setStored(keywordFieldType.isStored()); + fieldType.setOmitNorms(true); + 
fieldType.setIndexOptions(keywordFieldType.isSearchable() ? IndexOptions.DOCS : IndexOptions.NONE); + fieldType.freeze(); + return fieldType; + } + + @Override + public EngineRole getFieldRole() { + return EngineRole.ALL; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java new file mode 100644 index 0000000000000..4a2c06e1975b0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.Set; + +public class LongLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + document.add(SortedNumericDocValuesField.indexedField(fieldType.name(), value.longValue())); // Is this right? 
+ } + + @Override + public EngineRole getFieldRole() { + return EngineRole.PRIMARY; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java new file mode 100644 index 0000000000000..95362e8aab2fa --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; +import org.opensearch.index.mapper.TextFieldMapper; + +import java.util.Set; + +public class TextLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + final TextFieldMapper.TextFieldType textFieldType = (TextFieldMapper.TextFieldType) mappedFieldType; + final String value = (String) parseValue; + FieldType fieldType = new FieldType(); + fieldType.setStored(textFieldType.isStored()); //TODO: What does it translate to? + fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); // TODO:: how to decide this one? 
+ Field field = new Field(textFieldType.name(), value, fieldType); + document.add(field); + } + + @Override + public EngineRole getFieldRole() { + return EngineRole.ALL; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java new file mode 100644 index 0000000000000..7e8582760f521 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java @@ -0,0 +1,97 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.writer; + +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.IndexWriter; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.io.IOException; +import java.util.Set; + +public class LuceneDocumentInput implements DocumentInput { + private final ParseContext.Document document; + private final IndexWriter indexWriter; + private final EngineRole engineRole; + private final FieldAssignments fieldAssignments; + + public LuceneDocumentInput(ParseContext.Document document, IndexWriter indexWriter, EngineRole engineRole, FieldAssignments fieldAssignments) { + this.document = document; + this.indexWriter = indexWriter; + this.engineRole = 
engineRole; + this.fieldAssignments = fieldAssignments; + } + + @Override + public void addRowIdField(String fieldName, long rowId) { + document.add(new NumericDocValuesField(fieldName, rowId)); + } + + @SuppressWarnings("unchecked") + @Override + public void addField(MappedFieldType fieldType, Object value) { + final String fieldTypeName = fieldType.typeName(); + + // Check if this format should handle this field type at all + if (!fieldAssignments.shouldHandle(fieldTypeName)) { + return; + } + + final LuceneField luceneField = LuceneFieldRegistry.getLuceneField(fieldTypeName); + + if (luceneField == null) { + // Field type not supported by Lucene format — skip silently + return; + } + + Set assignedCapabilities = fieldAssignments.getAssignedCapabilities(fieldTypeName); + luceneField.createField(fieldType, document, value, engineRole, assignedCapabilities); + } + + /** + * Returns the underlying {@link ParseContext.Document} for ingesters to access + * and add Lucene fields directly. + */ + public ParseContext.Document getDocument() { + return document; + } + + @Override + public EngineRole getEngineRole() { + return engineRole; + } + + @Override + public ParseContext.Document getFinalInput() { + return document; + } + + @Override + public WriteResult addToWriter() { + try { + long seqNum = indexWriter.addDocument(document); + return new WriteResult(true, null, 1, 1, seqNum); + } catch (IOException exception) { + return new WriteResult(false, exception, 1, 1, 1); + } + } + + @Override + public void close() throws Exception { + // no-op, reuse writer + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java new file mode 100644 index 0000000000000..254ad12199d0c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + 
* The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.writer; + +import org.apache.lucene.index.IndexWriter; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FileInfos; +import org.opensearch.index.engine.exec.FlushIn; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.mapper.ParseContext; + +import java.io.IOException; +import java.nio.file.Path; + +public class LuceneWriter implements Writer { + + private final IndexWriter writer; + private final long writerGeneration; + private final Path directoryPath; + private final EngineRole engineRole; + private final FieldAssignments fieldAssignments; + + public LuceneWriter(Path directoryPath, IndexWriter writer, long writerGeneration, EngineRole engineRole, FieldAssignments fieldAssignments) { + this.directoryPath = directoryPath; + this.writer = writer; + this.writerGeneration = writerGeneration; + this.engineRole = engineRole; + this.fieldAssignments = fieldAssignments; + } + + @Override + public WriteResult addDoc(LuceneDocumentInput documentInput) throws IOException { + return documentInput.addToWriter(); + } + + @Override + public FileInfos flush(FlushIn flushIn) throws IOException { + writer.forceMerge(1); + WriterFileSet.Builder writerFileSetBuilder = + WriterFileSet.builder().directory(directoryPath).writerGeneration(writerGeneration).addNumRows(writer.getDocStats().numDocs); + return FileInfos.builder().putWriterFileSet(DataFormat.LUCENE, writerFileSetBuilder.build()).build(); + } + + @Override + public void sync() throws IOException { + + } + + @Override + public 
void close() throws IOException { + writer.close(); + } + + @Override + public LuceneDocumentInput newDocumentInput() { + return new LuceneDocumentInput(new ParseContext.Document(), writer, engineRole, fieldAssignments); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriterCodec.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriterCodec.java new file mode 100644 index 0000000000000..237804f3a93a7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriterCodec.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.writer; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.FilterCodec; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; + +import java.io.IOException; + +public class LuceneWriterCodec extends FilterCodec { + + private final long writerGeneration; + + /** + * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec + * and a unique name to this ctor. + * + * @param name + * @param delegate + */ + public LuceneWriterCodec(String name, Codec delegate, long writerGeneration) { + super(name, delegate); + this.writerGeneration = writerGeneration; + } + + // TODO:: Why this? What does it do? 
+ @Override + public SegmentInfoFormat segmentInfoFormat() { + return new SegmentInfoFormat() { + @Override + public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException { + return delegate.segmentInfoFormat().read(directory, segmentName, segmentID, context); + } + + @Override + public void write(Directory directory, SegmentInfo info, IOContext ioContext) throws IOException { + info.putAttribute("writer_generation", String.valueOf(writerGeneration)); + delegate.segmentInfoFormat().write(directory, info, ioContext); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java b/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java deleted file mode 100644 index 6a3948fa10466..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.engine.exec.text; - -import org.opensearch.common.blobstore.BlobContainer; -import org.opensearch.common.blobstore.BlobPath; -import org.opensearch.common.blobstore.BlobStore; -import org.opensearch.common.settings.Setting; -import org.opensearch.common.settings.Settings; -import org.opensearch.index.IndexSettings; -import org.opensearch.index.engine.exec.DataFormat; -import org.opensearch.index.engine.exec.IndexingExecutionEngine; -import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.shard.ShardPath; -import org.opensearch.index.store.FormatStoreDirectory; -import org.opensearch.index.store.GenericStoreDirectory; -import org.opensearch.plugins.DataSourcePlugin; -import org.opensearch.plugins.Plugin; - -import java.io.IOException; - - -public class TextDF extends Plugin implements DataFormat, DataSourcePlugin { - @Override - public Setting dataFormatSettings() { - return null; - } - - @Override - public Setting clusterLeveldataFormatSettings() { - return null; - } - - @Override - public String name() { - return "text"; - } - - @Override - public void configureStore() { - - } - - @Override - public IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath, IndexSettings indexSettings) { - return (IndexingExecutionEngine) new TextEngine(); - } - - @Override - public FormatStoreDirectory createFormatStoreDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException { - return new GenericStoreDirectory<>( - new TextDF(), - shardPath - ); - } - - @Override - public BlobContainer createBlobContainer(BlobStore blobStore, BlobPath blobPath) throws IOException { - BlobPath formatPath = blobPath.add(getDataFormat().name().toLowerCase()); - return blobStore.blobContainer(formatPath); - } - - @Override - public DataFormat getDataFormat() { - return new TextDF(); - } -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java 
b/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java deleted file mode 100644 index 8f43091693274..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.engine.exec.text; - -import org.opensearch.index.engine.exec.coord.Segment; - -import org.opensearch.index.engine.exec.DataFormat; -import org.opensearch.index.engine.exec.DocumentInput; -import org.opensearch.index.engine.exec.FileInfos; -import org.opensearch.index.engine.exec.FileMetadata; -import org.opensearch.index.engine.exec.FlushIn; -import org.opensearch.index.engine.exec.IndexingExecutionEngine; -import org.opensearch.index.engine.exec.Merger; -import org.opensearch.index.engine.exec.RefreshInput; -import org.opensearch.index.engine.exec.RefreshResult; -import org.opensearch.index.engine.exec.WriteResult; -import org.opensearch.index.engine.exec.Writer; -import org.opensearch.index.engine.exec.WriterFileSet; -import org.opensearch.index.engine.exec.coord.CatalogSnapshot; -import org.opensearch.index.engine.exec.merge.MergeResult; -import org.opensearch.index.engine.exec.merge.RowIdMapping; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.shard.ShardPath; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; - -public class TextEngine implements IndexingExecutionEngine { - - private final AtomicLong counter = new AtomicLong(); - private final Set openWriters = 
new HashSet<>(); - private final List openFiles = new ArrayList<>(); - - @Override - public List supportedFieldTypes() { - return List.of(); - } - - @Override - public Writer> createWriter(long writerGeneration) throws IOException { - return new TextWriter("text_file" + counter.getAndIncrement(), this, writerGeneration); - } - - @Override - public Merger getMerger() { - return new TextMerger(); - } - - @Override - public DataFormat getDataFormat() { - return DataFormat.TEXT; - } - - @Override - public void loadWriterFiles(CatalogSnapshot catalogSnapshot) { - - } - - @Override - public void deleteFiles(Map> filesToDelete) throws IOException { - - } - - @Override - public RefreshResult refresh(RefreshInput refreshInput) throws IOException { - openFiles.addAll(refreshInput.getWriterFiles()); - RefreshResult refreshResult = new RefreshResult(); - Segment segment = new Segment(0); - openFiles.forEach(file -> segment.addSearchableFiles(DataFormat.TEXT.name(), file)); - refreshResult.setRefreshedSegments(List.of(segment)); - return refreshResult; - } - - @Override - public void close() throws IOException { - - } - - public static class TextInput implements DocumentInput { - - private final StringBuilder sb = new StringBuilder(); - private final TextWriter writer; - - public TextInput(TextWriter writer) { - this.writer = writer; - } - - @Override - public void addRowIdField(String fieldName, long rowId) { - sb.append(fieldName).append("=").append(rowId).append(";"); - } - - @Override - public void addField(MappedFieldType fieldType, Object value) { - sb.append(fieldType.name()).append("=").append(value).append(";"); - } - - @Override - public String getFinalInput() { - return sb.append("\n").toString(); - } - - @Override - public WriteResult addToWriter() throws IOException { - return writer.addDoc(this); - } - - @Override - public void close() throws Exception { - //no op - } - } - - public static class TextMerger implements Merger { - - @Override - public MergeResult 
merge(List fileMetadataList, long writerGeneration) { - // Here we will implementation of logic for merging files and reassign the row-ids - // and creating the mapping of the old segment+id to new row id. - // - // Needed when this data format is configured as primary data format. - throw new UnsupportedOperationException("merge not supported"); - } - - @Override - public MergeResult merge(List fileMetadataList, RowIdMapping rowIdMapping, long writerGeneration) { - // Here we will have implementation of the merge logic where we will have the mapping of the old row id to new id - // and merging the files. - // - // Needed when data format is not configured as primary data format. - throw new UnsupportedOperationException("merge not supported"); - } - } - - public static class TextWriter implements Writer { - - private final StringBuilder sb = new StringBuilder(); - private final File currentFile; - private final AtomicBoolean flushed = new AtomicBoolean(false); - private final Runnable onClose; - private final long writerGeneration; - - public TextWriter(String currentFile, TextEngine engine, long writerGeneration) throws IOException { - this.currentFile = new File("/Users/shnkgo/mustang" + currentFile); - this.currentFile.createNewFile(); - this.writerGeneration = writerGeneration; - boolean canWrite = this.currentFile.setWritable(true); - if (!canWrite) { - throw new IllegalStateException("Cannot write to file [" + currentFile + "]"); - } - engine.openWriters.add(this); - onClose = () -> engine.openWriters.remove(this); - } - - @Override - public WriteResult addDoc(TextInput d) throws IOException { - sb.append(d.getFinalInput()); - return new WriteResult(true, null, 1, 1, 1); - } - - @Override - public FileInfos flush(FlushIn flushIn) throws IOException { - try (FileWriter fw = new FileWriter(currentFile)) { - fw.write(sb.toString()); - } - flushed.set(true); - WriterFileSet writerFileSet = WriterFileSet.builder() - .directory(currentFile.toPath().getParent()) - 
.writerGeneration(writerGeneration) - .addFile(currentFile.getName()) - .build(); - return FileInfos.builder().putWriterFileSet(DataFormat.TEXT, writerFileSet).build(); - } - - @Override - public void sync() throws IOException { - } - - @Override - public void close() { - onClose.run(); - } - - @Override - public TextInput newDocumentInput() { - return new TextInput(this); - } - - } -} diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java index dc2ea1f35f8b8..345ad79210851 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java @@ -192,7 +192,7 @@ public FieldNamesFieldType fieldType() { return (FieldNamesFieldType) super.fieldType(); } - static Iterable extractFieldNames(final String fullPath) { + public static Iterable extractFieldNames(final String fullPath) { return new Iterable() { @Override public Iterator iterator() { diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 09f4be2d2d8d6..79e886aaff700 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -58,7 +58,6 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.support.XContentMapValues; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.XContentBuilder; diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 809c598f3b562..f91e8b8109535 100644 --- 
a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4646,6 +4646,7 @@ private DocumentMapperForType docMapper() { return mapperService.documentMapperWithAutoCreate(); } + // TODO:: Understand this one.. Do we need same type of Engine Config in lucene as secondary engine? private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) throws IOException { final Sort indexSort = indexSortSupplier.get(); final Engine.Warmer warmer = reader -> { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 4083484648bca..9aa198c77759f 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -165,6 +165,7 @@ import org.opensearch.index.compositeindex.CompositeIndexSettings; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.MergedSegmentWarmerFactory; +import org.opensearch.index.engine.exec.lucene.LuceneDataSourcePlugin; import org.opensearch.index.mapper.MappingTransformerRegistry; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.remote.RemoteIndexPathUploader; @@ -552,6 +553,20 @@ protected Node(final Environment initialEnvironment, Collection clas // Ensure feature flags from opensearch.yml are valid during plugin initialization. 
FeatureFlags.initializeFeatureFlags(tmpSettings); + PluginInfo lucenePluginInfo = new PluginInfo( + "LuceneDataformatPlugin", + "Lucene composite dataformat plugin", + "1.0", + Version.CURRENT, + "1.8", + LuceneDataSourcePlugin.class.getName(), + null, + Collections.emptyList(), + false + ); + + classpathPlugins = List.of(lucenePluginInfo); + this.pluginsService = new PluginsService( tmpSettings, initialEnvironment.configDir(), diff --git a/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java index 1b71fcd8da8a5..65b25ec8ba039 100644 --- a/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java +++ b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java @@ -12,7 +12,10 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; @@ -20,6 +23,7 @@ import org.opensearch.index.store.FormatStoreDirectory; import java.io.IOException; +import java.util.List; import java.util.Map; import java.util.Optional; @@ -28,7 +32,7 @@ default Optional IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath, IndexSettings indexSettings); + IndexingExecutionEngine indexingEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimary, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments); FormatStoreDirectory createFormatStoreDirectory( IndexSettings indexSettings, @@ -38,4 +42,19 @@ FormatStoreDirectory createFormatStoreDirectory( 
BlobContainer createBlobContainer(BlobStore blobStore, BlobPath blobPath) throws IOException; DataFormat getDataFormat(); + + // This is used to resolve the conflicts in case of multi-datasource plugins + // In case we have single plugin, it should not consider this value and go with considering the only DataSource as primary + default boolean isPrimary() { + return false; + } + + /** + * Registers the field type capabilities this plugin's data format supports. + * Plugins override this to declare which field types their format can handle + * and with what capabilities (STORE, INDEX, DOC_VALUES). + */ + default void registerFieldSupport(FieldSupportRegistry registry) { + // Default no-op; plugins override to register their field type capabilities + } } From ae7b4e6d040a63f301e31c67f28515b223f36ccb Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Mon, 2 Mar 2026 21:35:16 +0530 Subject: [PATCH 02/15] Add changes for more fields and refining logic --- .../ParquetDataFormatPlugin.java | 14 +---- .../fields/ArrowFieldRegistry.java | 18 ++---- .../fields/ArrowSchemaBuilder.java | 2 +- .../fields/ParquetField.java | 13 ++-- .../fields/core/data/BinaryParquetField.java | 8 +-- .../fields/core/data/BooleanParquetField.java | 8 +-- .../fields/core/data/IpParquetField.java | 8 +-- .../fields/core/data/KeywordParquetField.java | 8 +-- .../fields/core/data/TextParquetField.java | 8 +-- .../core/data/TokenCountParquetField.java | 8 +-- .../core/data/date/DateNanosParquetField.java | 7 +-- .../core/data/date/DateParquetField.java | 7 +-- .../core/data/number/ByteParquetField.java | 8 +-- .../core/data/number/DoubleParquetField.java | 8 +-- .../core/data/number/FloatParquetField.java | 8 +-- .../data/number/HalfFloatParquetField.java | 8 +-- .../core/data/number/IntegerParquetField.java | 8 +-- .../core/data/number/LongParquetField.java | 8 +-- .../core/data/number/ShortParquetField.java | 8 +-- .../data/number/UnsignedLongParquetField.java | 8 +-- 
.../fields/core/metadata/IdParquetField.java | 7 +-- .../core/metadata/IgnoredParquetField.java | 7 +-- .../core/metadata/RoutingParquetField.java | 7 +-- .../core/metadata/SizeParquetField.java | 7 +-- .../writer/ParquetDocumentInput.java | 2 +- .../exec/lucene/LuceneDataSourcePlugin.java | 25 ++------ .../exec/lucene/fields/LuceneField.java | 9 ++- .../lucene/fields/LuceneFieldRegistry.java | 63 ++++++++++++++++++- .../lucene/fields/data/BinaryLuceneField.java | 35 +++++++++++ .../fields/data/BooleanLuceneField.java | 49 +++++++++++++++ .../lucene/fields/data/ByteLuceneField.java | 44 +++++++++++++ .../lucene/fields/data/DateLuceneField.java | 42 +++++++++++++ .../fields/data/DateNanosLuceneField.java | 42 +++++++++++++ .../fields/data/DocCountLuceneField.java | 38 +++++++++++ .../lucene/fields/data/DoubleLuceneField.java | 19 ++++-- .../lucene/fields/data/FloatLuceneField.java | 45 +++++++++++++ .../fields/data/HalfFloatLuceneField.java | 44 +++++++++++++ .../lucene/fields/data/IdLuceneField.java | 39 ++++++++++++ .../fields/data/IgnoredLuceneField.java | 34 ++++++++++ .../fields/data/IntegerLuceneField.java | 44 +++++++++++++ .../lucene/fields/data/IpLuceneField.java | 46 ++++++++++++++ .../fields/data/KeywordLuceneField.java | 44 +++++-------- .../lucene/fields/data/LongLuceneField.java | 18 ++++-- .../fields/data/RoutingLuceneField.java | 39 ++++++++++++ .../lucene/fields/data/ShortLuceneField.java | 44 +++++++++++++ .../lucene/fields/data/SizeLuceneField.java | 38 +++++++++++ .../lucene/fields/data/TextLuceneField.java | 26 ++++---- .../fields/data/TokenCountLuceneField.java | 42 +++++++++++++ .../fields/data/UnsignedLongLuceneField.java | 44 +++++++++++++ .../lucene/writer/LuceneDocumentInput.java | 2 +- 50 files changed, 930 insertions(+), 188 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java create mode 100644 
server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ByteLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateNanosLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DocCountLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/FloatLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/HalfFloatLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IdLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IgnoredLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IntegerLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IpLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/RoutingLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ShortLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/SizeLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TokenCountLuceneField.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/UnsignedLongLuceneField.java diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java index 170558846a660..2599760bc2858 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java @@ -26,7 +26,6 @@ import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.FieldAssignments; -import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import com.parquet.parquetdataformat.bridge.RustBridge; @@ -45,7 +44,6 @@ import org.opensearch.watcher.ResourceWatcherService; import java.io.IOException; -import java.util.EnumSet; import java.util.HashMap; import java.util.Collection; import java.util.Map; @@ -158,15 +156,9 @@ public BlobContainer createBlobContainer(BlobStore blobStore, BlobPath baseBlobP @Override public void registerFieldSupport(FieldSupportRegistry registry) { DataFormat parquet = getDataFormat(); - java.util.Set storeAndDocValues = EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); - - // Parquet supports STORE and DOC_VALUES for numeric and keyword types but not INDEX (no inverted index) - String[] supportedTypes = { - "keyword", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", - "date", "date_nanos", "boolean", "ip", "binary", "unsigned_long" - }; - for (String type : supportedTypes) { - registry.register(type, parquet, storeAndDocValues); + for (Map.Entry entry : + com.parquet.parquetdataformat.fields.ArrowFieldRegistry.getRegisteredFields().entrySet()) { + registry.register(entry.getKey(), parquet, entry.getValue().getFieldCapabilities()); } } diff --git 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java index d0b12f5c35e69..81de6e41ef659 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java @@ -12,9 +12,7 @@ import com.parquet.parquetdataformat.plugins.fields.CoreDataFieldPlugin; import com.parquet.parquetdataformat.plugins.fields.MetadataFieldPlugin; import com.parquet.parquetdataformat.plugins.fields.ParquetFieldPlugin; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.mapper.SeqNoFieldMapper; -import org.opensearch.index.shard.IllegalIndexShardStateException; import java.util.Collections; import java.util.Map; @@ -143,17 +141,11 @@ public static ParquetField getParquetField(String fieldType) { return FIELD_REGISTRY.get(fieldType); } - public static ParquetField getParquetFieldAfterMatchingRole(String fieldType, boolean isPrimary) { - ParquetField field = FIELD_REGISTRY.get(fieldType); - if(field == null) return null; - - // in case of primary, field should be either Primary marked or marked for All - if(isPrimary && field.getFieldRole() == EngineRole.SECONDARY) return null; - - // in case of non-primary, field should either be Secondary marked or marked for All - if(!isPrimary && field.getFieldRole() == EngineRole.PRIMARY) return null; - - return field; + /** + * Returns an unmodifiable view of all registered field mappings. 
+ */ + public static Map getRegisteredFields() { + return Collections.unmodifiableMap(FIELD_REGISTRY); } public static class RegistryStats { diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java index e71178aa1593f..b8601d420981a 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java @@ -118,7 +118,7 @@ private static boolean notSupportedMetadataField(final Mapper mapper) { * @throws IllegalStateException if the mapper type is not supported in primary context */ private static Field createArrowField(final Mapper mapper, boolean isPrimary) { - final ParquetField parquetField = ArrowFieldRegistry.getParquetFieldAfterMatchingRole(mapper.typeName(), isPrimary); + final ParquetField parquetField = ArrowFieldRegistry.getParquetField(mapper.typeName()); if (parquetField == null) { if (isPrimary) { diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java index e67eead850fc5..481ebb47b294b 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java @@ -11,7 +11,6 @@ import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.mapper.MappedFieldType; @@ -52,7 +51,7 @@ public abstract class ParquetField { * @throws 
IllegalArgumentException if any parameter is invalid for this field type * @throws ClassCastException if parseValue cannot be cast to the expected type */ - protected abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities); + protected abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities); /** * Creates and processes a field entry if the field type supports columnar storage. @@ -69,14 +68,12 @@ public abstract class ParquetField { * @param mappedFieldType the OpenSearch field type metadata, must not be null * @param managedVSR the managed vector schema root, must not be null * @param parseValue the parsed field value to be processed, may be null - * @param engineRole the engine role for this format * @param assignedCapabilities the capabilities this format is responsible for on this field type * @throws IllegalArgumentException if mappedFieldType or managedVSR is null */ public final void createField(final MappedFieldType mappedFieldType, final ManagedVSR managedVSR, final Object parseValue, - final EngineRole engineRole, final Set assignedCapabilities) { Objects.requireNonNull(mappedFieldType, "MappedFieldType cannot be null"); Objects.requireNonNull(managedVSR, "ManagedVSR cannot be null"); @@ -85,7 +82,7 @@ public final void createField(final MappedFieldType mappedFieldType, // TODO: support dynamic mapping update // for now ignore the field if (managedVSR.getVector(mappedFieldType.name()) != null) { - addToGroup(mappedFieldType, managedVSR, parseValue, engineRole, assignedCapabilities); + addToGroup(mappedFieldType, managedVSR, parseValue, assignedCapabilities); } } } @@ -116,7 +113,11 @@ public final void createField(final MappedFieldType mappedFieldType, */ public abstract FieldType getFieldType(); - public abstract EngineRole getFieldRole(); + /** + * Returns the set of capabilities this field 
supports. + * The engine uses this to populate the FieldSupportRegistry. + */ + public abstract Set getFieldCapabilities(); /** * Provides a string representation of this ParquetField for debugging purposes. diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java index dbac38ae6b5c4..43e238e6ed5d8 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +16,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -44,7 +44,7 @@ public class BinaryParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); varBinaryVector.set(rowCount, (byte[]) parseValue); @@ -61,7 +61,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java index 9ffb9a7c8eb44..5ba967bce75fb 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; @@ -18,6 +17,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -45,7 +45,7 @@ public class BooleanParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { BitVector bitVector = (BitVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); bitVector.setSafe(rowIndex, (Boolean) parseValue ? 
1 : 0); @@ -62,7 +62,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java index bb8be28997de2..e494d8024614f 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -20,6 +19,7 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; import java.net.InetAddress; @@ -52,7 +52,7 @@ public class IpParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); final BytesRef bytesRef = new BytesRef(InetAddressPoint.encode((InetAddress) parseValue)); @@ -70,7 +70,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return 
EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java index 9f5c1a3b7a965..27ecdba2e93a2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -18,6 +17,7 @@ import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.EnumSet; import java.util.Set; /** @@ -47,7 +47,7 @@ public class KeywordParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -64,7 +64,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java index 3e3a54dba646b..be4b0df22db15 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; @@ -19,6 +18,7 @@ import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.EnumSet; import java.util.Set; /** @@ -48,7 +48,7 @@ public class TextParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -65,7 +65,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java index b1d621f93604c..cd53a6d074153 100644 --- 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +16,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -47,7 +47,7 @@ public class TokenCountParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); @@ -64,7 +64,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java index ceeb0b94570ff..ae75d876a8eca 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.date; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -47,7 +46,7 @@ public class DateNanosParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampNanoVector.setSafe(rowIndex, (long) parseValue); @@ -64,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java index 04cfbb7190e6d..1f44a7d4278d3 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.date; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import 
com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -47,7 +46,7 @@ public class DateParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampMilliVector.setSafe(rowIndex, (long) parseValue); @@ -64,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java index 662cbc07a4597..a8909d71756bd 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +16,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -44,7 +44,7 @@ public class ByteParquetField extends ParquetField { 
@Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); tinyIntVector.setSafe(rowCount, (Byte) parseValue); @@ -61,7 +61,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java index 849fd08ecfcc5..74b484f46aee6 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -18,6 +17,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -46,7 +46,7 @@ public class DoubleParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType 
mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); float8Vector.setSafe(rowCount, (Double) parseValue); @@ -63,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java index fd56dd7acb303..613a49f615c41 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -18,6 +17,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -46,7 +46,7 @@ public class FloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = 
managedVSR.getRowCount(); float4Vector.setSafe(rowCount, (Float) parseValue); @@ -63,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java index 26eb5e5db6d53..9dc68c4d378a3 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -18,6 +17,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -46,7 +46,7 @@ public class HalfFloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); float2Vector.setSafe(rowCount, (Short) parseValue); @@ -63,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return 
EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java index 93c95b344ee58..50927fd324dd1 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +16,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -44,7 +44,7 @@ public class IntegerParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); @@ -61,7 +61,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java index 8872d7b834c00..cff2b86795b64 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +16,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -45,7 +45,7 @@ public class LongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); bigIntVector.setSafe(rowCount, (Long) parseValue); @@ -62,7 +62,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java index 60e8cf0855c72..7274712de95ba 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +16,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -45,7 +45,7 @@ public class ShortParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); smallIntVector.setSafe(rowCount, (Short) parseValue); @@ -62,7 +62,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java index f5480a42b7871..00989e9ffc9a6 100644 --- 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.data.number; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -17,6 +16,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import java.util.EnumSet; import java.util.Set; /** @@ -45,7 +45,7 @@ public class UnsignedLongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); long longValue = ((Number) parseValue).longValue(); @@ -63,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java index 8b1b803b14978..9cd6d581d42dd 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.metadata; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -46,7 +45,7 @@ public class IdParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); BytesRef bytesRef = (BytesRef) parseValue; @@ -64,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java index a46b7d65b4914..6eb852b3c04e5 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.metadata; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import 
com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -46,7 +45,7 @@ public class IgnoredParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); varCharVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -63,7 +62,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java index 3167997fd9398..a36d0718568e7 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.metadata; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -46,7 +45,7 @@ public class RoutingParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + protected void addToGroup(MappedFieldType 
mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { VarCharVector routingVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); routingVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); @@ -63,7 +62,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java index daf8c2ac52296..baf4b5cc35cf3 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java @@ -8,7 +8,6 @@ package com.parquet.parquetdataformat.fields.core.metadata; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -47,7 +46,7 @@ public class SizeParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); @@ -64,7 +63,7 @@ public FieldType getFieldType() { } @Override - public EngineRole getFieldRole() { 
- return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java index 67d54aca624e7..7f2c21f9829c1 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -71,7 +71,7 @@ public void addField(MappedFieldType fieldType, Object value) { } Set assignedCapabilities = fieldAssignments.getAssignedCapabilities(fieldTypeName); - parquetField.createField(fieldType, managedVSR, value, engineRole, assignedCapabilities); + parquetField.createField(fieldType, managedVSR, value, assignedCapabilities); } @Override diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java index 7972abf3b3e24..213d1da3ff24b 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java @@ -15,10 +15,11 @@ import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.FieldAssignments; -import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.engine.exec.lucene.engine.LuceneExecutionEngine; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; import 
org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.FormatStoreDirectory; @@ -27,8 +28,7 @@ import org.opensearch.plugins.Plugin; import java.io.IOException; -import java.util.EnumSet; -import java.util.Set; +import java.util.Map; public class LuceneDataSourcePlugin extends Plugin implements DataSourcePlugin { @@ -56,24 +56,9 @@ public DataFormat getDataFormat() { @Override public void registerFieldSupport(FieldSupportRegistry registry) { DataFormat lucene = getDataFormat(); - Set allCaps = EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); - Set storeAndIndex = EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX); - - // Most field types support all three capabilities - String[] fullSupportTypes = { - "keyword", "long", "integer", "short", "byte", "double", "float", "half_float", "scaled_float", - "date", "date_nanos", "boolean", "ip", "geo_point", "geo_shape", "binary", "integer_range", - "float_range", "long_range", "double_range", "date_range", "ip_range", "completion", - "search_as_you_type", "token_count", "murmur3", "flat_object", "unsigned_long" - }; - for (String type : fullSupportTypes) { - registry.register(type, lucene, allCaps); + for (Map.Entry entry : LuceneFieldRegistry.getRegisteredFields().entrySet()) { + registry.register(entry.getKey(), lucene, entry.getValue().getFieldCapabilities()); } - - // text fields support STORE and INDEX but not DOC_VALUES - registry.register("text", lucene, storeAndIndex); - registry.register("match_only_text", lucene, storeAndIndex); - registry.register("annotated_text", lucene, storeAndIndex); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java index 73bbf2d3a230b..189dcebd1bfac 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java 
+++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java @@ -9,7 +9,6 @@ package org.opensearch.index.engine.exec.lucene.fields; import org.apache.lucene.document.Field; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.mapper.FieldNamesFieldMapper; import org.opensearch.index.mapper.MappedFieldType; @@ -20,7 +19,7 @@ public abstract class LuceneField { - public abstract void createField(MappedFieldType mappedFieldType, Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities); + public abstract void createField(MappedFieldType mappedFieldType, Document document, Object parseValue, Set assignedCapabilities); protected final void createFieldNamesField(MappedFieldType mappedFieldType, Document document, ParseContext context) { assert !mappedFieldType.hasDocValues() : "_field_names should only be used when doc_values are turned off"; @@ -33,5 +32,9 @@ protected final void createFieldNamesField(MappedFieldType mappedFieldType, Docu } } - public abstract EngineRole getFieldRole(); + /** + * Returns the set of capabilities this field supports. + * The engine uses this to populate the FieldSupportRegistry. 
+ */ + public abstract Set getFieldCapabilities(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java index deb060802c4d3..c10a2b7b69129 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java @@ -8,15 +8,37 @@ package org.opensearch.index.engine.exec.lucene.fields; +import org.opensearch.index.engine.exec.lucene.fields.data.BinaryLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.BooleanLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.ByteLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.DateLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.DateNanosLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.DocCountLuceneField; import org.opensearch.index.engine.exec.lucene.fields.data.DoubleLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.FloatLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.HalfFloatLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.IdLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.IgnoredLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.IntegerLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.IpLuceneField; import org.opensearch.index.engine.exec.lucene.fields.data.KeywordLuceneField; import org.opensearch.index.engine.exec.lucene.fields.data.LongLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.RoutingLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.ShortLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.SizeLuceneField; import 
org.opensearch.index.engine.exec.lucene.fields.data.TextLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.TokenCountLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.UnsignedLongLuceneField; +import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.BooleanFieldMapper; import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.index.mapper.IdFieldMapper; +import org.opensearch.index.mapper.IgnoredFieldMapper; +import org.opensearch.index.mapper.IpFieldMapper; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.RoutingFieldMapper; import org.opensearch.index.mapper.SeqNoFieldMapper; import org.opensearch.index.mapper.TextFieldMapper; import org.opensearch.index.mapper.VersionFieldMapper; @@ -46,12 +68,42 @@ private LuceneFieldRegistry() { * This method should be called during node startup after all plugins are loaded. 
*/ public static synchronized void initialize() { + // Text-based fields FIELD_REGISTRY.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordLuceneField()); - FIELD_REGISTRY.put(NumberFieldMapper.NumberType.DOUBLE.typeName(), new DoubleLuceneField()); + FIELD_REGISTRY.put(TextFieldMapper.CONTENT_TYPE, new TextLuceneField()); + FIELD_REGISTRY.put(IpFieldMapper.CONTENT_TYPE, new IpLuceneField()); + + // Numeric fields + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.BYTE.typeName(), new ByteLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.SHORT.typeName(), new ShortLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.INTEGER.typeName(), new IntegerLuceneField()); FIELD_REGISTRY.put(NumberFieldMapper.NumberType.LONG.typeName(), new LongLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.UNSIGNED_LONG.typeName(), new UnsignedLongLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), new HalfFloatLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.FLOAT.typeName(), new FloatLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.DOUBLE.typeName(), new DoubleLuceneField()); + FIELD_REGISTRY.put("token_count", new TokenCountLuceneField()); + FIELD_REGISTRY.put("scaled_float", new LongLuceneField()); - FIELD_REGISTRY.put(TextFieldMapper.CONTENT_TYPE, new TextLuceneField()); + // Temporal fields + FIELD_REGISTRY.put(DateFieldMapper.CONTENT_TYPE, new DateLuceneField()); + FIELD_REGISTRY.put(DateFieldMapper.DATE_NANOS_CONTENT_TYPE, new DateNanosLuceneField()); + + // Boolean field + FIELD_REGISTRY.put(BooleanFieldMapper.CONTENT_TYPE, new BooleanLuceneField()); + // Binary field + FIELD_REGISTRY.put(BinaryFieldMapper.CONTENT_TYPE, new BinaryLuceneField()); + + // Metadata fields + FIELD_REGISTRY.put(IdFieldMapper.CONTENT_TYPE, new IdLuceneField()); + FIELD_REGISTRY.put(RoutingFieldMapper.CONTENT_TYPE, new RoutingLuceneField()); + FIELD_REGISTRY.put(IgnoredFieldMapper.CONTENT_TYPE, 
new IgnoredLuceneField()); + FIELD_REGISTRY.put("_size", new SizeLuceneField()); + FIELD_REGISTRY.put(DocCountFieldMapper.CONTENT_TYPE, new DocCountLuceneField()); + FIELD_REGISTRY.put(SeqNoFieldMapper.CONTENT_TYPE, new LongLuceneField()); + FIELD_REGISTRY.put(VersionFieldMapper.CONTENT_TYPE, new LongLuceneField()); + FIELD_REGISTRY.put(SeqNoFieldMapper.PRIMARY_TERM_NAME, new LongLuceneField()); } /** @@ -68,4 +120,11 @@ public static java.util.Set getRegisteredFieldNames() { return java.util.Collections.unmodifiableSet(FIELD_REGISTRY.keySet()); } + /** + * Returns an unmodifiable view of all registered field mappings. + */ + public static Map getRegisteredFields() { + return java.util.Collections.unmodifiableMap(FIELD_REGISTRY); + } + } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java new file mode 100644 index 0000000000000..2056c9cf5ea33 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class BinaryLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final BytesRef value = (BytesRef) parseValue; + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java new file mode 100644 index 0000000000000..9d4578b2c3ae9 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.IndexOptions; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class BooleanLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final Boolean value = (Boolean) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + FieldType ft = new FieldType(); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.DOCS); + ft.setTokenized(false); + ft.freeze(); + document.add(new Field(mappedFieldType.name(), value ? "T" : "F", ft)); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(mappedFieldType.name(), value ? 1 : 0)); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value ? 
"T" : "F")); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ByteLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ByteLuceneField.java new file mode 100644 index 0000000000000..d592b1f2dedb8 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ByteLuceneField.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class ByteLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new IntPoint(fieldType.name(), value.byteValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.byteValue())); + } + if 
(assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.byteValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateLuceneField.java new file mode 100644 index 0000000000000..f01c95e8d0029 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class DateLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final long timestamp = (long) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new LongPoint(mappedFieldType.name(), timestamp)); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(mappedFieldType.name(), timestamp)); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + 
document.add(new StoredField(mappedFieldType.name(), timestamp)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateNanosLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateNanosLuceneField.java new file mode 100644 index 0000000000000..540b5bde3eab8 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateNanosLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class DateNanosLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final long timestamp = (long) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new LongPoint(mappedFieldType.name(), timestamp)); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(mappedFieldType.name(), timestamp)); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new 
StoredField(mappedFieldType.name(), timestamp)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DocCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DocCountLuceneField.java new file mode 100644 index 0000000000000..19ecdda88941a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DocCountLuceneField.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class DocCountLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(mappedFieldType.name(), value.longValue())); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value.longValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java index 42ac1bd475cf3..1415b90cd8206 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java @@ -12,27 +12,34 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.util.NumericUtils; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; +import java.util.EnumSet; import java.util.Set; public class DoubleLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; final Number value = (Number) parseValue; - // Only storing the SortedNumericDocValuesField, assuming this is for the ROW_ID field - document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.doubleToSortableLong(value.doubleValue()))); + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new DoublePoint(fieldType.name(), value.doubleValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), 
NumericUtils.doubleToSortableLong(value.doubleValue()))); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.doubleValue())); + } } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/FloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/FloatLuceneField.java new file mode 100644 index 0000000000000..3d40e9b273454 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/FloatLuceneField.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class FloatLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new FloatPoint(fieldType.name(), value.floatValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.floatToSortableInt(value.floatValue()))); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.floatValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/HalfFloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/HalfFloatLuceneField.java new file mode 100644 index 0000000000000..dceaa091ee7a6 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/HalfFloatLuceneField.java @@ -0,0 +1,44 @@ +/* 
+ * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.sandbox.document.HalfFloatPoint; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class HalfFloatLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new HalfFloatPoint(fieldType.name(), value.floatValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), HalfFloatPoint.halfFloatToSortableShort(value.floatValue()))); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.floatValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IdLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IdLuceneField.java new file mode 100644 
index 0000000000000..3934f7e80d26b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IdLuceneField.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class IdLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final BytesRef value = (BytesRef) parseValue; + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new BinaryDocValuesField(mappedFieldType.name(), value)); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IgnoredLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IgnoredLuceneField.java new file mode 100644 index 0000000000000..9a9200e75496f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IgnoredLuceneField.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch 
Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class IgnoredLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final String value = parseValue.toString(); + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IntegerLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IntegerLuceneField.java new file mode 100644 index 0000000000000..e7fa69c127254 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IntegerLuceneField.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class IntegerLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new IntPoint(fieldType.name(), value.intValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.intValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IpLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IpLuceneField.java new file mode 100644 index 0000000000000..163a70db95824 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IpLuceneField.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to 
+ * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.network.InetAddresses; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.net.InetAddress; +import java.util.EnumSet; +import java.util.Set; + +public class IpLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final InetAddress address = (InetAddress) parseValue; + final byte[] encoded = InetAddresses.forString(address.getHostAddress()).getAddress(); + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new InetAddressPoint(mappedFieldType.name(), InetAddresses.forString(address.getHostAddress()))); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedSetDocValuesField(mappedFieldType.name(), new BytesRef(encoded))); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), new BytesRef(encoded))); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java index d7189bbc45fb5..e35494ba2da3a 100644 --- 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java @@ -8,58 +8,46 @@ package org.opensearch.index.engine.exec.lucene.fields.data; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.util.BytesRef; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; +import java.util.EnumSet; import java.util.Set; public class KeywordLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { String value = (String) parseValue; - KeywordFieldMapper.KeywordFieldType keywordFieldType = (KeywordFieldMapper.KeywordFieldType) mappedFieldType; - - // Convert to utf8 only once before feeding postings/dv/stored fields final BytesRef binaryValue = new BytesRef(value); - FieldType fieldType = getFieldType(keywordFieldType); - - if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { - Field field = new KeywordFieldMapper.KeywordField(mappedFieldType.name(), binaryValue, fieldType); - document.add(field); - - if (keywordFieldType.hasDocValues() == false && fieldType.omitNorms()) { - createFieldNamesField(mappedFieldType, document, null); - } + boolean shouldIndex = 
assignedCapabilities.contains(FieldCapability.INDEX); + boolean shouldStore = assignedCapabilities.contains(FieldCapability.STORE); + + if (shouldIndex || shouldStore) { + FieldType fieldType = new FieldType(); + fieldType.setTokenized(false); + fieldType.setStored(shouldStore); + fieldType.setOmitNorms(true); + fieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS : IndexOptions.NONE); + fieldType.freeze(); + document.add(new KeywordFieldMapper.KeywordField(mappedFieldType.name(), binaryValue, fieldType)); } - if (keywordFieldType.hasDocValues()) { + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { document.add(new SortedSetDocValuesField(mappedFieldType.name(), binaryValue)); } } - private FieldType getFieldType(KeywordFieldMapper.KeywordFieldType keywordFieldType) { - FieldType fieldType = new FieldType(); - fieldType.setTokenized(false); - fieldType.setStored(keywordFieldType.isStored()); - fieldType.setOmitNorms(true); - fieldType.setIndexOptions(keywordFieldType.isSearchable() ? 
IndexOptions.DOCS : IndexOptions.NONE); - fieldType.freeze(); - return fieldType; - } - @Override - public EngineRole getFieldRole() { - return EngineRole.ALL; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java index 4a2c06e1975b0..676989fa917db 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java @@ -11,26 +11,34 @@ import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; +import java.util.EnumSet; import java.util.Set; public class LongLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; final Number value = (Number) parseValue; - document.add(SortedNumericDocValuesField.indexedField(fieldType.name(), value.longValue())); // Is this right? 
+ if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new LongPoint(fieldType.name(), value.longValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.longValue())); + } } @Override - public EngineRole getFieldRole() { - return EngineRole.PRIMARY; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/RoutingLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/RoutingLuceneField.java new file mode 100644 index 0000000000000..d514ffc0eb0ee --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/RoutingLuceneField.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class RoutingLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final String value = parseValue.toString(); + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedSetDocValuesField(mappedFieldType.name(), new BytesRef(value))); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ShortLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ShortLuceneField.java new file mode 100644 index 0000000000000..510f170ee76eb --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ShortLuceneField.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class ShortLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new IntPoint(fieldType.name(), value.shortValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.shortValue())); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.shortValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/SizeLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/SizeLuceneField.java new file mode 100644 index 0000000000000..5fa5a32092048 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/SizeLuceneField.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require 
contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class SizeLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(mappedFieldType.name(), value.intValue())); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value.intValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java index 95362e8aab2fa..b4a86260afb01 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java @@ -11,30 +11,34 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; -import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldCapability; import 
org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; -import org.opensearch.index.mapper.TextFieldMapper; +import java.util.EnumSet; import java.util.Set; public class TextLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, EngineRole engineRole, Set assignedCapabilities) { - final TextFieldMapper.TextFieldType textFieldType = (TextFieldMapper.TextFieldType) mappedFieldType; + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { final String value = (String) parseValue; - FieldType fieldType = new FieldType(); - fieldType.setStored(textFieldType.isStored()); //TODO: What does it translate to? - fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); // TODO:: how to decide this one? - Field field = new Field(textFieldType.name(), value, fieldType); - document.add(field); + + boolean shouldIndex = assignedCapabilities.contains(FieldCapability.INDEX); + boolean shouldStore = assignedCapabilities.contains(FieldCapability.STORE); + + if (shouldIndex || shouldStore) { + FieldType fieldType = new FieldType(); + fieldType.setStored(shouldStore); + fieldType.setIndexOptions(shouldIndex ? 
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.NONE); + Field field = new Field(mappedFieldType.name(), value, fieldType); + document.add(field); + } } @Override - public EngineRole getFieldRole() { - return EngineRole.ALL; + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TokenCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TokenCountLuceneField.java new file mode 100644 index 0000000000000..beaf5e9337a76 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TokenCountLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class TokenCountLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new IntPoint(mappedFieldType.name(), value.intValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(mappedFieldType.name(), 
value.intValue())); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(mappedFieldType.name(), value.intValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/UnsignedLongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/UnsignedLongLuceneField.java new file mode 100644 index 0000000000000..049155531093b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/UnsignedLongLuceneField.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class UnsignedLongLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + final Number value = (Number) parseValue; + if (assignedCapabilities.contains(FieldCapability.INDEX)) { + document.add(new LongPoint(fieldType.name(), 
value.longValue())); + } + if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); + } + if (assignedCapabilities.contains(FieldCapability.STORE)) { + document.add(new StoredField(fieldType.name(), value.longValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java index 7e8582760f521..2cf83a645c32f 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java @@ -59,7 +59,7 @@ public void addField(MappedFieldType fieldType, Object value) { } Set assignedCapabilities = fieldAssignments.getAssignedCapabilities(fieldTypeName); - luceneField.createField(fieldType, document, value, engineRole, assignedCapabilities); + luceneField.createField(fieldType, document, value, assignedCapabilities); } /** From ce410b5532066912052109a4783cdb2704522953 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Tue, 3 Mar 2026 20:47:18 +0530 Subject: [PATCH 03/15] Add changes for commit and refresh flow --- .../index/engine/exec/WriterFileSet.java | 32 +++++++++++- .../index/engine/exec/commit/Committer.java | 4 +- .../exec/commit/LuceneCommitEngine.java | 52 ++++++++++++++++--- .../CompositeIndexingExecutionEngine.java | 6 ++- .../exec/coord/CatalogSnapshotManager.java | 35 ++++++------- .../engine/exec/coord/CompositeEngine.java | 4 +- .../lucene/engine/LuceneExecutionEngine.java | 2 +- 7 files changed, 102 insertions(+), 33 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java 
b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java index 932b12126b5ae..82a6c98c1277b 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java @@ -16,7 +16,6 @@ import java.io.Serializable; import java.nio.file.Path; import java.util.HashSet; -import java.util.List; import java.util.Set; public class WriterFileSet implements Serializable, Writeable { @@ -25,18 +24,32 @@ public class WriterFileSet implements Serializable, Writeable { private final long writerGeneration; private final Set files; private final long numRows; + private boolean isRefreshed; public WriterFileSet(Path directory, long writerGeneration, long numRows) { this.numRows = numRows; this.files = new HashSet<>(); this.writerGeneration = writerGeneration; this.directory = directory.toString(); + this.isRefreshed = false; } + public WriterFileSet withDirectoryAndFiles(String newDirectory, Set files) { + return WriterFileSet.builder() + .directory(Path.of(newDirectory)) + .writerGeneration(this.writerGeneration) + .addNumRows(this.numRows) + .isRefreshed(this.isRefreshed) + .addFiles(files) + .build(); + } + + public WriterFileSet(StreamInput in) throws IOException { this.directory = in.readString(); this.writerGeneration = in.readLong(); this.numRows = in.readVInt(); + this.isRefreshed = in.readBoolean(); int fileCount = in.readVInt(); this.files = new HashSet<>(fileCount); @@ -50,6 +63,7 @@ public WriterFileSet withDirectory(String newDirectory) { .directory(Path.of(newDirectory)) .writerGeneration(this.writerGeneration) .addFiles(this.files) + .isRefreshed(this.isRefreshed) .build(); } @@ -61,6 +75,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(directory); out.writeLong(writerGeneration); out.writeVInt((int) numRows); + out.writeBoolean(isRefreshed); out.writeVInt(files.size()); for (String file : files) { out.writeString(file); @@ 
-123,10 +138,20 @@ public static Builder builder() { return new Builder(); } + public boolean refresh() { + // Dummy re-write + return isRefreshed; + } + + public void setRefreshed(){ + this.isRefreshed = true; + } + public static class Builder { private Path directory; private Long writerGeneration; private long numRows; + private boolean isRefreshed = false; private final Set files = new HashSet<>(); public Builder directory(Path directory) { @@ -167,5 +192,10 @@ public WriterFileSet build() { fileSet.files.addAll(this.files); return fileSet; } + + public Builder isRefreshed(boolean isRefreshed) { + this.isRefreshed = isRefreshed; + return this; + } } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java index 4fcfd3117221a..89d7b963087b1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java @@ -11,14 +11,16 @@ import org.opensearch.index.engine.CommitStats; import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.coord.Segment; import java.io.Closeable; import java.io.IOException; +import java.util.List; import java.util.Map; public interface Committer extends Closeable { - void addLuceneIndexes(CatalogSnapshot catalogSnapshot); + void addLuceneIndexes(List catalogSnapshot) throws IOException; CommitPoint commit(Iterable> commitData, CatalogSnapshot catalogSnapshot); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java index fc3272087ab95..697056d4e2eb9 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java +++ 
b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java @@ -9,14 +9,19 @@ package org.opensearch.index.engine.exec.commit; import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.NIOFSDirectory; import org.opensearch.common.collect.MapBuilder; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.logging.Loggers; +import org.opensearch.common.lucene.Lucene; import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.engine.CombinedDeletionPolicy; import org.opensearch.index.engine.CommitStats; @@ -26,15 +31,20 @@ import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.WriterFileSet; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.coord.Segment; +import org.opensearch.index.engine.exec.lucene.LuceneDataFormat; import org.opensearch.index.store.Store; import org.opensearch.index.translog.TranslogDeletionPolicy; import java.io.IOException; import java.nio.file.Path; import java.util.Base64; -import java.util.Collection; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.function.Function; import java.util.function.LongSupplier; +import java.util.stream.Collectors; public class LuceneCommitEngine implements Committer { @@ -50,6 +60,7 @@ public LuceneCommitEngine(Store store, TranslogDeletionPolicy translogDeletionPo this.combinedDeletionPolicy = new CombinedDeletionPolicy(logger, translogDeletionPolicy, null, globalCheckpointSupplier); IndexWriterConfig indexWriterConfig = new 
IndexWriterConfig(); indexWriterConfig.setIndexDeletionPolicy(combinedDeletionPolicy); + indexWriterConfig.setMergePolicy(NoMergePolicy.INSTANCE); this.store = store; this.lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); if (primaryMode) { @@ -58,20 +69,45 @@ public LuceneCommitEngine(Store store, TranslogDeletionPolicy translogDeletionPo } @Override - public void addLuceneIndexes(CatalogSnapshot catalogSnapshot) { - Collection luceneFileCollection = catalogSnapshot.getSearchableFiles(DataFormat.LUCENE.name()); - luceneFileCollection.forEach(writerFileSet -> { + public synchronized void addLuceneIndexes(List segments) throws IOException { + + for(Segment segment : segments) { + WriterFileSet wfs = segment.getDFGroupedSearchableFiles().get(LuceneDataFormat.LUCENE.name()); + if(wfs == null || wfs.refresh()) continue; + try { - indexWriter.addIndexes(new NIOFSDirectory(Path.of(writerFileSet.getDirectory()))); + indexWriter.addIndexes(new NIOFSDirectory(Path.of(wfs.getDirectory()))); + wfs.setRefreshed(); } catch (IOException e) { - throw new RuntimeException(e); + throw new RuntimeException("Not able to copy it to the main writer in commiter"); } - }); + } + + final Map segmentByGeneration = + segments.stream().collect(Collectors.toMap(Segment::getGeneration, Function.identity())); + + try (DirectoryReader dr = DirectoryReader.open(indexWriter)){ + for(LeafReaderContext leaf : dr.getContext().leaves()) { + SegmentCommitInfo segmentCommitInfo = Lucene.segmentReader(leaf.reader()).getSegmentInfo(); + String generationAttr = segmentCommitInfo.info.getAttribute("writer_generation"); + if(generationAttr == null) { + throw new RuntimeException("failed to fetch writer generation"); + } + long writerGeneration = Long.parseLong(generationAttr); + if (segmentByGeneration.containsKey(writerGeneration)) { + WriterFileSet writerFileSet = + segmentByGeneration.get(writerGeneration).getDFGroupedSearchableFiles().get(DataFormat.LUCENE.name()); + 
segmentByGeneration.get(writerGeneration).addSearchableFiles( + DataFormat.LUCENE.name(), + writerFileSet.withDirectoryAndFiles(indexWriter.getDirectory().toString(), new HashSet<>(segmentCommitInfo.files())) + ); + } + } + } } @Override public synchronized CommitPoint commit(Iterable> commitData, CatalogSnapshot catalogSnapshot) { - addLuceneIndexes(catalogSnapshot); indexWriter.setLiveCommitData(commitData); try { indexWriter.commit(); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java index 89a39298bb801..47ec882201d4f 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -211,6 +211,7 @@ public List supportedFieldTypes(boolean isPrimaryEngine) { @Override public void loadWriterFiles(CatalogSnapshot catalogSnapshot) throws IOException { + // If this gets called, will it not throw an exception? for (IndexingExecutionEngine delegate : delegates) { delegate.loadWriterFiles(catalogSnapshot); } @@ -219,6 +220,7 @@ public void deleteFiles(Map> filesToDelete) throws IOException { for (IndexingExecutionEngine delegate : delegates) { + // Why create a map when we always pass only this delegate's format here?
Map> formatSpecificFilesToDelete = new HashMap<>(); formatSpecificFilesToDelete.put(delegate.getDataFormat().name(), filesToDelete.get(delegate.getDataFormat().name())); delegate.deleteFiles(formatSpecificFilesToDelete); @@ -235,11 +237,11 @@ public Writer createCompositeW } @Override - public RefreshResult refresh(RefreshInput ignore) throws IOException { + public RefreshResult refresh(RefreshInput refreshInput) throws IOException { RefreshResult finalResult; try { List dataFormatWriters = dataFormatWriterPool.checkoutAll(); - List refreshedSegment = ignore.getExistingSegments(); + List refreshedSegment = refreshInput.getExistingSegments(); List newSegmentList = new ArrayList<>(); // flush to disk for (CompositeDataFormatWriter dataFormatWriter : dataFormatWriters) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java index 29c8e7e0e3449..fca62d1115282 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java @@ -10,7 +10,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.index.engine.exec.coord.Segment; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.RefreshResult; @@ -80,16 +79,10 @@ public void close() { }; } - public synchronized void applyRefreshResult(RefreshResult refreshResult) { - commitCatalogSnapshot( - new CompositeEngineCatalogSnapshot( - latestCatalogSnapshot.getId() + 1, - latestCatalogSnapshot.getVersion() + 1, - refreshResult.getRefreshedSegments(), - catalogSnapshotMap, - indexFileDeleter::get - ) - ); + public synchronized void applyRefreshResult(RefreshResult refreshResult) throws IOException { + // Will refresh always trigger a commit? --> It should be a flush? 
+ // ApplyRefreshResult --> CatalogSnapshot --> Committer(add Indexes) + advanceCatalogSnapshot(refreshResult.getRefreshedSegments()); } public synchronized void applyReplicationChanges(CatalogSnapshot catalogSnapshot, ShardPath shardPath) { @@ -112,7 +105,7 @@ public synchronized void applyReplicationChanges(CatalogSnapshot catalogSnapshot } } - public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge oneMerge) { + public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge oneMerge) throws IOException { List segmentList = new ArrayList<>(latestCatalogSnapshot.getSegments()); @@ -147,19 +140,25 @@ public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge one if (!inserted) { segmentList.add(0, segmentToAdd); } - CompositeEngineCatalogSnapshot newCatSnap = new CompositeEngineCatalogSnapshot(latestCatalogSnapshot.getId() + 1, latestCatalogSnapshot.getVersion() + 1, segmentList, catalogSnapshotMap, indexFileDeleter::get); // Commit new catalog snapshot - commitCatalogSnapshot(newCatSnap); + advanceCatalogSnapshot(segmentList); } - private synchronized void commitCatalogSnapshot(CompositeEngineCatalogSnapshot newCatSnap) { - catalogSnapshotMap.put(newCatSnap.getId(), newCatSnap); + private synchronized void advanceCatalogSnapshot(List refreshedSegments) throws IOException { + compositeEngineCommitter.addLuceneIndexes(refreshedSegments); + CompositeEngineCatalogSnapshot cecs = new CompositeEngineCatalogSnapshot( + latestCatalogSnapshot.getId() + 1, + latestCatalogSnapshot.getVersion() + 1, + refreshedSegments, + catalogSnapshotMap, + indexFileDeleter::get + ); + catalogSnapshotMap.put(cecs.getId(), cecs); if (latestCatalogSnapshot != null) { latestCatalogSnapshot.decRef(); } - latestCatalogSnapshot = newCatSnap; - compositeEngineCommitter.addLuceneIndexes(latestCatalogSnapshot); + latestCatalogSnapshot = cecs; } private Segment getSegment(Map writerFileSetMap) { diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java index 13ee98d5a4802..bdad934d5f69f 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java @@ -806,7 +806,7 @@ public synchronized void refresh(String source) throws EngineException { RefreshInput refreshInput = new RefreshInput(); refreshInput.setExistingSegments(new ArrayList<>(catalogSnapshotReleasableRef.getRef().getSegments())); - RefreshResult refreshResult = engine.refresh(refreshInput); + RefreshResult refreshResult = engine.refresh(refreshInput); // It should refresh the primary engine, i.e parquet if (refreshResult != null) { catalogSnapshotManager.applyRefreshResult(refreshResult); refreshed = true; @@ -1036,7 +1036,7 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { boolean shouldPeriodicallyFlush = shouldPeriodicallyFlush(); if (force || shouldFlush() || shouldPeriodicallyFlush || getProcessedLocalCheckpoint() > Long.parseLong( readLastCommittedData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY))) { - + refresh("flush in composite engine"); translogManager.ensureCanFlush(); try { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java index 74f87c343a308..1f5cb86f6442c 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java @@ -111,7 +111,7 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { @Override public DataFormat getDataFormat() { - return null; + return new LuceneDataFormat(); } @Override From 
d4c281038e8b0fa872f8c5aca0b3d6eb2a38dfff Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Wed, 4 Mar 2026 13:09:54 +0530 Subject: [PATCH 04/15] Add changes for handling index mappings --- .../engine/ParquetDataFormat.java | 6 +- .../core/data/number/ByteParquetField.java | 2 +- .../core/data/number/DoubleParquetField.java | 2 +- .../core/data/number/FloatParquetField.java | 2 +- .../data/number/HalfFloatParquetField.java | 2 +- .../core/data/number/IntegerParquetField.java | 2 +- .../core/data/number/LongParquetField.java | 2 +- .../core/data/number/ShortParquetField.java | 2 +- .../{ => number}/TokenCountParquetField.java | 4 +- .../data/number/UnsignedLongParquetField.java | 2 +- .../core/data/{ => text}/IpParquetField.java | 2 +- .../data/{ => text}/KeywordParquetField.java | 9 +- .../data/{ => text}/TextParquetField.java | 2 +- .../plugins/fields/CoreDataFieldPlugin.java | 10 +- .../writer/ParquetDocumentInput.java | 6 + plugins/engine-datafusion/README.md | 335 ++++++++++++++++-- .../org/opensearch/index/IndexSettings.java | 4 +- .../engine/exec/FieldAssignmentResolver.java | 43 ++- .../composite/CompositeDataFormatWriter.java | 2 + .../composite/CompositeFieldValidator.java | 26 ++ .../CompositeIndexingExecutionEngine.java | 14 + .../exec/coord/CatalogSnapshotManager.java | 7 + .../engine/exec/coord/CompositeEngine.java | 15 + .../engine/exec/lucene/LuceneDataFormat.java | 12 + .../lucene/fields/LuceneFieldRegistry.java | 38 +- .../data/{ => date}/DateLuceneField.java | 2 +- .../data/{ => date}/DateNanosLuceneField.java | 2 +- .../data/{ => metadata}/IdLuceneField.java | 2 +- .../{ => metadata}/IgnoredLuceneField.java | 2 +- .../{ => metadata}/RoutingLuceneField.java | 2 +- .../data/{ => metadata}/SizeLuceneField.java | 2 +- .../data/{ => number}/ByteLuceneField.java | 2 +- .../{ => number}/DocCountLuceneField.java | 2 +- .../data/{ => number}/DoubleLuceneField.java | 2 +- .../data/{ => number}/FloatLuceneField.java | 2 +- .../{ => 
number}/HalfFloatLuceneField.java | 2 +- .../data/{ => number}/IntegerLuceneField.java | 2 +- .../data/{ => number}/LongLuceneField.java | 2 +- .../data/{ => number}/ShortLuceneField.java | 2 +- .../{ => number}/TokenCountLuceneField.java | 2 +- .../{ => number}/UnsignedLongLuceneField.java | 2 +- .../fields/data/{ => text}/IpLuceneField.java | 2 +- .../data/{ => text}/KeywordLuceneField.java | 12 +- .../data/{ => text}/TextLuceneField.java | 2 +- .../lucene/writer/LuceneDocumentInput.java | 6 + 45 files changed, 520 insertions(+), 85 deletions(-) rename modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/{ => number}/TokenCountParquetField.java (94%) rename modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/{ => text}/IpParquetField.java (97%) rename modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/{ => text}/KeywordParquetField.java (85%) rename modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/{ => text}/TextParquetField.java (97%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => date}/DateLuceneField.java (95%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => date}/DateNanosLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => metadata}/IdLuceneField.java (95%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => metadata}/IgnoredLuceneField.java (93%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => metadata}/RoutingLuceneField.java (95%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => metadata}/SizeLuceneField.java (95%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/ByteLuceneField.java (96%) rename 
server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/DocCountLuceneField.java (95%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/DoubleLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/FloatLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/HalfFloatLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/IntegerLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/LongLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/ShortLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/TokenCountLuceneField.java (95%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => number}/UnsignedLongLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => text}/IpLuceneField.java (96%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => text}/KeywordLuceneField.java (70%) rename server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/{ => text}/TextLuceneField.java (96%) diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java index de0e808cc1cfa..d49f38012efa2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java @@ -50,12 +50,14 @@ public void configureStore() { @Override 
public boolean equals(Object obj) { - return true; + if (this == obj) return true; + if (!(obj instanceof DataFormat)) return false; + return name().equals(((DataFormat) obj).name()); } @Override public int hashCode() { - return 0; + return name().hashCode(); } @Override diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java index a8909d71756bd..36451b44aa3ee 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java @@ -62,6 +62,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java index 74b484f46aee6..931bea50d4727 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java @@ -64,6 +64,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java index 613a49f615c41..0b7dfcc229a98 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java @@ -64,6 +64,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java index 9dc68c4d378a3..68942854f7ce2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java @@ -64,6 +64,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java index 50927fd324dd1..79edb7031702e 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java @@ -62,6 +62,6 @@ public 
FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java index cff2b86795b64..dc8dab9f9c5e2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java @@ -63,6 +63,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java index 7274712de95ba..852a20d85c84c 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java @@ -63,6 +63,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java similarity 
index 94% rename from modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java index cd53a6d074153..06dcfa55b0829 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; @@ -65,6 +65,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java index 00989e9ffc9a6..ffd61971d809a 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java @@ -64,6 +64,6 @@ public FieldType getFieldType() { @Override public Set getFieldCapabilities() { - return EnumSet.of(FieldCapability.DOC_VALUES); + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java similarity index 97% rename from modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java index e494d8024614f..aa43afecee46c 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java similarity index 85% rename from modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java index 27ecdba2e93a2..e326b6f0f6d4c 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ParquetField; @@ -16,6 +16,9 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.mapper.MappedFieldType; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.nio.charset.StandardCharsets; import java.util.EnumSet; import java.util.Set; @@ -46,11 +49,15 @@ */ public class KeywordParquetField extends ParquetField { + private static final Logger logger = LogManager.getLogger(KeywordParquetField.class); + @Override public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); + logger.info("[COMPOSITE_DEBUG] KeywordParquetField.addToGroup: field=[{}] value=[{}] rowIndex=[{}] capabilities={}", + mappedFieldType.name(), parseValue, rowIndex, assignedCapabilities); } @Override diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java similarity index 97% rename from modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java index be4b0df22db15..38fba8e4d17c8 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java index 20bdfc9610d13..98e2747497497 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java @@ -10,13 +10,9 @@ import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.fields.core.data.BinaryParquetField; -import com.parquet.parquetdataformat.fields.core.data.date.DateNanosParquetField; -import com.parquet.parquetdataformat.fields.core.data.TokenCountParquetField; import com.parquet.parquetdataformat.fields.core.data.BooleanParquetField; +import com.parquet.parquetdataformat.fields.core.data.date.DateNanosParquetField; import com.parquet.parquetdataformat.fields.core.data.date.DateParquetField; -import com.parquet.parquetdataformat.fields.core.data.IpParquetField; -import com.parquet.parquetdataformat.fields.core.data.KeywordParquetField; -import com.parquet.parquetdataformat.fields.core.data.TextParquetField; import com.parquet.parquetdataformat.fields.core.data.number.ByteParquetField; import com.parquet.parquetdataformat.fields.core.data.number.DoubleParquetField; import com.parquet.parquetdataformat.fields.core.data.number.FloatParquetField; @@ -24,7 +20,11 @@ import 
com.parquet.parquetdataformat.fields.core.data.number.IntegerParquetField; import com.parquet.parquetdataformat.fields.core.data.number.LongParquetField; import com.parquet.parquetdataformat.fields.core.data.number.ShortParquetField; +import com.parquet.parquetdataformat.fields.core.data.number.TokenCountParquetField; import com.parquet.parquetdataformat.fields.core.data.number.UnsignedLongParquetField; +import com.parquet.parquetdataformat.fields.core.data.text.IpParquetField; +import com.parquet.parquetdataformat.fields.core.data.text.KeywordParquetField; +import com.parquet.parquetdataformat.fields.core.data.text.TextParquetField; import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.BooleanFieldMapper; import org.opensearch.index.mapper.DateFieldMapper; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java index 7f2c21f9829c1..55fe18be729df 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -3,6 +3,8 @@ import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import org.apache.arrow.vector.BigIntVector; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FieldAssignments; @@ -37,6 +39,7 @@ * intermediate ParquetDocument representation for improved performance and memory efficiency. 
*/ public class ParquetDocumentInput implements DocumentInput { + private static final Logger logger = LogManager.getLogger(ParquetDocumentInput.class); private final ManagedVSR managedVSR; private final EngineRole engineRole; private final FieldAssignments fieldAssignments; @@ -60,6 +63,7 @@ public void addField(MappedFieldType fieldType, Object value) { // Check if this format should handle this field type at all if (!fieldAssignments.shouldHandle(fieldTypeName)) { + logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — not assigned to this format", fieldType.name(), fieldTypeName); return; } @@ -67,10 +71,12 @@ public void addField(MappedFieldType fieldType, Object value) { if (parquetField == null) { // Field type not supported by Parquet format — skip silently + logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", fieldType.name(), fieldTypeName); return; } Set assignedCapabilities = fieldAssignments.getAssignedCapabilities(fieldTypeName); + logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", fieldType.name(), fieldTypeName, value, assignedCapabilities); parquetField.createField(fieldType, managedVSR, value, assignedCapabilities); } diff --git a/plugins/engine-datafusion/README.md b/plugins/engine-datafusion/README.md index 032dfb7fa7730..42617133401eb 100644 --- a/plugins/engine-datafusion/README.md +++ b/plugins/engine-datafusion/README.md @@ -38,28 +38,321 @@ curl --location --request PUT 'http://localhost:9200/index-7' \ "optimized.enabled": true }, "mappings": { + "dynamic": "false", "properties": { - "id": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "age": { - "type": "integer" - }, - "salary": { - "type": "long" - }, - "score": { - "type": "double" - }, - "active": { - "type": "boolean" - }, - "created_date": { - "type": "date" - } + "AdvEngineID": { + "type": "short" + }, + "Age": { + "type": "short" + }, + 
"BrowserCountry": { + "type": "keyword" + }, + "BrowserLanguage": { + "type": "keyword" + }, + "CLID": { + "type": "integer" + }, + "ClientEventTime": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "ClientIP": { + "type": "integer" + }, + "ClientTimeZone": { + "type": "short" + }, + "CodeVersion": { + "type": "integer" + }, + "ConnectTiming": { + "type": "integer" + }, + "CookieEnable": { + "type": "short" + }, + "CounterClass": { + "type": "short" + }, + "CounterID": { + "type": "integer" + }, + "DNSTiming": { + "type": "integer" + }, + "DontCountHits": { + "type": "short" + }, + "EventDate": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "EventTime": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "FUniqID": { + "type": "long" + }, + "FetchTiming": { + "type": "integer" + }, + "FlashMajor": { + "type": "short" + }, + "FlashMinor": { + "type": "short" + }, + "FlashMinor2": { + "type": "short" + }, + "FromTag": { + "type": "keyword" + }, + "GoodEvent": { + "type": "short" + }, + "HID": { + "type": "integer" + }, + "HTTPError": { + "type": "short" + }, + "HasGCLID": { + "type": "short" + }, + "HistoryLength": { + "type": "short" + }, + "HitColor": { + "type": "keyword" + }, + "IPNetworkID": { + "type": "integer" + }, + "Income": { + "type": "short" + }, + "Interests": { + "type": "short" + }, + "IsArtifical": { + "type": "short" + }, + "IsDownload": { + "type": "short" + }, + "IsEvent": { + "type": "short" + }, + "IsLink": { + "type": "short" + }, + "IsMobile": { + "type": "short" + }, + "IsNotBounce": { + "type": "short" + }, + "IsOldCounter": { + "type": "short" + }, + "IsParameter": { + "type": "short" + }, + "IsRefresh": { + "type": "short" + }, + "JavaEnable": { + "type": "short" + }, + "JavascriptEnable": { + "type": "short" + }, + "LocalEventTime": { + "type": "date", + "format": "yyyy-MM-dd 
HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "MobilePhone": { + "type": "short" + }, + "MobilePhoneModel": { + "type": "keyword" + }, + "NetMajor": { + "type": "short" + }, + "NetMinor": { + "type": "short" + }, + "OS": { + "type": "short" + }, + "OpenerName": { + "type": "integer" + }, + "OpenstatAdID": { + "type": "keyword" + }, + "OpenstatCampaignID": { + "type": "keyword" + }, + "OpenstatServiceName": { + "type": "keyword" + }, + "OpenstatSourceID": { + "type": "keyword" + }, + "OriginalURL": { + "type": "keyword" + }, + "PageCharset": { + "type": "keyword" + }, + "ParamCurrency": { + "type": "keyword" + }, + "ParamCurrencyID": { + "type": "short" + }, + "ParamOrderID": { + "type": "keyword" + }, + "ParamPrice": { + "type": "long" + }, + "Params": { + "type": "keyword" + }, + "Referer": { + "type": "keyword" + }, + "RefererCategoryID": { + "type": "short" + }, + "RefererHash": { + "type": "long" + }, + "RefererRegionID": { + "type": "integer" + }, + "RegionID": { + "type": "integer" + }, + "RemoteIP": { + "type": "integer" + }, + "ResolutionDepth": { + "type": "short" + }, + "ResolutionHeight": { + "type": "short" + }, + "ResolutionWidth": { + "type": "short" + }, + "ResponseEndTiming": { + "type": "integer" + }, + "ResponseStartTiming": { + "type": "integer" + }, + "Robotness": { + "type": "short" + }, + "SearchEngineID": { + "type": "short" + }, + "SearchPhrase": { + "type": "keyword" + }, + "SendTiming": { + "type": "integer" + }, + "Sex": { + "type": "short" + }, + "SilverlightVersion1": { + "type": "short" + }, + "SilverlightVersion2": { + "type": "short" + }, + "SilverlightVersion3": { + "type": "integer" + }, + "SilverlightVersion4": { + "type": "short" + }, + "SocialSourceNetworkID": { + "type": "short" + }, + "SocialSourcePage": { + "type": "keyword" + }, + "Title": { + "type": "keyword" + }, + "TraficSourceID": { + "type": "short" + }, + "URL": { + "type": "keyword" + }, + "URLCategoryID": { + "type": "short" + }, + "URLHash": { + "type": 
"long" + }, + "URLRegionID": { + "type": "integer" + }, + "UTMCampaign": { + "type": "keyword" + }, + "UTMContent": { + "type": "keyword" + }, + "UTMMedium": { + "type": "keyword" + }, + "UTMSource": { + "type": "keyword" + }, + "UTMTerm": { + "type": "keyword" + }, + "UserAgent": { + "type": "short" + }, + "UserAgentMajor": { + "type": "short" + }, + "UserAgentMinor": { + "type": "keyword" + }, + "UserID": { + "type": "long" + }, + "WatchID": { + "type": "long" + }, + "WindowClientHeight": { + "type": "short" + }, + "WindowClientWidth": { + "type": "short" + }, + "WindowName": { + "type": "integer" + }, + "WithHash": { + "type": "short" + } } } }' diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 904ef1922c898..794efc532e32c 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -866,11 +866,11 @@ private void setSearchQueryPlanExplainEnabled(Boolean searchQueryPlaneExplainEna /** * Declares which data format is primary for a composite index. * Required when multiple DataSourcePlugins are registered. - * Empty default means "not set" — throws when multiple plugins are registered. + * Defaults to "parquet". 
*/ public static final Setting INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING = Setting.simpleString( "index.composite.primary_data_format", - "", + "parquet", Property.IndexScope ); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java index d3615a10d04d1..428d5cb655e3e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java @@ -8,6 +8,8 @@ package org.opensearch.index.engine.exec; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.mapper.MappedFieldType; @@ -24,6 +26,8 @@ @ExperimentalApi public final class FieldAssignmentResolver { + private static final Logger logger = LogManager.getLogger(FieldAssignmentResolver.class); + private FieldAssignmentResolver() {} /** @@ -55,6 +59,10 @@ public static Map resolve( } for (MappedFieldType fieldType : fieldTypes) { + // Skip internal metadata fields (e.g. 
_id, _index, _source) — managed by the engine, not data format plugins + if (fieldType.typeName().startsWith("_")) { + continue; + } String typeName = fieldType.typeName(); resolveField(registry, roleMap, primaryFormat, perFormatMap, fieldType, typeName); } @@ -63,6 +71,10 @@ public static Map resolve( Map result = new HashMap<>(); for (Map.Entry>> entry : perFormatMap.entrySet()) { result.put(entry.getKey(), new FieldAssignments(entry.getValue())); + logger.info("[COMPOSITE_DEBUG] Field assignments for format [{}]:", entry.getKey().name()); + for (Map.Entry> fieldEntry : entry.getValue().entrySet()) { + logger.info("[COMPOSITE_DEBUG] fieldType=[{}] -> capabilities={}", fieldEntry.getKey(), fieldEntry.getValue()); + } } return result; } @@ -87,24 +99,47 @@ private static void resolveField( required.add(FieldCapability.STORE); } + logger.info("[COMPOSITE_DEBUG] resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", + fieldType.name(), typeName, required, fieldType.isSearchable(), fieldType.hasDocValues(), fieldType.isStored()); + // For each required capability, assign to primary if it supports it, else to secondary for (FieldCapability cap : required) { - if (primaryFormat != null && registry.hasCapability(typeName, primaryFormat, cap)) { + boolean primaryHasCap = primaryFormat != null && registry.hasCapability(typeName, primaryFormat, cap); + logger.info("[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", + cap, primaryFormat != null ? primaryFormat.name() : "null", primaryHasCap, + primaryFormat != null ? 
registry.getCapabilities(typeName, primaryFormat) : "N/A"); + + if (primaryHasCap) { // Primary handles this capability perFormatMap.get(primaryFormat) .computeIfAbsent(typeName, k -> EnumSet.noneOf(FieldCapability.class)) .add(cap); + logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to PRIMARY format [{}]", cap, primaryFormat.name()); } else { // Find a secondary format that supports it + boolean assignedToSecondary = false; for (Map.Entry entry : roleMap.entrySet()) { - if (entry.getValue() != EngineRole.PRIMARY - && registry.hasCapability(typeName, entry.getKey(), cap)) { - perFormatMap.get(entry.getKey()) + DataFormat secondaryFormat = entry.getKey(); + EngineRole role = entry.getValue(); + boolean isSecondary = role != EngineRole.PRIMARY; + boolean secondaryHasCap = registry.hasCapability(typeName, secondaryFormat, cap); + logger.info("[COMPOSITE_DEBUG] checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", + secondaryFormat.name(), role, isSecondary, secondaryHasCap, + registry.getCapabilities(typeName, secondaryFormat)); + + if (isSecondary && secondaryHasCap) { + perFormatMap.get(secondaryFormat) .computeIfAbsent(typeName, k -> EnumSet.noneOf(FieldCapability.class)) .add(cap); + logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to SECONDARY format [{}]", cap, secondaryFormat.name()); + assignedToSecondary = true; break; } } + if (!assignedToSecondary) { + logger.warn("[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned to any format!", + cap, fieldType.name(), typeName); + } } } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java index 538ec07f302c4..a90ad48c6c20f 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java +++ 
b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -184,6 +184,8 @@ public void addRowIdField(String fieldName, long rowId) { @Override public void addField(MappedFieldType fieldType, Object value) { // Each delegate's addField uses its own FieldAssignments to decide what to write + logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — delegating to {} format inputs", + fieldType.name(), fieldType.typeName(), value, inputs.size()); for (DocumentInput input : inputs) { input.addField(fieldType, value); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java index 89d607f38aa08..67f7f4472f379 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java @@ -8,6 +8,8 @@ package org.opensearch.index.engine.exec.composite; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.EngineRole; @@ -20,12 +22,26 @@ /** * Stateless validator that checks field-to-capability compatibility using the * {@link FieldSupportRegistry} at index creation or mapping update time. + *

+ * Internal metadata fields (type names starting with {@code _}) are skipped + * because they are managed by the engine itself, not by data format plugins. */ @ExperimentalApi public final class CompositeFieldValidator { private CompositeFieldValidator() {} + private static final Logger logger = LogManager.getLogger(CompositeFieldValidator.class); + + /** + * Returns true if the field type is an internal metadata field that should + * be excluded from composite validation. Internal fields have type names + * starting with '_' (e.g. _id, _index, _source, _seq_no, _routing). + */ + private static boolean isInternalMetadataField(MappedFieldType fieldType) { + return fieldType.typeName().startsWith("_"); + } + /** * Validates that the primary data format has at least one capability * registered for every mapped field type. @@ -47,12 +63,19 @@ public static void validatePrimaryCoverage( return; } for (MappedFieldType fieldType : fieldTypes) { + if (isInternalMetadataField(fieldType)) { + logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: SKIP internal metadata field=[{}] type=[{}]", + fieldType.name(), fieldType.typeName()); + continue; + } if (!registry.hasAnyCapability(fieldType.typeName(), primaryFormat)) { throw new IllegalArgumentException( "Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + "] has no capabilities registered for primary data format [" + primaryFormat.name() + "]" ); } + logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: OK field=[{}] type=[{}] has capabilities {} in primary format [{}]", + fieldType.name(), fieldType.typeName(), registry.getCapabilities(fieldType.typeName(), primaryFormat), primaryFormat.name()); } } @@ -67,6 +90,9 @@ public static void validateMappingPropertyCoverage( Iterable fieldTypes ) { for (MappedFieldType fieldType : fieldTypes) { + if (isInternalMetadataField(fieldType)) { + continue; + } String typeName = fieldType.typeName(); if (fieldType.isSearchable()) { checkCapabilityCoverage(registry, 
fieldType, typeName, FieldCapability.INDEX, "index"); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java index 47ec882201d4f..2f720a93bd432 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -74,23 +74,31 @@ public CompositeIndexingExecutionEngine( // Setting-based role resolution String primaryDataFormatName = indexSettings.getValue(IndexSettings.INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING); this.roleMap = resolveRoles(primaryDataFormatName, dataSourcePlugins, singlePlugin); + logger.info("[COMPOSITE_DEBUG] Resolved engine roles: {}", roleMap.entrySet().stream() + .map(e -> e.getKey().name() + " -> " + e.getValue()) + .collect(java.util.stream.Collectors.joining(", "))); // Build FieldSupportRegistry from plugin registrations this.fieldSupportRegistry = new FieldSupportRegistry(); for (DataSourcePlugin plugin : dataSourcePlugins) { plugin.registerFieldSupport(fieldSupportRegistry); } + logger.info("[COMPOSITE_DEBUG] FieldSupportRegistry built. 
Registered formats: {}", + fieldSupportRegistry.allFormats().stream().map(DataFormat::name).collect(java.util.stream.Collectors.joining(", "))); // Validate field capabilities if composite (multiple plugins) if (!singlePlugin) { CompositeFieldValidator.validatePrimaryCoverage(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); CompositeFieldValidator.validateMappingPropertyCoverage(fieldSupportRegistry, mapperService.fieldTypes()); + logger.info("[COMPOSITE_DEBUG] Composite field validation passed for all mapped fields"); } // Resolve field assignments: which format handles which capability for each field type Map fieldAssignmentsMap; if (singlePlugin) { fieldAssignmentsMap = Map.of(dataSourcePlugins.get(0).getDataFormat(), FieldAssignments.ACCEPT_ALL); + logger.info("[COMPOSITE_DEBUG] Single plugin mode — using ACCEPT_ALL field assignments for [{}]", + dataSourcePlugins.get(0).getDataFormat().name()); } else { fieldAssignmentsMap = FieldAssignmentResolver.resolve(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); } @@ -243,11 +251,15 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { List dataFormatWriters = dataFormatWriterPool.checkoutAll(); List refreshedSegment = refreshInput.getExistingSegments(); List newSegmentList = new ArrayList<>(); + logger.info("[COMPOSITE_DEBUG] CompositeIndexingExecutionEngine.refresh: flushing {} writers, existing segments={}", + dataFormatWriters.size(), refreshedSegment.size()); // flush to disk for (CompositeDataFormatWriter dataFormatWriter : dataFormatWriters) { Segment newSegment = new Segment(dataFormatWriter.getWriterGeneration()); FileInfos fileInfos = dataFormatWriter.flush(null); fileInfos.getWriterFilesMap().forEach((key, value) -> { + logger.info("[COMPOSITE_DEBUG] writer gen={} flushed format=[{}] files={}", + dataFormatWriter.getWriterGeneration(), key.name(), value.getFiles()); newSegment.addSearchableFiles(key.name(), value); }); dataFormatWriter.close(); @@ -257,8 
+269,10 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { } if (newSegmentList.isEmpty()) { + logger.info("[COMPOSITE_DEBUG] No new segments produced from flush"); return null; } else { + logger.info("[COMPOSITE_DEBUG] Produced {} new segments from flush", newSegmentList.size()); refreshedSegment.addAll(newSegmentList); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java index fca62d1115282..c544ca114e7cf 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java @@ -146,6 +146,8 @@ public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge one } private synchronized void advanceCatalogSnapshot(List refreshedSegments) throws IOException { + logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: previous id={}, version={}, old segment count={}", + latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), latestCatalogSnapshot.getSegments().size()); compositeEngineCommitter.addLuceneIndexes(refreshedSegments); CompositeEngineCatalogSnapshot cecs = new CompositeEngineCatalogSnapshot( latestCatalogSnapshot.getId() + 1, @@ -159,6 +161,11 @@ private synchronized void advanceCatalogSnapshot(List refreshedSegments latestCatalogSnapshot.decRef(); } latestCatalogSnapshot = cecs; + logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: new id={}, version={}, new segment count={}", + latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), refreshedSegments.size()); + for (Segment seg : refreshedSegments) { + logger.info("[COMPOSITE_DEBUG] segment gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + } } private Segment getSegment(Map writerFileSetMap) { diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java index bdad934d5f69f..2f7dadbb35d88 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java @@ -608,6 +608,8 @@ public Engine.IndexResult index(Engine.Index index) throws IOException { index.documentInput.setSeqNo(index.seqNo()); index.documentInput.setPrimaryTerm(SeqNoFieldMapper.PRIMARY_TERM_NAME, index.primaryTerm()); index.documentInput.setVersion(1); // we are not supporting update in parquet + logger.info("[COMPOSITE_DEBUG] Indexing doc id=[{}] seqNo=[{}] primaryTerm=[{}] — writing to engine", + index.id(), index.seqNo(), index.primaryTerm()); WriteResult writeResult = index.documentInput.addToWriter(); indexResult = new Engine.IndexResult(writeResult.version(), index.primaryTerm(), index.seqNo(), writeResult.success()); @@ -804,12 +806,25 @@ public synchronized void refresh(String source) throws EngineException { try (CompositeEngine.ReleasableRef catalogSnapshotReleasableRef = catalogSnapshotManager.acquireSnapshot()) { refreshListeners.forEach(PRE_REFRESH_LISTENER_CONSUMER); + CatalogSnapshot preRefreshSnapshot = catalogSnapshotReleasableRef.getRef(); + logger.info("[COMPOSITE_DEBUG] refresh(source=[{}]) starting. 
Pre-refresh CatalogSnapshot: id={}, version={}, segments={}", + source, preRefreshSnapshot.getId(), preRefreshSnapshot.getVersion(), preRefreshSnapshot.getSegments().size()); + for (org.opensearch.index.engine.exec.coord.Segment seg : preRefreshSnapshot.getSegments()) { + logger.info("[COMPOSITE_DEBUG] pre-refresh segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + } + RefreshInput refreshInput = new RefreshInput(); refreshInput.setExistingSegments(new ArrayList<>(catalogSnapshotReleasableRef.getRef().getSegments())); RefreshResult refreshResult = engine.refresh(refreshInput); // It should refresh the primary engine, i.e parquet if (refreshResult != null) { + logger.info("[COMPOSITE_DEBUG] refresh produced {} segments", refreshResult.getRefreshedSegments().size()); + for (org.opensearch.index.engine.exec.coord.Segment seg : refreshResult.getRefreshedSegments()) { + logger.info("[COMPOSITE_DEBUG] refreshed segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + } catalogSnapshotManager.applyRefreshResult(refreshResult); refreshed = true; + } else { + logger.info("[COMPOSITE_DEBUG] refresh returned null (no new data to flush)"); } invokeRefreshListeners(refreshed); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java index bd56febd5ed9b..8b7a68e503371 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java @@ -39,4 +39,16 @@ public void configureStore() { public String toString() { return LUCENE_DATA_FORMAT; } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof DataFormat)) return false; + return name().equals(((DataFormat) obj).name()); + } + + @Override + public int 
hashCode() { + return name().hashCode(); + } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java index c10a2b7b69129..b487f6614d129 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java @@ -10,25 +10,25 @@ import org.opensearch.index.engine.exec.lucene.fields.data.BinaryLuceneField; import org.opensearch.index.engine.exec.lucene.fields.data.BooleanLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.ByteLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.DateLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.DateNanosLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.DocCountLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.DoubleLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.FloatLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.HalfFloatLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.IdLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.IgnoredLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.IntegerLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.IpLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.KeywordLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.LongLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.RoutingLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.ShortLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.SizeLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.TextLuceneField; 
-import org.opensearch.index.engine.exec.lucene.fields.data.TokenCountLuceneField; -import org.opensearch.index.engine.exec.lucene.fields.data.UnsignedLongLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.date.DateLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.date.DateNanosLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.IdLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.IgnoredLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.RoutingLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.SizeLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.ByteLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.DocCountLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.DoubleLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.FloatLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.HalfFloatLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.IntegerLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.LongLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.ShortLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.TokenCountLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.UnsignedLongLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.text.IpLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.text.KeywordLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.text.TextLuceneField; import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.BooleanFieldMapper; import org.opensearch.index.mapper.DateFieldMapper; diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java similarity index 95% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java index f01c95e8d0029..c1b7d5a9e36f1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.date; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateNanosLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateNanosLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java index 540b5bde3eab8..5f0cff41955b1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DateNanosLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.date; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IdLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java similarity index 95% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IdLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java index 3934f7e80d26b..5a7ae34a96923 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IdLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.StoredField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IgnoredLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java similarity index 93% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IgnoredLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java index 9a9200e75496f..d03525cf6c532 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IgnoredLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/RoutingLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java similarity index 95% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/RoutingLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java index d514ffc0eb0ee..95dba2a358fe7 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/RoutingLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/SizeLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java similarity index 95% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/SizeLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java index 5fa5a32092048..5a5a0b2236dd2 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/SizeLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ByteLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ByteLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java index d592b1f2dedb8..cb50337021588 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ByteLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DocCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java similarity index 95% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DocCountLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java index 19ecdda88941a..f26476846cc7e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DocCountLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java index 1415b90cd8206..b468d9ae283d6 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/DoubleLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/FloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/FloatLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java index 3d40e9b273454..0c67d646bca30 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/FloatLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/HalfFloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/HalfFloatLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java index dceaa091ee7a6..4fc0edae344a7 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/HalfFloatLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IntegerLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IntegerLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java index e7fa69c127254..46f4bc73c094e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IntegerLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java index 676989fa917db..cf7beda7d35c9 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/LongLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ShortLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ShortLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java index 510f170ee76eb..8b40ae609c452 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/ShortLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TokenCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java similarity index 95% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TokenCountLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java index beaf5e9337a76..151f5faf63886 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TokenCountLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java @@ -6,7 +6,7 @@ * compatible 
open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/UnsignedLongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/UnsignedLongLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java index 049155531093b..b47fdef0a06b3 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/UnsignedLongLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.number; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IpLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IpLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java index 163a70db95824..b0921ae2d9a72 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/IpLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.text; import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.document.SortedSetDocValuesField; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java similarity index 70% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java index e35494ba2da3a..3a70117a93542 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/KeywordLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.text; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; @@ -18,11 +18,16 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + import java.util.EnumSet; import java.util.Set; public class KeywordLuceneField extends LuceneField { + private static final Logger logger = LogManager.getLogger(KeywordLuceneField.class); + @Override public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { String value = (String) parseValue; @@ -31,6 +36,9 @@ public void createField(MappedFieldType mappedFieldType, ParseContext.Document d boolean shouldIndex = assignedCapabilities.contains(FieldCapability.INDEX); boolean shouldStore = assignedCapabilities.contains(FieldCapability.STORE); + logger.info("[COMPOSITE_DEBUG] KeywordLuceneField.createField: field=[{}] value=[{}] capabilities={} shouldIndex={} shouldStore={} hasDocValues={}", + mappedFieldType.name(), value, assignedCapabilities, shouldIndex, shouldStore, assignedCapabilities.contains(FieldCapability.DOC_VALUES)); + if (shouldIndex || shouldStore) { FieldType fieldType = new FieldType(); fieldType.setTokenized(false); @@ -39,10 +47,12 @@ public void createField(MappedFieldType mappedFieldType, ParseContext.Document d fieldType.setIndexOptions(shouldIndex ? 
IndexOptions.DOCS : IndexOptions.NONE); fieldType.freeze(); document.add(new KeywordFieldMapper.KeywordField(mappedFieldType.name(), binaryValue, fieldType)); + logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added KeywordField for [{}] indexed={} stored={}", mappedFieldType.name(), shouldIndex, shouldStore); } if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { document.add(new SortedSetDocValuesField(mappedFieldType.name(), binaryValue)); + logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added SortedSetDocValuesField for [{}]", mappedFieldType.name()); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java similarity index 96% rename from server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java index b4a86260afb01..d9e609d9cc96e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/TextLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java @@ -6,7 +6,7 @@ * compatible open source license. 
*/ -package org.opensearch.index.engine.exec.lucene.fields.data; +package org.opensearch.index.engine.exec.lucene.fields.data.text; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java index 2cf83a645c32f..f28fe220a745c 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java @@ -8,6 +8,8 @@ package org.opensearch.index.engine.exec.lucene.writer; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.IndexWriter; import org.opensearch.index.engine.exec.DocumentInput; @@ -24,6 +26,7 @@ import java.util.Set; public class LuceneDocumentInput implements DocumentInput { + private static final Logger logger = LogManager.getLogger(LuceneDocumentInput.class); private final ParseContext.Document document; private final IndexWriter indexWriter; private final EngineRole engineRole; @@ -48,6 +51,7 @@ public void addField(MappedFieldType fieldType, Object value) { // Check if this format should handle this field type at all if (!fieldAssignments.shouldHandle(fieldTypeName)) { + logger.debug("[COMPOSITE_DEBUG] Lucene SKIP field=[{}] type=[{}] — not assigned to this format", fieldType.name(), fieldTypeName); return; } @@ -55,10 +59,12 @@ public void addField(MappedFieldType fieldType, Object value) { if (luceneField == null) { // Field type not supported by Lucene format — skip silently + logger.debug("[COMPOSITE_DEBUG] Lucene SKIP field=[{}] type=[{}] — no LuceneField registered in LuceneFieldRegistry", fieldType.name(), fieldTypeName); return; } Set assignedCapabilities = 
fieldAssignments.getAssignedCapabilities(fieldTypeName); + logger.debug("[COMPOSITE_DEBUG] Lucene ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", fieldType.name(), fieldTypeName, value, assignedCapabilities); luceneField.createField(fieldType, document, value, assignedCapabilities); } From fd4e9d353969c738bf082c313fb4c19004706f13 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Wed, 4 Mar 2026 23:35:42 +0530 Subject: [PATCH 05/15] Add field name based FieldDescriptors --- .../ParquetDataFormatPlugin.java | 3 +- .../engine/DummyDataUtils.java | 19 +++- .../engine/ParquetExecutionEngine.java | 8 +- .../fields/ParquetField.java | 30 +++--- .../fields/core/data/BinaryParquetField.java | 6 +- .../fields/core/data/BooleanParquetField.java | 6 +- .../core/data/date/DateNanosParquetField.java | 6 +- .../core/data/date/DateParquetField.java | 6 +- .../core/data/number/ByteParquetField.java | 6 +- .../core/data/number/DoubleParquetField.java | 6 +- .../core/data/number/FloatParquetField.java | 6 +- .../data/number/HalfFloatParquetField.java | 6 +- .../core/data/number/IntegerParquetField.java | 6 +- .../core/data/number/LongParquetField.java | 6 +- .../core/data/number/ShortParquetField.java | 6 +- .../data/number/TokenCountParquetField.java | 6 +- .../data/number/UnsignedLongParquetField.java | 6 +- .../fields/core/data/text/IpParquetField.java | 6 +- .../core/data/text/KeywordParquetField.java | 8 +- .../core/data/text/TextParquetField.java | 6 +- .../fields/core/metadata/IdParquetField.java | 6 +- .../core/metadata/IgnoredParquetField.java | 6 +- .../core/metadata/RoutingParquetField.java | 6 +- .../core/metadata/SizeParquetField.java | 6 +- .../writer/ParquetDocumentInput.java | 38 +++---- .../writer/ParquetWriter.java | 8 +- .../index/engine/exec/DocumentInput.java | 9 +- .../engine/exec/FieldAssignmentResolver.java | 100 ++++++++++++------ .../index/engine/exec/FieldAssignments.java | 42 +++----- .../index/engine/exec/FieldDescriptor.java | 98 
+++++++++++++++++ .../composite/CompositeDataFormatWriter.java | 64 +++++++++-- .../CompositeIndexingExecutionEngine.java | 22 ++-- .../lucene/engine/LuceneExecutionEngine.java | 4 +- .../exec/lucene/fields/LuceneField.java | 19 +++- .../lucene/fields/data/BinaryLuceneField.java | 8 +- .../fields/data/BooleanLuceneField.java | 16 +-- .../fields/data/date/DateLuceneField.java | 16 +-- .../data/date/DateNanosLuceneField.java | 16 +-- .../fields/data/metadata/IdLuceneField.java | 12 +-- .../data/metadata/IgnoredLuceneField.java | 8 +- .../data/metadata/RoutingLuceneField.java | 12 +-- .../fields/data/metadata/SizeLuceneField.java | 12 +-- .../fields/data/number/ByteLuceneField.java | 18 ++-- .../data/number/DocCountLuceneField.java | 12 +-- .../fields/data/number/DoubleLuceneField.java | 18 ++-- .../fields/data/number/FloatLuceneField.java | 18 ++-- .../data/number/HalfFloatLuceneField.java | 18 ++-- .../data/number/IntegerLuceneField.java | 18 ++-- .../fields/data/number/LongLuceneField.java | 18 ++-- .../fields/data/number/ShortLuceneField.java | 18 ++-- .../data/number/TokenCountLuceneField.java | 16 +-- .../data/number/UnsignedLongLuceneField.java | 18 ++-- .../fields/data/text/IpLuceneField.java | 16 +-- .../fields/data/text/KeywordLuceneField.java | 20 ++-- .../fields/data/text/TextLuceneField.java | 10 +- .../lucene/writer/LuceneDocumentInput.java | 45 ++++---- .../exec/lucene/writer/LuceneWriter.java | 7 +- 57 files changed, 562 insertions(+), 394 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java index 2599760bc2858..69727316cab46 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java @@ -91,8 +91,7 @@ public IndexingExecutionEngine indexingEngine(EngineCo isPrimary, () -> ArrowSchemaBuilder.getSchema(mapperService, isPrimary), shardPath, - indexSettings, - fieldAssignments + indexSettings ); return (IndexingExecutionEngine) engine; } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java index c3a563b7223cb..fbd6f1598179a 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java @@ -5,10 +5,13 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.opensearch.common.SuppressForbidden; import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.converter.FieldTypeConverter; import java.util.Arrays; +import java.util.EnumSet; import java.util.Random; @SuppressForbidden(reason = "Need random for creating temp files") @@ -24,14 +27,22 @@ public static Schema getSchema() { } public static void populateDocumentInput(DocumentInput documentInput) { + EnumSet allCapabilities = EnumSet.allOf(FieldCapability.class); + MappedFieldType idField = FieldTypeConverter.convertToMappedFieldType(ID, new ArrowType.Int(32, true)); - documentInput.addField(idField, generateRandomId()); + documentInput.addField(new FieldDescriptor(idField.name(), idField.typeName(), allCapabilities), generateRandomId()); + MappedFieldType nameField = FieldTypeConverter.convertToMappedFieldType(NAME, new ArrowType.Utf8()); - documentInput.addField(nameField, 
generateRandomName()); + documentInput.addField(new FieldDescriptor(nameField.name(), nameField.typeName(), allCapabilities), generateRandomName()); + MappedFieldType designationField = FieldTypeConverter.convertToMappedFieldType(DESIGNATION, new ArrowType.Utf8()); - documentInput.addField(designationField, generateRandomDesignation()); + documentInput.addField( + new FieldDescriptor(designationField.name(), designationField.typeName(), allCapabilities), + generateRandomDesignation() + ); + MappedFieldType salaryField = FieldTypeConverter.convertToMappedFieldType(SALARY, new ArrowType.Int(32, true)); - documentInput.addField(salaryField, random.nextInt(100000)); + documentInput.addField(new FieldDescriptor(salaryField.name(), salaryField.typeName(), allCapabilities), random.nextInt(100000)); } private static final String ID = "id"; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java index 0083ceb5ce57f..6590d870c0479 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java @@ -17,7 +17,6 @@ import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.EngineRole; -import org.opensearch.index.engine.exec.FieldAssignments; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.engine.exec.Merger; import org.opensearch.index.engine.exec.RefreshInput; @@ -79,15 +78,13 @@ public class ParquetExecutionEngine implements IndexingExecutionEngine schema, ShardPath shardPath, - IndexSettings indexSettings, - FieldAssignments fieldAssignments + IndexSettings indexSettings ) { this.schema = schema; 
this.shardPath = shardPath; @@ -95,7 +92,6 @@ public ParquetExecutionEngine( this.indexSettings = indexSettings; this.parquetMerger = new ParquetMergeExecutor(CompactionStrategy.RECORD_BATCH, indexSettings.getIndex().getName()); this.isPrimaryEngine = isPrimaryEngine; - this.fieldAssignments = fieldAssignments; // Push current settings to Rust store once on construction, then keep in sync on updates pushSettingsToRust(indexSettings); @@ -159,7 +155,7 @@ public List supportedFieldTypes(boolean isPrimaryEngine) { public Writer createWriter(long writerGeneration) { String fileName = Path.of(shardPath.getDataPath().toString(), getDataFormat().name(), FILE_NAME_PREFIX + "_" + writerGeneration + FILE_NAME_EXT).toString(); EngineRole role = isPrimaryEngine ? EngineRole.PRIMARY : EngineRole.SECONDARY; - return new ParquetWriter(fileName, schema.get(), writerGeneration, arrowBufferPool, indexSettings, role, fieldAssignments); + return new ParquetWriter(fileName, schema.get(), writerGeneration, arrowBufferPool, indexSettings, role); } @Override diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java index 481ebb47b294b..91b45c613b8b4 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java @@ -12,7 +12,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.engine.exec.FieldDescriptor; import java.util.Objects; import java.util.Set; @@ -45,13 +45,13 @@ public abstract class ParquetField { *

Implementations must handle null values appropriately and ensure type safety * when casting the parseValue to the expected type.

* - * @param mappedFieldType the OpenSearch field type metadata containing field configuration + * @param descriptor the per-field descriptor carrying field name, type name, and capability flags * @param managedVSR the managed vector schema root for columnar data storage * @param parseValue the parsed field value to be stored, may be null * @throws IllegalArgumentException if any parameter is invalid for this field type * @throws ClassCastException if parseValue cannot be cast to the expected type */ - protected abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities); + protected abstract void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue); /** * Creates and processes a field entry if the field type supports columnar storage. @@ -61,29 +61,25 @@ public abstract class ParquetField { *

The method performs the following operations: *

    *
  1. Validates input parameters
  2. - *
  3. Checks if the field supports columnar storage
  4. + *
  5. Checks if the field vector exists in the managed VSR
  6. *
  7. Delegates to {@link #addToGroup} for actual data processing
  8. *
* - * @param mappedFieldType the OpenSearch field type metadata, must not be null + * @param descriptor the per-field descriptor carrying field name, type name, and capability flags, must not be null * @param managedVSR the managed vector schema root, must not be null * @param parseValue the parsed field value to be processed, may be null - * @param assignedCapabilities the capabilities this format is responsible for on this field type - * @throws IllegalArgumentException if mappedFieldType or managedVSR is null + * @throws IllegalArgumentException if descriptor or managedVSR is null */ - public final void createField(final MappedFieldType mappedFieldType, + public final void createField(final FieldDescriptor descriptor, final ManagedVSR managedVSR, - final Object parseValue, - final Set assignedCapabilities) { - Objects.requireNonNull(mappedFieldType, "MappedFieldType cannot be null"); + final Object parseValue) { + Objects.requireNonNull(descriptor, "FieldDescriptor cannot be null"); Objects.requireNonNull(managedVSR, "ManagedVSR cannot be null"); - if (mappedFieldType.isColumnar()) { - // TODO: support dynamic mapping update - // for now ignore the field - if (managedVSR.getVector(mappedFieldType.name()) != null) { - addToGroup(mappedFieldType, managedVSR, parseValue, assignedCapabilities); - } + // TODO: support dynamic mapping update + // for now ignore the field + if (managedVSR.getVector(descriptor.fieldName()) != null) { + addToGroup(descriptor, managedVSR, parseValue); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java index 43e238e6ed5d8..a692847049cf8 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -44,8 +44,8 @@ public class BinaryParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); varBinaryVector.set(rowCount, (byte[]) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java index 5ba967bce75fb..a3ddc80151cdb 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java @@ -9,13 +9,13 @@ package com.parquet.parquetdataformat.fields.core.data; import org.opensearch.index.engine.exec.FieldCapability; +import 
org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -45,8 +45,8 @@ public class BooleanParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - BitVector bitVector = (BitVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + BitVector bitVector = (BitVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); bitVector.setSafe(rowIndex, (Boolean) parseValue ? 
1 : 0); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java index ae75d876a8eca..4a77c37ddc39c 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java @@ -9,13 +9,13 @@ package com.parquet.parquetdataformat.fields.core.data.date; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampNanoVector; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.Set; @@ -46,8 +46,8 @@ public class DateNanosParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); timeStampNanoVector.setSafe(rowIndex, (long) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java index 1f44a7d4278d3..441fd3415b514 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java @@ -9,13 +9,13 @@ package com.parquet.parquetdataformat.fields.core.data.date; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.Set; @@ -46,8 +46,8 @@ public class DateParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); timeStampMilliVector.setSafe(rowIndex, (long) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java index 36451b44aa3ee..9bc2416c31df1 100644 --- 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -44,8 +44,8 @@ public class ByteParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); tinyIntVector.setSafe(rowCount, (Byte) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java index 931bea50d4727..11f0b707bd0a1 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java @@ -9,13 +9,13 @@ package 
com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -46,8 +46,8 @@ public class DoubleParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); float8Vector.setSafe(rowCount, (Double) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java index 0b7dfcc229a98..5792bd9af1a5a 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java @@ -9,13 +9,13 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import 
com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -46,8 +46,8 @@ public class FloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); float4Vector.setSafe(rowCount, (Float) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java index 68942854f7ce2..27b3afe2f9993 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java @@ -9,13 +9,13 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import 
org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -46,8 +46,8 @@ public class HalfFloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); float2Vector.setSafe(rowCount, (Short) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java index 79edb7031702e..954e137a6054e 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -44,8 +44,8 @@ public class IntegerParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR 
managedVSR, Object parseValue, Set assignedCapabilities) { - IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java index dc8dab9f9c5e2..99727deb6e778 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -45,8 +45,8 @@ public class LongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + BigIntVector bigIntVector = (BigIntVector) 
managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); bigIntVector.setSafe(rowCount, (Long) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java index 852a20d85c84c..d496662202e96 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.SmallIntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -45,8 +45,8 @@ public class ShortParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); smallIntVector.setSafe(rowCount, (Short) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java index 06dcfa55b0829..74b2e25341f06 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -47,8 +47,8 @@ public class TokenCountParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java index ffd61971d809a..18bb70f8ed3a9 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java 
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -45,8 +45,8 @@ public class UnsignedLongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); long longValue = ((Number) parseValue).longValue(); uInt8Vector.setSafe(rowCount, longValue); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java index aa43afecee46c..691183e524b52 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java @@ -9,6 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; +import 
org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -17,7 +18,6 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.util.BytesRef; -import org.opensearch.index.mapper.MappedFieldType; import java.util.EnumSet; import java.util.Set; @@ -52,8 +52,8 @@ public class IpParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); final BytesRef bytesRef = new BytesRef(InetAddressPoint.encode((InetAddress) parseValue)); varBinaryVector.setSafe(rowIndex, bytesRef.bytes, bytesRef.offset, bytesRef.length); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java index e326b6f0f6d4c..8db009c87574b 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; 
import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -52,12 +52,12 @@ public class KeywordParquetField extends ParquetField { private static final Logger logger = LogManager.getLogger(KeywordParquetField.class); @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); logger.info("[COMPOSITE_DEBUG] KeywordParquetField.addToGroup: field=[{}] value=[{}] rowIndex=[{}] capabilities={}", - mappedFieldType.name(), parseValue, rowIndex, assignedCapabilities); + descriptor.fieldName(), parseValue, rowIndex, descriptor.assignedCapabilities()); } @Override diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java index 38fba8e4d17c8..888032ee3368e 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java @@ -9,13 +9,13 @@ package com.parquet.parquetdataformat.fields.core.data.text; import 
org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; import java.util.EnumSet; @@ -48,8 +48,8 @@ public class TextParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java index 9cd6d581d42dd..4eb9f2ebaac08 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java @@ -9,13 +9,13 @@ package com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import 
com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.lucene.util.BytesRef; -import org.opensearch.index.mapper.MappedFieldType; import java.util.Set; @@ -45,8 +45,8 @@ public class IdParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); BytesRef bytesRef = (BytesRef) parseValue; idVector.setSafe(rowIndex, bytesRef.bytes, bytesRef.offset, bytesRef.length); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java index 6eb852b3c04e5..9f363e6c3bb36 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import 
org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; import java.util.Set; @@ -45,8 +45,8 @@ public class IgnoredParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); varCharVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java index a36d0718568e7..6886f27e45fe9 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; import java.util.Set; @@ -45,8 +45,8 @@ public class RoutingParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType 
mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - VarCharVector routingVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + VarCharVector routingVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); routingVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java index baf4b5cc35cf3..d02affc4f0269 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java @@ -9,12 +9,12 @@ package com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.util.Set; @@ -46,8 +46,8 @@ public class SizeParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue, Set assignedCapabilities) { - IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) 
managedVSR.getVector(descriptor.fieldName()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java index 55fe18be729df..fdc32b0123e8a 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -5,18 +5,17 @@ import org.apache.arrow.vector.BigIntVector; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import com.parquet.parquetdataformat.engine.ParquetDataFormat; +import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.engine.exec.EngineRole; -import org.opensearch.index.engine.exec.FieldAssignments; -import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.WriteResult; import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; -import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.vsr.ManagedVSR; import java.io.IOException; import java.util.Objects; -import java.util.Set; /** * Document input wrapper for Parquet-based document processing. @@ -30,7 +29,7 @@ * *

 * <p>Key responsibilities:
 * <ul>
- *   <li>Direct field vector population using OpenSearch's {@link MappedFieldType}</li>
+ *   <li>Direct field vector population using {@link FieldDescriptor}</li>
 *   <li>Document lifecycle management via ManagedVSR</li>
 *   <li>Integration with the Arrow-based Parquet writer pipeline</li>
 * </ul>
@@ -42,12 +41,10 @@ public class ParquetDocumentInput implements DocumentInput { private static final Logger logger = LogManager.getLogger(ParquetDocumentInput.class); private final ManagedVSR managedVSR; private final EngineRole engineRole; - private final FieldAssignments fieldAssignments; - public ParquetDocumentInput(ManagedVSR managedVSR, EngineRole engineRole, FieldAssignments fieldAssignments) { + public ParquetDocumentInput(ManagedVSR managedVSR, EngineRole engineRole) { this.managedVSR = Objects.requireNonNull(managedVSR, "managedVSR must not be null"); this.engineRole = Objects.requireNonNull(engineRole, "engineRole must not be null"); - this.fieldAssignments = Objects.requireNonNull(fieldAssignments, "fieldAssignments must not be null"); } @Override @@ -58,26 +55,17 @@ public void addRowIdField(String fieldName, long rowId) { } @Override - public void addField(MappedFieldType fieldType, Object value) { - final String fieldTypeName = fieldType.typeName(); - - // Check if this format should handle this field type at all - if (!fieldAssignments.shouldHandle(fieldTypeName)) { - logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — not assigned to this format", fieldType.name(), fieldTypeName); - return; - } - - final ParquetField parquetField = ArrowFieldRegistry.getParquetField(fieldTypeName); + public void addField(FieldDescriptor descriptor, Object value) { + final ParquetField parquetField = ArrowFieldRegistry.getParquetField(descriptor.typeName()); if (parquetField == null) { // Field type not supported by Parquet format — skip silently - logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", fieldType.name(), fieldTypeName); + logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", descriptor.fieldName(), descriptor.typeName()); return; } - Set assignedCapabilities = 
fieldAssignments.getAssignedCapabilities(fieldTypeName); - logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", fieldType.name(), fieldTypeName, value, assignedCapabilities); - parquetField.createField(fieldType, managedVSR, value, assignedCapabilities); + logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", descriptor.fieldName(), descriptor.typeName(), value, descriptor.assignedCapabilities()); + parquetField.createField(descriptor, managedVSR, value); } @Override @@ -108,6 +96,11 @@ public WriteResult addToWriter() throws IOException { return new WriteResult(true, null, 1, 1, 1); } + @Override + public DataFormat getDataFormat() { + return ParquetDataFormat.PARQUET_DATA_FORMAT; + } + @Override public void close() throws Exception { // NOTE: ParquetDocumentInput does NOT own the ManagedVSR lifecycle @@ -117,4 +110,5 @@ public void close() throws Exception { // No cleanup needed here - VSRManager handles the ManagedVSR lifecycle } + } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java index 31c3b1c3f00dc..757116158b2b7 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java @@ -8,7 +8,6 @@ import org.apache.logging.log4j.Logger; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.exec.EngineRole; -import org.opensearch.index.engine.exec.FieldAssignments; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.WriteResult; @@ -48,7 +47,6 @@ public class ParquetWriter implements Writer { private final VSRManager vsrManager; private final long 
writerGeneration; private final EngineRole engineRole; - private final FieldAssignments fieldAssignments; public ParquetWriter( String file, @@ -56,15 +54,13 @@ public ParquetWriter( long writerGeneration, ArrowBufferPool arrowBufferPool, IndexSettings indexSettings, - EngineRole engineRole, - FieldAssignments fieldAssignments + EngineRole engineRole ) { this.file = file; this.schema = schema; this.vsrManager = new VSRManager(file, indexSettings.getIndex().getName(), schema, arrowBufferPool); this.writerGeneration = writerGeneration; this.engineRole = engineRole; - this.fieldAssignments = fieldAssignments; } @Override @@ -108,6 +104,6 @@ public ParquetDocumentInput newDocumentInput() { } // Get a new ManagedVSR from VSRManager for this document input - return new ParquetDocumentInput(vsrManager.getActiveManagedVSR(), engineRole, fieldAssignments); + return new ParquetDocumentInput(vsrManager.getActiveManagedVSR(), engineRole); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java index fc70c19498df3..787b09b39c7e9 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java @@ -9,7 +9,6 @@ package org.opensearch.index.engine.exec; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.mapper.MappedFieldType; import java.io.IOException; @@ -18,7 +17,13 @@ public interface DocumentInput extends AutoCloseable { void addRowIdField(String fieldName, long rowId); - void addField(MappedFieldType fieldType, Object value); + /** + * Adds a field value to this document input. 
+ * + * @param descriptor the {@link FieldDescriptor} carrying the field's name, type, and assigned capabilities + * @param value the field value to add + */ + void addField(FieldDescriptor descriptor, Object value); T getFinalInput(); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java index 428d5cb655e3e..4e8297b36f7d4 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java @@ -19,9 +19,12 @@ import java.util.Set; /** - * Resolves which data format handles which capabilities for each field type. + * Resolves which data format handles which capabilities for each mapped field. * Uses primary-gets-priority strategy: if the primary format supports a capability - * for a field type, it wins. Secondary formats only get capabilities the primary can't handle. + * for a field's type, it wins. Secondary formats only get capabilities the primary can't handle. + * + *

Resolution is keyed by field name (not type name), so two fields of the same type + * with different mapping attributes receive different capability sets. */ @ExperimentalApi public final class FieldAssignmentResolver { @@ -33,10 +36,10 @@ private FieldAssignmentResolver() {} /** * Resolves field assignments for all mapped fields. * - * @param registry the field support registry with all format capabilities + * @param registry the field support registry with type-level format capabilities * @param roleMap format → engine role mapping * @param fieldTypes all mapped field types from the mapper service - * @return per-format FieldAssignments + * @return per-format FieldAssignments keyed by field name */ public static Map resolve( FieldSupportRegistry registry, @@ -52,10 +55,12 @@ public static Map resolve( } } - // Build per-format assignment maps - Map>> perFormatMap = new HashMap<>(); + // Accumulate capabilities per field name per format before creating FieldDescriptors + Map>> perFormatCaps = new HashMap<>(); + // Track typeName per fieldName for FieldDescriptor construction + Map fieldNameToTypeName = new HashMap<>(); for (DataFormat format : roleMap.keySet()) { - perFormatMap.put(format, new HashMap<>()); + perFormatCaps.put(format, new HashMap<>()); } for (MappedFieldType fieldType : fieldTypes) { @@ -63,17 +68,30 @@ public static Map resolve( if (fieldType.typeName().startsWith("_")) { continue; } + String fieldName = fieldType.name(); String typeName = fieldType.typeName(); - resolveField(registry, roleMap, primaryFormat, perFormatMap, fieldType, typeName); + fieldNameToTypeName.put(fieldName, typeName); + resolveField(registry, roleMap, primaryFormat, perFormatCaps, fieldType, fieldName, typeName); } - // Wrap into FieldAssignments + // Convert accumulated capabilities into FieldDescriptor objects and wrap into FieldAssignments Map result = new HashMap<>(); - for (Map.Entry>> entry : perFormatMap.entrySet()) { - result.put(entry.getKey(), new 
FieldAssignments(entry.getValue())); - logger.info("[COMPOSITE_DEBUG] Field assignments for format [{}]:", entry.getKey().name()); - for (Map.Entry> fieldEntry : entry.getValue().entrySet()) { - logger.info("[COMPOSITE_DEBUG] fieldType=[{}] -> capabilities={}", fieldEntry.getKey(), fieldEntry.getValue()); + for (Map.Entry>> formatEntry : perFormatCaps.entrySet()) { + DataFormat format = formatEntry.getKey(); + Map> fieldCaps = formatEntry.getValue(); + Map descriptors = new HashMap<>(); + for (Map.Entry> fieldEntry : fieldCaps.entrySet()) { + String fieldName = fieldEntry.getKey(); + EnumSet caps = fieldEntry.getValue(); + if (!caps.isEmpty()) { + String typeName = fieldNameToTypeName.get(fieldName); + descriptors.put(fieldName, new FieldDescriptor(fieldName, typeName, caps)); + } + } + result.put(format, new FieldAssignments(descriptors)); + logger.info("[COMPOSITE_DEBUG] Field assignments for format [{}]:", format.name()); + for (Map.Entry descEntry : descriptors.entrySet()) { + logger.info("[COMPOSITE_DEBUG] field=[{}] -> {}", descEntry.getKey(), descEntry.getValue()); } } return result; @@ -83,11 +101,12 @@ private static void resolveField( FieldSupportRegistry registry, Map roleMap, DataFormat primaryFormat, - Map>> perFormatMap, + Map>> perFormatCaps, MappedFieldType fieldType, + String fieldName, String typeName ) { - // Determine which capabilities are required by the mapping + // Determine which capabilities are required by this field's mapping attributes Set required = EnumSet.noneOf(FieldCapability.class); if (fieldType.isSearchable()) { required.add(FieldCapability.INDEX); @@ -99,21 +118,30 @@ private static void resolveField( required.add(FieldCapability.STORE); } - logger.info("[COMPOSITE_DEBUG] resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", - fieldType.name(), typeName, required, fieldType.isSearchable(), fieldType.hasDocValues(), fieldType.isStored()); + logger.info( + "[COMPOSITE_DEBUG] 
resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", + fieldName, + typeName, + required, + fieldType.isSearchable(), + fieldType.hasDocValues(), + fieldType.isStored() + ); // For each required capability, assign to primary if it supports it, else to secondary for (FieldCapability cap : required) { boolean primaryHasCap = primaryFormat != null && registry.hasCapability(typeName, primaryFormat, cap); - logger.info("[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", - cap, primaryFormat != null ? primaryFormat.name() : "null", primaryHasCap, - primaryFormat != null ? registry.getCapabilities(typeName, primaryFormat) : "N/A"); + logger.info( + "[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", + cap, + primaryFormat != null ? primaryFormat.name() : "null", + primaryHasCap, + primaryFormat != null ? registry.getCapabilities(typeName, primaryFormat) : "N/A" + ); if (primaryHasCap) { // Primary handles this capability - perFormatMap.get(primaryFormat) - .computeIfAbsent(typeName, k -> EnumSet.noneOf(FieldCapability.class)) - .add(cap); + perFormatCaps.get(primaryFormat).computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)).add(cap); logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to PRIMARY format [{}]", cap, primaryFormat.name()); } else { // Find a secondary format that supports it @@ -123,13 +151,18 @@ private static void resolveField( EngineRole role = entry.getValue(); boolean isSecondary = role != EngineRole.PRIMARY; boolean secondaryHasCap = registry.hasCapability(typeName, secondaryFormat, cap); - logger.info("[COMPOSITE_DEBUG] checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", - secondaryFormat.name(), role, isSecondary, secondaryHasCap, - registry.getCapabilities(typeName, secondaryFormat)); + logger.info( + "[COMPOSITE_DEBUG] 
checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", + secondaryFormat.name(), + role, + isSecondary, + secondaryHasCap, + registry.getCapabilities(typeName, secondaryFormat) + ); if (isSecondary && secondaryHasCap) { - perFormatMap.get(secondaryFormat) - .computeIfAbsent(typeName, k -> EnumSet.noneOf(FieldCapability.class)) + perFormatCaps.get(secondaryFormat) + .computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)) .add(cap); logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to SECONDARY format [{}]", cap, secondaryFormat.name()); assignedToSecondary = true; @@ -137,10 +170,15 @@ private static void resolveField( } } if (!assignedToSecondary) { - logger.warn("[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned to any format!", - cap, fieldType.name(), typeName); + logger.warn( + "[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned to any format!", + cap, + fieldName, + typeName + ); } } } } } + diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java index 5acd981b79733..a5e2031ae98fb 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java @@ -16,47 +16,39 @@ /** * Per-format view of field capability assignments resolved by the composite engine. - * Maps fieldTypeName → Set of FieldCapability that this format is responsible for. + * Maps fieldName → FieldDescriptor that this format is responsible for. * *

Used by DocumentInput implementations to decide whether to write a given field. - * If a field type has no entry, this format should skip it entirely. + * If a field name has no entry, this format should skip it entirely. */ @ExperimentalApi public class FieldAssignments { - /** Sentinel instance that accepts all fields — used in single-format (non-composite) mode. */ - public static final FieldAssignments ACCEPT_ALL = new FieldAssignments(Collections.emptyMap(), true); + private final Map descriptors; - private final Map> assignments; - private final boolean acceptAll; - - public FieldAssignments(Map> assignments) { - this(assignments, false); + public FieldAssignments(Map descriptors) { + this.descriptors = Map.copyOf(descriptors); } - private FieldAssignments(Map> assignments, boolean acceptAll) { - this.assignments = assignments; - this.acceptAll = acceptAll; + /** + * Returns true if this format should handle the given field name. + */ + public boolean shouldHandle(String fieldName) { + return descriptors.containsKey(fieldName); } /** - * Returns true if this format should handle the given field type. + * Returns the assigned capabilities for a field name, or empty set if none. */ - public boolean shouldHandle(String fieldTypeName) { - if (acceptAll) { - return true; - } - return assignments.containsKey(fieldTypeName); + public Set getAssignedCapabilities(String fieldName) { + FieldDescriptor fd = descriptors.get(fieldName); + return fd != null ? fd.assignedCapabilities() : Collections.emptySet(); } /** - * Returns the assigned capabilities for a field type, or empty set if none. + * Returns the full FieldDescriptor for a given field name, or null if none. */ - public Set getAssignedCapabilities(String fieldTypeName) { - if (acceptAll) { - return Collections.emptySet(); - } - Set caps = assignments.get(fieldTypeName); - return caps != null ? 
Collections.unmodifiableSet(caps) : Collections.emptySet(); + public FieldDescriptor getDescriptor(String fieldName) { + return descriptors.get(fieldName); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java new file mode 100644 index 0000000000000..6de1e5669cbb2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java @@ -0,0 +1,98 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Collections; +import java.util.EnumSet; +import java.util.Objects; +import java.util.Set; + +/** + * Immutable per-field descriptor that carries a field's name, type name, resolved capabilities, + * and pre-computed boolean flags for O(1) hot-path capability checks. + * + *

Created by {@link FieldAssignmentResolver} during engine initialization. Each mapped field + * gets its own descriptor per data format, replacing the previous type-name-keyed lookup. + */ +@ExperimentalApi +public final class FieldDescriptor { + + private final String fieldName; + private final String typeName; + private final Set assignedCapabilities; + private final boolean searchable; + private final boolean hasDocValues; + private final boolean stored; + + /** + * Constructs a new FieldDescriptor. + * + * @param fieldName the mapped field name (e.g., "title", "price") + * @param typeName the field type name (e.g., "keyword", "long") + * @param assignedCapabilities the capabilities this format is responsible for on this field + */ + public FieldDescriptor(String fieldName, String typeName, Set assignedCapabilities) { + this.fieldName = Objects.requireNonNull(fieldName); + this.typeName = Objects.requireNonNull(typeName); + this.assignedCapabilities = Collections.unmodifiableSet(EnumSet.copyOf(assignedCapabilities)); + this.searchable = assignedCapabilities.contains(FieldCapability.INDEX); + this.hasDocValues = assignedCapabilities.contains(FieldCapability.DOC_VALUES); + this.stored = assignedCapabilities.contains(FieldCapability.STORE); + } + + /** Returns the mapped field name. */ + public String fieldName() { + return fieldName; + } + + /** Returns the field type name. */ + public String typeName() { + return typeName; + } + + /** Returns the immutable set of assigned capabilities. */ + public Set assignedCapabilities() { + return assignedCapabilities; + } + + /** Returns true if the assigned capabilities include {@link FieldCapability#INDEX}. */ + public boolean isSearchable() { + return searchable; + } + + /** Returns true if the assigned capabilities include {@link FieldCapability#DOC_VALUES}. */ + public boolean hasDocValues() { + return hasDocValues; + } + + /** Returns true if the assigned capabilities include {@link FieldCapability#STORE}. 
*/ + public boolean isStored() { + return stored; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldDescriptor that = (FieldDescriptor) o; + return fieldName.equals(that.fieldName) && typeName.equals(that.typeName) && assignedCapabilities.equals(that.assignedCapabilities); + } + + @Override + public int hashCode() { + return Objects.hash(fieldName, typeName, assignedCapabilities); + } + + @Override + public String toString() { + return "FieldDescriptor{" + "fieldName='" + fieldName + '\'' + ", typeName='" + typeName + '\'' + ", capabilities=" + assignedCapabilities + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java index a90ad48c6c20f..613203c973779 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -13,6 +13,9 @@ import org.apache.lucene.util.SetOnce; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.RowIdGenerator; @@ -33,7 +36,6 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; public class CompositeDataFormatWriter implements Writer, Lock { @@ -46,6 +48,7 @@ public class CompositeDataFormatWriter implements Writer fieldAssignmentsMap; public static 
final String ROW_ID = "___row_id"; public CompositeDataFormatWriter(CompositeIndexingExecutionEngine engine, long writerGeneration) { @@ -53,6 +56,7 @@ public CompositeDataFormatWriter(CompositeIndexingExecutionEngine engine, long w this.lock = new ReentrantLock(); this.aborted = false; this.writerGeneration = writerGeneration; + this.fieldAssignmentsMap = engine.getFieldAssignmentsMap(); engine.getDelegates().forEach(delegate -> { try { writers.add(new AbstractMap.SimpleImmutableEntry<>(delegate.getDataFormat(), delegate.createWriter(writerGeneration))); @@ -96,10 +100,15 @@ public void close() throws IOException { @Override public CompositeDocumentInput newDocumentInput() { + List> inputs = new ArrayList<>(); + for (Map.Entry>> writerEntry : writers) { + inputs.add(writerEntry.getValue().newDocumentInput()); + } CompositeDocumentInput compositeDocumentInput = new CompositeDocumentInput( - writers.stream().map(Map.Entry::getValue).map(Writer::newDocumentInput).collect(Collectors.toList()), + inputs, + fieldAssignmentsMap, this, postWrite ); @@ -162,14 +171,21 @@ public long getWriterGeneration() { public static class CompositeDocumentInput implements DocumentInput>> { List> inputs; + private final Map fieldAssignmentsMap; CompositeDataFormatWriter writer; Runnable onClose; private long version = -1; private long seqNo = -2L; private long primaryTerm = 0; - public CompositeDocumentInput(List> inputs, CompositeDataFormatWriter writer, Runnable onClose) { + public CompositeDocumentInput( + List> inputs, + Map fieldAssignmentsMap, + CompositeDataFormatWriter writer, + Runnable onClose + ) { this.inputs = inputs; + this.fieldAssignmentsMap = fieldAssignmentsMap; this.writer = writer; this.onClose = onClose; } @@ -181,29 +197,59 @@ public void addRowIdField(String fieldName, long rowId) { } } - @Override + /** + * Entry point from the mapper layer. 
Resolves {@link MappedFieldType} to {@link FieldDescriptor} + * per format using each delegate's {@link FieldAssignments}, then delegates to the format-specific + * {@link DocumentInput#addField(FieldDescriptor, Object)}. + * Skips delegation if no descriptor exists for the field name in that format. + */ public void addField(MappedFieldType fieldType, Object value) { - // Each delegate's addField uses its own FieldAssignments to decide what to write - logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — delegating to {} format inputs", + logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format descriptors for {} inputs", fieldType.name(), fieldType.typeName(), value, inputs.size()); for (DocumentInput input : inputs) { - input.addField(fieldType, value); + FieldAssignments assignments = fieldAssignmentsMap.get(input.getDataFormat()); + if (assignments == null) { + continue; + } + FieldDescriptor descriptor = assignments.getDescriptor(fieldType.name()); + if (descriptor == null) { + continue; + } + input.addField(descriptor, value); + } + } + + @Override + public void addField(FieldDescriptor descriptor, Object value) { + // Direct FieldDescriptor delegation — used for pre-resolved fields + for (DocumentInput input : inputs) { + input.addField(descriptor, value); } } @Override public void setVersion(long version) { this.version = version; + FieldDescriptor versionDescriptor = new FieldDescriptor( + VersionFieldMapper.NAME, + VersionFieldMapper.CONTENT_TYPE, + java.util.EnumSet.of(FieldCapability.DOC_VALUES) + ); for (DocumentInput input : inputs) { - input.addField(VersionFieldMapper.VersionFieldType.INSTANCE, version); + input.addField(versionDescriptor, version); } } @Override public void setSeqNo(long seqNo) { this.seqNo = seqNo; + FieldDescriptor seqNoDescriptor = new FieldDescriptor( + SeqNoFieldMapper.NAME, + SeqNoFieldMapper.CONTENT_TYPE, + java.util.EnumSet.of(FieldCapability.INDEX, 
FieldCapability.DOC_VALUES) + ); for (DocumentInput input : inputs) { - input.addField(SeqNoFieldMapper.SeqNoFieldType.INSTANCE, seqNo); + input.addField(seqNoDescriptor, seqNo); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java index 2f720a93bd432..084155f0544d7 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -53,6 +53,7 @@ public class CompositeIndexingExecutionEngine implements IndexingExecutionEngine private final List> delegates = new ArrayList<>(); private final FieldSupportRegistry fieldSupportRegistry; private final Map roleMap; + private final Map fieldAssignmentsMap; private static final Logger logger = LogManager.getLogger(CompositeIndexingExecutionEngine.class); @@ -93,15 +94,10 @@ public CompositeIndexingExecutionEngine( logger.info("[COMPOSITE_DEBUG] Composite field validation passed for all mapped fields"); } - // Resolve field assignments: which format handles which capability for each field type - Map fieldAssignmentsMap; - if (singlePlugin) { - fieldAssignmentsMap = Map.of(dataSourcePlugins.get(0).getDataFormat(), FieldAssignments.ACCEPT_ALL); - logger.info("[COMPOSITE_DEBUG] Single plugin mode — using ACCEPT_ALL field assignments for [{}]", - dataSourcePlugins.get(0).getDataFormat().name()); - } else { - fieldAssignmentsMap = FieldAssignmentResolver.resolve(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); - } + // Resolve field assignments: which format handles which capability for each field + // Both single-plugin and multi-plugin modes go through per-field resolution + this.fieldAssignmentsMap = FieldAssignmentResolver.resolve(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); + 
logger.info("[COMPOSITE_DEBUG] Resolved per-field assignments for {} format(s)", fieldAssignmentsMap.size()); // Determine primary format from role map DataFormat primaryDataFormat = roleMap.entrySet().stream() @@ -114,9 +110,7 @@ public CompositeIndexingExecutionEngine( for (DataSourcePlugin plugin : dataSourcePlugins) { dataFormats.add(plugin.getDataFormat()); boolean isPrimary = roleMap.get(plugin.getDataFormat()) == EngineRole.PRIMARY; - FieldAssignments assignments = fieldAssignmentsMap.getOrDefault( - plugin.getDataFormat(), FieldAssignments.ACCEPT_ALL - ); + FieldAssignments assignments = fieldAssignmentsMap.get(plugin.getDataFormat()); IndexingExecutionEngine indexingEngine = plugin.indexingEngine( engineConfig, mapperService, isPrimary, shardPath, indexSettings, assignments ); @@ -182,6 +176,10 @@ public Map getRoleMap() { return Collections.unmodifiableMap(roleMap); } + public Map getFieldAssignmentsMap() { + return Collections.unmodifiableMap(fieldAssignmentsMap); + } + @Override public Any getDataFormat() { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java index 1f5cb86f6442c..ac304d5de2831 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java @@ -53,7 +53,6 @@ public class LuceneExecutionEngine implements IndexingExecutionEngine supportedFieldTypes(boolean isPrimaryEngine) { public Writer> createWriter(long writerGeneration) throws IOException { Path directoryPath = Files.createTempDirectory(Long.toString(System.nanoTime())); // TODO:: Is this the right name? EngineRole role = isPrimaryEngine ? 
EngineRole.PRIMARY : EngineRole.SECONDARY; - return new LuceneWriter(directoryPath, createWriter(directoryPath, writerGeneration), writerGeneration, role, fieldAssignments); + return new LuceneWriter(directoryPath, createWriter(directoryPath, writerGeneration), writerGeneration, role); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java index 189dcebd1bfac..9ec8031fab3cf 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java @@ -9,7 +9,9 @@ package org.opensearch.index.engine.exec.lucene.fields; import org.apache.lucene.document.Field; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.mapper.FieldNamesFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; @@ -17,9 +19,24 @@ import java.util.Set; +/** + * Base class for Lucene field implementations in the composite engine. + * + *

Each subclass handles a specific field type (keyword, long, text, etc.) and + * creates the appropriate Lucene index fields based on the capabilities described + * in the {@link FieldDescriptor}. + */ +@ExperimentalApi public abstract class LuceneField { - public abstract void createField(MappedFieldType mappedFieldType, Document document, Object parseValue, Set assignedCapabilities); + /** + * Creates Lucene index fields for the given value based on the descriptor's assigned capabilities. + * + * @param descriptor the per-field descriptor carrying field name, type name, and capability flags + * @param document the Lucene document to add fields to + * @param parseValue the parsed field value to index + */ + public abstract void createField(FieldDescriptor descriptor, Document document, Object parseValue); protected final void createFieldNamesField(MappedFieldType mappedFieldType, Document document, ParseContext context) { assert !mappedFieldType.hasDocValues() : "_field_names should only be used when doc_values are turned off"; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java index 2056c9cf5ea33..256d3a53a82af 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java @@ -11,8 +11,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -21,10 +21,10 @@ public class BinaryLuceneField extends LuceneField { @Override - 
public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final BytesRef value = (BytesRef) parseValue; - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value)); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java index 9d4578b2c3ae9..ad2bf281d49d1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java @@ -14,8 +14,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -24,21 +24,21 @@ public class BooleanLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Boolean value = (Boolean) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { + if (descriptor.isSearchable()) { FieldType ft = new FieldType(); ft.setOmitNorms(true); ft.setIndexOptions(IndexOptions.DOCS); ft.setTokenized(false); ft.freeze(); - 
document.add(new Field(mappedFieldType.name(), value ? "T" : "F", ft)); + document.add(new Field(descriptor.fieldName(), value ? "T" : "F", ft)); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(mappedFieldType.name(), value ? 1 : 0)); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value ? 1 : 0)); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value ? "T" : "F")); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value ? "T" : "F")); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java index c1b7d5a9e36f1..67c6fa7f87bc2 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class DateLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final long timestamp = (long) parseValue; - if 
(assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new LongPoint(mappedFieldType.name(), timestamp)); + if (descriptor.isSearchable()) { + document.add(new LongPoint(descriptor.fieldName(), timestamp)); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(mappedFieldType.name(), timestamp)); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), timestamp)); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), timestamp)); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), timestamp)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java index 5f0cff41955b1..ab27b411ba78e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class DateNanosLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final long 
timestamp = (long) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new LongPoint(mappedFieldType.name(), timestamp)); + if (descriptor.isSearchable()) { + document.add(new LongPoint(descriptor.fieldName(), timestamp)); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(mappedFieldType.name(), timestamp)); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), timestamp)); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), timestamp)); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), timestamp)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java index 5a7ae34a96923..13f389195ad45 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,13 +22,13 @@ public class IdLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final BytesRef value 
= (BytesRef) parseValue; - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new BinaryDocValuesField(mappedFieldType.name(), value)); + if (descriptor.hasDocValues()) { + document.add(new BinaryDocValuesField(descriptor.fieldName(), value)); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value)); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java index d03525cf6c532..aa47de8f92e55 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java @@ -10,8 +10,8 @@ import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -20,10 +20,10 @@ public class IgnoredLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final String value = parseValue.toString(); - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value)); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value)); } } diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java index 95dba2a358fe7..b9dba9844d3e5 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,13 +22,13 @@ public class RoutingLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final String value = parseValue.toString(); - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedSetDocValuesField(mappedFieldType.name(), new BytesRef(value))); + if (descriptor.hasDocValues()) { + document.add(new SortedSetDocValuesField(descriptor.fieldName(), new BytesRef(value))); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value)); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java 
index 5a5a0b2236dd2..28209584a7aad 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java @@ -11,8 +11,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -21,13 +21,13 @@ public class SizeLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(mappedFieldType.name(), value.intValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.intValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value.intValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.intValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java index cb50337021588..830a63e62b88d 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java +++ 
b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java @@ -12,9 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,17 +22,16 @@ public class ByteLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new IntPoint(fieldType.name(), value.byteValue())); + if (descriptor.isSearchable()) { + document.add(new IntPoint(descriptor.fieldName(), value.byteValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), value.byteValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.byteValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.byteValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.byteValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java 
b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java index f26476846cc7e..a7e566a72c3ec 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java @@ -11,8 +11,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -21,13 +21,13 @@ public class DocCountLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(mappedFieldType.name(), value.longValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.longValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value.longValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.longValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java index b468d9ae283d6..5d48bf6811872 100644 --- 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java @@ -13,9 +13,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.NumericUtils; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -24,17 +23,16 @@ public class DoubleLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new DoublePoint(fieldType.name(), value.doubleValue())); + if (descriptor.isSearchable()) { + document.add(new DoublePoint(descriptor.fieldName(), value.doubleValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.doubleToSortableLong(value.doubleValue()))); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), NumericUtils.doubleToSortableLong(value.doubleValue()))); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.doubleValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), 
value.doubleValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java index 0c67d646bca30..83b17f7244dde 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java @@ -13,9 +13,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.NumericUtils; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -24,17 +23,16 @@ public class FloatLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new FloatPoint(fieldType.name(), value.floatValue())); + if (descriptor.isSearchable()) { + document.add(new FloatPoint(descriptor.fieldName(), value.floatValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.floatToSortableInt(value.floatValue()))); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), 
NumericUtils.floatToSortableInt(value.floatValue()))); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.floatValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.floatValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java index 4fc0edae344a7..9e3e7dff23dc2 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java @@ -12,9 +12,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,17 +22,16 @@ public class HalfFloatLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new HalfFloatPoint(fieldType.name(), value.floatValue())); + if (descriptor.isSearchable()) { + document.add(new HalfFloatPoint(descriptor.fieldName(), 
value.floatValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), HalfFloatPoint.halfFloatToSortableShort(value.floatValue()))); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), HalfFloatPoint.halfFloatToSortableShort(value.floatValue()))); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.floatValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.floatValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java index 46f4bc73c094e..c9a8727a6fb7c 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java @@ -12,9 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,17 +22,16 @@ public class IntegerLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, 
ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new IntPoint(fieldType.name(), value.intValue())); + if (descriptor.isSearchable()) { + document.add(new IntPoint(descriptor.fieldName(), value.intValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.intValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.intValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.intValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java index cf7beda7d35c9..1c5739893c150 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java @@ -12,9 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,17 +22,16 @@ public class LongLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object 
parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new LongPoint(fieldType.name(), value.longValue())); + if (descriptor.isSearchable()) { + document.add(new LongPoint(descriptor.fieldName(), value.longValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.longValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.longValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.longValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java index 8b40ae609c452..f85f51445abf8 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java @@ -12,9 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import 
org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,17 +22,16 @@ public class ShortLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new IntPoint(fieldType.name(), value.shortValue())); + if (descriptor.isSearchable()) { + document.add(new IntPoint(descriptor.fieldName(), value.shortValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), value.shortValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.shortValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.shortValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.shortValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java index 151f5faf63886..5c558987ce1ba 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; 
+import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class TokenCountLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new IntPoint(mappedFieldType.name(), value.intValue())); + if (descriptor.isSearchable()) { + document.add(new IntPoint(descriptor.fieldName(), value.intValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(mappedFieldType.name(), value.intValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.intValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), value.intValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.intValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java index b47fdef0a06b3..1e9cf4ca83aa8 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java @@ -12,9 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import 
org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,17 +22,16 @@ public class UnsignedLongLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { - final NumberFieldMapper.NumberFieldType fieldType = (NumberFieldMapper.NumberFieldType) mappedFieldType; + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new LongPoint(fieldType.name(), value.longValue())); + if (descriptor.isSearchable()) { + document.add(new LongPoint(descriptor.fieldName(), value.longValue())); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); + if (descriptor.hasDocValues()) { + document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.longValue())); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(fieldType.name(), value.longValue())); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), value.longValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java index b0921ae2d9a72..d663a9825c3ea 100644 --- 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java @@ -14,8 +14,8 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.common.network.InetAddresses; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.net.InetAddress; @@ -25,17 +25,17 @@ public class IpLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final InetAddress address = (InetAddress) parseValue; final byte[] encoded = InetAddresses.forString(address.getHostAddress()).getAddress(); - if (assignedCapabilities.contains(FieldCapability.INDEX)) { - document.add(new InetAddressPoint(mappedFieldType.name(), InetAddresses.forString(address.getHostAddress()))); + if (descriptor.isSearchable()) { + document.add(new InetAddressPoint(descriptor.fieldName(), InetAddresses.forString(address.getHostAddress()))); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedSetDocValuesField(mappedFieldType.name(), new BytesRef(encoded))); + if (descriptor.hasDocValues()) { + document.add(new SortedSetDocValuesField(descriptor.fieldName(), new BytesRef(encoded))); } - if (assignedCapabilities.contains(FieldCapability.STORE)) { - document.add(new StoredField(mappedFieldType.name(), new BytesRef(encoded))); + if (descriptor.isStored()) { + document.add(new StoredField(descriptor.fieldName(), new BytesRef(encoded))); } } diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java index 3a70117a93542..2a18b3da9c6db 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java @@ -13,9 +13,9 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.mapper.KeywordFieldMapper; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import org.apache.logging.log4j.LogManager; @@ -29,15 +29,15 @@ public class KeywordLuceneField extends LuceneField { private static final Logger logger = LogManager.getLogger(KeywordLuceneField.class); @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { String value = (String) parseValue; final BytesRef binaryValue = new BytesRef(value); - boolean shouldIndex = assignedCapabilities.contains(FieldCapability.INDEX); - boolean shouldStore = assignedCapabilities.contains(FieldCapability.STORE); + boolean shouldIndex = descriptor.isSearchable(); + boolean shouldStore = descriptor.isStored(); logger.info("[COMPOSITE_DEBUG] KeywordLuceneField.createField: field=[{}] value=[{}] capabilities={} shouldIndex={} shouldStore={} hasDocValues={}", - mappedFieldType.name(), value, assignedCapabilities, shouldIndex, shouldStore, assignedCapabilities.contains(FieldCapability.DOC_VALUES)); + 
descriptor.fieldName(), value, descriptor.assignedCapabilities(), shouldIndex, shouldStore, descriptor.hasDocValues()); if (shouldIndex || shouldStore) { FieldType fieldType = new FieldType(); @@ -46,13 +46,13 @@ public void createField(MappedFieldType mappedFieldType, ParseContext.Document d fieldType.setOmitNorms(true); fieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS : IndexOptions.NONE); fieldType.freeze(); - document.add(new KeywordFieldMapper.KeywordField(mappedFieldType.name(), binaryValue, fieldType)); - logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added KeywordField for [{}] indexed={} stored={}", mappedFieldType.name(), shouldIndex, shouldStore); + document.add(new KeywordFieldMapper.KeywordField(descriptor.fieldName(), binaryValue, fieldType)); + logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added KeywordField for [{}] indexed={} stored={}", descriptor.fieldName(), shouldIndex, shouldStore); } - if (assignedCapabilities.contains(FieldCapability.DOC_VALUES)) { - document.add(new SortedSetDocValuesField(mappedFieldType.name(), binaryValue)); - logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added SortedSetDocValuesField for [{}]", mappedFieldType.name()); + if (descriptor.hasDocValues()) { + document.add(new SortedSetDocValuesField(descriptor.fieldName(), binaryValue)); + logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added SortedSetDocValuesField for [{}]", descriptor.fieldName()); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java index d9e609d9cc96e..b74a9e9adc7ae 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.FieldType; import 
org.apache.lucene.index.IndexOptions; import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,17 +22,17 @@ public class TextLuceneField extends LuceneField { @Override - public void createField(MappedFieldType mappedFieldType, ParseContext.Document document, Object parseValue, Set assignedCapabilities) { + public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { final String value = (String) parseValue; - boolean shouldIndex = assignedCapabilities.contains(FieldCapability.INDEX); - boolean shouldStore = assignedCapabilities.contains(FieldCapability.STORE); + boolean shouldIndex = descriptor.isSearchable(); + boolean shouldStore = descriptor.isStored(); if (shouldIndex || shouldStore) { FieldType fieldType = new FieldType(); fieldType.setStored(shouldStore); fieldType.setIndexOptions(shouldIndex ? 
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.NONE); - Field field = new Field(mappedFieldType.name(), value, fieldType); + Field field = new Field(descriptor.fieldName(), value, fieldType); document.add(field); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java index f28fe220a745c..a0a5fe80e69ef 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java @@ -12,31 +12,27 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.IndexWriter; +import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.engine.exec.EngineRole; -import org.opensearch.index.engine.exec.FieldAssignments; -import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.WriteResult; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; -import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.io.IOException; -import java.util.Set; public class LuceneDocumentInput implements DocumentInput { private static final Logger logger = LogManager.getLogger(LuceneDocumentInput.class); private final ParseContext.Document document; private final IndexWriter indexWriter; private final EngineRole engineRole; - private final FieldAssignments fieldAssignments; - public LuceneDocumentInput(ParseContext.Document document, IndexWriter indexWriter, EngineRole engineRole, FieldAssignments fieldAssignments) { + public 
LuceneDocumentInput(ParseContext.Document document, IndexWriter indexWriter, EngineRole engineRole) { this.document = document; this.indexWriter = indexWriter; this.engineRole = engineRole; - this.fieldAssignments = fieldAssignments; } @Override @@ -46,26 +42,27 @@ public void addRowIdField(String fieldName, long rowId) { @SuppressWarnings("unchecked") @Override - public void addField(MappedFieldType fieldType, Object value) { - final String fieldTypeName = fieldType.typeName(); - - // Check if this format should handle this field type at all - if (!fieldAssignments.shouldHandle(fieldTypeName)) { - logger.debug("[COMPOSITE_DEBUG] Lucene SKIP field=[{}] type=[{}] — not assigned to this format", fieldType.name(), fieldTypeName); - return; - } - - final LuceneField luceneField = LuceneFieldRegistry.getLuceneField(fieldTypeName); + public void addField(FieldDescriptor descriptor, Object value) { + final LuceneField luceneField = LuceneFieldRegistry.getLuceneField(descriptor.typeName()); if (luceneField == null) { // Field type not supported by Lucene format — skip silently - logger.debug("[COMPOSITE_DEBUG] Lucene SKIP field=[{}] type=[{}] — no LuceneField registered in LuceneFieldRegistry", fieldType.name(), fieldTypeName); + logger.debug( + "[COMPOSITE_DEBUG] Lucene SKIP field=[{}] type=[{}] — no LuceneField registered in LuceneFieldRegistry", + descriptor.fieldName(), + descriptor.typeName() + ); return; } - Set assignedCapabilities = fieldAssignments.getAssignedCapabilities(fieldTypeName); - logger.debug("[COMPOSITE_DEBUG] Lucene ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", fieldType.name(), fieldTypeName, value, assignedCapabilities); - luceneField.createField(fieldType, document, value, assignedCapabilities); + logger.debug( + "[COMPOSITE_DEBUG] Lucene ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", + descriptor.fieldName(), + descriptor.typeName(), + value, + descriptor.assignedCapabilities() + ); + luceneField.createField(descriptor, 
document, value); } /** @@ -96,8 +93,14 @@ public WriteResult addToWriter() { } } + @Override + public DataFormat getDataFormat() { + return DataFormat.LUCENE; + } + @Override public void close() throws Exception { // no-op, reuse writer } + } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java index 254ad12199d0c..6995899bb7d64 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java @@ -11,7 +11,6 @@ import org.apache.lucene.index.IndexWriter; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.EngineRole; -import org.opensearch.index.engine.exec.FieldAssignments; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.WriteResult; @@ -28,14 +27,12 @@ public class LuceneWriter implements Writer { private final long writerGeneration; private final Path directoryPath; private final EngineRole engineRole; - private final FieldAssignments fieldAssignments; - public LuceneWriter(Path directoryPath, IndexWriter writer, long writerGeneration, EngineRole engineRole, FieldAssignments fieldAssignments) { + public LuceneWriter(Path directoryPath, IndexWriter writer, long writerGeneration, EngineRole engineRole) { this.directoryPath = directoryPath; this.writer = writer; this.writerGeneration = writerGeneration; this.engineRole = engineRole; - this.fieldAssignments = fieldAssignments; } @Override @@ -63,6 +60,6 @@ public void close() throws IOException { @Override public LuceneDocumentInput newDocumentInput() { - return new LuceneDocumentInput(new ParseContext.Document(), writer, engineRole, fieldAssignments); + return new LuceneDocumentInput(new ParseContext.Document(), writer, 
engineRole); } } From a49e46f393fab7f4172cffa1f677586ab1dd6d29 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Thu, 5 Mar 2026 11:11:05 +0530 Subject: [PATCH 06/15] Add changes to write in shardPath always --- .../opensearch/index/engine/exec/WriterFileSet.java | 10 +++++++++- .../index/engine/exec/commit/LuceneCommitEngine.java | 2 +- .../exec/lucene/engine/LuceneExecutionEngine.java | 6 +++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java index 82a6c98c1277b..9aba47cb565ad 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java @@ -34,6 +34,14 @@ public WriterFileSet(Path directory, long writerGeneration, long numRows) { this.isRefreshed = false; } + public WriterFileSet(Path directory, long writerGeneration, long numRows, boolean isRefreshed) { + this.numRows = numRows; + this.files = new HashSet<>(); + this.writerGeneration = writerGeneration; + this.directory = directory.toString(); + this.isRefreshed = isRefreshed; + } + public WriterFileSet withDirectoryAndFiles(String newDirectory, Set files) { return WriterFileSet.builder() .directory(Path.of(newDirectory)) @@ -188,7 +196,7 @@ public WriterFileSet build() { throw new IllegalStateException("writerGeneration must be set"); } - WriterFileSet fileSet = new WriterFileSet(directory, writerGeneration, numRows); + WriterFileSet fileSet = new WriterFileSet(directory, writerGeneration, numRows, isRefreshed); fileSet.files.addAll(this.files); return fileSet; } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java index 697056d4e2eb9..164441baa69d8 100644 --- 
a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java @@ -79,7 +79,7 @@ public synchronized void addLuceneIndexes(List segments) throws IOExcep indexWriter.addIndexes(new NIOFSDirectory(Path.of(wfs.getDirectory()))); wfs.setRefreshed(); } catch (IOException e) { - throw new RuntimeException("Not able to copy it to the main writer in commiter"); + throw new RuntimeException("Not able to copy it to the main writer in commiter: {}", e); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java index ac304d5de2831..01a700193f5f1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java @@ -73,7 +73,11 @@ public List supportedFieldTypes(boolean isPrimaryEngine) { @Override public Writer> createWriter(long writerGeneration) throws IOException { - Path directoryPath = Files.createTempDirectory(Long.toString(System.nanoTime())); // TODO:: Is this the right name? + + Path tmpDirectoryPath = shardPath.getDataPath().resolve("tmp"); + Files.createDirectories(tmpDirectoryPath); + Path directoryPath = Files.createTempDirectory(tmpDirectoryPath, Long.toString(writerGeneration)); // TODO:: Is this the right name? + //Path directoryPath = Files.createTempDirectory(Long.toString(System.nanoTime())); // TODO:: Is this the right name? EngineRole role = isPrimaryEngine ? 
EngineRole.PRIMARY : EngineRole.SECONDARY; return new LuceneWriter(directoryPath, createWriter(directoryPath, writerGeneration), writerGeneration, role); From 02c86120a2be764c723753d980c3ad7a26a2d3a8 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Thu, 5 Mar 2026 23:38:59 +0530 Subject: [PATCH 07/15] Add HardLinkCopyDirectory --- .../index/engine/exec/commit/LuceneCommitEngine.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java index 164441baa69d8..cb4ed5c4f6057 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java @@ -17,6 +17,7 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.misc.store.HardlinkCopyDirectoryWrapper; import org.apache.lucene.store.NIOFSDirectory; import org.opensearch.common.collect.MapBuilder; import org.opensearch.common.concurrent.GatedCloseable; @@ -76,7 +77,7 @@ public synchronized void addLuceneIndexes(List segments) throws IOExcep if(wfs == null || wfs.refresh()) continue; try { - indexWriter.addIndexes(new NIOFSDirectory(Path.of(wfs.getDirectory()))); + indexWriter.addIndexes(new HardlinkCopyDirectoryWrapper(new NIOFSDirectory(Path.of(wfs.getDirectory())))); wfs.setRefreshed(); } catch (IOException e) { throw new RuntimeException("Not able to copy it to the main writer in commiter: {}", e); @@ -97,10 +98,13 @@ public synchronized void addLuceneIndexes(List segments) throws IOExcep if (segmentByGeneration.containsKey(writerGeneration)) { WriterFileSet writerFileSet = segmentByGeneration.get(writerGeneration).getDFGroupedSearchableFiles().get(DataFormat.LUCENE.name()); + Path oldDirectoryPath 
= Path.of(writerFileSet.getDirectory()); segmentByGeneration.get(writerGeneration).addSearchableFiles( DataFormat.LUCENE.name(), writerFileSet.withDirectoryAndFiles(indexWriter.getDirectory().toString(), new HashSet<>(segmentCommitInfo.files())) ); + // Deletes the older path once the file path has been updated + IOUtils.rm(oldDirectoryPath); } } } From 9271ba375da67b4fb1ad2aceae719a70b7749c3a Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Fri, 6 Mar 2026 11:50:35 +0530 Subject: [PATCH 08/15] Fix build --- .../java/org/opensearch/index/mapper/size/SizeFieldMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java b/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java index dd3f718ffc5fe..e8622fdf1a271 100644 --- a/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java +++ b/plugins/mapper-size/src/main/java/org/opensearch/index/mapper/size/SizeFieldMapper.java @@ -101,7 +101,7 @@ public void postParse(ParseContext context) throws IOException { final int value = context.sourceToParse().source().length(); if (isPluggableDataFormatFeatureEnabled(context)) { - context.compositeDocumentInput().addField(fieldType(), value, ); + context.compositeDocumentInput().addField(fieldType(), value); } else { context.doc().addAll(NumberType.INTEGER.createFields(name(), value, true, true, false, true)); } From 002ab31c6c6ccbfe5132f19850119a351fda0a86 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Fri, 6 Mar 2026 23:51:08 +0530 Subject: [PATCH 09/15] Force NoMergePolicy and trigger forceMerge --- .../org/opensearch/index/IndexSettings.java | 70 ++++++++++--------- .../lucene/engine/LuceneExecutionEngine.java | 47 ++++++++++++- 2 files changed, 82 insertions(+), 35 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java 
b/server/src/main/java/org/opensearch/index/IndexSettings.java index 794efc532e32c..f6e00bf339f50 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -33,6 +33,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.sandbox.index.MergeOnFlushMergePolicy; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; @@ -1915,40 +1916,41 @@ public boolean getStarTreeIndexEnabled() { * @param isTimeSeriesIndex true if index contains @timestamp field */ public MergePolicy getMergePolicy(boolean isTimeSeriesIndex) { - String indexScopedPolicy = scopedSettings.get(INDEX_MERGE_POLICY); - MergePolicyProvider mergePolicyProvider = null; - IndexMergePolicy indexMergePolicy = IndexMergePolicy.fromString(indexScopedPolicy); - switch (indexMergePolicy) { - case TIERED: - mergePolicyProvider = tieredMergePolicyProvider; - break; - case LOG_BYTE_SIZE: - mergePolicyProvider = logByteSizeMergePolicyProvider; - break; - case DEFAULT_POLICY: - if (isTimeSeriesIndex) { - String nodeScopedTimeSeriesIndexPolicy = TIME_SERIES_INDEX_MERGE_POLICY.get(nodeSettings); - IndexMergePolicy nodeMergePolicy = IndexMergePolicy.fromString(nodeScopedTimeSeriesIndexPolicy); - switch (nodeMergePolicy) { - case TIERED: - case DEFAULT_POLICY: - mergePolicyProvider = tieredMergePolicyProvider; - break; - case LOG_BYTE_SIZE: - mergePolicyProvider = logByteSizeMergePolicyProvider; - break; - } - } else { - mergePolicyProvider = tieredMergePolicyProvider; - } - break; - } - assert mergePolicyProvider != null : "should not happen as validation for invalid merge policy values " - + "are part of setting definition"; - if (logger.isTraceEnabled()) { - logger.trace("Index: " + this.index.getName() + ", Merge policy used: " + mergePolicyProvider); - } - return mergePolicyProvider.getMergePolicy(); 
+ return NoMergePolicy.INSTANCE; +// String indexScopedPolicy = scopedSettings.get(INDEX_MERGE_POLICY); +// MergePolicyProvider mergePolicyProvider = null; +// IndexMergePolicy indexMergePolicy = IndexMergePolicy.fromString(indexScopedPolicy); +// switch (indexMergePolicy) { +// case TIERED: +// mergePolicyProvider = tieredMergePolicyProvider; +// break; +// case LOG_BYTE_SIZE: +// mergePolicyProvider = logByteSizeMergePolicyProvider; +// break; +// case DEFAULT_POLICY: +// if (isTimeSeriesIndex) { +// String nodeScopedTimeSeriesIndexPolicy = TIME_SERIES_INDEX_MERGE_POLICY.get(nodeSettings); +// IndexMergePolicy nodeMergePolicy = IndexMergePolicy.fromString(nodeScopedTimeSeriesIndexPolicy); +// switch (nodeMergePolicy) { +// case TIERED: +// case DEFAULT_POLICY: +// mergePolicyProvider = tieredMergePolicyProvider; +// break; +// case LOG_BYTE_SIZE: +// mergePolicyProvider = logByteSizeMergePolicyProvider; +// break; +// } +// } else { +// mergePolicyProvider = tieredMergePolicyProvider; +// } +// break; +// } +// assert mergePolicyProvider != null : "should not happen as validation for invalid merge policy values " +// + "are part of setting definition"; +// if (logger.isTraceEnabled()) { +// logger.trace("Index: " + this.index.getName() + ", Merge policy used: " + mergePolicyProvider); +// } +// return mergePolicyProvider.getMergePolicy(); } public T getValue(Setting setting) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java index 01a700193f5f1..eb8473a06c7dd 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java @@ -10,8 +10,14 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import 
org.apache.lucene.index.FilterMergePolicy; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeTrigger; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.store.Directory; @@ -60,7 +66,6 @@ public LuceneExecutionEngine(EngineConfig engineConfig, MapperService mapperServ this.dataFormat = DataFormat.LUCENE; this.isPrimaryEngine = isPrimaryEngine; this.shardPath = shardPath; - // TODO: Add check for Lucene being the primary engine and MapperService has an unknown field, currently // in POC it's only a secondary engine so we don't need to have all fields in this. } @@ -93,11 +98,51 @@ private IndexWriter createWriter(Path directoryPath, long writerGeneration) { } } + + + public class ForceMergeOnlyPolicy extends FilterMergePolicy { + + public ForceMergeOnlyPolicy(MergePolicy wrappedPolicy) { + super(wrappedPolicy); + } + + // Block regular/automatic merges — return null + @Override + public MergeSpecification findMerges( + MergeTrigger mergeTrigger, + SegmentInfos segmentInfos, + MergeContext mergeContext) throws IOException { + // No automatic merges + return null; + } + + // Allow forceMerge — delegates to wrapped policy + @Override + public MergeSpecification findForcedMerges( + SegmentInfos segmentInfos, + int maxSegmentCount, + Map segmentsToMerge, + MergeContext mergeContext) throws IOException { + return in.findForcedMerges( + segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext); + } + + // Allow forceMergeDeletes — delegates to wrapped policy + @Override + public MergeSpecification findForcedDeletesMerges( + SegmentInfos segmentInfos, + MergeContext mergeContext) throws IOException { + return in.findForcedDeletesMerges(segmentInfos, 
mergeContext); + } + } + private IndexWriterConfig getIndexWriterConfig(long writerGeneration, EngineConfig engineConfig) { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); indexWriterConfig.setIndexSort(new Sort(new SortField(ROW_ID, SortField.Type.LONG))); indexWriterConfig.setCodec(new LuceneWriterCodec(engineConfig.getCodec().getName(), engineConfig.getCodec(), writerGeneration)); + MergePolicy mergePolicy = indexWriterConfig.getMergePolicy(); + indexWriterConfig.setMergePolicy(new ForceMergeOnlyPolicy(mergePolicy)); return indexWriterConfig; } From 405a54065e30962bf9cffc08d7acdd6bfe1ce079 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Sat, 7 Mar 2026 14:13:47 +0530 Subject: [PATCH 10/15] Fix substrait issue --- plugins/engine-datafusion/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/engine-datafusion/Cargo.toml b/plugins/engine-datafusion/Cargo.toml index 2252604f5c173..f86b73fd9afe7 100644 --- a/plugins/engine-datafusion/Cargo.toml +++ b/plugins/engine-datafusion/Cargo.toml @@ -49,7 +49,7 @@ object_store = "=0.12.4" url = "2.0" # Substrait support -substrait = "0.47" +substrait = "0.62" # Temporary directory support tempfile = "3.0" From ca3d5c248e919aa714961783adc1e7d7cc5efa6e Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Sat, 7 Mar 2026 14:58:34 +0530 Subject: [PATCH 11/15] Fix for substrait --- plugins/engine-datafusion/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/engine-datafusion/Cargo.toml b/plugins/engine-datafusion/Cargo.toml index f86b73fd9afe7..53bc37c991ffd 100644 --- a/plugins/engine-datafusion/Cargo.toml +++ b/plugins/engine-datafusion/Cargo.toml @@ -49,7 +49,7 @@ object_store = "=0.12.4" url = "2.0" # Substrait support -substrait = "0.62" +substrait = "=0.62.0" # Temporary directory support tempfile = "3.0" From 1ee6be9ca2287c2bc497685e91967a4a201daf1e Mon Sep 17 00:00:00 2001 
From: Arpit Bandejiya Date: Sat, 7 Mar 2026 21:47:41 +0530 Subject: [PATCH 12/15] Remove loggers --- .../engine/ParquetExecutionEngine.java | 6 +- .../engine/read/ParquetDataSourceCodec.java | 2 +- .../core/data/text/KeywordParquetField.java | 4 +- .../memory/ArrowBufferPool.java | 2 +- .../merge/RecordBatchMergeStrategy.java | 2 +- .../parquetdataformat/vsr/ManagedVSR.java | 2 +- .../parquetdataformat/vsr/VSRManager.java | 28 +++---- .../writer/ParquetDocumentInput.java | 4 +- .../engine/exec/FieldAssignmentResolver.java | 72 ++++++++-------- .../composite/CompositeDataFormatWriter.java | 4 +- .../composite/CompositeFieldValidator.java | 8 +- .../CompositeIndexingExecutionEngine.java | 28 +++---- .../exec/coord/CatalogSnapshotManager.java | 16 ++-- .../engine/exec/coord/CompositeEngine.java | 84 +++++++++---------- .../engine/exec/coord/IndexFileDeleter.java | 2 +- .../fields/data/text/KeywordLuceneField.java | 10 --- .../exec/merge/CompositeMergePolicy.java | 4 +- .../index/engine/exec/merge/MergeHandler.java | 2 +- .../engine/exec/merge/MergeScheduler.java | 14 ++-- 19 files changed, 142 insertions(+), 152 deletions(-) diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java index 6590d870c0479..3dd0997554c86 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java @@ -135,7 +135,7 @@ public void deleteFiles(Map> filesToDelete) { Collection parquetFilesToDelete = filesToDelete.get(PARQUET_DATA_FORMAT.name()); for (String fileName : parquetFilesToDelete) { Path filePath = Paths.get(fileName); - logger.info("Deleting file [ParquetExecutionEngine]: {}", filePath); + // logger.info("Deleting file [ParquetExecutionEngine]: 
{}", filePath); try { Files.delete(filePath); } catch (Exception e) { @@ -179,8 +179,8 @@ public long getNativeBytesUsed() { long vsrMemory = arrowBufferPool.getTotalAllocatedBytes(); String shardDataPath = shardPath.getDataPath().toString(); long filteredArrowWriterMemory = RustBridge.getFilteredNativeBytesUsed(shardDataPath); - logger.debug("Native memory used by VSR Buffer Pool: {}", vsrMemory); - logger.debug("Native memory used by ArrowWriters in shard path {}: {}", shardDataPath, filteredArrowWriterMemory); + // logger.debug("Native memory used by VSR Buffer Pool: {}", vsrMemory); + // logger.debug("Native memory used by ArrowWriters in shard path {}: {}", shardDataPath, filteredArrowWriterMemory); return vsrMemory + filteredArrowWriterMemory; } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java index c383f4dd958b4..30906b5aea0bc 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java @@ -24,7 +24,7 @@ public class ParquetDataSourceCodec implements DataSourceCodec { static { try { //JniLibraryLoader.loadLibrary(); - logger.info("DataFusion JNI library loaded successfully"); + // logger.info("DataFusion JNI library loaded successfully"); } catch (Exception e) { logger.error("Failed to load DataFusion JNI library", e); throw new RuntimeException("Failed to initialize DataFusion JNI library", e); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java index 8db009c87574b..bc5c4f31c2540 100644 --- 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java @@ -56,8 +56,8 @@ protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Obj VarCharVector textVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); - logger.info("[COMPOSITE_DEBUG] KeywordParquetField.addToGroup: field=[{}] value=[{}] rowIndex=[{}] capabilities={}", - descriptor.fieldName(), parseValue, rowIndex, descriptor.assignedCapabilities()); + // logger.info("[COMPOSITE_DEBUG] KeywordParquetField.addToGroup: field=[{}] value=[{}] rowIndex=[{}] capabilities={}", + // descriptor.fieldName(), parseValue, rowIndex, descriptor.assignedCapabilities()); } @Override diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java index 99ec60ea700b8..d3501f6289c20 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java @@ -28,7 +28,7 @@ public class ArrowBufferPool implements Closeable { public ArrowBufferPool(Settings settings) { long maxAllocationInBytes = 10L * 1024 * 1024 * 1024; - logger.info("Max native memory allocation for ArrowBufferPool: {} bytes", maxAllocationInBytes); + // logger.info("Max native memory allocation for ArrowBufferPool: {} bytes", maxAllocationInBytes); this.rootAllocator = new RootAllocator(maxAllocationInBytes); this.maxChildAllocation = 1024 * 1024 * 1024; } diff --git 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java index 59fdca52ec10a..c6dbac8517df2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java @@ -79,7 +79,7 @@ public MergeResult mergeParquetFiles(List files, long writerGener ); try { Files.deleteIfExists(Path.of(mergedFilePath)); - logger.info("Stale Merged File Deleted at : [{}]", mergedFilePath); + // logger.info("Stale Merged File Deleted at : [{}]", mergedFilePath); } catch (Exception innerException) { logger.error( () -> new ParameterizedMessage( diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java index 1044ec0c7c654..a041c8ff1897d 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java @@ -90,7 +90,7 @@ private void setState(VSRState newState) { VSRState oldState = state; state = newState; - logger.debug("State transition: {} -> {} for VSR {}", oldState, newState, id); + // logger.debug("State transition: {} -> {} for VSR {}", oldState, newState, id); } /** diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java index 7ada33f27ecfc..718e8c6567756 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java @@ -85,7 +85,7 @@ public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOExcep throw new IOException("Cannot add document - VSR is not active: " + currentVSR.getState()); } - logger.debug("addToManagedVSR called for {}, current row count: {}", fileName, currentVSR.getRowCount()); + // logger.debug("addToManagedVSR called for {}, current row count: {}", fileName, currentVSR.getRowCount()); try { // Since ParquetDocumentInput now works directly with ManagedVSR, @@ -94,7 +94,7 @@ public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOExcep // which will increment the row count. WriteResult result = document.addToWriter(); - logger.debug("After adding document to {}, row count: {}", fileName, currentVSR.getRowCount()); + // logger.debug("After adding document to {}, row count: {}", fileName, currentVSR.getRowCount()); // Check for VSR rotation AFTER successful document processing maybeRotateActiveVSR(); @@ -108,17 +108,17 @@ public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOExcep public ParquetFileMetadata flush(FlushIn flushIn) throws IOException { ManagedVSR currentVSR = managedVSR.get(); - logger.info("Flush called for {}, row count: {}", fileName, currentVSR.getRowCount()); + // logger.info("Flush called for {}, row count: {}", fileName, currentVSR.getRowCount()); try { // Only flush if we have data if (currentVSR.getRowCount() == 0) { - logger.debug("No data to flush for {}, returning null", fileName); + // logger.debug("No data to flush for {}, returning null", fileName); return null; } // Transition VSR to FROZEN state before flushing currentVSR.moveToFrozen(); - logger.info("Flushing {} rows for {}", currentVSR.getRowCount(), fileName); + // logger.info("Flushing {} rows for {}", currentVSR.getRowCount(), fileName); ParquetFileMetadata metadata; // Write through native writer handle @@ -127,7 +127,7 @@ 
public ParquetFileMetadata flush(FlushIn flushIn) throws IOException { writer.close(); metadata = writer.getMetadata(); } - logger.debug("Successfully flushed data for {} with metadata: {}", fileName, metadata); + // logger.debug("Successfully flushed data for {} with metadata: {}", fileName, metadata); return metadata; } catch (Exception e) { @@ -184,20 +184,20 @@ public void maybeRotateActiveVSR() throws IOException { boolean rotated = vsrPool.maybeRotateActiveVSR(); if (rotated) { - logger.debug("VSR rotation occurred after document addition for {}", fileName); + // logger.debug("VSR rotation occurred after document addition for {}", fileName); // Get the frozen VSR that was just created by rotation ManagedVSR frozenVSR = vsrPool.getFrozenVSR(); if (frozenVSR != null) { - logger.debug("Processing frozen VSR: {} with {} rows for {}", - frozenVSR.getId(), frozenVSR.getRowCount(), fileName); + // logger.debug("Processing frozen VSR: {} with {} rows for {}", + // frozenVSR.getId(), frozenVSR.getRowCount(), fileName); // Write the frozen VSR data immediately try (ArrowExport export = frozenVSR.exportToArrow()) { writer.write(export.getArrayAddress(), export.getSchemaAddress()); } - logger.debug("Successfully wrote frozen VSR data for {}", fileName); + // logger.debug("Successfully wrote frozen VSR data for {}", fileName); // Complete the VSR processing vsrPool.completeVSR(frozenVSR); @@ -214,8 +214,8 @@ public void maybeRotateActiveVSR() throws IOException { } updateVSRAndReinitialize(oldVSR, newVSR); - logger.debug("VSR rotation completed for {}, new active VSR: {}, row count: {}", - fileName, newVSR.getId(), newVSR.getRowCount()); + // logger.debug("VSR rotation completed for {}, new active VSR: {}, row count: {}", + // fileName, newVSR.getId(), newVSR.getRowCount()); } } catch (IOException e) { logger.error("Error during VSR rotation for {}: {}", fileName, e.getMessage(), e); @@ -237,13 +237,13 @@ private void checkAndHandleVSRRotation() throws IOException { // 
Check if we got a different VSR (rotation occurred) ManagedVSR oldVSR = managedVSR.get(); if (currentActive != oldVSR) { - logger.debug("VSR rotation detected for {}, updating references", fileName); + // logger.debug("VSR rotation detected for {}, updating references", fileName); // Update the managed VSR reference atomically with field vector map updateVSRAndReinitialize(oldVSR, currentActive); // Note: Writer initialization is not needed per VSR as it's per file - logger.debug("VSR rotation completed for {}, new row count: {}", fileName, currentActive.getRowCount()); + // logger.debug("VSR rotation completed for {}, new row count: {}", fileName, currentActive.getRowCount()); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java index fdc32b0123e8a..e0de2ca2d4a2d 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -60,11 +60,11 @@ public void addField(FieldDescriptor descriptor, Object value) { if (parquetField == null) { // Field type not supported by Parquet format — skip silently - logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", descriptor.fieldName(), descriptor.typeName()); + // logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", descriptor.fieldName(), descriptor.typeName()); return; } - logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", descriptor.fieldName(), descriptor.typeName(), value, descriptor.assignedCapabilities()); + // logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", 
descriptor.fieldName(), descriptor.typeName(), value, descriptor.assignedCapabilities()); parquetField.createField(descriptor, managedVSR, value); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java index 4e8297b36f7d4..d6b163f635160 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java @@ -89,10 +89,10 @@ public static Map resolve( } } result.put(format, new FieldAssignments(descriptors)); - logger.info("[COMPOSITE_DEBUG] Field assignments for format [{}]:", format.name()); - for (Map.Entry descEntry : descriptors.entrySet()) { - logger.info("[COMPOSITE_DEBUG] field=[{}] -> {}", descEntry.getKey(), descEntry.getValue()); - } + // logger.info("[COMPOSITE_DEBUG] Field assignments for format [{}]:", format.name()); + // for (Map.Entry descEntry : descriptors.entrySet()) { + // logger.info("[COMPOSITE_DEBUG] field=[{}] -> {}", descEntry.getKey(), descEntry.getValue()); + // } } return result; } @@ -118,31 +118,31 @@ private static void resolveField( required.add(FieldCapability.STORE); } - logger.info( - "[COMPOSITE_DEBUG] resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", - fieldName, - typeName, - required, - fieldType.isSearchable(), - fieldType.hasDocValues(), - fieldType.isStored() - ); + // logger.info( + // "[COMPOSITE_DEBUG] resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", + // fieldName, + // typeName, + // required, + // fieldType.isSearchable(), + // fieldType.hasDocValues(), + // fieldType.isStored() + // ); // For each required capability, assign to primary if it supports it, else to secondary for (FieldCapability cap : required) { boolean primaryHasCap = primaryFormat != null && 
registry.hasCapability(typeName, primaryFormat, cap); - logger.info( - "[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", - cap, - primaryFormat != null ? primaryFormat.name() : "null", - primaryHasCap, - primaryFormat != null ? registry.getCapabilities(typeName, primaryFormat) : "N/A" - ); + // logger.info( + // "[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", + // cap, + // primaryFormat != null ? primaryFormat.name() : "null", + // primaryHasCap, + // primaryFormat != null ? registry.getCapabilities(typeName, primaryFormat) : "N/A" + // ); if (primaryHasCap) { // Primary handles this capability perFormatCaps.get(primaryFormat).computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)).add(cap); - logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to PRIMARY format [{}]", cap, primaryFormat.name()); + // logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to PRIMARY format [{}]", cap, primaryFormat.name()); } else { // Find a secondary format that supports it boolean assignedToSecondary = false; @@ -151,31 +151,31 @@ private static void resolveField( EngineRole role = entry.getValue(); boolean isSecondary = role != EngineRole.PRIMARY; boolean secondaryHasCap = registry.hasCapability(typeName, secondaryFormat, cap); - logger.info( - "[COMPOSITE_DEBUG] checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", - secondaryFormat.name(), - role, - isSecondary, - secondaryHasCap, - registry.getCapabilities(typeName, secondaryFormat) - ); + // logger.info( + // "[COMPOSITE_DEBUG] checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", + // secondaryFormat.name(), + // role, + // isSecondary, + // secondaryHasCap, + // registry.getCapabilities(typeName, secondaryFormat) + // ); if (isSecondary && secondaryHasCap) { perFormatCaps.get(secondaryFormat) 
.computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)) .add(cap); - logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to SECONDARY format [{}]", cap, secondaryFormat.name()); + // logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to SECONDARY format [{}]", cap, secondaryFormat.name()); assignedToSecondary = true; break; } } if (!assignedToSecondary) { - logger.warn( - "[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned to any format!", - cap, - fieldName, - typeName - ); + // logger.warn( + // "[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned to any format!", + // cap, + // fieldName, + // typeName + // ); } } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java index 613203c973779..33155bf7bd6a5 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -204,8 +204,8 @@ public void addRowIdField(String fieldName, long rowId) { * Skips delegation if no descriptor exists for the field name in that format. 
*/ public void addField(MappedFieldType fieldType, Object value) { - logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format descriptors for {} inputs", - fieldType.name(), fieldType.typeName(), value, inputs.size()); + // logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format descriptors for {} inputs", + // fieldType.name(), fieldType.typeName(), value, inputs.size()); for (DocumentInput input : inputs) { FieldAssignments assignments = fieldAssignmentsMap.get(input.getDataFormat()); if (assignments == null) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java index 67f7f4472f379..9487d574d0ab8 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java @@ -64,8 +64,8 @@ public static void validatePrimaryCoverage( } for (MappedFieldType fieldType : fieldTypes) { if (isInternalMetadataField(fieldType)) { - logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: SKIP internal metadata field=[{}] type=[{}]", - fieldType.name(), fieldType.typeName()); + // logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: SKIP internal metadata field=[{}] type=[{}]", + // fieldType.name(), fieldType.typeName()); continue; } if (!registry.hasAnyCapability(fieldType.typeName(), primaryFormat)) { @@ -74,8 +74,8 @@ public static void validatePrimaryCoverage( + "] has no capabilities registered for primary data format [" + primaryFormat.name() + "]" ); } - logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: OK field=[{}] type=[{}] has capabilities {} in primary format [{}]", - fieldType.name(), fieldType.typeName(), registry.getCapabilities(fieldType.typeName(), primaryFormat), primaryFormat.name()); + // 
logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: OK field=[{}] type=[{}] has capabilities {} in primary format [{}]", + // fieldType.name(), fieldType.typeName(), registry.getCapabilities(fieldType.typeName(), primaryFormat), primaryFormat.name()); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java index 084155f0544d7..f04fe2f0ddcac 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -75,29 +75,29 @@ public CompositeIndexingExecutionEngine( // Setting-based role resolution String primaryDataFormatName = indexSettings.getValue(IndexSettings.INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING); this.roleMap = resolveRoles(primaryDataFormatName, dataSourcePlugins, singlePlugin); - logger.info("[COMPOSITE_DEBUG] Resolved engine roles: {}", roleMap.entrySet().stream() - .map(e -> e.getKey().name() + " -> " + e.getValue()) - .collect(java.util.stream.Collectors.joining(", "))); + // logger.info("[COMPOSITE_DEBUG] Resolved engine roles: {}", roleMap.entrySet().stream() + // .map(e -> e.getKey().name() + " -> " + e.getValue()) + // .collect(java.util.stream.Collectors.joining(", "))); // Build FieldSupportRegistry from plugin registrations this.fieldSupportRegistry = new FieldSupportRegistry(); for (DataSourcePlugin plugin : dataSourcePlugins) { plugin.registerFieldSupport(fieldSupportRegistry); } - logger.info("[COMPOSITE_DEBUG] FieldSupportRegistry built. Registered formats: {}", - fieldSupportRegistry.allFormats().stream().map(DataFormat::name).collect(java.util.stream.Collectors.joining(", "))); + // logger.info("[COMPOSITE_DEBUG] FieldSupportRegistry built. 
Registered formats: {}", + // fieldSupportRegistry.allFormats().stream().map(DataFormat::name).collect(java.util.stream.Collectors.joining(", "))); // Validate field capabilities if composite (multiple plugins) if (!singlePlugin) { CompositeFieldValidator.validatePrimaryCoverage(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); CompositeFieldValidator.validateMappingPropertyCoverage(fieldSupportRegistry, mapperService.fieldTypes()); - logger.info("[COMPOSITE_DEBUG] Composite field validation passed for all mapped fields"); + // logger.info("[COMPOSITE_DEBUG] Composite field validation passed for all mapped fields"); } // Resolve field assignments: which format handles which capability for each field // Both single-plugin and multi-plugin modes go through per-field resolution this.fieldAssignmentsMap = FieldAssignmentResolver.resolve(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); - logger.info("[COMPOSITE_DEBUG] Resolved per-field assignments for {} format(s)", fieldAssignmentsMap.size()); + // logger.info("[COMPOSITE_DEBUG] Resolved per-field assignments for {} format(s)", fieldAssignmentsMap.size()); // Determine primary format from role map DataFormat primaryDataFormat = roleMap.entrySet().stream() @@ -119,7 +119,7 @@ public CompositeIndexingExecutionEngine( this.dataFormat = new Any(dataFormats, primaryDataFormat); - logger.debug("Registered dataformats: {}", this.dataFormat); + // logger.debug("Registered dataformats: {}", this.dataFormat); this.dataFormatWriterPool = new CompositeDataFormatWriterPool( () -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), LinkedList::new, @@ -249,15 +249,15 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { List dataFormatWriters = dataFormatWriterPool.checkoutAll(); List refreshedSegment = refreshInput.getExistingSegments(); List newSegmentList = new ArrayList<>(); - logger.info("[COMPOSITE_DEBUG] CompositeIndexingExecutionEngine.refresh: flushing {} 
writers, existing segments={}", - dataFormatWriters.size(), refreshedSegment.size()); + // logger.info("[COMPOSITE_DEBUG] CompositeIndexingExecutionEngine.refresh: flushing {} writers, existing segments={}", + // dataFormatWriters.size(), refreshedSegment.size()); // flush to disk for (CompositeDataFormatWriter dataFormatWriter : dataFormatWriters) { Segment newSegment = new Segment(dataFormatWriter.getWriterGeneration()); FileInfos fileInfos = dataFormatWriter.flush(null); fileInfos.getWriterFilesMap().forEach((key, value) -> { - logger.info("[COMPOSITE_DEBUG] writer gen={} flushed format=[{}] files={}", - dataFormatWriter.getWriterGeneration(), key.name(), value.getFiles()); + // logger.info("[COMPOSITE_DEBUG] writer gen={} flushed format=[{}] files={}", + // dataFormatWriter.getWriterGeneration(), key.name(), value.getFiles()); newSegment.addSearchableFiles(key.name(), value); }); dataFormatWriter.close(); @@ -267,10 +267,10 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { } if (newSegmentList.isEmpty()) { - logger.info("[COMPOSITE_DEBUG] No new segments produced from flush"); + // logger.info("[COMPOSITE_DEBUG] No new segments produced from flush"); return null; } else { - logger.info("[COMPOSITE_DEBUG] Produced {} new segments from flush", newSegmentList.size()); + // logger.info("[COMPOSITE_DEBUG] Produced {} new segments from flush", newSegmentList.size()); refreshedSegment.addAll(newSegmentList); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java index c544ca114e7cf..cf0d537b4ebec 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java @@ -58,7 +58,7 @@ public CatalogSnapshotManager(CompositeEngine compositeEngine, Committer composi }); 
indexFileDeleter.set(new IndexFileDeleter(compositeEngine, latestCatalogSnapshot, shardPath, deleteUnreferencedFiles)); - logger.debug("[RESET_DEBUG] IndexFileDeleter created, latestCatalogSnapshot={}, deleteUnreferencedFiles={}", latestCatalogSnapshot, deleteUnreferencedFiles); + // logger.debug("[RESET_DEBUG] IndexFileDeleter created, latestCatalogSnapshot={}, deleteUnreferencedFiles={}", latestCatalogSnapshot, deleteUnreferencedFiles); if(latestCatalogSnapshot != null) { latestCatalogSnapshot.setIndexFileDeleterSupplier(indexFileDeleter::get); latestCatalogSnapshot.setCatalogSnapshotMap(catalogSnapshotMap); @@ -146,8 +146,8 @@ public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge one } private synchronized void advanceCatalogSnapshot(List refreshedSegments) throws IOException { - logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: previous id={}, version={}, old segment count={}", - latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), latestCatalogSnapshot.getSegments().size()); + // logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: previous id={}, version={}, old segment count={}", + // latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), latestCatalogSnapshot.getSegments().size()); compositeEngineCommitter.addLuceneIndexes(refreshedSegments); CompositeEngineCatalogSnapshot cecs = new CompositeEngineCatalogSnapshot( latestCatalogSnapshot.getId() + 1, @@ -161,11 +161,11 @@ private synchronized void advanceCatalogSnapshot(List refreshedSegments latestCatalogSnapshot.decRef(); } latestCatalogSnapshot = cecs; - logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: new id={}, version={}, new segment count={}", - latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), refreshedSegments.size()); - for (Segment seg : refreshedSegments) { - logger.info("[COMPOSITE_DEBUG] segment gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); - } + // 
logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: new id={}, version={}, new segment count={}", + // latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), refreshedSegments.size()); + // for (Segment seg : refreshedSegments) { + // logger.info("[COMPOSITE_DEBUG] segment gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + // } } private Segment getSegment(Map writerFileSetMap) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java index 2f7dadbb35d88..86d1da2fdad5e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java @@ -275,7 +275,7 @@ public CompositeEngine( // Now read the userData from the newly created commit userData = store.readLastCommittedSegmentsInfo().getUserData(); - logger.debug("Created initial empty commit with translog UUID: {}", translogUUID); + // logger.debug("Created initial empty commit with translog UUID: {}", translogUUID); } } TranslogEventListener internalTranslogEventListener = new TranslogEventListener() { @@ -317,7 +317,7 @@ public void onFailure(String reason, Exception ex) { lastCommittedWriterGeneration.set(Long.parseLong(lastCommittedData.get(LAST_COMPOSITE_WRITER_GEN_KEY))); } - logger.debug("While initialising Composite Engine - lst commit generation : " + lastCommittedWriterGeneration.get()); + // logger.debug("While initialising Composite Engine - lst commit generation : " + lastCommittedWriterGeneration.get()); this.engine = new CompositeIndexingExecutionEngine( engineConfig, mapperService, @@ -429,7 +429,7 @@ public void onFailure(String reason, Exception ex) { } } } - logger.trace("created new CompositeEngine"); + // logger.trace("created new CompositeEngine"); } private LocalCheckpointTracker createLocalCheckpointTracker( @@ 
-443,14 +443,14 @@ private LocalCheckpointTracker createLocalCheckpointTracker( SequenceNumbers.loadSeqNoInfoFromLuceneCommit(store.readLastCommittedSegmentsInfo().getUserData().entrySet()); maxSeqNo = seqNoStats.maxSeqNo; localCheckpoint = seqNoStats.localCheckpoint; - logger.trace("recovered maximum sequence number [{}] and local checkpoint [{}]", maxSeqNo, localCheckpoint); + // logger.trace("recovered maximum sequence number [{}] and local checkpoint [{}]", maxSeqNo, localCheckpoint); } catch (org.apache.lucene.index.IndexNotFoundException e) { // Local store is empty (remote store recovery scenario) // Initialize with NO_OPS_PERFORMED (-1) - checkpoint will be restored from CatalogSnapshot during first flush - logger.debug( - "Local store is empty during engine initialization, initializing checkpoint tracker with NO_OPS_PERFORMED. " - + "This is expected during remote store recovery where local store has not been initialized yet." - ); + // logger.debug( + // "Local store is empty during engine initialization, initializing checkpoint tracker with NO_OPS_PERFORMED. " + // + "This is expected during remote store recovery where local store has not been initialized yet." 
+ // ); return localCheckpointTrackerSupplier.apply( SequenceNumbers.NO_OPS_PERFORMED, SequenceNumbers.NO_OPS_PERFORMED @@ -548,10 +548,10 @@ public void initializeRefreshListeners(EngineConfig engineConfig) { } } - logger.trace( - "CompositeEngine initialized with {} catalog snapshot aware refresh listeners", - catalogSnapshotAwareRefreshListeners.size() - ); + // logger.trace( + // "CompositeEngine initialized with {} catalog snapshot aware refresh listeners", + // catalogSnapshotAwareRefreshListeners.size() + // ); } public SearchExecEngine getReadEngine(DataFormat dataFormat) { @@ -608,8 +608,8 @@ public Engine.IndexResult index(Engine.Index index) throws IOException { index.documentInput.setSeqNo(index.seqNo()); index.documentInput.setPrimaryTerm(SeqNoFieldMapper.PRIMARY_TERM_NAME, index.primaryTerm()); index.documentInput.setVersion(1); // we are not supporting update in parquet - logger.info("[COMPOSITE_DEBUG] Indexing doc id=[{}] seqNo=[{}] primaryTerm=[{}] — writing to engine", - index.id(), index.seqNo(), index.primaryTerm()); + // logger.info("[COMPOSITE_DEBUG] Indexing doc id=[{}] seqNo=[{}] primaryTerm=[{}] — writing to engine", + // index.id(), index.seqNo(), index.primaryTerm()); WriteResult writeResult = index.documentInput.addToWriter(); indexResult = new Engine.IndexResult(writeResult.version(), index.primaryTerm(), index.seqNo(), writeResult.success()); @@ -807,24 +807,24 @@ public synchronized void refresh(String source) throws EngineException { refreshListeners.forEach(PRE_REFRESH_LISTENER_CONSUMER); CatalogSnapshot preRefreshSnapshot = catalogSnapshotReleasableRef.getRef(); - logger.info("[COMPOSITE_DEBUG] refresh(source=[{}]) starting. 
Pre-refresh CatalogSnapshot: id={}, version={}, segments={}", - source, preRefreshSnapshot.getId(), preRefreshSnapshot.getVersion(), preRefreshSnapshot.getSegments().size()); - for (org.opensearch.index.engine.exec.coord.Segment seg : preRefreshSnapshot.getSegments()) { - logger.info("[COMPOSITE_DEBUG] pre-refresh segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); - } + // logger.info("[COMPOSITE_DEBUG] refresh(source=[{}]) starting. Pre-refresh CatalogSnapshot: id={}, version={}, segments={}", + // source, preRefreshSnapshot.getId(), preRefreshSnapshot.getVersion(), preRefreshSnapshot.getSegments().size()); + // for (org.opensearch.index.engine.exec.coord.Segment seg : preRefreshSnapshot.getSegments()) { + // logger.info("[COMPOSITE_DEBUG] pre-refresh segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + // } RefreshInput refreshInput = new RefreshInput(); refreshInput.setExistingSegments(new ArrayList<>(catalogSnapshotReleasableRef.getRef().getSegments())); RefreshResult refreshResult = engine.refresh(refreshInput); // It should refresh the primary engine, i.e parquet if (refreshResult != null) { - logger.info("[COMPOSITE_DEBUG] refresh produced {} segments", refreshResult.getRefreshedSegments().size()); - for (org.opensearch.index.engine.exec.coord.Segment seg : refreshResult.getRefreshedSegments()) { - logger.info("[COMPOSITE_DEBUG] refreshed segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); - } + // logger.info("[COMPOSITE_DEBUG] refresh produced {} segments", refreshResult.getRefreshedSegments().size()); + // for (org.opensearch.index.engine.exec.coord.Segment seg : refreshResult.getRefreshedSegments()) { + // logger.info("[COMPOSITE_DEBUG] refreshed segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + // } catalogSnapshotManager.applyRefreshResult(refreshResult); refreshed = true; 
} else { - logger.info("[COMPOSITE_DEBUG] refresh returned null (no new data to flush)"); + // logger.info("[COMPOSITE_DEBUG] refresh returned null (no new data to flush)"); } invokeRefreshListeners(refreshed); @@ -1041,11 +1041,11 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { if (waitIfOngoing == false) { return; } - logger.trace("waiting for in-flight flush to finish"); + // logger.trace("waiting for in-flight flush to finish"); flushLock.lock(); - logger.trace("acquired flush lock after blocking"); + // logger.trace("acquired flush lock after blocking"); } else { - logger.trace("acquired flush lock immediately"); + // logger.trace("acquired flush lock immediately"); } try { boolean shouldPeriodicallyFlush = shouldPeriodicallyFlush(); @@ -1056,7 +1056,7 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { try { translogManager.rollTranslogGeneration(); - logger.trace("starting commit for flush; commitTranslog=true"); + // logger.trace("starting commit for flush; commitTranslog=true"); CompositeEngine.ReleasableRef catalogSnapshotToFlushRef = catalogSnapshotManager.acquireSnapshot(); final CatalogSnapshot catalogSnapshotToFlush = catalogSnapshotToFlushRef.getRef(); @@ -1093,7 +1093,7 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { () -> commitData.entrySet().iterator(), catalogSnapshotToFlush ); - logger.trace("finished commit for flush"); + // logger.trace("finished commit for flush"); if (lastCommitedCatalogSnapshotRef != null && lastCommitedCatalogSnapshotRef.getRef() != null) lastCommitedCatalogSnapshotRef.close(); @@ -1269,17 +1269,17 @@ public String getHistoryUUID() { @Override public void flushAndClose() throws IOException { if (isClosed.get() == false) { - logger.trace("flushAndClose now acquire writeLock"); + // logger.trace("flushAndClose now acquire writeLock"); try (ReleasableLock lock = writeLock.acquire()) { - logger.trace("flushAndClose now 
acquired writeLock"); + // logger.trace("flushAndClose now acquired writeLock"); try { - logger.debug("flushing shard on close - this might take some time to sync files to disk"); + // logger.debug("flushing shard on close - this might take some time to sync files to disk"); try { // TODO we might force a flush in the future since we have the write lock already even though recoveries // are running. flush(false, true); } catch (AlreadyClosedException ex) { - logger.debug("engine already closed - skipping flushAndClose"); + // logger.debug("engine already closed - skipping flushAndClose"); } } finally { close(); // double close is not a problem @@ -1362,21 +1362,21 @@ public void failEngine(String reason, @Nullable Exception failure) { logger.warn("failEngine threw exception", inner); // don't bubble up these exceptions up } } else { - logger.debug( - () -> new ParameterizedMessage( - "tried to fail composite engine but could not acquire lock - composite engine should " + "be failed by now [{}]", - reason - ), failure - ); + // logger.debug( + // () -> new ParameterizedMessage( + // "tried to fail composite engine but could not acquire lock - composite engine should " + "be failed by now [{}]", + // reason + // ), failure + // ); } } @Override public void close() throws IOException { if (isClosed.get() == false) { // don't acquire the write lock if we are already closed - logger.debug("close now acquiring writeLock"); + // logger.debug("close now acquiring writeLock"); try (ReleasableLock lock = writeLock.acquire()) { - logger.debug("close acquired writeLock"); + // logger.debug("close acquired writeLock"); closeNoLock("api", closedLatch); } } @@ -1407,7 +1407,7 @@ protected void closeNoLock(String reason, CountDownLatch closedLatch) { } finally { try { store.decRef(); - logger.debug("engine closed [{}]", reason); + // logger.debug("engine closed [{}]", reason); } finally { closedLatch.countDown(); } diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java index 57fdb8546c91b..1c4107d26a1fb 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java @@ -77,7 +77,7 @@ public synchronized void removeFileReferences(CatalogSnapshot snapshot) { } if (!dfFilesToDelete.isEmpty()) { - System.out.println("Files to delete : " + dfFilesToDelete); + // System.out.println("Files to delete : " + dfFilesToDelete); deleteUnreferencedFiles(dfFilesToDelete); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java index 2a18b3da9c6db..c57b3fcbabf61 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java @@ -18,16 +18,11 @@ import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.ParseContext; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - import java.util.EnumSet; import java.util.Set; public class KeywordLuceneField extends LuceneField { - private static final Logger logger = LogManager.getLogger(KeywordLuceneField.class); - @Override public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { String value = (String) parseValue; @@ -36,9 +31,6 @@ public void createField(FieldDescriptor descriptor, ParseContext.Document docume boolean shouldIndex = descriptor.isSearchable(); boolean shouldStore = descriptor.isStored(); - logger.info("[COMPOSITE_DEBUG] KeywordLuceneField.createField: field=[{}] value=[{}] 
capabilities={} shouldIndex={} shouldStore={} hasDocValues={}", - descriptor.fieldName(), value, descriptor.assignedCapabilities(), shouldIndex, shouldStore, descriptor.hasDocValues()); - if (shouldIndex || shouldStore) { FieldType fieldType = new FieldType(); fieldType.setTokenized(false); @@ -47,12 +39,10 @@ public void createField(FieldDescriptor descriptor, ParseContext.Document docume fieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS : IndexOptions.NONE); fieldType.freeze(); document.add(new KeywordFieldMapper.KeywordField(descriptor.fieldName(), binaryValue, fieldType)); - logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added KeywordField for [{}] indexed={} stored={}", descriptor.fieldName(), shouldIndex, shouldStore); } if (descriptor.hasDocValues()) { document.add(new SortedSetDocValuesField(descriptor.fieldName(), binaryValue)); - logger.debug("[COMPOSITE_DEBUG] KeywordLuceneField: added SortedSetDocValuesField for [{}]", descriptor.fieldName()); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java b/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java index f53e5efa0aba0..7cbe9dd86f7a2 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java @@ -50,11 +50,11 @@ public CompositeMergePolicy( ) { this.luceneMergePolicy = mergePolicy; this.logger = Loggers.getLogger(getClass(), shardId); - logger.info("Initialized merge policy: {}", mergePolicy); + // logger.info("Initialized merge policy: {}", mergePolicy); this.infoStream = new InfoStream() { @Override public void message(String component, String message) { - logger.trace(() -> new ParameterizedMessage("Merge [{}]: {}", component, message)); + // logger.trace(() -> new ParameterizedMessage("Merge [{}]: {}", component, message)); } @Override diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeHandler.java b/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeHandler.java index 2f6c7c45ae7b9..872a7c5ad069f 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeHandler.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeHandler.java @@ -187,7 +187,7 @@ private void cleanupStaleMergedFiles(Map mergedWriter Path path = Path.of(wfs.getDirectory(), file); try { Files.deleteIfExists(path); - logger.info("Stale Merged File Deleted at : [{}]", path); + // logger.info("Stale Merged File Deleted at : [{}]", path); } catch (Exception exception) { logger.error( () -> new ParameterizedMessage( diff --git a/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java b/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java index a14b668471d51..d20d3b891aa89 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java @@ -81,8 +81,8 @@ public synchronized void refreshConfig() { return; } - logger.info(() -> new ParameterizedMessage("Updating from merge scheduler config: maxThreadCount {} -> {}, " + - "maxMergeCount {} -> {}", this.maxConcurrentMerges, newMaxThreadCount, this.maxMergeCount, newMaxMergeCount)); + // logger.info(() -> new ParameterizedMessage("Updating from merge scheduler config: maxThreadCount {} -> {}, " + + // "maxMergeCount {} -> {}", this.maxConcurrentMerges, newMaxThreadCount, this.maxMergeCount, newMaxMergeCount)); this.maxConcurrentMerges = newMaxThreadCount; this.maxMergeCount = newMaxMergeCount; @@ -172,7 +172,7 @@ public void run() { long tookMS = 0; try { if (isShutdown.get()) { - logger.debug("[{}] MergeScheduler is shutdown, skipping merge", getName()); + // logger.debug("[{}] MergeScheduler is shutdown, skipping merge", getName()); return; } @@ -180,15 
+180,15 @@ public void run() { currentMergesNumDocs.inc(totalNumDocs); currentMergesSizeInBytes.inc(totalSizeInBytes); - logger.debug("[{}] Starting merge for: {}", getName(), oneMerge); + // logger.debug("[{}] Starting merge for: {}", getName(), oneMerge); MergeResult mergeResult = mergeHandler.doMerge(oneMerge); compositeEngine.applyMergeChanges(mergeResult, oneMerge); mergeHandler.onMergeFinished(oneMerge); tookMS = TimeValue.nsecToMSec((System.nanoTime() - timeNS)); - logger.info("[{}] Merge completed in {}ms for: {} and output is stored in: {}", - getName(), tookMS, oneMerge, mergeResult); + // logger.info("[{}] Merge completed in {}ms for: {} and output is stored in: {}", + // getName(), tookMS, oneMerge, mergeResult); } catch (Exception e) { logger.error("[{}] Unexpected error during merge for: {}", getName(), oneMerge, e); @@ -262,7 +262,7 @@ public int getMaxMergeCount() { //TODO see where we want to call this function for the Merge shutdown public void shutdown() { if (isShutdown.compareAndSet(false, true)) { - logger.info("Shutting down MergeScheduler with {} active merges", activeMerges.get()); + // logger.info("Shutting down MergeScheduler with {} active merges", activeMerges.get()); for (MergeThread thread : mergeThreads) { try { From fdd0003b5e1f071179e6c5b194d9c7cb6ca3035d Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Mon, 9 Mar 2026 10:42:18 +0530 Subject: [PATCH 13/15] Refactor and remote fieldDescriptor --- .../engine/DummyDataUtils.java | 16 +-- .../fields/ParquetField.java | 33 ++----- .../fields/core/data/BinaryParquetField.java | 6 +- .../fields/core/data/BooleanParquetField.java | 6 +- .../core/data/date/DateNanosParquetField.java | 6 +- .../core/data/date/DateParquetField.java | 6 +- .../core/data/number/ByteParquetField.java | 6 +- .../core/data/number/DoubleParquetField.java | 6 +- .../core/data/number/FloatParquetField.java | 6 +- .../data/number/HalfFloatParquetField.java | 6 +- .../core/data/number/IntegerParquetField.java | 6 
+- .../core/data/number/LongParquetField.java | 6 +- .../core/data/number/ShortParquetField.java | 6 +- .../data/number/TokenCountParquetField.java | 6 +- .../data/number/UnsignedLongParquetField.java | 6 +- .../fields/core/data/text/IpParquetField.java | 6 +- .../core/data/text/KeywordParquetField.java | 8 +- .../core/data/text/TextParquetField.java | 6 +- .../fields/core/metadata/IdParquetField.java | 6 +- .../core/metadata/IgnoredParquetField.java | 6 +- .../core/metadata/RoutingParquetField.java | 6 +- .../core/metadata/SizeParquetField.java | 6 +- .../writer/ParquetDocumentInput.java | 14 +-- .../index/engine/exec/AssignedFieldType.java | 48 +++++++++ .../index/engine/exec/DocumentInput.java | 7 +- .../engine/exec/FieldAssignmentResolver.java | 25 +++-- .../index/engine/exec/FieldAssignments.java | 27 ++--- .../index/engine/exec/FieldDescriptor.java | 98 ------------------- .../composite/CompositeDataFormatWriter.java | 43 ++++---- .../exec/lucene/fields/LuceneField.java | 9 +- .../lucene/fields/data/BinaryLuceneField.java | 8 +- .../fields/data/BooleanLuceneField.java | 16 +-- .../fields/data/date/DateLuceneField.java | 16 +-- .../data/date/DateNanosLuceneField.java | 16 +-- .../fields/data/metadata/IdLuceneField.java | 12 +-- .../data/metadata/IgnoredLuceneField.java | 8 +- .../data/metadata/RoutingLuceneField.java | 12 +-- .../fields/data/metadata/SizeLuceneField.java | 12 +-- .../fields/data/number/ByteLuceneField.java | 16 +-- .../data/number/DocCountLuceneField.java | 12 +-- .../fields/data/number/DoubleLuceneField.java | 16 +-- .../fields/data/number/FloatLuceneField.java | 16 +-- .../data/number/HalfFloatLuceneField.java | 16 +-- .../data/number/IntegerLuceneField.java | 16 +-- .../fields/data/number/LongLuceneField.java | 16 +-- .../fields/data/number/ShortLuceneField.java | 16 +-- .../data/number/TokenCountLuceneField.java | 16 +-- .../data/number/UnsignedLongLuceneField.java | 16 +-- .../fields/data/text/IpLuceneField.java | 16 +-- 
.../fields/data/text/KeywordLuceneField.java | 26 ++--- .../fields/data/text/TextLuceneField.java | 16 +-- .../lucene/writer/LuceneDocumentInput.java | 21 ++-- 52 files changed, 346 insertions(+), 431 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/AssignedFieldType.java delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java index fbd6f1598179a..9591c50612a84 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java @@ -5,13 +5,10 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.opensearch.common.SuppressForbidden; import org.opensearch.index.engine.exec.DocumentInput; -import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.converter.FieldTypeConverter; import java.util.Arrays; -import java.util.EnumSet; import java.util.Random; @SuppressForbidden(reason = "Need random for creating temp files") @@ -27,22 +24,17 @@ public static Schema getSchema() { } public static void populateDocumentInput(DocumentInput documentInput) { - EnumSet allCapabilities = EnumSet.allOf(FieldCapability.class); - MappedFieldType idField = FieldTypeConverter.convertToMappedFieldType(ID, new ArrowType.Int(32, true)); - documentInput.addField(new FieldDescriptor(idField.name(), idField.typeName(), allCapabilities), generateRandomId()); + documentInput.addField(idField, generateRandomId()); MappedFieldType nameField = FieldTypeConverter.convertToMappedFieldType(NAME, new ArrowType.Utf8()); - 
documentInput.addField(new FieldDescriptor(nameField.name(), nameField.typeName(), allCapabilities), generateRandomName()); + documentInput.addField(nameField, generateRandomName()); MappedFieldType designationField = FieldTypeConverter.convertToMappedFieldType(DESIGNATION, new ArrowType.Utf8()); - documentInput.addField( - new FieldDescriptor(designationField.name(), designationField.typeName(), allCapabilities), - generateRandomDesignation() - ); + documentInput.addField(designationField, generateRandomDesignation()); MappedFieldType salaryField = FieldTypeConverter.convertToMappedFieldType(SALARY, new ArrowType.Int(32, true)); - documentInput.addField(new FieldDescriptor(salaryField.name(), salaryField.typeName(), allCapabilities), random.nextInt(100000)); + documentInput.addField(salaryField, random.nextInt(100000)); } private static final String ID = "id"; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java index 91b45c613b8b4..51da41021f463 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java @@ -12,7 +12,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import java.util.Objects; import java.util.Set; @@ -39,47 +39,30 @@ public abstract class ParquetField { /** * Adds the parsed field value to the appropriate vector group within the managed VSR. - * This method is responsible for the actual data conversion and storage in the - * columnar format specific to each field type. * - *

Implementations must handle null values appropriately and ensure type safety - * when casting the parseValue to the expected type.

- * - * @param descriptor the per-field descriptor carrying field name, type name, and capability flags + * @param fieldType the per-field MappedFieldType carrying field name, type name, and capability flags * @param managedVSR the managed vector schema root for columnar data storage * @param parseValue the parsed field value to be stored, may be null - * @throws IllegalArgumentException if any parameter is invalid for this field type - * @throws ClassCastException if parseValue cannot be cast to the expected type */ - protected abstract void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue); + protected abstract void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue); /** * Creates and processes a field entry if the field type supports columnar storage. - * This method serves as the main entry point for field processing and includes - * validation logic to ensure only columnar fields are processed. - * - *

The method performs the following operations: - *

    - *
  1. Validates input parameters
  2. - *
  3. Checks if the field vector exists in the managed VSR
  4. - *
  5. Delegates to {@link #addToGroup} for actual data processing
  6. - *
* - * @param descriptor the per-field descriptor carrying field name, type name, and capability flags, must not be null + * @param fieldType the per-field MappedFieldType carrying field name, type name, and capability flags, must not be null * @param managedVSR the managed vector schema root, must not be null * @param parseValue the parsed field value to be processed, may be null - * @throws IllegalArgumentException if descriptor or managedVSR is null */ - public final void createField(final FieldDescriptor descriptor, + public final void createField(final MappedFieldType fieldType, final ManagedVSR managedVSR, final Object parseValue) { - Objects.requireNonNull(descriptor, "FieldDescriptor cannot be null"); + Objects.requireNonNull(fieldType, "MappedFieldType cannot be null"); Objects.requireNonNull(managedVSR, "ManagedVSR cannot be null"); // TODO: support dynamic mapping update // for now ignore the field - if (managedVSR.getVector(descriptor.fieldName()) != null) { - addToGroup(descriptor, managedVSR, parseValue); + if (managedVSR.getVector(fieldType.name()) != null) { + addToGroup(fieldType, managedVSR, parseValue); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java index a692847049cf8..dd5f406e55b87 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import 
com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -44,8 +44,8 @@ public class BinaryParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); varBinaryVector.set(rowCount, (byte[]) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java index a3ddc80151cdb..10d4613b29af0 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -45,8 +45,8 @@ public class BooleanParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - BitVector bitVector = (BitVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + BitVector 
bitVector = (BitVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); bitVector.setSafe(rowIndex, (Boolean) parseValue ? 1 : 0); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java index 4a77c37ddc39c..2d3fb13633c0f 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.date; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampNanoVector; @@ -46,8 +46,8 @@ public class DateNanosParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampNanoVector.setSafe(rowIndex, (long) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java index 
441fd3415b514..0e90aeb40e915 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.date; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampMilliVector; @@ -46,8 +46,8 @@ public class DateParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampMilliVector.setSafe(rowIndex, (long) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java index 9bc2416c31df1..89727fb906cd1 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import 
org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TinyIntVector; @@ -44,8 +44,8 @@ public class ByteParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); tinyIntVector.setSafe(rowCount, (Byte) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java index 11f0b707bd0a1..f08b601976b9e 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float8Vector; @@ -46,8 +46,8 @@ public class DoubleParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - Float8Vector float8Vector = (Float8Vector) 
managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); float8Vector.setSafe(rowCount, (Double) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java index 5792bd9af1a5a..fcd9dea6a0660 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float4Vector; @@ -46,8 +46,8 @@ public class FloatParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); float4Vector.setSafe(rowCount, (Float) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java index 27b3afe2f9993..178b585751050 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float2Vector; @@ -46,8 +46,8 @@ public class HalfFloatParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); float2Vector.setSafe(rowCount, (Short) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java index 954e137a6054e..1650f8a62dd45 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java @@ -9,7 +9,7 @@ package 
com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; @@ -44,8 +44,8 @@ public class IntegerParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - IntVector intVector = (IntVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java index 99727deb6e778..4a3f9a3e5b811 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.BigIntVector; @@ -45,8 +45,8 @@ public class LongParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, 
ManagedVSR managedVSR, Object parseValue) { - BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); bigIntVector.setSafe(rowCount, (Long) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java index d496662202e96..c69fa21b2ee1c 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.SmallIntVector; @@ -45,8 +45,8 @@ public class ShortParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); smallIntVector.setSafe(rowCount, (Short) parseValue); } diff --git 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java index 74b2e25341f06..613abbe70a4e4 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; @@ -47,8 +47,8 @@ public class TokenCountParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - IntVector intVector = (IntVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java index 18bb70f8ed3a9..867590f0fd684 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.number; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.UInt8Vector; @@ -45,8 +45,8 @@ public class UnsignedLongParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); long longValue = ((Number) parseValue).longValue(); uInt8Vector.setSafe(rowCount, longValue); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java index 691183e524b52..dd1c586604189 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import 
com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -52,8 +52,8 @@ public class IpParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); final BytesRef bytesRef = new BytesRef(InetAddressPoint.encode((InetAddress) parseValue)); varBinaryVector.setSafe(rowIndex, bytesRef.bytes, bytesRef.offset, bytesRef.length); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java index bc5c4f31c2540..c65160e8ff38b 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; @@ -52,12 +52,12 @@ public class KeywordParquetField extends ParquetField { private static final Logger logger = LogManager.getLogger(KeywordParquetField.class); @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) 
{ - VarCharVector textVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); // logger.info("[COMPOSITE_DEBUG] KeywordParquetField.addToGroup: field=[{}] value=[{}] rowIndex=[{}] capabilities={}", - // descriptor.fieldName(), parseValue, rowIndex, descriptor.assignedCapabilities()); + // fieldType.name(), parseValue, rowIndex, descriptor.assignedCapabilities()); } @Override diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java index 888032ee3368e..77ce12726f581 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.data.text; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; @@ -48,8 +48,8 @@ public class TextParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - VarCharVector textVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR 
managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java index 4eb9f2ebaac08..34e1621d65f37 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -45,8 +45,8 @@ public class IdParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); BytesRef bytesRef = (BytesRef) parseValue; idVector.setSafe(rowIndex, bytesRef.bytes, bytesRef.offset, bytesRef.length); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java index 9f363e6c3bb36..2a9fdbb3e26f7 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; @@ -45,8 +45,8 @@ public class IgnoredParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); varCharVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java index 6886f27e45fe9..7da6c3935b85d 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java @@ -9,7 +9,7 @@ package 
com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; @@ -45,8 +45,8 @@ public class RoutingParquetField extends ParquetField { @Override - protected void addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - VarCharVector routingVector = (VarCharVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector routingVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); routingVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java index d02affc4f0269..981fc966cac17 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java @@ -9,7 +9,7 @@ package com.parquet.parquetdataformat.fields.core.metadata; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; @@ -46,8 +46,8 @@ public class SizeParquetField extends ParquetField { @Override - protected void 
addToGroup(FieldDescriptor descriptor, ManagedVSR managedVSR, Object parseValue) { - IntVector intVector = (IntVector) managedVSR.getVector(descriptor.fieldName()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java index e0de2ca2d4a2d..a44cd74a14308 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -9,9 +9,9 @@ import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.engine.exec.EngineRole; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.WriteResult; import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.vsr.ManagedVSR; import java.io.IOException; @@ -29,7 +29,7 @@ * *

Key responsibilities: *

    - *
  • Direct field vector population using {@link FieldDescriptor}
  • + *
  • Direct field vector population using {@link MappedFieldType}
  • *
  • Document lifecycle management via ManagedVSR
  • *
  • Integration with the Arrow-based Parquet writer pipeline
  • *
@@ -55,17 +55,17 @@ public void addRowIdField(String fieldName, long rowId) { } @Override - public void addField(FieldDescriptor descriptor, Object value) { - final ParquetField parquetField = ArrowFieldRegistry.getParquetField(descriptor.typeName()); + public void addField(MappedFieldType fieldType, Object value) { + final ParquetField parquetField = ArrowFieldRegistry.getParquetField(fieldType.typeName()); if (parquetField == null) { // Field type not supported by Parquet format — skip silently - // logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", descriptor.fieldName(), descriptor.typeName()); + // logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", fieldType.name(), fieldType.typeName()); return; } - // logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", descriptor.fieldName(), descriptor.typeName(), value, descriptor.assignedCapabilities()); - parquetField.createField(descriptor, managedVSR, value); + // logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}]", fieldType.name(), fieldType.typeName(), value); + parquetField.createField(fieldType, managedVSR, value); } @Override diff --git a/server/src/main/java/org/opensearch/index/engine/exec/AssignedFieldType.java b/server/src/main/java/org/opensearch/index/engine/exec/AssignedFieldType.java new file mode 100644 index 0000000000000..652ddb590e7a1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/AssignedFieldType.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.apache.lucene.search.Query; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.TextSearchInfo; +import org.opensearch.index.mapper.ValueFetcher; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.lookup.SearchLookup; + +/** + * Lightweight MappedFieldType created by {@link FieldAssignmentResolver} to carry + * per-format capability flags (isIndexed, isStored, hasDocValues) for a field. + * Not used for query execution — only for the indexing write path. + */ +@ExperimentalApi +public final class AssignedFieldType extends MappedFieldType { + + private final String type; + + public AssignedFieldType(String name, String typeName, boolean isIndexed, boolean isStored, boolean hasDocValues) { + super(name, isIndexed, isStored, hasDocValues, TextSearchInfo.NONE, null); + this.type = typeName; + } + + @Override + public String typeName() { + return type; + } + + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + return null; + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + throw new UnsupportedOperationException("AssignedFieldType does not support queries"); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java index 787b09b39c7e9..2fd5776e7976a 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java @@ -9,6 +9,7 @@ package org.opensearch.index.engine.exec; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; import java.io.IOException; @@ -20,10 +21,10 @@ public interface DocumentInput extends 
AutoCloseable { /** * Adds a field value to this document input. * - * @param descriptor the {@link FieldDescriptor} carrying the field's name, type, and assigned capabilities - * @param value the field value to add + * @param fieldType the {@link MappedFieldType} carrying the field's name, type, and capability flags + * @param value the field value to add */ - void addField(FieldDescriptor descriptor, Object value); + void addField(MappedFieldType fieldType, Object value); T getFinalInput(); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java index d6b163f635160..c62d1c257254f 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java @@ -55,9 +55,9 @@ public static Map resolve( } } - // Accumulate capabilities per field name per format before creating FieldDescriptors + // Accumulate capabilities per field name per format before creating AssignedFieldType objects Map>> perFormatCaps = new HashMap<>(); - // Track typeName per fieldName for FieldDescriptor construction + // Track typeName per fieldName for AssignedFieldType construction Map fieldNameToTypeName = new HashMap<>(); for (DataFormat format : roleMap.keySet()) { perFormatCaps.put(format, new HashMap<>()); @@ -74,25 +74,30 @@ public static Map resolve( resolveField(registry, roleMap, primaryFormat, perFormatCaps, fieldType, fieldName, typeName); } - // Convert accumulated capabilities into FieldDescriptor objects and wrap into FieldAssignments + // Convert accumulated capabilities into AssignedFieldType objects and wrap into FieldAssignments Map result = new HashMap<>(); for (Map.Entry>> formatEntry : perFormatCaps.entrySet()) { DataFormat format = formatEntry.getKey(); Map> fieldCaps = formatEntry.getValue(); - Map descriptors = new HashMap<>(); + Map 
assignedTypes = new HashMap<>(); for (Map.Entry> fieldEntry : fieldCaps.entrySet()) { String fieldName = fieldEntry.getKey(); EnumSet caps = fieldEntry.getValue(); if (!caps.isEmpty()) { String typeName = fieldNameToTypeName.get(fieldName); - descriptors.put(fieldName, new FieldDescriptor(fieldName, typeName, caps)); + assignedTypes.put( + fieldName, + new AssignedFieldType( + fieldName, + typeName, + caps.contains(FieldCapability.INDEX), + caps.contains(FieldCapability.STORE), + caps.contains(FieldCapability.DOC_VALUES) + ) + ); } } - result.put(format, new FieldAssignments(descriptors)); - // logger.info("[COMPOSITE_DEBUG] Field assignments for format [{}]:", format.name()); - // for (Map.Entry descEntry : descriptors.entrySet()) { - // logger.info("[COMPOSITE_DEBUG] field=[{}] -> {}", descEntry.getKey(), descEntry.getValue()); - // } + result.put(format, new FieldAssignments(assignedTypes)); } return result; } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java index a5e2031ae98fb..55cc2f19cde22 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java @@ -9,14 +9,13 @@ package org.opensearch.index.engine.exec; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; -import java.util.Collections; import java.util.Map; -import java.util.Set; /** * Per-format view of field capability assignments resolved by the composite engine. - * Maps fieldName → FieldDescriptor that this format is responsible for. + * Maps fieldName → MappedFieldType that this format is responsible for. * *

Used by DocumentInput implementations to decide whether to write a given field. * If a field name has no entry, this format should skip it entirely. @@ -24,31 +23,23 @@ @ExperimentalApi public class FieldAssignments { - private final Map descriptors; + private final Map fieldTypes; - public FieldAssignments(Map descriptors) { - this.descriptors = Map.copyOf(descriptors); + public FieldAssignments(Map fieldTypes) { + this.fieldTypes = Map.copyOf(fieldTypes); } /** * Returns true if this format should handle the given field name. */ public boolean shouldHandle(String fieldName) { - return descriptors.containsKey(fieldName); + return fieldTypes.containsKey(fieldName); } /** - * Returns the assigned capabilities for a field name, or empty set if none. + * Returns the MappedFieldType for a given field name, or null if none. */ - public Set getAssignedCapabilities(String fieldName) { - FieldDescriptor fd = descriptors.get(fieldName); - return fd != null ? fd.assignedCapabilities() : Collections.emptySet(); - } - - /** - * Returns the full FieldDescriptor for a given field name, or null if none. - */ - public FieldDescriptor getDescriptor(String fieldName) { - return descriptors.get(fieldName); + public MappedFieldType getFieldType(String fieldName) { + return fieldTypes.get(fieldName); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java deleted file mode 100644 index 6de1e5669cbb2..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldDescriptor.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.engine.exec; - -import org.opensearch.common.annotation.ExperimentalApi; - -import java.util.Collections; -import java.util.EnumSet; -import java.util.Objects; -import java.util.Set; - -/** - * Immutable per-field descriptor that carries a field's name, type name, resolved capabilities, - * and pre-computed boolean flags for O(1) hot-path capability checks. - * - *

Created by {@link FieldAssignmentResolver} during engine initialization. Each mapped field - * gets its own descriptor per data format, replacing the previous type-name-keyed lookup. - */ -@ExperimentalApi -public final class FieldDescriptor { - - private final String fieldName; - private final String typeName; - private final Set assignedCapabilities; - private final boolean searchable; - private final boolean hasDocValues; - private final boolean stored; - - /** - * Constructs a new FieldDescriptor. - * - * @param fieldName the mapped field name (e.g., "title", "price") - * @param typeName the field type name (e.g., "keyword", "long") - * @param assignedCapabilities the capabilities this format is responsible for on this field - */ - public FieldDescriptor(String fieldName, String typeName, Set assignedCapabilities) { - this.fieldName = Objects.requireNonNull(fieldName); - this.typeName = Objects.requireNonNull(typeName); - this.assignedCapabilities = Collections.unmodifiableSet(EnumSet.copyOf(assignedCapabilities)); - this.searchable = assignedCapabilities.contains(FieldCapability.INDEX); - this.hasDocValues = assignedCapabilities.contains(FieldCapability.DOC_VALUES); - this.stored = assignedCapabilities.contains(FieldCapability.STORE); - } - - /** Returns the mapped field name. */ - public String fieldName() { - return fieldName; - } - - /** Returns the field type name. */ - public String typeName() { - return typeName; - } - - /** Returns the immutable set of assigned capabilities. */ - public Set assignedCapabilities() { - return assignedCapabilities; - } - - /** Returns true if the assigned capabilities include {@link FieldCapability#INDEX}. */ - public boolean isSearchable() { - return searchable; - } - - /** Returns true if the assigned capabilities include {@link FieldCapability#DOC_VALUES}. */ - public boolean hasDocValues() { - return hasDocValues; - } - - /** Returns true if the assigned capabilities include {@link FieldCapability#STORE}. 
*/ - public boolean isStored() { - return stored; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - FieldDescriptor that = (FieldDescriptor) o; - return fieldName.equals(that.fieldName) && typeName.equals(that.typeName) && assignedCapabilities.equals(that.assignedCapabilities); - } - - @Override - public int hashCode() { - return Objects.hash(fieldName, typeName, assignedCapabilities); - } - - @Override - public String toString() { - return "FieldDescriptor{" + "fieldName='" + fieldName + '\'' + ", typeName='" + typeName + '\'' + ", capabilities=" + assignedCapabilities + '}'; - } -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java index 33155bf7bd6a5..92c615e69621b 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -14,8 +14,7 @@ import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.engine.exec.FieldAssignments; -import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; +import org.opensearch.index.engine.exec.AssignedFieldType; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.RowIdGenerator; @@ -198,58 +197,54 @@ public void addRowIdField(String fieldName, long rowId) { } /** - * Entry point from the mapper layer. Resolves {@link MappedFieldType} to {@link FieldDescriptor} - * per format using each delegate's {@link FieldAssignments}, then delegates to the format-specific - * {@link DocumentInput#addField(FieldDescriptor, Object)}. 
- * Skips delegation if no descriptor exists for the field name in that format. + * Entry point from the mapper layer. Resolves per-format {@link MappedFieldType} + * using each delegate's {@link FieldAssignments}, then delegates to the format-specific + * {@link DocumentInput#addField(MappedFieldType, Object)}. + * Skips delegation if no field type exists for the field name in that format. */ public void addField(MappedFieldType fieldType, Object value) { - // logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format descriptors for {} inputs", + // logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format field types for {} inputs", // fieldType.name(), fieldType.typeName(), value, inputs.size()); for (DocumentInput input : inputs) { FieldAssignments assignments = fieldAssignmentsMap.get(input.getDataFormat()); if (assignments == null) { continue; } - FieldDescriptor descriptor = assignments.getDescriptor(fieldType.name()); - if (descriptor == null) { + MappedFieldType perFormatType = assignments.getFieldType(fieldType.name()); + if (perFormatType == null) { continue; } - input.addField(descriptor, value); - } - } - - @Override - public void addField(FieldDescriptor descriptor, Object value) { - // Direct FieldDescriptor delegation — used for pre-resolved fields - for (DocumentInput input : inputs) { - input.addField(descriptor, value); + input.addField(perFormatType, value); } } @Override public void setVersion(long version) { this.version = version; - FieldDescriptor versionDescriptor = new FieldDescriptor( + MappedFieldType versionType = new AssignedFieldType( VersionFieldMapper.NAME, VersionFieldMapper.CONTENT_TYPE, - java.util.EnumSet.of(FieldCapability.DOC_VALUES) + false, + false, + true ); for (DocumentInput input : inputs) { - input.addField(versionDescriptor, version); + input.addField(versionType, version); } } @Override public void setSeqNo(long seqNo) { this.seqNo = seqNo; 
- FieldDescriptor seqNoDescriptor = new FieldDescriptor( + MappedFieldType seqNoType = new AssignedFieldType( SeqNoFieldMapper.NAME, SeqNoFieldMapper.CONTENT_TYPE, - java.util.EnumSet.of(FieldCapability.INDEX, FieldCapability.DOC_VALUES) + true, + false, + true ); for (DocumentInput input : inputs) { - input.addField(seqNoDescriptor, seqNo); + input.addField(seqNoType, seqNo); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java index 9ec8031fab3cf..209b682cac957 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java @@ -11,7 +11,6 @@ import org.apache.lucene.document.Field; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.mapper.FieldNamesFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; @@ -24,19 +23,19 @@ * *

Each subclass handles a specific field type (keyword, long, text, etc.) and * creates the appropriate Lucene index fields based on the capabilities described - * in the {@link FieldDescriptor}. + * in the {@link MappedFieldType}. */ @ExperimentalApi public abstract class LuceneField { /** - * Creates Lucene index fields for the given value based on the descriptor's assigned capabilities. + * Creates Lucene index fields for the given value based on the field type's capability flags. * - * @param descriptor the per-field descriptor carrying field name, type name, and capability flags + * @param fieldType the per-field MappedFieldType carrying field name, type name, and capability flags * @param document the Lucene document to add fields to * @param parseValue the parsed field value to index */ - public abstract void createField(FieldDescriptor descriptor, Document document, Object parseValue); + public abstract void createField(MappedFieldType fieldType, Document document, Object parseValue); protected final void createFieldNamesField(MappedFieldType mappedFieldType, Document document, ParseContext context) { assert !mappedFieldType.hasDocValues() : "_field_names should only be used when doc_values are turned off"; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java index 256d3a53a82af..9e85aa5bc9e57 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java @@ -11,8 +11,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import 
org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -21,10 +21,10 @@ public class BinaryLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final BytesRef value = (BytesRef) parseValue; - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value)); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java index ad2bf281d49d1..455c403b463ba 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java @@ -14,8 +14,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -24,21 +24,21 @@ public class BooleanLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Boolean value = (Boolean) parseValue; - if (descriptor.isSearchable()) { + if (fieldType.isSearchable()) { FieldType ft = new FieldType(); ft.setOmitNorms(true); 
ft.setIndexOptions(IndexOptions.DOCS); ft.setTokenized(false); ft.freeze(); - document.add(new Field(descriptor.fieldName(), value ? "T" : "F", ft)); + document.add(new Field(fieldType.name(), value ? "T" : "F", ft)); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value ? 1 : 0)); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value ? 1 : 0)); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value ? "T" : "F")); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value ? "T" : "F")); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java index 67c6fa7f87bc2..98f37acaedf99 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class DateLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final long timestamp = (long) parseValue; - if (descriptor.isSearchable()) { - document.add(new LongPoint(descriptor.fieldName(), 
timestamp)); + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), timestamp)); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), timestamp)); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), timestamp)); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), timestamp)); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), timestamp)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java index ab27b411ba78e..b659a6298b5a4 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class DateNanosLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final long timestamp = (long) parseValue; - if (descriptor.isSearchable()) { - document.add(new LongPoint(descriptor.fieldName(), timestamp)); + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), timestamp)); } - 
if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), timestamp)); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), timestamp)); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), timestamp)); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), timestamp)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java index 13f389195ad45..00feb0ce36bbf 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,13 +22,13 @@ public class IdLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final BytesRef value = (BytesRef) parseValue; - if (descriptor.hasDocValues()) { - document.add(new BinaryDocValuesField(descriptor.fieldName(), value)); + if (fieldType.hasDocValues()) { + document.add(new BinaryDocValuesField(fieldType.name(), value)); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value)); + if (fieldType.isStored()) { + 
document.add(new StoredField(fieldType.name(), value)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java index aa47de8f92e55..62b91d246cf62 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java @@ -10,8 +10,8 @@ import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -20,10 +20,10 @@ public class IgnoredLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final String value = parseValue.toString(); - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value)); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java index b9dba9844d3e5..590dbfbe146f8 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java @@ -12,8 +12,8 @@ import 
org.apache.lucene.document.StoredField; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,13 +22,13 @@ public class RoutingLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final String value = parseValue.toString(); - if (descriptor.hasDocValues()) { - document.add(new SortedSetDocValuesField(descriptor.fieldName(), new BytesRef(value))); + if (fieldType.hasDocValues()) { + document.add(new SortedSetDocValuesField(fieldType.name(), new BytesRef(value))); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value)); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java index 28209584a7aad..3f100dcf1cb88 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java @@ -11,8 +11,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import 
org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -21,13 +21,13 @@ public class SizeLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.intValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.intValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.intValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java index 830a63e62b88d..3e74b55fa9789 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class ByteLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object 
parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new IntPoint(descriptor.fieldName(), value.byteValue())); + if (fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.byteValue())); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.byteValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.byteValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.byteValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.byteValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java index a7e566a72c3ec..922c63cc32f25 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java @@ -11,8 +11,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -21,13 +21,13 @@ public class DocCountLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType 
fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.longValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.longValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.longValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java index 5d48bf6811872..7509fccd12dab 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java @@ -13,8 +13,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.NumericUtils; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,16 +23,16 @@ public class DoubleLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new DoublePoint(descriptor.fieldName(), value.doubleValue())); + if (fieldType.isSearchable()) { + document.add(new DoublePoint(fieldType.name(), 
value.doubleValue())); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), NumericUtils.doubleToSortableLong(value.doubleValue()))); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.doubleToSortableLong(value.doubleValue()))); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.doubleValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.doubleValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java index 83b17f7244dde..93c4cd1fbc2a5 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java @@ -13,8 +13,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.util.NumericUtils; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -23,16 +23,16 @@ public class FloatLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new FloatPoint(descriptor.fieldName(), value.floatValue())); + if (fieldType.isSearchable()) { + document.add(new FloatPoint(fieldType.name(), 
value.floatValue())); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), NumericUtils.floatToSortableInt(value.floatValue()))); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.floatToSortableInt(value.floatValue()))); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.floatValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.floatValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java index 9e3e7dff23dc2..f8364b43dfc9d 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class HalfFloatLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new HalfFloatPoint(descriptor.fieldName(), value.floatValue())); + if (fieldType.isSearchable()) { + document.add(new 
HalfFloatPoint(fieldType.name(), value.floatValue())); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), HalfFloatPoint.halfFloatToSortableShort(value.floatValue()))); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), HalfFloatPoint.halfFloatToSortableShort(value.floatValue()))); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.floatValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.floatValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java index c9a8727a6fb7c..0285e384ddc14 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class IntegerLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new IntPoint(descriptor.fieldName(), value.intValue())); + if 
(fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.intValue())); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.intValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.intValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.intValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java index 1c5739893c150..50e17bcb3931b 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class LongLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new LongPoint(descriptor.fieldName(), value.longValue())); + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), 
value.longValue())); } - if (descriptor.hasDocValues()) { - document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.longValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.longValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.longValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java index f85f51445abf8..edc97a6a82668 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class ShortLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new IntPoint(descriptor.fieldName(), value.shortValue())); + if (fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.shortValue())); } - if (descriptor.hasDocValues()) { - 
document.add(new SortedNumericDocValuesField(descriptor.fieldName(), value.shortValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.shortValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.shortValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.shortValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java index 5c558987ce1ba..68339b083d995 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class TokenCountLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new IntPoint(descriptor.fieldName(), value.intValue())); + if (fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.intValue())); } - if (descriptor.hasDocValues()) { - document.add(new 
SortedNumericDocValuesField(descriptor.fieldName(), value.intValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.intValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.intValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java index 1e9cf4ca83aa8..8f5d76f106dc4 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,16 +22,16 @@ public class UnsignedLongLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final Number value = (Number) parseValue; - if (descriptor.isSearchable()) { - document.add(new LongPoint(descriptor.fieldName(), value.longValue())); + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), value.longValue())); } - if (descriptor.hasDocValues()) { - document.add(new 
SortedNumericDocValuesField(descriptor.fieldName(), value.longValue())); + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), value.longValue())); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.longValue())); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java index d663a9825c3ea..bd47d90d50889 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java @@ -14,8 +14,8 @@ import org.apache.lucene.util.BytesRef; import org.opensearch.common.network.InetAddresses; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.net.InetAddress; @@ -25,17 +25,17 @@ public class IpLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final InetAddress address = (InetAddress) parseValue; final byte[] encoded = InetAddresses.forString(address.getHostAddress()).getAddress(); - if (descriptor.isSearchable()) { - document.add(new InetAddressPoint(descriptor.fieldName(), InetAddresses.forString(address.getHostAddress()))); + if (fieldType.isSearchable()) { + document.add(new InetAddressPoint(fieldType.name(), 
InetAddresses.forString(address.getHostAddress()))); } - if (descriptor.hasDocValues()) { - document.add(new SortedSetDocValuesField(descriptor.fieldName(), new BytesRef(encoded))); + if (fieldType.hasDocValues()) { + document.add(new SortedSetDocValuesField(fieldType.name(), new BytesRef(encoded))); } - if (descriptor.isStored()) { - document.add(new StoredField(descriptor.fieldName(), new BytesRef(encoded))); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), new BytesRef(encoded))); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java index c57b3fcbabf61..2d7915db7846c 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java @@ -13,9 +13,9 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.util.BytesRef; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -24,25 +24,25 @@ public class KeywordLuceneField extends LuceneField { @Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { String value = (String) parseValue; final BytesRef binaryValue = new BytesRef(value); - boolean shouldIndex = descriptor.isSearchable(); - boolean shouldStore = descriptor.isStored(); + boolean shouldIndex = fieldType.isSearchable(); 
+ boolean shouldStore = fieldType.isStored(); if (shouldIndex || shouldStore) { - FieldType fieldType = new FieldType(); - fieldType.setTokenized(false); - fieldType.setStored(shouldStore); - fieldType.setOmitNorms(true); - fieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS : IndexOptions.NONE); - fieldType.freeze(); - document.add(new KeywordFieldMapper.KeywordField(descriptor.fieldName(), binaryValue, fieldType)); + FieldType luceneFieldType = new FieldType(); + luceneFieldType.setTokenized(false); + luceneFieldType.setStored(shouldStore); + luceneFieldType.setOmitNorms(true); + luceneFieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS : IndexOptions.NONE); + luceneFieldType.freeze(); + document.add(new KeywordFieldMapper.KeywordField(fieldType.name(), binaryValue, luceneFieldType)); } - if (descriptor.hasDocValues()) { - document.add(new SortedSetDocValuesField(descriptor.fieldName(), binaryValue)); + if (fieldType.hasDocValues()) { + document.add(new SortedSetDocValuesField(fieldType.name(), binaryValue)); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java index b74a9e9adc7ae..ed07348a13f78 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java @@ -12,8 +12,8 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.opensearch.index.engine.exec.FieldCapability; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.ParseContext; import java.util.EnumSet; @@ -22,17 +22,17 @@ public class TextLuceneField extends LuceneField { 
@Override - public void createField(FieldDescriptor descriptor, ParseContext.Document document, Object parseValue) { + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { final String value = (String) parseValue; - boolean shouldIndex = descriptor.isSearchable(); - boolean shouldStore = descriptor.isStored(); + boolean shouldIndex = fieldType.isSearchable(); + boolean shouldStore = fieldType.isStored(); if (shouldIndex || shouldStore) { - FieldType fieldType = new FieldType(); - fieldType.setStored(shouldStore); - fieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.NONE); - Field field = new Field(descriptor.fieldName(), value, fieldType); + FieldType luceneFieldType = new FieldType(); + luceneFieldType.setStored(shouldStore); + luceneFieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.NONE); + Field field = new Field(fieldType.name(), value, luceneFieldType); document.add(field); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java index a0a5fe80e69ef..678b640329ef9 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java @@ -15,10 +15,10 @@ import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.engine.exec.EngineRole; -import org.opensearch.index.engine.exec.FieldDescriptor; import org.opensearch.index.engine.exec.WriteResult; import org.opensearch.index.engine.exec.lucene.fields.LuceneField; import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; +import org.opensearch.index.mapper.MappedFieldType; import 
org.opensearch.index.mapper.ParseContext; import java.io.IOException; @@ -42,27 +42,26 @@ public void addRowIdField(String fieldName, long rowId) { @SuppressWarnings("unchecked") @Override - public void addField(FieldDescriptor descriptor, Object value) { - final LuceneField luceneField = LuceneFieldRegistry.getLuceneField(descriptor.typeName()); + public void addField(MappedFieldType fieldType, Object value) { + final LuceneField luceneField = LuceneFieldRegistry.getLuceneField(fieldType.typeName()); if (luceneField == null) { // Field type not supported by Lucene format — skip silently logger.debug( "[COMPOSITE_DEBUG] Lucene SKIP field=[{}] type=[{}] — no LuceneField registered in LuceneFieldRegistry", - descriptor.fieldName(), - descriptor.typeName() + fieldType.name(), + fieldType.typeName() ); return; } logger.debug( - "[COMPOSITE_DEBUG] Lucene ACCEPT field=[{}] type=[{}] value=[{}] capabilities={}", - descriptor.fieldName(), - descriptor.typeName(), - value, - descriptor.assignedCapabilities() + "[COMPOSITE_DEBUG] Lucene ACCEPT field=[{}] type=[{}] value=[{}]", + fieldType.name(), + fieldType.typeName(), + value ); - luceneField.createField(descriptor, document, value); + luceneField.createField(fieldType, document, value); } /** From 9f5454752c6d2aa43cd0d9ff63d367256f161c50 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Mon, 9 Mar 2026 11:45:34 +0530 Subject: [PATCH 14/15] Add debug loggers --- .../engine/exec/FieldAssignmentResolver.java | 64 +++++++++---------- .../composite/CompositeDataFormatWriter.java | 4 +- .../composite/CompositeFieldValidator.java | 8 +-- .../CompositeIndexingExecutionEngine.java | 26 ++++---- .../exec/coord/CatalogSnapshotManager.java | 16 ++--- .../exec/merge/CompositeMergePolicy.java | 4 +- 6 files changed, 61 insertions(+), 61 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java index 
c62d1c257254f..d3a4c03e7aad1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java @@ -123,31 +123,31 @@ private static void resolveField( required.add(FieldCapability.STORE); } - // logger.info( - // "[COMPOSITE_DEBUG] resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", - // fieldName, - // typeName, - // required, - // fieldType.isSearchable(), - // fieldType.hasDocValues(), - // fieldType.isStored() - // ); + logger.debug( + "[COMPOSITE_DEBUG] resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", + fieldName, + typeName, + required, + fieldType.isSearchable(), + fieldType.hasDocValues(), + fieldType.isStored() + ); // For each required capability, assign to primary if it supports it, else to secondary for (FieldCapability cap : required) { boolean primaryHasCap = primaryFormat != null && registry.hasCapability(typeName, primaryFormat, cap); - // logger.info( - // "[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", - // cap, - // primaryFormat != null ? primaryFormat.name() : "null", - // primaryHasCap, - // primaryFormat != null ? registry.getCapabilities(typeName, primaryFormat) : "N/A" - // ); + logger.debug( + "[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", + cap, + primaryFormat != null ? primaryFormat.name() : "null", + primaryHasCap, + primaryFormat != null ? 
registry.getCapabilities(typeName, primaryFormat) : "N/A" + ); if (primaryHasCap) { // Primary handles this capability perFormatCaps.get(primaryFormat).computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)).add(cap); - // logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to PRIMARY format [{}]", cap, primaryFormat.name()); + logger.debug("[COMPOSITE_DEBUG] -> assigned [{}] to PRIMARY format [{}]", cap, primaryFormat.name()); } else { // Find a secondary format that supports it boolean assignedToSecondary = false; @@ -156,31 +156,31 @@ private static void resolveField( EngineRole role = entry.getValue(); boolean isSecondary = role != EngineRole.PRIMARY; boolean secondaryHasCap = registry.hasCapability(typeName, secondaryFormat, cap); - // logger.info( - // "[COMPOSITE_DEBUG] checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", - // secondaryFormat.name(), - // role, - // isSecondary, - // secondaryHasCap, - // registry.getCapabilities(typeName, secondaryFormat) - // ); + logger.debug( + "[COMPOSITE_DEBUG] checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", + secondaryFormat.name(), + role, + isSecondary, + secondaryHasCap, + registry.getCapabilities(typeName, secondaryFormat) + ); if (isSecondary && secondaryHasCap) { perFormatCaps.get(secondaryFormat) .computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)) .add(cap); - // logger.info("[COMPOSITE_DEBUG] -> assigned [{}] to SECONDARY format [{}]", cap, secondaryFormat.name()); + logger.debug("[COMPOSITE_DEBUG] -> assigned [{}] to SECONDARY format [{}]", cap, secondaryFormat.name()); assignedToSecondary = true; break; } } if (!assignedToSecondary) { - // logger.warn( - // "[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned to any format!", - // cap, - // fieldName, - // typeName - // ); + logger.warn( + "[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned 
to any format!", + cap, + fieldName, + typeName + ); } } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java index 92c615e69621b..28ba22c7675f8 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -203,8 +203,8 @@ public void addRowIdField(String fieldName, long rowId) { * Skips delegation if no field type exists for the field name in that format. */ public void addField(MappedFieldType fieldType, Object value) { - // logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format field types for {} inputs", - // fieldType.name(), fieldType.typeName(), value, inputs.size()); + logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format field types for {} inputs", + fieldType.name(), fieldType.typeName(), value, inputs.size()); for (DocumentInput input : inputs) { FieldAssignments assignments = fieldAssignmentsMap.get(input.getDataFormat()); if (assignments == null) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java index 9487d574d0ab8..9003b30100bdd 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java @@ -64,8 +64,8 @@ public static void validatePrimaryCoverage( } for (MappedFieldType fieldType : fieldTypes) { if (isInternalMetadataField(fieldType)) { - // logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: SKIP internal metadata field=[{}] type=[{}]", - // fieldType.name(), 
fieldType.typeName()); + logger.debug("[COMPOSITE_DEBUG] validatePrimaryCoverage: SKIP internal metadata field=[{}] type=[{}]", + fieldType.name(), fieldType.typeName()); continue; } if (!registry.hasAnyCapability(fieldType.typeName(), primaryFormat)) { @@ -74,8 +74,8 @@ public static void validatePrimaryCoverage( + "] has no capabilities registered for primary data format [" + primaryFormat.name() + "]" ); } - // logger.info("[COMPOSITE_DEBUG] validatePrimaryCoverage: OK field=[{}] type=[{}] has capabilities {} in primary format [{}]", - // fieldType.name(), fieldType.typeName(), registry.getCapabilities(fieldType.typeName(), primaryFormat), primaryFormat.name()); + logger.debug("[COMPOSITE_DEBUG] validatePrimaryCoverage: OK field=[{}] type=[{}] has capabilities {} in primary format [{}]", + fieldType.name(), fieldType.typeName(), registry.getCapabilities(fieldType.typeName(), primaryFormat), primaryFormat.name()); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java index f04fe2f0ddcac..47d624c644819 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -75,29 +75,29 @@ public CompositeIndexingExecutionEngine( // Setting-based role resolution String primaryDataFormatName = indexSettings.getValue(IndexSettings.INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING); this.roleMap = resolveRoles(primaryDataFormatName, dataSourcePlugins, singlePlugin); - // logger.info("[COMPOSITE_DEBUG] Resolved engine roles: {}", roleMap.entrySet().stream() - // .map(e -> e.getKey().name() + " -> " + e.getValue()) - // .collect(java.util.stream.Collectors.joining(", "))); + logger.debug("[COMPOSITE_DEBUG] Resolved engine roles: {}", 
roleMap.entrySet().stream() + .map(e -> e.getKey().name() + " -> " + e.getValue()) + .collect(java.util.stream.Collectors.joining(", "))); // Build FieldSupportRegistry from plugin registrations this.fieldSupportRegistry = new FieldSupportRegistry(); for (DataSourcePlugin plugin : dataSourcePlugins) { plugin.registerFieldSupport(fieldSupportRegistry); } - // logger.info("[COMPOSITE_DEBUG] FieldSupportRegistry built. Registered formats: {}", - // fieldSupportRegistry.allFormats().stream().map(DataFormat::name).collect(java.util.stream.Collectors.joining(", "))); + logger.debug("[COMPOSITE_DEBUG] FieldSupportRegistry built. Registered formats: {}", + fieldSupportRegistry.allFormats().stream().map(DataFormat::name).collect(java.util.stream.Collectors.joining(", "))); // Validate field capabilities if composite (multiple plugins) if (!singlePlugin) { CompositeFieldValidator.validatePrimaryCoverage(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); CompositeFieldValidator.validateMappingPropertyCoverage(fieldSupportRegistry, mapperService.fieldTypes()); - // logger.info("[COMPOSITE_DEBUG] Composite field validation passed for all mapped fields"); + logger.debug("[COMPOSITE_DEBUG] Composite field validation passed for all mapped fields"); } // Resolve field assignments: which format handles which capability for each field // Both single-plugin and multi-plugin modes go through per-field resolution this.fieldAssignmentsMap = FieldAssignmentResolver.resolve(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); - // logger.info("[COMPOSITE_DEBUG] Resolved per-field assignments for {} format(s)", fieldAssignmentsMap.size()); + logger.debug("[COMPOSITE_DEBUG] Resolved per-field assignments for {} format(s)", fieldAssignmentsMap.size()); // Determine primary format from role map DataFormat primaryDataFormat = roleMap.entrySet().stream() @@ -249,15 +249,15 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { List dataFormatWriters = 
dataFormatWriterPool.checkoutAll(); List refreshedSegment = refreshInput.getExistingSegments(); List newSegmentList = new ArrayList<>(); - // logger.info("[COMPOSITE_DEBUG] CompositeIndexingExecutionEngine.refresh: flushing {} writers, existing segments={}", - // dataFormatWriters.size(), refreshedSegment.size()); + logger.debug("[COMPOSITE_DEBUG] CompositeIndexingExecutionEngine.refresh: flushing {} writers, existing segments={}", + dataFormatWriters.size(), refreshedSegment.size()); // flush to disk for (CompositeDataFormatWriter dataFormatWriter : dataFormatWriters) { Segment newSegment = new Segment(dataFormatWriter.getWriterGeneration()); FileInfos fileInfos = dataFormatWriter.flush(null); fileInfos.getWriterFilesMap().forEach((key, value) -> { - // logger.info("[COMPOSITE_DEBUG] writer gen={} flushed format=[{}] files={}", - // dataFormatWriter.getWriterGeneration(), key.name(), value.getFiles()); + logger.debug("[COMPOSITE_DEBUG] writer gen={} flushed format=[{}] files={}", + dataFormatWriter.getWriterGeneration(), key.name(), value.getFiles()); newSegment.addSearchableFiles(key.name(), value); }); dataFormatWriter.close(); @@ -267,10 +267,10 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException { } if (newSegmentList.isEmpty()) { - // logger.info("[COMPOSITE_DEBUG] No new segments produced from flush"); + logger.debug("[COMPOSITE_DEBUG] No new segments produced from flush"); return null; } else { - // logger.info("[COMPOSITE_DEBUG] Produced {} new segments from flush", newSegmentList.size()); + logger.debug("[COMPOSITE_DEBUG] Produced {} new segments from flush", newSegmentList.size()); refreshedSegment.addAll(newSegmentList); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java index cf0d537b4ebec..d95fb8d3159c4 100644 --- 
a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java @@ -58,7 +58,7 @@ public CatalogSnapshotManager(CompositeEngine compositeEngine, Committer composi }); indexFileDeleter.set(new IndexFileDeleter(compositeEngine, latestCatalogSnapshot, shardPath, deleteUnreferencedFiles)); - // logger.debug("[RESET_DEBUG] IndexFileDeleter created, latestCatalogSnapshot={}, deleteUnreferencedFiles={}", latestCatalogSnapshot, deleteUnreferencedFiles); + logger.debug("[COMPOSITE_DEBUG] IndexFileDeleter created, latestCatalogSnapshot={}, deleteUnreferencedFiles={}", latestCatalogSnapshot, deleteUnreferencedFiles); if(latestCatalogSnapshot != null) { latestCatalogSnapshot.setIndexFileDeleterSupplier(indexFileDeleter::get); latestCatalogSnapshot.setCatalogSnapshotMap(catalogSnapshotMap); @@ -146,8 +146,8 @@ public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge one } private synchronized void advanceCatalogSnapshot(List refreshedSegments) throws IOException { - // logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: previous id={}, version={}, old segment count={}", - // latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), latestCatalogSnapshot.getSegments().size()); + logger.debug("[COMPOSITE_DEBUG] advanceCatalogSnapshot: previous id={}, version={}, old segment count={}", + latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), latestCatalogSnapshot.getSegments().size()); compositeEngineCommitter.addLuceneIndexes(refreshedSegments); CompositeEngineCatalogSnapshot cecs = new CompositeEngineCatalogSnapshot( latestCatalogSnapshot.getId() + 1, @@ -161,11 +161,11 @@ private synchronized void advanceCatalogSnapshot(List refreshedSegments latestCatalogSnapshot.decRef(); } latestCatalogSnapshot = cecs; - // logger.info("[COMPOSITE_DEBUG] advanceCatalogSnapshot: new id={}, version={}, new segment count={}", - // 
latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), refreshedSegments.size()); - // for (Segment seg : refreshedSegments) { - // logger.info("[COMPOSITE_DEBUG] segment gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); - // } + logger.debug("[COMPOSITE_DEBUG] advanceCatalogSnapshot: new id={}, version={}, new segment count={}", + latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), refreshedSegments.size()); + for (Segment seg : refreshedSegments) { + logger.debug("[COMPOSITE_DEBUG] segment gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + } } private Segment getSegment(Map writerFileSetMap) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java b/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java index 7cbe9dd86f7a2..49a78614bbc6e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/merge/CompositeMergePolicy.java @@ -50,11 +50,11 @@ public CompositeMergePolicy( ) { this.luceneMergePolicy = mergePolicy; this.logger = Loggers.getLogger(getClass(), shardId); - // logger.info("Initialized merge policy: {}", mergePolicy); + logger.debug("[COMPOSITE_DEBUG] Initialized merge policy: {}", mergePolicy); this.infoStream = new InfoStream() { @Override public void message(String component, String message) { - // logger.trace(() -> new ParameterizedMessage("Merge [{}]: {}", component, message)); + logger.debug(() -> new ParameterizedMessage("[COMPOSITE_DEBUG] Merge [{}]: {}", component, message)); } @Override From 35e8df21893ce8a01abd1dd5b6e3c8a070912133 Mon Sep 17 00:00:00 2001 From: Arpit Bandejiya Date: Mon, 9 Mar 2026 16:49:52 +0530 Subject: [PATCH 15/15] Cache DataformatCheck --- .../main/java/org/opensearch/index/mapper/FieldMapper.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 
deletion(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java index b0e3173d41607..bf1fffcddb60c 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java @@ -218,6 +218,7 @@ public T meta(Map meta) { protected MultiFields multiFields; protected CopyTo copyTo; protected DerivedFieldGenerator derivedFieldGenerator; + protected Boolean isPluggableDataFormatFeatureEnabled; protected FieldMapper(String simpleName, FieldType fieldType, MappedFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo) { super(simpleName); @@ -347,7 +348,11 @@ protected final void createFieldNamesField(ParseContext context) { } protected final boolean isPluggableDataFormatFeatureEnabled(ParseContext parseContext) { - return FeatureFlags.isEnabled(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) && parseContext.indexSettings().isOptimizedIndex(); + if(isPluggableDataFormatFeatureEnabled == null) { + isPluggableDataFormatFeatureEnabled = FeatureFlags.isEnabled(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) && parseContext.indexSettings().isOptimizedIndex(); + } + + return isPluggableDataFormatFeatureEnabled; } @Override