diff --git a/.gitignore b/.gitignore index fd9b9ad386961..7ad6f9da708a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .claude CLAUDE.md .cursor* +.kiro* # intellij files .idea/ diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java index 8be9ec2213c02..69727316cab46 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java @@ -23,15 +23,18 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.env.Environment; import org.opensearch.env.NodeEnvironment; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import com.parquet.parquetdataformat.bridge.RustBridge; import com.parquet.parquetdataformat.engine.ParquetExecutionEngine; +import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.FormatStoreDirectory; import org.opensearch.index.store.GenericStoreDirectory; import org.opensearch.plugins.DataSourcePlugin; -import org.opensearch.index.mapper.MapperService; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.spi.vectorized.DataSourceCodec; import org.opensearch.repositories.RepositoriesService; @@ -82,8 +85,15 @@ public class ParquetDataFormatPlugin extends Plugin implements DataSourcePlugin @Override @SuppressWarnings("unchecked") - public IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath, IndexSettings indexSettings) { - return 
(IndexingExecutionEngine) new ParquetExecutionEngine(settings, () -> ArrowSchemaBuilder.getSchema(mapperService), shardPath, indexSettings); + public IndexingExecutionEngine indexingEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimary, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments) { + ParquetExecutionEngine engine = new ParquetExecutionEngine( + settings, + isPrimary, + () -> ArrowSchemaBuilder.getSchema(mapperService, isPrimary), + shardPath, + indexSettings + ); + return (IndexingExecutionEngine) engine; } @Override @@ -109,6 +119,12 @@ public DataFormat getDataFormat() { return new ParquetDataFormat(); } + // In case of Parquet with multi-datasource, it will act as source of truth + @Override + public boolean isPrimary() { + return true; + } + @Override public Optional> getDataSourceCodecs() { Map codecs = new HashMap<>(); @@ -136,6 +152,15 @@ public BlobContainer createBlobContainer(BlobStore blobStore, BlobPath baseBlobP return blobStore.blobContainer(formatPath); } + @Override + public void registerFieldSupport(FieldSupportRegistry registry) { + DataFormat parquet = getDataFormat(); + for (Map.Entry entry : + com.parquet.parquetdataformat.fields.ArrowFieldRegistry.getRegisteredFields().entrySet()) { + registry.register(entry.getKey(), parquet, entry.getValue().getFieldCapabilities()); + } + } + @Override public List> getSettings() { return List.of( diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java index 0d6c2519d463a..9591c50612a84 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java @@ -3,7 +3,6 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import 
org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.Schema; -import org.apache.arrow.vector.types.FloatingPointPrecision; import org.opensearch.common.SuppressForbidden; import org.opensearch.index.engine.exec.DocumentInput; import org.opensearch.index.mapper.MappedFieldType; @@ -27,10 +26,13 @@ public static Schema getSchema() { public static void populateDocumentInput(DocumentInput documentInput) { MappedFieldType idField = FieldTypeConverter.convertToMappedFieldType(ID, new ArrowType.Int(32, true)); documentInput.addField(idField, generateRandomId()); + MappedFieldType nameField = FieldTypeConverter.convertToMappedFieldType(NAME, new ArrowType.Utf8()); documentInput.addField(nameField, generateRandomName()); + MappedFieldType designationField = FieldTypeConverter.convertToMappedFieldType(DESIGNATION, new ArrowType.Utf8()); documentInput.addField(designationField, generateRandomDesignation()); + MappedFieldType salaryField = FieldTypeConverter.convertToMappedFieldType(SALARY, new ArrowType.Int(32, true)); documentInput.addField(salaryField, random.nextInt(100000)); } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java index de0e808cc1cfa..d49f38012efa2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java @@ -50,12 +50,14 @@ public void configureStore() { @Override public boolean equals(Object obj) { - return true; + if (this == obj) return true; + if (!(obj instanceof DataFormat)) return false; + return name().equals(((DataFormat) obj).name()); } @Override public int hashCode() { - return 0; + return name().hashCode(); } @Override diff --git 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java index b40a3fb4751fd..3dd0997554c86 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java @@ -14,7 +14,9 @@ import org.apache.logging.log4j.Logger; import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexSettings; +import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.engine.exec.Merger; import org.opensearch.index.engine.exec.RefreshInput; @@ -75,9 +77,11 @@ public class ParquetExecutionEngine implements IndexingExecutionEngine schema, ShardPath shardPath, IndexSettings indexSettings @@ -87,7 +91,7 @@ public ParquetExecutionEngine( this.arrowBufferPool = new ArrowBufferPool(settings); this.indexSettings = indexSettings; this.parquetMerger = new ParquetMergeExecutor(CompactionStrategy.RECORD_BATCH, indexSettings.getIndex().getName()); - + this.isPrimaryEngine = isPrimaryEngine; // Push current settings to Rust store once on construction, then keep in sync on updates pushSettingsToRust(indexSettings); @@ -131,7 +135,7 @@ public void deleteFiles(Map> filesToDelete) { Collection parquetFilesToDelete = filesToDelete.get(PARQUET_DATA_FORMAT.name()); for (String fileName : parquetFilesToDelete) { Path filePath = Paths.get(fileName); - logger.info("Deleting file [ParquetExecutionEngine]: {}", filePath); + // logger.info("Deleting file [ParquetExecutionEngine]: {}", filePath); try { Files.delete(filePath); } catch (Exception e) { @@ -143,14 +147,15 @@ 
public void deleteFiles(Map> filesToDelete) { } @Override - public List supportedFieldTypes() { - return List.of(); + public List supportedFieldTypes(boolean isPrimaryEngine) { + return new java.util.ArrayList<>(ArrowFieldRegistry.getRegisteredFieldNames()); } @Override public Writer createWriter(long writerGeneration) { String fileName = Path.of(shardPath.getDataPath().toString(), getDataFormat().name(), FILE_NAME_PREFIX + "_" + writerGeneration + FILE_NAME_EXT).toString(); - return new ParquetWriter(fileName, schema.get(), writerGeneration, arrowBufferPool, indexSettings); + EngineRole role = isPrimaryEngine ? EngineRole.PRIMARY : EngineRole.SECONDARY; + return new ParquetWriter(fileName, schema.get(), writerGeneration, arrowBufferPool, indexSettings, role); } @Override @@ -174,8 +179,8 @@ public long getNativeBytesUsed() { long vsrMemory = arrowBufferPool.getTotalAllocatedBytes(); String shardDataPath = shardPath.getDataPath().toString(); long filteredArrowWriterMemory = RustBridge.getFilteredNativeBytesUsed(shardDataPath); - logger.debug("Native memory used by VSR Buffer Pool: {}", vsrMemory); - logger.debug("Native memory used by ArrowWriters in shard path {}: {}", shardDataPath, filteredArrowWriterMemory); + // logger.debug("Native memory used by VSR Buffer Pool: {}", vsrMemory); + // logger.debug("Native memory used by ArrowWriters in shard path {}: {}", shardDataPath, filteredArrowWriterMemory); return vsrMemory + filteredArrowWriterMemory; } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java index c383f4dd958b4..30906b5aea0bc 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java @@ 
-24,7 +24,7 @@ public class ParquetDataSourceCodec implements DataSourceCodec { static { try { //JniLibraryLoader.loadLibrary(); - logger.info("DataFusion JNI library loaded successfully"); + // logger.info("DataFusion JNI library loaded successfully"); } catch (Exception e) { logger.error("Failed to load DataFusion JNI library", e); throw new RuntimeException("Failed to initialize DataFusion JNI library", e); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java index 1a65f7a116623..81de6e41ef659 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java @@ -62,7 +62,7 @@ private static void registerCorePlugins() { // Register core data fields registerPlugin(new CoreDataFieldPlugin(), "CoreDataFields"); - // REgister metadata fields + // Register metadata fields registerPlugin(new MetadataFieldPlugin(), "MetadataFields"); } /** @@ -141,6 +141,13 @@ public static ParquetField getParquetField(String fieldType) { return FIELD_REGISTRY.get(fieldType); } + /** + * Returns an unmodifiable view of all registered field mappings. 
+ */ + public static Map getRegisteredFields() { + return Collections.unmodifiableMap(FIELD_REGISTRY); + } + public static class RegistryStats { private final int totalFields; private final Set allFieldTypes; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java index 5430b7fa03101..b8601d420981a 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowSchemaBuilder.java @@ -29,6 +29,10 @@ * Utility class for creating Apache Arrow schemas from OpenSearch mapper services. * This class provides methods to convert OpenSearch field mappings into Arrow schema definitions * that can be used for Parquet data format operations. + * + *

Uses {@link ArrowFieldRegistry} to determine which fields are eligible for the + * engine's role via {@code ArrowFieldRegistry#getParquetField(String)}. For non-primary contexts, + * fields not eligible for the role are excluded from the schema.

*/ public final class ArrowSchemaBuilder { @@ -38,18 +42,18 @@ private ArrowSchemaBuilder() { } /** - * Creates an Apache Arrow Schema from the provided MapperService. - * This method extracts all non-metadata field mappers and converts them to Arrow fields. + * Creates an Apache Arrow Schema from the provided MapperService using the ArrowFieldRegistry. * * @param mapperService the OpenSearch mapper service containing field definitions - * @return a new Schema containing Arrow field definitions for all mapped fields + * @param isPrimary whether this is a primary engine context + * @return a new Schema containing Arrow field definitions for all eligible mapped fields * @throws IllegalArgumentException if mapperService is null - * @throws IllegalStateException if no valid fields are found or if a field type is not supported + * @throws IllegalStateException if no valid fields are found or if a field type is not supported in primary context */ - public static Schema getSchema(final MapperService mapperService) { + public static Schema getSchema(final MapperService mapperService, boolean isPrimary) { Objects.requireNonNull(mapperService, "MapperService cannot be null"); - final List fields = extractFieldsFromMappers(mapperService); + final List fields = extractFieldsFromMappers(mapperService, isPrimary); if (fields.isEmpty()) { throw new IllegalStateException("No valid fields found in mapper service"); @@ -59,12 +63,14 @@ public static Schema getSchema(final MapperService mapperService) { } /** - * Extracts Arrow fields from the mapper service, filtering out metadata fields. + * Extracts Arrow fields from the mapper service, filtering out metadata fields + * and fields not eligible for the engine's role. 
* * @param mapperService the mapper service to extract fields from + * @param isPrimary whether this is a primary engine context * @return a list of Arrow fields */ - private static List extractFieldsFromMappers(final MapperService mapperService) { + private static List extractFieldsFromMappers(final MapperService mapperService, boolean isPrimary) { final List fields = new ArrayList<>(); for (final Mapper mapper : mapperService.documentMapper().mappers()) { @@ -72,12 +78,15 @@ private static List extractFieldsFromMappers(final MapperService mapperSe continue; } - final Field arrowField = createArrowField(mapper); - fields.add(arrowField); + final Field arrowField = createArrowField(mapper, isPrimary); + if (arrowField != null) { + fields.add(arrowField); + } } - fields.add(new Field(CompositeDataFormatWriter.ROW_ID, new LongParquetField().getFieldType(), null)); - fields.add(new Field(SeqNoFieldMapper.PRIMARY_TERM_NAME, new LongParquetField().getFieldType(), null)); + LongParquetField longField = new LongParquetField(); + fields.add(new Field(CompositeDataFormatWriter.ROW_ID, longField.getFieldType(), null)); + fields.add(new Field(SeqNoFieldMapper.PRIMARY_TERM_NAME, longField.getFieldType(), null)); return fields; } @@ -98,20 +107,27 @@ private static boolean notSupportedMetadataField(final Mapper mapper) { } /** - * Creates an Arrow Field from an OpenSearch Mapper. + * Creates an Arrow Field from an OpenSearch Mapper using the ArrowFieldRegistry. + * For non-primary contexts, returns null if the field type has no eligible ParquetField, + * allowing the caller to skip the field. For primary contexts, throws IllegalStateException + * if no ParquetField is found. 
* * @param mapper the mapper to convert - * @return a new Arrow Field - * @throws IllegalStateException if the mapper type is not supported + * @param isPrimary whether this is a primary engine context + * @return a new Arrow Field, or null if the field is not eligible for the role + * @throws IllegalStateException if the mapper type is not supported in primary context */ - private static Field createArrowField(final Mapper mapper) { + private static Field createArrowField(final Mapper mapper, boolean isPrimary) { final ParquetField parquetField = ArrowFieldRegistry.getParquetField(mapper.typeName()); if (parquetField == null) { - throw new IllegalStateException( - String.format("Unsupported field type '%s' for field '%s'", - mapper.typeName(), mapper.name()) - ); + if (isPrimary) { + throw new IllegalStateException( + String.format("Unsupported field type '%s' for field '%s'", + mapper.typeName(), mapper.name()) + ); + } + return null; } return new Field(mapper.name(), parquetField.getFieldType(), null); diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java index dc1a7e369d430..51da41021f463 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java @@ -11,9 +11,11 @@ import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; +import org.opensearch.index.engine.exec.FieldCapability; import org.opensearch.index.mapper.MappedFieldType; import java.util.Objects; +import java.util.Set; /** * Abstract base class for all Parquet field implementations that handle the conversion @@ -37,49 +39,30 @@ public abstract class ParquetField { /** * Adds the parsed field value to 
the appropriate vector group within the managed VSR. - * This method is responsible for the actual data conversion and storage in the - * columnar format specific to each field type. * - *

Implementations must handle null values appropriately and ensure type safety - * when casting the parseValue to the expected type.

- * - * @param mappedFieldType the OpenSearch field type metadata containing field configuration + * @param fieldType the per-field MappedFieldType carrying field name, type name, and capability flags * @param managedVSR the managed vector schema root for columnar data storage * @param parseValue the parsed field value to be stored, may be null - * @throws IllegalArgumentException if any parameter is invalid for this field type - * @throws ClassCastException if parseValue cannot be cast to the expected type */ - protected abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue); + protected abstract void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue); /** * Creates and processes a field entry if the field type supports columnar storage. - * This method serves as the main entry point for field processing and includes - * validation logic to ensure only columnar fields are processed. - * - *

The method performs the following operations: - *

    - *
  1. Validates input parameters
  2. - *
  3. Checks if the field supports columnar storage
  4. - *
  5. Delegates to {@link #addToGroup} for actual data processing
  6. - *
* - * @param mappedFieldType the OpenSearch field type metadata, must not be null + * @param fieldType the per-field MappedFieldType carrying field name, type name, and capability flags, must not be null * @param managedVSR the managed vector schema root, must not be null * @param parseValue the parsed field value to be processed, may be null - * @throws IllegalArgumentException if mappedFieldType or managedVSR is null */ - public final void createField(final MappedFieldType mappedFieldType, + public final void createField(final MappedFieldType fieldType, final ManagedVSR managedVSR, final Object parseValue) { - Objects.requireNonNull(mappedFieldType, "MappedFieldType cannot be null"); + Objects.requireNonNull(fieldType, "MappedFieldType cannot be null"); Objects.requireNonNull(managedVSR, "ManagedVSR cannot be null"); - if (mappedFieldType.isColumnar()) { - // TODO: support dynamic mapping update - // for now ignore the field - if (managedVSR.getVector(mappedFieldType.name()) != null) { - addToGroup(mappedFieldType, managedVSR, parseValue); - } + // TODO: support dynamic mapping update + // for now ignore the field + if (managedVSR.getVector(fieldType.name()) != null) { + addToGroup(fieldType, managedVSR, parseValue); } } @@ -109,6 +92,12 @@ public final void createField(final MappedFieldType mappedFieldType, */ public abstract FieldType getFieldType(); + /** + * Returns the set of capabilities this field supports. + * The engine uses this to populate the FieldSupportRegistry. + */ + public abstract Set getFieldCapabilities(); + /** * Provides a string representation of this ParquetField for debugging purposes. * The default implementation includes the class name and Arrow type information. 
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java index eaa4d5209bfc2..dd5f406e55b87 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BinaryParquetField.java @@ -8,12 +8,16 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling binary data types in OpenSearch documents. 
@@ -40,8 +44,8 @@ public class BinaryParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + final VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); varBinaryVector.set(rowCount, (byte[]) parseValue); } @@ -55,4 +59,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java index 4b2237bf1aa1f..10d4613b29af0 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/BooleanParquetField.java @@ -8,13 +8,17 @@ package com.parquet.parquetdataformat.fields.core.data; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation 
for handling boolean data types in OpenSearch documents. @@ -41,8 +45,8 @@ public class BooleanParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - BitVector bitVector = (BitVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + BitVector bitVector = (BitVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); bitVector.setSafe(rowIndex, (Boolean) parseValue ? 1 : 0); } @@ -56,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java index 09ca4d50c9fe7..2d3fb13633c0f 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateNanosParquetField.java @@ -8,13 +8,16 @@ package com.parquet.parquetdataformat.fields.core.data.date; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampNanoVector; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.Set; /** * Parquet field 
implementation for handling date and timestamp data types in OpenSearch documents. @@ -43,8 +46,8 @@ public class DateNanosParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + TimeStampNanoVector timeStampNanoVector = (TimeStampNanoVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampNanoVector.setSafe(rowIndex, (long) parseValue); } @@ -58,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java index 8554314e722a7..0e90aeb40e915 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/date/DateParquetField.java @@ -8,13 +8,16 @@ package com.parquet.parquetdataformat.fields.core.data.date; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import 
org.opensearch.index.mapper.MappedFieldType; + +import java.util.Set; /** * Parquet field implementation for handling date and timestamp data types in OpenSearch documents. @@ -43,8 +46,8 @@ public class DateParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + TimeStampMilliVector timeStampMilliVector = (TimeStampMilliVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); timeStampMilliVector.setSafe(rowIndex, (long) parseValue); } @@ -58,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java index d9d45faeb3872..89727fb906cd1 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ByteParquetField.java @@ -8,12 +8,16 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import 
org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling 8-bit signed byte integer data types in OpenSearch documents. @@ -40,8 +44,8 @@ public class ByteParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); tinyIntVector.setSafe(rowCount, (Byte) parseValue); } @@ -55,4 +59,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java index ac2b3a6e62927..f08b601976b9e 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/DoubleParquetField.java @@ -8,13 +8,17 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float8Vector; import 
org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling double-precision floating-point data types in OpenSearch documents. @@ -42,8 +46,8 @@ public class DoubleParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); float8Vector.setSafe(rowCount, (Double) parseValue); } @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java index a516efd2f990f..fcd9dea6a0660 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/FloatParquetField.java @@ -8,13 +8,17 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import 
com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling single-precision floating-point data types in OpenSearch documents. @@ -42,8 +46,8 @@ public class FloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); float4Vector.setSafe(rowCount, (Float) parseValue); } @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java index 3019773e6bd42..178b585751050 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/HalfFloatParquetField.java @@ -8,13 +8,17 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; 
+import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.Float2Vector; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling half-precision (16-bit) floating-point data types in OpenSearch documents. @@ -42,8 +46,8 @@ public class HalfFloatParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); float2Vector.setSafe(rowCount, (Short) parseValue); } @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java index b11d49b666799..1650f8a62dd45 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/IntegerParquetField.java @@ -8,12 +8,16 @@ 
package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling 32-bit signed integer data types in OpenSearch documents. @@ -40,8 +44,8 @@ public class IntegerParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } @@ -55,4 +59,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java index 850ac0f004649..4a3f9a3e5b811 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/LongParquetField.java @@ -8,12 +8,16 
@@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling 64-bit signed long integer data types in OpenSearch documents. @@ -41,8 +45,8 @@ public class LongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); bigIntVector.setSafe(rowCount, (Long) parseValue); } @@ -56,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java index 07ee5c1b54814..c69fa21b2ee1c 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java +++ 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/ShortParquetField.java @@ -8,12 +8,16 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.SmallIntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling 16-bit signed short integer data types in OpenSearch documents. @@ -41,8 +45,8 @@ public class ShortParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); smallIntVector.setSafe(rowCount, (Short) parseValue); } @@ -56,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java similarity index 81% rename from 
modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java index 603189bddc80b..613abbe70a4e4 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TokenCountParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/TokenCountParquetField.java @@ -6,14 +6,18 @@ * compatible open source license. */ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling token count data types in OpenSearch documents. 
@@ -43,8 +47,8 @@ public class TokenCountParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } @@ -58,4 +62,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java index 7f8e407f29092..867590f0fd684 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/number/UnsignedLongParquetField.java @@ -8,12 +8,16 @@ package com.parquet.parquetdataformat.fields.core.data.number; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling 64-bit unsigned 
long integer data types in OpenSearch documents. @@ -41,8 +45,8 @@ public class UnsignedLongParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); long longValue = ((Number) parseValue).longValue(); uInt8Vector.setSafe(rowCount, longValue); @@ -57,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES, FieldCapability.INDEX); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java similarity index 84% rename from modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java index be16d3154b66a..dd1c586604189 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/IpParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/IpParquetField.java @@ -6,8 +6,10 @@ * compatible open source license. 
*/ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.text; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; @@ -16,7 +18,9 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.util.BytesRef; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.Set; import java.net.InetAddress; @@ -48,8 +52,8 @@ public class IpParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarBinaryVector varBinaryVector = (VarBinaryVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); final BytesRef bytesRef = new BytesRef(InetAddressPoint.encode((InetAddress) parseValue)); varBinaryVector.setSafe(rowIndex, bytesRef.bytes, bytesRef.offset, bytesRef.length); @@ -64,4 +68,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java similarity index 72% rename from 
modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java index 1814e20891f4e..c65160e8ff38b 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/KeywordParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/KeywordParquetField.java @@ -6,16 +6,22 @@ * compatible open source license. */ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.text; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.nio.charset.StandardCharsets; +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling keyword data types in OpenSearch documents. 
@@ -43,11 +49,15 @@ */ public class KeywordParquetField extends ParquetField { + private static final Logger logger = LogManager.getLogger(KeywordParquetField.class); + @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); + // logger.info("[COMPOSITE_DEBUG] KeywordParquetField.addToGroup: field=[{}] value=[{}] rowIndex=[{}] capabilities={}", + // fieldType.name(), parseValue, rowIndex, descriptor.assignedCapabilities()); } @Override @@ -59,4 +69,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java similarity index 83% rename from modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java rename to modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java index e4c93aa9f608f..77ce12726f581 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/TextParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/data/text/TextParquetField.java @@ -6,17 +6,20 @@ * compatible open source license. 
*/ -package com.parquet.parquetdataformat.fields.core.data; +package com.parquet.parquetdataformat.fields.core.data.text; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.EnumSet; +import java.util.Set; /** * Parquet field implementation for handling text data types in OpenSearch documents. @@ -45,8 +48,8 @@ public class TextParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); textVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } @@ -60,4 +63,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java index 413a3938836fc..34e1621d65f37 100644 --- 
a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IdParquetField.java @@ -8,13 +8,16 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.lucene.util.BytesRef; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.Set; /** * Parquet field implementation for handling document ID metadata in OpenSearch documents. @@ -42,8 +45,8 @@ public class IdParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarBinaryVector idVector = (VarBinaryVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); BytesRef bytesRef = (BytesRef) parseValue; idVector.setSafe(rowIndex, bytesRef.bytes, bytesRef.offset, bytesRef.length); @@ -58,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java index c31e3932c2295..2a9fdbb3e26f7 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/IgnoredParquetField.java @@ -8,14 +8,16 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.Set; /** * Parquet field implementation for handling ignored field data types in OpenSearch documents. 
@@ -43,8 +45,8 @@ public class IgnoredParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector varCharVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); varCharVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } @@ -58,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java index ffacfa1995ed4..7da6c3935b85d 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/RoutingParquetField.java @@ -8,14 +8,16 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; import java.nio.charset.StandardCharsets; +import java.util.Set; /** * Parquet field implementation for 
handling routing metadata in OpenSearch documents. @@ -43,8 +45,8 @@ public class RoutingParquetField extends ParquetField { @Override - protected void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - VarCharVector routingVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector routingVector = (VarCharVector) managedVSR.getVector(fieldType.name()); int rowIndex = managedVSR.getRowCount(); routingVector.setSafe(rowIndex, parseValue.toString().getBytes(StandardCharsets.UTF_8)); } @@ -58,4 +60,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java index 1367cc7542155..981fc966cac17 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/core/metadata/SizeParquetField.java @@ -8,12 +8,15 @@ package com.parquet.parquetdataformat.fields.core.metadata; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.vsr.ManagedVSR; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.opensearch.index.mapper.MappedFieldType; + +import java.util.Set; /** * Parquet field implementation for 
handling document size metadata in OpenSearch documents. @@ -43,8 +46,8 @@ public class SizeParquetField extends ParquetField { @Override - public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { - IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); + protected void addToGroup(MappedFieldType fieldType, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(fieldType.name()); int rowCount = managedVSR.getRowCount(); intVector.setSafe(rowCount, (Integer) parseValue); } @@ -58,4 +61,9 @@ public ArrowType getArrowType() { public FieldType getFieldType() { return FieldType.nullable(getArrowType()); } + + @Override + public Set getFieldCapabilities() { + return java.util.EnumSet.of(FieldCapability.DOC_VALUES); + } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java index 99ec60ea700b8..d3501f6289c20 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java @@ -28,7 +28,7 @@ public class ArrowBufferPool implements Closeable { public ArrowBufferPool(Settings settings) { long maxAllocationInBytes = 10L * 1024 * 1024 * 1024; - logger.info("Max native memory allocation for ArrowBufferPool: {} bytes", maxAllocationInBytes); + // logger.info("Max native memory allocation for ArrowBufferPool: {} bytes", maxAllocationInBytes); this.rootAllocator = new RootAllocator(maxAllocationInBytes); this.maxChildAllocation = 1024 * 1024 * 1024; } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java index 59fdca52ec10a..c6dbac8517df2 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/merge/RecordBatchMergeStrategy.java @@ -79,7 +79,7 @@ public MergeResult mergeParquetFiles(List files, long writerGener ); try { Files.deleteIfExists(Path.of(mergedFilePath)); - logger.info("Stale Merged File Deleted at : [{}]", mergedFilePath); + // logger.info("Stale Merged File Deleted at : [{}]", mergedFilePath); } catch (Exception innerException) { logger.error( () -> new ParameterizedMessage( diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java index 20bdfc9610d13..98e2747497497 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/plugins/fields/CoreDataFieldPlugin.java @@ -10,13 +10,9 @@ import com.parquet.parquetdataformat.fields.ParquetField; import com.parquet.parquetdataformat.fields.core.data.BinaryParquetField; -import com.parquet.parquetdataformat.fields.core.data.date.DateNanosParquetField; -import com.parquet.parquetdataformat.fields.core.data.TokenCountParquetField; import com.parquet.parquetdataformat.fields.core.data.BooleanParquetField; +import com.parquet.parquetdataformat.fields.core.data.date.DateNanosParquetField; import com.parquet.parquetdataformat.fields.core.data.date.DateParquetField; -import com.parquet.parquetdataformat.fields.core.data.IpParquetField; -import com.parquet.parquetdataformat.fields.core.data.KeywordParquetField; -import 
com.parquet.parquetdataformat.fields.core.data.TextParquetField; import com.parquet.parquetdataformat.fields.core.data.number.ByteParquetField; import com.parquet.parquetdataformat.fields.core.data.number.DoubleParquetField; import com.parquet.parquetdataformat.fields.core.data.number.FloatParquetField; @@ -24,7 +20,11 @@ import com.parquet.parquetdataformat.fields.core.data.number.IntegerParquetField; import com.parquet.parquetdataformat.fields.core.data.number.LongParquetField; import com.parquet.parquetdataformat.fields.core.data.number.ShortParquetField; +import com.parquet.parquetdataformat.fields.core.data.number.TokenCountParquetField; import com.parquet.parquetdataformat.fields.core.data.number.UnsignedLongParquetField; +import com.parquet.parquetdataformat.fields.core.data.text.IpParquetField; +import com.parquet.parquetdataformat.fields.core.data.text.KeywordParquetField; +import com.parquet.parquetdataformat.fields.core.data.text.TextParquetField; import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.BooleanFieldMapper; import org.opensearch.index.mapper.DateFieldMapper; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java index 1044ec0c7c654..a041c8ff1897d 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java @@ -90,7 +90,7 @@ private void setState(VSRState newState) { VSRState oldState = state; state = newState; - logger.debug("State transition: {} -> {} for VSR {}", oldState, newState, id); + // logger.debug("State transition: {} -> {} for VSR {}", oldState, newState, id); } /** diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java 
b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java index 7ada33f27ecfc..718e8c6567756 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java @@ -85,7 +85,7 @@ public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOExcep throw new IOException("Cannot add document - VSR is not active: " + currentVSR.getState()); } - logger.debug("addToManagedVSR called for {}, current row count: {}", fileName, currentVSR.getRowCount()); + // logger.debug("addToManagedVSR called for {}, current row count: {}", fileName, currentVSR.getRowCount()); try { // Since ParquetDocumentInput now works directly with ManagedVSR, @@ -94,7 +94,7 @@ public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOExcep // which will increment the row count. WriteResult result = document.addToWriter(); - logger.debug("After adding document to {}, row count: {}", fileName, currentVSR.getRowCount()); + // logger.debug("After adding document to {}, row count: {}", fileName, currentVSR.getRowCount()); // Check for VSR rotation AFTER successful document processing maybeRotateActiveVSR(); @@ -108,17 +108,17 @@ public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOExcep public ParquetFileMetadata flush(FlushIn flushIn) throws IOException { ManagedVSR currentVSR = managedVSR.get(); - logger.info("Flush called for {}, row count: {}", fileName, currentVSR.getRowCount()); + // logger.info("Flush called for {}, row count: {}", fileName, currentVSR.getRowCount()); try { // Only flush if we have data if (currentVSR.getRowCount() == 0) { - logger.debug("No data to flush for {}, returning null", fileName); + // logger.debug("No data to flush for {}, returning null", fileName); return null; } // Transition VSR to FROZEN state before flushing currentVSR.moveToFrozen(); - 
logger.info("Flushing {} rows for {}", currentVSR.getRowCount(), fileName); + // logger.info("Flushing {} rows for {}", currentVSR.getRowCount(), fileName); ParquetFileMetadata metadata; // Write through native writer handle @@ -127,7 +127,7 @@ public ParquetFileMetadata flush(FlushIn flushIn) throws IOException { writer.close(); metadata = writer.getMetadata(); } - logger.debug("Successfully flushed data for {} with metadata: {}", fileName, metadata); + // logger.debug("Successfully flushed data for {} with metadata: {}", fileName, metadata); return metadata; } catch (Exception e) { @@ -184,20 +184,20 @@ public void maybeRotateActiveVSR() throws IOException { boolean rotated = vsrPool.maybeRotateActiveVSR(); if (rotated) { - logger.debug("VSR rotation occurred after document addition for {}", fileName); + // logger.debug("VSR rotation occurred after document addition for {}", fileName); // Get the frozen VSR that was just created by rotation ManagedVSR frozenVSR = vsrPool.getFrozenVSR(); if (frozenVSR != null) { - logger.debug("Processing frozen VSR: {} with {} rows for {}", - frozenVSR.getId(), frozenVSR.getRowCount(), fileName); + // logger.debug("Processing frozen VSR: {} with {} rows for {}", + // frozenVSR.getId(), frozenVSR.getRowCount(), fileName); // Write the frozen VSR data immediately try (ArrowExport export = frozenVSR.exportToArrow()) { writer.write(export.getArrayAddress(), export.getSchemaAddress()); } - logger.debug("Successfully wrote frozen VSR data for {}", fileName); + // logger.debug("Successfully wrote frozen VSR data for {}", fileName); // Complete the VSR processing vsrPool.completeVSR(frozenVSR); @@ -214,8 +214,8 @@ public void maybeRotateActiveVSR() throws IOException { } updateVSRAndReinitialize(oldVSR, newVSR); - logger.debug("VSR rotation completed for {}, new active VSR: {}, row count: {}", - fileName, newVSR.getId(), newVSR.getRowCount()); + // logger.debug("VSR rotation completed for {}, new active VSR: {}, row count: {}", + // 
fileName, newVSR.getId(), newVSR.getRowCount()); } } catch (IOException e) { logger.error("Error during VSR rotation for {}: {}", fileName, e.getMessage(), e); @@ -237,13 +237,13 @@ private void checkAndHandleVSRRotation() throws IOException { // Check if we got a different VSR (rotation occurred) ManagedVSR oldVSR = managedVSR.get(); if (currentActive != oldVSR) { - logger.debug("VSR rotation detected for {}, updating references", fileName); + // logger.debug("VSR rotation detected for {}, updating references", fileName); // Update the managed VSR reference atomically with field vector map updateVSRAndReinitialize(oldVSR, currentActive); // Note: Writer initialization is not needed per VSR as it's per file - logger.debug("VSR rotation completed for {}, new row count: {}", fileName, currentActive.getRowCount()); + // logger.debug("VSR rotation completed for {}, new row count: {}", fileName, currentActive.getRowCount()); } } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java index 41bb192f55ea3..a44cd74a14308 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -3,13 +3,19 @@ import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; import com.parquet.parquetdataformat.fields.ParquetField; import org.apache.arrow.vector.BigIntVector; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import com.parquet.parquetdataformat.engine.ParquetDataFormat; +import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.WriteResult; import 
org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; import org.opensearch.index.mapper.MappedFieldType; import com.parquet.parquetdataformat.vsr.ManagedVSR; import java.io.IOException; +import java.util.Objects; /** * Document input wrapper for Parquet-based document processing. @@ -23,7 +29,7 @@ * *

Key responsibilities: *

    - *
  • Direct field vector population using OpenSearch's {@link MappedFieldType}
  • + *
  • Direct field vector population using {@link MappedFieldType}
  • *
  • Document lifecycle management via ManagedVSR
  • *
  • Integration with the Arrow-based Parquet writer pipeline
  • *
@@ -32,10 +38,13 @@ * intermediate ParquetDocument representation for improved performance and memory efficiency. */ public class ParquetDocumentInput implements DocumentInput { + private static final Logger logger = LogManager.getLogger(ParquetDocumentInput.class); private final ManagedVSR managedVSR; + private final EngineRole engineRole; - public ParquetDocumentInput(ManagedVSR managedVSR) { - this.managedVSR = managedVSR; + public ParquetDocumentInput(ManagedVSR managedVSR, EngineRole engineRole) { + this.managedVSR = Objects.requireNonNull(managedVSR, "managedVSR must not be null"); + this.engineRole = Objects.requireNonNull(engineRole, "engineRole must not be null"); } @Override @@ -47,15 +56,15 @@ public void addRowIdField(String fieldName, long rowId) { @Override public void addField(MappedFieldType fieldType, Object value) { - final String fieldTypeName = fieldType.typeName(); - final ParquetField parquetField = ArrowFieldRegistry.getParquetField(fieldTypeName); + final ParquetField parquetField = ArrowFieldRegistry.getParquetField(fieldType.typeName()); if (parquetField == null) { - throw new IllegalArgumentException( - String.format("Unsupported field type: %s. 
Field type is not registered in ArrowFieldRegistry.", fieldTypeName) - ); + // Field type not supported by Parquet format — skip silently + // logger.debug("[COMPOSITE_DEBUG] Parquet SKIP field=[{}] type=[{}] — no ParquetField registered in ArrowFieldRegistry", fieldType.name(), fieldType.typeName()); + return; } + // logger.debug("[COMPOSITE_DEBUG] Parquet ACCEPT field=[{}] type=[{}] value=[{}]", fieldType.name(), fieldType.typeName(), value); parquetField.createField(fieldType, managedVSR, value); } @@ -71,6 +80,11 @@ public ManagedVSR getFinalInput() { return managedVSR; } + @Override + public EngineRole getEngineRole() { + return engineRole; + } + @Override public WriteResult addToWriter() throws IOException { // Complete the current document by incrementing row count @@ -82,6 +96,11 @@ public WriteResult addToWriter() throws IOException { return new WriteResult(true, null, 1, 1, 1); } + @Override + public DataFormat getDataFormat() { + return ParquetDataFormat.PARQUET_DATA_FORMAT; + } + @Override public void close() throws Exception { // NOTE: ParquetDocumentInput does NOT own the ManagedVSR lifecycle @@ -91,4 +110,5 @@ public void close() throws Exception { // No cleanup needed here - VSRManager handles the ManagedVSR lifecycle } + } diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java index cc552b809d575..757116158b2b7 100644 --- a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java @@ -7,6 +7,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FileInfos; import 
org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.WriteResult; @@ -45,18 +46,21 @@ public class ParquetWriter implements Writer { private final Schema schema; private final VSRManager vsrManager; private final long writerGeneration; + private final EngineRole engineRole; public ParquetWriter( String file, Schema schema, long writerGeneration, ArrowBufferPool arrowBufferPool, - IndexSettings indexSettings + IndexSettings indexSettings, + EngineRole engineRole ) { this.file = file; this.schema = schema; this.vsrManager = new VSRManager(file, indexSettings.getIndex().getName(), schema, arrowBufferPool); this.writerGeneration = writerGeneration; + this.engineRole = engineRole; } @Override @@ -87,7 +91,7 @@ public void sync() throws IOException { } @Override - public void close() { + public void close() throws IOException { vsrManager.close(); } @@ -100,6 +104,6 @@ public ParquetDocumentInput newDocumentInput() { } // Get a new ManagedVSR from VSRManager for this document input - return new ParquetDocumentInput(vsrManager.getActiveManagedVSR()); + return new ParquetDocumentInput(vsrManager.getActiveManagedVSR(), engineRole); } } diff --git a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java index 3c13cc511eb83..33ba6d5644716 100644 --- a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java +++ b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/vsr/VSRManagerTests.java @@ -10,31 +10,23 @@ import com.parquet.parquetdataformat.bridge.ArrowExport; import com.parquet.parquetdataformat.bridge.ParquetFileMetadata; -import com.parquet.parquetdataformat.bridge.RustBridge; import com.parquet.parquetdataformat.memory.ArrowBufferPool; import com.parquet.parquetdataformat.writer.ParquetDocumentInput; import 
org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.apache.arrow.vector.IntVector; -import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.types.Types; +import org.opensearch.index.engine.exec.EngineRole; import org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.WriteResult; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.common.settings.Settings; -import org.mockito.MockedStatic; import org.mockito.Mockito; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.anyString; /** * Integration tests for VSRManager covering document processing workflows and state management @@ -104,7 +96,7 @@ public void testDocumentAdditionThroughVSRManager() throws Exception { VSRManager vsrManager = new VSRManager(testFileName, testSchema, bufferPool); // Create a document to add - ParquetDocumentInput document = new ParquetDocumentInput(vsrManager.getActiveManagedVSR()); + ParquetDocumentInput document = new ParquetDocumentInput(vsrManager.getActiveManagedVSR(), EngineRole.PRIMARY); // Create mock field types and add fields to document MappedFieldType idFieldType = Mockito.mock(MappedFieldType.class); diff --git a/plugins/engine-datafusion/Cargo.toml b/plugins/engine-datafusion/Cargo.toml index 2252604f5c173..53bc37c991ffd 100644 --- a/plugins/engine-datafusion/Cargo.toml +++ b/plugins/engine-datafusion/Cargo.toml @@ -49,7 +49,7 @@ object_store = "=0.12.4" url = "2.0" # Substrait support -substrait = "0.47" +substrait = "=0.62.0" # Temporary directory support tempfile = "3.0" diff --git a/plugins/engine-datafusion/README.md 
b/plugins/engine-datafusion/README.md index 032dfb7fa7730..42617133401eb 100644 --- a/plugins/engine-datafusion/README.md +++ b/plugins/engine-datafusion/README.md @@ -38,28 +38,321 @@ curl --location --request PUT 'http://localhost:9200/index-7' \ "optimized.enabled": true }, "mappings": { + "dynamic": "false", "properties": { - "id": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "age": { - "type": "integer" - }, - "salary": { - "type": "long" - }, - "score": { - "type": "double" - }, - "active": { - "type": "boolean" - }, - "created_date": { - "type": "date" - } + "AdvEngineID": { + "type": "short" + }, + "Age": { + "type": "short" + }, + "BrowserCountry": { + "type": "keyword" + }, + "BrowserLanguage": { + "type": "keyword" + }, + "CLID": { + "type": "integer" + }, + "ClientEventTime": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "ClientIP": { + "type": "integer" + }, + "ClientTimeZone": { + "type": "short" + }, + "CodeVersion": { + "type": "integer" + }, + "ConnectTiming": { + "type": "integer" + }, + "CookieEnable": { + "type": "short" + }, + "CounterClass": { + "type": "short" + }, + "CounterID": { + "type": "integer" + }, + "DNSTiming": { + "type": "integer" + }, + "DontCountHits": { + "type": "short" + }, + "EventDate": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "EventTime": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "FUniqID": { + "type": "long" + }, + "FetchTiming": { + "type": "integer" + }, + "FlashMajor": { + "type": "short" + }, + "FlashMinor": { + "type": "short" + }, + "FlashMinor2": { + "type": "short" + }, + "FromTag": { + "type": "keyword" + }, + "GoodEvent": { + "type": "short" + }, + "HID": { + "type": "integer" + }, + "HTTPError": { + "type": "short" + }, + "HasGCLID": { + "type": "short" + }, + "HistoryLength": { + "type": "short" + }, + "HitColor": { 
+ "type": "keyword" + }, + "IPNetworkID": { + "type": "integer" + }, + "Income": { + "type": "short" + }, + "Interests": { + "type": "short" + }, + "IsArtifical": { + "type": "short" + }, + "IsDownload": { + "type": "short" + }, + "IsEvent": { + "type": "short" + }, + "IsLink": { + "type": "short" + }, + "IsMobile": { + "type": "short" + }, + "IsNotBounce": { + "type": "short" + }, + "IsOldCounter": { + "type": "short" + }, + "IsParameter": { + "type": "short" + }, + "IsRefresh": { + "type": "short" + }, + "JavaEnable": { + "type": "short" + }, + "JavascriptEnable": { + "type": "short" + }, + "LocalEventTime": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss||strict_date_optional_time||epoch_millis" + }, + "MobilePhone": { + "type": "short" + }, + "MobilePhoneModel": { + "type": "keyword" + }, + "NetMajor": { + "type": "short" + }, + "NetMinor": { + "type": "short" + }, + "OS": { + "type": "short" + }, + "OpenerName": { + "type": "integer" + }, + "OpenstatAdID": { + "type": "keyword" + }, + "OpenstatCampaignID": { + "type": "keyword" + }, + "OpenstatServiceName": { + "type": "keyword" + }, + "OpenstatSourceID": { + "type": "keyword" + }, + "OriginalURL": { + "type": "keyword" + }, + "PageCharset": { + "type": "keyword" + }, + "ParamCurrency": { + "type": "keyword" + }, + "ParamCurrencyID": { + "type": "short" + }, + "ParamOrderID": { + "type": "keyword" + }, + "ParamPrice": { + "type": "long" + }, + "Params": { + "type": "keyword" + }, + "Referer": { + "type": "keyword" + }, + "RefererCategoryID": { + "type": "short" + }, + "RefererHash": { + "type": "long" + }, + "RefererRegionID": { + "type": "integer" + }, + "RegionID": { + "type": "integer" + }, + "RemoteIP": { + "type": "integer" + }, + "ResolutionDepth": { + "type": "short" + }, + "ResolutionHeight": { + "type": "short" + }, + "ResolutionWidth": { + "type": "short" + }, + "ResponseEndTiming": { + "type": "integer" + }, + "ResponseStartTiming": { + "type": "integer" + }, + "Robotness": { + "type": "short" 
+ }, + "SearchEngineID": { + "type": "short" + }, + "SearchPhrase": { + "type": "keyword" + }, + "SendTiming": { + "type": "integer" + }, + "Sex": { + "type": "short" + }, + "SilverlightVersion1": { + "type": "short" + }, + "SilverlightVersion2": { + "type": "short" + }, + "SilverlightVersion3": { + "type": "integer" + }, + "SilverlightVersion4": { + "type": "short" + }, + "SocialSourceNetworkID": { + "type": "short" + }, + "SocialSourcePage": { + "type": "keyword" + }, + "Title": { + "type": "keyword" + }, + "TraficSourceID": { + "type": "short" + }, + "URL": { + "type": "keyword" + }, + "URLCategoryID": { + "type": "short" + }, + "URLHash": { + "type": "long" + }, + "URLRegionID": { + "type": "integer" + }, + "UTMCampaign": { + "type": "keyword" + }, + "UTMContent": { + "type": "keyword" + }, + "UTMMedium": { + "type": "keyword" + }, + "UTMSource": { + "type": "keyword" + }, + "UTMTerm": { + "type": "keyword" + }, + "UserAgent": { + "type": "short" + }, + "UserAgentMajor": { + "type": "short" + }, + "UserAgentMinor": { + "type": "keyword" + }, + "UserID": { + "type": "long" + }, + "WatchID": { + "type": "long" + }, + "WindowClientHeight": { + "type": "short" + }, + "WindowClientWidth": { + "type": "short" + }, + "WindowName": { + "type": "integer" + }, + "WithHash": { + "type": "short" + } } } }' diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index b2fad67343ce0..b6fc3402d126d 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -291,6 +291,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.INDEX_SEARCH_QUERY_PLAN_EXPLAIN_SETTING, IndexSettings.OPTIMIZED_INDEX_ENABLED_SETTING, + IndexSettings.INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING, 
IndexSettings.OPTIMIZED_INDEX_CONCURRENT_SEGMENT_SEARCH_MODE, IndexSettings.OPTIMIZED_INDEX_CONCURRENT_SEGMENT_SEARCH_MAX_SLICE_COUNT, diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index 1b1d0619df18d..f6e00bf339f50 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -33,6 +33,7 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.sandbox.index.MergeOnFlushMergePolicy; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; @@ -863,6 +864,17 @@ private void setSearchQueryPlanExplainEnabled(Boolean searchQueryPlaneExplainEna Property.Final ); + /** + * Declares which data format is primary for a composite index. + * Required when multiple DataSourcePlugins are registered. + * Defaults to "parquet". 
+ */ + public static final Setting INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING = Setting.simpleString( + "index.composite.primary_data_format", + "parquet", + Property.IndexScope + ); + private final Index index; private final Version version; private final Logger logger; @@ -1904,40 +1916,41 @@ public boolean getStarTreeIndexEnabled() { * @param isTimeSeriesIndex true if index contains @timestamp field */ public MergePolicy getMergePolicy(boolean isTimeSeriesIndex) { - String indexScopedPolicy = scopedSettings.get(INDEX_MERGE_POLICY); - MergePolicyProvider mergePolicyProvider = null; - IndexMergePolicy indexMergePolicy = IndexMergePolicy.fromString(indexScopedPolicy); - switch (indexMergePolicy) { - case TIERED: - mergePolicyProvider = tieredMergePolicyProvider; - break; - case LOG_BYTE_SIZE: - mergePolicyProvider = logByteSizeMergePolicyProvider; - break; - case DEFAULT_POLICY: - if (isTimeSeriesIndex) { - String nodeScopedTimeSeriesIndexPolicy = TIME_SERIES_INDEX_MERGE_POLICY.get(nodeSettings); - IndexMergePolicy nodeMergePolicy = IndexMergePolicy.fromString(nodeScopedTimeSeriesIndexPolicy); - switch (nodeMergePolicy) { - case TIERED: - case DEFAULT_POLICY: - mergePolicyProvider = tieredMergePolicyProvider; - break; - case LOG_BYTE_SIZE: - mergePolicyProvider = logByteSizeMergePolicyProvider; - break; - } - } else { - mergePolicyProvider = tieredMergePolicyProvider; - } - break; - } - assert mergePolicyProvider != null : "should not happen as validation for invalid merge policy values " - + "are part of setting definition"; - if (logger.isTraceEnabled()) { - logger.trace("Index: " + this.index.getName() + ", Merge policy used: " + mergePolicyProvider); - } - return mergePolicyProvider.getMergePolicy(); + return NoMergePolicy.INSTANCE; +// String indexScopedPolicy = scopedSettings.get(INDEX_MERGE_POLICY); +// MergePolicyProvider mergePolicyProvider = null; +// IndexMergePolicy indexMergePolicy = IndexMergePolicy.fromString(indexScopedPolicy); +// switch 
(indexMergePolicy) { +// case TIERED: +// mergePolicyProvider = tieredMergePolicyProvider; +// break; +// case LOG_BYTE_SIZE: +// mergePolicyProvider = logByteSizeMergePolicyProvider; +// break; +// case DEFAULT_POLICY: +// if (isTimeSeriesIndex) { +// String nodeScopedTimeSeriesIndexPolicy = TIME_SERIES_INDEX_MERGE_POLICY.get(nodeSettings); +// IndexMergePolicy nodeMergePolicy = IndexMergePolicy.fromString(nodeScopedTimeSeriesIndexPolicy); +// switch (nodeMergePolicy) { +// case TIERED: +// case DEFAULT_POLICY: +// mergePolicyProvider = tieredMergePolicyProvider; +// break; +// case LOG_BYTE_SIZE: +// mergePolicyProvider = logByteSizeMergePolicyProvider; +// break; +// } +// } else { +// mergePolicyProvider = tieredMergePolicyProvider; +// } +// break; +// } +// assert mergePolicyProvider != null : "should not happen as validation for invalid merge policy values " +// + "are part of setting definition"; +// if (logger.isTraceEnabled()) { +// logger.trace("Index: " + this.index.getName() + ", Merge policy used: " + mergePolicyProvider); +// } +// return mergePolicyProvider.getMergePolicy(); } public T getValue(Setting setting) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/AssignedFieldType.java b/server/src/main/java/org/opensearch/index/engine/exec/AssignedFieldType.java new file mode 100644 index 0000000000000..652ddb590e7a1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/AssignedFieldType.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.apache.lucene.search.Query; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.TextSearchInfo; +import org.opensearch.index.mapper.ValueFetcher; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.lookup.SearchLookup; + +/** + * Lightweight MappedFieldType created by {@link FieldAssignmentResolver} to carry + * per-format capability flags (isIndexed, isStored, hasDocValues) for a field. + * Not used for query execution — only for the indexing write path. + */ +@ExperimentalApi +public final class AssignedFieldType extends MappedFieldType { + + private final String type; + + public AssignedFieldType(String name, String typeName, boolean isIndexed, boolean isStored, boolean hasDocValues) { + super(name, isIndexed, isStored, hasDocValues, TextSearchInfo.NONE, null); + this.type = typeName; + } + + @Override + public String typeName() { + return type; + } + + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + return null; + } + + @Override + public Query termQuery(Object value, QueryShardContext context) { + throw new UnsupportedOperationException("AssignedFieldType does not support queries"); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java index ef1ad24992256..eac8c9f4ad091 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java @@ -11,7 +11,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; -import org.opensearch.index.engine.exec.text.TextDF; +import 
org.opensearch.index.engine.exec.lucene.LuceneDataFormat; @ExperimentalApi public interface DataFormat { @@ -23,29 +23,6 @@ public interface DataFormat { void configureStore(); - static class LuceneDataFormat implements DataFormat { - @Override - public Setting dataFormatSettings() { - return null; - } - - @Override - public Setting clusterLeveldataFormatSettings() { - return null; - } - - @Override - public String name() { - return ""; - } - - @Override - public void configureStore() { - - } - } DataFormat LUCENE = new LuceneDataFormat(); - - DataFormat TEXT = new TextDF(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java index 4a3c0fc73f111..2fd5776e7976a 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java @@ -12,11 +12,18 @@ import org.opensearch.index.mapper.MappedFieldType; import java.io.IOException; + @ExperimentalApi public interface DocumentInput extends AutoCloseable { void addRowIdField(String fieldName, long rowId); + /** + * Adds a field value to this document input. + * + * @param fieldType the {@link MappedFieldType} carrying the field's name, type, and capability flags + * @param value the field value to add + */ void addField(MappedFieldType fieldType, Object value); T getFinalInput(); @@ -34,4 +41,17 @@ default void setSeqNo(long seqNo) { default void setPrimaryTerm(String fieldName, long seqNo) { // Default no-op implementations, override as needed } + + /** + * Returns the {@link EngineRole} for this document input, indicating whether the engine + * is primary, secondary, or all in a composite configuration. + * Defaults to {@link EngineRole#PRIMARY} for backward compatibility. 
+ */ + default EngineRole getEngineRole() { + return EngineRole.PRIMARY; + } + + default DataFormat getDataFormat() { + return null; + } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/EngineRole.java b/server/src/main/java/org/opensearch/index/engine/exec/EngineRole.java new file mode 100644 index 0000000000000..33ffb9dfd9dff --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/EngineRole.java @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +public enum EngineRole { + PRIMARY, + SECONDARY, + ALL +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java new file mode 100644 index 0000000000000..d3a4c03e7aad1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignmentResolver.java @@ -0,0 +1,189 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; + +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Resolves which data format handles which capabilities for each mapped field. + * Uses primary-gets-priority strategy: if the primary format supports a capability + * for a field's type, it wins. Secondary formats only get capabilities the primary can't handle. + * + *

Resolution is keyed by field name (not type name), so two fields of the same type + * with different mapping attributes receive different capability sets. + */ +@ExperimentalApi +public final class FieldAssignmentResolver { + + private static final Logger logger = LogManager.getLogger(FieldAssignmentResolver.class); + + private FieldAssignmentResolver() {} + + /** + * Resolves field assignments for all mapped fields. + * + * @param registry the field support registry with type-level format capabilities + * @param roleMap format → engine role mapping + * @param fieldTypes all mapped field types from the mapper service + * @return per-format FieldAssignments keyed by field name + */ + public static Map resolve( + FieldSupportRegistry registry, + Map roleMap, + Iterable fieldTypes + ) { + // Find primary format + DataFormat primaryFormat = null; + for (Map.Entry entry : roleMap.entrySet()) { + if (entry.getValue() == EngineRole.PRIMARY) { + primaryFormat = entry.getKey(); + break; + } + } + + // Accumulate capabilities per field name per format before creating AssignedFieldType objects + Map>> perFormatCaps = new HashMap<>(); + // Track typeName per fieldName for AssignedFieldType construction + Map fieldNameToTypeName = new HashMap<>(); + for (DataFormat format : roleMap.keySet()) { + perFormatCaps.put(format, new HashMap<>()); + } + + for (MappedFieldType fieldType : fieldTypes) { + // Skip internal metadata fields (e.g. 
_id, _index, _source) — managed by the engine, not data format plugins + if (fieldType.typeName().startsWith("_")) { + continue; + } + String fieldName = fieldType.name(); + String typeName = fieldType.typeName(); + fieldNameToTypeName.put(fieldName, typeName); + resolveField(registry, roleMap, primaryFormat, perFormatCaps, fieldType, fieldName, typeName); + } + + // Convert accumulated capabilities into AssignedFieldType objects and wrap into FieldAssignments + Map result = new HashMap<>(); + for (Map.Entry>> formatEntry : perFormatCaps.entrySet()) { + DataFormat format = formatEntry.getKey(); + Map> fieldCaps = formatEntry.getValue(); + Map assignedTypes = new HashMap<>(); + for (Map.Entry> fieldEntry : fieldCaps.entrySet()) { + String fieldName = fieldEntry.getKey(); + EnumSet caps = fieldEntry.getValue(); + if (!caps.isEmpty()) { + String typeName = fieldNameToTypeName.get(fieldName); + assignedTypes.put( + fieldName, + new AssignedFieldType( + fieldName, + typeName, + caps.contains(FieldCapability.INDEX), + caps.contains(FieldCapability.STORE), + caps.contains(FieldCapability.DOC_VALUES) + ) + ); + } + } + result.put(format, new FieldAssignments(assignedTypes)); + } + return result; + } + + private static void resolveField( + FieldSupportRegistry registry, + Map roleMap, + DataFormat primaryFormat, + Map>> perFormatCaps, + MappedFieldType fieldType, + String fieldName, + String typeName + ) { + // Determine which capabilities are required by this field's mapping attributes + Set required = EnumSet.noneOf(FieldCapability.class); + if (fieldType.isSearchable()) { + required.add(FieldCapability.INDEX); + } + if (fieldType.hasDocValues()) { + required.add(FieldCapability.DOC_VALUES); + } + if (fieldType.isStored()) { + required.add(FieldCapability.STORE); + } + + logger.debug( + "[COMPOSITE_DEBUG] resolveField: field=[{}] type=[{}] required capabilities={} (isSearchable={}, hasDocValues={}, isStored={})", + fieldName, + typeName, + required, + 
fieldType.isSearchable(), + fieldType.hasDocValues(), + fieldType.isStored() + ); + + // For each required capability, assign to primary if it supports it, else to secondary + for (FieldCapability cap : required) { + boolean primaryHasCap = primaryFormat != null && registry.hasCapability(typeName, primaryFormat, cap); + logger.debug( + "[COMPOSITE_DEBUG] capability [{}]: primary format [{}] hasCapability={}, registry capabilities for type={}", + cap, + primaryFormat != null ? primaryFormat.name() : "null", + primaryHasCap, + primaryFormat != null ? registry.getCapabilities(typeName, primaryFormat) : "N/A" + ); + + if (primaryHasCap) { + // Primary handles this capability + perFormatCaps.get(primaryFormat).computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)).add(cap); + logger.debug("[COMPOSITE_DEBUG] -> assigned [{}] to PRIMARY format [{}]", cap, primaryFormat.name()); + } else { + // Find a secondary format that supports it + boolean assignedToSecondary = false; + for (Map.Entry entry : roleMap.entrySet()) { + DataFormat secondaryFormat = entry.getKey(); + EngineRole role = entry.getValue(); + boolean isSecondary = role != EngineRole.PRIMARY; + boolean secondaryHasCap = registry.hasCapability(typeName, secondaryFormat, cap); + logger.debug( + "[COMPOSITE_DEBUG] checking secondary format [{}] role={} isSecondary={} hasCapability={} registryCapabilities={}", + secondaryFormat.name(), + role, + isSecondary, + secondaryHasCap, + registry.getCapabilities(typeName, secondaryFormat) + ); + + if (isSecondary && secondaryHasCap) { + perFormatCaps.get(secondaryFormat) + .computeIfAbsent(fieldName, k -> EnumSet.noneOf(FieldCapability.class)) + .add(cap); + logger.debug("[COMPOSITE_DEBUG] -> assigned [{}] to SECONDARY format [{}]", cap, secondaryFormat.name()); + assignedToSecondary = true; + break; + } + } + if (!assignedToSecondary) { + logger.warn( + "[COMPOSITE_DEBUG] -> capability [{}] for field=[{}] type=[{}] NOT assigned to any format!", + cap, + 
fieldName, + typeName + ); + } + } + } + } +} + diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java new file mode 100644 index 0000000000000..55cc2f19cde22 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldAssignments.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; + +import java.util.Map; + +/** + * Per-format view of field capability assignments resolved by the composite engine. + * Maps fieldName → MappedFieldType that this format is responsible for. + * + *

Used by DocumentInput implementations to decide whether to write a given field. + * If a field name has no entry, this format should skip it entirely. + */ +@ExperimentalApi +public class FieldAssignments { + + private final Map fieldTypes; + + public FieldAssignments(Map fieldTypes) { + this.fieldTypes = Map.copyOf(fieldTypes); + } + + /** + * Returns true if this format should handle the given field name. + */ + public boolean shouldHandle(String fieldName) { + return fieldTypes.containsKey(fieldName); + } + + /** + * Returns the MappedFieldType for a given field name, or null if none. + */ + public MappedFieldType getFieldType(String fieldName) { + return fieldTypes.get(fieldName); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldCapability.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldCapability.java new file mode 100644 index 0000000000000..299d584cfa08e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldCapability.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +/** + * Declares what a data format can do with a given field type. + */ +@ExperimentalApi +public enum FieldCapability { + /** The format can persist raw field values for retrieval (stored fields). */ + STORE, + /** The format can build an inverted index for search (indexed fields). */ + INDEX, + /** The format can store columnar data for sorting and aggregations. 
*/ + DOC_VALUES +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FieldSupportRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/FieldSupportRegistry.java new file mode 100644 index 0000000000000..f84cdcf381ec0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FieldSupportRegistry.java @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * Central registry tracking which data formats have which capabilities for which field types. + * Keyed by fieldTypeName → DataFormat → Set<FieldCapability>. + */ +@ExperimentalApi +public class FieldSupportRegistry { + + private final Map>> registry = new HashMap<>(); + + /** + * Registers capabilities for a field type and data format. + * Multiple calls for the same (fieldType, format) pair merge capabilities. + */ + public void register(String fieldTypeName, DataFormat format, Set capabilities) { + registry.computeIfAbsent(fieldTypeName, k -> new HashMap<>()) + .merge(format, EnumSet.copyOf(capabilities), (existing, incoming) -> { + existing.addAll(incoming); + return existing; + }); + } + + /** + * Returns the set of capabilities a format has for a field type, or empty set if none. + */ + public Set getCapabilities(String fieldTypeName, DataFormat format) { + Map> formatMap = registry.get(fieldTypeName); + if (formatMap == null) { + return Collections.emptySet(); + } + Set caps = formatMap.get(format); + return caps != null ? 
Collections.unmodifiableSet(caps) : Collections.emptySet(); + } + + /** + * Returns true if the format has at least one capability for the field type. + */ + public boolean hasAnyCapability(String fieldTypeName, DataFormat format) { + return !getCapabilities(fieldTypeName, format).isEmpty(); + } + + /** + * Returns true if the format has a specific capability for the field type. + */ + public boolean hasCapability(String fieldTypeName, DataFormat format, FieldCapability capability) { + return getCapabilities(fieldTypeName, format).contains(capability); + } + + /** + * Returns all field type names a format has any capabilities for. + */ + public Set supportedFieldTypes(DataFormat format) { + return registry.entrySet() + .stream() + .filter(e -> e.getValue().containsKey(format)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } + + /** + * Returns all data formats registered in this registry. + */ + public Set allFormats() { + return registry.values().stream().flatMap(m -> m.keySet().stream()).collect(Collectors.toSet()); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java index 702e64e9f8a20..0f46971034e5d 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java @@ -18,8 +18,9 @@ public interface IndexingExecutionEngine extends Closeable { - List supportedFieldTypes(); + List supportedFieldTypes(boolean isPrimaryEngine); + // Writer should know it's a primary writer or not? Writer> createWriter(long writerGeneration) throws IOException; // A writer responsible for data format vended by this engine. 
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/Writer.java b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java index d0ad4d35b3fc2..a3f908de17210 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/Writer.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java @@ -18,7 +18,7 @@ public interface Writer

> { void sync() throws IOException; - void close(); + void close() throws IOException; P newDocumentInput(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java index 932b12126b5ae..9aba47cb565ad 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java @@ -16,7 +16,6 @@ import java.io.Serializable; import java.nio.file.Path; import java.util.HashSet; -import java.util.List; import java.util.Set; public class WriterFileSet implements Serializable, Writeable { @@ -25,18 +24,40 @@ public class WriterFileSet implements Serializable, Writeable { private final long writerGeneration; private final Set files; private final long numRows; + private boolean isRefreshed; public WriterFileSet(Path directory, long writerGeneration, long numRows) { this.numRows = numRows; this.files = new HashSet<>(); this.writerGeneration = writerGeneration; this.directory = directory.toString(); + this.isRefreshed = false; } + public WriterFileSet(Path directory, long writerGeneration, long numRows, boolean isRefreshed) { + this.numRows = numRows; + this.files = new HashSet<>(); + this.writerGeneration = writerGeneration; + this.directory = directory.toString(); + this.isRefreshed = isRefreshed; + } + + public WriterFileSet withDirectoryAndFiles(String newDirectory, Set files) { + return WriterFileSet.builder() + .directory(Path.of(newDirectory)) + .writerGeneration(this.writerGeneration) + .addNumRows(this.numRows) + .isRefreshed(this.isRefreshed) + .addFiles(files) + .build(); + } + + public WriterFileSet(StreamInput in) throws IOException { this.directory = in.readString(); this.writerGeneration = in.readLong(); this.numRows = in.readVInt(); + this.isRefreshed = in.readBoolean(); int fileCount = in.readVInt(); this.files = new HashSet<>(fileCount); @@ -50,6 +71,7 @@ public 
WriterFileSet withDirectory(String newDirectory) { .directory(Path.of(newDirectory)) .writerGeneration(this.writerGeneration) .addFiles(this.files) + .isRefreshed(this.isRefreshed) .build(); } @@ -61,6 +83,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(directory); out.writeLong(writerGeneration); out.writeVInt((int) numRows); + out.writeBoolean(isRefreshed); out.writeVInt(files.size()); for (String file : files) { out.writeString(file); @@ -123,10 +146,20 @@ public static Builder builder() { return new Builder(); } + public boolean refresh() { + // Dummy re-write + return isRefreshed; + } + + public void setRefreshed(){ + this.isRefreshed = true; + } + public static class Builder { private Path directory; private Long writerGeneration; private long numRows; + private boolean isRefreshed = false; private final Set files = new HashSet<>(); public Builder directory(Path directory) { @@ -163,9 +196,14 @@ public WriterFileSet build() { throw new IllegalStateException("writerGeneration must be set"); } - WriterFileSet fileSet = new WriterFileSet(directory, writerGeneration, numRows); + WriterFileSet fileSet = new WriterFileSet(directory, writerGeneration, numRows, isRefreshed); fileSet.files.addAll(this.files); return fileSet; } + + public Builder isRefreshed(boolean isRefreshed) { + this.isRefreshed = isRefreshed; + return this; + } } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java index 4fcfd3117221a..89d7b963087b1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java @@ -11,14 +11,16 @@ import org.opensearch.index.engine.CommitStats; import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.coord.Segment; 
import java.io.Closeable; import java.io.IOException; +import java.util.List; import java.util.Map; public interface Committer extends Closeable { - void addLuceneIndexes(CatalogSnapshot catalogSnapshot); + void addLuceneIndexes(List catalogSnapshot) throws IOException; CommitPoint commit(Iterable> commitData, CatalogSnapshot catalogSnapshot); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java index fc3272087ab95..cb4ed5c4f6057 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java @@ -9,14 +9,20 @@ package org.opensearch.index.engine.exec.commit; import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.misc.store.HardlinkCopyDirectoryWrapper; import org.apache.lucene.store.NIOFSDirectory; import org.opensearch.common.collect.MapBuilder; import org.opensearch.common.concurrent.GatedCloseable; import org.opensearch.common.logging.Loggers; +import org.opensearch.common.lucene.Lucene; import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.engine.CombinedDeletionPolicy; import org.opensearch.index.engine.CommitStats; @@ -26,15 +32,20 @@ import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.WriterFileSet; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.coord.Segment; +import 
org.opensearch.index.engine.exec.lucene.LuceneDataFormat; import org.opensearch.index.store.Store; import org.opensearch.index.translog.TranslogDeletionPolicy; import java.io.IOException; import java.nio.file.Path; import java.util.Base64; -import java.util.Collection; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.function.Function; import java.util.function.LongSupplier; +import java.util.stream.Collectors; public class LuceneCommitEngine implements Committer { @@ -50,6 +61,7 @@ public LuceneCommitEngine(Store store, TranslogDeletionPolicy translogDeletionPo this.combinedDeletionPolicy = new CombinedDeletionPolicy(logger, translogDeletionPolicy, null, globalCheckpointSupplier); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(); indexWriterConfig.setIndexDeletionPolicy(combinedDeletionPolicy); + indexWriterConfig.setMergePolicy(NoMergePolicy.INSTANCE); this.store = store; this.lastCommittedSegmentInfos = store.readLastCommittedSegmentsInfo(); if (primaryMode) { @@ -58,20 +70,48 @@ public LuceneCommitEngine(Store store, TranslogDeletionPolicy translogDeletionPo } @Override - public void addLuceneIndexes(CatalogSnapshot catalogSnapshot) { - Collection luceneFileCollection = catalogSnapshot.getSearchableFiles(DataFormat.LUCENE.name()); - luceneFileCollection.forEach(writerFileSet -> { + public synchronized void addLuceneIndexes(List segments) throws IOException { + + for(Segment segment : segments) { + WriterFileSet wfs = segment.getDFGroupedSearchableFiles().get(LuceneDataFormat.LUCENE.name()); + if(wfs == null || wfs.refresh()) continue; + try { - indexWriter.addIndexes(new NIOFSDirectory(Path.of(writerFileSet.getDirectory()))); + indexWriter.addIndexes(new HardlinkCopyDirectoryWrapper(new NIOFSDirectory(Path.of(wfs.getDirectory())))); + wfs.setRefreshed(); } catch (IOException e) { - throw new RuntimeException(e); + throw new RuntimeException("Not able to copy it to the main writer in commiter: {}", e); } - 
}); + } + + final Map segmentByGeneration = + segments.stream().collect(Collectors.toMap(Segment::getGeneration, Function.identity())); + + try (DirectoryReader dr = DirectoryReader.open(indexWriter)){ + for(LeafReaderContext leaf : dr.getContext().leaves()) { + SegmentCommitInfo segmentCommitInfo = Lucene.segmentReader(leaf.reader()).getSegmentInfo(); + String generationAttr = segmentCommitInfo.info.getAttribute("writer_generation"); + if(generationAttr == null) { + throw new RuntimeException("failed to fetch writer generation"); + } + long writerGeneration = Long.parseLong(generationAttr); + if (segmentByGeneration.containsKey(writerGeneration)) { + WriterFileSet writerFileSet = + segmentByGeneration.get(writerGeneration).getDFGroupedSearchableFiles().get(DataFormat.LUCENE.name()); + Path oldDirectoryPath = Path.of(writerFileSet.getDirectory()); + segmentByGeneration.get(writerGeneration).addSearchableFiles( + DataFormat.LUCENE.name(), + writerFileSet.withDirectoryAndFiles(indexWriter.getDirectory().toString(), new HashSet<>(segmentCommitInfo.files())) + ); + // Deletes the older path once the file path has been updated + IOUtils.rm(oldDirectoryPath); + } + } + } } @Override public synchronized CommitPoint commit(Iterable> commitData, CatalogSnapshot catalogSnapshot) { - addLuceneIndexes(catalogSnapshot); indexWriter.setLiveCommitData(commitData); try { indexWriter.commit(); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java index c17a3a63c081e..28ba22c7675f8 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -8,9 +8,13 @@ package org.opensearch.index.engine.exec.composite; +import org.apache.logging.log4j.LogManager; +import 
org.apache.logging.log4j.Logger; import org.apache.lucene.util.SetOnce; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.AssignedFieldType; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.FlushIn; import org.opensearch.index.engine.exec.RowIdGenerator; @@ -31,10 +35,10 @@ import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; -import java.util.stream.Collectors; public class CompositeDataFormatWriter implements Writer, Lock { + private static final Logger logger = LogManager.getLogger(CompositeDataFormatWriter.class); private final List>>> writers; private final Runnable postWrite; private final ReentrantLock lock; @@ -43,6 +47,7 @@ public class CompositeDataFormatWriter implements Writer fieldAssignmentsMap; public static final String ROW_ID = "___row_id"; public CompositeDataFormatWriter(CompositeIndexingExecutionEngine engine, long writerGeneration) { @@ -50,6 +55,7 @@ public CompositeDataFormatWriter(CompositeIndexingExecutionEngine engine, long w this.lock = new ReentrantLock(); this.aborted = false; this.writerGeneration = writerGeneration; + this.fieldAssignmentsMap = engine.getFieldAssignmentsMap(); engine.getDelegates().forEach(delegate -> { try { writers.add(new AbstractMap.SimpleImmutableEntry<>(delegate.getDataFormat(), delegate.createWriter(writerGeneration))); @@ -85,7 +91,7 @@ public void sync() throws IOException { } @Override - public void close() { + public void close() throws IOException { for (Map.Entry>> writerPair : writers) { writerPair.getValue().close(); } @@ -93,10 +99,15 @@ public void close() { @Override public CompositeDocumentInput newDocumentInput() { + List> inputs = new ArrayList<>(); + for (Map.Entry>> writerEntry : writers) { + 
inputs.add(writerEntry.getValue().newDocumentInput()); + } CompositeDocumentInput compositeDocumentInput = new CompositeDocumentInput( - writers.stream().map(Map.Entry::getValue).map(Writer::newDocumentInput).collect(Collectors.toList()), + inputs, + fieldAssignmentsMap, this, postWrite ); @@ -159,14 +170,21 @@ public long getWriterGeneration() { public static class CompositeDocumentInput implements DocumentInput>> { List> inputs; + private final Map fieldAssignmentsMap; CompositeDataFormatWriter writer; Runnable onClose; private long version = -1; private long seqNo = -2L; private long primaryTerm = 0; - public CompositeDocumentInput(List> inputs, CompositeDataFormatWriter writer, Runnable onClose) { + public CompositeDocumentInput( + List> inputs, + Map fieldAssignmentsMap, + CompositeDataFormatWriter writer, + Runnable onClose + ) { this.inputs = inputs; + this.fieldAssignmentsMap = fieldAssignmentsMap; this.writer = writer; this.onClose = onClose; } @@ -178,23 +196,56 @@ public void addRowIdField(String fieldName, long rowId) { } } - @Override + /** + * Entry point from the mapper layer. Resolves per-format {@link MappedFieldType} + * using each delegate's {@link FieldAssignments}, then delegates to the format-specific + * {@link DocumentInput#addField(MappedFieldType, Object)}. + * Skips delegation if no field type exists for the field name in that format. 
+ */ public void addField(MappedFieldType fieldType, Object value) { + logger.debug("[COMPOSITE_DEBUG] addField: field=[{}] type=[{}] value=[{}] — resolving per-format field types for {} inputs", + fieldType.name(), fieldType.typeName(), value, inputs.size()); for (DocumentInput input : inputs) { - input.addField(fieldType, value); + FieldAssignments assignments = fieldAssignmentsMap.get(input.getDataFormat()); + if (assignments == null) { + continue; + } + MappedFieldType perFormatType = assignments.getFieldType(fieldType.name()); + if (perFormatType == null) { + continue; + } + input.addField(perFormatType, value); } } @Override public void setVersion(long version) { this.version = version; - addField(VersionFieldMapper.VersionFieldType.INSTANCE, version); + MappedFieldType versionType = new AssignedFieldType( + VersionFieldMapper.NAME, + VersionFieldMapper.CONTENT_TYPE, + false, + false, + true + ); + for (DocumentInput input : inputs) { + input.addField(versionType, version); + } } @Override public void setSeqNo(long seqNo) { this.seqNo = seqNo; - addField(SeqNoFieldMapper.SeqNoFieldType.INSTANCE, seqNo); + MappedFieldType seqNoType = new AssignedFieldType( + SeqNoFieldMapper.NAME, + SeqNoFieldMapper.CONTENT_TYPE, + true, + false, + true + ); + for (DocumentInput input : inputs) { + input.addField(seqNoType, seqNo); + } } @Override diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java new file mode 100644 index 0000000000000..9003b30100bdd --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeFieldValidator.java @@ -0,0 +1,127 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.composite; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.FieldSupportRegistry; +import org.opensearch.index.mapper.MappedFieldType; + +import java.util.Map; + +/** + * Stateless validator that checks field-to-capability compatibility using the + * {@link FieldSupportRegistry} at index creation or mapping update time. + *

+ * Internal metadata fields (type names starting with {@code _}) are skipped + * because they are managed by the engine itself, not by data format plugins. + */ +@ExperimentalApi +public final class CompositeFieldValidator { + + private CompositeFieldValidator() {} + + private static final Logger logger = LogManager.getLogger(CompositeFieldValidator.class); + + /** + * Returns true if the field type is an internal metadata field that should + * be excluded from composite validation. Internal fields have type names + * starting with '_' (e.g. _id, _index, _source, _seq_no, _routing). + */ + private static boolean isInternalMetadataField(MappedFieldType fieldType) { + return fieldType.typeName().startsWith("_"); + } + + /** + * Validates that the primary data format has at least one capability + * registered for every mapped field type. + * Throws IllegalArgumentException if any field lacks primary coverage. + */ + public static void validatePrimaryCoverage( + FieldSupportRegistry registry, + Map roleMap, + Iterable fieldTypes + ) { + DataFormat primaryFormat = null; + for (Map.Entry entry : roleMap.entrySet()) { + if (entry.getValue() == EngineRole.PRIMARY) { + primaryFormat = entry.getKey(); + break; + } + } + if (primaryFormat == null) { + return; + } + for (MappedFieldType fieldType : fieldTypes) { + if (isInternalMetadataField(fieldType)) { + logger.debug("[COMPOSITE_DEBUG] validatePrimaryCoverage: SKIP internal metadata field=[{}] type=[{}]", + fieldType.name(), fieldType.typeName()); + continue; + } + if (!registry.hasAnyCapability(fieldType.typeName(), primaryFormat)) { + throw new IllegalArgumentException( + "Field [" + fieldType.name() + "] of type [" + fieldType.typeName() + + "] has no capabilities registered for primary data format [" + primaryFormat.name() + "]" + ); + } + logger.debug("[COMPOSITE_DEBUG] validatePrimaryCoverage: OK field=[{}] type=[{}] has capabilities {} in primary format [{}]", + fieldType.name(), fieldType.typeName(), 
registry.getCapabilities(fieldType.typeName(), primaryFormat), primaryFormat.name()); + } + } + + /** + * Validates that every field's enabled mapping properties have at least one + * data format with the corresponding capability: + * isSearchable() → INDEX, hasDocValues() → DOC_VALUES, isStored() → STORE. + * Throws IllegalArgumentException if any property lacks coverage. + */ + public static void validateMappingPropertyCoverage( + FieldSupportRegistry registry, + Iterable fieldTypes + ) { + for (MappedFieldType fieldType : fieldTypes) { + if (isInternalMetadataField(fieldType)) { + continue; + } + String typeName = fieldType.typeName(); + if (fieldType.isSearchable()) { + checkCapabilityCoverage(registry, fieldType, typeName, FieldCapability.INDEX, "index"); + } + if (fieldType.hasDocValues()) { + checkCapabilityCoverage(registry, fieldType, typeName, FieldCapability.DOC_VALUES, "doc_values"); + } + if (fieldType.isStored()) { + checkCapabilityCoverage(registry, fieldType, typeName, FieldCapability.STORE, "store"); + } + } + } + + private static void checkCapabilityCoverage( + FieldSupportRegistry registry, + MappedFieldType fieldType, + String typeName, + FieldCapability requiredCapability, + String propertyName + ) { + for (DataFormat format : registry.allFormats()) { + if (registry.hasCapability(typeName, format, requiredCapability)) { + return; + } + } + throw new IllegalArgumentException( + "Field [" + fieldType.name() + "] of type [" + typeName + + "] requires [" + requiredCapability + "] capability (mapping property [" + propertyName + + "]=true) but no data format provides it" + ); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java index 5ce88dea6bd67..47d624c644819 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java +++ 
b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -8,6 +8,9 @@ package org.opensearch.index.engine.exec.composite; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.coord.Segment; import java.util.Collections; @@ -17,6 +20,10 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignmentResolver; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.FileInfos; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.engine.exec.Merger; @@ -26,7 +33,6 @@ import org.opensearch.index.engine.exec.coord.Any; import org.opensearch.index.engine.exec.coord.CatalogSnapshot; import org.opensearch.index.engine.exec.coord.CompositeDataFormatWriterPool; -import org.opensearch.index.engine.exec.text.TextEngine; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.DataSourcePlugin; @@ -45,8 +51,14 @@ public class CompositeIndexingExecutionEngine implements IndexingExecutionEngine private final Any dataFormat; private final AtomicLong writerGeneration; private final List> delegates = new ArrayList<>(); + private final FieldSupportRegistry fieldSupportRegistry; + private final Map roleMap; + private final Map fieldAssignmentsMap; + + private static final Logger logger = LogManager.getLogger(CompositeIndexingExecutionEngine.class); public CompositeIndexingExecutionEngine( + EngineConfig engineConfig, MapperService mapperService, PluginsService pluginsService, ShardPath shardPath, @@ -54,26 +66,121 @@ 
public CompositeIndexingExecutionEngine( IndexSettings indexSettings ) { this.writerGeneration = new AtomicLong(initialWriterGeneration); + List dataSourcePlugins = pluginsService.filterPlugins(DataSourcePlugin.class) + .stream().toList(); + if (dataSourcePlugins.isEmpty()) throw new IllegalStateException("No data formats found, can't initialise Engine"); + + boolean singlePlugin = dataSourcePlugins.size() == 1; + + // Setting-based role resolution + String primaryDataFormatName = indexSettings.getValue(IndexSettings.INDEX_COMPOSITE_PRIMARY_DATA_FORMAT_SETTING); + this.roleMap = resolveRoles(primaryDataFormatName, dataSourcePlugins, singlePlugin); + logger.debug("[COMPOSITE_DEBUG] Resolved engine roles: {}", roleMap.entrySet().stream() + .map(e -> e.getKey().name() + " -> " + e.getValue()) + .collect(java.util.stream.Collectors.joining(", "))); + + // Build FieldSupportRegistry from plugin registrations + this.fieldSupportRegistry = new FieldSupportRegistry(); + for (DataSourcePlugin plugin : dataSourcePlugins) { + plugin.registerFieldSupport(fieldSupportRegistry); + } + logger.debug("[COMPOSITE_DEBUG] FieldSupportRegistry built. 
Registered formats: {}", + fieldSupportRegistry.allFormats().stream().map(DataFormat::name).collect(java.util.stream.Collectors.joining(", "))); + + // Validate field capabilities if composite (multiple plugins) + if (!singlePlugin) { + CompositeFieldValidator.validatePrimaryCoverage(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); + CompositeFieldValidator.validateMappingPropertyCoverage(fieldSupportRegistry, mapperService.fieldTypes()); + logger.debug("[COMPOSITE_DEBUG] Composite field validation passed for all mapped fields"); + } + + // Resolve field assignments: which format handles which capability for each field + // Both single-plugin and multi-plugin modes go through per-field resolution + this.fieldAssignmentsMap = FieldAssignmentResolver.resolve(fieldSupportRegistry, roleMap, mapperService.fieldTypes()); + logger.debug("[COMPOSITE_DEBUG] Resolved per-field assignments for {} format(s)", fieldAssignmentsMap.size()); + + // Determine primary format from role map + DataFormat primaryDataFormat = roleMap.entrySet().stream() + .filter(e -> e.getValue() == EngineRole.PRIMARY) + .map(Map.Entry::getKey) + .findFirst() + .orElseThrow(); + List dataFormats = new ArrayList<>(); - try { - DataSourcePlugin plugin = pluginsService.filterPlugins(DataSourcePlugin.class) - .stream() - .findAny() - .orElseThrow(() -> new IllegalArgumentException("dataformat [" + DataFormat.TEXT + "] is not registered.")); + for (DataSourcePlugin plugin : dataSourcePlugins) { dataFormats.add(plugin.getDataFormat()); - delegates.add(plugin.indexingEngine(mapperService, shardPath, indexSettings)); - } catch (NullPointerException e) { - delegates.add(new TextEngine()); - } - this.dataFormat = new Any(dataFormats, dataFormats.getFirst()); - this.dataFormatWriterPool = - new CompositeDataFormatWriterPool( - () -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), - LinkedList::new, - Runtime.getRuntime().availableProcessors() + boolean isPrimary = 
roleMap.get(plugin.getDataFormat()) == EngineRole.PRIMARY; + FieldAssignments assignments = fieldAssignmentsMap.get(plugin.getDataFormat()); + IndexingExecutionEngine indexingEngine = plugin.indexingEngine( + engineConfig, mapperService, isPrimary, shardPath, indexSettings, assignments ); + delegates.add(indexingEngine); + } + + this.dataFormat = new Any(dataFormats, primaryDataFormat); + + // logger.debug("Registered dataformats: {}", this.dataFormat); + this.dataFormatWriterPool = new CompositeDataFormatWriterPool( + () -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), + LinkedList::new, + Runtime.getRuntime().availableProcessors() + ); + } + + /** + * Pure function: resolves engine roles from the primary data format setting. + * Single plugin → always PRIMARY regardless of setting. + * Valid setting → matching format is PRIMARY, others SECONDARY. + * Unknown format name → IllegalArgumentException. + * Empty setting with multiple plugins → IllegalArgumentException. + */ + static Map resolveRoles( + String primaryDataFormatName, + List plugins, + boolean singlePlugin + ) { + Map roles = new HashMap<>(); + if (singlePlugin) { + roles.put(plugins.get(0).getDataFormat(), EngineRole.PRIMARY); + return roles; + } + if (primaryDataFormatName != null && !primaryDataFormatName.isEmpty()) { + boolean found = false; + for (DataSourcePlugin plugin : plugins) { + if (plugin.getDataFormat().name().equals(primaryDataFormatName)) { + roles.put(plugin.getDataFormat(), EngineRole.PRIMARY); + found = true; + } else { + roles.put(plugin.getDataFormat(), EngineRole.SECONDARY); + } + } + if (!found) { + throw new IllegalArgumentException( + "Unrecognized primary data format [" + primaryDataFormatName + "]. Available: " + + plugins.stream().map(p -> p.getDataFormat().name()).toList() + ); + } + return roles; + } + throw new IllegalArgumentException( + "index.composite.primary_data_format is required when multiple data formats are registered. 
Available: " + + plugins.stream().map(p -> p.getDataFormat().name()).toList() + ); + } + + public FieldSupportRegistry getFieldSupportRegistry() { + return fieldSupportRegistry; + } + + public Map getRoleMap() { + return Collections.unmodifiableMap(roleMap); + } + + public Map getFieldAssignmentsMap() { + return Collections.unmodifiableMap(fieldAssignmentsMap); } + @Override public Any getDataFormat() { return dataFormat; @@ -104,12 +211,13 @@ public long getCurrentWriterGeneration() { } @Override - public List supportedFieldTypes() { + public List supportedFieldTypes(boolean isPrimaryEngine) { throw new UnsupportedOperationException(); } @Override public void loadWriterFiles(CatalogSnapshot catalogSnapshot) throws IOException { + // If this get's called will it not throw exception? for (IndexingExecutionEngine delegate : delegates) { delegate.loadWriterFiles(catalogSnapshot); } @@ -118,6 +226,7 @@ public void loadWriterFiles(CatalogSnapshot catalogSnapshot) throws IOException @Override public void deleteFiles(Map> filesToDelete) throws IOException { for (IndexingExecutionEngine delegate : delegates) { + // Why creating a map when we are always passing for that format here? 
Map> formatSpecificFilesToDelete = new HashMap<>(); formatSpecificFilesToDelete.put(delegate.getDataFormat().name(), filesToDelete.get(delegate.getDataFormat().name())); delegate.deleteFiles(formatSpecificFilesToDelete); @@ -134,17 +243,21 @@ public Writer createCompositeW } @Override - public RefreshResult refresh(RefreshInput ignore) throws IOException { + public RefreshResult refresh(RefreshInput refreshInput) throws IOException { RefreshResult finalResult; try { List dataFormatWriters = dataFormatWriterPool.checkoutAll(); - List refreshedSegment = ignore.getExistingSegments(); + List refreshedSegment = refreshInput.getExistingSegments(); List newSegmentList = new ArrayList<>(); + logger.debug("[COMPOSITE_DEBUG] CompositeIndexingExecutionEngine.refresh: flushing {} writers, existing segments={}", + dataFormatWriters.size(), refreshedSegment.size()); // flush to disk for (CompositeDataFormatWriter dataFormatWriter : dataFormatWriters) { Segment newSegment = new Segment(dataFormatWriter.getWriterGeneration()); FileInfos fileInfos = dataFormatWriter.flush(null); fileInfos.getWriterFilesMap().forEach((key, value) -> { + logger.debug("[COMPOSITE_DEBUG] writer gen={} flushed format=[{}] files={}", + dataFormatWriter.getWriterGeneration(), key.name(), value.getFiles()); newSegment.addSearchableFiles(key.name(), value); }); dataFormatWriter.close(); @@ -154,8 +267,10 @@ public RefreshResult refresh(RefreshInput ignore) throws IOException { } if (newSegmentList.isEmpty()) { + logger.debug("[COMPOSITE_DEBUG] No new segments produced from flush"); return null; } else { + logger.debug("[COMPOSITE_DEBUG] Produced {} new segments from flush", newSegmentList.size()); refreshedSegment.addAll(newSegmentList); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java index aa51849b5dbd1..e97e5fc0bcae9 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java 
+++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java @@ -54,4 +54,12 @@ public void configureStore() { dataFormat.configureStore(); } } + + @Override + public String toString() { + return "Any{" + + "dataFormats=" + dataFormats + + ", primaryDataFormat=" + primaryDataFormat + + '}'; + } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java index 29c8e7e0e3449..d95fb8d3159c4 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java @@ -10,7 +10,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.index.engine.exec.coord.Segment; import org.opensearch.index.engine.exec.DataFormat; import org.opensearch.index.engine.exec.RefreshResult; @@ -59,7 +58,7 @@ public CatalogSnapshotManager(CompositeEngine compositeEngine, Committer composi }); indexFileDeleter.set(new IndexFileDeleter(compositeEngine, latestCatalogSnapshot, shardPath, deleteUnreferencedFiles)); - logger.debug("[RESET_DEBUG] IndexFileDeleter created, latestCatalogSnapshot={}, deleteUnreferencedFiles={}", latestCatalogSnapshot, deleteUnreferencedFiles); + logger.debug("[COMPOSITE_DEBUG] IndexFileDeleter created, latestCatalogSnapshot={}, deleteUnreferencedFiles={}", latestCatalogSnapshot, deleteUnreferencedFiles); if(latestCatalogSnapshot != null) { latestCatalogSnapshot.setIndexFileDeleterSupplier(indexFileDeleter::get); latestCatalogSnapshot.setCatalogSnapshotMap(catalogSnapshotMap); @@ -80,16 +79,10 @@ public void close() { }; } - public synchronized void applyRefreshResult(RefreshResult refreshResult) { - commitCatalogSnapshot( - new CompositeEngineCatalogSnapshot( - latestCatalogSnapshot.getId() + 1, - latestCatalogSnapshot.getVersion() + 1, - 
refreshResult.getRefreshedSegments(), - catalogSnapshotMap, - indexFileDeleter::get - ) - ); + public synchronized void applyRefreshResult(RefreshResult refreshResult) throws IOException { + // Will refresh always trigger a commit? --> It should be a flush? + // ApplyRefreshResult --> CatalogSnapshot --> Committer(add Indexes) + advanceCatalogSnapshot(refreshResult.getRefreshedSegments()); } public synchronized void applyReplicationChanges(CatalogSnapshot catalogSnapshot, ShardPath shardPath) { @@ -112,7 +105,7 @@ public synchronized void applyReplicationChanges(CatalogSnapshot catalogSnapshot } } - public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge oneMerge) { + public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge oneMerge) throws IOException { List segmentList = new ArrayList<>(latestCatalogSnapshot.getSegments()); @@ -147,19 +140,32 @@ public synchronized void applyMergeResults(MergeResult mergeResult, OneMerge one if (!inserted) { segmentList.add(0, segmentToAdd); } - CompositeEngineCatalogSnapshot newCatSnap = new CompositeEngineCatalogSnapshot(latestCatalogSnapshot.getId() + 1, latestCatalogSnapshot.getVersion() + 1, segmentList, catalogSnapshotMap, indexFileDeleter::get); // Commit new catalog snapshot - commitCatalogSnapshot(newCatSnap); + advanceCatalogSnapshot(segmentList); } - private synchronized void commitCatalogSnapshot(CompositeEngineCatalogSnapshot newCatSnap) { - catalogSnapshotMap.put(newCatSnap.getId(), newCatSnap); + private synchronized void advanceCatalogSnapshot(List refreshedSegments) throws IOException { + logger.debug("[COMPOSITE_DEBUG] advanceCatalogSnapshot: previous id={}, version={}, old segment count={}", + latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), latestCatalogSnapshot.getSegments().size()); + compositeEngineCommitter.addLuceneIndexes(refreshedSegments); + CompositeEngineCatalogSnapshot cecs = new CompositeEngineCatalogSnapshot( + 
latestCatalogSnapshot.getId() + 1, + latestCatalogSnapshot.getVersion() + 1, + refreshedSegments, + catalogSnapshotMap, + indexFileDeleter::get + ); + catalogSnapshotMap.put(cecs.getId(), cecs); if (latestCatalogSnapshot != null) { latestCatalogSnapshot.decRef(); } - latestCatalogSnapshot = newCatSnap; - compositeEngineCommitter.addLuceneIndexes(latestCatalogSnapshot); + latestCatalogSnapshot = cecs; + logger.debug("[COMPOSITE_DEBUG] advanceCatalogSnapshot: new id={}, version={}, new segment count={}", + latestCatalogSnapshot.getId(), latestCatalogSnapshot.getVersion(), refreshedSegments.size()); + for (Segment seg : refreshedSegments) { + logger.debug("[COMPOSITE_DEBUG] segment gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + } } private Segment getSegment(Map writerFileSetMap) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java index 2553e48f3e156..86d1da2fdad5e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java @@ -275,7 +275,7 @@ public CompositeEngine( // Now read the userData from the newly created commit userData = store.readLastCommittedSegmentsInfo().getUserData(); - logger.debug("Created initial empty commit with translog UUID: {}", translogUUID); + // logger.debug("Created initial empty commit with translog UUID: {}", translogUUID); } } TranslogEventListener internalTranslogEventListener = new TranslogEventListener() { @@ -317,8 +317,9 @@ public void onFailure(String reason, Exception ex) { lastCommittedWriterGeneration.set(Long.parseLong(lastCommittedData.get(LAST_COMPOSITE_WRITER_GEN_KEY))); } - logger.debug("While initialising Composite Engine - lst commit generation : " + lastCommittedWriterGeneration.get()); + // logger.debug("While initialising Composite 
Engine - lst commit generation : " + lastCommittedWriterGeneration.get()); this.engine = new CompositeIndexingExecutionEngine( + engineConfig, mapperService, pluginsService, shardPath, @@ -428,7 +429,7 @@ public void onFailure(String reason, Exception ex) { } } } - logger.trace("created new CompositeEngine"); + // logger.trace("created new CompositeEngine"); } private LocalCheckpointTracker createLocalCheckpointTracker( @@ -442,14 +443,14 @@ private LocalCheckpointTracker createLocalCheckpointTracker( SequenceNumbers.loadSeqNoInfoFromLuceneCommit(store.readLastCommittedSegmentsInfo().getUserData().entrySet()); maxSeqNo = seqNoStats.maxSeqNo; localCheckpoint = seqNoStats.localCheckpoint; - logger.trace("recovered maximum sequence number [{}] and local checkpoint [{}]", maxSeqNo, localCheckpoint); + // logger.trace("recovered maximum sequence number [{}] and local checkpoint [{}]", maxSeqNo, localCheckpoint); } catch (org.apache.lucene.index.IndexNotFoundException e) { // Local store is empty (remote store recovery scenario) // Initialize with NO_OPS_PERFORMED (-1) - checkpoint will be restored from CatalogSnapshot during first flush - logger.debug( - "Local store is empty during engine initialization, initializing checkpoint tracker with NO_OPS_PERFORMED. " - + "This is expected during remote store recovery where local store has not been initialized yet." - ); + // logger.debug( + // "Local store is empty during engine initialization, initializing checkpoint tracker with NO_OPS_PERFORMED. " + // + "This is expected during remote store recovery where local store has not been initialized yet." 
+ // ); return localCheckpointTrackerSupplier.apply( SequenceNumbers.NO_OPS_PERFORMED, SequenceNumbers.NO_OPS_PERFORMED @@ -547,10 +548,10 @@ public void initializeRefreshListeners(EngineConfig engineConfig) { } } - logger.trace( - "CompositeEngine initialized with {} catalog snapshot aware refresh listeners", - catalogSnapshotAwareRefreshListeners.size() - ); + // logger.trace( + // "CompositeEngine initialized with {} catalog snapshot aware refresh listeners", + // catalogSnapshotAwareRefreshListeners.size() + // ); } public SearchExecEngine getReadEngine(DataFormat dataFormat) { @@ -607,6 +608,8 @@ public Engine.IndexResult index(Engine.Index index) throws IOException { index.documentInput.setSeqNo(index.seqNo()); index.documentInput.setPrimaryTerm(SeqNoFieldMapper.PRIMARY_TERM_NAME, index.primaryTerm()); index.documentInput.setVersion(1); // we are not supporting update in parquet + // logger.info("[COMPOSITE_DEBUG] Indexing doc id=[{}] seqNo=[{}] primaryTerm=[{}] — writing to engine", + // index.id(), index.seqNo(), index.primaryTerm()); WriteResult writeResult = index.documentInput.addToWriter(); indexResult = new Engine.IndexResult(writeResult.version(), index.primaryTerm(), index.seqNo(), writeResult.success()); @@ -803,12 +806,25 @@ public synchronized void refresh(String source) throws EngineException { try (CompositeEngine.ReleasableRef catalogSnapshotReleasableRef = catalogSnapshotManager.acquireSnapshot()) { refreshListeners.forEach(PRE_REFRESH_LISTENER_CONSUMER); + CatalogSnapshot preRefreshSnapshot = catalogSnapshotReleasableRef.getRef(); + // logger.info("[COMPOSITE_DEBUG] refresh(source=[{}]) starting. 
Pre-refresh CatalogSnapshot: id={}, version={}, segments={}", + // source, preRefreshSnapshot.getId(), preRefreshSnapshot.getVersion(), preRefreshSnapshot.getSegments().size()); + // for (org.opensearch.index.engine.exec.coord.Segment seg : preRefreshSnapshot.getSegments()) { + // logger.info("[COMPOSITE_DEBUG] pre-refresh segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + // } + RefreshInput refreshInput = new RefreshInput(); refreshInput.setExistingSegments(new ArrayList<>(catalogSnapshotReleasableRef.getRef().getSegments())); - RefreshResult refreshResult = engine.refresh(refreshInput); + RefreshResult refreshResult = engine.refresh(refreshInput); // It should refresh the primary engine, i.e parquet if (refreshResult != null) { + // logger.info("[COMPOSITE_DEBUG] refresh produced {} segments", refreshResult.getRefreshedSegments().size()); + // for (org.opensearch.index.engine.exec.coord.Segment seg : refreshResult.getRefreshedSegments()) { + // logger.info("[COMPOSITE_DEBUG] refreshed segment: gen={}, formats={}", seg.getGeneration(), seg.getDFGroupedSearchableFiles().keySet()); + // } catalogSnapshotManager.applyRefreshResult(refreshResult); refreshed = true; + } else { + // logger.info("[COMPOSITE_DEBUG] refresh returned null (no new data to flush)"); } invokeRefreshListeners(refreshed); @@ -1025,22 +1041,22 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { if (waitIfOngoing == false) { return; } - logger.trace("waiting for in-flight flush to finish"); + // logger.trace("waiting for in-flight flush to finish"); flushLock.lock(); - logger.trace("acquired flush lock after blocking"); + // logger.trace("acquired flush lock after blocking"); } else { - logger.trace("acquired flush lock immediately"); + // logger.trace("acquired flush lock immediately"); } try { boolean shouldPeriodicallyFlush = shouldPeriodicallyFlush(); if (force || shouldFlush() || shouldPeriodicallyFlush || 
getProcessedLocalCheckpoint() > Long.parseLong( readLastCommittedData().get(SequenceNumbers.LOCAL_CHECKPOINT_KEY))) { - + refresh("flush in composite engine"); translogManager.ensureCanFlush(); try { translogManager.rollTranslogGeneration(); - logger.trace("starting commit for flush; commitTranslog=true"); + // logger.trace("starting commit for flush; commitTranslog=true"); CompositeEngine.ReleasableRef catalogSnapshotToFlushRef = catalogSnapshotManager.acquireSnapshot(); final CatalogSnapshot catalogSnapshotToFlush = catalogSnapshotToFlushRef.getRef(); @@ -1077,7 +1093,7 @@ public void flush(boolean force, boolean waitIfOngoing) throws EngineException { () -> commitData.entrySet().iterator(), catalogSnapshotToFlush ); - logger.trace("finished commit for flush"); + // logger.trace("finished commit for flush"); if (lastCommitedCatalogSnapshotRef != null && lastCommitedCatalogSnapshotRef.getRef() != null) lastCommitedCatalogSnapshotRef.close(); @@ -1253,17 +1269,17 @@ public String getHistoryUUID() { @Override public void flushAndClose() throws IOException { if (isClosed.get() == false) { - logger.trace("flushAndClose now acquire writeLock"); + // logger.trace("flushAndClose now acquire writeLock"); try (ReleasableLock lock = writeLock.acquire()) { - logger.trace("flushAndClose now acquired writeLock"); + // logger.trace("flushAndClose now acquired writeLock"); try { - logger.debug("flushing shard on close - this might take some time to sync files to disk"); + // logger.debug("flushing shard on close - this might take some time to sync files to disk"); try { // TODO we might force a flush in the future since we have the write lock already even though recoveries // are running. 
flush(false, true); } catch (AlreadyClosedException ex) { - logger.debug("engine already closed - skipping flushAndClose"); + // logger.debug("engine already closed - skipping flushAndClose"); } } finally { close(); // double close is not a problem @@ -1346,21 +1362,21 @@ public void failEngine(String reason, @Nullable Exception failure) { logger.warn("failEngine threw exception", inner); // don't bubble up these exceptions up } } else { - logger.debug( - () -> new ParameterizedMessage( - "tried to fail composite engine but could not acquire lock - composite engine should " + "be failed by now [{}]", - reason - ), failure - ); + // logger.debug( + // () -> new ParameterizedMessage( + // "tried to fail composite engine but could not acquire lock - composite engine should " + "be failed by now [{}]", + // reason + // ), failure + // ); } } @Override public void close() throws IOException { if (isClosed.get() == false) { // don't acquire the write lock if we are already closed - logger.debug("close now acquiring writeLock"); + // logger.debug("close now acquiring writeLock"); try (ReleasableLock lock = writeLock.acquire()) { - logger.debug("close acquired writeLock"); + // logger.debug("close acquired writeLock"); closeNoLock("api", closedLatch); } } @@ -1391,7 +1407,7 @@ protected void closeNoLock(String reason, CountDownLatch closedLatch) { } finally { try { store.decRef(); - logger.debug("engine closed [{}]", reason); + // logger.debug("engine closed [{}]", reason); } finally { closedLatch.countDown(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java index 57fdb8546c91b..1c4107d26a1fb 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexFileDeleter.java @@ -77,7 +77,7 @@ public synchronized void 
removeFileReferences(CatalogSnapshot snapshot) { } if (!dfFilesToDelete.isEmpty()) { - System.out.println("Files to delete : " + dfFilesToDelete); + // System.out.println("Files to delete : " + dfFilesToDelete); deleteUnreferencedFiles(dfFilesToDelete); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java new file mode 100644 index 0000000000000..8b7a68e503371 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataFormat.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene; + +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.exec.DataFormat; + +public class LuceneDataFormat implements DataFormat { + + private final String LUCENE_DATA_FORMAT = "LuceneDataFormat"; + @Override + public Setting dataFormatSettings() { + return null; + } + + @Override + public Setting clusterLeveldataFormatSettings() { + return null; + } + + @Override + public String name() { + return "Lucene"; + } + + @Override + public void configureStore() { + + } + + @Override + public String toString() { + return LUCENE_DATA_FORMAT; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!(obj instanceof DataFormat)) return false; + return name().equals(((DataFormat) obj).name()); + } + + @Override + public int hashCode() { + return name().hashCode(); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java similarity index 50% rename from 
server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java rename to server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java index 6a3948fa10466..213d1da3ff24b 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneDataSourcePlugin.java @@ -6,68 +6,59 @@ * compatible open source license. */ -package org.opensearch.index.engine.exec.text; +package org.opensearch.index.engine.exec.lucene; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; -import org.opensearch.common.settings.Setting; -import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.lucene.engine.LuceneExecutionEngine; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.FormatStoreDirectory; -import org.opensearch.index.store.GenericStoreDirectory; import org.opensearch.plugins.DataSourcePlugin; + import org.opensearch.plugins.Plugin; import java.io.IOException; +import java.util.Map; +public class LuceneDataSourcePlugin extends Plugin implements DataSourcePlugin { -public class TextDF extends Plugin implements DataFormat, DataSourcePlugin { @Override - public Setting dataFormatSettings() { - return null; + @SuppressWarnings("unchecked") + public IndexingExecutionEngine 
indexingEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimary, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments) { + return (IndexingExecutionEngine) new LuceneExecutionEngine(engineConfig, mapperService, isPrimary, shardPath, indexSettings, fieldAssignments); } @Override - public Setting clusterLeveldataFormatSettings() { + public FormatStoreDirectory createFormatStoreDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException { return null; } @Override - public String name() { - return "text"; - } - - @Override - public void configureStore() { - - } - - @Override - public IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath, IndexSettings indexSettings) { - return (IndexingExecutionEngine) new TextEngine(); + public BlobContainer createBlobContainer(BlobStore blobStore, BlobPath blobPath) throws IOException { + return null; } @Override - public FormatStoreDirectory createFormatStoreDirectory(IndexSettings indexSettings, ShardPath shardPath) throws IOException { - return new GenericStoreDirectory<>( - new TextDF(), - shardPath - ); + public DataFormat getDataFormat() { + return new LuceneDataFormat(); } @Override - public BlobContainer createBlobContainer(BlobStore blobStore, BlobPath blobPath) throws IOException { - BlobPath formatPath = blobPath.add(getDataFormat().name().toLowerCase()); - return blobStore.blobContainer(formatPath); + public void registerFieldSupport(FieldSupportRegistry registry) { + DataFormat lucene = getDataFormat(); + for (Map.Entry entry : LuceneFieldRegistry.getRegisteredFields().entrySet()) { + registry.register(entry.getKey(), lucene, entry.getValue().getFieldCapabilities()); + } } - @Override - public DataFormat getDataFormat() { - return new TextDF(); - } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java 
b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java deleted file mode 100644 index aae78e4b6983e..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.engine.exec.lucene; - -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.util.BytesRef; -import org.opensearch.index.engine.InternalEngine; -import org.opensearch.index.engine.exec.DataFormat; -import org.opensearch.index.engine.exec.DocumentInput; -import org.opensearch.index.engine.exec.FileInfos; -import org.opensearch.index.engine.exec.FlushIn; -import org.opensearch.index.engine.exec.IndexingExecutionEngine; -import org.opensearch.index.engine.exec.Merger; -import org.opensearch.index.engine.exec.RefreshInput; -import org.opensearch.index.engine.exec.RefreshResult; -import org.opensearch.index.engine.exec.WriteResult; -import org.opensearch.index.engine.exec.Writer; -import org.opensearch.index.engine.exec.coord.CatalogSnapshot; -import org.opensearch.index.mapper.KeywordFieldMapper; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.mapper.ParseContext; - -import java.io.IOException; -import java.util.Collection; -import java.util.List; -import java.util.Map; - -public class LuceneIEEngine implements IndexingExecutionEngine { - - private final InternalEngine internalEngine; - - public LuceneIEEngine(InternalEngine internalEngine) { - this.internalEngine = internalEngine; - } - - @Override - public List supportedFieldTypes() { - return List.of(); - } - - @Override - public Writer> createWriter(long writerGeneration) throws IOException { - return new 
LuceneWriter(internalEngine.indexWriter, writerGeneration); - } - - @Override - public void loadWriterFiles(CatalogSnapshot catalogSnapshot) { - - } - - @Override - public void deleteFiles(Map> filesToDelete) throws IOException { - - } - - @Override - public Merger getMerger() { - throw new UnsupportedOperationException(); - } - - @Override - public RefreshResult refresh(RefreshInput refreshInput) throws IOException { - internalEngine.refresh(refreshInput.getClass().getName()); - return null; - } - - @Override - public DataFormat getDataFormat() { - return DataFormat.LUCENE; - } - - @Override - public void close() throws IOException { - - } - - public static class LuceneDocumentInput implements DocumentInput { - - private final ParseContext.Document doc; - private final IndexWriter writer; - - public LuceneDocumentInput(ParseContext.Document doc, IndexWriter w) { - this.doc = doc; - this.writer = w; - } - - @Override - public void addRowIdField(String fieldName, long rowId) { - doc.add(new NumericDocValuesField(fieldName, rowId)); - } - - @Override - public void addField(MappedFieldType fieldType, Object value) { - doc.add(new KeywordFieldMapper.KeywordField("f1", new BytesRef("good_field"), null)); - } - - @Override - public ParseContext.Document getFinalInput() { - return doc; - } - - @Override - public WriteResult addToWriter() throws IOException { - writer.addDocument(doc); - return null; - } - - @Override - public void close() throws Exception { - // no-op, reuse writer - } - } - - public static class LuceneWriter implements Writer { - - private final IndexWriter writer; - private final long writerGeneration; - - public LuceneWriter(IndexWriter writer, long writerGeneration) { - this.writer = writer; - this.writerGeneration = writerGeneration; - } - - @Override - public WriteResult addDoc(LuceneDocumentInput d) throws IOException { - writer.addDocument(d.doc); - return null; - } - - @Override - public FileInfos flush(FlushIn flushIn) throws IOException { - 
writer.flush(); - return null; - } - - @Override - public void sync() throws IOException { - writer.flush(); - } - - @Override - public void close() { - // no-op - } - - @Override - public LuceneDocumentInput newDocumentInput() { - return new LuceneDocumentInput(new ParseContext.Document(), writer); - } - } -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java new file mode 100644 index 0000000000000..eb8473a06c7dd --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/engine/LuceneExecutionEngine.java @@ -0,0 +1,178 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.engine; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.index.FilterMergePolicy; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.index.MergeTrigger; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NIOFSDirectory; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineConfig; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FieldAssignments; +import 
org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.Merger; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.engine.exec.coord.CatalogSnapshot; +import org.opensearch.index.engine.exec.lucene.LuceneDataFormat; +import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; +import org.opensearch.index.engine.exec.lucene.writer.LuceneWriter; +import org.opensearch.index.engine.exec.lucene.writer.LuceneWriterCodec; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import static org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter.ROW_ID; + +public class LuceneExecutionEngine implements IndexingExecutionEngine { + + private final MapperService mapperService; + private final ShardPath shardPath; + private final DataFormat dataFormat; + private final EngineConfig engineConfig; + private static final Logger logger = LogManager.getLogger(LuceneExecutionEngine.class); + private final boolean isPrimaryEngine; + + public LuceneExecutionEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimaryEngine, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments) { + this.engineConfig = engineConfig; + this.mapperService = mapperService; + this.dataFormat = DataFormat.LUCENE; + this.isPrimaryEngine = isPrimaryEngine; + this.shardPath = shardPath; + // TODO: Add check for Lucene being the primary engine and MapperService has an unknown field, currently + // in POC it's only a secondary engine so we don't need to have all fields in this. 
+ } + + @Override + public List supportedFieldTypes(boolean isPrimaryEngine) { + // Delegate to the static LuceneFieldRegistry — each registered field type is supported + return new ArrayList<>(LuceneFieldRegistry.getRegisteredFieldNames()); + } + + @Override + public Writer> createWriter(long writerGeneration) throws IOException { + + Path tmpDirectoryPath = shardPath.getDataPath().resolve("tmp"); + Files.createDirectories(tmpDirectoryPath); + Path directoryPath = Files.createTempDirectory(tmpDirectoryPath, Long.toString(writerGeneration)); // TODO:: Is this the right name? + //Path directoryPath = Files.createTempDirectory(Long.toString(System.nanoTime())); // TODO:: Is this the right name? + EngineRole role = isPrimaryEngine ? EngineRole.PRIMARY : EngineRole.SECONDARY; + return new LuceneWriter(directoryPath, createWriter(directoryPath, writerGeneration), writerGeneration, role); + + } + + private IndexWriter createWriter(Path directoryPath, long writerGeneration) { + try { + IndexWriterConfig indexWriterConfig = getIndexWriterConfig(writerGeneration, this.engineConfig); + Directory directory = NIOFSDirectory.open(directoryPath); + return new IndexWriter(directory, indexWriterConfig); + } catch (IOException e) { + throw new RuntimeException("Failed to create lucene writer: {}", e); + } + } + + + + public class ForceMergeOnlyPolicy extends FilterMergePolicy { + + public ForceMergeOnlyPolicy(MergePolicy wrappedPolicy) { + super(wrappedPolicy); + } + + // Block regular/automatic merges — return null + @Override + public MergeSpecification findMerges( + MergeTrigger mergeTrigger, + SegmentInfos segmentInfos, + MergeContext mergeContext) throws IOException { + // No automatic merges + return null; + } + + // Allow forceMerge — delegates to wrapped policy + @Override + public MergeSpecification findForcedMerges( + SegmentInfos segmentInfos, + int maxSegmentCount, + Map segmentsToMerge, + MergeContext mergeContext) throws IOException { + return in.findForcedMerges( + 
segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext); + } + + // Allow forceMergeDeletes — delegates to wrapped policy + @Override + public MergeSpecification findForcedDeletesMerges( + SegmentInfos segmentInfos, + MergeContext mergeContext) throws IOException { + return in.findForcedDeletesMerges(segmentInfos, mergeContext); + } + } + + private IndexWriterConfig getIndexWriterConfig(long writerGeneration, EngineConfig engineConfig) { + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(); + indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); + indexWriterConfig.setIndexSort(new Sort(new SortField(ROW_ID, SortField.Type.LONG))); + indexWriterConfig.setCodec(new LuceneWriterCodec(engineConfig.getCodec().getName(), engineConfig.getCodec(), writerGeneration)); + MergePolicy mergePolicy = indexWriterConfig.getMergePolicy(); + indexWriterConfig.setMergePolicy(new ForceMergeOnlyPolicy(mergePolicy)); + return indexWriterConfig; + } + + @Override + public Merger getMerger() { + return null; + } + + @Override + public RefreshResult refresh(RefreshInput refreshInput) throws IOException { + return null; + } + + @Override + public DataFormat getDataFormat() { + return new LuceneDataFormat(); + } + + @Override + public void loadWriterFiles(CatalogSnapshot catalogSnapshot) throws IOException { + + } + + @Override + public void deleteFiles(Map> filesToDelete) throws IOException { + + } + + @Override + public void close() throws IOException { + + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java new file mode 100644 index 0000000000000..209b682cac957 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneField.java @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under 
the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields; + +import org.apache.lucene.document.Field; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.mapper.FieldNamesFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; +import org.opensearch.index.mapper.ParseContext.Document; + +import java.util.Set; + +/** + * Base class for Lucene field implementations in the composite engine. + * + *

Each subclass handles a specific field type (keyword, long, text, etc.) and + * creates the appropriate Lucene index fields based on the capabilities described + * in the {@link MappedFieldType}. + */ +@ExperimentalApi +public abstract class LuceneField { + + /** + * Creates Lucene index fields for the given value based on the field type's capability flags. + * + * @param fieldType the per-field MappedFieldType carrying field name, type name, and capability flags + * @param document the Lucene document to add fields to + * @param parseValue the parsed field value to index + */ + public abstract void createField(MappedFieldType fieldType, Document document, Object parseValue); + + protected final void createFieldNamesField(MappedFieldType mappedFieldType, Document document, ParseContext context) { + assert !mappedFieldType.hasDocValues() : "_field_names should only be used when doc_values are turned off"; + FieldNamesFieldMapper.FieldNamesFieldType fieldNamesFieldType = + context.docMapper().metadataMapper(FieldNamesFieldMapper.class).fieldType(); + if (fieldNamesFieldType != null && fieldNamesFieldType.isEnabled()) { + for (String fieldName : FieldNamesFieldMapper.extractFieldNames(mappedFieldType.name())) { + document.add(new Field(FieldNamesFieldMapper.NAME, fieldName, FieldNamesFieldMapper.Defaults.FIELD_TYPE)); + } + } + } + + /** + * Returns the set of capabilities this field supports. + * The engine uses this to populate the FieldSupportRegistry. 
+ */ + public abstract Set getFieldCapabilities(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java new file mode 100644 index 0000000000000..b487f6614d129 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/LuceneFieldRegistry.java @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields; + +import org.opensearch.index.engine.exec.lucene.fields.data.BinaryLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.BooleanLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.date.DateLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.date.DateNanosLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.IdLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.IgnoredLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.RoutingLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.metadata.SizeLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.ByteLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.DocCountLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.DoubleLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.FloatLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.HalfFloatLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.IntegerLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.LongLuceneField; 
+import org.opensearch.index.engine.exec.lucene.fields.data.number.ShortLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.TokenCountLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.number.UnsignedLongLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.text.IpLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.text.KeywordLuceneField; +import org.opensearch.index.engine.exec.lucene.fields.data.text.TextLuceneField; +import org.opensearch.index.mapper.BinaryFieldMapper; +import org.opensearch.index.mapper.BooleanFieldMapper; +import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.DocCountFieldMapper; +import org.opensearch.index.mapper.IdFieldMapper; +import org.opensearch.index.mapper.IgnoredFieldMapper; +import org.opensearch.index.mapper.IpFieldMapper; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.RoutingFieldMapper; +import org.opensearch.index.mapper.SeqNoFieldMapper; +import org.opensearch.index.mapper.TextFieldMapper; +import org.opensearch.index.mapper.VersionFieldMapper; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class LuceneFieldRegistry { + + /** + * All registered field mappings (thread-safe, mutable) + */ + private static final Map FIELD_REGISTRY = new ConcurrentHashMap<>(); + + // Static initialization block to populate the field registry + static { + initialize(); + } + + // Private constructor to prevent instantiation of utility class + private LuceneFieldRegistry() { + throw new UnsupportedOperationException("Registry class should not be instantiated"); + } + + /** + * Initialize the registry with all available plugins. + * This method should be called during node startup after all plugins are loaded. 
+ */ + public static synchronized void initialize() { + // Text-based fields + FIELD_REGISTRY.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordLuceneField()); + FIELD_REGISTRY.put(TextFieldMapper.CONTENT_TYPE, new TextLuceneField()); + FIELD_REGISTRY.put(IpFieldMapper.CONTENT_TYPE, new IpLuceneField()); + + // Numeric fields + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.BYTE.typeName(), new ByteLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.SHORT.typeName(), new ShortLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.INTEGER.typeName(), new IntegerLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.LONG.typeName(), new LongLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.UNSIGNED_LONG.typeName(), new UnsignedLongLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), new HalfFloatLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.FLOAT.typeName(), new FloatLuceneField()); + FIELD_REGISTRY.put(NumberFieldMapper.NumberType.DOUBLE.typeName(), new DoubleLuceneField()); + FIELD_REGISTRY.put("token_count", new TokenCountLuceneField()); + FIELD_REGISTRY.put("scaled_float", new LongLuceneField()); + + // Temporal fields + FIELD_REGISTRY.put(DateFieldMapper.CONTENT_TYPE, new DateLuceneField()); + FIELD_REGISTRY.put(DateFieldMapper.DATE_NANOS_CONTENT_TYPE, new DateNanosLuceneField()); + + // Boolean field + FIELD_REGISTRY.put(BooleanFieldMapper.CONTENT_TYPE, new BooleanLuceneField()); + + // Binary field + FIELD_REGISTRY.put(BinaryFieldMapper.CONTENT_TYPE, new BinaryLuceneField()); + + // Metadata fields + FIELD_REGISTRY.put(IdFieldMapper.CONTENT_TYPE, new IdLuceneField()); + FIELD_REGISTRY.put(RoutingFieldMapper.CONTENT_TYPE, new RoutingLuceneField()); + FIELD_REGISTRY.put(IgnoredFieldMapper.CONTENT_TYPE, new IgnoredLuceneField()); + FIELD_REGISTRY.put("_size", new SizeLuceneField()); + FIELD_REGISTRY.put(DocCountFieldMapper.CONTENT_TYPE, new 
DocCountLuceneField()); + FIELD_REGISTRY.put(SeqNoFieldMapper.CONTENT_TYPE, new LongLuceneField()); + FIELD_REGISTRY.put(VersionFieldMapper.CONTENT_TYPE, new LongLuceneField()); + FIELD_REGISTRY.put(SeqNoFieldMapper.PRIMARY_TERM_NAME, new LongLuceneField()); + } + + /** + * Returns the LuceneField implementation for the specified OpenSearch field type, or null if not found. + */ + public static LuceneField getLuceneField(String fieldType) { + return FIELD_REGISTRY.get(fieldType); + } + + /** + * Returns all registered field type names. + */ + public static java.util.Set getRegisteredFieldNames() { + return java.util.Collections.unmodifiableSet(FIELD_REGISTRY.keySet()); + } + + /** + * Returns an unmodifiable view of all registered field mappings. + */ + public static Map getRegisteredFields() { + return java.util.Collections.unmodifiableMap(FIELD_REGISTRY); + } + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java new file mode 100644 index 0000000000000..9e85aa5bc9e57 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BinaryLuceneField.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class BinaryLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final BytesRef value = (BytesRef) parseValue; + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java new file mode 100644 index 0000000000000..455c403b463ba --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/BooleanLuceneField.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.IndexOptions; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class BooleanLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Boolean value = (Boolean) parseValue; + if (fieldType.isSearchable()) { + FieldType ft = new FieldType(); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.DOCS); + ft.setTokenized(false); + ft.freeze(); + document.add(new Field(fieldType.name(), value ? "T" : "F", ft)); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value ? 1 : 0)); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value ? 
"T" : "F")); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java new file mode 100644 index 0000000000000..98f37acaedf99 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data.date; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class DateLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final long timestamp = (long) parseValue; + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), timestamp)); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), timestamp)); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), timestamp)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git 
a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java new file mode 100644 index 0000000000000..b659a6298b5a4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/date/DateNanosLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data.date; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class DateNanosLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final long timestamp = (long) parseValue; + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), timestamp)); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), timestamp)); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), timestamp)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java 
b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java new file mode 100644 index 0000000000000..00feb0ce36bbf --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IdLuceneField.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; + +import org.apache.lucene.document.BinaryDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class IdLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final BytesRef value = (BytesRef) parseValue; + if (fieldType.hasDocValues()) { + document.add(new BinaryDocValuesField(fieldType.name(), value)); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java new file mode 100644 index 0000000000000..62b91d246cf62 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/IgnoredLuceneField.java @@ -0,0 +1,34 @@ +/* + * 
SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; + +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class IgnoredLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final String value = parseValue.toString(); + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java new file mode 100644 index 0000000000000..590dbfbe146f8 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/RoutingLuceneField.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; + +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class RoutingLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final String value = parseValue.toString(); + if (fieldType.hasDocValues()) { + document.add(new SortedSetDocValuesField(fieldType.name(), new BytesRef(value))); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java new file mode 100644 index 0000000000000..3f100dcf1cb88 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/metadata/SizeLuceneField.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.metadata; + +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class SizeLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.intValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java new file mode 100644 index 0000000000000..3e74b55fa9789 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ByteLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class ByteLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.byteValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.byteValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.byteValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java new file mode 100644 index 0000000000000..922c63cc32f25 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DocCountLuceneField.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class DocCountLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.longValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java new file mode 100644 index 0000000000000..7509fccd12dab --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/DoubleLuceneField.java @@ -0,0 +1,43 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class DoubleLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new DoublePoint(fieldType.name(), value.doubleValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.doubleToSortableLong(value.doubleValue()))); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.doubleValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java new file mode 100644 index 0000000000000..93c4cd1fbc2a5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/FloatLuceneField.java @@ -0,0 +1,43 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.NumericUtils; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class FloatLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new FloatPoint(fieldType.name(), value.floatValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), NumericUtils.floatToSortableInt(value.floatValue()))); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.floatValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java new file mode 100644 index 0000000000000..f8364b43dfc9d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/HalfFloatLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.sandbox.document.HalfFloatPoint; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class HalfFloatLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new HalfFloatPoint(fieldType.name(), value.floatValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), HalfFloatPoint.halfFloatToSortableShort(value.floatValue()))); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.floatValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java new file mode 100644 index 0000000000000..0285e384ddc14 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/IntegerLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class IntegerLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.intValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.intValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java new file mode 100644 index 0000000000000..50e17bcb3931b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/LongLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class LongLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), value.longValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.longValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java new file mode 100644 index 0000000000000..edc97a6a82668 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/ShortLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class ShortLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.shortValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.shortValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.shortValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java new file mode 100644 index 0000000000000..68339b083d995 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/TokenCountLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class TokenCountLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new IntPoint(fieldType.name(), value.intValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.intValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.intValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java new file mode 100644 index 0000000000000..8f5d76f106dc4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/number/UnsignedLongLuceneField.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.number; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.StoredField; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class UnsignedLongLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final Number value = (Number) parseValue; + if (fieldType.isSearchable()) { + document.add(new LongPoint(fieldType.name(), value.longValue())); + } + if (fieldType.hasDocValues()) { + document.add(new SortedNumericDocValuesField(fieldType.name(), value.longValue())); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), value.longValue())); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java new file mode 100644 index 0000000000000..bd47d90d50889 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/IpLuceneField.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.text; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.network.InetAddresses; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.net.InetAddress; +import java.util.EnumSet; +import java.util.Set; + +public class IpLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final InetAddress address = (InetAddress) parseValue; + final byte[] encoded = InetAddresses.forString(address.getHostAddress()).getAddress(); + if (fieldType.isSearchable()) { + document.add(new InetAddressPoint(fieldType.name(), InetAddresses.forString(address.getHostAddress()))); + } + if (fieldType.hasDocValues()) { + document.add(new SortedSetDocValuesField(fieldType.name(), new BytesRef(encoded))); + } + if (fieldType.isStored()) { + document.add(new StoredField(fieldType.name(), new BytesRef(encoded))); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java new file mode 100644 index 0000000000000..2d7915db7846c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/KeywordLuceneField.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require 
contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene.fields.data.text; + +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class KeywordLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + String value = (String) parseValue; + final BytesRef binaryValue = new BytesRef(value); + + boolean shouldIndex = fieldType.isSearchable(); + boolean shouldStore = fieldType.isStored(); + + if (shouldIndex || shouldStore) { + FieldType luceneFieldType = new FieldType(); + luceneFieldType.setTokenized(false); + luceneFieldType.setStored(shouldStore); + luceneFieldType.setOmitNorms(true); + luceneFieldType.setIndexOptions(shouldIndex ? 
IndexOptions.DOCS : IndexOptions.NONE); + luceneFieldType.freeze(); + document.add(new KeywordFieldMapper.KeywordField(fieldType.name(), binaryValue, luceneFieldType)); + } + + if (fieldType.hasDocValues()) { + document.add(new SortedSetDocValuesField(fieldType.name(), binaryValue)); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX, FieldCapability.DOC_VALUES); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java new file mode 100644 index 0000000000000..ed07348a13f78 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/fields/data/text/TextLuceneField.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.fields.data.text; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.opensearch.index.engine.exec.FieldCapability; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.util.EnumSet; +import java.util.Set; + +public class TextLuceneField extends LuceneField { + + @Override + public void createField(MappedFieldType fieldType, ParseContext.Document document, Object parseValue) { + final String value = (String) parseValue; + + boolean shouldIndex = fieldType.isSearchable(); + boolean shouldStore = fieldType.isStored(); + + if (shouldIndex || shouldStore) { + FieldType luceneFieldType = new FieldType(); + luceneFieldType.setStored(shouldStore); + luceneFieldType.setIndexOptions(shouldIndex ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS : IndexOptions.NONE); + Field field = new Field(fieldType.name(), value, luceneFieldType); + document.add(field); + } + } + + @Override + public Set getFieldCapabilities() { + return EnumSet.of(FieldCapability.STORE, FieldCapability.INDEX); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java new file mode 100644 index 0000000000000..678b640329ef9 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneDocumentInput.java @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.writer; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.IndexWriter; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.lucene.fields.LuceneField; +import org.opensearch.index.engine.exec.lucene.fields.LuceneFieldRegistry; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.io.IOException; + +public class LuceneDocumentInput implements DocumentInput { + private static final Logger logger = LogManager.getLogger(LuceneDocumentInput.class); + private final ParseContext.Document document; + private final IndexWriter indexWriter; + private final EngineRole engineRole; + + public LuceneDocumentInput(ParseContext.Document document, IndexWriter indexWriter, EngineRole engineRole) { + this.document = document; + this.indexWriter = indexWriter; + this.engineRole = engineRole; + } + + @Override + public void addRowIdField(String fieldName, long rowId) { + document.add(new NumericDocValuesField(fieldName, rowId)); + } + + @SuppressWarnings("unchecked") + @Override + public void addField(MappedFieldType fieldType, Object value) { + final LuceneField luceneField = LuceneFieldRegistry.getLuceneField(fieldType.typeName()); + + if (luceneField == null) { + // Field type not supported by Lucene format — skip silently + logger.debug( + "[COMPOSITE_DEBUG] Lucene SKIP field=[{}] type=[{}] — no LuceneField registered in LuceneFieldRegistry", + fieldType.name(), + fieldType.typeName() + ); + return; + } + + logger.debug( + "[COMPOSITE_DEBUG] Lucene ACCEPT field=[{}] type=[{}] value=[{}]", + fieldType.name(), + fieldType.typeName(), + 
value + ); + luceneField.createField(fieldType, document, value); + } + + /** + * Returns the underlying {@link ParseContext.Document} for ingesters to access + * and add Lucene fields directly. + */ + public ParseContext.Document getDocument() { + return document; + } + + @Override + public EngineRole getEngineRole() { + return engineRole; + } + + @Override + public ParseContext.Document getFinalInput() { + return document; + } + + @Override + public WriteResult addToWriter() { + try { + long seqNum = indexWriter.addDocument(document); + return new WriteResult(true, null, 1, 1, seqNum); + } catch (IOException exception) { + return new WriteResult(false, exception, 1, 1, 1); + } + } + + @Override + public DataFormat getDataFormat() { + return DataFormat.LUCENE; + } + + @Override + public void close() throws Exception { + // no-op, reuse writer + } + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java new file mode 100644 index 0000000000000..6995899bb7d64 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriter.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec.lucene.writer; + +import org.apache.lucene.index.IndexWriter; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.EngineRole; +import org.opensearch.index.engine.exec.FileInfos; +import org.opensearch.index.engine.exec.FlushIn; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.mapper.ParseContext; + +import java.io.IOException; +import java.nio.file.Path; + +public class LuceneWriter implements Writer { + + private final IndexWriter writer; + private final long writerGeneration; + private final Path directoryPath; + private final EngineRole engineRole; + + public LuceneWriter(Path directoryPath, IndexWriter writer, long writerGeneration, EngineRole engineRole) { + this.directoryPath = directoryPath; + this.writer = writer; + this.writerGeneration = writerGeneration; + this.engineRole = engineRole; + } + + @Override + public WriteResult addDoc(LuceneDocumentInput documentInput) throws IOException { + return documentInput.addToWriter(); + } + + @Override + public FileInfos flush(FlushIn flushIn) throws IOException { + writer.forceMerge(1); + WriterFileSet.Builder writerFileSetBuilder = + WriterFileSet.builder().directory(directoryPath).writerGeneration(writerGeneration).addNumRows(writer.getDocStats().numDocs); + return FileInfos.builder().putWriterFileSet(DataFormat.LUCENE, writerFileSetBuilder.build()).build(); + } + + @Override + public void sync() throws IOException { + + } + + @Override + public void close() throws IOException { + writer.close(); + } + + @Override + public LuceneDocumentInput newDocumentInput() { + return new LuceneDocumentInput(new ParseContext.Document(), writer, engineRole); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/writer/LuceneWriterCodec.java 
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.index.engine.exec.lucene.writer;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

import java.io.IOException;

/**
 * {@link FilterCodec} that stamps every segment written through it with the
 * generation of the writer that produced it.
 *
 * <p>The only behavior changed from the delegate codec is
 * {@link #segmentInfoFormat()}: on write, the attribute
 * {@code "writer_generation"} is added to the {@link SegmentInfo} before the
 * delegate persists it; reads are delegated unchanged. This lets segments be
 * correlated back to their originating writer generation after the fact
 * (NOTE(review): presumably consumed during merge/catalog reconciliation —
 * confirm with the composite-engine consumers of this attribute).
 */
public class LuceneWriterCodec extends FilterCodec {

    private final long writerGeneration;

    /**
     * Sole constructor. When subclassing this codec, create a no-arg ctor and pass the delegate codec
     * and a unique name to this ctor.
     *
     * @param name unique codec name used for SPI resolution
     * @param delegate codec that performs all actual encoding/decoding
     * @param writerGeneration generation stamped on each written segment
     */
    public LuceneWriterCodec(String name, Codec delegate, long writerGeneration) {
        super(name, delegate);
        this.writerGeneration = writerGeneration;
    }

    /**
     * Wraps the delegate's {@link SegmentInfoFormat} so that segment writes
     * carry the {@code writer_generation} attribute; reads pass straight through.
     */
    @Override
    public SegmentInfoFormat segmentInfoFormat() {
        return new SegmentInfoFormat() {
            @Override
            public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
                return delegate.segmentInfoFormat().read(directory, segmentName, segmentID, context);
            }

            @Override
            public void write(Directory directory, SegmentInfo info, IOContext ioContext) throws IOException {
                // Stamp the generation before handing off so it is persisted with the segment.
                info.putAttribute("writer_generation", String.valueOf(writerGeneration));
                delegate.segmentInfoFormat().write(directory, info, ioContext);
            }
        };
    }
}
@@ private void cleanupStaleMergedFiles(Map mergedWriter Path path = Path.of(wfs.getDirectory(), file); try { Files.deleteIfExists(path); - logger.info("Stale Merged File Deleted at : [{}]", path); + // logger.info("Stale Merged File Deleted at : [{}]", path); } catch (Exception exception) { logger.error( () -> new ParameterizedMessage( diff --git a/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java b/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java index a14b668471d51..d20d3b891aa89 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/merge/MergeScheduler.java @@ -81,8 +81,8 @@ public synchronized void refreshConfig() { return; } - logger.info(() -> new ParameterizedMessage("Updating from merge scheduler config: maxThreadCount {} -> {}, " + - "maxMergeCount {} -> {}", this.maxConcurrentMerges, newMaxThreadCount, this.maxMergeCount, newMaxMergeCount)); + // logger.info(() -> new ParameterizedMessage("Updating from merge scheduler config: maxThreadCount {} -> {}, " + + // "maxMergeCount {} -> {}", this.maxConcurrentMerges, newMaxThreadCount, this.maxMergeCount, newMaxMergeCount)); this.maxConcurrentMerges = newMaxThreadCount; this.maxMergeCount = newMaxMergeCount; @@ -172,7 +172,7 @@ public void run() { long tookMS = 0; try { if (isShutdown.get()) { - logger.debug("[{}] MergeScheduler is shutdown, skipping merge", getName()); + // logger.debug("[{}] MergeScheduler is shutdown, skipping merge", getName()); return; } @@ -180,15 +180,15 @@ public void run() { currentMergesNumDocs.inc(totalNumDocs); currentMergesSizeInBytes.inc(totalSizeInBytes); - logger.debug("[{}] Starting merge for: {}", getName(), oneMerge); + // logger.debug("[{}] Starting merge for: {}", getName(), oneMerge); MergeResult mergeResult = mergeHandler.doMerge(oneMerge); compositeEngine.applyMergeChanges(mergeResult, oneMerge); 
mergeHandler.onMergeFinished(oneMerge); tookMS = TimeValue.nsecToMSec((System.nanoTime() - timeNS)); - logger.info("[{}] Merge completed in {}ms for: {} and output is stored in: {}", - getName(), tookMS, oneMerge, mergeResult); + // logger.info("[{}] Merge completed in {}ms for: {} and output is stored in: {}", + // getName(), tookMS, oneMerge, mergeResult); } catch (Exception e) { logger.error("[{}] Unexpected error during merge for: {}", getName(), oneMerge, e); @@ -262,7 +262,7 @@ public int getMaxMergeCount() { //TODO see where we want to call this function for the Merge shutdown public void shutdown() { if (isShutdown.compareAndSet(false, true)) { - logger.info("Shutting down MergeScheduler with {} active merges", activeMerges.get()); + // logger.info("Shutting down MergeScheduler with {} active merges", activeMerges.get()); for (MergeThread thread : mergeThreads) { try { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java deleted file mode 100644 index 8f43091693274..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java +++ /dev/null @@ -1,205 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.engine.exec.text; - -import org.opensearch.index.engine.exec.coord.Segment; - -import org.opensearch.index.engine.exec.DataFormat; -import org.opensearch.index.engine.exec.DocumentInput; -import org.opensearch.index.engine.exec.FileInfos; -import org.opensearch.index.engine.exec.FileMetadata; -import org.opensearch.index.engine.exec.FlushIn; -import org.opensearch.index.engine.exec.IndexingExecutionEngine; -import org.opensearch.index.engine.exec.Merger; -import org.opensearch.index.engine.exec.RefreshInput; -import org.opensearch.index.engine.exec.RefreshResult; -import org.opensearch.index.engine.exec.WriteResult; -import org.opensearch.index.engine.exec.Writer; -import org.opensearch.index.engine.exec.WriterFileSet; -import org.opensearch.index.engine.exec.coord.CatalogSnapshot; -import org.opensearch.index.engine.exec.merge.MergeResult; -import org.opensearch.index.engine.exec.merge.RowIdMapping; -import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.index.shard.ShardPath; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; - -public class TextEngine implements IndexingExecutionEngine { - - private final AtomicLong counter = new AtomicLong(); - private final Set openWriters = new HashSet<>(); - private final List openFiles = new ArrayList<>(); - - @Override - public List supportedFieldTypes() { - return List.of(); - } - - @Override - public Writer> createWriter(long writerGeneration) throws IOException { - return new TextWriter("text_file" + counter.getAndIncrement(), this, writerGeneration); - } - - @Override - public Merger getMerger() { - return new TextMerger(); - } - - @Override - public DataFormat getDataFormat() { - 
return DataFormat.TEXT; - } - - @Override - public void loadWriterFiles(CatalogSnapshot catalogSnapshot) { - - } - - @Override - public void deleteFiles(Map> filesToDelete) throws IOException { - - } - - @Override - public RefreshResult refresh(RefreshInput refreshInput) throws IOException { - openFiles.addAll(refreshInput.getWriterFiles()); - RefreshResult refreshResult = new RefreshResult(); - Segment segment = new Segment(0); - openFiles.forEach(file -> segment.addSearchableFiles(DataFormat.TEXT.name(), file)); - refreshResult.setRefreshedSegments(List.of(segment)); - return refreshResult; - } - - @Override - public void close() throws IOException { - - } - - public static class TextInput implements DocumentInput { - - private final StringBuilder sb = new StringBuilder(); - private final TextWriter writer; - - public TextInput(TextWriter writer) { - this.writer = writer; - } - - @Override - public void addRowIdField(String fieldName, long rowId) { - sb.append(fieldName).append("=").append(rowId).append(";"); - } - - @Override - public void addField(MappedFieldType fieldType, Object value) { - sb.append(fieldType.name()).append("=").append(value).append(";"); - } - - @Override - public String getFinalInput() { - return sb.append("\n").toString(); - } - - @Override - public WriteResult addToWriter() throws IOException { - return writer.addDoc(this); - } - - @Override - public void close() throws Exception { - //no op - } - } - - public static class TextMerger implements Merger { - - @Override - public MergeResult merge(List fileMetadataList, long writerGeneration) { - // Here we will implementation of logic for merging files and reassign the row-ids - // and creating the mapping of the old segment+id to new row id. - // - // Needed when this data format is configured as primary data format. 
- throw new UnsupportedOperationException("merge not supported"); - } - - @Override - public MergeResult merge(List fileMetadataList, RowIdMapping rowIdMapping, long writerGeneration) { - // Here we will have implementation of the merge logic where we will have the mapping of the old row id to new id - // and merging the files. - // - // Needed when data format is not configured as primary data format. - throw new UnsupportedOperationException("merge not supported"); - } - } - - public static class TextWriter implements Writer { - - private final StringBuilder sb = new StringBuilder(); - private final File currentFile; - private final AtomicBoolean flushed = new AtomicBoolean(false); - private final Runnable onClose; - private final long writerGeneration; - - public TextWriter(String currentFile, TextEngine engine, long writerGeneration) throws IOException { - this.currentFile = new File("/Users/shnkgo/mustang" + currentFile); - this.currentFile.createNewFile(); - this.writerGeneration = writerGeneration; - boolean canWrite = this.currentFile.setWritable(true); - if (!canWrite) { - throw new IllegalStateException("Cannot write to file [" + currentFile + "]"); - } - engine.openWriters.add(this); - onClose = () -> engine.openWriters.remove(this); - } - - @Override - public WriteResult addDoc(TextInput d) throws IOException { - sb.append(d.getFinalInput()); - return new WriteResult(true, null, 1, 1, 1); - } - - @Override - public FileInfos flush(FlushIn flushIn) throws IOException { - try (FileWriter fw = new FileWriter(currentFile)) { - fw.write(sb.toString()); - } - flushed.set(true); - WriterFileSet writerFileSet = WriterFileSet.builder() - .directory(currentFile.toPath().getParent()) - .writerGeneration(writerGeneration) - .addFile(currentFile.getName()) - .build(); - return FileInfos.builder().putWriterFileSet(DataFormat.TEXT, writerFileSet).build(); - } - - @Override - public void sync() throws IOException { - } - - @Override - public void close() { - 
onClose.run(); - } - - @Override - public TextInput newDocumentInput() { - return new TextInput(this); - } - - } -} diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java index b0e3173d41607..bf1fffcddb60c 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java @@ -218,6 +218,7 @@ public T meta(Map meta) { protected MultiFields multiFields; protected CopyTo copyTo; protected DerivedFieldGenerator derivedFieldGenerator; + protected Boolean isPluggableDataFormatFeatureEnabled; protected FieldMapper(String simpleName, FieldType fieldType, MappedFieldType mappedFieldType, MultiFields multiFields, CopyTo copyTo) { super(simpleName); @@ -347,7 +348,11 @@ protected final void createFieldNamesField(ParseContext context) { } protected final boolean isPluggableDataFormatFeatureEnabled(ParseContext parseContext) { - return FeatureFlags.isEnabled(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) && parseContext.indexSettings().isOptimizedIndex(); + if(isPluggableDataFormatFeatureEnabled == null) { + isPluggableDataFormatFeatureEnabled = FeatureFlags.isEnabled(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG) && parseContext.indexSettings().isOptimizedIndex(); + } + + return isPluggableDataFormatFeatureEnabled; } @Override diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java index dc2ea1f35f8b8..345ad79210851 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FieldNamesFieldMapper.java @@ -192,7 +192,7 @@ public FieldNamesFieldType fieldType() { return (FieldNamesFieldType) super.fieldType(); } - static Iterable extractFieldNames(final String fullPath) { + public static Iterable 
extractFieldNames(final String fullPath) { return new Iterable() { @Override public Iterator iterator() { diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 09f4be2d2d8d6..79e886aaff700 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -58,7 +58,6 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.xcontent.support.XContentMapValues; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.XContentBuilder; diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 809c598f3b562..f91e8b8109535 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4646,6 +4646,7 @@ private DocumentMapperForType docMapper() { return mapperService.documentMapperWithAutoCreate(); } + // TODO:: Understand this one.. Do we need same type of Engine Config in lucene as secondary engine? 
private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) throws IOException { final Sort indexSort = indexSortSupplier.get(); final Engine.Warmer warmer = reader -> { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 4083484648bca..9aa198c77759f 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -165,6 +165,7 @@ import org.opensearch.index.compositeindex.CompositeIndexSettings; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.MergedSegmentWarmerFactory; +import org.opensearch.index.engine.exec.lucene.LuceneDataSourcePlugin; import org.opensearch.index.mapper.MappingTransformerRegistry; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.remote.RemoteIndexPathUploader; @@ -552,6 +553,20 @@ protected Node(final Environment initialEnvironment, Collection clas // Ensure feature flags from opensearch.yml are valid during plugin initialization. 
FeatureFlags.initializeFeatureFlags(tmpSettings); + PluginInfo lucenePluginInfo = new PluginInfo( + "LuceneDataformatPlugin", + "Lucene composite dataformat plugin", + "1.0", + Version.CURRENT, + "1.8", + LuceneDataSourcePlugin.class.getName(), + null, + Collections.emptyList(), + false + ); + + classpathPlugins = List.of(lucenePluginInfo); + this.pluginsService = new PluginsService( tmpSettings, initialEnvironment.configDir(), diff --git a/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java index 1b71fcd8da8a5..65b25ec8ba039 100644 --- a/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java +++ b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java @@ -12,7 +12,10 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.blobstore.BlobStore; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.FieldAssignments; +import org.opensearch.index.engine.exec.FieldSupportRegistry; import org.opensearch.index.engine.exec.IndexingExecutionEngine; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; @@ -20,6 +23,7 @@ import org.opensearch.index.store.FormatStoreDirectory; import java.io.IOException; +import java.util.List; import java.util.Map; import java.util.Optional; @@ -28,7 +32,7 @@ default Optional IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath, IndexSettings indexSettings); + IndexingExecutionEngine indexingEngine(EngineConfig engineConfig, MapperService mapperService, boolean isPrimary, ShardPath shardPath, IndexSettings indexSettings, FieldAssignments fieldAssignments); FormatStoreDirectory createFormatStoreDirectory( IndexSettings indexSettings, @@ -38,4 +42,19 @@ FormatStoreDirectory createFormatStoreDirectory( 
BlobContainer createBlobContainer(BlobStore blobStore, BlobPath blobPath) throws IOException; DataFormat getDataFormat(); + + // This is used to resolve the conflicts in case of multi-datasource plugins + // In case we have single plugin, it should not consider this value and go with considering the only DataSource as primary + default boolean isPrimary() { + return false; + } + + /** + * Registers the field type capabilities this plugin's data format supports. + * Plugins override this to declare which field types their format can handle + * and with what capabilities (STORE, INDEX, DOC_VALUES). + */ + default void registerFieldSupport(FieldSupportRegistry registry) { + // Default no-op; plugins override to register their field type capabilities + } }