diff --git a/CHANGELOG.md b/CHANGELOG.md index 240c016524752..0192e5107a2a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -141,6 +141,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652)) - Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963)) - Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908)) +- Enable compression levels for the zstd and zstd_no_dict codecs ([#8312](https://github.com/opensearch-project/OpenSearch/pull/8312)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 08d8199afface..739982036c2af 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -188,6 +188,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { FsDirectoryFactory.INDEX_LOCK_FACTOR_SETTING, Store.FORCE_RAM_TERM_DICT, EngineConfig.INDEX_CODEC_SETTING, + EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING, EngineConfig.INDEX_OPTIMIZE_AUTO_GENERATED_IDS, IndexMetadata.SETTING_WAIT_FOR_ACTIVE_SHARDS, IndexSettings.DEFAULT_PIPELINE, diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java index b6dac7bd1596c..439cad063fb11 100644 --- a/server/src/main/java/org/opensearch/index/codec/CodecService.java +++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java @@ -38,6 +38,7 @@ import org.apache.lucene.codecs.lucene95.Lucene95Codec.Mode; import org.opensearch.common.Nullable; import 
org.opensearch.common.collect.MapBuilder; +import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec; import org.opensearch.index.codec.customcodecs.ZstdCodec; import org.opensearch.index.codec.customcodecs.ZstdNoDictCodec; import org.opensearch.index.mapper.MapperService; @@ -58,7 +59,9 @@ public class CodecService { public static final String DEFAULT_CODEC = "default"; public static final String BEST_COMPRESSION_CODEC = "best_compression"; - /** the raw unfiltered lucene default. useful for testing */ + /** + * the raw unfiltered lucene default. useful for testing + */ public static final String LUCENE_DEFAULT_CODEC = "lucene_default"; public static final String ZSTD_CODEC = "zstd"; public static final String ZSTD_NO_DICT_CODEC = "zstd_no_dict"; @@ -91,6 +94,15 @@ public Codec codec(String name) { return codec; } + public Codec codec(String name, int compressionLevel) { + Lucene95CustomCodec codec = (Lucene95CustomCodec) codecs.get(name); + if (codec == null) { + throw new IllegalArgumentException("failed to find codec [" + name + "]"); + } + codec.updateCompressionLevel(compressionLevel); + return codec; + } + /** * Returns all registered available codec names */ diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java index 3c570f9d0566c..ffbe34d68dcc8 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java @@ -15,6 +15,8 @@ import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec; import org.opensearch.index.mapper.MapperService; +import java.util.Objects; + /** * * Extends {@link FilterCodec} to reuse the functionality of Lucene Codec. 
@@ -31,7 +33,9 @@ public enum Mode { ZSTD_NO_DICT } - private final StoredFieldsFormat storedFieldsFormat; + private final Mode mode; + + private StoredFieldsFormat storedFieldsFormat; /** * Creates a new compression codec with the default compression level. @@ -52,11 +56,13 @@ public Lucene95CustomCodec(Mode mode) { */ public Lucene95CustomCodec(Mode mode, int compressionLevel) { super("Lucene95CustomCodec", new Lucene95Codec()); + this.mode = Objects.requireNonNull(mode); this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } public Lucene95CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) { super("Lucene95CustomCodec", new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger)); + this.mode = Objects.requireNonNull(mode); this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } @@ -69,4 +75,8 @@ public StoredFieldsFormat storedFieldsFormat() { public String toString() { return getClass().getSimpleName(); } + + public void updateCompressionLevel(int compressionLevel) { + this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); + } } diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java index f70306afc8562..2816e2907a5f6 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormat.java @@ -35,6 +35,7 @@ public class Lucene95CustomStoredFieldsFormat extends StoredFieldsFormat { private final CompressionMode zstdNoDictCompressionMode; private final Lucene95CustomCodec.Mode mode; + private final int compressionLevel; /** default constructor */ public Lucene95CustomStoredFieldsFormat() { @@ -58,6 
+59,7 @@ public Lucene95CustomStoredFieldsFormat(Lucene95CustomCodec.Mode mode) { */ public Lucene95CustomStoredFieldsFormat(Lucene95CustomCodec.Mode mode, int compressionLevel) { this.mode = Objects.requireNonNull(mode); + this.compressionLevel = compressionLevel; zstdCompressionMode = new ZstdCompressionMode(compressionLevel); zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel); } @@ -122,4 +124,8 @@ StoredFieldsFormat impl(Lucene95CustomCodec.Mode mode) { Lucene95CustomCodec.Mode getMode() { return mode; } + + public int getCompressionLevel() { + return compressionLevel; + } } diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 7419cf1dadea6..786dfb8c68ec9 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -82,6 +82,7 @@ public final class EngineConfig { private volatile boolean enableGcDeletes = true; private final TimeValue flushMergesAfter; private final String codecName; + private final int compressionLevel; private final ThreadPool threadPool; private final Engine.Warmer warmer; private final Store store; @@ -142,6 +143,13 @@ public Supplier retentionLeasesSupplier() { return s; } }, Property.IndexScope, Property.NodeScope); + public static final Setting INDEX_CODEC_COMPRESSION_LEVEL_SETTING = Setting.intSetting( + "index.codec.compression_level", + 6, + 1, + 6, + Property.IndexScope + ); /** * Configures an index to optimize documents with auto generated ids for append only. 
If this setting is updated from false @@ -179,6 +187,7 @@ private EngineConfig(Builder builder) { this.codecService = builder.codecService; this.eventListener = builder.eventListener; codecName = builder.indexSettings.getValue(INDEX_CODEC_SETTING); + compressionLevel = builder.indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING); // We need to make the indexing buffer for this shard at least as large // as the amount of memory that is available for all engines on the // local node so that decisions to flush segments to disk are made by @@ -250,6 +259,9 @@ public boolean isEnableGcDeletes() { *

*/ public Codec getCodec() { + if (codecName.equals(CodecService.ZSTD_CODEC) || codecName.equals(CodecService.ZSTD_NO_DICT_CODEC)) { + return codecService.codec(codecName, compressionLevel); + } return codecService.codec(codecName); } diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index 016e785f8da13..96d61e43045dc 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -90,6 +90,20 @@ public void testZstdNoDict() throws Exception { assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); } + public void testZstdWithCompressionLevel() throws Exception { + Codec codec = createCodecService(false).codec("zstd", 1); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(1, storedFieldsFormat.getCompressionLevel()); + } + + public void testZstdNoDictWithCompressionLevel() throws Exception { + Codec codec = createCodecService(false).codec("zstd_no_dict", 1); + assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(1, storedFieldsFormat.getCompressionLevel()); + } + public void testDefaultMapperServiceNull() throws Exception { Codec codec = createCodecService(true).codec("default"); assertStoredFieldsCompressionEquals(Lucene95Codec.Mode.BEST_SPEED, codec); diff --git a/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java b/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java index 4f23450ce0b39..755d271fe5934 100644 --- 
a/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java @@ -24,4 +24,22 @@ public void testZstdNoDictLucene95CustomCodecMode() { assertEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, lucene95CustomStoredFieldsFormat.getMode()); } + public void testZstdModeWithCompressionLevel() { + Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat( + Lucene95CustomCodec.Mode.ZSTD, + 1 + ); + assertEquals(Lucene95CustomCodec.Mode.ZSTD, lucene95CustomStoredFieldsFormat.getMode()); + assertEquals(1, lucene95CustomStoredFieldsFormat.getCompressionLevel()); + } + + public void testZstdNoDictLucene95CustomCodecModeWithCompressionLevel() { + Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat( + Lucene95CustomCodec.Mode.ZSTD_NO_DICT, + 1 + ); + assertEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, lucene95CustomStoredFieldsFormat.getMode()); + assertEquals(1, lucene95CustomStoredFieldsFormat.getCompressionLevel()); + } + }