From 3f76e3c59b6b0560849e15bbb722f252a0d7d8fa Mon Sep 17 00:00:00 2001 From: Sarthak Aggarwal Date: Fri, 30 Jun 2023 11:28:35 +0530 Subject: [PATCH] addressing review comments Signed-off-by: Sarthak Aggarwal --- .../opensearch/index/codec/CodecService.java | 17 +++------ .../customcodecs/Lucene95CustomCodec.java | 12 +----- .../index/codec/customcodecs/ZstdCodec.java | 4 +- .../codec/customcodecs/ZstdNoDictCodec.java | 4 +- .../opensearch/index/engine/EngineConfig.java | 11 +++--- .../opensearch/index/codec/CodecTests.java | 37 +++++++++++++++++-- ...Lucene95CustomStoredFieldsFormatTests.java | 15 ++++++-- 7 files changed, 60 insertions(+), 40 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/codec/CodecService.java b/server/src/main/java/org/opensearch/index/codec/CodecService.java index 439cad063fb11..07203588eedd0 100644 --- a/server/src/main/java/org/opensearch/index/codec/CodecService.java +++ b/server/src/main/java/org/opensearch/index/codec/CodecService.java @@ -38,13 +38,14 @@ import org.apache.lucene.codecs.lucene95.Lucene95Codec.Mode; import org.opensearch.common.Nullable; import org.opensearch.common.collect.MapBuilder; -import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec; import org.opensearch.index.codec.customcodecs.ZstdCodec; import org.opensearch.index.codec.customcodecs.ZstdNoDictCodec; import org.opensearch.index.mapper.MapperService; import java.util.Map; +import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING; + /** * Since Lucene 4.0 low level index segments are read and written through a * codec layer that allows to use use-case specific file formats & @@ -74,10 +75,11 @@ public CodecService(@Nullable MapperService mapperService, Logger logger) { codecs.put(ZSTD_CODEC, new ZstdCodec()); codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec()); } else { + int compressionLevel = mapperService.getIndexSettings().getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING); codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger)); codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger)); - codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger)); - codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger)); + codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger, compressionLevel)); + codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger, compressionLevel)); } codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); for (String codec : Codec.availableCodecs()) { @@ -94,15 +96,6 @@ public Codec codec(String name) { return codec; } - public Codec codec(String name, int compressionLevel) { - Lucene95CustomCodec codec = (Lucene95CustomCodec) codecs.get(name); - if (codec == null) { - throw new IllegalArgumentException("failed to find codec [" + name + "]"); - } - codec.updateCompressionLevel(compressionLevel); - return codec; - } - /** * Returns all registered available codec names */ diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java index ffbe34d68dcc8..3c570f9d0566c 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/Lucene95CustomCodec.java @@ -15,8 +15,6 @@ import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec; import org.opensearch.index.mapper.MapperService; -import java.util.Objects; - /** * * Extends {@link FilterCodec} to reuse the functionality of Lucene Codec. @@ -33,9 +31,7 @@ public enum Mode { ZSTD_NO_DICT } - private final Mode mode; - - private StoredFieldsFormat storedFieldsFormat; + private final StoredFieldsFormat storedFieldsFormat; /** * Creates a new compression codec with the default compression level. @@ -56,13 +52,11 @@ public Lucene95CustomCodec(Mode mode) { */ public Lucene95CustomCodec(Mode mode, int compressionLevel) { super("Lucene95CustomCodec", new Lucene95Codec()); - this.mode = Objects.requireNonNull(mode); this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } public Lucene95CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) { super("Lucene95CustomCodec", new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger)); - this.mode = Objects.requireNonNull(mode); this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); } @@ -75,8 +69,4 @@ public StoredFieldsFormat storedFieldsFormat() { public String toString() { return getClass().getSimpleName(); } - - public void updateCompressionLevel(int compressionLevel) { - this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel); - } } diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java index 68da782421e6e..04c110fceacdf 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdCodec.java @@ -32,8 +32,8 @@ public ZstdCodec(int compressionLevel) { super(Mode.ZSTD, compressionLevel); } - public ZstdCodec(MapperService mapperService, Logger logger) { - super(Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL, mapperService, logger); + public ZstdCodec(MapperService mapperService, Logger logger, int compressionLevel) { + super(Mode.ZSTD, compressionLevel, mapperService, logger); } /** The name for this codec. */ diff --git a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java index 26620473ec116..134f9a14422ad 100644 --- a/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java +++ b/server/src/main/java/org/opensearch/index/codec/customcodecs/ZstdNoDictCodec.java @@ -32,8 +32,8 @@ public ZstdNoDictCodec(int compressionLevel) { super(Mode.ZSTD_NO_DICT, compressionLevel); } - public ZstdNoDictCodec(MapperService mapperService, Logger logger) { - super(Mode.ZSTD_NO_DICT, DEFAULT_COMPRESSION_LEVEL, mapperService, logger); + public ZstdNoDictCodec(MapperService mapperService, Logger logger, int compressionLevel) { + super(Mode.ZSTD_NO_DICT, compressionLevel, mapperService, logger); } /** The name for this codec. */ diff --git a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java index 786dfb8c68ec9..a1ce263f26b1f 100644 --- a/server/src/main/java/org/opensearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/opensearch/index/engine/EngineConfig.java @@ -82,7 +82,6 @@ public final class EngineConfig { private volatile boolean enableGcDeletes = true; private final TimeValue flushMergesAfter; private final String codecName; - private final int compressionLevel; private final ThreadPool threadPool; private final Engine.Warmer warmer; private final Store store; @@ -143,6 +142,12 @@ public Supplier retentionLeasesSupplier() { return s; } }, Property.IndexScope, Property.NodeScope); + + /** + * Index setting to change the compression level of zstd and zstd_no_dict lucene codecs. + * Compression Level gives a trade-off between compression ratio and speed. The higher compression level results in higher compression ratio but slower compression and decompression speeds. + * This setting is not realtime updateable. + */ public static final Setting INDEX_CODEC_COMPRESSION_LEVEL_SETTING = Setting.intSetting( "index.codec.compression_level", 6, @@ -187,7 +192,6 @@ private EngineConfig(Builder builder) { this.codecService = builder.codecService; this.eventListener = builder.eventListener; codecName = builder.indexSettings.getValue(INDEX_CODEC_SETTING); - compressionLevel = builder.indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING); // We need to make the indexing buffer for this shard at least as large // as the amount of memory that is available for all engines on the // local node so that decisions to flush segments to disk are made by @@ -259,9 +263,6 @@ public boolean isEnableGcDeletes() { *

*/ public Codec getCodec() { - if (codecName.equals(CodecService.ZSTD_CODEC) || codecName.equals(CodecService.ZSTD_NO_DICT_CODEC)) { - return codecService.codec(codecName, compressionLevel); - } return codecService.codec(codecName); } diff --git a/server/src/test/java/org/opensearch/index/codec/CodecTests.java b/server/src/test/java/org/opensearch/index/codec/CodecTests.java index 96d61e43045dc..180a30b5c3ff0 100644 --- a/server/src/test/java/org/opensearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/opensearch/index/codec/CodecTests.java @@ -43,6 +43,7 @@ import org.apache.lucene.index.SegmentReader; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.util.LuceneTestCase.SuppressCodecs; +import org.opensearch.common.Randomness; import org.opensearch.common.settings.Settings; import org.opensearch.env.Environment; import org.opensearch.index.IndexSettings; @@ -83,25 +84,31 @@ public void testBestCompression() throws Exception { public void testZstd() throws Exception { Codec codec = createCodecService(false).codec("zstd"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); } public void testZstdNoDict() throws Exception { Codec codec = createCodecService(false).codec("zstd_no_dict"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); } public void testZstdWithCompressionLevel() throws Exception { - Codec codec = createCodecService(false).codec("zstd", 1); + int randomCompressionLevel = generateRandomNumber(6, 1); + Codec codec = createCodecService(randomCompressionLevel).codec("zstd"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(1, storedFieldsFormat.getCompressionLevel()); + assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); } public void testZstdNoDictWithCompressionLevel() throws Exception { - Codec codec = createCodecService(false).codec("zstd_no_dict", 1); + int randomCompressionLevel = generateRandomNumber(6, 1); + Codec codec = createCodecService(randomCompressionLevel).codec("zstd_no_dict"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); - assertEquals(1, storedFieldsFormat.getCompressionLevel()); + assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel()); } public void testDefaultMapperServiceNull() throws Exception { @@ -117,11 +124,15 @@ public void testBestCompressionMapperServiceNull() throws Exception { public void testZstdMapperServiceNull() throws Exception { Codec codec = createCodecService(true).codec("zstd"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); } public void testZstdNoDictMapperServiceNull() throws Exception { Codec codec = createCodecService(true).codec("zstd_no_dict"); assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec); + Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat(); + assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel()); } public void testExceptionCodecNull() { @@ -149,6 +160,19 @@ private CodecService createCodecService(boolean isMapperServiceNull) throws IOEx return new CodecService(null, LogManager.getLogger("test")); } Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build(); + return buildCodecService(nodeSettings); + } + + private CodecService createCodecService(int randomCompressionLevel) throws IOException { + Settings nodeSettings = Settings.builder() + .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()) + .put("index.codec.compression_level", randomCompressionLevel) + .build(); + return buildCodecService(nodeSettings); + } + + private CodecService buildCodecService(Settings nodeSettings) throws IOException { + IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings); SimilarityService similarityService = new SimilarityService(settings, null, Collections.emptyMap()); IndexAnalyzers indexAnalyzers = createTestAnalysis(settings, nodeSettings).indexAnalyzers; @@ -180,4 +204,9 @@ private SegmentReader getSegmentReader(Codec codec) throws IOException { dir.close(); return sr; } + + private int generateRandomNumber(int max, int min) { + return Randomness.get().nextInt(max - min + 1) + min; + } + } diff --git a/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java b/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java index 755d271fe5934..a0ef70f0be6b0 100644 --- a/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java +++ b/server/src/test/java/org/opensearch/index/codec/customcodecs/Lucene95CustomStoredFieldsFormatTests.java @@ -8,6 +8,7 @@ package org.opensearch.index.codec.customcodecs; +import org.opensearch.common.Randomness; import org.opensearch.test.OpenSearchTestCase; public class Lucene95CustomStoredFieldsFormatTests extends OpenSearchTestCase { @@ -25,21 +26,27 @@ public void testZstdNoDictLucene95CustomCodecMode() { } public void testZstdModeWithCompressionLevel() { + int randomCompressionLevel = generateRandomNumber(6, 1); Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat( Lucene95CustomCodec.Mode.ZSTD, - 1 + randomCompressionLevel ); assertEquals(Lucene95CustomCodec.Mode.ZSTD, lucene95CustomStoredFieldsFormat.getMode()); - assertEquals(1, lucene95CustomStoredFieldsFormat.getCompressionLevel()); + assertEquals(randomCompressionLevel, lucene95CustomStoredFieldsFormat.getCompressionLevel()); } public void testZstdNoDictLucene95CustomCodecModeWithCompressionLevel() { + int randomCompressionLevel = generateRandomNumber(6, 1); Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat( Lucene95CustomCodec.Mode.ZSTD_NO_DICT, - 1 + randomCompressionLevel ); assertEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, lucene95CustomStoredFieldsFormat.getMode()); - assertEquals(1, lucene95CustomStoredFieldsFormat.getCompressionLevel()); + assertEquals(randomCompressionLevel, lucene95CustomStoredFieldsFormat.getCompressionLevel()); + } + + private int generateRandomNumber(int max, int min) { + return Randomness.get().nextInt(max - min + 1) + min; } }