Skip to content

Commit

Permalink
addressing review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 committed Jun 30, 2023
1 parent cc53b72 commit 3f76e3c
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 40 deletions.
17 changes: 5 additions & 12 deletions server/src/main/java/org/opensearch/index/codec/CodecService.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,14 @@
import org.apache.lucene.codecs.lucene95.Lucene95Codec.Mode;
import org.opensearch.common.Nullable;
import org.opensearch.common.collect.MapBuilder;
import org.opensearch.index.codec.customcodecs.Lucene95CustomCodec;
import org.opensearch.index.codec.customcodecs.ZstdCodec;
import org.opensearch.index.codec.customcodecs.ZstdNoDictCodec;
import org.opensearch.index.mapper.MapperService;

import java.util.Map;

import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;

/**
* Since Lucene 4.0 low level index segments are read and written through a
* codec layer that allows to use use-case specific file formats &amp;
Expand Down Expand Up @@ -74,10 +75,11 @@ public CodecService(@Nullable MapperService mapperService, Logger logger) {
codecs.put(ZSTD_CODEC, new ZstdCodec());
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec());
} else {
int compressionLevel = mapperService.getIndexSettings().getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger));
codecs.put(ZSTD_CODEC, new ZstdCodec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDictCodec(mapperService, logger, compressionLevel));
}
codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
for (String codec : Codec.availableCodecs()) {
Expand All @@ -94,15 +96,6 @@ public Codec codec(String name) {
return codec;
}

public Codec codec(String name, int compressionLevel) {
Lucene95CustomCodec codec = (Lucene95CustomCodec) codecs.get(name);
if (codec == null) {
throw new IllegalArgumentException("failed to find codec [" + name + "]");
}
codec.updateCompressionLevel(compressionLevel);
return codec;
}

/**
* Returns all registered available codec names
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
import org.opensearch.index.mapper.MapperService;

import java.util.Objects;

/**
*
* Extends {@link FilterCodec} to reuse the functionality of Lucene Codec.
Expand All @@ -33,9 +31,7 @@ public enum Mode {
ZSTD_NO_DICT
}

private final Mode mode;

private StoredFieldsFormat storedFieldsFormat;
private final StoredFieldsFormat storedFieldsFormat;

/**
* Creates a new compression codec with the default compression level.
Expand All @@ -56,13 +52,11 @@ public Lucene95CustomCodec(Mode mode) {
*/
public Lucene95CustomCodec(Mode mode, int compressionLevel) {
super("Lucene95CustomCodec", new Lucene95Codec());
this.mode = Objects.requireNonNull(mode);
this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel);
}

public Lucene95CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
super("Lucene95CustomCodec", new PerFieldMappingPostingFormatCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, logger));
this.mode = Objects.requireNonNull(mode);
this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel);
}

Expand All @@ -75,8 +69,4 @@ public StoredFieldsFormat storedFieldsFormat() {
public String toString() {
return getClass().getSimpleName();
}

public void updateCompressionLevel(int compressionLevel) {
this.storedFieldsFormat = new Lucene95CustomStoredFieldsFormat(mode, compressionLevel);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ public ZstdCodec(int compressionLevel) {
super(Mode.ZSTD, compressionLevel);
}

public ZstdCodec(MapperService mapperService, Logger logger) {
super(Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL, mapperService, logger);
public ZstdCodec(MapperService mapperService, Logger logger, int compressionLevel) {
super(Mode.ZSTD, compressionLevel, mapperService, logger);
}

/** The name for this codec. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ public ZstdNoDictCodec(int compressionLevel) {
super(Mode.ZSTD_NO_DICT, compressionLevel);
}

public ZstdNoDictCodec(MapperService mapperService, Logger logger) {
super(Mode.ZSTD_NO_DICT, DEFAULT_COMPRESSION_LEVEL, mapperService, logger);
public ZstdNoDictCodec(MapperService mapperService, Logger logger, int compressionLevel) {
super(Mode.ZSTD_NO_DICT, compressionLevel, mapperService, logger);
}

/** The name for this codec. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ public final class EngineConfig {
private volatile boolean enableGcDeletes = true;
private final TimeValue flushMergesAfter;
private final String codecName;
private final int compressionLevel;
private final ThreadPool threadPool;
private final Engine.Warmer warmer;
private final Store store;
Expand Down Expand Up @@ -143,6 +142,12 @@ public Supplier<RetentionLeases> retentionLeasesSupplier() {
return s;
}
}, Property.IndexScope, Property.NodeScope);

/**
* Index setting to change the compression level of zstd and zstd_no_dict lucene codecs.
* Compression Level gives a trade-off between compression ratio and speed. The higher compression level results in higher compression ratio but slower compression and decompression speeds.
* This setting is <b>not</b> realtime updateable.
*/
public static final Setting<Integer> INDEX_CODEC_COMPRESSION_LEVEL_SETTING = Setting.intSetting(
"index.codec.compression_level",
6,
Expand Down Expand Up @@ -187,7 +192,6 @@ private EngineConfig(Builder builder) {
this.codecService = builder.codecService;
this.eventListener = builder.eventListener;
codecName = builder.indexSettings.getValue(INDEX_CODEC_SETTING);
compressionLevel = builder.indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
// We need to make the indexing buffer for this shard at least as large
// as the amount of memory that is available for all engines on the
// local node so that decisions to flush segments to disk are made by
Expand Down Expand Up @@ -259,9 +263,6 @@ public boolean isEnableGcDeletes() {
* </p>
*/
public Codec getCodec() {
if (codecName.equals(CodecService.ZSTD_CODEC) || codecName.equals(CodecService.ZSTD_NO_DICT_CODEC)) {
return codecService.codec(codecName, compressionLevel);
}
return codecService.codec(codecName);
}

Expand Down
37 changes: 33 additions & 4 deletions server/src/test/java/org/opensearch/index/codec/CodecTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase.SuppressCodecs;
import org.opensearch.common.Randomness;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand Down Expand Up @@ -83,25 +84,31 @@ public void testBestCompression() throws Exception {
public void testZstd() throws Exception {
Codec codec = createCodecService(false).codec("zstd");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel());
}

public void testZstdNoDict() throws Exception {
Codec codec = createCodecService(false).codec("zstd_no_dict");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel());
}

public void testZstdWithCompressionLevel() throws Exception {
Codec codec = createCodecService(false).codec("zstd", 1);
int randomCompressionLevel = generateRandomNumber(6, 1);
Codec codec = createCodecService(randomCompressionLevel).codec("zstd");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(1, storedFieldsFormat.getCompressionLevel());
assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel());
}

public void testZstdNoDictWithCompressionLevel() throws Exception {
Codec codec = createCodecService(false).codec("zstd_no_dict", 1);
int randomCompressionLevel = generateRandomNumber(6, 1);
Codec codec = createCodecService(randomCompressionLevel).codec("zstd_no_dict");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(1, storedFieldsFormat.getCompressionLevel());
assertEquals(randomCompressionLevel, storedFieldsFormat.getCompressionLevel());
}

public void testDefaultMapperServiceNull() throws Exception {
Expand All @@ -117,11 +124,15 @@ public void testBestCompressionMapperServiceNull() throws Exception {
public void testZstdMapperServiceNull() throws Exception {
Codec codec = createCodecService(true).codec("zstd");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel());
}

public void testZstdNoDictMapperServiceNull() throws Exception {
Codec codec = createCodecService(true).codec("zstd_no_dict");
assertStoredFieldsCompressionEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, codec);
Lucene95CustomStoredFieldsFormat storedFieldsFormat = (Lucene95CustomStoredFieldsFormat) codec.storedFieldsFormat();
assertEquals(Lucene95CustomCodec.DEFAULT_COMPRESSION_LEVEL, storedFieldsFormat.getCompressionLevel());
}

public void testExceptionCodecNull() {
Expand Down Expand Up @@ -149,6 +160,19 @@ private CodecService createCodecService(boolean isMapperServiceNull) throws IOEx
return new CodecService(null, LogManager.getLogger("test"));
}
Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build();
return buildCodecService(nodeSettings);
}

private CodecService createCodecService(int randomCompressionLevel) throws IOException {
Settings nodeSettings = Settings.builder()
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
.put("index.codec.compression_level", randomCompressionLevel)
.build();
return buildCodecService(nodeSettings);
}

private CodecService buildCodecService(Settings nodeSettings) throws IOException {

IndexSettings settings = IndexSettingsModule.newIndexSettings("_na", nodeSettings);
SimilarityService similarityService = new SimilarityService(settings, null, Collections.emptyMap());
IndexAnalyzers indexAnalyzers = createTestAnalysis(settings, nodeSettings).indexAnalyzers;
Expand Down Expand Up @@ -180,4 +204,9 @@ private SegmentReader getSegmentReader(Codec codec) throws IOException {
dir.close();
return sr;
}

private int generateRandomNumber(int max, int min) {
return Randomness.get().nextInt(max - min + 1) + min;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.index.codec.customcodecs;

import org.opensearch.common.Randomness;
import org.opensearch.test.OpenSearchTestCase;

public class Lucene95CustomStoredFieldsFormatTests extends OpenSearchTestCase {
Expand All @@ -25,21 +26,27 @@ public void testZstdNoDictLucene95CustomCodecMode() {
}

public void testZstdModeWithCompressionLevel() {
int randomCompressionLevel = generateRandomNumber(6, 1);
Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat(
Lucene95CustomCodec.Mode.ZSTD,
1
randomCompressionLevel
);
assertEquals(Lucene95CustomCodec.Mode.ZSTD, lucene95CustomStoredFieldsFormat.getMode());
assertEquals(1, lucene95CustomStoredFieldsFormat.getCompressionLevel());
assertEquals(randomCompressionLevel, lucene95CustomStoredFieldsFormat.getCompressionLevel());
}

public void testZstdNoDictLucene95CustomCodecModeWithCompressionLevel() {
int randomCompressionLevel = generateRandomNumber(6, 1);
Lucene95CustomStoredFieldsFormat lucene95CustomStoredFieldsFormat = new Lucene95CustomStoredFieldsFormat(
Lucene95CustomCodec.Mode.ZSTD_NO_DICT,
1
randomCompressionLevel
);
assertEquals(Lucene95CustomCodec.Mode.ZSTD_NO_DICT, lucene95CustomStoredFieldsFormat.getMode());
assertEquals(1, lucene95CustomStoredFieldsFormat.getCompressionLevel());
assertEquals(randomCompressionLevel, lucene95CustomStoredFieldsFormat.getCompressionLevel());
}

private int generateRandomNumber(int max, int min) {
return Randomness.get().nextInt(max - min + 1) + min;
}

}

0 comments on commit 3f76e3c

Please sign in to comment.