Skip to content

Commit

Permalink
scaled float support for star tree
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 committed Sep 3, 2024
1 parent 33be5a9 commit bf5c070
Show file tree
Hide file tree
Showing 40 changed files with 517 additions and 357 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.core.xcontent.XContentParser.Token;
import org.opensearch.index.compositeindex.datacube.DimensionType;
import org.opensearch.index.fielddata.FieldData;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.IndexNumericFieldData;
Expand All @@ -71,10 +72,12 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;

/** A {@link FieldMapper} for scaled floats. Values are internally multiplied
* by a scaling factor and rounded to the closest long. */
* by a scaling factor and rounded to the closest long.
*/
public class ScaledFloatFieldMapper extends ParametrizedFieldMapper {

public static final String CONTENT_TYPE = "scaled_float";
Expand Down Expand Up @@ -162,11 +165,21 @@ public ScaledFloatFieldMapper build(BuilderContext context) {
);
return new ScaledFloatFieldMapper(name, type, multiFieldsBuilder.build(this, context), copyTo.build(), this);
}

@Override
public Optional<DimensionType> getSupportedDataCubeDimensionType() {
return Optional.of(DimensionType.NUMERIC);
}

@Override
public boolean isDataCubeMetricSupported() {
return true;
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings()));

public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder, FieldValueConverter {

private final double scalingFactor;
private final Double nullValue;
Expand Down Expand Up @@ -340,6 +353,12 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
private double scale(Object input) {
return new BigDecimal(Double.toString(parse(input))).multiply(BigDecimal.valueOf(scalingFactor)).doubleValue();
}

@Override
public double toDoubleValue(Long value) {
double inverseScalingFactor = 1d / scalingFactor;
return value * inverseScalingFactor;
}
}

private final Explicit<Boolean> ignoreMalformed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,27 @@

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.common.unit.ByteSizeUnit;
import org.opensearch.core.common.unit.ByteSizeValue;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.plugins.Plugin;
import org.junit.AfterClass;
import org.junit.BeforeClass;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import static java.util.Collections.singletonList;
import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX;
import static org.hamcrest.Matchers.containsString;

public class ScaledFloatFieldMapperTests extends MapperTestCase {
Expand Down Expand Up @@ -91,24 +100,112 @@ public void testExistsQueryDocValuesDisabled() throws IOException {
assertParseMinimalWarnings();
}

public void testDefaults() throws Exception {
XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0));
@BeforeClass
public static void createMapper() {
FeatureFlags.initializeFeatureFlags(Settings.builder().put(STAR_TREE_INDEX, "true").build());
}

@AfterClass
public static void clearMapper() {
FeatureFlags.initializeFeatureFlags(Settings.EMPTY);
}

public void testScaledFloatWithStarTree() throws Exception {

double scalingFactorField1 = randomDouble() * 100;
double scalingFactorField2 = randomDouble() * 100;
double scalingFactorField3 = randomDouble() * 100;

XContentBuilder mapping = getStarTreeMappingWithScaledFloat(scalingFactorField1, scalingFactorField2, scalingFactorField3);
DocumentMapper mapper = createDocumentMapper(mapping);
assertEquals(mapping.toString(), mapper.mappingSource().toString());
assertTrue(mapping.toString().contains("startree"));

ParsedDocument doc = mapper.parse(source(b -> b.field("field", 123)));
IndexableField[] fields = doc.rootDoc().getFields("field");
long randomLongField1 = randomLong();
long randomLongField2 = randomLong();
long randomLongField3 = randomLong();
ParsedDocument doc = mapper.parse(
source(b -> b.field("field1", randomLongField1).field("field2", randomLongField2).field("field3", randomLongField3))
);
validateScaledFloatFields(doc, "field1", randomLongField1, scalingFactorField1);
validateScaledFloatFields(doc, "field2", randomLongField2, scalingFactorField2);
validateScaledFloatFields(doc, "field3", randomLongField3, scalingFactorField3);
}

@Override
protected Settings getIndexSettings() {
return Settings.builder()
.put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true)
.put(super.getIndexSettings())
.build();
}

private static void validateScaledFloatFields(ParsedDocument doc, String field, long value, double scalingFactor) {
IndexableField[] fields = doc.rootDoc().getFields(field);
assertEquals(2, fields.length);
IndexableField pointField = fields[0];
assertEquals(1, pointField.fieldType().pointDimensionCount());
assertFalse(pointField.fieldType().stored());
assertEquals(1230, pointField.numericValue().longValue());
assertEquals((long) (value * scalingFactor), pointField.numericValue().longValue());
IndexableField dvField = fields[1];
assertEquals(DocValuesType.SORTED_NUMERIC, dvField.fieldType().docValuesType());
assertEquals(1230, dvField.numericValue().longValue());
assertEquals((long) (value * scalingFactor), dvField.numericValue().longValue());
assertFalse(dvField.fieldType().stored());
}

private XContentBuilder getStarTreeMappingWithScaledFloat(
double scalingFactorField1,
double scalingFactorField2,
double scalingFactorField3
) throws IOException {
return topMapping(b -> {
b.startObject("composite");
b.startObject("startree");
b.field("type", "star_tree");
b.startObject("config");
b.field("max_leaf_docs", 100);
b.startArray("ordered_dimensions");
b.startObject();
b.field("name", "field1");
b.endObject();
b.startObject();
b.field("name", "field2");
b.endObject();
b.endArray();
b.startArray("metrics");
b.startObject();
b.field("name", "field3");
b.startArray("stats");
b.value("sum");
b.value("value_count");
b.endArray();
b.endObject();
b.endArray();
b.endObject();
b.endObject();
b.endObject();
b.startObject("properties");
b.startObject("field1");
b.field("type", "scaled_float").field("scaling_factor", scalingFactorField1);
b.endObject();
b.startObject("field2");
b.field("type", "scaled_float").field("scaling_factor", scalingFactorField2);
b.endObject();
b.startObject("field3");
b.field("type", "scaled_float").field("scaling_factor", scalingFactorField3);
b.endObject();
b.endObject();
});
}

public void testDefaults() throws Exception {
XContentBuilder mapping = fieldMapping(b -> b.field("type", "scaled_float").field("scaling_factor", 10.0));
DocumentMapper mapper = createDocumentMapper(mapping);
assertEquals(mapping.toString(), mapper.mappingSource().toString());

ParsedDocument doc = mapper.parse(source(b -> b.field("field", 123)));
validateScaledFloatFields(doc, "field", 123, 10.0);
}

public void testMissingScalingFactor() {
Exception e = expectThrows(
MapperParsingException.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.xcontent.support.XContentMapValues;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.mapper.NumberFieldMapper;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -55,11 +53,13 @@ public static Dimension parseAndCreateDimension(
Map<String, Object> dimensionMap,
Mapper.TypeParser.ParserContext c
) {
if (builder instanceof DateFieldMapper.Builder) {
if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.DATE)) {
return parseAndCreateDateDimension(name, dimensionMap, c);
} else if (builder instanceof NumberFieldMapper.Builder) {
return new NumericDimension(name);
}
} else if (builder.getSupportedDataCubeDimensionType().isPresent()
&& builder.getSupportedDataCubeDimensionType().get().equals(DimensionType.NUMERIC)) {
return new NumericDimension(name);
}
throw new IllegalArgumentException(
String.format(Locale.ROOT, "unsupported field type associated with star tree dimension [%s]", name)
);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube;

/**
* Represents the types of dimensions supported in a data cube.
* <p>
* This enum defines the possible types of dimensions that can be used
* in a data cube structure within the composite index.
*
* @opensearch.experimental
*/
public enum DimensionType {
/**
* Represents a numeric dimension type.
* This is used for dimensions that contain numerical values.
*/
NUMERIC,

/**
* Represents a date dimension type.
* This is used for dimensions that contain date or timestamp values.
*/
DATE
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,23 @@
*/
package org.opensearch.index.compositeindex.datacube.startree.aggregators;

import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;
import org.opensearch.index.mapper.FieldValueConverter;
import org.opensearch.index.mapper.NumberFieldMapper;

/**
* Count value aggregator for star tree
*
* @opensearch.experimental
*/
class CountValueAggregator implements ValueAggregator<Long> {
public class CountValueAggregator implements ValueAggregator<Long> {

public static final long DEFAULT_INITIAL_VALUE = 1L;
private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.LONG;
private static final FieldValueConverter VALUE_AGGREGATOR_TYPE = NumberFieldMapper.NumberType.LONG;

public CountValueAggregator() {}

@Override
public StarTreeNumericType getAggregatedValueType() {
public FieldValueConverter getAggregatedValueType() {
return VALUE_AGGREGATOR_TYPE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

package org.opensearch.index.compositeindex.datacube.startree.aggregators;

import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;
import org.opensearch.index.mapper.FieldValueConverter;
import org.opensearch.index.mapper.NumberFieldMapper;

/**
* Aggregator to handle '_doc_count' field
Expand All @@ -17,12 +18,12 @@
*/
public class DocCountAggregator implements ValueAggregator<Long> {

private static final StarTreeNumericType VALUE_AGGREGATOR_TYPE = StarTreeNumericType.LONG;
private static final FieldValueConverter VALUE_AGGREGATOR_TYPE = NumberFieldMapper.NumberType.LONG;

public DocCountAggregator() {}

@Override
public StarTreeNumericType getAggregatedValueType() {
public FieldValueConverter getAggregatedValueType() {
return VALUE_AGGREGATOR_TYPE;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*/
package org.opensearch.index.compositeindex.datacube.startree.aggregators;

import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;
import org.opensearch.index.mapper.FieldValueConverter;

/**
* Max value aggregator for star tree
Expand All @@ -16,8 +16,8 @@
*/
class MaxValueAggregator extends StatelessDoubleValueAggregator {

public MaxValueAggregator(StarTreeNumericType starTreeNumericType) {
super(starTreeNumericType, null);
public MaxValueAggregator(FieldValueConverter fieldValueConverter) {
super(fieldValueConverter, null);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
package org.opensearch.index.compositeindex.datacube.startree.aggregators;

import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;
import org.opensearch.index.fielddata.IndexNumericFieldData;
import org.opensearch.index.mapper.FieldValueConverter;

import java.util.Comparator;
import java.util.Objects;
Expand All @@ -27,15 +26,15 @@ public class MetricAggregatorInfo implements Comparable<MetricAggregatorInfo> {
private final MetricStat metricStat;
private final String field;
private final ValueAggregator valueAggregators;
private final StarTreeNumericType starTreeNumericType;
private final FieldValueConverter fieldValueConverter;

/**
* Constructor for MetricAggregatorInfo
*/
public MetricAggregatorInfo(MetricStat metricStat, String field, String starFieldName, IndexNumericFieldData.NumericType numericType) {
public MetricAggregatorInfo(MetricStat metricStat, String field, String starFieldName, FieldValueConverter fieldValueConverter) {
this.metricStat = metricStat;
this.starTreeNumericType = StarTreeNumericType.fromNumericType(numericType);
this.valueAggregators = ValueAggregatorFactory.getValueAggregator(metricStat, this.starTreeNumericType);
this.fieldValueConverter = fieldValueConverter;
this.valueAggregators = ValueAggregatorFactory.getValueAggregator(metricStat, this.fieldValueConverter);
this.field = field;
this.starFieldName = starFieldName;
this.metric = toFieldName();
Expand Down Expand Up @@ -72,8 +71,8 @@ public ValueAggregator getValueAggregators() {
/**
* @return star tree aggregated value type
*/
public StarTreeNumericType getAggregatedValueType() {
return starTreeNumericType;
public FieldValueConverter getNumericFieldConverter() {
return fieldValueConverter;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*/
package org.opensearch.index.compositeindex.datacube.startree.aggregators;

import org.opensearch.index.compositeindex.datacube.startree.aggregators.numerictype.StarTreeNumericType;
import org.opensearch.index.mapper.FieldValueConverter;

/**
* Min value aggregator for star tree
Expand All @@ -16,8 +16,8 @@
*/
class MinValueAggregator extends StatelessDoubleValueAggregator {

public MinValueAggregator(StarTreeNumericType starTreeNumericType) {
super(starTreeNumericType, null);
public MinValueAggregator(FieldValueConverter fieldValueConverter) {
super(fieldValueConverter, null);
}

@Override
Expand Down
Loading

0 comments on commit bf5c070

Please sign in to comment.