fixes in merge, aggregators, added tests, addressed comments
Signed-off-by: Sarthak Aggarwal <[email protected]>
sarthakaggarwal97 committed Aug 13, 2024
1 parent 3b2ad9b commit cf53b70
Showing 21 changed files with 509 additions and 273 deletions.
@@ -11,6 +11,7 @@
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedNumericDocValues;
import org.opensearch.index.codec.composite.DocValuesProvider;

import java.io.IOException;

@@ -21,7 +22,7 @@
*
* @opensearch.experimental
*/
public class Lucene90DocValuesProducerWrapper {
public class Lucene90DocValuesProducerWrapper implements DocValuesProvider {

private final Lucene90DocValuesProducer lucene90DocValuesProducer;
private final SegmentReadState state;
@@ -37,12 +38,15 @@ public Lucene90DocValuesProducerWrapper(
this.state = state;
}

// returns the doc id set iterator based on field name
// returns the field doc id set iterator based on field name
@Override
public SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException {
return this.lucene90DocValuesProducer.getSortedNumeric(state.fieldInfos.fieldInfo(fieldName));
}

public Lucene90DocValuesProducer getLucene90DocValuesProducer() {
@Override
public DocValuesProducer getDocValuesProducer() {
return lucene90DocValuesProducer;
}

}
@@ -0,0 +1,38 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.SortedNumericDocValues;

import java.io.IOException;

/**
* An interface that provides access to document values for a specific field.
*
* @opensearch.experimental
*/
public interface DocValuesProvider {

/**
* Returns the sorted numeric document values for the specified field.
*
* @param fieldName The name of the field for which to retrieve the sorted numeric document values.
* @return The sorted numeric document values for the specified field.
* @throws IOException If an error occurs while retrieving the sorted numeric document values.
*/
SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException;

/**
* Returns the DocValuesProducer instance.
*
* @return The DocValuesProducer instance.
*/
DocValuesProducer getDocValuesProducer();
}
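For context, a minimal caller-side sketch of the new interface (an illustration, not code from this commit): consumers work with field names directly and leave the FieldInfo lookup to the implementation. The helper class, method, and field name below are assumptions.

import org.apache.lucene.index.SortedNumericDocValues;
import org.opensearch.index.codec.composite.DocValuesProvider;

import java.io.IOException;

// Illustrative caller, not part of this commit.
final class DocValuesProviderUsageSketch {

    static long sumValuesForDoc(DocValuesProvider provider, String fieldName, int docId) throws IOException {
        SortedNumericDocValues values = provider.getSortedNumeric(fieldName);
        long sum = 0;
        if (values.advanceExact(docId)) {
            // a document may hold several values for a sorted-numeric field
            for (int i = 0; i < values.docValueCount(); i++) {
                sum += values.nextValue();
            }
        }
        return sum;
    }
}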
@@ -0,0 +1,46 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumerWrapper;
import org.apache.lucene.index.SegmentWriteState;

import java.io.IOException;

import static org.opensearch.index.codec.composite.composite99.Composite99Codec.COMPOSITE_INDEX_CODEC_NAME;

/**
* A factory class that provides a factory method for creating {@link DocValuesConsumer} instances
* based on the specified composite codec.
*
* @opensearch.experimental
*/
public class LuceneDocValuesConsumerFactory {

public static DocValuesConsumer getDocValuesConsumerForCompositeCodec(
String compositeCodec,
SegmentWriteState state,
String dataCodec,
String dataExtension,
String metaCodec,
String metaExtension
) throws IOException {

switch (compositeCodec) {
case COMPOSITE_INDEX_CODEC_NAME:
return new Lucene90DocValuesConsumerWrapper(state, dataCodec, dataExtension, metaCodec, metaExtension)
.getLucene90DocValuesConsumer();
default:
throw new IllegalStateException("Invalid composite codec " + "[" + compositeCodec + "]");
}

}

}
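The commit message mentions added tests; the snippet below is only an illustrative test-style sketch (class name, argument values, and assertions are assumptions, not necessarily this commit's actual tests) showing how the factory's validation branch can be exercised.

import org.opensearch.index.codec.composite.LuceneDocValuesConsumerFactory;
import org.opensearch.test.OpenSearchTestCase;

// Illustrative test sketch, not part of this commit.
public class LuceneDocValuesConsumerFactorySketchTests extends OpenSearchTestCase {

    public void testInvalidCompositeCodecIsRejected() {
        // the codec-name switch runs before the SegmentWriteState is used, so null is enough
        // to exercise the validation branch with a hypothetical unsupported codec name
        IllegalStateException ex = expectThrows(
            IllegalStateException.class,
            () -> LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec(
                "unsupported_codec",
                null,
                "dataCodec",
                "dvd",
                "metaCodec",
                "dvm"
            )
        );
        assertTrue(ex.getMessage().contains("Invalid composite codec"));
    }
}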
@@ -0,0 +1,44 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.composite;

import org.apache.lucene.codecs.lucene90.Lucene90DocValuesProducerWrapper;
import org.apache.lucene.index.SegmentReadState;
import org.opensearch.index.codec.composite.composite99.Composite99Codec;

import java.io.IOException;

/**
* A factory class that provides a factory method for creating {@link DocValuesProvider} instances
* based on the specified composite codec.
*
* @opensearch.experimental
*/
public class LuceneDocValuesProducerFactory {

public static DocValuesProvider getDocValuesProducerForCompositeCodec(
String compositeCodec,
SegmentReadState state,
String dataCodec,
String dataExtension,
String metaCodec,
String metaExtension
) throws IOException {

switch (compositeCodec) {
case Composite99Codec.COMPOSITE_INDEX_CODEC_NAME:
return new Lucene90DocValuesProducerWrapper(state, dataCodec, dataExtension, metaCodec, metaExtension);
default:
throw new IllegalStateException("Invalid composite codec " + "[" + compositeCodec + "]");
}

}

}
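Unlike the consumer factory, this one returns the DocValuesProvider wrapper rather than the raw Lucene producer. A short sketch of the release path this implies (an assumption for illustration, not code from this commit): since DocValuesProvider is not itself Closeable, callers close the wrapped producer exposed by getDocValuesProducer(), as the reader change further down this diff does.

import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.codec.composite.DocValuesProvider;

import java.io.IOException;

// Illustrative helper, not part of this commit.
final class DocValuesProviderCloseSketch {

    static void close(DocValuesProvider provider) throws IOException {
        // release the underlying Lucene producer wrapped by the provider
        IOUtils.close(provider.getDocValuesProducer());
    }
}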
@@ -30,6 +30,8 @@
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.DocValuesProvider;
import org.opensearch.index.codec.composite.LuceneDocValuesProducerFactory;
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.MetricEntry;
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
@@ -185,13 +187,17 @@ public Composite99DocValuesReader(DocValuesProducer producer, SegmentReadState r
);

// initialize star-tree doc values producer
compositeDocValuesProducer = new Lucene90DocValuesProducerWrapper(

DocValuesProvider docValuesProvider = LuceneDocValuesProducerFactory.getDocValuesProducerForCompositeCodec(
Composite99Codec.COMPOSITE_INDEX_CODEC_NAME,
segmentReadState,
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC,
Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION,
Composite99DocValuesFormat.META_DOC_VALUES_CODEC,
Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION
);

compositeDocValuesProducer = (Lucene90DocValuesProducerWrapper) docValuesProvider;
} catch (Throwable t) {
priorE = t;
} finally {
@@ -242,7 +248,7 @@ public void close() throws IOException {
boolean success = false;
try {
IOUtils.close(metaIn, dataIn);
IOUtils.close(compositeDocValuesProducer.getLucene90DocValuesProducer());
IOUtils.close(compositeDocValuesProducer.getDocValuesProducer());
success = true;
} finally {
if (!success) {
@@ -11,7 +11,6 @@
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesConsumerWrapper;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.EmptyDocValuesProducer;
@@ -25,6 +24,7 @@
import org.opensearch.common.util.io.IOUtils;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.LuceneDocValuesConsumerFactory;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeField;
import org.opensearch.index.compositeindex.datacube.startree.builder.StarTreesBuilder;
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;
@@ -86,13 +86,14 @@ public Composite99DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState

boolean success = false;
try {
this.composite99DocValuesConsumer = new Lucene90DocValuesConsumerWrapper(
this.composite99DocValuesConsumer = LuceneDocValuesConsumerFactory.getDocValuesConsumerForCompositeCodec(
Composite99Codec.COMPOSITE_INDEX_CODEC_NAME,
segmentWriteState,
Composite99DocValuesFormat.DATA_DOC_VALUES_CODEC,
Composite99DocValuesFormat.DATA_DOC_VALUES_EXTENSION,
Composite99DocValuesFormat.META_DOC_VALUES_CODEC,
Composite99DocValuesFormat.META_DOC_VALUES_EXTENSION
).getLucene90DocValuesConsumer();
);

String dataFileName = IndexFileNames.segmentFileName(
segmentWriteState.segmentInfo.name,
@@ -57,7 +57,7 @@ public Double toStarTreeNumericTypeValue(Long value) {
if (value == null) {
return getIdentityMetricValue();
}
return starTreeNumericType.getDoubleValue(value);
return VALUE_AGGREGATOR_TYPE.getDoubleValue(value);
} catch (Exception e) {
throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e);
}
@@ -96,7 +96,7 @@ public Double toStarTreeNumericTypeValue(Long value) {
if (value == null) {
return getIdentityMetricValue();
}
return starTreeNumericType.getDoubleValue(value);
return VALUE_AGGREGATOR_TYPE.getDoubleValue(value);
} catch (Exception e) {
throw new IllegalStateException("Cannot convert " + value + " to sortable aggregation type", e);
}
@@ -171,16 +171,16 @@ public List<SequentialDocValuesIterator> getMetricReaders(SegmentWriteState stat
for (MetricStat metricStat : metric.getMetrics()) {
SequentialDocValuesIterator metricReader;
FieldInfo metricFieldInfo = state.fieldInfos.fieldInfo(metric.getField());
if (metricStat != MetricStat.COUNT) {
if (metricFieldInfo == null) {
metricFieldInfo = StarTreeUtils.getFieldInfo(metric.getField(), 1);
}
metricReader = new SequentialDocValuesIterator(
fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo)
);
} else {
metricReader = new SequentialDocValuesIterator();
// if (metricStat != MetricStat.COUNT) {
if (metricFieldInfo == null) {
metricFieldInfo = StarTreeUtils.getFieldInfo(metric.getField(), 1);
}
metricReader = new SequentialDocValuesIterator(
fieldProducerMap.get(metricFieldInfo.name).getSortedNumeric(metricFieldInfo)
);
// } else {
// metricReader = new SequentialDocValuesIterator();
// }

metricReaders.add(metricReader);
}
@@ -238,9 +238,7 @@ public void build(
) throws IOException {
int numSegmentStarTreeDocument = totalSegmentDocs;

while (starTreeDocumentIterator.hasNext()) {
appendToStarTree(starTreeDocumentIterator.next());
}
appendDocumentsToStarTree(starTreeDocumentIterator);
int numStarTreeDocument = numStarTreeDocs;
logger.debug("Generated star tree docs : [{}] from segment docs : [{}]", numStarTreeDocument, numSegmentStarTreeDocument);

@@ -269,6 +267,12 @@ public void build(
serializeStarTree(numStarTreeDocument);
}

void appendDocumentsToStarTree(Iterator<StarTreeDocument> starTreeDocumentIterator) throws IOException {
while (starTreeDocumentIterator.hasNext()) {
appendToStarTree(starTreeDocumentIterator.next());
}
}

private void serializeStarTree(int numSegmentStarTreeDocument) throws IOException {
// serialize the star tree data
long dataFilePointer = dataOut.getFilePointer();
@@ -759,9 +763,7 @@ private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dim
starNode.nodeType = StarTreeNodeType.STAR.getValue();
starNode.startDocId = numStarTreeDocs;
Iterator<StarTreeDocument> starTreeDocumentIterator = generateStarTreeDocumentsForStarNode(startDocId, endDocId, dimensionId);
while (starTreeDocumentIterator.hasNext()) {
appendToStarTree(starTreeDocumentIterator.next());
}
appendDocumentsToStarTree(starTreeDocumentIterator);
starNode.endDocId = numStarTreeDocs;
return starNode;
}
@@ -22,7 +22,6 @@
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTree;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode;
import org.opensearch.index.compositeindex.datacube.startree.node.Tree;

import java.io.IOException;
import java.util.ArrayList;
@@ -102,7 +101,7 @@ public StarTreeValues(
)
);

Tree starTree = new StarTree(compositeIndexIn, starTreeMetadata);
StarTree starTree = new StarTree(compositeIndexIn, starTreeMetadata);
this.root = starTree.getRoot();

// get doc id set iterators for metrics and dimensions
@@ -24,7 +24,7 @@
*
* @opensearch.experimental
*/
public class StarTree implements Tree {
public class StarTree {
private static final Logger logger = LogManager.getLogger(StarTree.class);
private final FixedLengthStarTreeNode root;
private final Integer numNodes;
@@ -49,7 +49,6 @@ public StarTree(IndexInput data, StarTreeMetadata starTreeMetadata) throws IOExc
root = new FixedLengthStarTreeNode(in, 0);
}

@Override
public StarTreeNode getRoot() {
return root;
}
@@ -85,9 +85,17 @@ public interface StarTreeNode {
boolean isLeaf();

/**
* Checks if the current node is a star node, null node or a node with actual dimension value.
* Determines the type of the current node in the Star Tree index structure.
*
* @return the node type value based on the star-tree node type
* <p>The node type can be one of the following:
* <ul>
* <li>Star Node: Represented by the value -2.
* <li>Null Node: Represented by the value -1.
* <li>Default Node: Represented by the value 0.
* </ul>
* @see StarTreeNodeType
*
* @return The type of the current node, represented by the corresponding byte value (-2, -1, or 0).
* @throws IOException if an I/O error occurs while reading the node type
*/
byte getStarTreeNodeType() throws IOException;
@@ -10,14 +10,27 @@

/**
* Represents the different types of nodes in a StarTree data structure.
*
* <p>
* In order to handle different node types, we use a byte value to represent the node type.
* This enum provides a convenient way to map byte values to their corresponding node types.
*
* <p>
* Star and Null Nodes are represented as special cases. Default is the general case.
* Star and Null nodes are represented with negative ordinals so that first node is Star, second node is Null Node
* and the rest of the default nodes are sorted based on dimension values.
* Star and null nodes are represented with negative ordinal values to ensure that they are
* sorted before the default nodes, which are sorted based on their dimension values.
*
* <p>
* The node type can be one of the following:
* <ul>
* <li>Star Node: Represented by the value -2. A star node is a special node that represents
* all possible values for a dimension.</li>
* <li>Null Node: Represented by the value -1. A null node indicates the absence of any value
* for a dimension.</li>
* <li>Default Node: Represented by the value 0. A default node represents a node with an
* actual dimension value.</li>
* </ul>
*
* By default, a node is considered to be a default node.
*
* @opensearch.experimental
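The enum body itself is cut off in this hunk; the sketch below only illustrates the byte-to-type mapping the javadoc describes. The type and method names (getValue, fromValue) are assumptions for illustration, not the actual StarTreeNodeType API from this commit.

// Illustrative sketch of the mapping described above, not the real enum.
enum StarTreeNodeTypeSketch {
    STAR((byte) -2),     // matches all possible values for a dimension
    NULL((byte) -1),     // no value present for the dimension
    DEFAULT((byte) 0);   // an actual dimension value

    private final byte value;

    StarTreeNodeTypeSketch(byte value) {
        this.value = value;
    }

    byte getValue() {
        return value;
    }

    static StarTreeNodeTypeSketch fromValue(byte value) {
        switch (value) {
            case -2:
                return STAR;
            case -1:
                return NULL;
            case 0:
                return DEFAULT;
            default:
                throw new IllegalStateException("Unsupported node type value: " + value);
        }
    }
}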