Skip to content

Commit

Permalink
separated functions for star node, removed metric entry and tree node
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 committed Aug 28, 2024
1 parent 8776aeb commit 926ec57
Show file tree
Hide file tree
Showing 14 changed files with 125 additions and 201 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
import org.opensearch.index.compositeindex.datacube.startree.aggregators.MetricAggregatorInfo;
import org.opensearch.index.compositeindex.datacube.startree.aggregators.ValueAggregator;
import org.opensearch.index.compositeindex.datacube.startree.node.InMemoryTreeNode;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType;
import org.opensearch.index.compositeindex.datacube.startree.utils.SequentialDocValuesIterator;
import org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode;
import org.opensearch.index.fielddata.IndexNumericFieldData;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.Mapper;
Expand All @@ -46,7 +47,7 @@
import java.util.Objects;
import java.util.Set;

import static org.opensearch.index.compositeindex.datacube.startree.utils.TreeNode.ALL;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL;

/**
* Builder for star tree. Defines the algorithm to construct star-tree
Expand All @@ -72,7 +73,7 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder {
protected int numStarTreeNodes;
protected final int maxLeafDocuments;

protected final TreeNode rootNode = getNewNode();
protected final InMemoryTreeNode rootNode = getNewNode();

protected final StarTreeField starTreeField;
private final SegmentWriteState state;
Expand Down Expand Up @@ -578,9 +579,9 @@ private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOExcept
*
* @return return new star-tree node
*/
private TreeNode getNewNode() {
private InMemoryTreeNode getNewNode() {
numStarTreeNodes++;
return new TreeNode();
return new InMemoryTreeNode();
}

/**
Expand All @@ -591,7 +592,7 @@ private TreeNode getNewNode() {
* @param endDocId end document id
* @throws IOException throws an exception if we are unable to construct the tree
*/
private void constructStarTree(TreeNode node, int startDocId, int endDocId) throws IOException {
private void constructStarTree(InMemoryTreeNode node, int startDocId, int endDocId) throws IOException {

int childDimensionId = node.dimensionId + 1;
if (childDimensionId == numDimensions) {
Expand All @@ -600,7 +601,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro

// Construct all non-star children nodes
node.childDimensionId = childDimensionId;
Map<Long, TreeNode> children = constructNonStarNodes(startDocId, endDocId, childDimensionId);
Map<Long, InMemoryTreeNode> children = constructNonStarNodes(startDocId, endDocId, childDimensionId);
node.children = children;

// Construct star-node if required
Expand All @@ -609,7 +610,7 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro
}

// Further split on child nodes if required
for (TreeNode child : children.values()) {
for (InMemoryTreeNode child : children.values()) {
if (child.endDocId - child.startDocId > maxLeafDocuments) {
constructStarTree(child, child.startDocId, child.endDocId);
}
Expand All @@ -625,14 +626,14 @@ private void constructStarTree(TreeNode node, int startDocId, int endDocId) thro
* @return root node with non-star nodes constructed
* @throws IOException throws an exception if we are unable to construct non-star nodes
*/
private Map<Long, TreeNode> constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException {
Map<Long, TreeNode> nodes = new HashMap<>();
private Map<Long, InMemoryTreeNode> constructNonStarNodes(int startDocId, int endDocId, int dimensionId) throws IOException {
Map<Long, InMemoryTreeNode> nodes = new HashMap<>();
int nodeStartDocId = startDocId;
Long nodeDimensionValue = getDimensionValue(startDocId, dimensionId);
for (int i = startDocId + 1; i < endDocId; i++) {
Long dimensionValue = getDimensionValue(i, dimensionId);
if (Objects.equals(dimensionValue, nodeDimensionValue) == false) {
TreeNode child = getNewNode();
InMemoryTreeNode child = getNewNode();
child.dimensionId = dimensionId;
child.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL;
child.startDocId = nodeStartDocId;
Expand All @@ -643,7 +644,7 @@ private Map<Long, TreeNode> constructNonStarNodes(int startDocId, int endDocId,
nodeDimensionValue = dimensionValue;
}
}
TreeNode lastNode = getNewNode();
InMemoryTreeNode lastNode = getNewNode();
lastNode.dimensionId = dimensionId;
lastNode.dimensionValue = nodeDimensionValue != null ? nodeDimensionValue : ALL;
lastNode.startDocId = nodeStartDocId;
Expand All @@ -661,11 +662,11 @@ private Map<Long, TreeNode> constructNonStarNodes(int startDocId, int endDocId,
* @return root node with star nodes constructed
* @throws IOException throws an exception if we are unable to construct non-star nodes
*/
private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException {
TreeNode starNode = getNewNode();
private InMemoryTreeNode constructStarNode(int startDocId, int endDocId, int dimensionId) throws IOException {
InMemoryTreeNode starNode = getNewNode();
starNode.dimensionId = dimensionId;
starNode.dimensionValue = ALL;
starNode.isStarNode = true;
starNode.nodeType = StarTreeNodeType.STAR.getValue();
starNode.startDocId = numStarTreeDocs;
Iterator<StarTreeDocument> starTreeDocumentIterator = generateStarTreeDocumentsForStarNode(startDocId, endDocId, dimensionId);
while (starTreeDocumentIterator.hasNext()) {
Expand All @@ -682,7 +683,7 @@ private TreeNode constructStarNode(int startDocId, int endDocId, int dimensionId
* @return aggregated star-tree documents
* @throws IOException throws an exception upon failing to create new aggregated docs based on star tree
*/
private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException {
private StarTreeDocument createAggregatedDocs(InMemoryTreeNode node) throws IOException {
StarTreeDocument aggregatedStarTreeDocument = null;
if (node.children == null) {

Expand All @@ -709,8 +710,8 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException
// For non-leaf node
if (node.children.containsKey((long) ALL)) {
// If it has star child, use the star child aggregated document directly
for (TreeNode child : node.children.values()) {
if (child.isStarNode) {
for (InMemoryTreeNode child : node.children.values()) {
if (child.nodeType == StarTreeNodeType.STAR.getValue()) {
aggregatedStarTreeDocument = createAggregatedDocs(child);
node.aggregatedDocId = child.aggregatedDocId;
} else {
Expand All @@ -720,12 +721,12 @@ private StarTreeDocument createAggregatedDocs(TreeNode node) throws IOException
} else {
// If no star child exists, aggregate all aggregated documents from non-star children
if (node.children.values().size() == 1) {
for (TreeNode child : node.children.values()) {
for (InMemoryTreeNode child : node.children.values()) {
aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child));
node.aggregatedDocId = child.aggregatedDocId;
}
} else {
for (TreeNode child : node.children.values()) {
for (InMemoryTreeNode child : node.children.values()) {
aggregatedStarTreeDocument = reduceStarTreeDocuments(aggregatedStarTreeDocument, createAggregatedDocs(child));
}
if (null == aggregatedStarTreeDocument) {
Expand Down Expand Up @@ -760,7 +761,7 @@ public void close() throws IOException {

abstract Iterator<StarTreeDocument> mergeStarTrees(List<StarTreeValues> starTreeValues) throws IOException;

public TreeNode getRootNode() {
public InMemoryTreeNode getRootNode() {
return rootNode;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import java.util.List;
import java.util.Queue;

import static org.opensearch.index.compositeindex.datacube.startree.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES;
import static org.opensearch.index.compositeindex.datacube.startree.fileformats.node.FixedLengthStarTreeNode.SERIALIZABLE_DATA_SIZE_IN_BYTES;
import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL;

/**
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,11 @@ private static void writeMeta(
metaOut.writeInt(numNodes);

// number of dimensions
// TODO: Revisit the number of dimensions for timestamps (as we will split timestamp into min, hour, etc.)
metaOut.writeVInt(starTreeField.getDimensionsOrder().size());

// dimensions
// TODO: Add sub-dimensions for timestamps (as we will split timestamp into min, hour, etc.)
for (Dimension dimension : starTreeField.getDimensionsOrder()) {
metaOut.writeString(dimension.getField());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,17 @@
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.IndexInput;
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
import org.opensearch.index.compositeindex.datacube.Metric;
import org.opensearch.index.compositeindex.datacube.MetricStat;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeFieldConfiguration;
import org.opensearch.index.mapper.CompositeMappedFieldType;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
Expand Down Expand Up @@ -62,9 +65,9 @@ public class StarTreeMetadata extends CompositeIndexMetadata {
private final List<String> dimensionFields;

/**
* List of metric entries, containing field names and associated metric statistic.
* List of metrics, containing field names and associated metric statistics.
*/
private final List<MetricEntry> metricEntries;
private final List<Metric> metrics;

/**
* The total number of documents aggregated in this star-tree segment.
Expand Down Expand Up @@ -119,7 +122,7 @@ public StarTreeMetadata(
this.version = version;
this.numberOfNodes = readNumberOfNodes();
this.dimensionFields = readStarTreeDimensions();
this.metricEntries = readMetricEntries();
this.metrics = readMetricEntries();
this.segmentAggregatedDocCount = readSegmentAggregatedDocCount();
this.maxLeafDocs = readMaxLeafDocs();
this.skipStarNodeCreationInDims = readSkipStarNodeCreationInDims();
Expand All @@ -141,7 +144,7 @@ public StarTreeMetadata(
* @param compositeFieldType type of the composite field. Here, STAR_TREE field.
* @param version The version of the star tree stored in the segments.
* @param dimensionFields list of dimension fields
* @param metricEntries list of metric entries
* @param metrics list of metric entries
* @param segmentAggregatedDocCount segment aggregated doc count
* @param maxLeafDocs max leaf docs
* @param skipStarNodeCreationInDims set of dimensions to skip star node creation
Expand All @@ -156,7 +159,7 @@ public StarTreeMetadata(
Integer version,
Integer numberOfNodes,
List<String> dimensionFields,
List<MetricEntry> metricEntries,
List<Metric> metrics,
Integer segmentAggregatedDocCount,
Integer maxLeafDocs,
Set<String> skipStarNodeCreationInDims,
Expand All @@ -171,7 +174,7 @@ public StarTreeMetadata(
this.version = version;
this.numberOfNodes = numberOfNodes;
this.dimensionFields = dimensionFields;
this.metricEntries = metricEntries;
this.metrics = metrics;
this.segmentAggregatedDocCount = segmentAggregatedDocCount;
this.maxLeafDocs = maxLeafDocs;
this.skipStarNodeCreationInDims = skipStarNodeCreationInDims;
Expand Down Expand Up @@ -203,17 +206,19 @@ private int readMetricsCount() throws IOException {
return meta.readVInt();
}

private List<MetricEntry> readMetricEntries() throws IOException {
private List<Metric> readMetricEntries() throws IOException {
int metricCount = readMetricsCount();
List<MetricEntry> metricEntries = new ArrayList<>();

Map<String, Metric> starTreeMetricMap = new LinkedHashMap<>();
for (int i = 0; i < metricCount; i++) {
String metricFieldName = meta.readString();
String metricName = meta.readString();
int metricStatOrdinal = meta.readVInt();
metricEntries.add(new MetricEntry(metricFieldName, MetricStat.fromMetricOrdinal(metricStatOrdinal)));
MetricStat metricStat = MetricStat.fromMetricOrdinal(metricStatOrdinal);
Metric metric = starTreeMetricMap.computeIfAbsent(metricName, field -> new Metric(field, new ArrayList<>()));
metric.getMetrics().add(metricStat);
}

return metricEntries;
return new ArrayList<>(starTreeMetricMap.values());
}

private int readSegmentAggregatedDocCount() throws IOException {
Expand Down Expand Up @@ -282,8 +287,8 @@ public List<String> getDimensionFields() {
*
* @return star-tree metric entries
*/
public List<MetricEntry> getMetricEntries() {
return metricEntries;
public List<Metric> getMetrics() {
return metrics;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
package org.opensearch.index.compositeindex.datacube.startree.node;
package org.opensearch.index.compositeindex.datacube.startree.fileformats.node;

import org.apache.lucene.store.RandomAccessInput;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode;
import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;

import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL;

/**
* Fixed Length implementation of {@link StarTreeNode}.
* <p>
Expand Down Expand Up @@ -187,16 +187,17 @@ public byte getStarTreeNodeType() throws IOException {
}

@Override
public StarTreeNode getChildForDimensionValue(Long dimensionValue, boolean isStar) throws IOException {
public StarTreeNode getChildStarNode() throws IOException {
return handleStarNode();
}

@Override
public StarTreeNode getChildForDimensionValue(Long dimensionValue) throws IOException {
// there will be no children for leaf nodes
if (isLeaf()) {
return null;
}

// Specialize star node for performance
if (isStar) {
return handleStarNode();
}
StarTreeNode resultStarTreeNode = null;
if (null != dimensionValue) {
resultStarTreeNode = binarySearchChild(dimensionValue);
Expand All @@ -213,7 +214,7 @@ public StarTreeNode getChildForDimensionValue(Long dimensionValue, boolean isSta
*/
private FixedLengthStarTreeNode handleStarNode() throws IOException {
FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId);
if (firstNode.getDimensionValue() == ALL) {
if (firstNode.getStarTreeNodeType() == StarTreeNodeType.STAR.getValue()) {
return firstNode;
} else {
return null;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

/**
* Holds classes associated with star tree node with file formats
*/
package org.opensearch.index.compositeindex.datacube.startree.fileformats.node;
Loading

0 comments on commit 926ec57

Please sign in to comment.