Skip to content

Commit

Permalink
addressing nits
Browse files Browse the repository at this point in the history
Signed-off-by: Sarthak Aggarwal <[email protected]>
  • Loading branch information
sarthakaggarwal97 committed Aug 25, 2024
1 parent 668561f commit e82715c
Show file tree
Hide file tree
Showing 6 changed files with 150 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class CompositeIndexConstants {
public static final long COMPOSITE_FIELD_MARKER = 0xC0950513F1E1DL; // Composite Field

/**
* Represents the key to fetch number of documents in a segment.
* Represents the key to fetch number of non-star aggregated segment documents.
*/
public static final String SEGMENT_DOCS_COUNT = "segmentDocsCount";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
/**
* This class represents the metadata of a Composite Index, which includes information about
* the composite field name, type, and the specific metadata for the type of composite field
* (e.g., Tree metadata).
* (e.g., Star Tree metadata).
*
* @opensearch.experimental
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,67 @@
*/
public class StarTreeMetadata extends CompositeIndexMetadata {
private static final Logger logger = LogManager.getLogger(StarTreeMetadata.class);

/**
* The index input for reading metadata from the segment file.
*/
private final IndexInput meta;

/**
* The name of the star-tree field, used to identify the star-tree.
*/
private final String starTreeFieldName;

/**
* The type of the star-tree field, indicating the specific implementation or version.
* Here, STAR_TREE field.
*/
private final String starTreeFieldType;

/**
* List of dimension fields used in the star-tree.
*/
private final List<String> dimensionFields;

/**
* List of metric entries, containing field names and associated metric statistic.
*/
private final List<MetricEntry> metricEntries;

/**
* The total number of documents aggregated in this star-tree segment.
*/
private final Integer segmentAggregatedDocCount;

/**
* The maximum number of documents allowed in a leaf node.
*/
private final Integer maxLeafDocs;

/**
* Set of dimensions for which star node creation should be skipped.
*/
private final Set<String> skipStarNodeCreationInDims;

/**
* The build mode used for constructing the star-tree.
*/
private final StarTreeFieldConfiguration.StarTreeBuildMode starTreeBuildMode;

/**
* The file pointer to the start of the associated star-tree data in the (.cid) file
*/
private final long dataStartFilePointer;

/**
* The length of the star-tree data in bytes, used for reading the correct amount of data from (.cid) file
*/
private final long dataLength;

/**
* A star tree metadata constructor to initialize star tree metadata from the segment file (.cim) using index input.
*
* @param metaIn an index input to read star-tree meta
* @param metaIn an index input to read star-tree meta
* @param compositeFieldName name of the composite field. Here, name of the star-tree field.
* @param compositeFieldType type of the composite field. Here, STAR_TREE field.
* @throws IOException if unable to read star-tree metadata from the file
Expand Down Expand Up @@ -74,17 +119,17 @@ public StarTreeMetadata(IndexInput metaIn, String compositeFieldName, CompositeM
* A star tree metadata constructor to initialize star tree metadata.
* Used for testing.
*
* @param meta an index input to read star-tree meta
* @param compositeFieldName name of the composite field. Here, name of the star-tree field.
* @param compositeFieldType type of the composite field. Here, STAR_TREE field.
* @param dimensionFields list of dimension fields
* @param metricEntries list of metric entries
* @param segmentAggregatedDocCount segment aggregated doc count
* @param maxLeafDocs max leaf docs
* @param meta an index input to read star-tree meta
* @param compositeFieldName name of the composite field. Here, name of the star-tree field.
* @param compositeFieldType type of the composite field. Here, STAR_TREE field.
* @param dimensionFields list of dimension fields
* @param metricEntries list of metric entries
* @param segmentAggregatedDocCount segment aggregated doc count
* @param maxLeafDocs max leaf docs
* @param skipStarNodeCreationInDims set of dimensions to skip star node creation
* @param starTreeBuildMode star tree build mode
* @param dataStartFilePointer data start file pointer
* @param dataLength data length
* @param starTreeBuildMode star tree build mode
* @param dataStartFilePointer start file pointer to the associated star tree data in (.cid) file
* @param dataLength length of the corresponding star-tree data in (.cid) file
*/
public StarTreeMetadata(
String compositeFieldName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,55 @@
import org.apache.lucene.store.RandomAccessInput;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Iterator;

import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils.ALL;

/**
* Fixed Length implementation of {@link StarTreeNode}
* Fixed Length implementation of {@link StarTreeNode}.
* <p>
* This class represents a node in a star tree with a fixed-length serialization format.
* It provides efficient storage and retrieval of node information using a RandomAccessInput.
* The node structure includes the methods to access all the constructs of InMemoryTreeNode.
*
* <p>
* Key features:
* - Fixed-size serialization for each node, allowing for efficient random access
* - Binary search capability for finding child nodes
* - Support for star nodes, null nodes and other default nodes
* - Iteration over child nodes
* <p>
*
* The class uses specific byte offsets for each field in the serialized format,
* enabling direct access to node properties without parsing the entire node structure.
*
* @opensearch.experimental
*/
public class FixedLengthStarTreeNode implements StarTreeNode {

/**
* Number of integer fields in the serializable data
*/
public static final int NUM_INT_SERIALIZABLE_FIELDS = 6;

/**
* Number of long fields in the serializable data
*/
public static final int NUM_LONG_SERIALIZABLE_FIELDS = 1;

/**
* Number of byte fields in the serializable data
*/
public static final int NUM_BYTE_SERIALIZABLE_FIELDS = 1;

/**
* Total size in bytes of the serializable data for each node
*/
public static final long SERIALIZABLE_DATA_SIZE_IN_BYTES = (Integer.BYTES * NUM_INT_SERIALIZABLE_FIELDS) + (Long.BYTES
* NUM_LONG_SERIALIZABLE_FIELDS) + (NUM_BYTE_SERIALIZABLE_FIELDS * Byte.BYTES);

// Byte offsets for each field in the serialized data
private static final int DIMENSION_ID_OFFSET = 0;
private static final int DIMENSION_VALUE_OFFSET = DIMENSION_ID_OFFSET + Integer.BYTES;
private static final int START_DOC_ID_OFFSET = DIMENSION_VALUE_OFFSET + Long.BYTES;
Expand All @@ -34,27 +68,68 @@ public class FixedLengthStarTreeNode implements StarTreeNode {
private static final int FIRST_CHILD_ID_OFFSET = STAR_NODE_TYPE_OFFSET + Byte.BYTES;
private static final int LAST_CHILD_ID_OFFSET = FIRST_CHILD_ID_OFFSET + Integer.BYTES;

/**
* Constant representing an invalid node ID
*/
public static final int INVALID_ID = -1;

/**
* The ID of this node
*/
private final int nodeId;

/**
* The ID of the first child of this node
*/
private final int firstChildId;

/**
* The input source for reading node data
*/
RandomAccessInput in;

/**
* Constructs a FixedLengthStarTreeNode.
*
* @param in The RandomAccessInput to read node data from
* @param nodeId The ID of this node
* @throws IOException If there's an error reading from the input
*/
public FixedLengthStarTreeNode(RandomAccessInput in, int nodeId) throws IOException {
    // Bind the node id and the backing input first: getInt(...) below reads through both.
    this.nodeId = nodeId;
    this.in = in;
    // Read the first-child id from this node's serialized record and keep it on the instance.
    this.firstChildId = getInt(FIRST_CHILD_ID_OFFSET);
}

/**
* Reads an integer value from the specified offset in the node's data.
*
* @param fieldOffset The offset of the field to read
* @return The integer value at the specified offset
* @throws IOException If there's an error reading from the input
*/
private int getInt(int fieldOffset) throws IOException {
    // Absolute position = start of this node's fixed-size record + offset of the field within it.
    final long nodeStart = nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES;
    return in.readInt(nodeStart + fieldOffset);
}

/**
* Reads a long value from the specified offset in the node's data.
*
* @param fieldOffset The offset of the field to read
* @return The long value at the specified offset
* @throws IOException If there's an error reading from the input
*/
private long getLong(int fieldOffset) throws IOException {
    // Absolute position = start of this node's fixed-size record + offset of the field within it.
    final long nodeStart = nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES;
    return in.readLong(nodeStart + fieldOffset);
}

/**
* Reads a byte value from the specified offset in the node's data.
*
* @param fieldOffset The offset of the field to read
* @return The byte value at the specified offset
* @throws IOException If there's an error reading from the input
*/
private byte getByte(int fieldOffset) throws IOException {
    // Absolute position = start of this node's fixed-size record + offset of the field within it.
    final long nodeStart = nodeId * SERIALIZABLE_DATA_SIZE_IN_BYTES;
    return in.readByte(nodeStart + fieldOffset);
}
Expand Down Expand Up @@ -127,6 +202,12 @@ public StarTreeNode getChildForDimensionValue(long dimensionValue, boolean isSta
return binarySearchChild(dimensionValue);
}

/**
* Handles the special case of a star node.
*
* @return The star node if found, null otherwise
* @throws IOException If there's an error reading from the input
*/
private FixedLengthStarTreeNode handleStarNode() throws IOException {
FixedLengthStarTreeNode firstNode = new FixedLengthStarTreeNode(in, firstChildId);
if (firstNode.getDimensionValue() == ALL) {
Expand All @@ -136,8 +217,14 @@ private FixedLengthStarTreeNode handleStarNode() throws IOException {
}
}

/**
* Performs a binary search to find a child node with the given dimension value.
*
* @param dimensionValue The dimension value to search for
* @return The child node if found, null otherwise
* @throws IOException If there's an error reading from the input
*/
private FixedLengthStarTreeNode binarySearchChild(long dimensionValue) throws IOException {
// Binary search to find child node
int low = firstChildId;
int high = getInt(LAST_CHILD_ID_OFFSET);

Expand Down Expand Up @@ -173,7 +260,7 @@ public FixedLengthStarTreeNode next() {
try {
return new FixedLengthStarTreeNode(in, currentChildId++);
} catch (IOException e) {
throw new RuntimeException(e);
throw new UncheckedIOException(e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,6 @@ public static StarTreeNodeType fromValue(byte value) {
return nodeType;
}
}
return null;
throw new IllegalStateException("Unrecognized value byte to determine star-tree node type: [" + value + "]");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ public void test_starTreeMetadata() throws IOException {
metrics = List.of(
new Metric("field2", List.of(MetricStat.SUM)),
new Metric("field4", List.of(MetricStat.SUM)),
new Metric("field6", List.of(MetricStat.COUNT))
new Metric("field6", List.of(MetricStat.VALUE_COUNT))
);
int maxLeafDocs = randomNonNegativeInt();
StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration(
Expand Down

0 comments on commit e82715c

Please sign in to comment.