Skip to content

Commit

Permalink
sstable: clean up TestRewriteSuffixProps, fix subtle bug
Browse files Browse the repository at this point in the history
This commit cleans up the TestRewriteSuffixProps unit test, refactoring it to
use the testkey comparator and new block-property collectors that are functions
of the testkey integer suffix.

It also fixes a bug where if the suffix rewriting WriterOptions contained fewer
block property collectors than the original sstable had when it was written,
suffix rewriting could fail to decode all the relevant properties from a block
handle.
  • Loading branch information
jbowens committed Oct 6, 2024
1 parent 649e50a commit 0523662
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 104 deletions.
97 changes: 74 additions & 23 deletions sstable/block_property_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import (
"fmt"
"io"
"math"
"math/rand"
"math/bits"
"slices"
"sort"
"strconv"
"strings"
Expand All @@ -25,6 +26,7 @@ import (
"github.com/cockroachdb/pebble/internal/rangekey"
"github.com/cockroachdb/pebble/internal/testkeys"
"github.com/stretchr/testify/require"
"golang.org/x/exp/rand"
)

func TestIntervalEncodeDecode(t *testing.T) {
Expand Down Expand Up @@ -1349,6 +1351,52 @@ func runBlockPropsCmd(r *Reader) string {
return sb.String()
}

func randomTestCollectors(
rng *rand.Rand,
) (names []string, collectors []func() BlockPropertyCollector) {
names = slices.Clone(testCollectorNames())
rng.Shuffle(len(names), func(i, j int) { names[i], names[j] = names[j], names[i] })
names = names[:rng.Intn(len(names)+1)]
collectors = testCollectorsByNames(names...)
return names, collectors
}

func testCollectorsByNames(names ...string) []func() BlockPropertyCollector {
collectors := make([]func() BlockPropertyCollector, len(names))
for i, name := range names {
collectors[i] = testCollectorConstructors[name]
}
return collectors
}

var testCollectorNames = func() []string {
names := make([]string, 0, len(testCollectorConstructors))
for name := range testCollectorConstructors {
names = append(names, name)
}
sort.Strings(names)
return names
}

var testCollectorConstructors = map[string]func() BlockPropertyCollector{
"count": keyCountCollectorFn("count"),
"parity": func() BlockPropertyCollector {
fn := func(v uint64) uint64 { return v & 1 }
m := &testkeySuffixIntervalMapper{fn: fn}
return NewBlockIntervalCollector("parity", m, m)
},
"log10": func() BlockPropertyCollector {
fn := func(v uint64) uint64 { return uint64(math.Log10(float64(v))) + 1 }
m := &testkeySuffixIntervalMapper{fn: fn}
return NewBlockIntervalCollector("log10", m, m)
},
"onebits": func() BlockPropertyCollector {
fn := func(v uint64) uint64 { return uint64(bits.OnesCount(uint(v))) }
m := &testkeySuffixIntervalMapper{fn: fn}
return NewBlockIntervalCollector("onebits", m, m)
},
}

type keyCountCollector struct {
name string
block, index, table int
Expand Down Expand Up @@ -1402,7 +1450,7 @@ func (p *keyCountCollector) AddCollectedWithSuffixReplacement(
) error {
n, err := strconv.Atoi(string(oldProp))
if err != nil {
return err
return errors.Wrap(err, "parsing key count property")
}
p.block = n
return nil
Expand All @@ -1412,42 +1460,45 @@ func (p *keyCountCollector) SupportsSuffixReplacement() bool {
return true
}

type intSuffixIntervalMapper struct {
suffixLen int
type testkeySuffixIntervalMapper struct {
fn func(uint64) uint64
}

var _ IntervalMapper = (*intSuffixIntervalMapper)(nil)
var _ BlockIntervalSuffixReplacer = (*intSuffixIntervalMapper)(nil)

func newIntSuffixIntervalMapper(suffixLen int) *intSuffixIntervalMapper {
return &intSuffixIntervalMapper{suffixLen: suffixLen}
}
var _ IntervalMapper = (*testkeySuffixIntervalMapper)(nil)
var _ BlockIntervalSuffixReplacer = (*testkeySuffixIntervalMapper)(nil)

// MapPointKey is part of the IntervalMapper interface.
func (i *intSuffixIntervalMapper) MapPointKey(
func (i *testkeySuffixIntervalMapper) MapPointKey(
key InternalKey, value []byte,
) (BlockInterval, error) {
return stringToInterval(key.UserKey[len(key.UserKey)-i.suffixLen:]), nil
j := testkeys.Comparer.Split(key.UserKey)
if len(key.UserKey) == j {
return BlockInterval{}, nil
}
v, err := testkeys.ParseSuffix(key.UserKey[j:])
if err != nil {
return BlockInterval{}, errors.Wrap(err, "mapping point key")
}
mv := i.fn(uint64(v))
return BlockInterval{mv, mv + 1}, nil
}

// MapRangeKeys is part of the IntervalMapper interface.
func (i *intSuffixIntervalMapper) MapRangeKeys(span Span) (BlockInterval, error) {
func (i *testkeySuffixIntervalMapper) MapRangeKeys(span Span) (BlockInterval, error) {
return BlockInterval{}, nil
}

// ApplySuffixReplacement is part of the BlockIntervalSuffixReplacer interface.
func (i *intSuffixIntervalMapper) ApplySuffixReplacement(
func (i *testkeySuffixIntervalMapper) ApplySuffixReplacement(
interval BlockInterval, newSuffix []byte,
) (BlockInterval, error) {
if len(newSuffix) > i.suffixLen {
newSuffix = newSuffix[len(newSuffix)-i.suffixLen:]
if len(newSuffix) == 0 {
return BlockInterval{}, nil
}
return stringToInterval(newSuffix), nil
}

func intSuffixIntervalCollectorFn(name string, suffixLen int) func() BlockPropertyCollector {
m := newIntSuffixIntervalMapper(suffixLen)
return func() BlockPropertyCollector {
return NewBlockIntervalCollector(name, m, m)
v, err := testkeys.ParseSuffix(newSuffix)
if err != nil {
return BlockInterval{}, errors.Wrap(err, "applying suffix replacement")
}
mv := i.fn(uint64(v))
return BlockInterval{mv, mv + 1}, nil
}
4 changes: 2 additions & 2 deletions sstable/colblk_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -967,7 +967,7 @@ func (w *RawColumnWriter) rewriteSuffixes(

// oldShortIDs maps the shortID for the block property collector in the old
// blocks to the shortID in the new blocks. Initialized once for the sstable.
oldShortIDs, err := getShortIDs(r, w.blockPropCollectors)
oldShortIDs, n, err := getShortIDs(r, w.blockPropCollectors)
if err != nil {
return errors.Wrap(err, "getting short IDs")
}
Expand All @@ -980,7 +980,7 @@ func (w *RawColumnWriter) rewriteSuffixes(
for i := range oldProps {
oldProps[i] = nil
}
decoder := makeBlockPropertiesDecoder(len(oldProps), l.Data[i].Props)
decoder := makeBlockPropertiesDecoder(n, l.Data[i].Props)
for !decoder.Done() {
id, val, err := decoder.Next()
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions sstable/rowblk_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -1945,7 +1945,7 @@ func (w *RawRowWriter) rewriteSuffixes(

// oldShortIDs maps the shortID for the block property collector in the old
// blocks to the shortID in the new blocks. Initialized once for the sstable.
oldShortIDs, err := getShortIDs(r, w.blockPropCollectors)
oldShortIDs, n, err := getShortIDs(r, w.blockPropCollectors)
if err != nil {
return errors.Wrap(err, "getting short IDs")
}
Expand All @@ -1962,7 +1962,7 @@ func (w *RawRowWriter) rewriteSuffixes(
for i := range oldProps {
oldProps[i] = nil
}
decoder := makeBlockPropertiesDecoder(len(oldProps), l.Data[i].Props)
decoder := makeBlockPropertiesDecoder(n, l.Data[i].Props)
for !decoder.Done() {
id, val, err := decoder.Next()
if err != nil {
Expand Down
22 changes: 16 additions & 6 deletions sstable/suffix_rewriter.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,23 +238,33 @@ func rewriteRangeKeyBlockToWriter(r *Reader, w RawWriter, from, to []byte) error
// collector in r, with the values containing a new shortID corresponding to the
// index of the corresponding block property collector in collectors.
//
// Additionally, getShortIDs returns n—n is the number of encoded properties
// that a reader needs to process in order to get values for all the given
// collectors. This can be different than len(collectors) when the latter is
// different from the number of block property collectors used when the sstable
// was written. Note that shortIDs[n:] are all invalid.
//
// getShortIDs errors if any of the collectors are not found in the sstable.
func getShortIDs(r *Reader, collectors []BlockPropertyCollector) ([]shortID, error) {
func getShortIDs(
r *Reader, collectors []BlockPropertyCollector,
) (shortIDs []shortID, n int, err error) {
if len(collectors) == 0 {
return nil, nil
return nil, 0, nil
}
shortIDs := make([]shortID, math.MaxUint8)
shortIDs = make([]shortID, math.MaxUint8)
for i := range shortIDs {
shortIDs[i] = invalidShortID
}
for i, p := range collectors {
prop, ok := r.Properties.UserProperties[p.Name()]
if !ok {
return nil, errors.Errorf("sstable does not contain property %s", p.Name())
return nil, 0, errors.Errorf("sstable does not contain property %s", p.Name())
}
shortIDs[shortID(prop[0])] = shortID(i)
oldShortID := shortID(prop[0])
shortIDs[oldShortID] = shortID(i)
n = max(n, int(oldShortID)+1)
}
return shortIDs, nil
return shortIDs, n, nil
}

type copyFilterWriter struct {
Expand Down
Loading

0 comments on commit 0523662

Please sign in to comment.