From b34a39376796a1a36d585a7936eefe70cf035c41 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Mon, 23 Sep 2024 14:07:34 -0700 Subject: [PATCH] base: allow CompareSuffixes to be stricter than Compare This change allows `CompareSuffixes` to be stricter than `Compare` (when the prefixes are equal). This will allow reverting the CRDB comparer behavior to be consistent with previous releases (avoiding $replica inconsistency). Informs https://github.com/cockroachdb/cockroach/issues/130533 --- internal/base/comparer.go | 60 ++++++++++++++++++++--------- internal/crdbtest/crdbtest.go | 18 ++++++--- internal/crdbtest/crdbtest_test.go | 19 ++++++--- internal/rangekey/testdata/coalesce | 22 +++++++++++ internal/testkeys/testkeys.go | 25 +++++++++++- internal/testkeys/testkeys_test.go | 10 +++++ 6 files changed, 123 insertions(+), 31 deletions(-) diff --git a/internal/base/comparer.go b/internal/base/comparer.go index 934ba820a9..cad7347cf2 100644 --- a/internal/base/comparer.go +++ b/internal/base/comparer.go @@ -15,14 +15,6 @@ import ( "github.com/cockroachdb/errors" ) -// CompareSuffixes compares two key suffixes and returns -1, 0, or +1. -// -// The empty slice suffix must be 'less than' any non-empty suffix. -// -// A full key k is composed of a prefix k[:Split(k)] and suffix k[Split(k):]. -// Suffixes are compared to break ties between equal prefixes. -type CompareSuffixes func(a, b []byte) int - // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal // to' or 'greater than' b. // @@ -34,13 +26,30 @@ type CompareSuffixes func(a, b []byte) int // // In other words, if prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]: // -// Compare(a, b) = bytes.Compare(prefix(a), prefix(b)) if not 0, or -// CompareSuffixes(suffix(a), suffix(b)) otherwise. +// Compare(a, b) = bytes.Compare(prefix(a), prefix(b)) if not 0, +// otherwise either 0 or CompareSuffixes(suffix(a), suffix(b)) +// +// The "either 0" part is because CompareSuffixes is allowed to be more strict; +// see CompareSuffixes. // // Compare defaults to using the formula above but it can be customized if there -// is a (potentially faster) specialization. +// is a (potentially faster) specialization or it has to compare suffixes +// differently. type Compare func(a, b []byte) int +// CompareSuffixes compares two key suffixes and returns -1, 0, or +1. +// +// For historical reasons (see +// https://github.com/cockroachdb/cockroach/issues/130533 for a summary), we +// allow this function to be more strict than Compare. Specifically, Compare may +// treat two suffixes as equal whereas CompareSuffixes might not. +// +// The empty slice suffix must be 'less than' any non-empty suffix. +// +// A full key k is composed of a prefix k[:Split(k)] and suffix k[Split(k):]. +// Suffixes are compared to break ties between equal prefixes. +type CompareSuffixes func(a, b []byte) int + // defaultCompare implements Compare in terms of Split and CompareSuffixes, as // mentioned above. func defaultCompare(split Split, compareSuffixes CompareSuffixes, a, b []byte) int { @@ -432,6 +441,15 @@ func CheckComparer(c *Comparer, prefixes [][]byte, suffixes [][]byte) error { return errors.Errorf("incorrect Split result %d on '%x' (prefix '%x' suffix '%x')", n, key, p, s) } } + for i := 1; i < len(suffixes); i++ { + a := slices.Concat(p, suffixes[i-1]) + b := slices.Concat(p, suffixes[i]) + // Make sure the Compare function agrees with CompareSuffixes, with the + // caveat that it can consider some consecutive suffixes to be equal. + if cmp := c.Compare(a, b); cmp > 0 { + return errors.Errorf("Compare(%s, %s)=%d, expected <= 0", c.FormatKey(a), c.FormatKey(b), cmp) + } + } } // Check the Compare/Equals functions on all possible combinations. @@ -442,18 +460,22 @@ func CheckComparer(c *Comparer, prefixes [][]byte, suffixes [][]byte) error { for _, bs := range suffixes { b := slices.Concat(bp, bs) result := c.Compare(a, b) - - expected := bytes.Compare(ap, bp) - if expected == 0 { - expected = c.CompareSuffixes(as, bs) - } - if (result == 0) != c.Equal(a, b) { return errors.Errorf("Equal(%s, %s) doesn't agree with Compare", c.FormatKey(a), c.FormatKey(b)) } - if result != expected { - return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, expected) + if prefixCmp := bytes.Compare(ap, bp); prefixCmp != 0 { + if result != prefixCmp { + return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, prefixCmp) + } + } else { + // We allow Compare to return equality for suffixes even when + // CompareSuffixes does not. + if result != 0 { + if suffixCmp := c.CompareSuffixes(as, bs); result != suffixCmp { + return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, suffixCmp) + } + } } } } diff --git a/internal/crdbtest/crdbtest.go b/internal/crdbtest/crdbtest.go index acf9e6c1f7..17f4cedc4e 100644 --- a/internal/crdbtest/crdbtest.go +++ b/internal/crdbtest/crdbtest.go @@ -231,11 +231,7 @@ func Compare(a, b []byte) int { // Compare the version part of the key. Note that when the version is a // timestamp, the timestamp encoding causes byte comparison to be equivalent // to timestamp comparison. - return CompareSuffixes(a[aSep+1:], b[bSep+1:]) -} - -// CompareSuffixes compares suffixes (normally timestamps). -func CompareSuffixes(a, b []byte) int { + a, b = a[aSep+1:], b[bSep+1:] if len(a) == 0 || len(b) == 0 { // Empty suffixes come before non-empty suffixes. return cmp.Compare(len(a), len(b)) @@ -246,6 +242,18 @@ func CompareSuffixes(a, b []byte) int { ) } +// CompareSuffixes compares suffixes (normally timestamps). +func CompareSuffixes(a, b []byte) int { + if len(a) == 0 || len(b) == 0 { + // Empty suffixes come before non-empty suffixes. + return cmp.Compare(len(a), len(b)) + } + // Here we are not using normalizeEngineKeyVersionForCompare for historical + // reasons, summarized in + // https://github.com/cockroachdb/cockroach/issues/130533. + return bytes.Compare(b[:len(b)-1], a[:len(a)-1]) +} + // Equal implements base.Equal for Cockroach keys. func Equal(a, b []byte) bool { // TODO(radu): Pebble sometimes passes empty "keys" and we have to tolerate diff --git a/internal/crdbtest/crdbtest_test.go b/internal/crdbtest/crdbtest_test.go index e8adc69cc4..df7d8dfc97 100644 --- a/internal/crdbtest/crdbtest_test.go +++ b/internal/crdbtest/crdbtest_test.go @@ -28,10 +28,13 @@ func TestComparer(t *testing.T) { if len(suffix) == withWall { // Append a suffix that encodes a zero logical value that should be - // ignored in comparisons. + // ignored in key comparisons, but not suffix comparisons. newSuffix := slices.Concat(suffix[:withWall-1], zeroLogical[:], []byte{withLogical}) - if Comparer.CompareSuffixes(suffix, newSuffix) != 0 { - t.Fatalf("expected suffixes %x and %x to be equal", suffix, newSuffix) + if Comparer.CompareSuffixes(suffix, newSuffix) != 1 { + t.Fatalf("expected suffixes %x < %x", suffix, newSuffix) + } + if Comparer.Compare(slices.Concat(prefixes[0], suffix), slices.Concat(prefixes[0], newSuffix)) != 0 { + t.Fatalf("expected keys with suffixes %x and %x to be equal", suffix, newSuffix) } suffixes = append(suffixes, newSuffix) suffix = newSuffix @@ -39,10 +42,14 @@ func TestComparer(t *testing.T) { if len(suffix) != withLogical { t.Fatalf("unexpected suffix %x", suffix) } - // Append a synthetic bit that should be ignored in comparisons. + // Append a synthetic bit that should be ignored in key comparisons, but + // not suffix comparisons. newSuffix := slices.Concat(suffix[:withLogical-1], []byte{1}, []byte{withSynthetic}) - if Comparer.CompareSuffixes(suffix, newSuffix) != 0 { - t.Fatalf("expected suffixes %x and %x to be equal", suffix, newSuffix) + if Comparer.CompareSuffixes(suffix, newSuffix) != 1 { + t.Fatalf("expected suffixes %x < %x", suffix, newSuffix) + } + if Comparer.Compare(slices.Concat(prefixes[0], suffix), slices.Concat(prefixes[0], newSuffix)) != 0 { + t.Fatalf("expected keys with suffixes %x and %x to be equal", suffix, newSuffix) } suffixes = append(suffixes, newSuffix) } diff --git a/internal/rangekey/testdata/coalesce b/internal/rangekey/testdata/coalesce index 05657eb7a1..9157d613bd 100644 --- a/internal/rangekey/testdata/coalesce +++ b/internal/rangekey/testdata/coalesce @@ -85,3 +85,25 @@ coalesce a-c:{(#5,RANGEKEYSET,@5,foo) (#5,RANGEKEYUNSET,@5) (#5,RANGEKEYDEL)} ---- a-c:{(#5,RANGEKEYSET,@5,foo) (#5,RANGEKEYDEL)} + +# Verify that the suffix comparison is strict w.r.t. the ignorable synthetic +# tag. +coalesce +a-c:{(#5,RANGEKEYUNSET,@3,foo) (#4,RANGEKEYSET,@3)} +---- +a-c:{(#5,RANGEKEYUNSET,@3,foo)} + +coalesce +a-c:{(#5,RANGEKEYUNSET,@3_synthetic,foo) (#4,RANGEKEYSET,@3_synthetic)} +---- +a-c:{(#5,RANGEKEYUNSET,@3_synthetic,foo)} + +coalesce +a-c:{(#5,RANGEKEYUNSET,@3_synthetic,foo) (#4,RANGEKEYSET,@3)} +---- +a-c:{(#5,RANGEKEYUNSET,@3_synthetic,foo) (#4,RANGEKEYSET,@3)} + +coalesce +a-c:{(#5,RANGEKEYUNSET,@3,foo) (#4,RANGEKEYSET,@3_synthetic)} +---- +a-c:{(#5,RANGEKEYUNSET,@3,foo) (#4,RANGEKEYSET,@3_synthetic)} diff --git a/internal/testkeys/testkeys.go b/internal/testkeys/testkeys.go index e1c4d1aab1..789ddfde8b 100644 --- a/internal/testkeys/testkeys.go +++ b/internal/testkeys/testkeys.go @@ -33,6 +33,11 @@ var inverseAlphabet = make(map[byte]int64, len(alpha)) var prefixRE = regexp.MustCompile("[" + alpha + "]+") +// ignoreTimestampSuffix is a suffix that is ignored when comparing keys, but +// not when comparing suffixes. It simulates the CRDB synthetic bit situation +// (see https://github.com/cockroachdb/cockroach/issues/130533). +var ignoreTimestampSuffix = []byte("_synthetic") + func init() { for i := range alpha { inverseAlphabet[alpha[i]] = int64(i) @@ -44,7 +49,7 @@ var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64))) // Comparer is the comparer for test keys generated by this package. var Comparer = &base.Comparer{ - CompareSuffixes: compareTimestamps, + CompareSuffixes: compareSuffixes, Compare: compare, Equal: func(a, b []byte) bool { return compare(a, b) == 0 }, AbbreviatedKey: func(k []byte) uint64 { @@ -134,6 +139,8 @@ func split(a []byte) int { } func compareTimestamps(a, b []byte) int { + a = bytes.TrimSuffix(a, ignoreTimestampSuffix) + b = bytes.TrimSuffix(b, ignoreTimestampSuffix) if len(a) == 0 || len(b) == 0 { // The empty suffix sorts first. return cmp.Compare(len(a), len(b)) @@ -152,6 +159,21 @@ func compareTimestamps(a, b []byte) int { return cmp.Compare(bi, ai) } +func compareSuffixes(a, b []byte) int { + cmp := compareTimestamps(a, b) + if cmp == 0 { + aHasIgnorableSuffix := bytes.HasSuffix(a, ignoreTimestampSuffix) + bHasIgnorableSuffix := bytes.HasSuffix(b, ignoreTimestampSuffix) + if aHasIgnorableSuffix && !bHasIgnorableSuffix { + return 1 + } + if !aHasIgnorableSuffix && bHasIgnorableSuffix { + return -1 + } + } + return cmp +} + // Keyspace describes a finite keyspace of unsuffixed test keys. type Keyspace interface { // Count returns the number of keys that exist within this keyspace. @@ -229,6 +251,7 @@ func SuffixLen(t int64) int { // ParseSuffix returns the integer representation of the encoded suffix. func ParseSuffix(s []byte) (int64, error) { + s = bytes.TrimSuffix(s, ignoreTimestampSuffix) return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64) } diff --git a/internal/testkeys/testkeys_test.go b/internal/testkeys/testkeys_test.go index 75f9a22106..43de0a321f 100644 --- a/internal/testkeys/testkeys_test.go +++ b/internal/testkeys/testkeys_test.go @@ -306,3 +306,13 @@ func TestComparer(t *testing.T) { t.Error(err) } } + +func TestIgnorableSuffix(t *testing.T) { + require.Equal(t, 0, Comparer.Compare([]byte("foo@1"), []byte("foo@1_synthetic"))) + require.Equal(t, 1, Comparer.Compare([]byte("foo@1"), []byte("foo@2_synthetic"))) + require.Equal(t, 1, Comparer.Compare([]byte("foo@1_synthetic"), []byte("foo@2"))) + require.Equal(t, -1, Comparer.CompareSuffixes([]byte("@1"), []byte("@1_synthetic"))) + require.Equal(t, 1, Comparer.CompareSuffixes([]byte("@1_synthetic"), []byte("@1"))) + require.Equal(t, 0, Comparer.CompareSuffixes([]byte("@1_synthetic"), []byte("@1_synthetic"))) + require.Equal(t, 0, Comparer.CompareSuffixes([]byte("@1"), []byte("@1"))) +}