From 2e21d2e8a55843eccaa7507b42a903d4d78a01cf Mon Sep 17 00:00:00 2001 From: Oleg Afanasyev Date: Mon, 10 Jul 2023 11:35:16 +0100 Subject: [PATCH] batcheval: delete flaky integration test TestLeaseCommandLearnerReplica is fundamentally flaky as commit can't retry all underlying replication failures and acceptable failure causes are not exposed on the higher levels where test operates. Release note: None --- pkg/kv/kvserver/batcheval/cmd_lease_test.go | 92 --------------------- 1 file changed, 92 deletions(-) diff --git a/pkg/kv/kvserver/batcheval/cmd_lease_test.go b/pkg/kv/kvserver/batcheval/cmd_lease_test.go index 9587216b561e..84b02dd4a83a 100644 --- a/pkg/kv/kvserver/batcheval/cmd_lease_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_lease_test.go @@ -15,7 +15,6 @@ import ( "testing" "time" - "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/readsummary/rspb" @@ -23,7 +22,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/testutils" - "github.com/cockroachdb/cockroach/pkg/testutils/serverutils" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" @@ -31,96 +29,6 @@ import ( "github.com/stretchr/testify/require" ) -// TestLeaseTransferWithPipelinedWrite verifies that pipelined writes -// do not cause retry errors to be leaked to clients when the error -// can be handled internally. Pipelining dissociates a write from its -// caller, so the retries of internally-generated errors (specifically -// out-of-order lease indexes) must be retried below that level. -// -// This issue was observed in practice to affect the first insert -// after table creation with high probability. -func TestLeaseTransferWithPipelinedWrite(t *testing.T) { - defer leaktest.AfterTest(t)() - defer log.Scope(t).Close(t) - - ctx := context.Background() - - tc := serverutils.StartNewTestCluster(t, 3, base.TestClusterArgs{}) - defer tc.Stopper().Stop(ctx) - - db := tc.ServerConn(0) - - for iter := 0; iter < 100; iter++ { - log.Infof(ctx, "iter %d", iter) - if _, err := db.ExecContext(ctx, "drop table if exists test"); err != nil { - t.Fatal(err) - } - if _, err := db.ExecContext(ctx, "create table test (a int, b int, primary key (a, b))"); err != nil { - t.Fatal(err) - } - - workerErrCh := make(chan error, 1) - go func() { - workerErrCh <- func() error { - for i := 0; i < 1; i++ { - tx, err := db.BeginTx(ctx, nil) - if err != nil { - return err - } - defer func() { - if tx != nil { - if err := tx.Rollback(); err != nil { - log.Warningf(ctx, "error rolling back: %+v", err) - } - } - }() - // Run two inserts in a transaction to ensure that we have - // pipelined writes that cannot be retried at the SQL layer - // due to the first-statement rule. - if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 1); err != nil { - return err - } - if _, err := tx.ExecContext(ctx, "INSERT INTO test (a, b) VALUES ($1, $2)", i, 2); err != nil { - return err - } - if err := tx.Commit(); err != nil { - return err - } - tx = nil - } - return nil - }() - }() - - // TODO(bdarnell): This test reliably reproduced the issue when - // introduced, because table creation causes splits and repeated - // table creation leads to lease transfers due to rebalancing. - // This is a subtle thing to rely on and the test might become - // more reliable if we ran more iterations in the worker goroutine - // and added a second goroutine to explicitly transfer leases back - // and forth. - - select { - case <-time.After(15 * time.Second): - // TODO(bdarnell): The test seems flaky under stress with a 5s - // timeout. Why? I'm giving it a high timeout since hanging - // isn't a failure mode we're particularly concerned about here, - // but it shouldn't be taking this long even with stress. - t.Fatal("timed out") - case err := <-workerErrCh: - if err != nil { - // We allow the transaction to run into an aborted error due to a lease - // transfer when it attempts to create its transaction record. This it - // outside of the focus of this test. - okErr := testutils.IsError(err, kvpb.ABORT_REASON_NEW_LEASE_PREVENTS_TXN.String()) - if !okErr { - t.Fatalf("worker failed: %+v", err) - } - } - } - } -} - func TestLeaseCommandLearnerReplica(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t)