diff --git a/log.go b/log.go index db22740b..5fd5e1bb 100644 --- a/log.go +++ b/log.go @@ -371,11 +371,17 @@ func (l *raftLog) stableSnapTo(i uint64) { l.unstable.stableSnapTo(i) } func (l *raftLog) acceptUnstable() { l.unstable.acceptInProgress() } func (l *raftLog) lastTerm() uint64 { - t, err := l.term(l.lastIndex()) + _, term := l.tip() + return term +} + +func (l *raftLog) tip() (index, term uint64) { + index = l.lastIndex() + t, err := l.term(index) if err != nil { l.logger.Panicf("unexpected error when getting the last term (%v)", err) } - return t + return index, t } func (l *raftLog) term(i uint64) (uint64, error) { diff --git a/raft.go b/raft.go index d1048294..9ef30eb0 100644 --- a/raft.go +++ b/raft.go @@ -757,7 +757,10 @@ func (r *raft) reset(term uint64) { } func (r *raft) appendEntry(es ...pb.Entry) (accepted bool) { - li := r.raftLog.lastIndex() + li, lt := r.raftLog.tip() + if r.Term < lt { + r.logger.Panicf("%x appending out-of-order term: %d < %d", r.id, r.Term, lt) + } for i := range es { es[i].Term = r.Term es[i].Index = li + 1 + uint64(i) @@ -1727,6 +1730,16 @@ func (r *raft) restore(s pb.Snapshot) bool { return false } + // Another defense-in-depth: the follower is seeing a snapshot at a bigger + // term, but hasn't updated its own term. + if s.Metadata.Term > r.Term { + r.logger.Warningf("%x attempted to restore snapshot at term %d while being at earlier term %d; "+ + "should transition to follower at a larger term first", + r.id, s.Metadata.Term, r.Term) + r.becomeFollower(s.Metadata.Term, None) + return false + } + // More defense-in-depth: throw away snapshot if recipient is not in the // config. This shouldn't ever happen (at the time of writing) but lots of // code here and there assumes that r.id is in the progress tracker. diff --git a/raft_snap_test.go b/raft_snap_test.go index 6585bd24..77b1f96e 100644 --- a/raft_snap_test.go +++ b/raft_snap_test.go @@ -17,6 +17,8 @@ package raft import ( "testing" + "github.com/stretchr/testify/require" + pb "go.etcd.io/raft/v3/raftpb" ) @@ -33,8 +35,8 @@ var ( func TestSendingSnapshotSetPendingSnapshot(t *testing.T) { storage := newTestMemoryStorage(withPeers(1)) sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - + sm.becomeFollower(testingSnap.Metadata.Term, 0) + require.True(t, sm.restore(testingSnap)) sm.becomeCandidate() sm.becomeLeader() @@ -51,8 +53,8 @@ func TestSendingSnapshotSetPendingSnapshot(t *testing.T) { func TestPendingSnapshotPauseReplication(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2)) sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - + sm.becomeFollower(testingSnap.Metadata.Term, 0) + require.True(t, sm.restore(testingSnap)) sm.becomeCandidate() sm.becomeLeader() @@ -68,8 +70,8 @@ func TestPendingSnapshotPauseReplication(t *testing.T) { func TestSnapshotFailure(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2)) sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - + sm.becomeFollower(testingSnap.Metadata.Term, 0) + require.True(t, sm.restore(testingSnap)) sm.becomeCandidate() sm.becomeLeader() @@ -91,8 +93,8 @@ func TestSnapshotFailure(t *testing.T) { func TestSnapshotSucceed(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2)) sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - + sm.becomeFollower(testingSnap.Metadata.Term, 0) + require.True(t, sm.restore(testingSnap)) sm.becomeCandidate() sm.becomeLeader() @@ -114,8 +116,8 @@ func TestSnapshotSucceed(t *testing.T) { func TestSnapshotAbort(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2)) sm := newTestRaft(1, 10, 1, storage) - sm.restore(testingSnap) - + sm.becomeFollower(testingSnap.Metadata.Term, 0) + require.True(t, sm.restore(testingSnap)) sm.becomeCandidate() sm.becomeLeader() diff --git a/raft_test.go b/raft_test.go index 5637c4a4..a9ba7eb1 100644 --- a/raft_test.go +++ b/raft_test.go @@ -1366,11 +1366,11 @@ func TestHandleHeartbeat(t *testing.T) { // TestHandleHeartbeatResp ensures that we re-send log entries when we get a heartbeat response. func TestHandleHeartbeatResp(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2)) + storage.SetHardState(pb.HardState{Term: 3, Vote: 1, Commit: 3}) storage.Append([]pb.Entry{{Index: 1, Term: 1}, {Index: 2, Term: 2}, {Index: 3, Term: 3}}) sm := newTestRaft(1, 5, 1, storage) sm.becomeCandidate() sm.becomeLeader() - sm.raftLog.commitTo(sm.raftLog.lastIndex()) // A heartbeat response from a node that is behind; re-send MsgApp sm.Step(pb.Message{From: 2, Type: pb.MsgHeartbeatResp}) @@ -2916,9 +2916,8 @@ func TestRestore(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2)) sm := newTestRaft(1, 10, 1, storage) - if ok := sm.restore(s); !ok { - t.Fatal("restore fail, want succeed") - } + sm.becomeFollower(s.Metadata.Term, 0) + require.True(t, sm.restore(s)) if sm.raftLog.lastIndex() != s.Metadata.Index { t.Errorf("log.lastIndex = %d, want %d", sm.raftLog.lastIndex(), s.Metadata.Index) @@ -2955,9 +2954,8 @@ func TestRestoreWithLearner(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2), withLearners(3)) sm := newTestLearnerRaft(3, 8, 2, storage) - if ok := sm.restore(s); !ok { - t.Error("restore fail, want succeed") - } + sm.becomeFollower(s.Metadata.Term, 0) + require.True(t, sm.restore(s)) if sm.raftLog.lastIndex() != s.Metadata.Index { t.Errorf("log.lastIndex = %d, want %d", sm.raftLog.lastIndex(), s.Metadata.Index) @@ -3001,9 +2999,8 @@ func TestRestoreWithVotersOutgoing(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2)) sm := newTestRaft(1, 10, 1, storage) - if ok := sm.restore(s); !ok { - t.Fatal("restore fail, want succeed") - } + sm.becomeFollower(s.Metadata.Term, 0) + require.True(t, sm.restore(s)) if sm.raftLog.lastIndex() != s.Metadata.Index { t.Errorf("log.lastIndex = %d, want %d", sm.raftLog.lastIndex(), s.Metadata.Index) @@ -3047,13 +3044,9 @@ func TestRestoreVoterToLearner(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2, 3)) sm := newTestRaft(3, 10, 1, storage) - - if sm.isLearner { - t.Errorf("%x is learner, want not", sm.id) - } - if ok := sm.restore(s); !ok { - t.Error("restore failed unexpectedly") - } + sm.becomeFollower(s.Metadata.Term, 0) + require.True(t, !sm.isLearner) + require.True(t, sm.restore(s)) } // TestRestoreLearnerPromotion checks that a learner can become to a follower after @@ -3069,18 +3062,10 @@ func TestRestoreLearnerPromotion(t *testing.T) { storage := newTestMemoryStorage(withPeers(1, 2), withLearners(3)) sm := newTestLearnerRaft(3, 10, 1, storage) - - if !sm.isLearner { - t.Errorf("%x is not learner, want yes", sm.id) - } - - if ok := sm.restore(s); !ok { - t.Error("restore fail, want succeed") - } - - if sm.isLearner { - t.Errorf("%x is learner, want not", sm.id) - } + require.True(t, sm.isLearner) + sm.becomeFollower(s.Metadata.Term, 0) + require.True(t, sm.restore(s)) + require.True(t, !sm.isLearner) } // TestLearnerReceiveSnapshot tests that a learner can receive a snpahost from leader @@ -3096,13 +3081,13 @@ func TestLearnerReceiveSnapshot(t *testing.T) { store := newTestMemoryStorage(withPeers(1), withLearners(2)) n1 := newTestLearnerRaft(1, 10, 1, store) - n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) - - n1.restore(s) + n1.becomeFollower(s.Metadata.Term, 0) + require.True(t, n1.restore(s)) snap := n1.raftLog.nextUnstableSnapshot() store.ApplySnapshot(*snap) n1.appliedSnap(snap) + n2 := newTestLearnerRaft(2, 10, 1, newTestMemoryStorage(withPeers(1), withLearners(2))) nt := newNetwork(n1, n2) setRandomizedElectionTimeout(n1, n1.electionTimeout) @@ -3132,6 +3117,7 @@ func TestRestoreIgnoreSnapshot(t *testing.T) { ConfState: pb.ConfState{Voters: []uint64{1, 2}}, }, } + sm.becomeFollower(s.Metadata.Term, 0) // ignore snapshot if ok := sm.restore(s); ok { @@ -3162,8 +3148,8 @@ func TestProvideSnap(t *testing.T) { } storage := newTestMemoryStorage(withPeers(1)) sm := newTestRaft(1, 10, 1, storage) - sm.restore(s) - + sm.becomeFollower(s.Metadata.Term, 0) + require.True(t, sm.restore(s)) sm.becomeCandidate() sm.becomeLeader() @@ -3192,8 +3178,8 @@ func TestIgnoreProvidingSnap(t *testing.T) { } storage := newTestMemoryStorage(withPeers(1)) sm := newTestRaft(1, 10, 1, storage) - sm.restore(s) - + sm.becomeFollower(s.Metadata.Term, 0) + require.True(t, sm.restore(s)) sm.becomeCandidate() sm.becomeLeader() @@ -3218,14 +3204,11 @@ func TestRestoreFromSnapMsg(t *testing.T) { ConfState: pb.ConfState{Voters: []uint64{1, 2}}, }, } - m := pb.Message{Type: pb.MsgSnap, From: 1, Term: 2, Snapshot: s} + m := pb.Message{Type: pb.MsgSnap, From: 1, Term: 11, Snapshot: s} sm := newTestRaft(2, 10, 1, newTestMemoryStorage(withPeers(1, 2))) sm.Step(m) - - if sm.lead != uint64(1) { - t.Errorf("sm.lead = %d, want 1", sm.lead) - } + require.Equal(t, uint64(1), sm.lead) // TODO(bdarnell): what should this test? }