Skip to content

Commit 30f832f

Browse files
committed
Panic when the network can't get stable checkpoint cert
Introduces the behavior required in FAB-189: https://jira.hyperledger.org/browse/FAB-189 Change-Id: I1a0e3809c5f58b7e256e5e27e0e8722b6f674725 Signed-off-by: Kostas Christidis <[email protected]>
1 parent 46350b0 commit 30f832f

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

consensus/pbft/pbft-core.go

+18-2
Original file line numberDiff line numberDiff line change
@@ -1160,15 +1160,31 @@ func (instance *pbftCore) recvCheckpoint(chkpt *Checkpoint) events.Event {
11601160

11611161
instance.checkpointStore[*chkpt] = true
11621162

1163+
// Track how many different checkpoint values we have for the seqNo in question
1164+
diffValues := make(map[string]struct{})
1165+
diffValues[chkpt.Id] = struct{}{}
1166+
11631167
matching := 0
11641168
for testChkpt := range instance.checkpointStore {
1165-
if testChkpt.SequenceNumber == chkpt.SequenceNumber && testChkpt.Id == chkpt.Id {
1166-
matching++
1169+
if testChkpt.SequenceNumber == chkpt.SequenceNumber {
1170+
if testChkpt.Id == chkpt.Id {
1171+
matching++
1172+
} else {
1173+
if _, ok := diffValues[testChkpt.Id]; !ok {
1174+
diffValues[testChkpt.Id] = struct{}{}
1175+
}
1176+
}
11671177
}
11681178
}
11691179
logger.Debugf("Replica %d found %d matching checkpoints for seqNo %d, digest %s",
11701180
instance.id, matching, chkpt.SequenceNumber, chkpt.Id)
11711181

1182+
// If f+2 different values have been observed, we'll never be able to get a stable cert for this seqNo
1183+
if count := len(diffValues); count > instance.f+1 {
1184+
logger.Panicf("Network unable to find stable certificate for seqNo %d (%d different values observed already)",
1185+
chkpt.SequenceNumber, count)
1186+
}
1187+
11721188
if matching == instance.f+1 {
11731189
// We have a weak cert
11741190
// If we have generated a checkpoint for this seqNo, make sure we have a match

consensus/pbft/pbft-core_test.go

+23-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"os"
2323
"reflect"
24+
"strconv"
2425
"strings"
2526
"sync"
2627
"testing"
@@ -1763,7 +1764,7 @@ func TestCheckpointDiffersFromWeakCert(t *testing.T) {
17631764

17641765
badChkpt := &Checkpoint{
17651766
SequenceNumber: 10,
1766-
Id: base64.StdEncoding.EncodeToString([]byte("WRONG")),
1767+
Id: "WRONG",
17671768
ReplicaId: 3,
17681769
}
17691770
instance.chkpts[10] = badChkpt.Id // This is done via the exec path, shortcut it here
@@ -1772,7 +1773,7 @@ func TestCheckpointDiffersFromWeakCert(t *testing.T) {
17721773
for i := uint64(0); i < 2; i++ {
17731774
events.SendEvent(instance, &Checkpoint{
17741775
SequenceNumber: 10,
1775-
Id: base64.StdEncoding.EncodeToString([]byte("CORRECT")),
1776+
Id: "CORRECT",
17761777
ReplicaId: i,
17771778
})
17781779
}
@@ -1781,3 +1782,23 @@ func TestCheckpointDiffersFromWeakCert(t *testing.T) {
17811782
t.Fatalf("State target should not have been updated")
17821783
}
17831784
}
1785+
1786+
// This test is designed to ensure the peer panics if it observes > f+1 different checkpoint values for the same seqNo
1787+
// This indicates a network that will be unable to move its watermarks and thus progress
1788+
func TestNoCheckpointQuorum(t *testing.T) {
1789+
defer func() {
1790+
if r := recover(); r == nil {
1791+
t.Errorf("More than f+1 different checkpoint values found, should have panicked.")
1792+
}
1793+
}()
1794+
1795+
instance := newPbftCore(3, loadConfig(), &omniProto{}, &inertTimerFactory{})
1796+
1797+
for i := uint64(0); i < 3; i++ {
1798+
events.SendEvent(instance, &Checkpoint{
1799+
SequenceNumber: 10,
1800+
Id: strconv.FormatUint(i, 10),
1801+
ReplicaId: i,
1802+
})
1803+
}
1804+
}

0 commit comments

Comments
 (0)