Skip to content

Commit a7f445f

Browse files
committed
pull algorithm fix
Had a bug in the pull algorithm: the NONCE set actually needed to be split into 2 different sets, because servicing sync requests has nothing to do with initiating them, and the cleanup of initiated sync requests affected sync requests that were being serviced. Added a unit test that successfully reproduced the bug; after the fix it always succeeds. Change-Id: Ie99f41485a41ddb46e99d8fa7d8db408f669b20c Signed-off-by: Yacov Manevich <[email protected]>
1 parent db22cdc commit a7f445f

File tree

2 files changed

+101
-40
lines changed

2 files changed

+101
-40
lines changed

gossip/gossip/algo/pull.go

+46-24
Original file line numberDiff line numberDiff line change
@@ -43,19 +43,28 @@ import (
4343
4444
*/
4545

46-
const (
47-
DEF_DIGEST_WAIT_TIME = time.Duration(4) * time.Second
48-
DEF_REQUEST_WAIT_TIME = time.Duration(4) * time.Second
49-
DEF_RESPONSE_WAIT_TIME = time.Duration(7) * time.Second
50-
)
51-
5246
func init() {
5347
rand.Seed(42)
5448
}
5549

56-
var defaultDigestWaitTime = DEF_DIGEST_WAIT_TIME
57-
var defaultRequestWaitTime = DEF_REQUEST_WAIT_TIME
58-
var defaultResponseWaitTime = DEF_RESPONSE_WAIT_TIME
50+
// Wait times that pace the phases of a pull round. They are variables rather
// than constants so that callers (and tests) can adjust them through the
// setters below.
var (
	// digestWaitTime is the delay between sending hellos in initiatePull and
	// processing the digests that arrived in the meantime.
	digestWaitTime = 4 * time.Second
	// requestWaitTime is how long a nonce registered by OnHello is kept
	// before it is removed again.
	requestWaitTime = 4 * time.Second
	// responseWaitTime is the delay between sending requests in
	// processIncomingDigests and ending the pull round.
	responseWaitTime = 7 * time.Second
)

// SetDigestWaitTime sets the digest wait time.
// The assignment is not synchronized.
func SetDigestWaitTime(d time.Duration) {
	digestWaitTime = d
}

// SetRequestWaitTime sets the request wait time.
// The assignment is not synchronized.
func SetRequestWaitTime(d time.Duration) {
	requestWaitTime = d
}

// SetResponseWaitTime sets the response wait time.
// The assignment is not synchronized.
func SetResponseWaitTime(d time.Duration) {
	responseWaitTime = d
}
5968

6069
// PullAdapter is needed by the PullEngine in order to
6170
// send messages to the remote PullEngine instances.
@@ -83,6 +92,8 @@ type PullAdapter interface {
8392
SendRes(items []uint64, context interface{}, nonce uint64)
8493
}
8594

95+
// PullEngine is the component that actually invokes the pull algorithm
96+
// with the help of the PullAdapter
8697
type PullEngine struct {
8798
PullAdapter
8899
stopFlag int32
@@ -93,20 +104,24 @@ type PullEngine struct {
93104
acceptingDigests int32
94105
acceptingResponses int32
95106
lock sync.Mutex
96-
nonces *util.Set
107+
outgoingNONCES *util.Set
108+
incomingNONCES *util.Set
97109
}
98110

111+
// NewPullEngine creates an instance of a PullEngine with a certain sleep time
112+
// between pull initiations
99113
func NewPullEngine(participant PullAdapter, sleepTime time.Duration) *PullEngine {
100114
engine := &PullEngine{
101-
PullAdapter: participant,
115+
PullAdapter: participant,
102116
stopFlag: int32(0),
103117
state: util.NewSet(),
104118
item2owners: make(map[uint64][]string),
105119
peers2nonces: make(map[string]uint64),
106120
nonces2peers: make(map[uint64]string),
107121
acceptingDigests: int32(0),
108122
acceptingResponses: int32(0),
109-
nonces: util.NewSet(),
123+
incomingNONCES: util.NewSet(),
124+
outgoingNONCES: util.NewSet(),
110125
}
111126

112127
go func() {
@@ -144,6 +159,7 @@ func (engine *PullEngine) ignoreDigests() {
144159
atomic.StoreInt32(&(engine.acceptingDigests), int32(0))
145160
}
146161

162+
// Stop stops the engine
147163
func (engine *PullEngine) Stop() {
148164
atomic.StoreInt32(&(engine.stopFlag), int32(1))
149165
}
@@ -155,13 +171,13 @@ func (engine *PullEngine) initiatePull() {
155171
engine.acceptDigests()
156172
for _, peer := range engine.SelectPeers() {
157173
nonce := engine.newNONCE()
158-
engine.nonces.Add(nonce)
174+
engine.outgoingNONCES.Add(nonce)
159175
engine.nonces2peers[nonce] = peer
160176
engine.peers2nonces[peer] = nonce
161177
engine.Hello(peer, nonce)
162178
}
163179

164-
time.AfterFunc(defaultDigestWaitTime, func() {
180+
time.AfterFunc(digestWaitTime, func() {
165181
engine.processIncomingDigests()
166182
})
167183
}
@@ -189,7 +205,7 @@ func (engine *PullEngine) processIncomingDigests() {
189205
engine.SendReq(dest, seqsToReq, engine.peers2nonces[dest])
190206
}
191207

192-
time.AfterFunc(defaultResponseWaitTime, engine.endPull)
208+
time.AfterFunc(responseWaitTime, engine.endPull)
193209

194210
}
195211

@@ -198,15 +214,16 @@ func (engine *PullEngine) endPull() {
198214
defer engine.lock.Unlock()
199215

200216
atomic.StoreInt32(&(engine.acceptingResponses), int32(0))
201-
engine.nonces.Clear()
217+
engine.outgoingNONCES.Clear()
202218

203219
engine.item2owners = make(map[uint64][]string)
204220
engine.peers2nonces = make(map[string]uint64)
205221
engine.nonces2peers = make(map[uint64]string)
206222
}
207223

224+
// OnDigest notifies the engine that a digest has arrived
208225
func (engine *PullEngine) OnDigest(digest []uint64, nonce uint64, context interface{}) {
209-
if !engine.isAcceptingDigests() || !engine.nonces.Exists(nonce) {
226+
if !engine.isAcceptingDigests() || !engine.outgoingNONCES.Exists(nonce) {
210227
return
211228
}
212229

@@ -226,22 +243,25 @@ func (engine *PullEngine) OnDigest(digest []uint64, nonce uint64, context interf
226243
}
227244
}
228245

246+
// Add adds items to the state
229247
func (engine *PullEngine) Add(seqs ...uint64) {
230248
for _, seq := range seqs {
231249
engine.state.Add(seq)
232250
}
233251
}
234252

253+
// Remove removes items from the state
235254
func (engine *PullEngine) Remove(seqs ...uint64) {
236255
for _, seq := range seqs {
237256
engine.state.Remove(seq)
238257
}
239258
}
240259

260+
// OnHello notifies the engine a hello has arrived
241261
func (engine *PullEngine) OnHello(nonce uint64, context interface{}) {
242-
engine.nonces.Add(nonce)
243-
time.AfterFunc(defaultRequestWaitTime, func() {
244-
engine.nonces.Remove(nonce)
262+
engine.incomingNONCES.Add(nonce)
263+
time.AfterFunc(requestWaitTime, func() {
264+
engine.incomingNONCES.Remove(nonce)
245265
})
246266

247267
a := engine.state.ToArray()
@@ -252,14 +272,15 @@ func (engine *PullEngine) OnHello(nonce uint64, context interface{}) {
252272
engine.SendDigest(digest, nonce, context)
253273
}
254274

275+
// OnReq notifies the engine a request has arrived
255276
func (engine *PullEngine) OnReq(items []uint64, nonce uint64, context interface{}) {
256-
if !engine.nonces.Exists(nonce) {
277+
if !engine.incomingNONCES.Exists(nonce) {
257278
return
258279
}
259280
engine.lock.Lock()
260281
defer engine.lock.Unlock()
261282

262-
items2Send := make([]uint64, 0)
283+
var items2Send []uint64
263284
for _, item := range items {
264285
if engine.state.Exists(item) {
265286
items2Send = append(items2Send, item)
@@ -269,8 +290,9 @@ func (engine *PullEngine) OnReq(items []uint64, nonce uint64, context interface{
269290
engine.SendRes(items2Send, context, nonce)
270291
}
271292

293+
// OnRes notifies the engine a response has arrived
272294
func (engine *PullEngine) OnRes(items []uint64, nonce uint64) {
273-
if !engine.nonces.Exists(nonce) || !engine.isAcceptingResponses() {
295+
if !engine.outgoingNONCES.Exists(nonce) || !engine.isAcceptingResponses() {
274296
return
275297
}
276298

@@ -281,7 +303,7 @@ func (engine *PullEngine) newNONCE() uint64 {
281303
n := uint64(0)
282304
for {
283305
n = uint64(rand.Int63())
284-
if !engine.nonces.Exists(n) {
306+
if !engine.outgoingNONCES.Exists(n) {
285307
return n
286308
}
287309
}

gossip/gossip/algo/pull_test.go

+55-16
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,17 @@ import (
2121
"testing"
2222
"time"
2323

24+
"fmt"
25+
"sync/atomic"
26+
2427
"github.com/hyperledger/fabric/gossip/util"
2528
"github.com/stretchr/testify/assert"
26-
"sync/atomic"
2729
)
2830

2931
func init() {
30-
defaultRequestWaitTime = time.Duration(50) * time.Millisecond
31-
defaultDigestWaitTime = time.Duration(20) * time.Millisecond
32-
defaultResponseWaitTime = time.Duration(50) * time.Millisecond
33-
32+
requestWaitTime = time.Duration(50) * time.Millisecond
33+
digestWaitTime = time.Duration(20) * time.Millisecond
34+
responseWaitTime = time.Duration(50) * time.Millisecond
3435
}
3536

3637
type messageHook func(interface{})
@@ -78,15 +79,14 @@ func newPushPullTestInstance(name string, peers map[string]*pullTestInstance) *p
7879
name: name,
7980
}
8081

81-
inst.PullEngine = NewPullEngine(inst, time.Duration(500)*time.Millisecond)
82+
inst.PullEngine = NewPullEngine(inst, time.Duration(100)*time.Millisecond)
8283

8384
peers[name] = inst
8485
go func() {
8586
for {
8687
select {
8788
case <-inst.stopChan:
8889
return
89-
break
9090
case m := <-inst.msgQueue:
9191
inst.handleMessage(m)
9292
break
@@ -207,6 +207,31 @@ func TestPullEngine_Stop(t *testing.T) {
207207
assert.Equal(t, len1, len2, "PullEngine was still active after Stop() was invoked!")
208208
}
209209

210+
func TestPullEngineAll2AllWithIncrementalSpawning(t *testing.T) {
211+
// Scenario: spawn 10 nodes, each 50 ms after the other
212+
// and have them transfer data between themselves.
213+
// Expected outcome: obviously, everything should succeed.
214+
// Isn't that why we're here?
215+
instanceCount := 10
216+
peers := make(map[string]*pullTestInstance)
217+
218+
for i := 0; i < instanceCount; i++ {
219+
inst := newPushPullTestInstance(fmt.Sprintf("p%d", i+1), peers)
220+
inst.Add(uint64(i + 1))
221+
time.Sleep(time.Duration(50) * time.Millisecond)
222+
}
223+
for i := 0; i < instanceCount; i++ {
224+
pID := fmt.Sprintf("p%d", i+1)
225+
peers[pID].setNextPeerSelection(keySet(pID, peers))
226+
}
227+
time.Sleep(time.Duration(500) * time.Millisecond)
228+
229+
for i := 0; i < instanceCount; i++ {
230+
pID := fmt.Sprintf("p%d", i+1)
231+
assert.Equal(t, instanceCount, len(peers[pID].state.ToArray()))
232+
}
233+
}
234+
210235
func TestPullEngineSelectiveUpdates(t *testing.T) {
211236
// Scenario: inst1 has {1, 3} and inst2 has {0,1,2,3}.
212237
// inst1 initiates to inst2
@@ -254,7 +279,7 @@ func TestPullEngineSelectiveUpdates(t *testing.T) {
254279

255280
inst1.setNextPeerSelection([]string{"p2"})
256281

257-
time.Sleep(time.Duration(800) * time.Millisecond)
282+
time.Sleep(time.Duration(200) * time.Millisecond)
258283
assert.Equal(t, len(inst2.state.ToArray()), len(inst1.state.ToArray()))
259284
}
260285

@@ -301,7 +326,7 @@ func TestByzantineResponder(t *testing.T) {
301326

302327
inst1.setNextPeerSelection([]string{"p2"})
303328

304-
time.Sleep(time.Duration(800) * time.Millisecond)
329+
time.Sleep(time.Duration(200) * time.Millisecond)
305330

306331
assert.Equal(t, int32(1), atomic.LoadInt32(&receivedDigestFromInst3), "inst1 hasn't received a digest from inst3")
307332

@@ -333,7 +358,7 @@ func TestMultipleInitiators(t *testing.T) {
333358
inst2.setNextPeerSelection([]string{"p4"})
334359
inst3.setNextPeerSelection([]string{"p4"})
335360

336-
time.Sleep(time.Duration(800) * time.Millisecond)
361+
time.Sleep(time.Duration(200) * time.Millisecond)
337362

338363
for _, inst := range []*pullTestInstance{inst1, inst2, inst3} {
339364
assert.True(t, util.IndexInSlice(inst.state.ToArray(), uint64(1), numericCompare) != -1)
@@ -362,7 +387,7 @@ func TestLatePeers(t *testing.T) {
362387
})
363388
inst1.setNextPeerSelection([]string{"p2", "p3"})
364389

365-
time.Sleep(time.Duration(800) * time.Millisecond)
390+
time.Sleep(time.Duration(200) * time.Millisecond)
366391

367392
assert.True(t, util.IndexInSlice(inst1.state.ToArray(), uint64(1), numericCompare) == -1)
368393
assert.True(t, util.IndexInSlice(inst1.state.ToArray(), uint64(2), numericCompare) == -1)
@@ -391,7 +416,7 @@ func TestBiDiUpdates(t *testing.T) {
391416
inst1.setNextPeerSelection([]string{"p2"})
392417
inst2.setNextPeerSelection([]string{"p1"})
393418

394-
time.Sleep(time.Duration(800) * time.Millisecond)
419+
time.Sleep(time.Duration(200) * time.Millisecond)
395420

396421
assert.True(t, util.IndexInSlice(inst1.state.ToArray(), uint64(0), numericCompare) != -1)
397422
assert.True(t, util.IndexInSlice(inst1.state.ToArray(), uint64(1), numericCompare) != -1)
@@ -453,14 +478,14 @@ func TestSpread(t *testing.T) {
453478

454479
inst1.setNextPeerSelection([]string{"p2", "p3", "p4"})
455480

456-
time.Sleep(time.Duration(800) * time.Millisecond)
481+
time.Sleep(time.Duration(200) * time.Millisecond)
457482

458483
lock.Lock()
459-
for p_i, counter := range chooseCounters {
460-
if p_i == "p5" {
484+
for pI, counter := range chooseCounters {
485+
if pI == "p5" {
461486
assert.Equal(t, 0, counter)
462487
} else {
463-
assert.True(t, counter > 0, "%s was not selected!", p_i)
488+
assert.True(t, counter > 0, "%s was not selected!", pI)
464489
}
465490
}
466491
lock.Unlock()
@@ -470,3 +495,17 @@ func TestSpread(t *testing.T) {
470495
func numericCompare(a interface{}, b interface{}) bool {
471496
return a.(uint64) == b.(uint64)
472497
}
498+
499+
func keySet(selfPeer string, m map[string]*pullTestInstance) []string {
500+
peers := make([]string, len(m)-1)
501+
i := 0
502+
for pID := range m {
503+
if pID == selfPeer {
504+
continue
505+
}
506+
peers[i] = pID
507+
i++
508+
}
509+
510+
return peers
511+
}

0 commit comments

Comments
 (0)