Skip to content

Commit 2800ab9

Browse files
committed
Gossip tests tweaks
This commit: 1) Shortens time of the discovery tests from 20 seconds to 5 seconds By waiting until assertions hold (or a timeout expires) instead of sleeping and then checking the assertions. 2) Fixes a bug in tests that only manifests on Mac: I am checking At some point that goroutines are dead, but the detection logic didn't fit Mac runtime because they had different order of methods in go tests stack traces. I simply do an exhaustive search now of all stack trace of a goroutine instead of checking a specific index. Change-Id: I0bfed083bedcf6dce6247c77c2885ea436717c82 Signed-off-by: Yacov Manevich <[email protected]>
1 parent 37b1168 commit 2800ab9

File tree

4 files changed

+188
-124
lines changed

4 files changed

+188
-124
lines changed

gossip/comm/comm_impl.go

+3
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,9 @@ func (c *commImpl) Ping(context.Context, *proto.Empty) (*proto.Empty, error) {
458458
}
459459

460460
func (c *commImpl) disconnect(pkiID PKIidType) {
461+
if c.isStopping() {
462+
return
463+
}
461464
c.deadEndpoints <- pkiID
462465
c.connStore.closeByPKIid(pkiID)
463466
}

gossip/comm/conn.go

+4
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ func (cs *connectionStore) getConnection(peer *RemotePeer) (*connection, error)
8989

9090
destinationLock.Unlock()
9191

92+
if cs.isClosing {
93+
return nil, fmt.Errorf("ConnStore is closing")
94+
}
95+
9296
cs.Lock()
9397
delete(cs.destinationLocks, string(pkiID))
9498
defer cs.Unlock()

gossip/discovery/discovery_test.go

+72-35
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ import (
3131
"google.golang.org/grpc"
3232
)
3333

34+
var timeout = time.Second * time.Duration(15)
35+
3436
type dummyCommModule struct {
3537
id string
3638
presumeDead chan PKIidType
@@ -257,35 +259,47 @@ func TestUpdate(t *testing.T) {
257259
instances = append(instances, inst)
258260
}
259261

260-
time.Sleep(time.Duration(5) * time.Second)
261-
assert.Equal(t, nodeNum-1, len(instances[nodeNum-1].GetMembership()))
262+
fullMembership := func() bool {
263+
return nodeNum-1 == len(instances[nodeNum-1].GetMembership())
264+
}
265+
266+
waitUntilOrFail(t, fullMembership)
262267

263268
instances[0].UpdateMetadata([]byte("bla bla"))
264269
instances[nodeNum-1].UpdateEndpoint("localhost:5511")
265-
time.Sleep(time.Duration(5) * time.Second)
266270

267-
for _, member := range instances[nodeNum-1].GetMembership() {
268-
if string(member.PKIid) == instances[0].comm.id {
269-
assert.Equal(t, "bla bla", string(member.Metadata))
271+
checkMembership := func() bool {
272+
for _, member := range instances[nodeNum-1].GetMembership() {
273+
if string(member.PKIid) == instances[0].comm.id {
274+
if "bla bla" != string(member.Metadata) {
275+
return false
276+
}
277+
}
270278
}
271-
}
272279

273-
for _, member := range instances[0].GetMembership() {
274-
if string(member.PKIid) == instances[nodeNum-1].comm.id {
275-
assert.Equal(t, "localhost:5511", string(member.Endpoint))
280+
for _, member := range instances[0].GetMembership() {
281+
if string(member.PKIid) == instances[nodeNum-1].comm.id {
282+
if "localhost:5511" != string(member.Endpoint) {
283+
return false
284+
}
285+
}
276286
}
287+
return true
277288
}
278289

290+
291+
waitUntilOrFail(t, checkMembership)
292+
279293
stopAction := &sync.WaitGroup{}
280-
for i, inst := range instances {
281-
fmt.Println("Stopping instance ", i)
294+
for _, inst := range instances {
282295
stopAction.Add(1)
283296
go func(inst *gossipInstance) {
284297
defer stopAction.Done()
285298
inst.Stop()
286299
}(inst)
287300
}
288-
stopAction.Wait()
301+
302+
waitUntilOrFailBlocking(t, stopAction.Wait)
289303
}
290304

291305
func TestExpiration(t *testing.T) {
@@ -305,30 +319,35 @@ func TestExpiration(t *testing.T) {
305319
instances = append(instances, inst)
306320
}
307321

308-
time.Sleep(time.Duration(4) * time.Second)
309-
assert.Equal(t, nodeNum-1, len(instances[nodeNum-1].GetMembership()))
322+
fullMembership := func() bool {
323+
return nodeNum-1 == len(instances[nodeNum-1].GetMembership())
324+
}
325+
326+
waitUntilOrFail(t, fullMembership)
310327

311-
instances[nodeNum-1].Stop()
312-
instances[nodeNum-2].Stop()
328+
waitUntilOrFailBlocking(t, instances[nodeNum-1].Stop)
329+
waitUntilOrFailBlocking(t, instances[nodeNum-2].Stop)
313330

314331
time.Sleep(time.Duration(2) * time.Second)
315-
assert.Equal(t, nodeNum-3, len(instances[0].GetMembership()))
332+
membershipReduced := func() bool {
333+
return nodeNum-3 == len(instances[0].GetMembership())
334+
}
335+
336+
waitUntilOrFail(t, membershipReduced)
316337

317-
//fmt.Println("Stopping members")
318338
stopAction := &sync.WaitGroup{}
319339
for i, inst := range instances {
320340
if i+2 == nodeNum {
321341
break
322342
}
323-
//fmt.Println("Stopping instance", inst.DiscoveryService.Self().Endpoint)
324343
stopAction.Add(1)
325344
go func(inst *gossipInstance) {
326345
defer stopAction.Done()
327346
inst.Stop()
328-
//fmt.Println("Stopped instance", inst.DiscoveryService.Self().Endpoint)
329347
}(inst)
330348
}
331-
stopAction.Wait()
349+
350+
waitUntilOrFailBlocking(t, stopAction.Wait)
332351
}
333352

334353
func TestGetFullMembership(t *testing.T) {
@@ -348,8 +367,10 @@ func TestGetFullMembership(t *testing.T) {
348367
instances = append(instances, inst)
349368
}
350369

351-
time.Sleep(time.Duration(5) * time.Second)
352-
assert.Equal(t, nodeNum-1, len(instances[nodeNum-1].GetMembership()))
370+
fullMembership := func() bool {
371+
return nodeNum - 1 == len(instances[nodeNum-1].GetMembership())
372+
}
373+
waitUntilOrFail(t, fullMembership)
353374

354375
stopAction := &sync.WaitGroup{}
355376
for _, inst := range instances {
@@ -359,24 +380,40 @@ func TestGetFullMembership(t *testing.T) {
359380
inst.Stop()
360381
}(inst)
361382
}
362-
stopAction.Wait()
383+
384+
waitUntilOrFailBlocking(t, stopAction.Wait)
363385
}
364386

365387
func TestGossipDiscoveryStopping(t *testing.T) {
366388
inst := createDiscoveryInstance(9611, "d1", []string{bootPeer(9611)})
389+
time.Sleep(time.Second)
390+
waitUntilOrFailBlocking(t, inst.Stop)
367391

368-
diedChan := make(chan struct{})
369-
go func(inst *gossipInstance) {
370-
inst.Stop()
371-
diedChan <- struct{}{}
372-
}(inst)
392+
}
373393

374-
timer := time.Tick(time.Duration(500) * time.Millisecond)
394+
func waitUntilOrFail(t *testing.T, pred func() bool) {
395+
start := time.Now()
396+
limit := start.UnixNano() + timeout.Nanoseconds()
397+
for time.Now().UnixNano() < limit {
398+
if pred() {
399+
return
400+
}
401+
time.Sleep(timeout / 10)
402+
}
403+
assert.Fail(t, "Timeout expired!")
404+
}
375405

406+
func waitUntilOrFailBlocking(t *testing.T, f func()) {
407+
successChan := make(chan struct{}, 1)
408+
go func() {
409+
f()
410+
successChan <- struct{}{}
411+
}()
376412
select {
377-
case <-timer:
378-
t.Fatal("Didn't stop within a timely manner")
379-
t.Fail()
380-
case <-diedChan:
413+
case <-time.NewTimer(timeout).C:
414+
break
415+
case <-successChan:
416+
return
381417
}
418+
assert.Fail(t, "Timeout expired!")
382419
}

0 commit comments

Comments
 (0)