Skip to content

Commit ecc4ea8

Browse files
committed
Gossip Discovery small fix
Fixes a scenario in which a node is resurrected but is not found in the dead list; in that case it must still be added to the alive list. Also fixes a bug when expiring a peer: if the peer has no entry in aliveLastTS (no last-seen timestamp), nothing is moved into deadLastTS, because there is no timestamp to move. Change-Id: I7b5fdd21abc4dffcf2bcc03c0d9a9190d19d3506 Signed-off-by: Yacov Manevich <[email protected]>
1 parent 03c4a70 commit ecc4ea8

File tree

1 file changed

+19
-16
lines changed

1 file changed

+19
-16
lines changed

gossip/discovery/discovery_impl.go

+19-16
Original file line numberDiff line numberDiff line change
@@ -280,19 +280,12 @@ func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.AliveMessage) {
280280
d.logger.Debug("Got alive message about ourselves,", m)
281281
return
282282
}
283-
endpoint := m.Membership.Endpoint
284283
ts := m.Timestamp
285-
meta := m.Membership.Metadata
286284

287285
d.lock.RLock()
288286
_, known := d.id2Member[id]
289287
d.lock.RUnlock()
290288

291-
netMember := &NetworkMember{
292-
Id: id,
293-
Endpoint: endpoint,
294-
Metadata: meta,
295-
}
296289

297290
if !known {
298291
d.learnNewMembers([]*proto.AliveMessage{m}, []*proto.AliveMessage{})
@@ -316,7 +309,7 @@ func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.AliveMessage) {
316309

317310
if !isAlive && uint64(lastDeadTS.incTime.Nanosecond()) <= ts.IncNumber && lastDeadTS.seqNum < ts.SeqNum {
318311
// resurrect peer
319-
d.resurrectMember(id, netMember, *ts)
312+
d.resurrectMember(m, *ts)
320313
return
321314
}
322315

@@ -332,30 +325,37 @@ func (d *gossipDiscoveryImpl) handleAliveMessage(m *proto.AliveMessage) {
332325
// else, ignore the message because it is too old
333326
}
334327

335-
func (d *gossipDiscoveryImpl) resurrectMember(id string, member *NetworkMember, t proto.PeerTime) {
336-
d.logger.Info("Entering, id =", id, "member = ", member, "t = ", t)
328+
func (d *gossipDiscoveryImpl) resurrectMember(m *proto.AliveMessage, t proto.PeerTime) {
329+
d.logger.Info("Entering,", m, t)
337330
defer d.logger.Info("Exiting")
338331
d.lock.Lock()
339332
defer d.lock.Unlock()
340333

334+
id := m.Membership.Id
335+
341336
d.aliveLastTS[id] = &timestamp{
342337
lastSeen: time.Now(),
343338
seqNum: t.SeqNum,
344339
incTime: tsToTime(t.IncNumber),
345340
}
346341

347-
d.id2Member[id] = member
342+
d.id2Member[id] = &NetworkMember{
343+
Id: id,
344+
Endpoint: m.Membership.Endpoint,
345+
Metadata: m.Membership.Metadata,
346+
}
348347
delete(d.deadLastTS, id)
349348
aliveMsgWithId := &proto.AliveMessage{
350349
Membership: &proto.Member{Id: id},
351350
}
352351

352+
// If the member is in the dead list, delete it from there
353353
i := util.IndexInSlice(d.cachedMembership.Dead, aliveMsgWithId, sameIdAliveMessages)
354354
if i != -1 {
355-
resurrectedMember := d.cachedMembership.Dead[i]
356355
d.cachedMembership.Dead = append(d.cachedMembership.Dead[:i], d.cachedMembership.Dead[i+1:]...)
357-
d.cachedMembership.Alive = append(d.cachedMembership.Alive, resurrectedMember)
358356
}
357+
// add the member to the alive list
358+
d.cachedMembership.Alive = append(d.cachedMembership.Alive, m)
359359
}
360360

361361
func (d *gossipDiscoveryImpl) periodicalReconnectToDead() {
@@ -467,9 +467,12 @@ func (d *gossipDiscoveryImpl) expireDeadMembers(dead []string) {
467467
for _, id := range dead {
468468
d.comm.CloseConn(id)
469469
// move lastTS from alive to dead
470-
lastTS := d.aliveLastTS[id]
471-
d.deadLastTS[id] = lastTS
472-
delete(d.aliveLastTS, id)
470+
lastTS, hasLastTS := d.aliveLastTS[id]
471+
if hasLastTS {
472+
d.deadLastTS[id] = lastTS
473+
delete(d.aliveLastTS, id)
474+
}
475+
473476
aliveMsgWithId := &proto.AliveMessage{
474477
Membership: &proto.Member{Id: id},
475478
}

0 commit comments

Comments
 (0)