Skip to content

Commit 48942d7

Browse files
committed
[FAB-1352] Add time-based block cutting to Kafka
https://jira.hyperledger.org/browse/FAB-1352 In the version that was rebased on top of the common components, this option was kept out in order to minimize the complexity of the changeset. This changeset introduces it so now the Kafka-based consenter uses the BatchTimeout setting in shared config, and posts time-to-cut (TTC-X) messages according to the design document posted here: https://docs.google.com/document/d/1vNMaM7XhOlu9tB_10dKnlrhy5d7b1u8lSY8a-kVjCO4/edit The respective unit tests from the solo package have been ported, as well as additional tests specific to the time-to-cut logic. Do note that this path shall be revisited with integration tests. Change-Id: I743d4412cf8a3536fcb854433dfcbb3baa221d95 Signed-off-by: Kostas Christidis <[email protected]>
1 parent d5f3788 commit 48942d7

File tree

2 files changed

+643
-42
lines changed

2 files changed

+643
-42
lines changed

orderer/kafka/main.go

+50-9
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717
package kafka
1818

1919
import (
20+
"time"
21+
2022
"github.com/Shopify/sarama"
2123
"github.com/golang/protobuf/proto"
2224
"github.com/hyperledger/fabric/orderer/localconfig"
@@ -93,6 +95,7 @@ func newChain(consenter testableConsenter, support multichain.ConsenterSupport)
9395
consenter: consenter,
9496
support: support,
9597
partition: newChainPartition(support.ChainID(), rawPartition),
98+
batchTimeout: support.SharedConfig().BatchTimeout(),
9699
lastProcessed: sarama.OffsetOldest - 1, // TODO This should be retrieved by ConsenterSupport; also see note in loop() below
97100
producer: consenter.prodFunc()(support.SharedConfig().KafkaBrokers(), consenter.kafkaVersion(), consenter.retryOptions()),
98101
halted: false, // Redundant as the default value for booleans is false but added for readability
@@ -123,7 +126,9 @@ type chainImpl struct {
123126
support multichain.ConsenterSupport
124127

125128
partition ChainPartition
129+
batchTimeout time.Duration
126130
lastProcessed int64
131+
lastCutBlock uint64
127132

128133
producer Producer
129134
consumer Consumer
@@ -199,46 +204,82 @@ func (ch *chainImpl) Enqueue(env *cb.Envelope) bool {
199204

200205
func (ch *chainImpl) loop() {
201206
msg := new(ab.KafkaMessage)
207+
var timer <-chan time.Time
208+
var ttcNumber uint64
202209

203210
defer close(ch.haltedChan)
204211
defer ch.producer.Close()
205212
defer func() { ch.halted = true }()
206213
defer ch.consumer.Close()
207214

208-
// TODO Add support for time-based block cutting
209-
210215
for {
211216
select {
212217
case in := <-ch.consumer.Recv():
213-
logger.Debug("Received:", in)
214218
if err := proto.Unmarshal(in.Value, msg); err != nil {
215219
// This shouldn't happen, it should be filtered at ingress
216220
logger.Critical("Unable to unmarshal consumed message:", err)
217221
}
218-
logger.Debug("Unmarshaled to:", msg)
222+
logger.Debug("Received:", msg)
219223
switch msg.Type.(type) {
220-
case *ab.KafkaMessage_Connect, *ab.KafkaMessage_TimeToCut:
221-
logger.Debugf("Ignoring message")
224+
case *ab.KafkaMessage_Connect:
225+
logger.Debug("It's a connect message - ignoring")
222226
continue
227+
case *ab.KafkaMessage_TimeToCut:
228+
ttcNumber = msg.GetTimeToCut().BlockNumber
229+
logger.Debug("It's a time-to-cut message for block", ttcNumber)
230+
if ttcNumber == ch.lastCutBlock+1 {
231+
timer = nil
232+
logger.Debug("Nil'd the timer")
233+
batch, committers := ch.support.BlockCutter().Cut()
234+
if len(batch) == 0 {
235+
logger.Warningf("Got right time-to-cut message (%d) but no pending requests - this might indicate a bug", ch.lastCutBlock)
236+
logger.Infof("Consenter for chain %s exiting", ch.partition.Topic())
237+
return
238+
}
239+
block := ch.support.CreateNextBlock(batch)
240+
ch.support.WriteBlock(block, committers)
241+
ch.lastCutBlock++
242+
logger.Debug("Proper time-to-cut received, just cut block", ch.lastCutBlock)
243+
continue
244+
} else if ttcNumber > ch.lastCutBlock+1 {
245+
logger.Warningf("Got larger time-to-cut message (%d) than allowed (%d) - this might indicate a bug", ttcNumber, ch.lastCutBlock+1)
246+
logger.Infof("Consenter for chain %s exiting", ch.partition.Topic())
247+
return
248+
}
249+
logger.Debug("Ignoring stale time-to-cut-message for", ch.lastCutBlock)
223250
case *ab.KafkaMessage_Regular:
224251
env := new(cb.Envelope)
225252
if err := proto.Unmarshal(msg.GetRegular().Payload, env); err != nil {
226253
// This shouldn't happen, it should be filtered at ingress
227-
logger.Critical("Unable to unmarshal consumed message:", err)
254+
logger.Critical("Unable to unmarshal consumed regular message:", err)
228255
continue
229256
}
230257
batches, committers, ok := ch.support.BlockCutter().Ordered(env)
231258
logger.Debugf("Ordering results: batches: %v, ok: %v", batches, ok)
232-
if ok && len(batches) == 0 {
259+
if ok && len(batches) == 0 && timer == nil {
260+
timer = time.After(ch.batchTimeout)
261+
logger.Debugf("Just began %s batch timer", ch.batchTimeout.String())
233262
continue
234263
}
235264
// If !ok, batches == nil, so this will be skipped
236265
for i, batch := range batches {
237266
block := ch.support.CreateNextBlock(batch)
238267
ch.support.WriteBlock(block, committers[i])
268+
ch.lastCutBlock++
269+
logger.Debug("Batch filled, just cut block", ch.lastCutBlock)
270+
}
271+
if len(batches) > 0 {
272+
timer = nil
239273
}
240274
}
241-
case <-ch.exitChan: // when Halt() is called
275+
case <-timer:
276+
logger.Debugf("Time-to-cut block %d timer expired", ch.lastCutBlock+1)
277+
timer = nil
278+
if err := ch.producer.Send(ch.partition, utils.MarshalOrPanic(newTimeToCutMessage(ch.lastCutBlock+1))); err != nil {
279+
logger.Errorf("Couldn't post to %s: %s", ch.partition, err)
280+
// Do not exit
281+
}
282+
case <-ch.exitChan: // When Halt() is called
242283
logger.Infof("Consenter for chain %s exiting", ch.partition.Topic())
243284
return
244285
}

0 commit comments

Comments (0)