Skip to content

Commit c0dc54b

Browse files
committed
FAB-1233 State DB recovery
We need to recover the state DB when (i) the peer fails during commit, leaving a partially written state, or (ii) the database gets corrupted. We introduce a new entry in the state DB called the savepoint, and update it with the block height once all valid transactions in the most recent block have been committed. Whenever the peer starts (on first boot or after a failure), we compare the savepoint in the state DB with the block height to see whether the state DB is in a consistent state. If it is not, we execute the following steps: (i) retrieve all required blocks (#blocks = block height - savepoint) from block storage, and (ii) for each block, compute the write set for the valid transactions, commit these values, and update the savepoint. Change-Id: I769d1391de511d3cdb55c40692beb829e2cc5c2f Signed-off-by: senthil <[email protected]>
1 parent 87a0ce8 commit c0dc54b

File tree

5 files changed

+186
-24
lines changed

5 files changed

+186
-24
lines changed

core/ledger/kvledger/kv_ledger.go

+53-6
Original file line numberDiff line numberDiff line change
@@ -77,25 +77,72 @@ func NewKVLedger(conf *Conf) (*KVLedger, error) {
7777
blockStorageConf := fsblkstorage.NewConf(conf.blockStorageDir, conf.maxBlockfileSize)
7878
blockStore := fsblkstorage.NewFsBlockStore(blockStorageConf, indexConfig)
7979

80+
var txmgmt txmgmt.TxMgr
8081
if ledgerconfig.IsCouchDBEnabled() == true {
8182
//By default we can talk to CouchDB with empty id and pw (""), or you can add your own id and password to talk to a secured CouchDB
8283
logger.Debugf("===COUCHDB=== NewKVLedger() Using CouchDB instead of RocksDB...hardcoding and passing connection config for now")
8384

8485
couchDBDef := ledgerconfig.GetCouchDBDefinition()
8586

8687
//create new transaction manager based on couchDB
87-
txmgmt := couchdbtxmgmt.NewCouchDBTxMgr(&couchdbtxmgmt.Conf{DBPath: conf.txMgrDBPath},
88+
txmgmt = couchdbtxmgmt.NewCouchDBTxMgr(&couchdbtxmgmt.Conf{DBPath: conf.txMgrDBPath},
8889
couchDBDef.URL, //couchDB connection URL
8990
"system", //couchDB db name matches ledger name, TODO for now use system ledger, eventually allow passing in subledger name
9091
couchDBDef.Username, //enter couchDB id here
9192
couchDBDef.Password) //enter couchDB pw here
92-
return &KVLedger{blockStore, txmgmt, nil}, nil
93+
} else {
94+
// Fall back to using RocksDB lockbased transaction manager
95+
txmgmt = lockbasedtxmgmt.NewLockBasedTxMgr(&lockbasedtxmgmt.Conf{DBPath: conf.txMgrDBPath})
9396
}
97+
l := &KVLedger{blockStore, txmgmt, nil}
9498

95-
// Fall back to using RocksDB lockbased transaction manager
96-
txmgmt := lockbasedtxmgmt.NewLockBasedTxMgr(&lockbasedtxmgmt.Conf{DBPath: conf.txMgrDBPath})
97-
return &KVLedger{blockStore, txmgmt, nil}, nil
99+
if err := recoverStateDB(l); err != nil {
100+
panic(fmt.Errorf(`Error during state DB recovery:%s`, err))
101+
}
102+
103+
return l, nil
104+
105+
}
106+
107+
//Recover the state database by recommitting last valid blocks
108+
func recoverStateDB(l *KVLedger) error {
109+
//If there is no block in blockstorage, nothing to recover.
110+
info, _ := l.blockStore.GetBlockchainInfo()
111+
if info.Height == 0 {
112+
return nil
113+
}
114+
115+
//Getting savepointValue stored in the state DB
116+
var err error
117+
var savepointValue uint64
118+
if savepointValue, err = l.txtmgmt.GetBlockNumFromSavepoint(); err != nil {
119+
return err
120+
}
121+
122+
//Checking whether the savepointValue is in sync with block storage height
123+
if savepointValue == info.Height {
124+
return nil
125+
} else if savepointValue > info.Height {
126+
return errors.New("BlockStorage height is behind savepoint by %d blocks. Recovery the BlockStore first")
127+
}
98128

129+
//Compute updateSet for each missing savepoint and commit to state DB
130+
for blockNumber := savepointValue + 1; blockNumber <= info.Height; blockNumber++ {
131+
if l.pendingBlockToCommit, err = l.GetBlockByNumber(blockNumber); err != nil {
132+
return err
133+
}
134+
logger.Debugf("Constructing updateSet for the block %d", blockNumber)
135+
if _, _, err = l.txtmgmt.ValidateAndPrepare(l.pendingBlockToCommit, false); err != nil {
136+
return err
137+
}
138+
logger.Debugf("Committing block %d to state database", blockNumber)
139+
if err = l.txtmgmt.Commit(); err != nil {
140+
return err
141+
}
142+
}
143+
l.pendingBlockToCommit = nil
144+
145+
return nil
99146
}
100147

101148
// GetTransactionByID retrieves a transaction by id
@@ -150,7 +197,7 @@ func (l *KVLedger) RemoveInvalidTransactionsAndPrepare(block *common.Block) (*co
150197
var validBlock *common.Block
151198
var invalidTxs []*pb.InvalidTransaction
152199
var err error
153-
validBlock, invalidTxs, err = l.txtmgmt.ValidateAndPrepare(block)
200+
validBlock, invalidTxs, err = l.txtmgmt.ValidateAndPrepare(block, true)
154201
if err == nil {
155202
l.pendingBlockToCommit = validBlock
156203
}

core/ledger/kvledger/kv_ledger_test.go

+82-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ func TestKVLedgerBlockStorage(t *testing.T) {
2828
defer env.cleanup()
2929
ledger, _ := NewKVLedger(env.conf)
3030
defer ledger.Close()
31-
3231
bcInfo, _ := ledger.GetBlockchainInfo()
3332
testutil.AssertEquals(t, bcInfo, &pb.BlockchainInfo{
3433
Height: 0, CurrentBlockHash: nil, PreviousBlockHash: nil})
@@ -76,3 +75,85 @@ func TestKVLedgerBlockStorage(t *testing.T) {
7675
b2, _ = ledger.GetBlockByNumber(2)
7776
testutil.AssertEquals(t, b2, block2)
7877
}
78+
79+
func TestKVLedgerStateDBRecovery(t *testing.T) {
80+
env := newTestEnv(t)
81+
defer env.cleanup()
82+
ledger, _ := NewKVLedger(env.conf)
83+
defer ledger.Close()
84+
85+
bcInfo, _ := ledger.GetBlockchainInfo()
86+
testutil.AssertEquals(t, bcInfo, &pb.BlockchainInfo{
87+
Height: 0, CurrentBlockHash: nil, PreviousBlockHash: nil})
88+
89+
//creating and committing the first block
90+
simulator, _ := ledger.NewTxSimulator()
91+
//simulating a transaction
92+
simulator.SetState("ns1", "key1", []byte("value1"))
93+
simulator.SetState("ns1", "key2", []byte("value2"))
94+
simulator.SetState("ns1", "key3", []byte("value3"))
95+
simulator.Done()
96+
simRes, _ := simulator.GetTxSimulationResults()
97+
//generating a block based on the simulation result
98+
bg := testutil.NewBlockGenerator(t)
99+
block1 := bg.NextBlock([][]byte{simRes}, false)
100+
//performing validation of read and write set to find valid transactions
101+
ledger.RemoveInvalidTransactionsAndPrepare(block1)
102+
//writing the validated block to block storage and committing the transaction to state DB
103+
ledger.Commit()
104+
105+
bcInfo, _ = ledger.GetBlockchainInfo()
106+
block1Hash := block1.Header.Hash()
107+
testutil.AssertEquals(t, bcInfo, &pb.BlockchainInfo{
108+
Height: 1, CurrentBlockHash: block1Hash, PreviousBlockHash: []byte{}})
109+
110+
//creating the second block but peer fails before committing the transaction to state DB
111+
simulator, _ = ledger.NewTxSimulator()
112+
//simulating transaction
113+
simulator.SetState("ns1", "key1", []byte("value4"))
114+
simulator.SetState("ns1", "key2", []byte("value5"))
115+
simulator.SetState("ns1", "key3", []byte("value6"))
116+
simulator.Done()
117+
simRes, _ = simulator.GetTxSimulationResults()
118+
//generating a block based on the simulation result
119+
block2 := bg.NextBlock([][]byte{simRes}, false)
120+
//performing validation of read and write set to find valid transactions
121+
ledger.RemoveInvalidTransactionsAndPrepare(block2)
122+
//writing the validated block to block storage but not committing the transaction to state DB
123+
ledger.blockStore.AddBlock(ledger.pendingBlockToCommit)
124+
//assume that peer fails here before committing the transaction
125+
126+
bcInfo, _ = ledger.GetBlockchainInfo()
127+
block2Hash := block2.Header.Hash()
128+
testutil.AssertEquals(t, bcInfo, &pb.BlockchainInfo{
129+
Height: 2, CurrentBlockHash: block2Hash, PreviousBlockHash: block1.Header.Hash()})
130+
131+
simulator, _ = ledger.NewTxSimulator()
132+
value, _ := simulator.GetState("ns1", "key1")
133+
//value for 'key1' should be 'value1' as the last commit failed
134+
testutil.AssertEquals(t, value, []byte("value1"))
135+
value, _ = simulator.GetState("ns1", "key2")
136+
//value for 'key2' should be 'value2' as the last commit failed
137+
testutil.AssertEquals(t, value, []byte("value2"))
138+
value, _ = simulator.GetState("ns1", "key3")
139+
//value for 'key3' should be 'value3' as the last commit failed
140+
testutil.AssertEquals(t, value, []byte("value3"))
141+
simulator.Done()
142+
ledger.Close()
143+
144+
//we assume here that the peer comes online and calls NewKVLedger to get a handler for the ledger
145+
//State DB should be recovered before returning from NewKVLedger call
146+
ledger, _ = NewKVLedger(env.conf)
147+
simulator, _ = ledger.NewTxSimulator()
148+
value, _ = simulator.GetState("ns1", "key1")
149+
//value for 'key1' should be 'value4' after recovery
150+
testutil.AssertEquals(t, value, []byte("value4"))
151+
value, _ = simulator.GetState("ns1", "key2")
152+
//value for 'key2' should be 'value5' after recovery
153+
testutil.AssertEquals(t, value, []byte("value5"))
154+
value, _ = simulator.GetState("ns1", "key3")
155+
//value for 'key3' should be 'value6' after recovery
156+
testutil.AssertEquals(t, value, []byte("value6"))
157+
simulator.Done()
158+
ledger.Close()
159+
}

core/ledger/kvledger/txmgmt/couchdbtxmgmt/couchdb_txmgr.go

+24-8
Original file line numberDiff line numberDiff line change
@@ -126,14 +126,23 @@ func (txmgr *CouchDBTxMgr) NewTxSimulator() (ledger.TxSimulator, error) {
126126
}
127127

128128
// ValidateAndPrepare implements method in interface `txmgmt.TxMgr`
129-
func (txmgr *CouchDBTxMgr) ValidateAndPrepare(block *common.Block) (*common.Block, []*pb.InvalidTransaction, error) {
130-
logger.Debugf("===COUCHDB=== Entering CouchDBTxMgr.ValidateAndPrepare()")
129+
func (txmgr *CouchDBTxMgr) ValidateAndPrepare(block *common.Block, doMVCCValidation bool) (*common.Block, []*pb.InvalidTransaction, error) {
130+
if doMVCCValidation == true {
131+
logger.Debugf("===COUCHDB=== Entering CouchDBTxMgr.ValidateAndPrepare()")
132+
logger.Debugf("Validating a block with [%d] transactions", len(block.Data.Data))
133+
} else {
134+
logger.Debugf("New block arrived for write set computation:%#v", block)
135+
logger.Debugf("Computing write set for a block with [%d] transactions", len(block.Data.Data))
136+
}
131137
invalidTxs := []*pb.InvalidTransaction{}
132138
var valid bool
133139
txmgr.updateSet = newUpdateSet()
134140
txmgr.blockNum = block.Header.Number
135-
logger.Debugf("Validating a block with [%d] transactions", len(block.Data.Data))
141+
136142
for txIndex, envBytes := range block.Data.Data {
143+
//TODO: Process valid txs bitmap in block.BlockMetadata.Metadata and skip
144+
//this transaction if found invalid.
145+
137146
// extract actions from the envelope message
138147
respPayload, err := putils.GetActionFromEnvelope(envBytes)
139148
if err != nil {
@@ -152,15 +161,22 @@ func (txmgr *CouchDBTxMgr) ValidateAndPrepare(block *common.Block) (*common.Bloc
152161
// trace the first 2000 characters of RWSet only, in case it is huge
153162
if logger.IsEnabledFor(logging.DEBUG) {
154163
txRWSetString := txRWSet.String()
164+
operation := "validating"
165+
if doMVCCValidation == false {
166+
operation = "computing write set from"
167+
}
155168
if len(txRWSetString) < 2000 {
156-
logger.Debugf("validating txRWSet:[%s]", txRWSetString)
169+
logger.Debugf(operation+" txRWSet:[%s]", txRWSetString)
157170
} else {
158-
logger.Debugf("validating txRWSet:[%s...]", txRWSetString[0:2000])
171+
logger.Debugf(operation+" txRWSet:[%s...]", txRWSetString[0:2000])
159172
}
160173
}
161-
162-
if valid, err = txmgr.validateTx(txRWSet); err != nil {
163-
return nil, nil, err
174+
if doMVCCValidation == true {
175+
if valid, err = txmgr.validateTx(txRWSet); err != nil {
176+
return nil, nil, err
177+
}
178+
} else {
179+
valid = true
164180
}
165181

166182
if valid {

core/ledger/kvledger/txmgmt/lockbasedtxmgmt/lockbased_txmgr.go

+25-8
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,23 @@ func (txmgr *LockBasedTxMgr) NewTxSimulator() (ledger.TxSimulator, error) {
103103
}
104104

105105
// ValidateAndPrepare implements method in interface `txmgmt.TxMgr`
106-
func (txmgr *LockBasedTxMgr) ValidateAndPrepare(block *common.Block) (*common.Block, []*pb.InvalidTransaction, error) {
107-
logger.Debugf("New block arrived for validation:%#v", block)
106+
func (txmgr *LockBasedTxMgr) ValidateAndPrepare(block *common.Block, doMVCCValidation bool) (*common.Block, []*pb.InvalidTransaction, error) {
107+
if doMVCCValidation == true {
108+
logger.Debugf("New block arrived for validation:%#v", block)
109+
logger.Debugf("Validating a block with [%d] transactions", len(block.Data.Data))
110+
} else {
111+
logger.Debugf("New block arrived for write set computation:%#v", block)
112+
logger.Debugf("Computing write set for a block with [%d] transactions", len(block.Data.Data))
113+
}
108114
invalidTxs := []*pb.InvalidTransaction{}
109115
var valid bool
110116
txmgr.updateSet = newUpdateSet()
111117
txmgr.blockNum = block.Header.Number
112-
logger.Debugf("Validating a block with [%d] transactions", len(block.Data.Data))
118+
113119
for txIndex, envBytes := range block.Data.Data {
120+
//TODO: Process valid txs bitmap in block.BlockMetadata.Metadata and skip
121+
//this transaction if found invalid.
122+
114123
// extract actions from the envelope message
115124
respPayload, err := putils.GetActionFromEnvelope(envBytes)
116125
if err != nil {
@@ -129,16 +138,24 @@ func (txmgr *LockBasedTxMgr) ValidateAndPrepare(block *common.Block) (*common.Bl
129138
// trace the first 2000 characters of RWSet only, in case it is huge
130139
if logger.IsEnabledFor(logging.DEBUG) {
131140
txRWSetString := txRWSet.String()
141+
operation := "validating"
142+
if doMVCCValidation == false {
143+
operation = "computing write set from"
144+
}
132145
if len(txRWSetString) < 2000 {
133-
logger.Debugf("validating txRWSet:[%s]", txRWSetString)
146+
logger.Debugf(operation+" txRWSet:[%s]", txRWSetString)
134147
} else {
135-
logger.Debugf("validating txRWSet:[%s...]", txRWSetString[0:2000])
148+
logger.Debugf(operation+" txRWSet:[%s...]", txRWSetString[0:2000])
136149
}
137150
}
138-
139-
if valid, err = txmgr.validateTx(txRWSet); err != nil {
140-
return nil, nil, err
151+
if doMVCCValidation == true {
152+
if valid, err = txmgr.validateTx(txRWSet); err != nil {
153+
return nil, nil, err
154+
}
155+
} else {
156+
valid = true
141157
}
158+
142159
//TODO add the validation info to the bitmap in the metadata of the block
143160
if valid {
144161
committingTxHeight := version.NewHeight(block.Header.Number, uint64(txIndex+1))

core/ledger/kvledger/txmgmt/txmgmt.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ import (
2626
type TxMgr interface {
2727
NewQueryExecutor() (ledger.QueryExecutor, error)
2828
NewTxSimulator() (ledger.TxSimulator, error)
29-
ValidateAndPrepare(block *common.Block) (*common.Block, []*pb.InvalidTransaction, error)
29+
ValidateAndPrepare(block *common.Block, doMVCCValidation bool) (*common.Block, []*pb.InvalidTransaction, error)
30+
GetBlockNumFromSavepoint() (uint64, error)
3031
Commit() error
3132
Rollback()
3233
Shutdown()

0 commit comments

Comments
 (0)