Skip to content

Commit b3eef4c

Browse files
Chris Elderdenyeart
Chris Elder
authored andcommitted
[FAB-3686] CouchDB timeout causes error upon retry
If a state db document update to CouchDB has a http timeout (currently defaulted at 35s), the transaction may eventually succeed in CouchDB, but the peer will do a retry using the previous CouchDB document revision number. The retry will therefore fail due to revision number conflict. The peer will think it failed, when in fact it succeeded. Peer panics when it can't process the block to completion, since subsequent blocks should not be processed. Need to fix retry logic to account for this scenario, potentially by introducing a higher level retry that could get the new revision number for a final attempt. Implement a retry in the couchdb SaveDoc/DeleteDoc functions to retry if the return code is 409 (revision conflict). If the return code is 409, then retrieve the current revision for the document ID and retry. This logic is separate from the timeout logic in handleRequest is only used for resolving document revision conflicts caused by CouchDB timeouts. Change-Id: I2ef79b63397a8f76e295dc7c562f5cc7f86da73d Signed-off-by: Chris Elder <[email protected]> Signed-off-by: David Enyeart <[email protected]>
1 parent 3079333 commit b3eef4c

File tree

2 files changed

+127
-44
lines changed

2 files changed

+127
-44
lines changed

core/ledger/util/couchdb/couchdb.go

+79-44
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ func (dbclient *CouchDatabase) EnsureFullCommit() (*DBOperationResponse, error)
465465
func (dbclient *CouchDatabase) SaveDoc(id string, rev string, couchDoc *CouchDoc) (string, error) {
466466

467467
logger.Debugf("Entering SaveDoc() id=[%s]", id)
468+
468469
if !utf8.ValidString(id) {
469470
return "", fmt.Errorf("doc id [%x] not a valid utf8 string", id)
470471
}
@@ -479,19 +480,6 @@ func (dbclient *CouchDatabase) SaveDoc(id string, rev string, couchDoc *CouchDoc
479480
// id can contain a '/', so encode separately
480481
saveURL = &url.URL{Opaque: saveURL.String() + "/" + encodePathElement(id)}
481482

482-
if rev == "" {
483-
484-
//See if the document already exists, we need the rev for save
485-
_, revdoc, err2 := dbclient.ReadDoc(id)
486-
if err2 != nil {
487-
//set the revision to indicate that the document was not found
488-
rev = ""
489-
} else {
490-
//set the revision to the rev returned from the document read
491-
rev = revdoc
492-
}
493-
}
494-
495483
logger.Debugf(" rev=%s", rev)
496484

497485
//Set up a buffer for the data to be pushed to couchdb
@@ -540,9 +528,10 @@ func (dbclient *CouchDatabase) SaveDoc(id string, rev string, couchDoc *CouchDoc
540528
//get the number of retries
541529
maxRetries := dbclient.CouchInstance.conf.MaxRetries
542530

543-
//handle the request for saving the JSON or attachments
544-
resp, _, err := dbclient.CouchInstance.handleRequest(http.MethodPut, saveURL.String(), data,
545-
rev, defaultBoundary, maxRetries, keepConnectionOpen)
531+
//handle the request for saving document with a retry if there is a revision conflict
532+
resp, _, err := dbclient.handleRequestWithRevisionRetry(id, http.MethodPut,
533+
*saveURL, data, rev, defaultBoundary, maxRetries, keepConnectionOpen)
534+
546535
if err != nil {
547536
return "", err
548537
}
@@ -560,6 +549,20 @@ func (dbclient *CouchDatabase) SaveDoc(id string, rev string, couchDoc *CouchDoc
560549

561550
}
562551

552+
//getDocumentRevision will return the revision if the document exists, otherwise it will return ""
553+
func (dbclient *CouchDatabase) getDocumentRevision(id string) string {
554+
555+
var rev = ""
556+
557+
//See if the document already exists, we need the rev for saves and deletes
558+
_, revdoc, err := dbclient.ReadDoc(id)
559+
if err == nil {
560+
//set the revision to the rev returned from the document read
561+
rev = revdoc
562+
}
563+
return rev
564+
}
565+
563566
func createAttachmentPart(couchDoc *CouchDoc, defaultBoundary string) (bytes.Buffer, string, error) {
564567

565568
//Create a buffer for writing the result
@@ -646,7 +649,8 @@ func getRevisionHeader(resp *http.Response) (string, error) {
646649

647650
}
648651

649-
//ReadDoc method provides function to retrieve a document from the database by id
652+
//ReadDoc method provides function to retrieve a document and its revision
653+
//from the database by id
650654
func (dbclient *CouchDatabase) ReadDoc(id string) (*CouchDoc, string, error) {
651655
var couchDoc CouchDoc
652656
attachments := []*Attachment{}
@@ -906,27 +910,14 @@ func (dbclient *CouchDatabase) DeleteDoc(id, rev string) error {
906910
// id can contain a '/', so encode separately
907911
deleteURL = &url.URL{Opaque: deleteURL.String() + "/" + encodePathElement(id)}
908912

909-
if rev == "" {
910-
911-
//See if the document already exists, we need the rev for delete
912-
_, revdoc, err2 := dbclient.ReadDoc(id)
913-
if err2 != nil {
914-
//set the revision to indicate that the document was not found
915-
rev = ""
916-
} else {
917-
//set the revision to the rev returned from the document read
918-
rev = revdoc
919-
}
920-
}
921-
922-
logger.Debugf(" rev=%s", rev)
923-
924913
//get the number of retries
925914
maxRetries := dbclient.CouchInstance.conf.MaxRetries
926915

927-
resp, couchDBReturn, err := dbclient.CouchInstance.handleRequest(http.MethodDelete, deleteURL.String(), nil, rev, "", maxRetries, true)
916+
//handle the request for saving document with a retry if there is a revision conflict
917+
resp, couchDBReturn, err := dbclient.handleRequestWithRevisionRetry(id, http.MethodDelete,
918+
*deleteURL, nil, "", "", maxRetries, true)
919+
928920
if err != nil {
929-
fmt.Printf("couchDBReturn=%v", couchDBReturn)
930921
if couchDBReturn != nil && couchDBReturn.StatusCode == 404 {
931922
logger.Debug("Document not found (404), returning nil value instead of 404 error")
932923
// non-existent document should return nil value instead of a 404 error
@@ -1173,9 +1164,52 @@ func (dbclient *CouchDatabase) BatchUpdateDocuments(documents []*CouchDoc) ([]*B
11731164

11741165
}
11751166

1167+
//handleRequestWithRevisionRetry method is a generic http request handler with
1168+
//a retry for document revision conflict errors,
1169+
//which may be detected during saves or deletes that timed out from client http perspective,
1170+
//but which eventually succeeded in couchdb
1171+
func (dbclient *CouchDatabase) handleRequestWithRevisionRetry(id, method string, connectURL url.URL, data []byte, rev string,
1172+
multipartBoundary string, maxRetries int, keepConnectionOpen bool) (*http.Response, *DBReturn, error) {
1173+
1174+
//Initialize a flag for the revsion conflict
1175+
revisionConflictDetected := false
1176+
var resp *http.Response
1177+
var couchDBReturn *DBReturn
1178+
var errResp error
1179+
1180+
//attempt the http request for the max number of retries
1181+
//In this case, the retry is to catch problems where a client timeout may miss a
1182+
//successful CouchDB update and cause a document revision conflict on a retry in handleRequest
1183+
for attempts := 0; attempts < maxRetries; attempts++ {
1184+
1185+
//if the revision was not passed in, or if a revision conflict is detected on prior attempt,
1186+
//query CouchDB for the document revision
1187+
if rev == "" || revisionConflictDetected {
1188+
rev = dbclient.getDocumentRevision(id)
1189+
}
1190+
1191+
//handle the request for saving/deleting the couchdb data
1192+
resp, couchDBReturn, errResp = dbclient.CouchInstance.handleRequest(method, connectURL.String(),
1193+
data, rev, multipartBoundary, maxRetries, keepConnectionOpen)
1194+
1195+
//If there was a 409 conflict error during the save/delete, log it and retry it.
1196+
//Otherwise, break out of the retry loop
1197+
if couchDBReturn != nil && couchDBReturn.StatusCode == 409 {
1198+
logger.Warningf("CouchDB document revision conflict detected, retrying. Attempt:%v", attempts+1)
1199+
revisionConflictDetected = true
1200+
} else {
1201+
break
1202+
}
1203+
}
1204+
1205+
// return the handleRequest results
1206+
return resp, couchDBReturn, errResp
1207+
}
1208+
11761209
//handleRequest method is a generic http request handler.
1177-
// if it returns an error, it ensures that the response body is closed, else it is the
1178-
// callee's responsibility to close response correctly
1210+
// If it returns an error, it ensures that the response body is closed, else it is the
1211+
// callee's responsibility to close response correctly.
1212+
// Any http error or CouchDB error (4XX or 500) will result in a golang error getting returned
11791213
func (couchInstance *CouchInstance) handleRequest(method, connectURL string, data []byte, rev string,
11801214
multipartBoundary string, maxRetries int, keepConnectionOpen bool) (*http.Response, *DBReturn, error) {
11811215

@@ -1251,20 +1285,20 @@ func (couchInstance *CouchInstance) handleRequest(method, connectURL string, dat
12511285
//Execute http request
12521286
resp, errResp = couchInstance.client.Do(req)
12531287

1254-
//if an error is not detected then drop out of the retry
1288+
//if there is no golang http error and no CouchDB 500 error, then drop out of the retry
12551289
if errResp == nil && resp != nil && resp.StatusCode < 500 {
12561290
break
12571291
}
12581292

1259-
//if this is an error, record the retry error, else this is a 500 error
1293+
//if this is an unexpected golang http error, log the error and retry
12601294
if errResp != nil {
12611295

12621296
//Log the error with the retry count and continue
12631297
logger.Warningf("Retrying couchdb request in %s. Attempt:%v Error:%v",
12641298
waitDuration.String(), attempts+1, errResp.Error())
12651299

1300+
//otherwise this is an unexpected 500 error from CouchDB. Log the error and retry.
12661301
} else {
1267-
12681302
//Read the response body and close it for next attempt
12691303
jsonError, err := ioutil.ReadAll(resp.Body)
12701304
closeResponseBody(resp)
@@ -1288,18 +1322,19 @@ func (couchInstance *CouchInstance) handleRequest(method, connectURL string, dat
12881322
//backoff, doubling the retry time for next attempt
12891323
waitDuration *= 2
12901324

1291-
}
1325+
} // end retry loop
12921326

1293-
//if the error present, return the error
1327+
//if a golang http error is still present after retries are exhausted, return the error
12941328
if errResp != nil {
12951329
return nil, nil, errResp
12961330
}
12971331

12981332
//set the return code for the couchDB request
12991333
couchDBReturn.StatusCode = resp.StatusCode
13001334

1301-
//check to see if the status code is 400 or higher
1302-
//response codes 4XX and 500 will be treated as errors
1335+
//check to see if the status code from couchdb is 400 or higher
1336+
//response codes 4XX and 500 will be treated as errors -
1337+
//golang error will be created from the couchDBReturn contents and both will be returned
13031338
if resp.StatusCode >= 400 {
13041339
// close the response before returning error
13051340
defer closeResponseBody(resp)
@@ -1325,7 +1360,7 @@ func (couchInstance *CouchInstance) handleRequest(method, connectURL string, dat
13251360

13261361
logger.Debugf("Exiting handleRequest()")
13271362

1328-
//If no errors, then return the results
1363+
//If no errors, then return the http response and the couchdb return object
13291364
return resp, couchDBReturn, nil
13301365
}
13311366

core/ledger/util/couchdb/couchdb_test.go

+48
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,54 @@ func TestDBRequestTimeout(t *testing.T) {
546546
}
547547
}
548548

549+
func TestDBTimeoutConflictRetry(t *testing.T) {
550+
551+
if ledgerconfig.IsCouchDBEnabled() {
552+
553+
database := "testdbtimeoutretry"
554+
err := cleanup(database)
555+
testutil.AssertNoError(t, err, fmt.Sprintf("Error when trying to cleanup Error: %s", err))
556+
defer cleanup(database)
557+
558+
// if there was an error upon cleanup, return here
559+
if err != nil {
560+
return
561+
}
562+
563+
//create a new instance and database object
564+
couchInstance, err := CreateCouchInstance(couchDBDef.URL, couchDBDef.Username, couchDBDef.Password,
565+
couchDBDef.MaxRetries, 3, couchDBDef.RequestTimeout)
566+
testutil.AssertNoError(t, err, fmt.Sprintf("Error when trying to create couch instance"))
567+
db := CouchDatabase{CouchInstance: *couchInstance, DBName: database}
568+
569+
//create a new database
570+
_, errdb := db.CreateDatabaseIfNotExist()
571+
testutil.AssertNoError(t, errdb, fmt.Sprintf("Error when trying to create database"))
572+
573+
//Retrieve the info for the new database and make sure the name matches
574+
dbResp, _, errdb := db.GetDatabaseInfo()
575+
testutil.AssertNoError(t, errdb, fmt.Sprintf("Error when trying to retrieve database information"))
576+
testutil.AssertEquals(t, dbResp.DbName, database)
577+
578+
//Save the test document
579+
_, saveerr := db.SaveDoc("1", "", &CouchDoc{JSONValue: assetJSON, Attachments: nil})
580+
testutil.AssertNoError(t, saveerr, fmt.Sprintf("Error when trying to save a document"))
581+
582+
//Retrieve the test document
583+
_, _, geterr := db.ReadDoc("1")
584+
testutil.AssertNoError(t, geterr, fmt.Sprintf("Error when trying to retrieve a document"))
585+
586+
//Save the test document with an invalid rev. This should cause a retry
587+
_, saveerr = db.SaveDoc("1", "1-11111111111111111111111111111111", &CouchDoc{JSONValue: assetJSON, Attachments: nil})
588+
testutil.AssertNoError(t, saveerr, fmt.Sprintf("Error when trying to save a document with a revision conflict"))
589+
590+
//Delete the test document with an invalid rev. This should cause a retry
591+
deleteerr := db.DeleteDoc("1", "1-11111111111111111111111111111111")
592+
testutil.AssertNoError(t, deleteerr, fmt.Sprintf("Error when trying to delete a document with a revision conflict"))
593+
594+
}
595+
}
596+
549597
func TestDBBadJSON(t *testing.T) {
550598

551599
if ledgerconfig.IsCouchDBEnabled() {

0 commit comments

Comments
 (0)