From 10c9f1ef7ca4a629e9dc45931ec6404effdacf81 Mon Sep 17 00:00:00 2001 From: Emmanuel T Odeke Date: Tue, 17 Oct 2023 14:09:55 +0300 Subject: [PATCH] fix(nodedb): prevent deadlock by releasing DeleteVersionsFrom mutex on error (#843) --- nodedb.go | 1 + nodedb_test.go | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/nodedb.go b/nodedb.go index 8c22e06c9..6d4471b60 100644 --- a/nodedb.go +++ b/nodedb.go @@ -493,6 +493,7 @@ func (ndb *nodeDB) DeleteVersionsFrom(fromVersion int64) error { ndb.mtx.Lock() for v, r := range ndb.versionReaders { if v >= fromVersion && r != 0 { + ndb.mtx.Unlock() // Unlock before exiting return fmt.Errorf("unable to delete version %v with %v active readers", v, r) } } diff --git a/nodedb_test.go b/nodedb_test.go index bc36ce3ec..531604866 100644 --- a/nodedb_test.go +++ b/nodedb_test.go @@ -5,6 +5,7 @@ import ( "fmt" "strconv" "testing" + "time" log "cosmossdk.io/log" db "github.com/cosmos/cosmos-db" @@ -398,3 +399,47 @@ func makeAndPopulateMutableTree(tb testing.TB) *MutableTree { require.Nil(tb, err, "Expected .SaveVersion to succeed") return tree } + +func TestDeleteVersionsFromNoDeadlock(t *testing.T) { + const expectedVersion = fastStorageVersionValue + + db := db.NewMemDB() + + ndb := newNodeDB(db, 0, DefaultOptions(), log.NewNopLogger()) + require.Equal(t, defaultStorageVersionValue, ndb.getStorageVersion()) + + err := ndb.setFastStorageVersionToBatch() + require.NoError(t, err) + + latestVersion, err := ndb.getLatestVersion() + require.NoError(t, err) + require.Equal(t, expectedVersion+fastStorageVersionDelimiter+strconv.Itoa(int(latestVersion)), ndb.getStorageVersion()) + require.NoError(t, ndb.batch.Write()) + + // Reported in https://github.com/cosmos/iavl/issues/842 + // there was a deadlock that triggered on an invalid version being + // checked for deletion. + // Now add in data to trigger the error path. + ndb.versionReaders[latestVersion+1] = 2 + + errCh := make(chan error) + targetVersion := latestVersion - 1 + + go func() { + defer close(errCh) + errCh <- ndb.DeleteVersionsFrom(targetVersion) + }() + + select { + case err = <-errCh: + // Happy path, the mutex was unlocked fast enough. + + case <-time.After(2 * time.Second): + t.Error("code did not return even after 2 seconds") + } + + require.True(t, ndb.mtx.TryLock(), "tryLock failed mutex was still locked") + ndb.mtx.Unlock() // Since TryLock passed, the lock is now solely being held by us. + require.Error(t, err, "") + require.Contains(t, err.Error(), fmt.Sprintf("unable to delete version %v with 2 active readers", targetVersion+2)) +}