From 361aac9776b46b6a22a56cbc2b48d8e2f661b9d5 Mon Sep 17 00:00:00 2001 From: Chun-Hung Tseng Date: Thu, 18 Apr 2024 17:14:33 +0200 Subject: [PATCH] Fix execution of failpoint should not block deactivation There are 2 main flows of the gofail library: namely enable/disable and execution (`Acquire`) of the failpoints. Currently, a mutex is protecting both flows, thus only one action can make progress at a time. This PR proposes a fine-grained mutex, as each failpoint is protected under a dedicated `RWMutex`. The existing `failpointsMu` will only be protecting the main shared data structures, such as `failpoints` map. Notice that in our current implementation, the execution of the same failpoint is still sequential (there is a lock within `eval` on the term being executed) Reference: - https://github.com/etcd-io/gofail/issues/64 Signed-off-by: Chun-Hung Tseng --- runtime/failpoint.go | 16 ++++++++++++---- runtime/runtime.go | 11 +++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/runtime/failpoint.go b/runtime/failpoint.go index a470417..7559140 100644 --- a/runtime/failpoint.go +++ b/runtime/failpoint.go @@ -16,10 +16,13 @@ package runtime import ( "fmt" + "sync" ) type Failpoint struct { t *terms + + failpointMu sync.RWMutex } func NewFailpoint(name string) *Failpoint { @@ -28,14 +31,19 @@ func NewFailpoint(name string) *Failpoint { // Acquire gets evalutes the failpoint terms; if the failpoint // is active, it will return a value. Otherwise, returns a non-nil error. +// +// Notice that during the exection of Acquire(), the failpoint can be disabled, +// but the already in-flight execution won't be terminated func (fp *Failpoint) Acquire() (interface{}, error) { - failpointsMu.RLock() - defer failpointsMu.RUnlock() + fp.failpointMu.RLock() + // terms are locked during execution, so deepcopy is not required as no change can be made during execution + cachedT := fp.t + fp.failpointMu.RUnlock() - if fp.t == nil { + if cachedT == nil { return nil, ErrDisabled } - result := fp.t.eval() + result := cachedT.eval() if result == nil { return nil, ErrDisabled } diff --git a/runtime/runtime.go b/runtime/runtime.go index 938aa4d..2356980 100644 --- a/runtime/runtime.go +++ b/runtime/runtime.go @@ -86,6 +86,10 @@ func enable(name, inTerms string) error { fmt.Printf("failed to enable \"%s=%s\" (%v)\n", name, inTerms, err) return err } + + fp.failpointMu.Lock() + defer fp.failpointMu.Unlock() + fp.t = t return nil @@ -104,6 +108,9 @@ func disable(name string) error { return ErrNoExist } + fp.failpointMu.Lock() + defer fp.failpointMu.Unlock() + if fp.t == nil { return ErrDisabled } @@ -116,6 +123,7 @@ func disable(name string) error { func Status(failpath string) (string, int, error) { failpointsMu.Lock() defer failpointsMu.Unlock() + return status(failpath) } @@ -125,6 +133,9 @@ func status(failpath string) (string, int, error) { return "", 0, ErrNoExist } + fp.failpointMu.RLock() + defer fp.failpointMu.RUnlock() + t := fp.t if t == nil { return "", 0, ErrDisabled