From 542c64f728e0a147892a1306c5086bca87cfa8f5 Mon Sep 17 00:00:00 2001 From: Marek Siarkowicz Date: Mon, 16 Oct 2023 21:59:15 +0200 Subject: [PATCH] Implement sleep failpoint injection Signed-off-by: Marek Siarkowicz --- go.mod | 2 + go.sum | 4 +- server/go.mod | 2 + tests/framework/e2e/etcd_process.go | 32 +++++++++++++++ tests/robustness/failpoint/failpoint.go | 52 ++++++++++++++++++++++++- tools/mod/go.mod | 1 + 6 files changed, 90 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 49bc0a6aa6b..e4e4084be78 100644 --- a/go.mod +++ b/go.mod @@ -99,3 +99,5 @@ require ( sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect sigs.k8s.io/yaml v1.3.0 // indirect ) + +replace go.etcd.io/gofail => github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230 diff --git a/go.sum b/go.sum index 2b2e1dd12d7..3b167bd3330 100644 --- a/go.sum +++ b/go.sum @@ -114,6 +114,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230 h1:s1uu+CN6zMaNwK3tyCNujqKwAKCVz2+C3qGc5boukUA= +github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -156,8 +158,6 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= 
go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw=
-go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
-go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
 go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a h1:Znv2XJyAf/fsJsFNt9toO8uyXwwHQ44wxqsvdSxipj4=
 go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a/go.mod h1:eMshmuwXLWZrjHXN8ZgYrOMQRSbHqi5M84DEZWhG+o4=
 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0 h1:RsQi0qJ2imFfCvZabqzM9cNXBG8k6gXMv1A0cXRmH6A=
diff --git a/server/go.mod b/server/go.mod
index 935d06bd76c..a3f9161cdb2 100644
--- a/server/go.mod
+++ b/server/go.mod
@@ -86,3 +86,5 @@ replace (
 replace go.etcd.io/etcd => ./FORBIDDEN_DEPENDENCY
 
 replace go.etcd.io/tests/v3 => ./FORBIDDEN_DEPENDENCY
+
+replace go.etcd.io/gofail => github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230
diff --git a/tests/framework/e2e/etcd_process.go b/tests/framework/e2e/etcd_process.go
index f6d53d3f1b6..2426c1b15e6 100644
--- a/tests/framework/e2e/etcd_process.go
+++ b/tests/framework/e2e/etcd_process.go
@@ -23,6 +23,7 @@ import (
 	"net/http"
 	"net/url"
 	"os"
+	"strconv"
 	"strings"
 	"syscall"
 	"testing"
@@ -369,6 +370,37 @@ func (f *BinaryFailpoints) SetupHTTP(ctx context.Context, failpoint, payload str
 	return nil
 }
 
+// Count issues GET <failpoint>/count against the member's gofail HTTP port and returns the body parsed as an integer.
+func (f *BinaryFailpoints) Count(ctx context.Context, failpoint string) (int64, error) {
+	host := fmt.Sprintf("127.0.0.1:%d", f.member.Config().GoFailPort)
+	failpointURL := url.URL{
+		Scheme: "http",
+		Host:   host,
+		Path:   failpoint + "/count",
+	}
+	r, err := http.NewRequestWithContext(ctx, http.MethodGet, failpointURL.String(), nil)
+	if err != nil {
+		return 0, err
+	}
+	resp, err := httpClient.Do(r)
+	if err != nil {
+		return 0, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return 0, fmt.Errorf("bad status code: %d", resp.StatusCode)
+	}
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return 0, err
+	}
+	count, err := strconv.ParseInt(strings.TrimSpace(string(body)), 10, 64)
+	if err != nil {
+		return 0, err
+	}
+	return count, nil
+}
+
 var httpClient = http.Client{
 	Timeout: 10 * time.Millisecond,
 }
diff --git a/tests/robustness/failpoint/failpoint.go b/tests/robustness/failpoint/failpoint.go
index 1363c84296e..e3d66bc84ff 100644
--- a/tests/robustness/failpoint/failpoint.go
+++ b/tests/robustness/failpoint/failpoint.go
@@ -67,6 +67,9 @@ var (
 	RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
 	RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
 	beforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
+	BeforeCommitSleep Failpoint = gofailSleep{"beforeCommit", time.Second}
+	AfterCommitSleep Failpoint = gofailSleep{"afterCommit", time.Second}
+	RaftBeforeSaveSleep Failpoint = gofailSleep{"raftBeforeSave", 10 * time.Millisecond}
 	allFailpoints = []Failpoint{
 		KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic,
 		DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic,
@@ -77,6 +80,8 @@ var (
 		RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic,
 		RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
 		beforeApplyOneConfChangeSleep,
+		BeforeCommitSleep, AfterCommitSleep,
+		RaftBeforeSaveSleep,
 	}
 )
 
@@ -560,7 +565,7 @@ func (f killAndGofailSleep) Inject(ctx context.Context, t *testing.T, lg *zap.Lo
 }
 
 func (f killAndGofailSleep) Name() string {
-	return fmt.Sprintf("%s=sleep(%s)", f.failpoint, f.time)
+	return fmt.Sprintf("kill, %s=sleep(%s)", f.failpoint, f.time)
 }
 
 func (f killAndGofailSleep) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
@@ -570,3 +575,48 @@ func (f killAndGofailSleep) Available(config 
e2e.EtcdProcessClusterConfig, membe
 	}
 	return memberFailpoints.Available(f.failpoint)
 }
+
+// gofailSleep is a Failpoint that sets a sleep(time) term on failpoint through a member's HTTP failpoint API.
+type gofailSleep struct {
+	failpoint string
+	time      time.Duration
+}
+
+// Inject sets sleep(time) on a randomly picked member, then polls Count every 100ms until it is above zero or ctx is done.
+func (f gofailSleep) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
+	member := clus.Procs[rand.Int()%len(clus.Procs)]
+	err := member.Failpoints().SetupHTTP(ctx, f.failpoint, fmt.Sprintf(`sleep(%q)`, f.time))
+	if err != nil {
+		return err
+	}
+	for {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		case <-time.After(100 * time.Millisecond):
+		}
+		count, err := member.Failpoints().Count(ctx, f.failpoint)
+		if err != nil {
+			// Best effort: a failed poll is simply retried on the next tick; ctx bounds the total wait.
+			continue
+		}
+		lg.Info("Failpoint count", zap.String("failpoint", f.failpoint), zap.Int64("count", count))
+		if count > 0 {
+			return nil
+		}
+	}
+}
+
+// Name returns the failpoint term formatted as "<failpoint>=sleep(<time>)".
+func (f gofailSleep) Name() string {
+	return fmt.Sprintf("%s=sleep(%s)", f.failpoint, f.time)
+}
+
+// Available reports whether member has a failpoints handle and that handle lists f.failpoint as available.
+func (f gofailSleep) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
+	memberFailpoints := member.Failpoints()
+	if memberFailpoints == nil {
+		return false
+	}
+	return memberFailpoints.Available(f.failpoint)
+}
diff --git a/tools/mod/go.mod b/tools/mod/go.mod
index a1a0f1bc3a3..8275df0e2e2 100644
--- a/tools/mod/go.mod
+++ b/tools/mod/go.mod
@@ -82,3 +82,4 @@ require (
 	gopkg.in/yaml.v3 v3.0.1 // indirect
 	k8s.io/klog/v2 v2.80.1 // indirect
 )
+replace go.etcd.io/gofail => github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230