Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement sleep failpoint injection #16776

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,5 @@ require (
sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

replace go.etcd.io/gofail => github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfr
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o=
github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230 h1:s1uu+CN6zMaNwK3tyCNujqKwAKCVz2+C3qGc5boukUA=
github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
Expand Down Expand Up @@ -156,8 +158,6 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ=
go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw=
go.etcd.io/gofail v0.1.0 h1:XItAMIhOojXFQMgrxjnd2EIIHun/d5qL0Pf7FzVTkFg=
go.etcd.io/gofail v0.1.0/go.mod h1:VZBCXYGZhHAinaBiiqYvuDynvahNsAyLFwB3kEHKz1M=
go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a h1:Znv2XJyAf/fsJsFNt9toO8uyXwwHQ44wxqsvdSxipj4=
go.etcd.io/raft/v3 v3.0.0-20221201111702-eaa6808e1f7a/go.mod h1:eMshmuwXLWZrjHXN8ZgYrOMQRSbHqi5M84DEZWhG+o4=
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.45.0 h1:RsQi0qJ2imFfCvZabqzM9cNXBG8k6gXMv1A0cXRmH6A=
Expand Down
2 changes: 2 additions & 0 deletions server/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,5 @@ replace (
replace go.etcd.io/etcd => ./FORBIDDEN_DEPENDENCY

replace go.etcd.io/tests/v3 => ./FORBIDDEN_DEPENDENCY

replace go.etcd.io/gofail => github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230
32 changes: 32 additions & 0 deletions tests/framework/e2e/etcd_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"net/http"
"net/url"
"os"
"strconv"
"strings"
"syscall"
"testing"
Expand Down Expand Up @@ -369,6 +370,37 @@ func (f *BinaryFailpoints) SetupHTTP(ctx context.Context, failpoint, payload str
return nil
}

// Count queries the member's gofail HTTP endpoint at "<failpoint>/count" on
// 127.0.0.1:<GoFailPort> and returns the integer found in the response body.
//
// It returns an error when the request cannot be built or sent, when the
// response status is not 200 OK, when the body cannot be read, or when the
// body is not a base-10 integer. The request is bound to ctx and is also
// subject to the timeout of the shared httpClient.
func (f *BinaryFailpoints) Count(ctx context.Context, failpoint string) (int64, error) {
	host := fmt.Sprintf("127.0.0.1:%d", f.member.Config().GoFailPort)
	countURL := url.URL{
		Scheme: "http",
		Host:   host,
		Path:   failpoint + "/count",
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, countURL.String(), nil)
	if err != nil {
		return 0, err
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return 0, err
	}
	// A single deferred Close covers every return path below.
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return 0, fmt.Errorf("bad status code: %d", resp.StatusCode)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return 0, err
	}
	// Tolerate surrounding whitespace (e.g. a trailing newline) in the payload.
	count, err := strconv.ParseInt(strings.TrimSpace(string(body)), 10, 64)
	if err != nil {
		return 0, fmt.Errorf("parsing count of failpoint %q from body %q: %w", failpoint, body, err)
	}
	return count, nil
}

// httpClient is the package-level HTTP client used for failpoint requests;
// every request made through it is capped at 10ms.
var httpClient = http.Client{Timeout: 10 * time.Millisecond}
Expand Down
52 changes: 51 additions & 1 deletion tests/robustness/failpoint/failpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ var (
RaftBeforeSaveSnapPanic Failpoint = goPanicFailpoint{"raftBeforeSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
RaftAfterSaveSnapPanic Failpoint = goPanicFailpoint{"raftAfterSaveSnap", triggerBlackhole{waitTillSnapshot: true}, Follower}
beforeApplyOneConfChangeSleep Failpoint = killAndGofailSleep{"beforeApplyOneConfChange", time.Second}
BeforeCommitSleep Failpoint = gofailSleep{"beforeCommit", time.Second}
AfterCommitSleep Failpoint = gofailSleep{"afterCommit", time.Second}
RaftBeforeSaveSleep Failpoint = gofailSleep{"raftBeforeSave", 10 * time.Millisecond}
allFailpoints = []Failpoint{
KillFailpoint, BeforeCommitPanic, AfterCommitPanic, RaftBeforeSavePanic, RaftAfterSavePanic,
DefragBeforeCopyPanic, DefragBeforeRenamePanic, BackendBeforePreCommitHookPanic, BackendAfterPreCommitHookPanic,
Expand All @@ -77,6 +80,8 @@ var (
RaftBeforeFollowerSendPanic, RaftBeforeApplySnapPanic, RaftAfterApplySnapPanic, RaftAfterWALReleasePanic,
RaftBeforeSaveSnapPanic, RaftAfterSaveSnapPanic, BlackholeUntilSnapshot,
beforeApplyOneConfChangeSleep,
BeforeCommitSleep, AfterCommitSleep,
RaftBeforeSaveSleep,
}
)

Expand Down Expand Up @@ -560,7 +565,7 @@ func (f killAndGofailSleep) Inject(ctx context.Context, t *testing.T, lg *zap.Lo
}

// Name returns the human-readable identifier of this failpoint: the literal
// prefix "kill, " followed by "<failpoint>=sleep(<duration>)".
func (f killAndGofailSleep) Name() string {
	// Only the post-change return is kept; the earlier unprefixed return made
	// this one unreachable.
	return fmt.Sprintf("kill, %s=sleep(%s)", f.failpoint, f.time)
}

func (f killAndGofailSleep) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
Expand All @@ -570,3 +575,48 @@ func (f killAndGofailSleep) Available(config e2e.EtcdProcessClusterConfig, membe
}
return memberFailpoints.Available(f.failpoint)
}

// gofailSleep is a failpoint that configures a gofail "sleep" action on a
// cluster member without killing it.
type gofailSleep struct {
// failpoint is the gofail failpoint name passed to SetupHTTP and Count.
failpoint string
// time is the duration formatted into the sleep(...) payload.
time time.Duration
}

// Inject arms the sleep failpoint on one randomly chosen member of clus and
// then polls that member's failpoint count every 100ms until it is positive.
//
// It returns an error if the cluster has no processes or if arming the
// failpoint fails, ctx.Err() if ctx ends before a positive count is observed,
// and nil otherwise. Errors from individual count queries are logged and the
// poll is retried. t is not used.
func (f gofailSleep) Inject(ctx context.Context, t *testing.T, lg *zap.Logger, clus *e2e.EtcdProcessCluster) error {
	if len(clus.Procs) == 0 {
		// Guard the modulo below against a division by zero.
		return fmt.Errorf("no cluster members to inject failpoint into")
	}
	member := clus.Procs[rand.Int()%len(clus.Procs)]
	if err := member.Failpoints().SetupHTTP(ctx, f.failpoint, fmt.Sprintf(`sleep(%q)`, f.time)); err != nil {
		return err
	}

	ticker := time.NewTicker(100 * time.Millisecond)
	defer ticker.Stop()
	for {
		// Wait for the next poll, but stop promptly when ctx is cancelled.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
		}
		count, err := member.Failpoints().Count(ctx, f.failpoint)
		if err != nil {
			// Best effort: a failed query is retried on the next tick.
			lg.Warn("Failed to read failpoint count", zap.String("failpoint", f.failpoint), zap.Error(err))
			continue
		}
		lg.Info("Failpoint count", zap.String("failpoint", f.failpoint), zap.Int64("count", count))
		if count > 0 {
			return nil
		}
	}
}

// Name returns the identifier of this failpoint in the form
// "<failpoint>=sleep(<duration>)".
func (f gofailSleep) Name() string {
	const layout = "%s=sleep(%s)"
	name, pause := f.failpoint, f.time
	return fmt.Sprintf(layout, name, pause)
}

// Available reports whether member exposes this failpoint. It returns false
// when member.Failpoints() is nil; otherwise it defers to that value's
// Available method. config is not consulted.
func (f gofailSleep) Available(config e2e.EtcdProcessClusterConfig, member e2e.EtcdProcess) bool {
	if fps := member.Failpoints(); fps != nil {
		return fps.Available(f.failpoint)
	}
	return false
}
1 change: 1 addition & 0 deletions tools/mod/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,4 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/klog/v2 v2.80.1 // indirect
)
replace go.etcd.io/gofail => github.com/pchan/gofail v0.1.1-0.20230605030243-4e2ac034f230
Loading