Skip to content

Commit

Permalink
[CONFIG-329][CONFIG-330] End to end snapshot integrity check (#124)
Browse files Browse the repository at this point in the history
* refactor supervisor to use sdk v2 and add checksum

* open another reader to get checksum

* make generate

* refactoring

* hex encoding

* don't calculate

* use metadata

* WIP test

* wip

* wip

* WIP

* WIP

* WIP

* fix install

* WIP

* wip

* wip

* chmod

* install s3cmd from alpine

* install aws-cli in alpine

* install shasum in alpine

* wip

* test if sha value matches

* wip

* fix shell syntax error

* add 5 attempts threshold

* skip checksum validation if null

* test unhappy path

* revert false negative

* address feedbacks

* fix script

* test unhappy path

* revert false negative

* using sha1

* update log to avoid confusion

* fix wording

---------

Co-authored-by: Hongyu Zhou <[email protected]>
  • Loading branch information
zhou-hongyu and Hongyu Zhou authored Oct 6, 2023
1 parent 47950dd commit 146e400
Show file tree
Hide file tree
Showing 7 changed files with 532 additions and 131 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ RUN CGO_ENABLED=1 go install -ldflags="-X github.com/segmentio/ctlstore/pkg/vers
&& cp ${GOPATH}/bin/ctlstore-cli /usr/local/bin

FROM alpine
RUN apk --no-cache add sqlite pigz
RUN apk --no-cache add sqlite pigz aws-cli perl-utils jq

COPY --from=0 /go/src/github.com/segmentio/ctlstore/scripts/download.sh .
COPY --from=0 /bin/chamber /bin/chamber
Expand Down
21 changes: 20 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ go 1.20
require (
github.com/AlekSi/pointer v1.0.0
github.com/aws/aws-sdk-go v1.37.8
github.com/aws/aws-sdk-go-v2/config v1.18.40
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84
github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5
github.com/fsnotify/fsnotify v1.5.1
github.com/go-sql-driver/mysql v1.4.1
github.com/google/go-cmp v0.5.6
github.com/google/go-cmp v0.5.8
github.com/google/uuid v1.1.2
github.com/gorilla/mux v1.7.3
github.com/julienschmidt/httprouter v1.2.0
Expand All @@ -23,6 +26,22 @@ require (
)

require (
github.com/aws/aws-sdk-go-v2 v1.21.0 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.13 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.13.38 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.11 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.42 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.1.4 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.14 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.36 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.35 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.15.4 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.14.0 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.22.0 // indirect
github.com/aws/smithy-go v1.14.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/mdlayher/genetlink v0.0.0-20190313224034-60417448a851 // indirect
Expand Down
43 changes: 40 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,44 @@ github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJs
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/aws/aws-sdk-go v1.37.8 h1:9kywcbuz6vQuTf+FD+U7FshafrHzmqUCjgAEiLuIJ8U=
github.com/aws/aws-sdk-go v1.37.8/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
github.com/aws/aws-sdk-go-v2 v1.21.0 h1:gMT0IW+03wtYJhRqTVYn0wLzwdnK9sRMcxmtfGzRdJc=
github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pfHvDagGEp0M=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.13 h1:OPLEkmhXf6xFPiz0bLeDArZIDx1NNS4oJyG4nv3Gct0=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.13/go.mod h1:gpAbvyDGQFozTEmlTFO8XcQKHzubdq0LzRyJpG6MiXM=
github.com/aws/aws-sdk-go-v2/config v1.18.40 h1:dbu1llI/nTIL+r6sYHMeVLl99DM8J8/o1I4EPurnhLg=
github.com/aws/aws-sdk-go-v2/config v1.18.40/go.mod h1:JjrCZQwSPGCoZRQzKHyZNNueaKO+kFaEy2sR6mCzd90=
github.com/aws/aws-sdk-go-v2/credentials v1.13.38 h1:gDAuCdVlA4lmmgQhvpZlscwicloCqH44vkxLklGkQLA=
github.com/aws/aws-sdk-go-v2/credentials v1.13.38/go.mod h1:sD4G/Ybgp6s89mWIES3Xn97CsRLpxvz9uVSdv0UxY8I=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.11 h1:uDZJF1hu0EVT/4bogChk8DyjSF6fof6uL/0Y26Ma7Fg=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.11/go.mod h1:TEPP4tENqBGO99KwVpV9MlOX4NSrSLP8u3KRy2CDwA8=
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84 h1:LENrVcqnWTyI8fbIUCvxAMe+fXbREIaXzcR8WPwco1U=
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84/go.mod h1:LHxCiYAStsgps4srke7HujyADd504MSkNXjLpOtICTc=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 h1:22dGT7PneFMx4+b3pz7lMTRyN8ZKH7M2cW4GP9yUS2g=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41/go.mod h1:CrObHAuPneJBlfEJ5T3szXOUkLEThaGfvnhTf33buas=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 h1:SijA0mgjV8E+8G45ltVHs0fvKpTj8xmZJ3VwhGKtUSI=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35/go.mod h1:SJC1nEVVva1g3pHAIdCp7QsRIkMmLAgoDquQ9Rr8kYw=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.42 h1:GPUcE/Yq7Ur8YSUk6lVkoIMWnJNO0HT18GUzCWCgCI0=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.42/go.mod h1:rzfdUlfA+jdgLDmPKjd3Chq9V7LVLYo1Nz++Wb91aRo=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.1.4 h1:6lJvvkQ9HmbHZ4h/IEwclwv2mrTW8Uq1SOB/kXy0mfw=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.1.4/go.mod h1:1PrKYwxTM+zjpw9Y41KFtoJCQrJ34Z47Y4VgVbfndjo=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.14 h1:m0QTSI6pZYJTk5WSKx3fm5cNW/DCicVzULBgU/6IyD0=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.14/go.mod h1:dDilntgHy9WnHXsh7dDtUPgHKEfTJIBUTHM8OWm0f/0=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.36 h1:eev2yZX7esGRjqRbnVk1UxMLw4CyVZDpZXRCcy75oQk=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.36/go.mod h1:lGnOkH9NJATw0XEPcAknFBj3zzNTEGRHtSw+CwC1YTg=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.35 h1:CdzPW9kKitgIiLV1+MHobfR5Xg25iYnyzWZhyQuSlDI=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.35/go.mod h1:QGF2Rs33W5MaN9gYdEQOBBFPLwTZkEhRwI33f7KIG0o=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.15.4 h1:v0jkRigbSD6uOdwcaUQmgEwG1BkPfAPDqaeNt/29ghg=
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.15.4/go.mod h1:LhTyt8J04LL+9cIt7pYJ5lbS/U98ZmXovLOR/4LUsk8=
github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5 h1:A42xdtStObqy7NGvzZKpnyNXvoOmm+FENobZ0/ssHWk=
github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5/go.mod h1:rDGMZA7f4pbmTtPOk5v5UM2lmX6UAbRnMDJeDvnH7AM=
github.com/aws/aws-sdk-go-v2/service/sso v1.14.0 h1:AR/hlTsCyk1CwlyKnPFvIMvnONydRjDDRT9OGb0i+/g=
github.com/aws/aws-sdk-go-v2/service/sso v1.14.0/go.mod h1:fIAwKQKBFu90pBxx07BFOMJLpRUGu8VOzLJakeY+0K4=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0 h1:vbgiXuhtn49+erlPrgIvQ+J32rg1HseaPf8lEpKbkxQ=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0/go.mod h1:yygr8ACQRY2PrEcy3xsUI357stq2AxnFM6DIsR9lij4=
github.com/aws/aws-sdk-go-v2/service/sts v1.22.0 h1:s4bioTgjSFRwOoyEFzAVCmFmoowBgjTR8gkrF/sQ4wk=
github.com/aws/aws-sdk-go-v2/service/sts v1.22.0/go.mod h1:VC7JDqsqiwXukYEDjoHh9U0fOJtNWh04FPQz4ct4GGU=
github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ=
github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand All @@ -24,8 +62,8 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw=
Expand Down Expand Up @@ -155,7 +193,6 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
Expand Down
85 changes: 68 additions & 17 deletions pkg/supervisor/archived_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ package supervisor
import (
"bufio"
"context"
"crypto/sha1"
"encoding/base64"
"fmt"
"io"
"net/url"
"os"
"strings"
"time"

"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/feature/s3/manager"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/pkg/errors"
"github.com/segmentio/events/v2"
"github.com/segmentio/stats/v4"
Expand Down Expand Up @@ -52,7 +56,7 @@ type s3Snapshot struct {
Bucket string
Key string
sendToS3Func sendToS3Func
s3Uploader S3Uploader
s3Client S3Client
}

func (c *s3Snapshot) Upload(ctx context.Context, path string) error {
Expand All @@ -71,6 +75,12 @@ func (c *s3Snapshot) Upload(ctx context.Context, path string) error {
key = key[1:]
}
var reader io.Reader = bufio.NewReaderSize(f, 1024*32) // use a 32K buffer for reading

cs, err := getChecksum(path)
if err != nil {
return errors.Wrap(err, "generate file Checksum")
}

var gpr *gzipCompressionReader
if strings.HasSuffix(key, ".gz") {
events.Log("Compressing s3 payload with GZIP")
Expand All @@ -80,7 +90,7 @@ func (c *s3Snapshot) Upload(ctx context.Context, path string) error {
events.Log("Uploading %{file}s (%d bytes) to %{bucket}s/%{key}s", path, size, c.Bucket, key)

start := time.Now()
if err = c.sendToS3(ctx, key, c.Bucket, reader); err != nil {
if err = c.sendToS3(ctx, key, c.Bucket, reader, cs); err != nil {
return errors.Wrap(err, "send to s3")
}
stats.Observe("ldb-upload-time", time.Since(start), stats.T("compressed", isCompressed(gpr)))
Expand All @@ -97,42 +107,83 @@ func (c *s3Snapshot) Upload(ctx context.Context, path string) error {
return nil
}

// getChecksum returns the base64 (standard encoding) form of the SHA-1 digest
// of the file at path.
//
// The file is opened read-only on its own handle and streamed through the hash
// with io.Copy. An error is returned if the file cannot be opened or cannot be
// read to the end, so the digest of a partially read file is never reported as
// the file's checksum.
func getChecksum(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", errors.Wrap(err, "opening file")
	}
	defer f.Close()

	h := sha1.New()
	if _, err := io.Copy(h, f); err != nil {
		// A failed read leaves h holding the digest of only a prefix of the
		// file; surface the error instead of returning that value.
		return "", errors.Wrap(err, "hashing file")
	}

	cs := base64.StdEncoding.EncodeToString(h.Sum(nil))
	events.Log("base64 encoding of sha1: %s", cs)

	return cs, nil
}

// isCompressed reports, as the string "true" or "false", whether a gzip
// compression reader is present (non-nil).
func isCompressed(gpr *gzipCompressionReader) string {
	if gpr != nil {
		return "true"
	}
	return "false"
}

func (c *s3Snapshot) sendToS3(ctx context.Context, key string, bucket string, body io.Reader) error {
// BucketBasics holds the S3 client that sendToS3 hands to manager.NewUploader.
type BucketBasics struct {
	// S3Client is the client used to perform the upload.
	S3Client S3Client
}

func (c *s3Snapshot) sendToS3(ctx context.Context, key string, bucket string, body io.Reader, cs string) error {
if c.sendToS3Func != nil {
return c.sendToS3Func(ctx, key, bucket, body)
}
ul, err := c.getS3Uploader()

client, err := c.getS3Client()
if err != nil {
return err
}
output, err := ul.UploadWithContext(ctx, &s3manager.UploadInput{
Bucket: &bucket,
Key: &key,
Body: body,

var basics = BucketBasics{
S3Client: client,
}
var partMiBs int64 = 16
uploader := manager.NewUploader(basics.S3Client, func(u *manager.Uploader) {
u.PartSize = partMiBs * 1024 * 1024
})

output, err := uploader.Upload(ctx, &s3.PutObjectInput{
Bucket: &bucket,
Key: &key,
Body: body,
ChecksumAlgorithm: "sha256",
Metadata: map[string]string{
"checksum": cs,
},
})
if err == nil {
events.Log("Wrote to S3 location: %s", output.Location)
} else {
events.Log("Couldn't upload s3 snapshot to %v:%v. Here's why: %v\n",
bucket, key, err)
}
return errors.Wrap(err, "upload with context")
}

func (c *s3Snapshot) getS3Uploader() (S3Uploader, error) {
if c.s3Uploader != nil {
return c.s3Uploader, nil
func (c *s3Snapshot) getS3Client() (S3Client, error) {
if c.s3Client != nil {
return c.s3Client, nil
}
sess, err := session.NewSession()
cfg, err := config.LoadDefaultConfig(context.Background())

if err != nil {
return nil, errors.Wrap(err, "creating aws session")
panic(fmt.Sprintf("failed loading config, %v", err))
}
uploader := s3manager.NewUploader(sess)
return uploader, nil

client := s3.NewFromConfig(cfg)
return client, nil
}

func archivedSnapshotFromURL(URL string) (archivedSnapshot, error) {
Expand Down
Loading

0 comments on commit 146e400

Please sign in to comment.