Skip to content

Commit

Permalink
CLI: 'ais scrub' (new; part two)
Browse files Browse the repository at this point in the history
* part two, prev. commit: 06c5c8d

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Dec 9, 2024
1 parent 837c0bb commit a7ac713
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 61 deletions.
4 changes: 2 additions & 2 deletions cmd/cli/cli/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ const (
cmdRebalance = apc.ActRebalance
cmdLRU = apc.ActLRU
cmdStgCleanup = "cleanup" // display name for apc.ActStoreCleanup
cmdStgValidate = "validate"
cmdScrub = "validate"
cmdSummary = "summary" // ditto apc.ActSummaryBck

cmdCluster = commandCluster
Expand Down Expand Up @@ -532,7 +532,7 @@ var (
//
objLimitFlag = cli.IntFlag{
Name: "limit",
Usage: "maximum number of object names to display (0 - unlimited; see also '--max-pages')\n" +
Usage: "maximum number of object names to list (0 - unlimited; see also '--max-pages')\n" +
indent4 + "\te.g.: 'ais ls gs://abc --limit 1234 --cached --props size,custom",
}
pageSizeFlag = cli.IntFlag{
Expand Down
13 changes: 8 additions & 5 deletions cmd/cli/cli/storage_hdlr.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ var validateUsage = validateSummaryFlag.Usage + "\n" +
indent1 + "e.g.:\n" +
indent1 + "\t* ais storage validate \t- validate all in-cluster buckets;\n" +
indent1 + "\t* ais scrub \t- same as above;\n" +
indent1 + "\t* ais scrub ais \t- all ais buckets;\n" +
indent1 + "\t* ais storage validate ais \t- validate (a.k.a. scrub) all ais buckets;\n" +
indent1 + "\t* ais scrub s3 \t- all s3 buckets present in the cluster;\n" +
indent1 + "\t* ais scrub s3 --refresh 10\t- same as above while refreshing runtime counter(s) every 10s;\n" +
indent1 + "\t* ais scrub gs://abc/images/\t- validate part of the gcp bucket under 'images/`;\n" +
Expand Down Expand Up @@ -163,10 +163,13 @@ var (
longRunFlags,
jsonFlag,
),
cmdStgValidate: append(
cmdScrub: append(
longRunFlags,
bsummPrefixFlag,
waitJobXactFinishedFlag,
objLimitFlag,
noHeaderFlag,
maxPagesFlag,
noRecursFlag,
),
}

Expand Down Expand Up @@ -205,10 +208,10 @@ var (
makeAlias(showCmdStorage, "", true, commandShow), // alias for `ais show`
showCmdStgSummary,
{
Name: cmdStgValidate,
Name: cmdScrub,
Usage: validateUsage,
ArgsUsage: lsAnyCommandArgument,
Flags: storageFlags[cmdStgValidate],
Flags: storageFlags[cmdScrub],
Action: prelimScrub,
BashComplete: bucketCompletions(bcmplop{}),
},
Expand Down
128 changes: 98 additions & 30 deletions cmd/cli/cli/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ import (
"errors"
"fmt"
"path/filepath"
"sync"

"github.com/NVIDIA/aistore/api"
"github.com/NVIDIA/aistore/api/apc"
"github.com/NVIDIA/aistore/cmd/cli/teb"
"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/sys"
"github.com/urfave/cli"
)

Expand All @@ -31,9 +33,11 @@ type (
FiveGBplus uint64
}
ctxScrub struct {
c *cli.Context
qbck cmn.QueryBcks
pref string
c *cli.Context
scrubs []*preScrub
qbck cmn.QueryBcks
pref string
tmpl string
}
)

Expand All @@ -60,7 +64,14 @@ func prelimScrub(c *cli.Context) (err error) {
ctx.pref = prefix
}

return waitForFunc(ctx.do, longClientTimeout)
ctx.tmpl = teb.BucketSummaryValidateTmpl
if flagIsSet(ctx.c, noHeaderFlag) {
ctx.tmpl = teb.BucketSummaryValidateBody
}
if ctx.qbck.IsBucket() {
return waitForFunc(ctx.one, longClientTimeout)
}
return waitForFunc(ctx.many, longClientTimeout)
}

//////////////
Expand All @@ -87,56 +98,113 @@ func (scr *preScrub) upd(en *cmn.LsoEnt, bprops *cmn.Bprops) {
// ctxScrub //
//////////////

func (ctx *ctxScrub) do() error {
// TODO: when !ctx.qbck.IsQuery do HEAD instead of list-buckets, skip HEADing below
func (ctx *ctxScrub) many() error {
bcks, err := api.ListBuckets(apiBP, ctx.qbck, apc.FltPresent)
if err != nil {
return V(err)
}

var (
scrubs = make([]*preScrub, 0, len(bcks))
msg = &apc.LsoMsg{Prefix: ctx.pref, Flags: apc.LsObjCached | apc.LsMissing}
num = len(bcks)
wg = cos.NewLimitedWaitGroup(sys.NumCPU(), num)
mu = &sync.Mutex{}
)
msg.AddProps(apc.GetPropsAll...)

ctx.scrubs = make([]*preScrub, 0, num)
for i := range bcks {
bck := bcks[i]
if ctx.qbck.Name != "" && !ctx.qbck.Equal(&bck) {
continue
}

bprops, err := headBucket(bck, true /* don't add */)
if err != nil {
return err
}
wg.Add(1)
go ctx.gols(bck, wg, mu)
}
wg.Wait()

var (
callAfter = listObjectsWaitTime
_listed = &_listed{c: ctx.c, bck: &bck, msg: msg}
lsargs api.ListArgs
)
if flagIsSet(ctx.c, refreshFlag) {
callAfter = parseDurationFlag(ctx.c, refreshFlag)
}
return teb.Print(ctx.scrubs, ctx.tmpl)
}

// gols validates a single bucket by calling ctx.ls and, on success, records
// the result in ctx.scrubs while holding mu. When ctx.ls fails, a warning is
// emitted via actionWarn and nothing is recorded. wg.Done is always called
// on return.
func (ctx *ctxScrub) gols(bck cmn.Bck, wg cos.WG, mu *sync.Mutex) {
	defer wg.Done()

	res, err := ctx.ls(bck)
	if err == nil {
		// ctx.scrubs is appended to under the caller-supplied mutex
		mu.Lock()
		ctx.scrubs = append(ctx.scrubs, res)
		mu.Unlock()
		return
	}

	// failure: warn and return without recording anything for this bucket
	actionWarn(ctx.c, fmt.Sprintf("cannot validate %s: %v", bck.Cname(ctx.pref), err))
}

// one runs ctx.ls on the single bucket given by ctx.qbck and prints the
// resulting entry using the template stored in ctx.tmpl. An error from
// ctx.ls is returned unchanged.
func (ctx *ctxScrub) one() error {
	bck := cmn.Bck(ctx.qbck)
	scr, err := ctx.ls(bck)
	if err != nil {
		return err
	}
	// teb.Print is handed a slice, so wrap the single result
	rows := []*preScrub{scr}
	return teb.Print(rows, ctx.tmpl)
}

func (ctx *ctxScrub) ls(bck cmn.Bck) (*preScrub, error) {
bprops, err := headBucket(bck, true /* don't add */)
if err != nil {
return nil, err
}
bck.Props = bprops
var (
callAfter = listObjectsWaitTime
lsargs api.ListArgs
lsmsg = &apc.LsoMsg{Prefix: ctx.pref, Flags: apc.LsObjCached | apc.LsMissing}
_listed = &_listed{c: ctx.c, bck: &bck, msg: lsmsg}
)
lsmsg.AddProps(apc.GetPropsName, apc.GetPropsSize)

if flagIsSet(ctx.c, refreshFlag) {
callAfter = parseDurationFlag(ctx.c, refreshFlag)
}

scr := &preScrub{Bck: bck}

pageSize, maxPages, limit, err := _setPage(ctx.c, bck)
if err != nil {
return nil, err
}
lsmsg.PageSize = pageSize
{
lsargs.Callback = _listed.cb
lsargs.CallAfter = callAfter
lst, err := api.ListObjects(apiBP, bck, msg, lsargs)
lsargs.Limit = limit
}

// pages
pageCounter, toShow := 0, int(limit)
for {
lst, err := api.ListObjectsPage(apiBP, bck, lsmsg, lsargs)
if err != nil {
return err
return nil, err
}

scr := &preScrub{Bck: bck}
// one page
for _, en := range lst.Entries {
if en.IsDir() || cos.IsLastB(en.Name, filepath.Separator) {
continue
}
debug.Assert(en.IsPresent(), bck.Cname(en.Name), " expected to be present") // vs apc.LsObjCached
debug.Assert(en.IsPresent(), bck.Cname(en.Name), " must be present") // (LsObjCached)
scr.upd(en, bprops)
}
scrubs = append(scrubs, scr)

if lsmsg.ContinuationToken == "" {
break
}
pageCounter++
if maxPages > 0 && pageCounter >= int(maxPages) {
break
}
if limit > 0 {
toShow -= len(lst.Entries)
if toShow <= 0 {
break
}
}
}

return teb.Print(scrubs, teb.BucketSummaryValidateTmpl)
return scr, nil
}
4 changes: 2 additions & 2 deletions cmd/cli/teb/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ const (
"{{end}}"

BucketSummaryValidateHdr = "BUCKET\t OBJECTS\t MISPLACED\t MISSING COPIES\t ZERO SIZE\t 5+GB\n"
BucketSummaryValidateTmpl = BucketSummaryValidateHdr + bucketSummaryValidateBody
bucketSummaryValidateBody = "{{range $v := . }}" +
BucketSummaryValidateTmpl = BucketSummaryValidateHdr + BucketSummaryValidateBody
BucketSummaryValidateBody = "{{range $v := . }}" +
"{{FormatBckName $v.Bck}}\t {{$v.ObjectCnt}}\t {{$v.Misplaced}}\t {{$v.MissingCopies}}\t {{$v.ZeroSize}}\t {{$v.FiveGBplus}}\n" +
"{{end}}"

Expand Down
27 changes: 16 additions & 11 deletions docs/cli/bucket.md
Original file line number Diff line number Diff line change
Expand Up @@ -919,12 +919,13 @@ A few additional words must be said about `--validate`. The option is provided t
> an alternative way to execute _validation_ is to run `ais storage validate` or (simply) `ais scrub`:
```console
$ ais storage validate --help
NAME:
ais storage validate - check in-cluster content for misplaced objects, objects that have insufficient numbers of copies, zero size, and more
e.g.:
* ais storage validate - validate all in-cluster buckets;
* ais scrub - same as above;
* ais scrub ais - all ais buckets;
* ais storage validate ais - validate (a.k.a. scrub) all ais buckets;
* ais scrub s3 - all s3 buckets present in the cluster;
* ais scrub s3 --refresh 10 - same as above while refreshing runtime counter(s) every 10s;
* ais scrub gs://abc/images/ - validate part of the gcp bucket under 'images/`;
Expand All @@ -934,16 +935,20 @@ USAGE:
ais storage validate [command options] [BUCKET[/PREFIX]] or [PROVIDER]

OPTIONS:
--refresh value time interval for continuous monitoring; can be also used to update progress bar (at a given interval);
valid time units: ns, us (or µs), ms, s (default), m, h
--count value used together with '--refresh' to limit the number of generated reports, e.g.:
'--refresh 10 --count 5' - run 5 times with 10s interval (default: 0)
--prefix value for each bucket, select only those objects (names) that start with the specified prefix, e.g.:
'--prefix a/b/c' - sum-up sizes of the virtual directory a/b/c and objects from the virtual directory
a/b that have names (relative to this directory) starting with the letter c
--timeout value maximum time to wait for a job to finish; if omitted: wait forever or until Ctrl-C;
valid time units: ns, us (or µs), ms, s (default), m, h
--help, -h show help
--refresh value time interval for continuous monitoring; can be also used to update progress bar (at a given interval);
valid time units: ns, us (or µs), ms, s (default), m, h
--count value used together with '--refresh' to limit the number of generated reports, e.g.:
'--refresh 10 --count 5' - run 5 times with 10s interval (default: 0)
--prefix value for each bucket, select only those objects (names) that start with the specified prefix, e.g.:
'--prefix a/b/c' - sum-up sizes of the virtual directory a/b/c and objects from the virtual directory
a/b that have names (relative to this directory) starting with the letter c
--limit value maximum number of object names to list (0 - unlimited; see also '--max-pages')
e.g.: 'ais ls gs://abc --limit 1234 --cached --props size,custom (default: 0)
--no-headers, -H display tables without headers
--max-pages value maximum number of pages to display (see also '--page-size' and '--limit')
e.g.: 'ais ls az://abc --paged --page-size 123 --max-pages 7 (default: 0)
--non-recursive, --nr list objects without including nested virtual subdirectories
--help, -h show help
```

For details and additional examples, please see:
Expand Down
26 changes: 15 additions & 11 deletions docs/cli/storage.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ NAME:
e.g.:
* ais storage validate - validate all in-cluster buckets;
* ais scrub - same as above;
* ais scrub ais - all ais buckets;
* ais storage validate ais - validate (a.k.a. scrub) all ais buckets;
* ais scrub s3 - all s3 buckets present in the cluster;
* ais scrub s3 --refresh 10 - same as above while refreshing runtime counter(s) every 10s;
* ais scrub gs://abc/images/ - validate part of the gcp bucket under 'images/`;
Expand All @@ -109,16 +109,20 @@ USAGE:
ais storage validate [command options] [BUCKET[/PREFIX]] or [PROVIDER]

OPTIONS:
--refresh value time interval for continuous monitoring; can be also used to update progress bar (at a given interval);
valid time units: ns, us (or µs), ms, s (default), m, h
--count value used together with '--refresh' to limit the number of generated reports, e.g.:
'--refresh 10 --count 5' - run 5 times with 10s interval (default: 0)
--prefix value for each bucket, select only those objects (names) that start with the specified prefix, e.g.:
'--prefix a/b/c' - sum-up sizes of the virtual directory a/b/c and objects from the virtual directory
a/b that have names (relative to this directory) starting with the letter c
--timeout value maximum time to wait for a job to finish; if omitted: wait forever or until Ctrl-C;
valid time units: ns, us (or µs), ms, s (default), m, h
--help, -h show help
--refresh value time interval for continuous monitoring; can be also used to update progress bar (at a given interval);
valid time units: ns, us (or µs), ms, s (default), m, h
--count value used together with '--refresh' to limit the number of generated reports, e.g.:
'--refresh 10 --count 5' - run 5 times with 10s interval (default: 0)
--prefix value for each bucket, select only those objects (names) that start with the specified prefix, e.g.:
'--prefix a/b/c' - sum-up sizes of the virtual directory a/b/c and objects from the virtual directory
a/b that have names (relative to this directory) starting with the letter c
--limit value maximum number of object names to list (0 - unlimited; see also '--max-pages')
e.g.: 'ais ls gs://abc --limit 1234 --cached --props size,custom (default: 0)
--no-headers, -H display tables without headers
--max-pages value maximum number of pages to display (see also '--page-size' and '--limit')
e.g.: 'ais ls az://abc --paged --page-size 123 --max-pages 7 (default: 0)
--non-recursive, --nr list objects without including nested virtual subdirectories
--help, -h show help
```

Checks all objects of the bucket `BUCKET` and shows the number of misplaced objects, the number of objects that have an insufficient number of copies, etc.
Expand Down

0 comments on commit a7ac713

Please sign in to comment.