Skip to content

Commit

Permalink
rotate logs; dedup api
Browse files Browse the repository at this point in the history
* rotate logs: individual nodes and entire cluster
* CLI: `ais advanced rotate-logs`
* `api` package: reduce copy/paste, ref

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Nov 7, 2023
1 parent f79a68f commit 518fb18
Show file tree
Hide file tree
Showing 20 changed files with 112 additions and 63 deletions.
4 changes: 2 additions & 2 deletions ais/htrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -887,8 +887,8 @@ func (h *htrun) bcastAsyncIC(msg *aisMsg) {
freeBcArgs(args)
}

func (h *htrun) bcastReqGroup(w http.ResponseWriter, r *http.Request, args *bcastArgs, to int) {
args.to = to
func (h *htrun) bcastAllNodes(w http.ResponseWriter, r *http.Request, args *bcastArgs) {
args.to = cluster.AllNodes
results := h.bcastGroup(args)
for _, res := range results {
if res.err != nil {
Expand Down
2 changes: 2 additions & 0 deletions ais/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -2695,6 +2695,8 @@ func (p *proxy) httpdaeput(w http.ResponseWriter, r *http.Request) {
if err := p.owner.config.resetDaemonConfig(); err != nil {
p.writeErr(w, r, err)
}
case apc.ActRotateLogs:
nlog.Flush(nlog.ActRotate)
case apc.ActResetStats:
errorsOnly := msg.Value.(bool)
p.statsT.ResetStats(errorsOnly)
Expand Down
17 changes: 14 additions & 3 deletions ais/prxclu.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,8 @@ func (p *proxy) cluputJSON(w http.ResponseWriter, r *http.Request) {
}
case apc.ActResetConfig:
p.resetCluCfgPersistent(w, r, msg)
case apc.ActRotateLogs:
p.rotateLogs(w, r, msg)

case apc.ActShutdownCluster:
args := allocBcArgs()
Expand Down Expand Up @@ -997,7 +999,7 @@ func (p *proxy) cluputJSON(w http.ResponseWriter, r *http.Request) {
p.statsT.ResetStats(errorsOnly)
args := allocBcArgs()
args.req = cmn.HreqArgs{Method: http.MethodPut, Path: apc.URLPathDae.S, Body: cos.MustMarshal(msg)}
p.bcastReqGroup(w, r, args, cluster.AllNodes)
p.bcastAllNodes(w, r, args)
freeBcArgs(args)
case apc.ActXactStart:
p.xstart(w, r, msg)
Expand Down Expand Up @@ -1083,7 +1085,16 @@ func (p *proxy) resetCluCfgPersistent(w http.ResponseWriter, r *http.Request, ms

args := allocBcArgs()
args.req = cmn.HreqArgs{Method: http.MethodPut, Path: apc.URLPathDae.S, Body: body}
p.bcastReqGroup(w, r, args, cluster.AllNodes)
p.bcastAllNodes(w, r, args)
freeBcArgs(args)
}

func (p *proxy) rotateLogs(w http.ResponseWriter, r *http.Request, msg *apc.ActMsg) {
nlog.Flush(nlog.ActRotate)
body := cos.MustMarshal(msg)
args := allocBcArgs()
args.req = cmn.HreqArgs{Method: http.MethodPut, Path: apc.URLPathDae.S, Body: body}
p.bcastAllNodes(w, r, args)
freeBcArgs(args)
}

Expand All @@ -1100,7 +1111,7 @@ func (p *proxy) setCluCfgTransient(w http.ResponseWriter, r *http.Request, toUpd
Body: cos.MustMarshal(msg),
Query: url.Values{apc.ActTransient: []string{"true"}},
}
p.bcastReqGroup(w, r, args, cluster.AllNodes)
p.bcastAllNodes(w, r, args)
freeBcArgs(args)
}

Expand Down
2 changes: 2 additions & 0 deletions ais/tgtcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ func (t *target) daeputJSON(w http.ResponseWriter, r *http.Request) {
if err := t.owner.config.resetDaemonConfig(); err != nil {
t.writeErr(w, r, err)
}
case apc.ActRotateLogs:
nlog.Flush(nlog.ActRotate)
case apc.ActResetStats:
errorsOnly := msg.Value.(bool)
t.statsT.ResetStats(errorsOnly)
Expand Down
3 changes: 3 additions & 0 deletions api/apc/actmsg.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,13 @@ const (
ActPromote = "promote"
ActRenameObject = "rename-obj"

// cp (reverse)
ActResetStats = "reset-stats"
ActResetConfig = "reset-config"
ActSetConfig = "set-config"

ActRotateLogs = "rotate-logs"

ActShutdownCluster = "shutdown" // see also: ActShutdownNode

// multi-object (via `ListRange`)
Expand Down
30 changes: 15 additions & 15 deletions api/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,19 +183,6 @@ func GetClusterStats(bp BaseParams) (res stats.Cluster, err error) {
return
}

func ResetClusterStats(bp BaseParams, errorsOnly bool) (err error) {
bp.Method = http.MethodPut
reqParams := AllocRp()
{
reqParams.BaseParams = bp
reqParams.Path = apc.URLPathClu.S
reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActResetStats, Value: errorsOnly})
}
err = reqParams.DoRequest()
FreeRp(reqParams)
return
}

func GetRemoteAIS(bp BaseParams) (remais cluster.Remotes, err error) {
bp.Method = http.MethodGet
reqParams := AllocRp()
Expand Down Expand Up @@ -288,14 +275,27 @@ func SetClusterConfigUsingMsg(bp BaseParams, configToUpdate *cmn.ConfigToSet, tr
return err
}

// ResetClusterConfig resets the configuration of all nodes to the cluster configuration
// zero out: all metrics _or_ only error counters
func ResetClusterStats(bp BaseParams, errorsOnly bool) (err error) {
return _putCluster(bp, apc.ActMsg{Action: apc.ActResetStats, Value: errorsOnly})
}

// all nodes: reset configuration to cluster defaults
func ResetClusterConfig(bp BaseParams) error {
return _putCluster(bp, apc.ActMsg{Action: apc.ActResetConfig})
}

func RotateClusterLogs(bp BaseParams) error {
return _putCluster(bp, apc.ActMsg{Action: apc.ActRotateLogs})
}

func _putCluster(bp BaseParams, msg apc.ActMsg) error {
bp.Method = http.MethodPut
reqParams := AllocRp()
{
reqParams.BaseParams = bp
reqParams.Path = apc.URLPathClu.S
reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActResetConfig})
reqParams.Body = cos.MustMarshal(msg)
reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
}
err := reqParams.DoRequest()
Expand Down
33 changes: 15 additions & 18 deletions api/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,21 +193,6 @@ func GetDaemonStats(bp BaseParams, node *meta.Snode) (ds *stats.Node, err error)
return ds, err
}

// see also: ResetClusterStats
func ResetDaemonStats(bp BaseParams, node *meta.Snode, errorsOnly bool) (err error) {
bp.Method = http.MethodPut
reqParams := AllocRp()
{
reqParams.BaseParams = bp
reqParams.Path = apc.URLPathReverseDae.S
reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActResetStats, Value: errorsOnly})
reqParams.Header = http.Header{apc.HdrNodeID: []string{node.ID()}}
}
err = reqParams.DoRequest()
FreeRp(reqParams)
return
}

func GetDiskStats(bp BaseParams, tid string) (res ios.AllDiskStats, err error) {
bp.Method = http.MethodGet
reqParams := AllocRp()
Expand Down Expand Up @@ -289,15 +274,27 @@ func SetDaemonConfig(bp BaseParams, nodeID string, nvs cos.StrKVs, transient ...
return err
}

// ResetDaemonConfig resets the configuration for a specific node to the cluster configuration.
// TODO: revisit access control
// see also: ResetClusterStats
func ResetDaemonStats(bp BaseParams, node *meta.Snode, errorsOnly bool) error {
return _putDaemon(bp, node.ID(), apc.ActMsg{Action: apc.ActResetStats, Value: errorsOnly})
}

// reset node's configuration to cluster defaults
func ResetDaemonConfig(bp BaseParams, nodeID string) error {
return _putDaemon(bp, nodeID, apc.ActMsg{Action: apc.ActResetConfig})
}

func RotateLogs(bp BaseParams, nodeID string) error {
return _putDaemon(bp, nodeID, apc.ActMsg{Action: apc.ActRotateLogs})
}

func _putDaemon(bp BaseParams, nodeID string, msg apc.ActMsg) error {
bp.Method = http.MethodPut
reqParams := AllocRp()
{
reqParams.BaseParams = bp
reqParams.Path = apc.URLPathReverseDae.S
reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActResetConfig})
reqParams.Body = cos.MustMarshal(msg)
reqParams.Header = http.Header{
apc.HdrNodeID: []string{nodeID},
cos.HdrContentType: []string{cos.ContentJSON},
Expand Down
2 changes: 1 addition & 1 deletion cmd/aisnode/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ var (
func main() {
debug.Assert(build != "", "missing build")
ecode := ais.Run(cmn.VersionAIStore+"."+build, buildtime)
nlog.Flush(true)
nlog.Flush(nlog.ActExit)
os.Exit(ecode)
}
2 changes: 1 addition & 1 deletion cmd/aisnodeprofile/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func run() int {
}

exitCode := ais.Run(cmn.VersionAIStore+"."+build, buildtime)
nlog.Flush(true)
nlog.Flush(nlog.ActExit)

if s := *memProfile; s != "" {
*memProfile = s + "." + strconv.Itoa(syscall.Getpid())
Expand Down
4 changes: 2 additions & 2 deletions cmd/authn/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func init() {
func logFlush() {
for {
time.Sleep(time.Minute) // TODO: must be configurable
nlog.Flush()
nlog.Flush(nlog.ActNone)
}
}

Expand Down Expand Up @@ -99,7 +99,7 @@ func main() {
srv := newServer(mgr)
err = srv.Run()

nlog.Flush(true)
nlog.Flush(nlog.ActExit)
cos.Close(mgr.db)
if err != nil {
cos.ExitLogf("Server failed: %v", err)
Expand Down
29 changes: 29 additions & 0 deletions cmd/cli/cli/advanced_hdlr.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ var (
Action: randMountpath,
BashComplete: suggestTargets,
},
{
Name: cmdRotateLogs,
Usage: "rotate logs",
ArgsUsage: optionalNodeIDArgument,
Action: rotateLogs,
BashComplete: suggestAllNodes,
},
},
}
)
Expand Down Expand Up @@ -132,3 +139,25 @@ func randMountpath(c *cli.Context) error {
}
return nil
}

func rotateLogs(c *cli.Context) error {
node, sname, err := arg0Node(c)
if err != nil {
return err
}
// 1. node
if node != nil {
if err := api.RotateLogs(apiBP, node.ID()); err != nil {
return V(err)
}
msg := fmt.Sprintf("%s: rotated logs", sname)
actionDone(c, msg)
return nil
}
// 2. or cluster
if err := api.RotateClusterLogs(apiBP); err != nil {
return V(err)
}
actionDone(c, "cluster: rotated all logs")
return nil
}
1 change: 1 addition & 0 deletions cmd/cli/cli/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
cmdRmSmap = "remove-from-smap"
cmdRandNode = "random-node"
cmdRandMountpath = "random-mountpath"
cmdRotateLogs = "rotate-logs"
)

// - 2nd level subcommands (mostly, verbs)
Expand Down
2 changes: 1 addition & 1 deletion cmd/cli/cli/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func fillNodeStatusMap(c *cli.Context, daeType string) (smap *meta.Smap, tstatus
wg.Wait()

mmc := strings.Split(cmn.VersionAIStore, versionSepa)
debug.Assert(len(mmc) > 2)
debug.Assert(len(mmc) > 1)
ok := checkVersionWarn(c, apc.Target, mmc, tstatusMap)
if ok && pstatusMap != nil {
_ = checkVersionWarn(c, apc.Proxy, mmc, pstatusMap)
Expand Down
2 changes: 1 addition & 1 deletion cmd/cli/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.21

// direct
require (
github.com/NVIDIA/aistore v1.3.21
github.com/NVIDIA/aistore v1.3.22-0.20231107182908-95194b4118d8
github.com/fatih/color v1.15.0
github.com/json-iterator/go v1.1.12
github.com/onsi/ginkgo v1.16.5
Expand Down
4 changes: 2 additions & 2 deletions cmd/cli/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
code.cloudfoundry.org/bytefmt v0.0.0-20190710193110-1eb035ffe2b6/go.mod h1:wN/zk7mhREp/oviagqUXY3EwuHhWyOvAdsn5Y4CzOrc=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/aistore v1.3.21 h1:OB0FC6tOD9o+YZIhJHQ6rLx8OYfyDhZemd/v27WivRk=
github.com/NVIDIA/aistore v1.3.21/go.mod h1:+iSnZg0ovMaLgaT9fLAs2WmYBP7IfeTW1WYkbKrwP4g=
github.com/NVIDIA/aistore v1.3.22-0.20231107182908-95194b4118d8 h1:XRR43BhmCk3GwxF0d3oYVNtMlgT0jAwt5uqmMwX2D2c=
github.com/NVIDIA/aistore v1.3.22-0.20231107182908-95194b4118d8/go.mod h1:+iSnZg0ovMaLgaT9fLAs2WmYBP7IfeTW1WYkbKrwP4g=
github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=
github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
Expand Down
6 changes: 3 additions & 3 deletions cmn/cos/assert.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,22 @@ func Assertf(cond bool, f string, a ...any) {

func Assert(cond bool) {
if !cond {
nlog.Flush(true)
nlog.Flush(nlog.ActExit)
panic(assertMsg)
}
}

// NOTE: when using Sprintf and such, `if (!cond) { AssertMsg(false, msg) }` is the preferable usage.
func AssertMsg(cond bool, msg string) {
if !cond {
nlog.Flush(true)
nlog.Flush(nlog.ActExit)
panic(assertMsg + ": " + msg)
}
}

func AssertNoErr(err error) {
if err != nil {
nlog.Flush(true)
nlog.Flush(nlog.ActExit)
panic(err)
}
}
4 changes: 2 additions & 2 deletions cmn/cos/err.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func ExitLogf(f string, a ...any) {
msg := fmt.Sprintf(fatalPrefix+f, a...)
if flag.Parsed() {
nlog.ErrorDepth(1, msg+"\n")
nlog.Flush(true)
nlog.Flush(nlog.ActExit)
}
_exit(msg)
}
Expand All @@ -198,7 +198,7 @@ func ExitLog(a ...any) {
msg := fatalPrefix + fmt.Sprint(a...)
if flag.Parsed() {
nlog.ErrorDepth(1, msg+"\n")
nlog.Flush(true)
nlog.Flush(nlog.ActExit)
}
_exit(msg)
}
Expand Down
2 changes: 1 addition & 1 deletion cmn/debug/debug_on.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func _panic(a ...any) {
}
if flag.Parsed() {
nlog.Errorln(buffer.String())
nlog.Flush(true)
nlog.Flush(nlog.ActExit)
} else {
fmt.Fprintln(os.Stderr, buffer.String())
}
Expand Down
Loading

0 comments on commit 518fb18

Please sign in to comment.