Skip to content

Commit

Permalink
CLI: amend 'performance throughput'
Browse files Browse the repository at this point in the history
* compute all throughputs on the client side
  - given interval and the corresponding `KindSize` metric
  - Clouds now separately
* add `SizeToThroughput` api helper; add "naming conventions"

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Dec 27, 2024
1 parent 0996dc6 commit a8374ce
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 37 deletions.
61 changes: 40 additions & 21 deletions cmd/cli/cli/performance.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,10 @@ func showCountersHandler(c *cli.Context) error {
return showPerfTab(c, selected, nil, cmdShowCounters, nil, false)
}

// TODO -- FIXME: revisit computing over totals (compare with latency)
// TODO -- FIXME: support regex, verbose
func showThroughputHandler(c *cli.Context) error {
var (
totals = make(map[string]int64, 4) // throughput metrics ("columns") to tally up
regexStr = parseStrFlag(c, regexColsFlag)
metrics, err = getMetricNames(c)
verbose = flagIsSet(c, verboseFlag)
)
Expand All @@ -187,31 +186,41 @@ func showThroughputHandler(c *cli.Context) error {

selected := make(cos.StrKVs, len(metrics))
for name, kind := range metrics {
switch name {
case stats.GetSize, stats.GetCount, stats.PutSize, stats.PutCount:
// - always show io-errors
// - other errors only if (get|put) and verbose
// - otherwise, skip anything other than the two relevant kinds
if stats.IsIOErrMetric(name) {
selected[name] = kind
continue
}
if !verbose && regexStr == "" {
if cos.StringInSlice(name, verboseCounters[:]) {
if stats.IsErrMetric(name) {
if !verbose {
continue
}
if !strings.Contains(name, "get") && !strings.Contains(name, "put") {
continue
}
selected[name] = kind
continue
}

switch {
case kind == stats.KindThroughput:
// 1. all throughput
selected[name] = kind
totals[name] = 0
case strings.HasSuffix(name, "."+stats.GetCount) || strings.HasSuffix(name, "."+stats.GetSize):
selected[name] = kind
case strings.HasSuffix(name, "."+stats.PutCount) || strings.HasSuffix(name, "."+stats.PutSize):
selected[name] = kind
case stats.IsErrMetric(name):
// 3. errors (compare with latency selection below)
if strings.Contains(name, "get") || strings.Contains(name, "put") {
switch kind {
case stats.KindCounter:
if name == stats.GetCount || name == stats.PutCount ||
strings.HasSuffix(name, "."+stats.GetCount) || strings.HasSuffix(name, "."+stats.PutCount) {
selected[name] = kind
}
continue
case stats.KindSize:
if name == stats.GetSize || name == stats.PutSize ||
strings.HasSuffix(name, "."+stats.GetSize) || strings.HasSuffix(name, "."+stats.PutSize) {
selected[name] = kind

if bpsName := stats.SizeToThroughput(name, stats.KindSize); bpsName != "" {
selected[bpsName] = stats.KindThroughput
}
}
continue
}
}
// `true` to show average get/put sizes
Expand All @@ -232,24 +241,34 @@ func _throughput(c *cli.Context, metrics cos.StrKVs, mapBegin, mapEnd teb.StstMa
continue
}
for name, v := range begin.Tracker {
if kind, ok := metrics[name]; !ok || kind != stats.KindThroughput {
kind, ok := metrics[name]
if !ok || kind != stats.KindSize {
continue
}
bpsName := stats.SizeToThroughput(name, stats.KindSize)
if bpsName == "" {
continue
}
vend := end.Tracker[name]

if vend.Value <= v.Value {
// no changes, nothing to show
v.Value = 0
begin.Tracker[name] = v
continue
}

//
// given this (KindSize) metric change and elapsed time, add computed throughput:
//
v.Value = (vend.Value - v.Value) / seconds
begin.Tracker[name] = v
begin.Tracker[bpsName] = v
num++
}
}

idle = num == 0
return
return idle
}

// otherwise, skip computing (TODO: add command-line option)
Expand Down
2 changes: 1 addition & 1 deletion cmd/cli/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/NVIDIA/aistore/cmd/cli
go 1.23.2

require (
github.com/NVIDIA/aistore v1.3.26-0.20241226184445-e6814a27d17d
github.com/NVIDIA/aistore v1.3.26-0.20241227164600-fbb55bc59304
github.com/fatih/color v1.18.0
github.com/json-iterator/go v1.1.12
github.com/onsi/ginkgo/v2 v2.21.0
Expand Down
4 changes: 2 additions & 2 deletions cmd/cli/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
code.cloudfoundry.org/bytefmt v0.0.0-20190710193110-1eb035ffe2b6/go.mod h1:wN/zk7mhREp/oviagqUXY3EwuHhWyOvAdsn5Y4CzOrc=
github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
github.com/NVIDIA/aistore v1.3.26-0.20241226184445-e6814a27d17d h1:hhv9YZyWW0QCgxv82OBgq3+nFBn+YS8ztkmg1zC7Re4=
github.com/NVIDIA/aistore v1.3.26-0.20241226184445-e6814a27d17d/go.mod h1:mjhY9OGIZULaC79+iRfzEUvUZw7aIWklJ8um321QVpw=
github.com/NVIDIA/aistore v1.3.26-0.20241227164600-fbb55bc59304 h1:4BQqN3OSXWS04ciLhSOrS+VuEYRIAoXO8ci6J3W8+gk=
github.com/NVIDIA/aistore v1.3.26-0.20241227164600-fbb55bc59304/go.mod h1:mjhY9OGIZULaC79+iRfzEUvUZw7aIWklJ8um321QVpw=
github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=
github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
Expand Down
29 changes: 23 additions & 6 deletions stats/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/NVIDIA/aistore/api/apc"
"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/debug"
"github.com/NVIDIA/aistore/core"
"github.com/NVIDIA/aistore/core/meta"
"github.com/NVIDIA/aistore/fs"
Expand Down Expand Up @@ -154,10 +155,15 @@ func IsIOErrMetric(name string) bool {
return strings.HasPrefix(name, ioErrPrefix) // e.g., "err.io.get.n" (see ioErrNames)
}

//
// name translations, to recompute latency and throughput over client-controlled intervals
// see "Naming conventions"
//

// compare with base.init() at ais/backend/common
func LatencyToCounter(latency string) string {
func LatencyToCounter(latName string) string {
// 1. basics first
switch latency {
switch latName {
case GetLatency, GetRedirLatency, GetLatencyTotal:
return GetCount
case PutLatency, PutRedirLatency, PutLatencyTotal:
Expand All @@ -170,17 +176,17 @@ func LatencyToCounter(latency string) string {
return AppendCount
}
// 2. filter out
if !strings.Contains(latency, "get.") && !strings.Contains(latency, "put.") {
if !strings.Contains(latName, "get.") && !strings.Contains(latName, "put.") {
return ""
}
// backend first
if strings.HasSuffix(latency, ".ns.total") {
if strings.HasSuffix(latName, ".ns.total") {
for prefix := range apc.Providers {
if prefix == apc.AIS {
prefix = apc.RemAIS
}
if strings.HasPrefix(latency, prefix) {
if strings.Contains(latency, ".get.") {
if strings.HasPrefix(latName, prefix) {
if strings.Contains(latName, ".get.") {
return prefix + "." + GetCount
}
return prefix + "." + PutCount
Expand All @@ -189,3 +195,14 @@ func LatencyToCounter(latency string) string {
}
return ""
}

// SizeToThroughput maps the name of a size metric ("*.size") to the name of
// the matching throughput metric ("*.bps"); see "Naming conventions".
//
// It returns "" when kind is not KindSize. It also returns "" when name does
// not end in ".size"; in that case debug.Assert(false, name) is called first.
func SizeToThroughput(name, kind string) string {
	const (
		sizeSuffix = ".size"
		bpsSuffix  = ".bps"
	)
	if kind == KindSize {
		// one pass: test for the suffix and strip it at the same time
		if base, ok := strings.CutSuffix(name, sizeSuffix); ok {
			return base + bpsSuffix
		}
		// a KindSize metric that violates the naming convention
		debug.Assert(false, name)
	}
	return ""
}
9 changes: 8 additions & 1 deletion stats/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,14 @@ import (
jsoniter "github.com/json-iterator/go"
)

// All error counters must have "err_" prefix (see `errPrefix`)
// Naming conventions:
// ========================================================
// "*.n" - KindCounter
// "*.ns" - KindLatency, KindTotal (nanoseconds)
// "*.size" - KindSize (bytes)
// "*.bps" - KindThroughput, KindComputedThroughput
//
// all error counters must have the "err." prefix (see `errPrefix`), e.g. "err.io.get.n"

// Linkage:
// - this source is common for both Prometheus (common_prom.go) and StatsD (common_statsd.go)
Expand Down
14 changes: 8 additions & 6 deletions stats/target_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ import (
"github.com/NVIDIA/aistore/fs"
)

// Naming Convention:
// Naming conventions:
// ========================================================
// "*.n" - KindCounter
// "*.ns" - KindLatency, KindTotal (nanoseconds)
// "*.size" - KindSize (bytes)
// "*.bps" - KindThroughput, KindComputedThroughput
//
// -> "*.n" - counter
// -> "*.ns" - latency (nanoseconds)
// -> "*.size" - size (bytes)
// -> "*.bps" - throughput (in byte/s)
// -> "*.id" - ID
// all error counters must have the "err." prefix (see `errPrefix`), e.g. "err.io.get.n"

const (
// KindCounter & KindSize - always incremented

Expand Down

0 comments on commit a8374ce

Please sign in to comment.