diff --git a/scheds/rust/Cargo.lock b/scheds/rust/Cargo.lock index 3ab368d25..955758d05 100644 --- a/scheds/rust/Cargo.lock +++ b/scheds/rust/Cargo.lock @@ -527,6 +527,15 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "gpoint" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c00f1d62d57408109a871dd9e12b76645ec4284406d5ec838d277777ef1ef6c" +dependencies = [ + "libc", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -1152,6 +1161,7 @@ dependencies = [ "crossbeam", "ctrlc", "fb_procfs", + "gpoint", "hex", "itertools 0.13.0", "libbpf-rs", diff --git a/scheds/rust/scx_lavd/Cargo.toml b/scheds/rust/scx_lavd/Cargo.toml index f665821a9..98b67d5be 100644 --- a/scheds/rust/scx_lavd/Cargo.toml +++ b/scheds/rust/scx_lavd/Cargo.toml @@ -27,6 +27,7 @@ simplelog = "0.12" static_assertions = "1.1.0" rlimit = "0.10.1" plain = "0.2.3" +gpoint = "0.2" [build-dependencies] scx_utils = { path = "../../../rust/scx_utils", version = "1.0.3" } diff --git a/scheds/rust/scx_lavd/src/bpf/intf.h b/scheds/rust/scx_lavd/src/bpf/intf.h index 48233564b..a5354aae4 100644 --- a/scheds/rust/scx_lavd/src/bpf/intf.h +++ b/scheds/rust/scx_lavd/src/bpf/intf.h @@ -81,11 +81,12 @@ enum consts { LAVD_PREEMPT_TICK_MARGIN = (100ULL * NSEC_PER_USEC), LAVD_SYS_STAT_INTERVAL_NS = (50ULL * NSEC_PER_MSEC), + LAVD_SYS_STAT_DECAY_TIMES = (2ULL * LAVD_TIME_ONE_SEC) / LAVD_SYS_STAT_INTERVAL_NS, LAVD_CC_PER_CORE_MAX_CTUIL = 500, /* maximum per-core CPU utilization */ LAVD_CC_PER_TURBO_CORE_MAX_CTUIL = 750, /* maximum per-core CPU utilization for a turbo core */ LAVD_CC_NR_ACTIVE_MIN = 1, /* num of mininum active cores */ LAVD_CC_NR_OVRFLW = 1, /* num of overflow cores */ - LAVD_CC_CPU_PIN_INTERVAL = (2ULL * LAVD_TIME_ONE_SEC), + LAVD_CC_CPU_PIN_INTERVAL = (1ULL * LAVD_TIME_ONE_SEC), LAVD_CC_CPU_PIN_INTERVAL_DIV = 
(LAVD_CC_CPU_PIN_INTERVAL / LAVD_SYS_STAT_INTERVAL_NS), @@ -122,6 +123,16 @@ struct sys_stat { volatile u32 nr_violation; /* number of utilization violation */ volatile u32 nr_active; /* number of active cores */ + + volatile u64 nr_sched; /* total scheduling so far */ + volatile u64 nr_migration; /* number of task migration */ + volatile u64 nr_preemption; /* number of preemption */ + volatile u64 nr_greedy; /* number of greedy tasks scheduled */ + volatile u64 nr_perf_cri; /* number of performance-critical tasks scheduled */ + volatile u64 nr_lat_cri; /* number of latency-critical tasks scheduled */ + volatile u64 nr_big; /* scheduled on big core */ + volatile u64 nr_pc_on_big; /* performance-critical tasks scheduled on big core */ + volatile u64 nr_lc_on_big; /* latency-critical tasks scheduled on big core */ }; /* @@ -169,7 +180,7 @@ struct cpu_ctx { */ volatile u32 max_lat_cri; /* maximum latency criticality */ volatile u32 sum_lat_cri; /* sum of latency criticality */ - volatile u32 sched_nr; /* number of schedules */ + volatile u32 nr_sched; /* number of schedules */ /* * Information used to keep track of performance criticality @@ -205,6 +216,15 @@ struct cpu_ctx { struct bpf_cpumask __kptr *tmp_o_mask; /* temporary cpu mask */ struct bpf_cpumask __kptr *tmp_t_mask; /* temporary cpu mask */ struct bpf_cpumask __kptr *tmp_t2_mask; /* temporary cpu mask */ + + /* + * Information for statistics. 
+ */ + volatile u32 nr_migration; /* number of migrations */ + volatile u32 nr_preemption; /* number of preemptions */ + volatile u32 nr_greedy; /* number of greedy tasks scheduled */ + volatile u32 nr_perf_cri; + volatile u32 nr_lat_cri; } __attribute__((aligned(CACHELINE_SIZE))); /* @@ -242,12 +262,18 @@ struct task_ctx { volatile s32 victim_cpu; u16 slice_boost_prio; /* how many times a task fully consumed the slice */ u8 wakeup_ft; /* regular wakeup = 1, sync wakeup = 2 */ + /* * Task's performance criticality */ u8 on_big; /* executable on a big core */ u8 on_little; /* executable on a little core */ u32 perf_cri; /* performance criticality of a task */ + + /* + * Information for statistics collection + */ + u32 cpu_id; /* CPU ID scheduled on */ }; /* diff --git a/scheds/rust/scx_lavd/src/bpf/main.bpf.c b/scheds/rust/scx_lavd/src/bpf/main.bpf.c index fdf78b901..6933ebdbf 100644 --- a/scheds/rust/scx_lavd/src/bpf/main.bpf.c +++ b/scheds/rust/scx_lavd/src/bpf/main.bpf.c @@ -197,8 +197,8 @@ char _license[] SEC("license") = "GPL"; volatile u64 nr_cpus_onln; static volatile u64 nr_cpus_big; -static struct sys_stat __sys_stats[2]; -static volatile int __sys_stat_idx; +struct sys_stat __sys_stats[2]; +volatile int __sys_stat_idx; private(LAVD) struct bpf_cpumask __kptr *turbo_cpumask; /* CPU mask for turbo CPUs */ private(LAVD) struct bpf_cpumask __kptr *big_cpumask; /* CPU mask for big CPUs */ @@ -240,6 +240,18 @@ const volatile bool is_autopilot_on; const volatile u32 is_smt_active; const volatile u8 verbose; +/* + * Statistics + */ +volatile int power_mode; +volatile u64 last_power_mode_clk; +volatile u64 performance_mode_ns; +volatile u64 balanced_mode_ns; +volatile u64 powersave_mode_ns; + +/* + * Exit information + */ UEI_DEFINE(uei); #define debugln(fmt, ...) 
\ @@ -320,6 +332,7 @@ struct { static u16 get_nice_prio(struct task_struct *p); static int reinit_active_cpumask_for_performance(void); +static void update_power_mode_time(void); static u64 sigmoid_u64(u64 v, u64 max) { @@ -582,7 +595,15 @@ struct sys_stat_ctx { s32 max_lat_cri; s32 avg_lat_cri; u64 sum_lat_cri; - u32 sched_nr; + u32 nr_sched; + u32 nr_migration; + u32 nr_preemption; + u32 nr_greedy; + u32 nr_perf_cri; + u32 nr_lat_cri; + u32 nr_big; + u32 nr_pc_on_big; + u32 nr_lc_on_big; u64 sum_perf_cri; u32 avg_perf_cri; u64 new_util; @@ -618,6 +639,30 @@ static void collect_sys_stat(struct sys_stat_ctx *c) c->load_actual += cpuc->load_actual; c->load_run_time_ns += cpuc->load_run_time_ns; c->tot_svc_time += cpuc->tot_svc_time; + cpuc->tot_svc_time = 0; + + /* + * Accumulate statistics. + */ + if (cpuc->big_core) { + c->nr_big += cpuc->nr_sched; + c->nr_pc_on_big += cpuc->nr_perf_cri; + c->nr_lc_on_big += cpuc->nr_lat_cri; + } + c->nr_perf_cri += cpuc->nr_perf_cri; + cpuc->nr_perf_cri = 0; + + c->nr_lat_cri += cpuc->nr_lat_cri; + cpuc->nr_lat_cri = 0; + + c->nr_migration += cpuc->nr_migration; + cpuc->nr_migration = 0; + + c->nr_preemption += cpuc->nr_preemption; + cpuc->nr_preemption = 0; + + c->nr_greedy += cpuc->nr_greedy; + cpuc->nr_greedy = 0; /* * Accumulate task's latency criticlity information. @@ -629,8 +674,8 @@ static void collect_sys_stat(struct sys_stat_ctx *c) c->sum_lat_cri += cpuc->sum_lat_cri; cpuc->sum_lat_cri = 0; - c->sched_nr += cpuc->sched_nr; - cpuc->sched_nr = 0; + c->nr_sched += cpuc->nr_sched; + cpuc->nr_sched = 0; if (cpuc->max_lat_cri > c->max_lat_cri) c->max_lat_cri = cpuc->max_lat_cri; @@ -701,7 +746,7 @@ static void calc_sys_stat(struct sys_stat_ctx *c) c->compute_total = 0; c->new_util = (c->compute_total * LAVD_CPU_UTIL_MAX)/c->duration_total; - if (c->sched_nr == 0) { + if (c->nr_sched == 0) { /* * When a system is completely idle, it is indeed possible * nothing scheduled for an interval. 
@@ -711,13 +756,15 @@ static void calc_sys_stat(struct sys_stat_ctx *c) c->avg_perf_cri = c->stat_cur->avg_perf_cri; } else { - c->avg_lat_cri = c->sum_lat_cri / c->sched_nr; - c->avg_perf_cri = c->sum_perf_cri / c->sched_nr; + c->avg_lat_cri = c->sum_lat_cri / c->nr_sched; + c->avg_perf_cri = c->sum_perf_cri / c->nr_sched; } } static void update_sys_stat_next(struct sys_stat_ctx *c) { + static int cnt = 0; + /* * Update the CPU utilization to the next version. */ @@ -741,11 +788,45 @@ static void update_sys_stat_next(struct sys_stat_ctx *c) stat_next->nr_violation = calc_avg32(stat_cur->nr_violation, c->nr_violation); - stat_next->avg_svc_time = (c->sched_nr == 0) ? 0 : - c->tot_svc_time / c->sched_nr; + stat_next->avg_svc_time = (c->nr_sched == 0) ? 0 : + c->tot_svc_time / c->nr_sched; stat_next->nr_queued_task = calc_avg(stat_cur->nr_queued_task, c->nr_queued_task); + + + /* + * Halve the statistics every minute so the statistics hold the + * information on a few minutes. + */ + if (cnt++ == LAVD_SYS_STAT_DECAY_TIMES) { + cnt = 0; + stat_next->nr_sched >>= 1; + stat_next->nr_migration >>= 1; + stat_next->nr_preemption >>= 1; + stat_next->nr_greedy >>= 1; + stat_next->nr_perf_cri >>= 1; + stat_next->nr_lat_cri >>= 1; + stat_next->nr_big >>= 1; + stat_next->nr_pc_on_big >>= 1; + stat_next->nr_lc_on_big >>= 1; + + __sync_fetch_and_sub(&performance_mode_ns, performance_mode_ns/2); + __sync_fetch_and_sub(&balanced_mode_ns, balanced_mode_ns/2); + __sync_fetch_and_sub(&powersave_mode_ns, powersave_mode_ns/2); + } + + stat_next->nr_sched += c->nr_sched; + stat_next->nr_migration += c->nr_migration; + stat_next->nr_preemption += c->nr_preemption; + stat_next->nr_greedy += c->nr_greedy; + stat_next->nr_perf_cri += c->nr_perf_cri; + stat_next->nr_lat_cri += c->nr_lat_cri; + stat_next->nr_big += c->nr_big; + stat_next->nr_pc_on_big += c->nr_pc_on_big; + stat_next->nr_lc_on_big += c->nr_lc_on_big; + + update_power_mode_time(); } static void do_update_sys_stat(void) @@ 
-905,21 +986,49 @@ static void do_core_compaction(void) bpf_rcu_read_unlock(); } -int do_set_power_profile(s32 power_mode, int util) +static void update_power_mode_time(void) { - static s32 cur_mode = LAVD_PM_MAX; + u64 now = bpf_ktime_get_ns(); + u64 delta; + if (last_power_mode_clk == 0) + last_power_mode_clk = now; + + delta = now - last_power_mode_clk; + last_power_mode_clk = now; + + switch (power_mode) { + case LAVD_PM_PERFORMANCE: + __sync_fetch_and_add(&performance_mode_ns, delta); + break; + case LAVD_PM_BALANCED: + __sync_fetch_and_add(&balanced_mode_ns, delta); + break; + case LAVD_PM_POWERSAVE: + __sync_fetch_and_add(&powersave_mode_ns, delta); + break; + } +} + + +static int do_set_power_profile(s32 pm, int util) +{ /* * Skip setting the mode if alreay in the same mode. */ - if (cur_mode == power_mode) + if (power_mode == pm) return 0; - cur_mode = power_mode; + + /* + * Update power mode time + */ + update_power_mode_time(); + power_mode = pm; /* * Change the power mode. */ - switch (power_mode) { + switch (pm) { case LAVD_PM_PERFORMANCE: no_core_compaction = true; no_freq_scaling = true; @@ -1184,13 +1293,6 @@ static u64 calc_time_slice(struct task_struct *p, struct task_ctx *taskc, taskc->slice_boost_prio) / LAVD_SLICE_BOOST_MAX_STEP; } - /* - * Boost time slice based on CPU's capacity to assign a longer time - * slice for a more performant CPU for making each CPU's job processing - * throughput similar. 
- */ - slice = slice * cpuc->capacity / 1024; - /* * If a task has yet to be scheduled (i.e., a freshly forked task or a * task just under sched_ext), don't give a fair amount of time slice @@ -1274,6 +1376,7 @@ static void update_stat_for_running(struct task_struct *p, struct task_ctx *taskc, struct cpu_ctx *cpuc) { + struct sys_stat *stat_cur = get_sys_stat_cur(); u64 wait_period, interval; u64 now = bpf_ktime_get_ns(); u64 wait_freq_ft, wake_freq_ft, perf_cri; @@ -1306,7 +1409,7 @@ static void update_stat_for_running(struct task_struct *p, if (cpuc->max_lat_cri < taskc->lat_cri) cpuc->max_lat_cri = taskc->lat_cri; cpuc->sum_lat_cri += taskc->lat_cri; - cpuc->sched_nr++; + cpuc->nr_sched++; /* * It is clear there is no need to consider the suspended duration @@ -1345,6 +1448,30 @@ static void update_stat_for_running(struct task_struct *p, * Update task state when starts running. */ taskc->last_running_clk = now; + + /* + * Update statistics information. + */ + if (taskc->cpu_id != cpuc->cpu_id) { + taskc->cpu_id = cpuc->cpu_id; + cpuc->nr_migration++; + } + + if (taskc->victim_cpu >= 0) + cpuc->nr_preemption++; + + if (is_lat_cri(taskc, stat_cur)) { + cpuc->nr_lat_cri++; +// debugln("------------------------ lc = %llu", cpuc->nr__cri); + } + + if (is_perf_cri(taskc, stat_cur)) { + cpuc->nr_perf_cri++; +// debugln("------------------------ pc = %llu", cpuc->nr_perf_cri); + } + + if (is_greedy(taskc)) + cpuc->nr_greedy++; } static u64 calc_svc_time(struct task_struct *p, struct task_ctx *taskc) @@ -1626,13 +1753,14 @@ static s32 pick_idle_cpu(struct task_struct *p, struct task_ctx *taskc, /* * If the task cannot run on either active or overflow cores, * stay on the previous core (if it is okay) or one of its taskset. + * Then, put the CPU to the overflow set. 
*/ +start_any_mask: if (bpf_cpumask_test_cpu(prev_cpu, p->cpus_ptr)) cpu_id = prev_cpu; - else { -start_any_mask: + else cpu_id = bpf_cpumask_any_distribute(p->cpus_ptr); - } + bpf_cpumask_set_cpu(cpu_id, ovrflw); /* * Note that we don't need to kick the picked CPU here since the diff --git a/scheds/rust/scx_lavd/src/main.rs b/scheds/rust/scx_lavd/src/main.rs index c2ddb12eb..261847c9b 100644 --- a/scheds/rust/scx_lavd/src/main.rs +++ b/scheds/rust/scx_lavd/src/main.rs @@ -12,6 +12,7 @@ pub mod bpf_intf; pub use bpf_intf::*; mod stats; +use stats::SysStats; use stats::SchedSample; use stats::SchedSamples; use stats::StatsReq; @@ -122,6 +123,14 @@ struct Opts { #[clap(long = "no-freq-scaling", action = clap::ArgAction::SetTrue)] no_freq_scaling: bool, + /// Enable stats monitoring with the specified interval. + #[clap(long)] + stats: Option, + + /// Run in stats monitoring mode with the specified interval. Scheduler is not launched. + #[clap(long)] + monitor: Option, + /// Run in monitoring mode. Show the specified number of scheduling /// samples every second. #[clap(long)] @@ -135,6 +144,10 @@ struct Opts { /// Print scheduler version and exit. #[clap(short = 'V', long, action = clap::ArgAction::SetTrue)] version: bool, + + /// Show descriptions for statistics. + #[clap(long)] + help_stats: bool, } impl Opts { @@ -448,8 +461,9 @@ struct Scheduler<'a> { rb_mgr: libbpf_rs::RingBuffer<'static>, intrspc: introspec, intrspc_rx: Receiver, - sampler_tid: Option, + monitor_tid: Option, stats_server: StatsServer, + mseq_id: u64, } impl<'a> Scheduler<'a> { @@ -494,8 +508,9 @@ impl<'a> Scheduler<'a> { rb_mgr, intrspc: introspec::new(), intrspc_rx, - sampler_tid: None, + monitor_tid: None, stats_server, + mseq_id: 0, }) } @@ -626,19 +641,97 @@ impl<'a> Scheduler<'a> { self.skel.maps.bss_data.intrspc.cmd = LAVD_CMD_NOP; } + fn get_pc(x: u64, y: u64) -> f64 { + return 100. 
* x as f64 / y as f64; + } + + fn get_power_mode(power_mode: s32) -> &'static str { + const LAVD_PM_PERFORMANCE: s32 = 0; + const LAVD_PM_BALANCED: s32 = 1; + const LAVD_PM_POWERSAVE: s32 = 2; + + match power_mode { + LAVD_PM_PERFORMANCE => { + return &"performance"; + } + LAVD_PM_BALANCED => { + return &"balanced"; + } + LAVD_PM_POWERSAVE => { + return &"powersave"; + } + _ => { + return &"unknown"; + } + } + } + fn stats_req_to_res(&mut self, req: &StatsReq) -> Result { Ok(match req { StatsReq::NewSampler(tid) => { self.rb_mgr.consume().unwrap(); - self.sampler_tid = Some(*tid); + self.monitor_tid = Some(*tid); StatsRes::Ack } + StatsReq::SysStatsReq { + tid, + } => { + if Some(*tid) != self.monitor_tid { + return Ok(StatsRes::Bye); + } + self.mseq_id += 1; + + let bss_data = &self.skel.maps.bss_data; + let st = bss_data.__sys_stats[0]; + + let mseq = self.mseq_id; + let avg_svc_time = st.avg_svc_time; + let nr_queued_task = st.nr_queued_task; + let nr_active = st.nr_active; + let nr_sched = st.nr_sched; + let pc_migration = Self::get_pc(st.nr_migration, nr_sched); + let pc_preemption = Self::get_pc(st.nr_preemption, nr_sched); + let pc_greedy = Self::get_pc(st.nr_greedy, nr_sched); + let pc_pc = Self::get_pc(st.nr_perf_cri, nr_sched); + let pc_lc = Self::get_pc(st.nr_lat_cri, nr_sched); + let nr_big = st.nr_big; + let pc_big = Self::get_pc(nr_big, nr_sched); + let pc_pc_on_big = Self::get_pc(st.nr_pc_on_big, nr_big); + let pc_lc_on_big = Self::get_pc(st.nr_lc_on_big, nr_big); + let power_mode = Self::get_power_mode(bss_data.power_mode); + let total_time = bss_data.performance_mode_ns + + bss_data.balanced_mode_ns + + bss_data.powersave_mode_ns; + let pc_performance = Self::get_pc(bss_data.performance_mode_ns, total_time); + let pc_balanced = Self::get_pc(bss_data.balanced_mode_ns, total_time); + let pc_powersave = Self::get_pc(bss_data.powersave_mode_ns, total_time); + + StatsRes::SysStats(SysStats { + mseq, + avg_svc_time, + nr_queued_task, + nr_active, + 
nr_sched, + pc_migration, + pc_preemption, + pc_greedy, + pc_pc, + pc_lc, + pc_big, + pc_pc_on_big, + pc_lc_on_big, + power_mode: power_mode.to_string(), + pc_performance, + pc_balanced, + pc_powersave, + }) + } StatsReq::SchedSamplesNr { tid, nr_samples, interval_ms, } => { - if Some(*tid) != self.sampler_tid { + if Some(*tid) != self.monitor_tid { return Ok(StatsRes::Bye); } @@ -791,6 +884,11 @@ fn main() -> Result<()> { return Ok(()); } + if opts.help_stats { + stats::server_data(0).describe_meta(&mut std::io::stdout(), None)?; + return Ok(()); + } + init_log(&opts); debug!("{:#?}", opts); @@ -808,6 +906,17 @@ fn main() -> Result<()> { return Ok(()); } + if let Some(intv) = opts.monitor.or(opts.stats) { + let shutdown_copy = shutdown.clone(); + let jh = std::thread::spawn(move || { + stats::monitor(Duration::from_secs_f64(intv), shutdown_copy).unwrap() + }); + if opts.monitor.is_some() { + let _ = jh.join(); + return Ok(()); + } + } + let mut open_object = MaybeUninit::uninit(); loop { let mut sched = Scheduler::init(&opts, &mut open_object)?; diff --git a/scheds/rust/scx_lavd/src/stats.rs b/scheds/rust/scx_lavd/src/stats.rs index eabf5f8c6..01cb5e111 100644 --- a/scheds/rust/scx_lavd/src/stats.rs +++ b/scheds/rust/scx_lavd/src/stats.rs @@ -11,30 +11,165 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::thread::ThreadId; use std::time::Duration; +use gpoint::GPoint; + +#[derive(Clone, Debug, Default, Serialize, Deserialize, Stats)] +#[stat(top)] +pub struct SysStats { + #[stat(desc = "Sequence ID of this messge")] + pub mseq: u64, + + #[stat(desc = "Average runtime per schedule")] + pub avg_svc_time: u64, + + #[stat(desc = "Number of runnable tasks in runqueues")] + pub nr_queued_task: u64, + + #[stat(desc = "Number of active CPUs when core compaction is enabled")] + pub nr_active: u32, + + #[stat(desc = "Number of context switches")] + pub nr_sched: u64, + + #[stat(desc = "% of task migration")] + pub pc_migration: f64, + + #[stat(desc = "% of 
task preemption")] + pub pc_preemption: f64, + + #[stat(desc = "% of greedy tasks")] + pub pc_greedy: f64, + + #[stat(desc = "% of performance-critical tasks")] + pub pc_pc: f64, + + #[stat(desc = "% of latency-critical tasks")] + pub pc_lc: f64, + + #[stat(desc = "% of tasks scheduled on big cores")] + pub pc_big: f64, + + #[stat(desc = "% of performance-critical tasks scheduled on big cores")] + pub pc_pc_on_big: f64, + + #[stat(desc = "% of latency-critical tasks scheduled on big cores")] + pub pc_lc_on_big: f64, + + #[stat(desc = "Current power mode")] + pub power_mode: String, + + #[stat(desc = "% of performance mode")] + pub pc_performance: f64, + + #[stat(desc = "% of balanced mode")] + pub pc_balanced: f64, + + #[stat(desc = "% of powersave powersave")] + pub pc_powersave: f64, +} + +impl SysStats { + pub fn format_header(w: &mut W) -> Result<()> { + writeln!( + w, + "\x1b[93m| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |\x1b[0m", + "MSEQ", + "SVC_TIME", + "# Q TASK", + "# ACT CPU", + "# SCHED", + "MIGRATE%", + "PREEMPT%", + "GREEDY%", + "PERF-CR%", + "LAT-CR%", + "BIG%", + "PC/BIG%", + "LC/BIG%", + "POWER MODE", + "PERFORMANCE%", + "BALANCED%", + "POWERSAVE%", + )?; + Ok(()) + } + + fn format(&self, w: &mut W) -> Result<()> { + if self.mseq % 10 == 1 { + Self::format_header(w)?; + } + + writeln!( + w, + "| {:8} | {:9} | {:9} | {:9} | {:9} | {:9} | {:9} | {:8} | {:8} | {:8} | {:8} | {:8} | {:8} | {:11} | {:12} | {:12} | {:12} |", + self.mseq, + self.avg_svc_time, + self.nr_queued_task, + self.nr_active, + self.nr_sched, + GPoint(self.pc_migration), + GPoint(self.pc_preemption), + GPoint(self.pc_greedy), + GPoint(self.pc_pc), + GPoint(self.pc_lc), + GPoint(self.pc_big), + GPoint(self.pc_pc_on_big), + GPoint(self.pc_lc_on_big), + self.power_mode, + GPoint(self.pc_performance), + GPoint(self.pc_balanced), + GPoint(self.pc_powersave), + )?; + Ok(()) + } + +} #[derive(Clone, Debug, 
Default, Serialize, Deserialize, Stats)] pub struct SchedSample { + #[stat(desc = "Sequence ID of this message")] pub mseq: u64, + #[stat(desc = "Process ID")] pub pid: i32, + #[stat(desc = "Task name")] pub comm: String, + #[stat(desc = "LR: 'L'atency-critical or 'R'egular, HI: performance-'H'ungry or performance-'I'nsensitive, BT: 'B'ig or li'T'tle, EG: 'E'ligigle or 'G'reedy, PN: 'P'reempting or 'N'ot")] pub stat: String, + #[stat(desc = "CPU id where this task is scheduled on")] pub cpu_id: u32, + #[stat(desc = "Victim CPU to be preempted out (-1 = no preemption)")] pub victim_cpu: i32, + #[stat(desc = "Assigned virtual deadline")] pub vdeadline_delta_ns: u64, + #[stat(desc = "Assigned time slice")] pub slice_ns: u64, + #[stat(desc = "How greedy this task is in using CPU time (1000 = fair)")] pub greedy_ratio: u32, + #[stat(desc = "Latency criticality of this task")] pub lat_cri: u32, + #[stat(desc = "Average latency criticality in a system")] pub avg_lat_cri: u32, + #[stat(desc = "Static priority (20 == nice 0)")] pub static_prio: u16, + #[stat(desc = "Slice boost factor (number of consecutive full slice exhaustions)")] pub slice_boost_prio: u16, + #[stat(desc = "How often this task is scheduled per second")] pub run_freq: u64, + #[stat(desc = "Average runtime per schedule")] pub run_time_ns: u64, + #[stat(desc = "How frequently this task waits for other tasks")] pub wait_freq: u64, + #[stat(desc = "How frequently this task wakes other tasks")] pub wake_freq: u64, + #[stat(desc = "Performance criticality of this task")] pub perf_cri: u32, + #[stat(desc = "Average performance criticality in a system")] pub avg_perf_cri: u32, + #[stat(desc = "Target performance level of this CPU")] pub cpuperf_cur: u32, + #[stat(desc = "CPU utilization of this particular CPU")] pub cpu_util: u64, + #[stat(desc = "Number of active CPUs when core compaction is enabled")] pub nr_active: u32, } @@ -42,42 +177,42 @@ impl SchedSample { pub fn format_header(w: &mut W) -> Result<()> { 
writeln!( w, - "| {:6} | {:7} | {:17} \ + "\x1b[93m| {:6} | {:7} | {:17} \ | {:5} | {:4} | {:4} \ | {:14} | {:8} | {:7} \ | {:8} | {:7} | {:8} \ | {:7} | {:9} | {:9} \ | {:9} | {:9} | {:8} \ | {:8} | {:8} | {:8} \ - | {:6} |", - "mseq", - "pid", - "comm", - "stat", - "cpu", - "vtmc", - "vddln_ns", - "slc_ns", - "grdy_rt", - "lat_cri", - "avg_lc", - "st_prio", - "slc_bst", - "run_freq", - "run_tm_ns", - "wait_freq", - "wake_freq", - "perf_cri", - "avg_pc", - "cpufreq", - "cpu_util", - "nr_act", + | {:6} |\x1b[0m", + "MSEQ", + "PID", + "COMM", + "STAT", + "CPU", + "VTMC", + "VDDLN_NS", + "SLC_NS", + "GRDY_RT", + "LAT_CRI", + "AVG_LC", + "ST_PRIO", + "SLC_BST", + "RUN_FREQ", + "RUN_TM_NS", + "WAIT_FREQ", + "WAKE_FREQ", + "PERF_CRI", + "AVG_PC", + "CPUFREQ", + "CPU_UTIL", + "NR_ACT", )?; Ok(()) } pub fn format(&self, w: &mut W) -> Result<()> { - if self.mseq % 32 == 1 { + if self.mseq % 10 == 1 { Self::format_header(w)?; } @@ -126,6 +261,9 @@ pub struct SchedSamples { #[derive(Debug)] pub enum StatsReq { NewSampler(ThreadId), + SysStatsReq { + tid: ThreadId, + }, SchedSamplesNr { tid: ThreadId, nr_samples: u64, @@ -134,7 +272,15 @@ pub enum StatsReq { } impl StatsReq { - fn from_args( + fn from_args_stats( + tid: ThreadId, + ) -> Result { + Ok(Self::SysStatsReq { + tid, + }) + } + + fn from_args_samples( tid: ThreadId, nr_cpus_onln: u64, args: &BTreeMap, @@ -164,12 +310,36 @@ impl StatsReq { pub enum StatsRes { Ack, Bye, + SysStats(SysStats), SchedSamples(SchedSamples), } pub fn server_data(nr_cpus_onln: u64) -> StatsServerData { - let samples_open: Box> = - Box::new(move |(req_ch, res_ch)| { + let open: Box> = Box::new(move |(req_ch, res_ch)| { + let tid = std::thread::current().id(); + req_ch.send(StatsReq::NewSampler(tid))?; + match res_ch.recv()? 
{ + StatsRes::Ack => {} + res => bail!("invalid response: {:?}", &res), + } + + let read: Box> = + Box::new(move |_args, (req_ch, res_ch)| { + let req = StatsReq::from_args_stats(tid)?; + req_ch.send(req)?; + + let stats = match res_ch.recv()? { + StatsRes::SysStats(v) => v, + StatsRes::Bye => bail!("preempted by another sampler"), + res => bail!("invalid response: {:?}", &res), + }; + + stats.to_json() + }); + Ok(read) + }); + + let samples_open: Box> = Box::new(move |(req_ch, res_ch)| { let tid = std::thread::current().id(); req_ch.send(StatsReq::NewSampler(tid))?; match res_ch.recv()? { @@ -179,7 +349,7 @@ pub fn server_data(nr_cpus_onln: u64) -> StatsServerData { let read: Box> = Box::new(move |args, (req_ch, res_ch)| { - let req = StatsReq::from_args(tid, nr_cpus_onln, args)?; + let req = StatsReq::from_args_samples(tid, nr_cpus_onln, args)?; req_ch.send(req)?; let samples = match res_ch.recv()? { @@ -194,6 +364,14 @@ pub fn server_data(nr_cpus_onln: u64) -> StatsServerData { }); StatsServerData::new() + .add_meta(SysStats::meta()) + .add_ops( + "top", + StatsOps { + open: open, + close: None, + }, + ) .add_meta(SchedSample::meta()) .add_ops( "sched_samples", @@ -205,13 +383,6 @@ pub fn server_data(nr_cpus_onln: u64) -> StatsServerData { } pub fn monitor_sched_samples(nr_samples: u64, shutdown: Arc) -> Result<()> { - println!("## stats"); - println!(" LR: 'L'atency-critical or 'R'egular"); - println!(" HI: performance-'H'ungry or performance-'I'nsensitive"); - println!(" BT: 'B'ig or li'T'tle"); - println!(" EG: 'E'ligigle or 'G'reedy"); - println!(" PN: 'P'reempting or 'N'ot"); - scx_utils::monitor_stats::( &vec![ ("target".into(), "sched_samples".into()), @@ -228,3 +399,13 @@ pub fn monitor_sched_samples(nr_samples: u64, shutdown: Arc) -> Resu }, ) } + +pub fn monitor(intv: Duration, shutdown: Arc) -> Result<()> { + scx_utils::monitor_stats::( + &vec![], + intv, + || shutdown.load(Ordering::Relaxed), + |sysstats| sysstats.format(&mut std::io::stdout()), + ); 
+ Ok(()) +}