From 952b0b95ad41bd569c660e7dfb9d31ccad01d57e Mon Sep 17 00:00:00 2001 From: Christian von Elm Date: Wed, 15 Jan 2025 12:58:11 +0100 Subject: [PATCH] Add print_attr() Add a function for printing the perf_event_attr of Event's. This is incredibly useful for debugging. Printing out the perf_event_attr is triggered on event open and is only printed when the log level trace is specified Move pre-defined event table from global to function scope, so that the events in it do not get initialized before we set up logging. --- include/lo2s/perf/event.hpp | 2 + src/perf/event.cpp | 243 +++++++++++++++++++++++++++++++++++- src/perf/event_provider.cpp | 87 +++++++------ 3 files changed, 287 insertions(+), 45 deletions(-) diff --git a/include/lo2s/perf/event.hpp b/include/lo2s/perf/event.hpp index 2e8a385e..4068536f 100644 --- a/include/lo2s/perf/event.hpp +++ b/include/lo2s/perf/event.hpp @@ -149,6 +149,8 @@ class Event attr_.wakeup_watermark = static_cast(0.8 * mmap_pages * sysconf(_SC_PAGESIZE)); } + void print_attr(); + void exclude_kernel(bool exclude_kernel) { attr_.exclude_kernel = exclude_kernel; diff --git a/src/perf/event.cpp b/src/perf/event.cpp index 5fb3305a..612a0d16 100644 --- a/src/perf/event.cpp +++ b/src/perf/event.cpp @@ -348,6 +348,242 @@ bool Event::degrade_precision() } } +static void print_bits(std::string name, std::map known_bits, uint64_t value) +{ + std::vector active_bits; + for (auto elem : known_bits) + { + if (value & elem.first) + { + active_bits.push_back(elem.second); + value &= ~elem.first; + } + } + + if (value != 0) + { + Log::warn() << "Unknown perf_event_attr." << name << " bits: " << std::hex << value + << std::dec; + } + + std::string active_bits_str; + if (!active_bits.empty()) + { + auto it = active_bits.begin(); + for (; it != --active_bits.end(); it++) + { + active_bits_str += *it; + active_bits_str += " | "; + } + active_bits_str += *it; + } + Log::trace() << "\t" << name << ": " << active_bits_str; +} + +void Event::print_attr() +{ + Log::trace() << "{"; + switch (attr_.type) + { + case PERF_TYPE_HARDWARE: + Log::trace() << "\ttype: PERF_TYPE_HARDWARE"; + break; + case PERF_TYPE_SOFTWARE: + Log::trace() << "\ttype: PERF_TYPE_SOFTWARE"; + break; + case PERF_TYPE_TRACEPOINT: + Log::trace() << "\ttype: PERF_TYPE_TRACEPOINT"; + break; + case PERF_TYPE_HW_CACHE: + Log::trace() << "\ttype: PERF_TYPE_HW_CACHE"; + break; + case PERF_TYPE_RAW: + Log::trace() << "\ttype: PERF_TYPE_RAW"; + break; + case PERF_TYPE_BREAKPOINT: + Log::trace() << "\ttype: PERF_TYPE_BREAKPOINT"; + break; + default: + Log::warn() << "Unknown perf_event_attr.type: " << std::hex << attr_.type << std::dec; + } + Log::trace() << "\tsize: " << attr_.size; + Log::trace() << std::hex << "\tconfig: 0x" << attr_.config << std::dec; + Log::trace() << std::hex << "\tconfig1: 0x" << attr_.config1 << std::dec; + Log::trace() << std::hex << "\tconfig2: 0x" << attr_.config2 << std::dec; + Log::trace() << "\tprecise_ip: " << attr_.precise_ip << "/3"; + + std::map read_format = { + { PERF_FORMAT_TOTAL_TIME_ENABLED, "PERF_FORMAT_TOTAL_TIME_ENABLED" }, + { PERF_FORMAT_TOTAL_TIME_RUNNING, "PERF_FORMAT_TOTAL_TIME_RUNNING" }, + { PERF_FORMAT_ID, "PERF_FORMAT_ID" }, + { PERF_FORMAT_GROUP, "PERF_FORMAT_GROUP" }, + { PERF_FORMAT_LOST, "PERF_FORMAT_LOST" } + }; + + print_bits("read_format", read_format, attr_.read_format); + + if (attr_.type == PERF_TYPE_BREAKPOINT) + { + std::map bp_types = { { HW_BREAKPOINT_EMPTY, "HW_BREAKPOINT_EMPTY" }, + { HW_BREAKPOINT_W, "HW_BREAKPOINT_W" }, + { HW_BREAKPOINT_R, "HW_BREAKPOINT_R" }, + { HW_BREAKPOINT_RW, "HW_BREAKPOINT_RW" }, + { HW_BREAKPOINT_X, "HW_BREAKPOINT_X" } }; + + print_bits("bp_type", bp_types, attr_.bp_type); + Log::trace() << "\tbp_addr: 0x" << std::hex << attr_.bp_addr << std::dec; + switch (attr_.bp_len) + { + case HW_BREAKPOINT_LEN_1: + Log::trace() << "\tbp_len: HW_BREAKPOINT_LEN_1"; + break; + case HW_BREAKPOINT_LEN_2: + Log::trace() << "\tbp_len: HW_BREAKPOINT_LEN_2"; + break; + case HW_BREAKPOINT_LEN_4: + Log::trace() << "\tbp_len: HW_BREAKPOINT_LEN_4"; + break; + case HW_BREAKPOINT_LEN_8: + Log::trace() << "\tbp_len: HW_BREAKPOINT_LEN_8"; + break; + } + } + if (attr_.freq) + { + Log::trace() << "\tsample_freq: " << attr_.sample_freq; + } + else + { + Log::trace() << "\tsample_period: " << attr_.sample_period; + } + + std::map sample_format = { { PERF_SAMPLE_CALLCHAIN, + "PERF_SAMPLE_CALLCHAIN" }, + { PERF_SAMPLE_READ, "PERF_SAMPLE_READ" }, + { PERF_SAMPLE_IP, "PERF_SAMPLE_IP" }, + { PERF_SAMPLE_TID, "PERF_SAMPLE_TID" }, + { PERF_SAMPLE_CPU, "PERF_SAMPLE_CPU" }, + { PERF_SAMPLE_TIME, "PERF_SAMPLE_TIME" } }; + + print_bits("sample_type", sample_format, attr_.sample_type); + + Log::trace() << "\tFLAGS: "; + if (attr_.disabled) + { + Log::trace() << "\t\t- disabled"; + } + if (attr_.pinned) + { + Log::trace() << "\t\t- pinned"; + } + if (attr_.exclusive) + { + Log::trace() << "\t\t- exclusive"; + } + if (attr_.exclude_kernel) + { + Log::trace() << "\t\t- exclude_kernel"; + } + if (attr_.exclude_user) + { + Log::trace() << "\t\t- exclude_user"; + } + if (attr_.exclude_hv) + { + Log::trace() << "\t\t- exclude_hv"; + } + if (attr_.exclude_idle) + { + Log::trace() << "\t\t- exclude_idle"; + } + if (attr_.mmap) + { + Log::trace() << "\t\t- mmap"; + } + if (attr_.freq) + { + Log::trace() << "\t\t- freq"; + } + if (attr_.inherit_stat) + { + Log::trace() << "\t\t- inherit_stat"; + } + if (attr_.enable_on_exec) + { + Log::trace() << "\t\t- enable_on_exec"; + } + if (attr_.task) + { + Log::trace() << "\t\t- task"; + } + if (attr_.watermark) + { + Log::trace() << "\t\t- wakeup_watermark (" << attr_.wakeup_watermark << " bytes)"; + } + else + { + + Log::trace() << "\t\t- wakeup_events (" << attr_.wakeup_events << " events)"; + } + if (attr_.mmap_data) + { + Log::trace() << "\t\t- mmap_data"; + } + if (attr_.sample_id_all) + { + Log::trace() << "\t\t- sample_id_all"; + } + if (attr_.mmap2) + { + Log::trace() << "\t\t- mmap2"; + } + if (attr_.context_switch) + { + Log::trace() << "\t\t- context_switch"; + } + if (attr_.comm_exec) + { + Log::trace() << "\t\t- comm_exec"; + } + if (attr_.comm) + { + Log::trace() << "\t\t- comm"; + } + + if (attr_.comm_exec) + { + Log::trace() << "\t\t- comm_exec"; + } + if (attr_.use_clockid) + { + std::string clock; + switch (attr_.clockid) + { + case CLOCK_MONOTONIC: + clock = "monotonic"; + break; + case CLOCK_MONOTONIC_RAW: + clock = "monotonic-raw"; + break; + case CLOCK_REALTIME: + clock = "realtime"; + break; + case CLOCK_BOOTTIME: + clock = "boottime"; + break; + case CLOCK_TAI: + clock = "tai"; + break; + default: + Log::warn() << "Unknown clockid: " << attr_.clockid; + clock = "???"; + break; + } + Log::trace() << "\t\t- clockid (" << clock << ")"; + } + + Log::trace() << "}"; +} SysfsEvent::SysfsEvent(const std::string& ev_name, bool enable_on_exec) : Event(ev_name, static_cast(0), 0) { @@ -561,18 +797,23 @@ EventGuard::EventGuard(Event& ev, std::variant location, int group_ ExecutionScope scope; std::visit([&scope](auto loc) { scope = loc.as_scope(); }, location); + Log::trace() << "Opening perf event: " << ev.name() << "[" << scope.name() + << ",group: " << group_fd << ", cgroup: " << cgroup_fd << "]"; + ev.print_attr(); fd_ = perf_event_open(&ev.mut_attr(), scope, group_fd, 0, cgroup_fd); if (fd_ < 0) { + Log::trace() << "Couldn't open event!: " << strerror(errno); throw_errno(); } if (fcntl(fd_, F_SETFL, O_NONBLOCK)) { - Log::error() << errno; + Log::trace() << "Couldn't set event nonblocking: " << strerror(errno); throw_errno(); } + Log::trace() << "Succesfully opened perf event!"; } void EventGuard::enable() diff --git a/src/perf/event_provider.cpp b/src/perf/event_provider.cpp index 43eb8b11..5f2befaa 100644 --- a/src/perf/event_provider.cpp +++ b/src/perf/event_provider.cpp @@ -58,50 +58,6 @@ namespace #define PERF_EVENT_HW(name, id) PERF_EVENT(name, PERF_TYPE_HARDWARE, PERF_COUNT_HW_##id) #define PERF_EVENT_SW(name, id) PERF_EVENT(name, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_##id) -static lo2s::perf::Event HW_EVENT_TABLE[] = { - PERF_EVENT_HW("cpu-cycles", CPU_CYCLES), - PERF_EVENT_HW("instructions", INSTRUCTIONS), - PERF_EVENT_HW("cache-references", CACHE_REFERENCES), - PERF_EVENT_HW("cache-misses", CACHE_MISSES), - PERF_EVENT_HW("branch-instructions", BRANCH_INSTRUCTIONS), - PERF_EVENT_HW("branch-misses", BRANCH_MISSES), - PERF_EVENT_HW("bus-cycles", BUS_CYCLES), -#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_FRONTEND - PERF_EVENT_HW("stalled-cycles-frontend", STALLED_CYCLES_FRONTEND), -#endif -#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_BACKEND - PERF_EVENT_HW("stalled-cycles-backend", STALLED_CYCLES_BACKEND), -#endif -#ifdef HAVE_PERF_EVENT_REF_CYCLES - PERF_EVENT_HW("ref-cycles", REF_CPU_CYCLES), -#endif -}; - -static lo2s::perf::Event SW_EVENT_TABLE[] = { - PERF_EVENT_SW("cpu-clock", CPU_CLOCK), - PERF_EVENT_SW("task-clock", TASK_CLOCK), - PERF_EVENT_SW("page-faults", PAGE_FAULTS), - PERF_EVENT_SW("context-switches", CONTEXT_SWITCHES), - PERF_EVENT_SW("cpu-migrations", CPU_MIGRATIONS), - PERF_EVENT_SW("minor-faults", PAGE_FAULTS_MIN), - PERF_EVENT_SW("major-faults", PAGE_FAULTS_MAJ), -#ifdef HAVE_PERF_EVENT_ALIGNMENT_FAULTS - PERF_EVENT_SW("alignment-faults", ALIGNMENT_FAULTS), -#endif -#ifdef HAVE_PERF_EVENT_EMULATION_FAULTS - PERF_EVENT_SW("emulation-faults", EMULATION_FAULTS), -#endif -#ifdef HAVE_PERF_EVENT_DUMMY - PERF_EVENT_SW("dummy", DUMMY), -#endif -#ifdef HAVE_PERF_EVENT_BPF_OUTPUT - PERF_EVENT_SW("bpf-output", BPF_OUTPUT), -#endif -#ifdef HAVE_PERF_EVENT_CGROUP_SWITCHES - PERF_EVENT_SW("cgroup-switches", CGROUP_SWITCHES), -#endif -}; - #define PERF_MAKE_CACHE_ID(id) (id) #define PERF_MAKE_CACHE_OP_ID(id) ((id) << 8) #define PERF_MAKE_CACHE_OP_RES_ID(id) ((id) << 16) @@ -158,6 +114,49 @@ namespace perf static void populate_event_map(std::unordered_map& map) { Log::info() << "checking available events..."; + lo2s::perf::Event HW_EVENT_TABLE[] = { + PERF_EVENT_HW("cpu-cycles", CPU_CYCLES), + PERF_EVENT_HW("instructions", INSTRUCTIONS), + PERF_EVENT_HW("cache-references", CACHE_REFERENCES), + PERF_EVENT_HW("cache-misses", CACHE_MISSES), + PERF_EVENT_HW("branch-instructions", BRANCH_INSTRUCTIONS), + PERF_EVENT_HW("branch-misses", BRANCH_MISSES), + PERF_EVENT_HW("bus-cycles", BUS_CYCLES), +#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_FRONTEND + PERF_EVENT_HW("stalled-cycles-frontend", STALLED_CYCLES_FRONTEND), +#endif +#ifdef HAVE_PERF_EVENT_STALLED_CYCLES_BACKEND + PERF_EVENT_HW("stalled-cycles-backend", STALLED_CYCLES_BACKEND), +#endif +#ifdef HAVE_PERF_EVENT_REF_CYCLES + PERF_EVENT_HW("ref-cycles", REF_CPU_CYCLES), +#endif + }; + lo2s::perf::Event SW_EVENT_TABLE[] = { + PERF_EVENT_SW("cpu-clock", CPU_CLOCK), + PERF_EVENT_SW("task-clock", TASK_CLOCK), + PERF_EVENT_SW("page-faults", PAGE_FAULTS), + PERF_EVENT_SW("context-switches", CONTEXT_SWITCHES), + PERF_EVENT_SW("cpu-migrations", CPU_MIGRATIONS), + PERF_EVENT_SW("minor-faults", PAGE_FAULTS_MIN), + PERF_EVENT_SW("major-faults", PAGE_FAULTS_MAJ), +#ifdef HAVE_PERF_EVENT_ALIGNMENT_FAULTS + PERF_EVENT_SW("alignment-faults", ALIGNMENT_FAULTS), +#endif +#ifdef HAVE_PERF_EVENT_EMULATION_FAULTS + PERF_EVENT_SW("emulation-faults", EMULATION_FAULTS), +#endif +#ifdef HAVE_PERF_EVENT_DUMMY + PERF_EVENT_SW("dummy", DUMMY), +#endif +#ifdef HAVE_PERF_EVENT_BPF_OUTPUT + PERF_EVENT_SW("bpf-output", BPF_OUTPUT), +#endif +#ifdef HAVE_PERF_EVENT_CGROUP_SWITCHES + PERF_EVENT_SW("cgroup-switches", CGROUP_SWITCHES), +#endif + }; + map.reserve(array_size(HW_EVENT_TABLE) + array_size(SW_EVENT_TABLE) + array_size(CACHE_NAME_TABLE) * array_size(CACHE_OPERATION_TABLE)); for (auto& ev : HW_EVENT_TABLE)