Document not found (404)
+This URL is invalid, sorry. Please use the navigation bar or search to continue.
+ +diff --git a/artifacts.js b/artifacts.js new file mode 100644 index 00000000..2dbfc707 --- /dev/null +++ b/artifacts.js @@ -0,0 +1,245 @@ +/* Code modified from the blender website + * https://www.blender.org/wp-content/themes/bthree/assets/js/get_os.js?x82196 + */ + +let options = { + windows64: "x86_64-pc-windows", + windows32: "i686-pc-windows", + windowsArm: "aarch64-pc-windows", + + mac64: "x86_64-apple", + mac32: "i686-apple", + macSilicon: "aarch64-apple", + + linux64: "x86_64-unknown-linux", + linux32: "i686-unknown-linux", + linuxArm: "aarch64-unknown-linux", + + // ios: "ios", + // android: "linux-android", + // freebsd: "freebsd", +}; + +function isAppleSilicon() { + try { + var glcontext = document.createElement("canvas").getContext("webgl"); + var debugrenderer = glcontext + ? glcontext.getExtension("WEBGL_debug_renderer_info") + : null; + var renderername = + (debugrenderer && + glcontext.getParameter(debugrenderer.UNMASKED_RENDERER_WEBGL)) || + ""; + if (renderername.match(/Apple M/) || renderername.match(/Apple GPU/)) { + return true; + } + + return false; + } catch (e) {} +} + +function getOS() { + var OS = options.windows64.default; + var userAgent = navigator.userAgent; + var platform = navigator.platform; + + if (navigator.appVersion.includes("Win")) { + if ( + !userAgent.includes("Windows NT 5.0") && + !userAgent.includes("Windows NT 5.1") && + (userAgent.indexOf("Win64") > -1 || + platform == "Win64" || + userAgent.indexOf("x86_64") > -1 || + userAgent.indexOf("x86_64") > -1 || + userAgent.indexOf("amd64") > -1 || + userAgent.indexOf("AMD64") > -1 || + userAgent.indexOf("WOW64") > -1) + ) { + OS = options.windows64; + } else { + if ( + window.external && + window.external.getHostEnvironmentValue && + window.external + .getHostEnvironmentValue("os-architecture") + .includes("ARM64") + ) { + OS = options.windowsArm; + } else { + try { + var canvas = document.createElement("canvas"); + var gl = canvas.getContext("webgl"); + + var debugInfo = 
gl.getExtension("WEBGL_debug_renderer_info"); + var renderer = gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL); + if (renderer.includes("Qualcomm")) OS = options.windowsArm; + } catch (e) {} + } + } + } + + //MacOS, MacOS X, macOS + if (navigator.appVersion.includes("Mac")) { + if ( + navigator.userAgent.includes("OS X 10.5") || + navigator.userAgent.includes("OS X 10.6") + ) { + OS = options.mac32; + } else { + OS = options.mac64; + + const isSilicon = isAppleSilicon(); + if (isSilicon) { + OS = options.macSilicon; + } + } + } + + // linux + if (platform.includes("Linux")) { + OS = options.linux64; + // FIXME: Can we find out whether linux 32-bit or ARM are used? + } + + // if ( + // userAgent.includes("iPad") || + // userAgent.includes("iPhone") || + // userAgent.includes("iPod") + // ) { + // OS = options.ios; + // } + // if (platform.toLocaleLowerCase().includes("freebsd")) { + // OS = options.freebsd; + // } + + return OS; +} + +let os = getOS(); +window.os = os; + +// Unhide and hydrate selector with events +const archSelect = document.querySelector(".arch-select"); +if (archSelect) { + archSelect.classList.remove("hidden"); + const selector = document.querySelector("#install-arch-select"); + if (selector) { + selector.addEventListener("change", onArchChange); + } +} + +// Hydrate tab buttons with events +Array.from(document.querySelectorAll(".install-tab[data-id]")).forEach((tab) => { + tab.addEventListener("click", onTabClick); +}); + +function onArchChange(evt) { + // Get target + const target = evt.currentTarget.value; + // Find corresponding installer lists + const newContentEl = document.querySelector(`.arch[data-arch=${target}]`); + const oldContentEl = document.querySelector(`.arch[data-arch]:not(.hidden)`); + // Hide old content element (if applicable) + if (oldContentEl) { + oldContentEl.classList.add("hidden"); + } + // Show new content element + newContentEl.classList.remove("hidden"); + // Show the first tab's content if nothing was selected 
before + if (newContentEl.querySelectorAll(".install-tab.selected").length === 0) { + const firstContentChild = newContentEl.querySelector(".install-content:first-of-type"); + const firstTabChild = newContentEl.querySelector(".install-tab:first-of-type"); + firstContentChild.classList.remove("hidden"); + if (firstTabChild) { + firstTabChild.classList.add("selected"); + } + } + // Hide "no OS detected" message + const noDetectEl = document.querySelector(".no-autodetect"); + noDetectEl.classList.add("hidden"); + // Hide Mac hint + document.querySelector(".mac-switch").classList.add("hidden"); +} + +function onTabClick(evt) { + // Get target and ID + const {triple, id} = evt.currentTarget.dataset; + if (triple) { + // Find corresponding content elements + const newContentEl = document.querySelector(`.install-content[data-id="${String(id)}"][data-triple=${triple}]`); + const oldContentEl = document.querySelector(`.install-content[data-triple=${triple}][data-id]:not(.hidden)`); + // Find old tab to unselect + const oldTabEl = document.querySelector(`.install-tab[data-triple=${triple}].selected`); + // Hide old content element + if (oldContentEl && oldTabEl) { + oldContentEl.classList.add("hidden"); + oldTabEl.classList.remove("selected"); + } + + // Unhide new content element + newContentEl.classList.remove("hidden"); + // Select new tab element + evt.currentTarget.classList.add("selected"); + } +} + +const allPlatforms = Array.from(document.querySelectorAll(`.arch[data-arch]`)); +let hit = allPlatforms.find( + (a) => { + // Show Intel Mac downloads if no M1 Mac downloads are available + if ( + a.attributes["data-arch"].value.includes(options.mac64) && + os.includes(options.macSilicon) && + !allPlatforms.find(p => p.attributes["data-arch"].value.includes(options.macSilicon))) { + // Unhide hint + document.querySelector(".mac-switch").classList.remove("hidden"); + return true; + } + return a.attributes["data-arch"].value.includes(os); + } +); + +if (hit) { + 
hit.classList.remove("hidden"); + const selectEl = document.querySelector("#install-arch-select"); + selectEl.value = hit.dataset.arch; + const firstContentChild = hit.querySelector(".install-content:first-of-type"); + const firstTabChild = hit.querySelector(".install-tab:first-of-type"); + firstContentChild.classList.remove("hidden"); + if (firstTabChild) { + firstTabChild.classList.add("selected"); + } +} else { + const noDetectEl = document.querySelector(".no-autodetect"); + if (noDetectEl) { + const noDetectElDetails = document.querySelector(".no-autodetect-details"); + if (noDetectElDetails) { + noDetectElDetails.innerHTML = `We detected you're on ${os} but there don't seem to be installers for that. ` + } + noDetectEl.classList.remove("hidden"); + } +} + +let copyButtons = Array.from(document.querySelectorAll("[data-copy]")); +if (copyButtons.length) { + copyButtons.forEach(function (element) { + element.addEventListener("click", () => { + navigator.clipboard.writeText(element.attributes["data-copy"].value); + }); + }); +} + +// Toggle for pre releases +const checkbox = document.getElementById("show-prereleases"); + +if (checkbox) { + checkbox.addEventListener("click", () => { + const all = document.getElementsByClassName("pre-release"); + + if (all) { + for (var item of all) { + item.classList.toggle("hidden"); + } + } + }); +} \ No newline at end of file diff --git a/artifacts.json b/artifacts.json new file mode 100644 index 00000000..fa33cb32 --- /dev/null +++ b/artifacts.json @@ -0,0 +1 @@ +{"format_version":"0.6.1","tag":"v1.0.0","formatted_date":"Feb 12 2024 at 13:48 UTC","platforms_with_downloads":[{"target":["aarch64-apple-darwin"],"display_name":"macOS Apple Silicon","installers":[1,3,0,2]},{"target":["aarch64-unknown-linux-gnu"],"display_name":"Linux arm64","installers":[5,4,1,3,0,2]},{"target":["aarch64-unknown-linux-musl"],"display_name":"musl Linux arm64","installers":[5,4,1,3,0,2]},{"target":["i686-unknown-linux-gnu"],"display_name":"Linux 
x86","installers":[5,4,1,3,0,2]},{"target":["i686-unknown-linux-musl"],"display_name":"musl Linux x86","installers":[5,4,1,3,0,2]},{"target":["x86_64-apple-darwin"],"display_name":"macOS Intel","installers":[1,3,0,2]},{"target":["x86_64-pc-windows-msvc"],"display_name":"Windows x64","installers":[1,3,0,2]},{"target":["x86_64-unknown-linux-gnu"],"display_name":"Linux x64","installers":[5,4,1,3,0,2]},{"target":["x86_64-unknown-linux-musl"],"display_name":"musl Linux x64","installers":[5,4,1,3,0,2]}],"downloadable_files":[[0,{"name":"scaphandre-1.0.0-1.el9.x86_64.rpm","download_url":"https://github.com/hubblo-org/scaphandre/releases/download/v1.0.0/scaphandre-1.0.0-1.el9.x86_64.rpm","view_path":null,"checksum_file":null},["Linux x86","musl Linux x64","Linux x64","Linux arm64","musl Linux arm64","musl Linux x86"]],[1,{"name":"scaphandre_v1.0.0-deb11_amd64.deb","download_url":"https://github.com/hubblo-org/scaphandre/releases/download/v1.0.0/scaphandre_v1.0.0-deb11_amd64.deb","view_path":null,"checksum_file":null},["Linux x64","musl Linux x86","Linux arm64","musl Linux x64","musl Linux arm64","Linux 
x86"]]],"release":{"artifacts":{"files":[{"name":"scaphandre-1.0.0-1.el9.x86_64.rpm","download_url":"https://github.com/hubblo-org/scaphandre/releases/download/v1.0.0/scaphandre-1.0.0-1.el9.x86_64.rpm","view_path":null,"checksum_file":null},{"name":"scaphandre_v1.0.0-deb11_amd64.deb","download_url":"https://github.com/hubblo-org/scaphandre/releases/download/v1.0.0/scaphandre_v1.0.0-deb11_amd64.deb","view_path":null,"checksum_file":null},{"name":"scaphandre_v1.0.0_installer.exe","download_url":"https://github.com/hubblo-org/scaphandre/releases/download/v1.0.0/scaphandre_v1.0.0_installer.exe","view_path":null,"checksum_file":null},{"name":"scaphandre_v1.0.0_prometheuspush_installer.exe","download_url":"https://github.com/hubblo-org/scaphandre/releases/download/v1.0.0/scaphandre_v1.0.0_prometheuspush_installer.exe","view_path":null,"checksum_file":null}],"installers":[{"label":"crates.io","description":"","app_name":null,"method":{"type":"Run","file":null,"run_hint":"cargo install scaphandre"}},{"label":"DEB package","description":"","app_name":null,"method":{"type":"Run","file":null,"run_hint":"dpkg -i scaphandre.X.deb"}},{"label":"docker","description":"","app_name":null,"method":{"type":"Run","file":null,"run_hint":"docker pull hubblo/scaphandre"}},{"label":"RPM package","description":"","app_name":null,"method":{"type":"Run","file":null,"run_hint":"rpm -ivh 
scaphandre.X.rpm"}},{"label":"rpm","description":"","app_name":null,"method":{"type":"Download","file":0}},{"label":"deb","description":"","app_name":null,"method":{"type":"Download","file":1}}],"targets":{"aarch64-apple-darwin":[1,3,0,2],"aarch64-pc-windows-msvc":[1,3,0,2],"aarch64-unknown-linux-gnu":[5,4,1,3,0,2],"aarch64-unknown-linux-musl":[5,4,1,3,0,2],"i686-apple-darwin":[1,3,0,2],"i686-pc-windows-msvc":[1,3,0,2],"i686-unknown-linux-gnu":[5,4,1,3,0,2],"i686-unknown-linux-musl":[5,4,1,3,0,2],"x86_64-apple-darwin":[1,3,0,2],"x86_64-pc-windows-msvc":[1,3,0,2],"x86_64-unknown-linux-gnu":[5,4,1,3,0,2],"x86_64-unknown-linux-musl":[5,4,1,3,0,2]}}},"os_script":"/scaphandre/artifacts.js","has_checksum_files":false} \ No newline at end of file diff --git a/artifacts/index.html b/artifacts/index.html new file mode 100644 index 00000000..41f749cd --- /dev/null +++ b/artifacts/index.html @@ -0,0 +1,258 @@ + + +
+
+cargo install scaphandre
+
+
+
+
++dpkg -i scaphandre.X.deb+ + + + +
+docker pull hubblo/scaphandre
+
+
+
+
++rpm -ivh scaphandre.X.rpm+ + + + +
File | +Platform | + +
---|---|
scaphandre-1.0.0-1.el9.x86_64.rpm | ++ + + Linux x86 + + + + musl Linux x64, + + + + Linux x64, + + + + Linux arm64, + + + + musl Linux arm64, + + + + musl Linux x86, + + + | + +
scaphandre_v1.0.0-deb11_amd64.deb | ++ + + Linux x64 + + + + musl Linux x86, + + + + Linux arm64, + + + + musl Linux x64, + + + + musl Linux arm64, + + + + Linux x86, + + + | + +
This URL is invalid, sorry. Please use the navigation bar or search to continue.
+ +Scaphandre intends to provide multiple ways to gather power consumption metrics and make understanding tech services footprint possible in many situations. Depending on how you use scaphandre, you may have some restrictions.
+To summarize, scaphandre should provide two ways to estimate the power consumption of a service, process or machine. Either by measuring it, using software interfaces that give access to hardware metrics, or by estimating it if measuring is not an option (this is a planned feature, not yet implemented as these lines are written, in December 2020).
+In scaphandre, the code responsible for collecting the power consumption data before any further processing is grouped in components called sensors. If you want more details about scaphandre's structure, here are the explanations.
+On GNU/Linux, the PowercapRAPL sensor enables you to measure the power consumption, but it doesn't work in all contexts.
+On Windows, the MsrRAPL sensor, coupled with the driver responsible for reading RAPL MSRs, enables you to do (almost) the same.
+Sensor | Intel x86 bare metal | AMD x86 bare metal | ARM bare metal | Virtual Machine | Public cloud instance | Container |
---|---|---|---|---|---|---|
PowercapRAPL (GNU/Linux only) | Yes | Yes ⚠️ kernel > 5.11 required | We don't know yet | Yes, if on a qemu/KVM hypervisor that runs scaphandre and the Qemu exporter | No, until your cloud provider uses scaphandre on its hypervisors | Depends on what you want |
MsrRAPL (Windows only) | Yes | Probably yes (not tested yet; if you have Windows-operated AMD gear, please consider contributing) | No | Not yet, depends on improvements on the MsrRAPL sensors and overall windows/hypervisors support in Scaphandre | No, until your cloud provider uses scaphandre on its hypervisors | Might work, not tested yet. If you want to join us in this journey, please consider contributing |
Future estimation based sensor | Future Yes | Future Yes | Future Yes | Future Yes | Future Yes | Future Yes |
Sensors including "RAPL" in their name rely on RAPL.
+The pts
and pln
feature flags ("Intel Package Thermal Status" and "Intel Power Limit Notification" respectively) seem to indicate that RAPL is supported on a CPU. On GNU/Linux, you can be sure of their presence if this command succeeds and matches:
egrep "(pts|pln)" /proc/cpuinfo
+
+
+ If you are reading this, you may be to contribute. Just for that, a big thank you ! 👏
+Feel free to propose pull requests, or open new discussions or issues at will. Scaphandre is a collaborative project and all opinions and propositions shall be heard and studied. The contributions will be received with kindness, gratitude and with an open mind. Remember that we are all dwarfs standing on the shoulders of giants. We all have to learn from others and to give back, with due mutual respect.
+This project adheres to the Rust Code of Conduct, which can be found here.
+Contributions may take multiple forms:
+This project intends to unite a lot of people to have a lot of positive impact. Any action that helps us get there will be very much appreciated! 🎉
+Discussions and questions about the project are welcome on gitter or by email.
+This project intends to use conventional commit messages and the gitflow workflow.
+Please open your pull requests on the dev branch :)
+Scaphandre is not only a tool, but a framework. Modules dedicated to collecting energy consumption data from the host are called Sensors. +Modules that are dedicated to sending this data to a given channel or remote system are called Exporters. New Sensors and Exporters are going to be created and all contributions are welcome. For more on the internal structure please jump here.
+Documentation source files are in docs_src
, edit the content there and propose pull-requests on the scaphandre repository as if it was a code pull-request.
Here is how to build and publish the documentation after a change. First download mdbook, the static site generator we use for the documentation website:
+cargo install mdbook
+
+Then fork the scaphandre-documentation repository and clone the resulting repository locally.
+Place the resulting documentation folder next to scaphandre's folder. Then run the build:
+cd scaphandre && mdbook build
+
+You should then have the result in scaphandre-documentation/docs
. Commit, push and propose a PR to publish the changes.
You can also serve the current content locally to see your changes before commit:
+mdbook serve
+
+
+ There are several ways scaphandre can interact with containers.
+You may run scaphandre in a container, to not have to manage the dependencies, then measure the power consumption of the bare metal host. This is described in the quickstart tutorial. Note that you need to expose /sys/class/powercap
and /proc
as volumes in the container to allow scaphandre to get the relevant metrics from the bare metal host.
Scaphandre may help you measure the power consumption of containers running on a given host. You can already get to that goal using the tips provided in the howto section called "Get process level power consumption". It may still require some tweaking and inventiveness from you in making the appropriate queries to your favorite TSDB. This should be made easier by the upcoming scaphandre features.
+Another use case scenario is measuring the power consumption of a container orchestrator (like kubernetes), its nodes and the containers and applications running on it. Scaphandre can be installed on Kubernetes via the Helm chart and there is a tutorial for installing it along with Prometheus and Grafana to view the metrics.
+As described here, scaphandre provides several ways (sensors) to collect the power consumption metrics. Depending on your use case, one sensor may be more suitable than another. Each of them comes with strengths and weaknesses. This is basically always a tradeoff between precision and simplicity. This is especially true if you run container-based workloads on public cloud instances. We are working to provide a solution for that as well.
+ +This is true starting from Scaphandre >= 1.0.
+There are several metrics available at the host level in Scaphandre:
+scaph_host_power_microwatts
: always returned, computed from Record structs made from scaph_host_energy_microjoules
metricscaph_host_energy_microjoules
: always returned, either one value or a sum of values coming directly from RAPL counters (energy_uj
files or direct read from an MSR)scaph_host_rapl_psys_microjoules
: is available only when the PSYS RAPL domain is available on the machine.In addition to those metrics, you might want to build, on your time series database, the sum of process_ metrics to have a view of the weight of all processes on the host power. Using Prometheus, it would look like: sum(scaph_process_power_consumption_microwatts{hostname="$hostname"}) / 1000000
, to get it in Watts.
Let's explain the relationship between those metrics, and what you could expect.
+host_power
metric will return :
Briefly explained (see RAPL domains for detailed explanations), PSYS covers most components on the machine ("all components connected to the SoC / motherboard" according to most documentation), so we return this wider-ranged metric when available. If not, we use a combination of the PKG domain, which includes CPU and integrated GPU power, and the DRAM domain, which includes memory power. The first option gives higher figures than the second, for now.
+Summing the power of all processes, if the machine is mostly IDLE, you'll most likely get a tiny percentage of the host machine's power. The difference between host power and the sum of processes power can be accounted as "power due to IDLE activity", in other words the power your machine demands for "doing nothing". The higher this difference over a long period of time (better seen as a graph), the higher the chance that there is room for improvement in moving the workloads to another machine and shutting the current machine down (and making it available for another project or to another organization, to avoid buying a new machine).
+Warning: that being said, the way per-process power is computed is still biased and shall be improved in the following versions of Scaphandre. For now, the main key for allocation is CPU time. As host level power metrics include power usage of more and more components on the machine (work in progress) this allocation key will be more and more inaccurate. Future versions of this allocation model should include keys regarding the activity of other components than CPU. Enabling a better set of allocation keys for per-process power is part of the roadmap.
+ +Scaphandre is a tool that makes it possible to see the power being used by a single process on a computer.
+This sounds like a simple thing to be able to do, but in practice a number of details can make this more complex.
+So having a good mental model of how it works will make it easier to understand when and how to use Scaphandre. Let's start with a simplified mental model below, before moving on to multiple processors or virtual machines - but once you understand the key ideas outlined below, it's easier to see how they can be applied to thinking about tracking power on virtual machines, or when multiple processors are available.
+When we first think about how much energy a single process running in a computer might use, we might start with a mental model that looks like the figure below, with large, uninterrupted chunks of compute time allocated to each process.
+This is easy to understand, and it matches how we might be billed for a share of a larger machine when paying for cloud services.
+ +However, if the reality was exactly like this diagram, our computers would only ever be able to do one thing at a time. It's more accurate and helpful to think of computers working on lots of different jobs at the same time - they work on one job for a short interval of time, then another, and another and so on. You'll often see these small intervals of time referred to as jiffies.
+ +In a given amount of time, some jobs that are prioritized or more resource intensive will use more jiffies than others. Fortunately, each job keeps a running total of the total jiffies allocated to it, so if we know how many jiffies have been used in total, it can give us an idea how much of a machine's resources are being used by a given process.
+ +It's possible without Scaphandre to understand how large a share of a machine's resources is being used by a given process.
+This is useful, by itself, but if we want to understand how much power is used per process, not just the share of the machine's resources, we need to know how much power is being used by the machine in absolute terms.
+To do this, we need a sensor of some kind to track power usage by the machine itself. Some servers have these, like with Intel's RAPL sensors, which we cover in more detail later on. This makes it possible to understand how much power is being used by CPUs, GPUs and so on, in terms of watts, or, if we are looking at just a single process, various fractions of a watt.
+ +To understand the power used by a single process we combine both of these ideas. We count the jiffies used by our job when it's being worked on, and for each jiffy, we check how much power is being drawn at those moments in time.
+ +Finally, when we group together all the power readings for all our jiffies over a given time period, we can arrive at a usable figure for how much power has been used, in terms of watt hours.
+Once we have a figure in terms of watt hours, there are various ways we can convert this to environmental impact. A common way is to use an emission factor for the electricity used, to turn the power consumption data into an estimate of associated carbon emissions.
+ +While the reality is again more complicated than the diagram below, the same ideas broadly apply when you introduce multiple processors too.
+If you are able to read from sensors that can share how much power is being used by the various processors at work, and know how much of the time is being allocated to our processes during those moments, you can get a good idea of what these figures are, at a per-process level.
+ +Things get a bit more complicated with virtualised environments, because in many cases, virtual machines only have a partial view of the system they are part of, and frequently do not have access to any power usage figures from the host physical machine.
+However, if a guest virtual machine or guest container does have access to readings provided by the host physical machine, it is possible to work out the same kinds of per-process power figures. Scaphandre supports exposing these kinds of power readings to virtual machines and containers, which means, if you are running on infrastructure that is running scaphandre and exposing these readings, you can see these figures for the energy used by your own processes.
+As you can see with the prometheus exporter reference, scaphandre exporters can provide process level power consumption metrics. This section will explain how it is done and how it may be improved in the future.
+We'll talk here about the case where scaphandre is able to effectively measure the power consumption of the host (see compatibility section for more on sensors and their prerequisites) and specifically about the PowercapRAPL sensor.
+Let's clarify what's happening when you collect metrics with scaphandre and the RAPL sensor. +RAPL stands for Running Average Power Limit. It's a technology embedded in most Intel and AMD x86 CPUs produced after 2012.
+Thanks to this technology it is possible to get the total energy consumption of the CPU, the consumption per CPU socket, plus, in some cases, the consumption of the DRAM controller. In most cases it represents the vast majority of the energy consumption of the machine (except when running GPU intensive workloads, for example).
+Further improvements shall be made in scaphandre to fully measure the consumption when GPU are involved (or a lot of hard drives on the same host...).
+Between scaphandre and this data is the powercap kernel module that writes the energy consumption to files. Scaphandre reads those files, stores the data in buffers and then allows for more processing through the exporters.
+The PowercapRAPL sensor does actually more than just collecting those energy consumption metrics (and converting them to power consumption metrics).
+Every time the exporter asks for a measurement (either periodically like in the Stdout exporter, or every time a request comes as would be the case for the Prometheus exporter) the sensor reads the values of the energy counters from powercap. It then stores those values, and does the same for the CPU usage statistics of the CPU (the one you can see in /proc/stats
) and for each running process on the machine at that time (see /proc/PID/stats
).
With this data it is possible to compute the ratio of CPU time actively spent for a given PID on the CPU time actively spent doing something. With this ratio we can then get the subset of power consumption that is related to that PID on a given timeframe (between two measurement requests).
+Services and programs are often not running on only one PID. It's needed to aggregate the consumption of all related PIDs to know what this service is actually consuming.
+To do that, in the current state of scaphandre development, you can use the Prometheus exporter, and then use Prometheus and its query language capabilities. You'll find examples looking at the graphs and queries here. In a near future, more advanced features may be implemented in scaphandre to allow such classification even if you don't have access to a proper Time Series database (TSDB).
+ +Scaphandre is designed to be extensible. As it performs basically two tasks: collecting/pre-computing the power consumption metrics and publishing it, it is composed of two main components: a sensor and an exporter. Each can be implemented in different ways to match a certain use case. When you run scaphandre from the command line, -s
allows you to choose the sensor you want to use, and the next subcommand is the name of the exporter.
Sensors are meant to:
+The PowercapRAPL for instance, gets and transforms metrics coming from the powercap Linux kernel module, that serves as an interface to get the data from the RAPL feature of x86 CPUs. Because this feature is only accessible when you are running on a bare metal machine, this sensor will not work in a virtual machine, except if you first run scaphandre on the hypervisor and make the VM metrics available, with the qemu exporter, to scaphandre running inside the virtual machine.
+When you don't have access to the hypervisor/bare-metal machine (i.e. when you run on public cloud instances and your provider doesn't run scaphandre) you still have the option to estimate the power consumption, based on both the resources (cpu/gpu/ram/io...) consumed by the virtual machine at a given time, and the characteristics of the underlying hardware. This is the way we are designing the future estimation-based sensor, to match that use case.
+Looking at the code, you'll find that the interface between metrics and the exporters is in fact the Topology object. This is intended to be asked by the exporter through the get_topology method of the sensor.
+An exporter is expected to:
+The Stdout exporter exposes the metrics on the standard output (in your terminal). The prometheus exporter exposes the metrics on an HTTP endpoint, to be scraped by a prometheus instance. An exporter should be created for each monitoring scenario (do you want to feed your favorite monitoring/data analysis tool with scaphandre metrics ? feel free to open a PR to create a new exporter !).
+As introduced in the sensors section, the Qemu exporter, is very specific. It is only intended to collect metrics related to running virtual machines on a Qemu/KVM hypervisor. Those metrics can then be made available to each virtual machine and their own scaphandre instance, running the PowercapRAPL sensor (with the --vm
flag on). The qemu exporter puts VM's metrics in files the same way the powercap kernel module does it. It mimics this behavior, so the sensor can act the same way it would on a bare metal machine.
RAPL stands for "Running Average Power Limit", it is a feature on Intel/AMD x86 CPU's (manufactured after 2012) that allows to set limits on power used by the CPU and other components. This feature also allows to just get "measurements" (mind the double quotes, as at least part of the numbers RAPL gives are coming from estimations/modeling) of components power usage.
+ +It is composed of "domains", that, in 2023, may include:
+RAPL documentation from Intel doesn't necessarily give very precise information about how RAPL behaves depending on the platform, or about what is included in the calculation. Actively looking for other experiments, feedback and documentation is needed. You might find some information gathered here: awesome-energy. If you have more, or more precise, information and are willing to contribute, don't hesitate to open a PR to the dev branch on scaphandre's repository (targeting the docs_src folder) and/or the awesome-energy repository.
+If you want to know if RAPL is supported by your CPU, please have a look at the end of the Compatibility section.
+ +Now we'll see how to get valuable data in a dashboard. Let's say you want to track the power consumption of a given process or application in a dashboard and eventually set thresholds on it. What do you need to get that subset of the power consumption of the host visually?
+You need basically 3 components for that:
+We'll say that you already have a running prometheus server and an available grafana instance and that you have added prometheus as a datasource in grafana.
+How to get metrics per process as you may see here ?
+The metric that I need from the prometheus exporter to do that is: scaph_process_power_consumption_microwatts
. This metric is a wallet for the power consumption of all the running processes on the host at a given time.
This is a Prometheus metric, so you have labels to filter on the processes you are interested in. Currently the available labels are: instance
, exe
, job
and pid
.
If I want to get power consumption (in Watts) for all processes related to nginx running on a host with ip 10.0.0.9 I may use that query, in grafana, based on the prometheus datasource:
+scaph_process_power_consumption_microwatts{cmdline=~".*nginx.*", instance="10.0.0.9:8080"} / 1000000
+
+Here we assume that scaphandre/the prometheus exporter is running on port number 8080
.
Here is how it looks, creating a panel in grafana:
+ +Those labels are explained in much more detail here.
+ +