From 3f0c7ccf85dfb162bd63e3065bf3f5f3882c9235 Mon Sep 17 00:00:00 2001
From: Etienne Perot <eperot@google.com>
Date: Fri, 10 Jan 2025 19:41:27 -0800
Subject: [PATCH] PGO: Add make target to refresh profiles for PGO.

This runs all benchmarks tagged as PGO-enabled, of which there is
currently just one for simplicity (the ffmpeg benchmark). All other
benchmarks are initially tagged out of PGO. I will send a different
change to enroll other benchmarks in PGO.

The make target runs each such benchmark and gathers profiles for each
benchmark. Multiple profile files for multiple runs of the same benchmark
are merged into one, then compared against the existing checked-in profile
used for PGO builds (which right now doesn't exist). If such a profile
doesn't exist or differs widely from the freshly-collected profile, then
this new profile is copied into the repository.

Such profiles are not used at all in builds yet; this is just the glue
that keeps them fresh in the repo.

PiperOrigin-RevId: 714311978
---
 Makefile                         | 70 ++++++++++++++++++++++++++++++++
 test/benchmarks/base/BUILD       |  6 +++
 test/benchmarks/database/BUILD   |  1 +
 test/benchmarks/defs.bzl         | 17 ++++----
 test/benchmarks/fs/BUILD         |  3 ++
 test/benchmarks/ml/BUILD         |  1 +
 test/benchmarks/network/BUILD    |  5 +++
 tools/bazel.mk                   |  1 +
 tools/profiletool/profiletool.go | 18 +++++++-
 9 files changed, 114 insertions(+), 8 deletions(-)
diff --git a/Makefile b/Makefile
index f74d7b451f..04230817b2 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,7 @@ header = echo --- $(1) >&2
 EMPTY :=
 SPACE := $(EMPTY) $(EMPTY)
 SHELL = /bin/bash
+REPO_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
 COMMA := ,
 
 ## usage: make <target>
@@ -72,6 +73,10 @@ run: ## Runs the given $(TARGETS), built with $(OPTIONS), using $(ARGS). E.g. ma
 	@$(call run,$(TARGETS),$(ARGS))
 .PHONY: run
 
+query: ## Runs a bazel query, passing $(OPTIONS) followed by $(TARGETS). E.g. make query TARGETS=//test/...
+	@$(call query,$(OPTIONS) $(TARGETS))
+.PHONY: query
+
 sudo: ## Runs the given $(TARGETS) as per run, but using "sudo -E". E.g. make sudo TARGETS=test/root:root_test ARGS=-test.v
 	@$(call sudo,$(TARGETS),$(ARGS))
 .PHONY: sudo
@@ -533,6 +538,71 @@ run-benchmark: load-benchmarks ## Runs single benchmark and optionally sends dat
 	@$(call run_benchmark,$(RUNTIME))
 .PHONY: run-benchmark
 
+# The arguments passed to benchmarks when run for PGO profile collection.
+# This should *not* include the `-profile` or `-profile-cpu` arguments, as
+# those are added automatically.
+BENCHMARKS_ARGS_PGO  := -test.v -test.bench=. -test.benchtime=30s
+# The similarity threshold (between 0.0 and 1.0) below which the
+# `benchmark-refresh-pgo` rule replaces the checked-in profile with a new one.
+BENCHMARKS_PGO_REFRESH_THRESHOLD ?= 0.7
+# The number of days that must have elapsed since the last time a profile
+# was updated before being considered for refresh. This limits the growth
+# of the repository size.
+BENCHMARKS_PGO_REFRESH_MIN_DAYS_SINCE_LAST_UPDATE ?= 28
+
+benchmark-refresh-pgo: load-benchmarks $(RUNTIME_BIN) ## Refreshes the checked-in PGO profiles of all PGO-tagged benchmarks on each of $(BENCHMARKS_PLATFORMS).
+	@set -e; if test -z "$(BENCHMARKS_PLATFORMS)"; then \
+		echo 'Must specify BENCHMARKS_PLATFORMS.' >&2; \
+		exit 1; \
+	else \
+		PGO_RUNTIME_KEY="$$( $(call run,tools/profiletool,runtime-info) )"; \
+		export PGO_RUNTIME_KEY; \
+		PGO_LAST_PKG_COMMIT_HASH="$$(git log --max-count=1 --format='%H' -- pkg)"; \
+		export PGO_LAST_PKG_COMMIT_HASH; \
+		for PLATFORM in $(BENCHMARKS_PLATFORMS); do \
+			export PLATFORM; \
+			mkdir -p "$(REPO_DIR)/runsc/profiles/$${PGO_RUNTIME_KEY}_$${PLATFORM}"; \
+			PLATFORM_TMPDIR="$$(mktemp --tmpdir=/tmp --directory "pgo_$${PGO_RUNTIME_KEY}_$${PLATFORM}.XXXXXXXX")"; \
+			export PLATFORM_TMPDIR; \
+			for PGO_BENCHMARK_TARGET in $$( $(call query,'attr(tags, gvisor_pgo_benchmark, //test/benchmarks/...)') | sed 's~^//~~'); do \
+				PGO_BENCHMARK_BASENAME="$$(echo "$${PGO_BENCHMARK_TARGET}" | cut -d: -f2 | sed 's/_test$$//')"; \
+				export PGO_BENCHMARK_BASENAME; \
+				PGO_PROFILE_OLD_COMMIT_HASH="$(REPO_DIR)/runsc/profiles/$${PGO_RUNTIME_KEY}_$${PLATFORM}/$${PGO_BENCHMARK_BASENAME}.pgo.pkg_commithash"; \
+				export PGO_PROFILE_OLD_COMMIT_HASH; \
+				PGO_PROFILE_OLD="$(REPO_DIR)/runsc/profiles/$${PGO_RUNTIME_KEY}_$${PLATFORM}/$${PGO_BENCHMARK_BASENAME}.pgo.pprof.pb.gz"; \
+				export PGO_PROFILE_OLD; \
+				PGO_PROFILE_NEW="$${PLATFORM_TMPDIR}/$${PGO_BENCHMARK_BASENAME}.pgo.pprof.pb.gz"; \
+				export PGO_PROFILE_NEW; \
+				if [[ -f "$${PGO_PROFILE_OLD_COMMIT_HASH}" ]] && [[ "$$(cat "$${PGO_PROFILE_OLD_COMMIT_HASH}")" == "$${PGO_LAST_PKG_COMMIT_HASH}" ]]; then \
+					echo "Skipping refresh for $${PGO_PROFILE_OLD}: profile is already up-to-date." >&2; \
+					continue; \
+				fi; \
+				if [[ -f "$${PGO_PROFILE_OLD}" ]] && [[ "$$(stat -c %Y "$${PGO_PROFILE_OLD}")" -gt "$$(date +%s -d '$(BENCHMARKS_PGO_REFRESH_MIN_DAYS_SINCE_LAST_UPDATE) days ago')" ]]; then \
+					echo "Skipping refresh for $${PGO_PROFILE_OLD}: profile is less than $(BENCHMARKS_PGO_REFRESH_MIN_DAYS_SINCE_LAST_UPDATE) days old." >&2; \
+					continue; \
+				fi; \
+				mkdir -p "$$(dirname "$${PGO_PROFILE_OLD}")"; \
+				mkdir -p "$${PLATFORM_TMPDIR}/$${PGO_BENCHMARK_BASENAME}"; \
+				$(call install_runtime,$${PLATFORM}_$${PGO_RUNTIME_KEY}_pgo_$${PGO_BENCHMARK_BASENAME},--platform $${PLATFORM} --profile --profile-cpu="$${PLATFORM_TMPDIR}/$${PGO_BENCHMARK_BASENAME}/$${PGO_BENCHMARK_BASENAME}.%YYYY%-%MM%-%DD%_%HH%-%II%-%SS%-%NN%.pgo.pprof.pb.gz"); \
+				$(call sudo,$${PGO_BENCHMARK_TARGET},-runtime=$${PLATFORM}_$${PGO_RUNTIME_KEY}_pgo_$${PGO_BENCHMARK_BASENAME} $(BENCHMARKS_ARGS_PGO)); \
+				$(call run,tools/profiletool,merge --out="$${PGO_PROFILE_NEW}" "$${PLATFORM_TMPDIR}/$${PGO_BENCHMARK_BASENAME}"); \
+				rm -rf --one-file-system "$${PLATFORM_TMPDIR}/$${PGO_BENCHMARK_BASENAME}"; \
+				if [[ ! -f "$${PGO_PROFILE_OLD}" ]]; then \
+					cp "$${PGO_PROFILE_NEW}" "$${PGO_PROFILE_OLD}"; \
+					echo "$${PGO_LAST_PKG_COMMIT_HASH}" > "$${PGO_PROFILE_OLD_COMMIT_HASH}"; \
+					echo "--- PGO: Added new profile for $${PGO_BENCHMARK_BASENAME} on $${PGO_RUNTIME_KEY} $${PLATFORM}" >&2; \
+				elif $(call run,tools/profiletool,check-similar --threshold=$(BENCHMARKS_PGO_REFRESH_THRESHOLD) "$${PGO_PROFILE_OLD}" "$${PGO_PROFILE_NEW}"); then \
+					echo "--- PGO: Profile for $${PGO_BENCHMARK_BASENAME} on $${PGO_RUNTIME_KEY} $${PLATFORM} is already up-to-date." >&2; \
+				else \
+					cp "$${PGO_PROFILE_NEW}" "$${PGO_PROFILE_OLD}"; \
+					echo "$${PGO_LAST_PKG_COMMIT_HASH}" > "$${PGO_PROFILE_OLD_COMMIT_HASH}"; \
+					echo "--- PGO: Updated profile for $${PGO_BENCHMARK_BASENAME} on $${PGO_RUNTIME_KEY} $${PLATFORM}" >&2; \
+				fi; \
+			done; \
+		done; \
+	fi
+.PHONY: benchmark-refresh-pgo
+
 ## Seccomp targets.
 seccomp-sentry-filters:  # Dumps seccomp-bpf program for the Sentry binary.
 	@$(call run,//runsc/boot/filter/dumpfilter,$(ARGS))
diff --git a/test/benchmarks/base/BUILD b/test/benchmarks/base/BUILD
index 1948736807..0ff67eee99 100644
--- a/test/benchmarks/base/BUILD
+++ b/test/benchmarks/base/BUILD
@@ -21,6 +21,7 @@ go_library(
 benchmark_test(
     name = "startup_test",
     srcs = ["startup_test.go"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -33,6 +34,7 @@ benchmark_test(
 benchmark_test(
     name = "size_test",
     srcs = ["size_test.go"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -46,6 +48,7 @@ benchmark_test(
 benchmark_test(
     name = "sysbench_test",
     srcs = ["sysbench_test.go"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -58,6 +61,7 @@ benchmark_test(
 benchmark_test(
     name = "syscallbench_test",
     srcs = ["syscallbench_test.go"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -70,6 +74,7 @@ benchmark_test(
 benchmark_test(
     name = "hackbench_test",
     srcs = ["hackbench_test.go"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -82,6 +87,7 @@ benchmark_test(
 benchmark_test(
     name = "usage_test",
     srcs = ["usage_test.go"],
+    use_for_pgo = False,
     deps = [
         ":base",
         "//pkg/test/dockerutil",
diff --git a/test/benchmarks/database/BUILD b/test/benchmarks/database/BUILD
index 60dc8ed772..cd4a1b14c3 100644
--- a/test/benchmarks/database/BUILD
+++ b/test/benchmarks/database/BUILD
@@ -16,6 +16,7 @@ benchmark_test(
     name = "redis_test",
     srcs = ["redis_test.go"],
     library = ":database",
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
diff --git a/test/benchmarks/defs.bzl b/test/benchmarks/defs.bzl
index a33cae1631..d19ed8524f 100644
--- a/test/benchmarks/defs.bzl
+++ b/test/benchmarks/defs.bzl
@@ -2,15 +2,18 @@
 
 load("//tools:defs.bzl", "go_test")
 
-def benchmark_test(name, tags = [], **kwargs):
+def benchmark_test(name, tags = [], use_for_pgo = True, **kwargs):
+    tags = tags + [
+        # Requires docker and runsc to be configured before the test runs.
+        "local",
+        "manual",
+        "gvisor_benchmark",
+    ]
+    if use_for_pgo:
+        tags = tags + ["gvisor_pgo_benchmark"]
     go_test(
         name,
-        tags = tags + [
-            # Requires docker and runsc to be configured before the test runs.
-            "local",
-            "manual",
-            "gvisor_benchmark",
-        ],
+        tags = tags,
         # Benchmark test binaries are built inside a bazel docker container in
         # OSS but are executed directly on the host. Use static binaries to
         # avoid hitting glibc incompatibility.
diff --git a/test/benchmarks/fs/BUILD b/test/benchmarks/fs/BUILD
index ce23fb31c0..a105c60ec2 100644
--- a/test/benchmarks/fs/BUILD
+++ b/test/benchmarks/fs/BUILD
@@ -8,6 +8,7 @@ package(
 benchmark_test(
     name = "bazel_test",
     srcs = ["bazel_test.go"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/cleanup",
@@ -23,6 +24,7 @@ benchmark_test(
     name = "fio_test",
     srcs = ["fio_test.go"],
     data = ["//test/runner/fuse"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/cleanup",
@@ -37,6 +39,7 @@ benchmark_test(
 benchmark_test(
     name = "rubydev_test",
     srcs = ["rubydev_test.go"],
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
diff --git a/test/benchmarks/ml/BUILD b/test/benchmarks/ml/BUILD
index bca251d10d..37896dc468 100644
--- a/test/benchmarks/ml/BUILD
+++ b/test/benchmarks/ml/BUILD
@@ -16,6 +16,7 @@ benchmark_test(
     name = "tensorflow_test",
     srcs = ["tensorflow_test.go"],
     library = ":ml",
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
diff --git a/test/benchmarks/network/BUILD b/test/benchmarks/network/BUILD
index 37b97e5543..4c6f068d97 100644
--- a/test/benchmarks/network/BUILD
+++ b/test/benchmarks/network/BUILD
@@ -26,6 +26,7 @@ benchmark_test(
         "iperf_test.go",
     ],
     library = ":network",
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -42,6 +43,7 @@ benchmark_test(
         "node_test.go",
     ],
     library = ":network",
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -57,6 +59,7 @@ benchmark_test(
         "ruby_test.go",
     ],
     library = ":network",
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -72,6 +75,7 @@ benchmark_test(
         "nginx_test.go",
     ],
     library = ":network",
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
@@ -86,6 +90,7 @@ benchmark_test(
         "httpd_test.go",
     ],
     library = ":network",
+    use_for_pgo = False,
     visibility = ["//:sandbox"],
     deps = [
         "//pkg/test/dockerutil",
diff --git a/tools/bazel.mk b/tools/bazel.mk
index 877377f51f..4d838ab919 100644
--- a/tools/bazel.mk
+++ b/tools/bazel.mk
@@ -313,6 +313,7 @@ copy  = $(call header,COPY $(1) $(2)) && $(call build_paths,$(1),cp -fa {} $(2))
 run   = $(call header,RUN $(1) $(2)) && $(call build_paths,$(1),{} $(2))
 sudo  = $(call header,SUDO $(1) $(2)) && $(call build_paths,$(1),sudo -E {} $(2))
 test  = $(call header,TEST $(1)) && $(call wrapper,$(BAZEL) test --strip=never $(BAZEL_OPTIONS) $(TEST_OPTIONS) $(1))
+query = $(call wrapper,$(BAZEL) query $(BAZEL_OPTIONS) $(1))
 
 clean: ## Cleans the bazel cache.
 	@$(call clean)
diff --git a/tools/profiletool/profiletool.go b/tools/profiletool/profiletool.go
index 8219335118..4b8d88e734 100644
--- a/tools/profiletool/profiletool.go
+++ b/tools/profiletool/profiletool.go
@@ -22,6 +22,7 @@ import (
 	"math"
 	"os"
 	"path/filepath"
+	"runtime"
 	"strings"
 
 	"github.com/google/pprof/profile"
@@ -34,14 +35,16 @@ var (
 	mergeOut              = mergeCmd.String("out", "/dev/stdout", "file to write the merged profile to")
 	compactCmd            = flag.NewFlagSet("compact", flag.ContinueOnError)
 	compactOut            = compactCmd.String("out", "/dev/stdout", "file to write the compacted profile to")
+	runtimeInfoCmd        = flag.NewFlagSet("runtime-info", flag.ContinueOnError)
 	checkSimilarCmd       = flag.NewFlagSet("check-similar", flag.ContinueOnError)
 	checkSimilarQuiet     = checkSimilarCmd.Bool("quiet", false, "if set, do not print any output; comparison result is still provided as exit code")
 	checkSimilarThreshold = checkSimilarCmd.Float64("threshold", 0.7, "threshold (between 0.0 and 1.0) above which the profiles are considered similar")
 
-	allCommands = []*flag.FlagSet{mergeCmd, compactCmd, checkSimilarCmd}
+	allCommands = []*flag.FlagSet{mergeCmd, compactCmd, runtimeInfoCmd, checkSimilarCmd}
 	commandSet  = map[*flag.FlagSet]string{
 		mergeCmd:        "merge two or more profile files into one",
 		compactCmd:      "minimize the size of a profile",
+		runtimeInfoCmd:  "print a runtime information key that identifies dimensions impacting profiles (Go version, CPU architecture)",
 		checkSimilarCmd: "check if two profiles are similar",
 	}
 )
@@ -177,6 +180,15 @@ func writeMaxCompressionProfile(p *profile.Profile, out *os.File) error {
 	return nil
 }
 
+// runtimeInfo prints "<Go version>-<GOOS>-<GOARCH>" to stdout, without a
+// trailing newline. Only the part of runtime.Version() before its first space
+// is kept. Any error from writing to stdout is returned to the caller.
+func runtimeInfo() error {
+	goVersion, _, _ := strings.Cut(runtime.Version(), " ")
+	_, err := fmt.Fprintf(os.Stdout, "%s-%s-%s", goVersion, runtime.GOOS, runtime.GOARCH)
+	return err
+}
+
 type comparisonKey struct {
 	Filename              string
 	FunctionName          string
@@ -379,6 +391,10 @@ func main() {
 		os.Exit(1)
 	}
 	switch os.Args[1] {
+	case runtimeInfoCmd.Name():
+		if err := runtimeInfo(); err != nil {
+			fail(err.Error())
+		}
 	case mergeCmd.Name():
 		if err := mergeProfiles(); err != nil {
 			fail(err.Error())