Add TorchAO aggregated geomean speedup metric (#6119)

This is a follow-up to #6118 that adds an aggregated geomean speedup metric for all models, grouped by device. I am limiting this change to the TorchAO `speedup` metric for now, until I have time to polish the rest of the metrics (or to decide whether they need to be added at all).

### Testing

https://torchci-git-fork-huydhn-add-benchmark-summary-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fao
pytorch · Dec 28, 2024 · ca303db · ca303db
1 parent d407580
commit ca303db
Show file tree

Hide file tree

Showing 5 changed files with 163 additions and 91 deletions.
diff --git a/torchci/components/benchmark/llms/ModelGraphPanel.tsx b/torchci/components/benchmark/llms/ModelGraphPanel.tsx
@@ -19,7 +19,7 @@ import {
 } from "components/metrics/panels/TimeSeriesPanel";
 import dayjs from "dayjs";
 import { computeSpeedup } from "lib/benchmark/aoUtils";
-import { useBenchmark } from "lib/benchmark/llmUtils";
+import { computeGeomean, useBenchmark } from "lib/benchmark/llmUtils";
 import { BranchAndCommit } from "lib/types";
 
 const GRAPH_ROW_HEIGHT = 245;
@@ -64,10 +64,6 @@ export function GraphPanel({
     );
   }
 
-  if (modelName === DEFAULT_MODEL_NAME) {
-    return <></>;
-  }
-
   const dataWithSpeedup = computeSpeedup(repoName, data);
 
   // Clamp to the nearest granularity (e.g. nearest hour) so that the times will
@@ -84,39 +80,67 @@ export function GraphPanel({
   const chartData: { [k: string]: any } = {};
   const graphSeries: { [k: string]: any } = {};
   metricNames.forEach((metric: string) => {
-    chartData[metric] = dataWithSpeedup
-      .filter((record: LLMsBenchmarkData) => {
-        return (
-          record.model === modelName &&
-          (record.dtype === dtypeName || dtypeName === DEFAULT_DTYPE_NAME) &&
-          (`${record.device} (${record.arch})` === deviceName ||
-            deviceName === DEFAULT_DEVICE_NAME) &&
-          record.metric === metric
-        );
-      })
-      .filter((record: LLMsBenchmarkData) => {
-        const id = record.workflow_id;
-        return (
-          (id >= lWorkflowId && id <= rWorkflowId) ||
-          (id <= lWorkflowId && id >= rWorkflowId) ||
-          (lWorkflowId === undefined && rWorkflowId === undefined)
-        );
-      })
-      .map((record: LLMsBenchmarkData) => {
-        const model = record.model;
-        const dtype = record.dtype;
-        const device = record.device;
+    // TODO (huydhn): Only display aggregated speedup metric for now
+    if (modelName === DEFAULT_MODEL_NAME && metric !== "speedup") {
+      chartData[metric] = [];
+      return;
+    }
+
+    const geomean = computeGeomean(dataWithSpeedup, metric);
+    chartData[metric] =
+      modelName === DEFAULT_MODEL_NAME
+        ? geomean
+            .filter((record: LLMsBenchmarkData) => {
+              const id = record.workflow_id;
+              return (
+                (id >= lWorkflowId && id <= rWorkflowId) ||
+                (id <= lWorkflowId && id >= rWorkflowId) ||
+                (lWorkflowId === undefined && rWorkflowId === undefined) ||
+                // This is a hack to handle the mock workflow ID that comes from running the TorchAO benchmark
+                // locally. In such a case, the workflow ID is actually the epoch timestamp, and its value is
+                // completely different from a regular GitHub workflow ID
+                0.5 > rWorkflowId / lWorkflowId ||
+                rWorkflowId / lWorkflowId > 2
+              );
+            })
+            .map((record: LLMsBenchmarkData) => {
+              record.display = `${record.device} (${record.arch})`;
+              return record;
+            })
+        : dataWithSpeedup
+            .filter((record: LLMsBenchmarkData) => {
+              return (
+                record.model === modelName &&
+                (record.dtype === dtypeName ||
+                  dtypeName === DEFAULT_DTYPE_NAME) &&
+                (`${record.device} (${record.arch})` === deviceName ||
+                  deviceName === DEFAULT_DEVICE_NAME) &&
+                record.metric === metric
+              );
+            })
+            .filter((record: LLMsBenchmarkData) => {
+              const id = record.workflow_id;
+              return (
+                (id >= lWorkflowId && id <= rWorkflowId) ||
+                (id <= lWorkflowId && id >= rWorkflowId) ||
+                (lWorkflowId === undefined && rWorkflowId === undefined)
+              );
+            })
+            .map((record: LLMsBenchmarkData) => {
+              const model = record.model;
+              const dtype = record.dtype;
+              const device = record.device;
 
-        record.display = model.includes(dtype)
-          ? model.includes(device)
-            ? model
-            : `${model} (${device})`
-          : model.includes(device)
-          ? `${model} (${dtype})`
-          : `${model} (${dtype} / ${device})`;
+              record.display = model.includes(dtype)
+                ? model.includes(device)
+                  ? model
+                  : `${model} (${device})`
+                : model.includes(device)
+                ? `${model} (${dtype})`
+                : `${model} (${dtype} / ${device})`;
 
-        return record;
-      });
+              return record;
+            });
 
     graphSeries[metric] = seriesWithInterpolatedTimes(
       chartData[metric],
@@ -141,7 +165,13 @@ export function GraphPanel({
           {metricNames
             .filter((metric) => chartData[metric].length !== 0)
             .map((metric: string) => (
-              <Grid item xs={12} lg={4} height={GRAPH_ROW_HEIGHT} key={metric}>
+              <Grid
+                item
+                xs={12}
+                lg={modelName === DEFAULT_MODEL_NAME ? 12 : 4}
+                height={GRAPH_ROW_HEIGHT}
+                key={metric}
+              >
                 <TimeSeriesPanelWithData
                   data={chartData[metric]}
                   series={graphSeries[metric]}
@@ -169,54 +199,56 @@ export function GraphPanel({
             ))}
         </Grid>
       </div>
-      <div>
-        <table>
-          <thead>
-            <tr>
-              <th>Date</th>
-              <th>Commit</th>
-              {metricNames.map((metric: string) => (
-                <th key={metric}>
-                  {chartData[metric].length !== 0
-                    ? metric in METRIC_DISPLAY_SHORT_HEADERS
-                      ? METRIC_DISPLAY_SHORT_HEADERS[metric]
-                      : metric
-                    : ""}
-                </th>
-              ))}
-            </tr>
-          </thead>
-          <tbody>
-            {chartData[availableMetric].map((entry: any, index: number) => {
-              let commit = WORKFLOW_ID_TO_COMMIT[entry.workflow_id];
-              return (
-                <tr key={index}>
-                  <td>{entry.granularity_bucket}</td>
-                  <td>
-                    <code>
-                      <a
-                        onClick={() => navigator.clipboard.writeText(commit)}
-                        className="animate-on-click"
-                      >
-                        {commit}
-                      </a>
-                    </code>
-                  </td>
-                  {metricNames
-                    .filter((metric) => chartData[metric].length !== 0)
-                    .map((metric: string) => (
-                      <td key={`${metric}-${index}`}>
-                        {chartData[metric][index] !== undefined
-                          ? chartData[metric][index].actual
-                          : ""}
-                      </td>
-                    ))}
-                </tr>
-              );
-            })}
-          </tbody>
-        </table>
-      </div>
+      {modelName !== DEFAULT_MODEL_NAME && (
+        <div>
+          <table>
+            <thead>
+              <tr>
+                <th>Date</th>
+                <th>Commit</th>
+                {metricNames.map((metric: string) => (
+                  <th key={metric}>
+                    {chartData[metric].length !== 0
+                      ? metric in METRIC_DISPLAY_SHORT_HEADERS
+                        ? METRIC_DISPLAY_SHORT_HEADERS[metric]
+                        : metric
+                      : ""}
+                  </th>
+                ))}
+              </tr>
+            </thead>
+            <tbody>
+              {chartData[availableMetric].map((entry: any, index: number) => {
+                let commit = WORKFLOW_ID_TO_COMMIT[entry.workflow_id];
+                return (
+                  <tr key={index}>
+                    <td>{entry.granularity_bucket}</td>
+                    <td>
+                      <code>
+                        <a
+                          onClick={() => navigator.clipboard.writeText(commit)}
+                          className="animate-on-click"
+                        >
+                          {commit}
+                        </a>
+                      </code>
+                    </td>
+                    {metricNames
+                      .filter((metric) => chartData[metric].length !== 0)
+                      .map((metric: string) => (
+                        <td key={`${metric}-${index}`}>
+                          {chartData[metric][index] !== undefined
+                            ? chartData[metric][index].actual
+                            : ""}
+                        </td>
+                      ))}
+                  </tr>
+                );
+              })}
+            </tbody>
+          </table>
+        </div>
+      )}
     </>
   );
 }
diff --git a/torchci/components/benchmark/llms/SummaryPanel.tsx b/torchci/components/benchmark/llms/SummaryPanel.tsx
@@ -83,13 +83,9 @@ export function SummaryPanel({
           model
         )}${backend}${dtype}&deviceName=${encodeURIComponent(deviceArch)}`;
 
-        const isNewModel = params.value.l === undefined ? "(NEW!) " : "";
-        const isModelStopRunning = params.value.r === undefined ? "❌" : "";
-
         return (
           <a href={url}>
-            {isNewModel}
-            {isModelStopRunning}&nbsp;<b>{model}</b>
+            <b>{model}</b>
           </a>
         );
       },

diff --git a/torchci/components/benchmark/llms/common.tsx b/torchci/components/benchmark/llms/common.tsx
@@ -14,6 +14,7 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
   token_per_sec: "Token per second",
   flops_utilization: "FLOPs utilization",
   "compilation_time(s)": "Compilation Time (s)",
+  speedup: "Speedup",
 };
 // The variable name is a bit dumb, but it tells if a higher metric value
 // is good or bad so that we can highlight it on the dashboard accordingly.

diff --git a/torchci/lib/benchmark/llmUtils.ts b/torchci/lib/benchmark/llmUtils.ts
@@ -2,6 +2,7 @@ import {
   BranchAndCommitPerfData,
   LLMsBenchmarkData,
 } from "components/benchmark/llms/common";
+import { geomean } from "lib/benchmark/compilerUtils";
 import { fetcher } from "lib/GeneralUtils";
 import { BranchAndCommit } from "lib/types";
 import useSWR from "swr";
@@ -158,3 +159,45 @@ export function combineLeftAndRight(
 
   return data;
 }
+
+/**
+ * Geomean of `metricName` across models for each (bucket, workflow, job,
+ * backend, dtype, device, arch) group. Zero values are skipped. Output fields
+ * come from the group's first record, not from re-splitting the "+"-joined key.
+ */
+export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
+  const groups = new Map<string, [LLMsBenchmarkData, number[]]>();
+
+  data.forEach((r: LLMsBenchmarkData) => {
+    if (r.metric !== metricName) {
+      return;
+    }
+
+    const k = `${r.granularity_bucket}+${r.workflow_id}+${r.job_id}+${r.backend}+${r.dtype}+${r.device}+${r.arch}+${r.metric}`;
+    let group = groups.get(k);
+    if (group === undefined) {
+      group = [r, []];
+      groups.set(k, group);
+    }
+    if (r.actual !== 0) {
+      group[1].push(r.actual);
+    }
+  });
+
+  return Array.from(
+    groups.values(),
+    ([head, values]): LLMsBenchmarkData => ({
+      granularity_bucket: head.granularity_bucket,
+      model: "",
+      backend: head.backend,
+      workflow_id: Number(head.workflow_id),
+      job_id: Number(head.job_id),
+      metric: `${head.metric} (geomean)`,
+      actual: Number(geomean(values)),
+      target: 0,
+      dtype: head.dtype,
+      device: head.device,
+      arch: head.arch,
+    })
+  );
+}
diff --git a/torchci/pages/benchmark/llms.tsx b/torchci/pages/benchmark/llms.tsx
@@ -152,7 +152,7 @@ export default function Page() {
   const defaultStopTime = dayjs();
   const [stopTime, setStopTime] = useState(defaultStopTime);
   const [timeRange, setTimeRange] = useState<number>(LAST_N_DAYS);
-  const [granularity, setGranularity] = useState<Granularity>("hour");
+  const [granularity, setGranularity] = useState<Granularity>("day");
   const [lBranch, setLBranch] = useState<string>(MAIN_BRANCH);
   const [lCommit, setLCommit] = useState<string>("");
   const [rBranch, setRBranch] = useState<string>(MAIN_BRANCH);