Skip to content

Commit

Permalink
Add TorchAO aggregated geomean speedup metric (#6119)
Browse files Browse the repository at this point in the history
This is a follow-up to #6118
that adds an aggregated geomean speedup metric across all models, grouped by
device.

I am limiting this change to the TorchAO `speedup` metric for now, until I have
time to polish the rest of the metrics (or to decide whether they need to be
added at all).

### Testing


https://torchci-git-fork-huydhn-add-benchmark-summary-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fao
  • Loading branch information
huydhn authored Dec 28, 2024
1 parent d407580 commit ca303db
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 91 deletions.
202 changes: 117 additions & 85 deletions torchci/components/benchmark/llms/ModelGraphPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import {
} from "components/metrics/panels/TimeSeriesPanel";
import dayjs from "dayjs";
import { computeSpeedup } from "lib/benchmark/aoUtils";
import { useBenchmark } from "lib/benchmark/llmUtils";
import { computeGeomean, useBenchmark } from "lib/benchmark/llmUtils";
import { BranchAndCommit } from "lib/types";

const GRAPH_ROW_HEIGHT = 245;
Expand Down Expand Up @@ -64,10 +64,6 @@ export function GraphPanel({
);
}

if (modelName === DEFAULT_MODEL_NAME) {
return <></>;
}

const dataWithSpeedup = computeSpeedup(repoName, data);

// Clamp to the nearest granularity (e.g. nearest hour) so that the times will
Expand All @@ -84,39 +80,67 @@ export function GraphPanel({
const chartData: { [k: string]: any } = {};
const graphSeries: { [k: string]: any } = {};
metricNames.forEach((metric: string) => {
chartData[metric] = dataWithSpeedup
.filter((record: LLMsBenchmarkData) => {
return (
record.model === modelName &&
(record.dtype === dtypeName || dtypeName === DEFAULT_DTYPE_NAME) &&
(`${record.device} (${record.arch})` === deviceName ||
deviceName === DEFAULT_DEVICE_NAME) &&
record.metric === metric
);
})
.filter((record: LLMsBenchmarkData) => {
const id = record.workflow_id;
return (
(id >= lWorkflowId && id <= rWorkflowId) ||
(id <= lWorkflowId && id >= rWorkflowId) ||
(lWorkflowId === undefined && rWorkflowId === undefined)
);
})
.map((record: LLMsBenchmarkData) => {
const model = record.model;
const dtype = record.dtype;
const device = record.device;
// TODO (huydhn): Only display aggregated speedup metric for now
if (modelName === DEFAULT_MODEL_NAME && metric !== "speedup") {
chartData[metric] = [];
return;
}

const geomean = computeGeomean(dataWithSpeedup, metric);
chartData[metric] =
modelName === DEFAULT_MODEL_NAME
? geomean
.filter((record: LLMsBenchmarkData) => {
const id = record.workflow_id;
return (
(id >= lWorkflowId && id <= rWorkflowId) ||
(id <= lWorkflowId && id >= rWorkflowId) ||
(lWorkflowId === undefined && rWorkflowId === undefined) ||
// This is a hack to handle the mock workflow ID coming from running TorchAO benchmark locally
// In such case, the workflow ID is actually the epoch timestamp and the value is completely
// different than the regular GitHub workflow ID
0.5 > rWorkflowId / lWorkflowId ||
rWorkflowId / lWorkflowId > 2
);
})
.map((record: LLMsBenchmarkData) => {
record.display = `${record.device} (${record.arch})`;
return record;
})
: dataWithSpeedup
.filter((record: LLMsBenchmarkData) => {
return (
record.model === modelName &&
(record.dtype === dtypeName ||
dtypeName === DEFAULT_DTYPE_NAME) &&
(`${record.device} (${record.arch})` === deviceName ||
deviceName === DEFAULT_DEVICE_NAME) &&
record.metric === metric
);
})
.filter((record: LLMsBenchmarkData) => {
const id = record.workflow_id;
return (
(id >= lWorkflowId && id <= rWorkflowId) ||
(id <= lWorkflowId && id >= rWorkflowId) ||
(lWorkflowId === undefined && rWorkflowId === undefined)
);
})
.map((record: LLMsBenchmarkData) => {
const model = record.model;
const dtype = record.dtype;
const device = record.device;

record.display = model.includes(dtype)
? model.includes(device)
? model
: `${model} (${device})`
: model.includes(device)
? `${model} (${dtype})`
: `${model} (${dtype} / ${device})`;
record.display = model.includes(dtype)
? model.includes(device)
? model
: `${model} (${device})`
: model.includes(device)
? `${model} (${dtype})`
: `${model} (${dtype} / ${device})`;

return record;
});
return record;
});

graphSeries[metric] = seriesWithInterpolatedTimes(
chartData[metric],
Expand All @@ -141,7 +165,13 @@ export function GraphPanel({
{metricNames
.filter((metric) => chartData[metric].length !== 0)
.map((metric: string) => (
<Grid item xs={12} lg={4} height={GRAPH_ROW_HEIGHT} key={metric}>
<Grid
item
xs={12}
lg={modelName === DEFAULT_MODEL_NAME ? 12 : 4}
height={GRAPH_ROW_HEIGHT}
key={metric}
>
<TimeSeriesPanelWithData
data={chartData[metric]}
series={graphSeries[metric]}
Expand Down Expand Up @@ -169,54 +199,56 @@ export function GraphPanel({
))}
</Grid>
</div>
<div>
<table>
<thead>
<tr>
<th>Date</th>
<th>Commit</th>
{metricNames.map((metric: string) => (
<th key={metric}>
{chartData[metric].length !== 0
? metric in METRIC_DISPLAY_SHORT_HEADERS
? METRIC_DISPLAY_SHORT_HEADERS[metric]
: metric
: ""}
</th>
))}
</tr>
</thead>
<tbody>
{chartData[availableMetric].map((entry: any, index: number) => {
let commit = WORKFLOW_ID_TO_COMMIT[entry.workflow_id];
return (
<tr key={index}>
<td>{entry.granularity_bucket}</td>
<td>
<code>
<a
onClick={() => navigator.clipboard.writeText(commit)}
className="animate-on-click"
>
{commit}
</a>
</code>
</td>
{metricNames
.filter((metric) => chartData[metric].length !== 0)
.map((metric: string) => (
<td key={`${metric}-${index}`}>
{chartData[metric][index] !== undefined
? chartData[metric][index].actual
: ""}
</td>
))}
</tr>
);
})}
</tbody>
</table>
</div>
{modelName !== DEFAULT_MODEL_NAME && (
<div>
<table>
<thead>
<tr>
<th>Date</th>
<th>Commit</th>
{metricNames.map((metric: string) => (
<th key={metric}>
{chartData[metric].length !== 0
? metric in METRIC_DISPLAY_SHORT_HEADERS
? METRIC_DISPLAY_SHORT_HEADERS[metric]
: metric
: ""}
</th>
))}
</tr>
</thead>
<tbody>
{chartData[availableMetric].map((entry: any, index: number) => {
let commit = WORKFLOW_ID_TO_COMMIT[entry.workflow_id];
return (
<tr key={index}>
<td>{entry.granularity_bucket}</td>
<td>
<code>
<a
onClick={() => navigator.clipboard.writeText(commit)}
className="animate-on-click"
>
{commit}
</a>
</code>
</td>
{metricNames
.filter((metric) => chartData[metric].length !== 0)
.map((metric: string) => (
<td key={`${metric}-${index}`}>
{chartData[metric][index] !== undefined
? chartData[metric][index].actual
: ""}
</td>
))}
</tr>
);
})}
</tbody>
</table>
</div>
)}
</>
);
}
6 changes: 1 addition & 5 deletions torchci/components/benchmark/llms/SummaryPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -83,13 +83,9 @@ export function SummaryPanel({
model
)}${backend}${dtype}&deviceName=${encodeURIComponent(deviceArch)}`;

const isNewModel = params.value.l === undefined ? "(NEW!) " : "";
const isModelStopRunning = params.value.r === undefined ? "❌" : "";

return (
<a href={url}>
{isNewModel}
{isModelStopRunning}&nbsp;<b>{model}</b>
<b>{model}</b>
</a>
);
},
Expand Down
1 change: 1 addition & 0 deletions torchci/components/benchmark/llms/common.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export const METRIC_DISPLAY_HEADERS: { [k: string]: string } = {
token_per_sec: "Token per second",
flops_utilization: "FLOPs utilization",
"compilation_time(s)": "Compilation Time (s)",
speedup: "Speedup",
};
// The variable name is a bit dumb, but it tells if a higher metric value
// is good or bad so that we can highlight it on the dashboard accordingly.
Expand Down
43 changes: 43 additions & 0 deletions torchci/lib/benchmark/llmUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {
BranchAndCommitPerfData,
LLMsBenchmarkData,
} from "components/benchmark/llms/common";
import { geomean } from "lib/benchmark/compilerUtils";
import { fetcher } from "lib/GeneralUtils";
import { BranchAndCommit } from "lib/types";
import useSWR from "swr";
Expand Down Expand Up @@ -158,3 +159,45 @@ export function combineLeftAndRight(

return data;
}

/**
 * Aggregate one metric across models into a single geomean value per run.
 *
 * Records whose `metric` equals `metricName` are grouped by
 * (granularity_bucket, workflow_id, job_id, backend, dtype, device, arch), so
 * each group collects the values of every model sharing those fields. Zero
 * values are left out of a group's values, but the group itself is still
 * reported.
 *
 * @param data - Benchmark records to aggregate; the array is not modified.
 * @param metricName - Name of the metric to aggregate, e.g. "speedup".
 * @returns One record per group, in first-seen order, with an empty `model`,
 *   `metric` set to `${metricName} (geomean)`, `target` set to 0 and `actual`
 *   set to the result of `geomean` for the group converted with `Number`.
 */
export function computeGeomean(data: LLMsBenchmarkData[], metricName: string) {
  // Keep the first record of every group next to its values so that the
  // grouping fields never have to be parsed back out of the key string.
  // Splitting a "+"-joined key breaks as soon as any field contains "+"
  // (e.g. a timestamp with a "+00:00" offset or a backend named "a+b").
  const groups = new Map<
    string,
    { first: LLMsBenchmarkData; values: number[] }
  >();

  data.forEach((r: LLMsBenchmarkData) => {
    if (r.metric !== metricName) {
      return;
    }

    // JSON-encoding the tuple gives an unambiguous key whatever characters
    // the individual fields contain.
    const key = JSON.stringify([
      r.granularity_bucket,
      r.workflow_id,
      r.job_id,
      r.backend,
      r.dtype,
      r.device,
      r.arch,
      r.metric,
    ]);

    let group = groups.get(key);
    if (group === undefined) {
      group = { first: r, values: [] };
      groups.set(key, group);
    }

    // Zero values are excluded from the aggregate.
    if (r.actual !== 0) {
      group.values.push(r.actual);
    }
  });

  const returnedGeomean: LLMsBenchmarkData[] = [];
  // Map iteration follows insertion order, so groups come out first-seen.
  groups.forEach(({ first, values }) => {
    returnedGeomean.push({
      granularity_bucket: first.granularity_bucket,
      model: "",
      backend: first.backend,
      workflow_id: Number(first.workflow_id),
      job_id: Number(first.job_id),
      metric: `${first.metric} (geomean)`,
      actual: Number(geomean(values)),
      target: 0,
      dtype: first.dtype,
      device: first.device,
      arch: first.arch,
    });
  });

  return returnedGeomean;
}
2 changes: 1 addition & 1 deletion torchci/pages/benchmark/llms.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ export default function Page() {
const defaultStopTime = dayjs();
const [stopTime, setStopTime] = useState(defaultStopTime);
const [timeRange, setTimeRange] = useState<number>(LAST_N_DAYS);
const [granularity, setGranularity] = useState<Granularity>("hour");
const [granularity, setGranularity] = useState<Granularity>("day");
const [lBranch, setLBranch] = useState<string>(MAIN_BRANCH);
const [lCommit, setLCommit] = useState<string>("");
const [rBranch, setRBranch] = useState<string>(MAIN_BRANCH);
Expand Down

0 comments on commit ca303db

Please sign in to comment.