Add speedup metric for TorchAO (#6118)
This is my initial attempt to add the speedup metric for TorchAO, computed by comparing `autoquant` against the `noquant` baseline. This is by no means the best approach because it requires custom logic for TorchAO on the dashboard. On the other hand, it's easy to implement, and I think it's better to get the UX done first to gather early feedback from @jerryzh168 and the rest of the ao team.

IMO, better approaches would be to either 1) set the speedup metric on the TorchAO side or 2) compute the speedup metric in ClickHouse. Both are more involved and require further design discussion.
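
For reference, the speedup here is the ratio between the `autoquant` and `noquant` numbers for the same model, metric, device, and arch, with the direction flipped for time-based metrics. A minimal sketch of the formula (names are illustrative, not from the codebase):

```ts
// Sketch of the speedup formula; `speedupOf` is a hypothetical helper.
// For time-based metrics (e.g. time_ms(avg)) lower is better, so
// speedup = baseline / candidate; for throughput metrics (e.g. tok/s)
// higher is better, so speedup = candidate / baseline.
function speedupOf(metric: string, baseline: number, candidate: number): number {
  return metric.includes("time") ? baseline / candidate : candidate / baseline;
}

speedupOf("tok/s", 100, 150); // 1.5 — autoquant generates tokens 1.5x faster
speedupOf("time_ms(avg)", 20, 10); // 2.0 — autoquant halves the latency
```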

### Testing

https://torchci-git-fork-huydhn-add-speedup-llm-dashboard-fbopensource.vercel.app/benchmark/llms?repoName=pytorch%2Fao
huydhn authored Dec 28, 2024
1 parent 6f108ab commit d407580
Showing 6 changed files with 134 additions and 38 deletions.
7 changes: 6 additions & 1 deletion torchci/components/benchmark/llms/ModelGraphPanel.tsx
@@ -18,6 +18,7 @@ import {
TimeSeriesPanelWithData,
} from "components/metrics/panels/TimeSeriesPanel";
import dayjs from "dayjs";
import { computeSpeedup } from "lib/benchmark/aoUtils";
import { useBenchmark } from "lib/benchmark/llmUtils";
import { BranchAndCommit } from "lib/types";

@@ -26,6 +27,7 @@ const GRAPH_ROW_HEIGHT = 245;
export function GraphPanel({
queryParams,
granularity,
repoName,
modelName,
backendName,
dtypeName,
@@ -36,6 +38,7 @@ export function GraphPanel({
}: {
queryParams: { [key: string]: any };
granularity: Granularity;
repoName: string;
modelName: string;
backendName: string;
dtypeName: string;
@@ -65,6 +68,8 @@
return <></>;
}

const dataWithSpeedup = computeSpeedup(repoName, data);

// Clamp to the nearest granularity (e.g. nearest hour) so that the times will
// align with the data we get from the database
const startTime = dayjs(queryParams["startTime"]).startOf(granularity);
@@ -79,7 +84,7 @@
const chartData: { [k: string]: any } = {};
const graphSeries: { [k: string]: any } = {};
metricNames.forEach((metric: string) => {
chartData[metric] = data
chartData[metric] = dataWithSpeedup
.filter((record: LLMsBenchmarkData) => {
return (
record.model === modelName &&
63 changes: 38 additions & 25 deletions torchci/components/benchmark/llms/SummaryPanel.tsx
@@ -63,43 +63,51 @@ export function SummaryPanel({
},
renderCell: (params: GridRenderCellParams<any>) => {
const model = params.value.model;
const dtype = params.value.dtype;
const deviceArch = `${params.value.device} (${params.value.arch})`;
if (model === undefined) {
return `Invalid model name`;
}
if (dtype === undefined) {
return `Invalid dtype for model ${model}`;
}

const dtype =
params.value.dtype !== undefined
? `&dtypeName=${encodeURIComponent(params.value.dtype)}`
: "";
const backend =
params.value.backend !== undefined
? `&${encodeURIComponent(params.value.backend)}`
? `&backendName=${encodeURIComponent(params.value.backend)}`
: "";
const deviceArch = `${params.value.device} (${params.value.arch})`;

const url = `/benchmark/llms?startTime=${startTime}&stopTime=${stopTime}&granularity=${granularity}&repoName=${encodeURIComponent(
repoName
)}&modelName=${encodeURIComponent(
model
)}${backend}&dtypeName=${encodeURIComponent(
dtype
)}&deviceName=${encodeURIComponent(deviceArch)}`;
)}${backend}${dtype}&deviceName=${encodeURIComponent(deviceArch)}`;

const isNewModel = params.value.l === undefined ? "(NEW!) " : "";
const isModelStopRunning = params.value.r === undefined ? "❌" : "";

const displayName = model.includes(dtype)
? model
: `${model} (${dtype})`;
return (
<a href={url}>
{isNewModel}
{isModelStopRunning}&nbsp;<b>{displayName}</b>
{isModelStopRunning}&nbsp;<b>{model}</b>
</a>
);
},
},
];

const hasDtype = data.length > 0 && "dtype" in data[0] ? true : false;
if (hasDtype) {
columns.push({
field: "dtype",
headerName: "Quantization",
flex: 1,
renderCell: (params: GridRenderCellParams<any>) => {
return `${params.value}`;
},
});
}

const hasBackend = data.length > 0 && "backend" in data[0] ? true : false;
if (hasBackend) {
columns.push({
@@ -155,18 +163,23 @@
return styles.error;
}

// Higher value
if (r - l > RELATIVE_THRESHOLD * l) {
return IS_INCREASING_METRIC_VALUE_GOOD[metric]
? styles.ok
: styles.error;
}

// Lower value
if (l - r > RELATIVE_THRESHOLD * r) {
return IS_INCREASING_METRIC_VALUE_GOOD[metric]
? styles.error
: styles.ok;
if (metric in IS_INCREASING_METRIC_VALUE_GOOD) {
// Higher value
if (r - l > RELATIVE_THRESHOLD * l) {
return IS_INCREASING_METRIC_VALUE_GOOD[metric]
? styles.ok
: styles.error;
}

// Lower value
if (l - r > RELATIVE_THRESHOLD * r) {
return IS_INCREASING_METRIC_VALUE_GOOD[metric]
? styles.error
: styles.ok;
}
} else {
// No data
return "";
}
}

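For reference, the cell-coloring rule above only styles metrics registered in `IS_INCREASING_METRIC_VALUE_GOOD`, which is why `speedup` is added to that map in `common.tsx` below. A standalone sketch of the rule, assuming a hypothetical `cellClass` helper with plain strings standing in for the CSS classes:

```ts
const RELATIVE_THRESHOLD = 0.05;
// Subset of the real map; see common.tsx below.
const IS_INCREASING_METRIC_VALUE_GOOD: { [k: string]: boolean } = {
  speedup: true,
  "compilation_time(s)": false,
};

// Hypothetical standalone version of the styling logic above.
function cellClass(metric: string, l: number, r: number): string {
  if (!(metric in IS_INCREASING_METRIC_VALUE_GOOD)) {
    return ""; // unknown metric: leave the cell unstyled
  }
  if (r - l > RELATIVE_THRESHOLD * l) {
    // value increased by more than 5%
    return IS_INCREASING_METRIC_VALUE_GOOD[metric] ? "ok" : "error";
  }
  if (l - r > RELATIVE_THRESHOLD * r) {
    // value decreased by more than 5%
    return IS_INCREASING_METRIC_VALUE_GOOD[metric] ? "error" : "ok";
  }
  return ""; // within the 5% noise band
}

cellClass("speedup", 1.0, 1.2); // "ok" — a higher speedup is an improvement
```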
7 changes: 4 additions & 3 deletions torchci/components/benchmark/llms/common.tsx
@@ -1,6 +1,6 @@
import { BranchAndCommit } from "lib/types";

export const REPOS = ["pytorch/pytorch", "pytorch/executorch"];
export const REPOS = ["pytorch/pytorch", "pytorch/executorch", "pytorch/ao"];
export const REPO_TO_BENCHMARKS: { [k: string]: string[] } = {
"pytorch/pytorch": ["PyTorch gpt-fast benchmark"],
"pytorch/executorch": ["ExecuTorch"],
@@ -23,6 +23,7 @@ export const IS_INCREASING_METRIC_VALUE_GOOD: { [k: string]: boolean } = {
token_per_sec: true,
flops_utilization: true,
"compilation_time(s)": false,
speedup: true,
};
export const METRIC_DISPLAY_SHORT_HEADERS: { [k: string]: string } = {
"memory_bandwidth(GB/s)": "Bandwidth",
@@ -40,9 +41,9 @@ export const RELATIVE_THRESHOLD = 0.05;
export interface LLMsBenchmarkData {
granularity_bucket: string;
model: string;
backend?: string;
backend: string;
workflow_id: number;
job_id?: number;
job_id: number;
metric: string;
actual: number;
target: number;
57 changes: 57 additions & 0 deletions torchci/lib/benchmark/aoUtils.ts
@@ -1,5 +1,15 @@
import { LLMsBenchmarkData } from "components/benchmark/llms/common";
import { BenchmarkData, CompilerPerformanceData } from "lib/types";

export const TORCHAO_REPO = "pytorch/ao";
// TODO (huydhn): Find a better way to abstract this baseline concept, for example,
// this could be dtype noquant for TorchAO, or eager config for inductor
export const TORCHAO_BASELINE = "noquant";
// TODO (huydhn): The following are TorchAO speedup metrics. Check with ao team to
// see if this information could be codified on the benchmark instead of keeping it
// here on the dashboard
const SPEEDUP_METRICS = ["tok/s", "time_ms(avg)", "time_s(avg)", "img_s(avg)"];

// TODO (huydhn): Use this function to convert the generic benchmark data to the old
// CompilerPerformanceData format. This is needed until the TorchInductor dashboard
// is migrated to the new format
@@ -43,3 +53,50 @@ export function convertToCompilerPerformanceData(data: BenchmarkData[]) {

return Object.values(convertData);
}

export function computeSpeedup(repoName: string, data: LLMsBenchmarkData[]) {
if (repoName !== TORCHAO_REPO) {
return data;
}

const baselineMetrics: { [key: string]: LLMsBenchmarkData } = {};
data.forEach((r: LLMsBenchmarkData) => {
if (r.dtype !== TORCHAO_BASELINE) {
return;
}

const k = `${r.workflow_id} ${r.job_id} ${r.model} ${r.metric} ${r.device} ${r.arch}`;
baselineMetrics[k] = r;
});

const withSpeedup: LLMsBenchmarkData[] = [];
data.forEach((r: LLMsBenchmarkData) => {
if (r.dtype === TORCHAO_BASELINE) {
return;
}

if (SPEEDUP_METRICS.includes(r.metric)) {
const k = `${r.workflow_id} ${r.job_id} ${r.model} ${r.metric} ${r.device} ${r.arch}`;
if (
k in baselineMetrics &&
baselineMetrics[k].actual !== 0 &&
r.actual !== 0
) {
const speedup = r.metric.includes("time")
? baselineMetrics[k].actual / r.actual
: r.actual / baselineMetrics[k].actual;

withSpeedup.push({
...r,
metric: "speedup",
actual: Number(speedup.toFixed(4)),
target: 0,
});
}
}

withSpeedup.push(r);
});

return withSpeedup;
}
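
A minimal usage sketch of the new `computeSpeedup` helper, with two hypothetical records sharing the same (workflow, job, model, metric, device, arch) key; field values are illustrative and fields not needed here are elided:

```ts
import { LLMsBenchmarkData } from "components/benchmark/llms/common";
import { computeSpeedup, TORCHAO_REPO } from "lib/benchmark/aoUtils";

// Hypothetical noquant baseline record (remaining fields elided).
const baseline = {
  granularity_bucket: "2024-12-28T00:00:00Z", model: "llama-2-7b",
  backend: "", workflow_id: 1, job_id: 1, metric: "tok/s",
  actual: 100, target: 0, dtype: "noquant",
  device: "cuda", arch: "NVIDIA A100",
} as unknown as LLMsBenchmarkData;

// The autoquant run of the same model on the same job and device.
const autoquant = { ...baseline, dtype: "autoquant", actual: 150 };

// The noquant row is dropped; the output is a synthesized record with
// metric "speedup" and actual 1.5 (150 / 100) plus the autoquant row.
// Data from repos other than pytorch/ao passes through unchanged.
const withSpeedup = computeSpeedup(TORCHAO_REPO, [baseline, autoquant]);
```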
4 changes: 4 additions & 0 deletions torchci/lib/benchmark/llmUtils.ts
@@ -118,6 +118,10 @@ export function combineLeftAndRight(
row["metadata"]["r"] ?? (hasR ? record["r"]["job_id"] : undefined);
}

if (dtype !== "") {
row["dtype"] = dtype;
}

if (backend !== "") {
row["backend"] = backend;
}
34 changes: 25 additions & 9 deletions torchci/pages/benchmark/llms.tsx
@@ -21,6 +21,7 @@ import CopyLink from "components/CopyLink";
import GranularityPicker from "components/GranularityPicker";
import { Granularity } from "components/metrics/panels/TimeSeriesPanel";
import dayjs from "dayjs";
import { computeSpeedup, TORCHAO_BASELINE } from "lib/benchmark/aoUtils";
import { useBenchmark } from "lib/benchmark/llmUtils";
import { fetcher } from "lib/GeneralUtils";
import { BranchAndCommit } from "lib/types";
@@ -81,22 +82,29 @@ function Report({
);
}

const lDataWithSpeedup = computeSpeedup(repoName, lData);
const rDataWithSpeedup = computeSpeedup(repoName, rData);

if (repoName === "pytorch/ao") {
metricNames = ["speedup", ...metricNames];
}

return (
<div>
<CommitPanel
repoName={repoName}
lBranchAndCommit={{
...rBranchAndCommit,
date:
rData !== undefined && rData.length !== 0
? rData[0].granularity_bucket
rDataWithSpeedup !== undefined && rDataWithSpeedup.length !== 0
? rDataWithSpeedup[0].granularity_bucket
: undefined,
}}
rBranchAndCommit={{
...lBranchAndCommit,
date:
lData !== undefined && lData.length !== 0
? lData[0].granularity_bucket
lDataWithSpeedup !== undefined && lDataWithSpeedup.length !== 0
? lDataWithSpeedup[0].granularity_bucket
: undefined,
}}
workflowName={"inductor-micro-benchmark"}
@@ -106,6 +114,7 @@
<GraphPanel
queryParams={queryParams}
granularity={granularity}
repoName={repoName}
modelName={modelName}
backendName={backendName}
dtypeName={dtypeName}
@@ -124,11 +133,11 @@
metricNames={metricNames}
lPerfData={{
...lBranchAndCommit,
data: lData,
data: lDataWithSpeedup,
}}
rPerfData={{
...rBranchAndCommit,
data: rData,
data: rDataWithSpeedup,
}}
/>
</div>
@@ -237,7 +246,12 @@ export default function Page() {
const queryName = "oss_ci_benchmark_names";
const queryParams = {
deviceArch: deviceName === DEFAULT_DEVICE_NAME ? "" : deviceName,
dtypes: dtypeName === DEFAULT_DTYPE_NAME ? [] : [dtypeName],
dtypes:
dtypeName === DEFAULT_DTYPE_NAME
? []
: repoName !== "pytorch/ao"
? [dtypeName]
: [dtypeName, TORCHAO_BASELINE],
excludedMetrics: EXCLUDED_METRICS,
benchmarks: REPO_TO_BENCHMARKS[repoName],
granularity: granularity,
@@ -274,7 +288,10 @@
];
const dtypeNames: string[] = _.compact([
DEFAULT_DTYPE_NAME,
...(_.uniq(data.map((r: any) => r.dtype)) as string[]),
..._.filter(
_.uniq(data.map((r: any) => r.dtype)) as string[],
(r: string) => r !== TORCHAO_BASELINE
),
]);
const metricNames: string[] = _.uniq(data.map((r: any) => r.metric));

@@ -372,7 +389,6 @@
useClickHouse={true}
/>
</Stack>

<Report
queryParams={queryParams}
startTime={startTime}
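
Note that the dtype filter above always fetches the `noquant` baseline for pytorch/ao, so `computeSpeedup` has a reference point even when the user selects a single quantization dtype. A condensed restatement of that selection (hypothetical helper name):

```ts
import { TORCHAO_BASELINE } from "lib/benchmark/aoUtils";

// Condensed sketch of the dtypes query-param logic in llms.tsx above.
function dtypesForQuery(
  repoName: string,
  dtypeName: string,
  defaultDtypeName: string
): string[] {
  if (dtypeName === defaultDtypeName) {
    return []; // no dtype filter selected
  }
  // For TorchAO, also fetch the noquant baseline for speedup computation.
  return repoName === "pytorch/ao" ? [dtypeName, TORCHAO_BASELINE] : [dtypeName];
}
```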
