Commit

tc
clee2000 committed Jan 13, 2025
1 parent ec8c108 commit 7ee68a7
Showing 74 changed files with 937 additions and 641 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/benchmarks/gather_metadata.py
@@ -5,8 +5,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import os
 import json
+import os
 import time
 from typing import Any
1 change: 1 addition & 0 deletions .github/scripts/get_tutorials_stats.py
@@ -10,6 +10,7 @@
 
 import boto3  # type: ignore[import]
 
+
 METADATA_PATH = "ossci_tutorials_stats/metadata.csv"
 FILENAMES_PATH = "ossci_tutorials_stats/filenames.csv"
 
1 change: 1 addition & 0 deletions .github/scripts/update_commit_hashes.py
@@ -6,6 +6,7 @@
 
 import requests
 
+
 UPDATEBOT_TOKEN = os.environ["UPDATEBOT_TOKEN"]
 PYTORCHBOT_TOKEN = os.environ["PYTORCHBOT_TOKEN"]
 
2 changes: 1 addition & 1 deletion .github/scripts/upload_benchmark_results.py
@@ -14,13 +14,13 @@
 from argparse import Action, ArgumentParser, Namespace
 from decimal import Decimal
 from json.decoder import JSONDecodeError
-
 from logging import info
 from typing import Any, Callable, Dict, List, Optional
 from warnings import warn
 
 import boto3
 
+
 logging.basicConfig(level=logging.INFO)
 
 
4 changes: 1 addition & 3 deletions .github/scripts/validate_scale_config.py
@@ -9,16 +9,14 @@
 import copy
 import json
 import os
-
 import urllib.request
 from pathlib import Path
-
 from typing import Any, cast, Dict, List, NamedTuple
 
 import jsonschema
-
 import yaml
 
+
 MAX_AVAILABLE_MINIMUM = 50
 
 # Paths relative to their respective repositories
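All of the import hunks above apply the same grouping convention: standard-library imports first, then third-party imports, each group sorted, with two blank lines before the first module-level statement. A minimal before/after sketch of that convention on a hypothetical module (the names below are illustrative, not from this commit):

# Before: stdlib and third-party interleaved, stray blank lines
import os

import requests

import json

# After: one sorted stdlib group, one third-party group,
# two blank lines before module-level code
import json
import os

import requests


TIMEOUT = 30  # hypothetical module-level constant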
69 changes: 44 additions & 25 deletions tools/analytics/cubinsizes.py
@@ -12,27 +12,30 @@
 try:
     from elftools.elf.elffile import ELFFile
 except ModuleNotFoundError:
-    print(f'elftools module not found, trying to install it from pip')
+    print(f"elftools module not found, trying to install it from pip")
     from pip._internal import main as pip_main
+
     try:
         pip_main(["install", "pyelftools", "--user"])
     except SystemExit:
-        print(f'PIP installation failed, please install it manually by invoking "{sys.executable} -mpip install pyelftools --user"')
+        print(
+            f'PIP installation failed, please install it manually by invoking "{sys.executable} -mpip install pyelftools --user"'
+        )
         sys.exit(-1)
     from elftools.elf.elffile import ELFFile
 
 
 # From https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
-def sizeof_fmt(num, suffix='B'):
-    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
+def sizeof_fmt(num, suffix="B"):
+    for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
         if abs(num) < 1024.0:
             return "%3.1f%s%s" % (num, unit, suffix)
         num /= 1024.0
-    return "%.1f%s%s" % (num, 'Yi', suffix)
+    return "%.1f%s%s" % (num, "Yi", suffix)
 
 
-def compute_cubin_sizes(file_name, section_name='.nv_fatbin', debug=False):
-    with open(file_name, 'rb') as f:
+def compute_cubin_sizes(file_name, section_name=".nv_fatbin", debug=False):
+    with open(file_name, "rb") as f:
         elf_file = ELFFile(f)
         nv_fatbin = elf_file.get_section_by_name(section_name)
         if nv_fatbin is None:
@@ -41,20 +44,32 @@ def compute_cubin_sizes(file_name, section_name='.nv_fatbin', debug=False):
         idx, offs = 0, 0
         elf_sizes = {}
         while offs < len(data):
-            (magic, version, header_size, fatbin_size) = struct.unpack('IHHL', data[offs: offs + 16])
-            if magic != 0xba55ed50 or version != 1:
-                raise RuntimeError(f"Unexpected fatbin magic {hex(magic)} or version {version}")
+            (magic, version, header_size, fatbin_size) = struct.unpack(
+                "IHHL", data[offs : offs + 16]
+            )
+            if magic != 0xBA55ED50 or version != 1:
+                raise RuntimeError(
+                    f"Unexpected fatbin magic {hex(magic)} or version {version}"
+                )
             if debug:
-                print(f"Found fatbin at {offs} header_size={header_size} fatbin_size={fatbin_size}")
+                print(
+                    f"Found fatbin at {offs} header_size={header_size} fatbin_size={fatbin_size}"
+                )
             offs += header_size
             fatbin_end = offs + fatbin_size
             while offs < fatbin_end:
-                (kind, version, hdr_size, elf_size, empty, code_ver, sm_ver) = struct.unpack('HHILLIH', data[offs: offs + 30])
+                (kind, version, hdr_size, elf_size, empty, code_ver, sm_ver) = (
+                    struct.unpack("HHILLIH", data[offs : offs + 30])
+                )
                 if version != 0x0101 or kind not in [1, 2]:
-                    raise RuntimeError(f"Unexpected cubin version {hex(version)} or kind {kind}")
+                    raise RuntimeError(
+                        f"Unexpected cubin version {hex(version)} or kind {kind}"
+                    )
                 sm_ver = f'{"ptx" if kind == 1 else "sm"}_{sm_ver}'
                 if debug:
-                    print(f"  {idx}: elf_size={elf_size} code_ver={hex(code_ver)} sm={sm_ver}")
+                    print(
+                        f"  {idx}: elf_size={elf_size} code_ver={hex(code_ver)} sm={sm_ver}"
+                    )
                 if sm_ver not in elf_sizes:
                     elf_sizes[sm_ver] = 0
                 elf_sizes[sm_ver] += elf_size
@@ -71,7 +86,7 @@ def __init__(self, ar_name: str) -> None:
     def __enter__(self) -> str:
         self._pwd = os.getcwd()
         rc = self._tmpdir.__enter__()
-        subprocess.check_call(['ar', 'x', self.ar_name])
+        subprocess.check_call(["ar", "x", self.ar_name])
         return rc
 
     def __exit__(self, ex, value, tb) -> None:
@@ -86,40 +101,44 @@ def dict_add(rc: Dict[str, int], b: Dict[str, int]) -> Dict[str, int]:
 
 
 def main():
-    if sys.platform != 'linux':
-        print('This script only works with Linux ELF files')
+    if sys.platform != "linux":
+        print("This script only works with Linux ELF files")
         return
     if len(sys.argv) < 2:
-        print(f"{sys.argv[0]} invoked without any arguments trying to infer location of libtorch_cuda")
+        print(
+            f"{sys.argv[0]} invoked without any arguments trying to infer location of libtorch_cuda"
+        )
         import torch
-        fname = os.path.join(os.path.dirname(torch.__file__), 'lib', 'libtorch_cuda.so')
+
+        fname = os.path.join(os.path.dirname(torch.__file__), "lib", "libtorch_cuda.so")
     else:
         fname = sys.argv[1]
 
     if not os.path.exists(fname):
         print(f"Can't find {fname}")
         sys.exit(-1)
 
-    section_names = ['.nv_fatbin', '__nv_relfatbin']
+    section_names = [".nv_fatbin", "__nv_relfatbin"]
     results = {name: {} for name in section_names}
     print(f"Analyzing {fname}")
-    if os.path.splitext(fname)[1] == '.a':
+    if os.path.splitext(fname)[1] == ".a":
         with ArFileCtx(fname):
             for fname in os.listdir("."):
-                if not fname.endswith(".o"): continue
+                if not fname.endswith(".o"):
+                    continue
                 for section_name in section_names:
                     elf_sizes = compute_cubin_sizes(fname, section_name)
                     dict_add(results[section_name], elf_sizes)
     else:
-        for section_name in ['.nv_fatbin', '__nv_relfatbin']:
+        for section_name in [".nv_fatbin", "__nv_relfatbin"]:
             dict_add(results[section_name], compute_cubin_sizes(fname, section_name))
 
     for section_name in section_names:
         elf_sizes = results[section_name]
         print(f"{section_name} size {sizeof_fmt(sum(elf_sizes.values()))}")
-        for (sm_ver, total_size) in elf_sizes.items():
+        for sm_ver, total_size in elf_sizes.items():
             print(f"  {sm_ver}: {sizeof_fmt(total_size)}")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
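The compute_cubin_sizes diff above walks the raw bytes of an .nv_fatbin section: a 16-byte container header (unpacked with "IHHL": magic 0xBA55ED50, version, header size, payload size) followed by per-architecture entries whose 30-byte headers ("HHILLIH") carry the kind (1 = PTX, 2 = SASS cubin), the ELF payload size, and the SM version. A self-contained sketch of the same walk; the struct formats and magic come straight from the diff, while the offset-advance step at the end of each entry is an assumption based on the collapsed part of the hunk:

import struct

FATBIN_MAGIC = 0xBA55ED50  # container magic checked in compute_cubin_sizes


def iter_fatbin_entries(data: bytes):
    """Yield (kind, sm_ver, elf_size) for each entry in raw .nv_fatbin bytes."""
    offs = 0
    while offs < len(data):
        # 16-byte container header: magic, version, header size, payload size
        magic, version, header_size, fatbin_size = struct.unpack(
            "IHHL", data[offs : offs + 16]
        )
        if magic != FATBIN_MAGIC or version != 1:
            raise RuntimeError(f"Unexpected fatbin magic {hex(magic)}")
        offs += header_size
        fatbin_end = offs + fatbin_size
        while offs < fatbin_end:
            # 30-byte entry header; kind 1 = PTX, kind 2 = SASS (cubin)
            kind, _, hdr_size, elf_size, _, _, sm_ver = struct.unpack(
                "HHILLIH", data[offs : offs + 30]
            )
            yield kind, sm_ver, elf_size
            # Assumption: each entry is its header followed by its payload
            offs += hdr_size + elf_size
        offs = fatbin_end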
75 changes: 32 additions & 43 deletions tools/analytics/download_count_wheels.py
@@ -1,16 +1,18 @@
-from collections import defaultdict
-from datetime import datetime, timedelta, timezone
 import gzip
 import os
 import re
 import urllib
+from collections import defaultdict
+from datetime import datetime, timedelta, timezone
 
-from tqdm import tqdm
 import boto3
+from tqdm import tqdm
 
-S3 = boto3.resource('s3')
-CLIENT = boto3.client('s3')
-BUCKET = S3.Bucket('pytorch')
+
+S3 = boto3.resource("s3")
+CLIENT = boto3.client("s3")
+BUCKET = S3.Bucket("pytorch")
+
 
 class CacheEntry:
     _size = None
@@ -38,66 +40,56 @@ def target_arch(self) -> str:
 
     @property
     def package_name(self) -> str:
-        filename_contents = os.path.basename(self.download_uri).split('-')
+        filename_contents = os.path.basename(self.download_uri).split("-")
         return filename_contents[0]
 
     @property
     def package_version(self) -> str:
         if "dev" in self.download_uri:
-            results = re.search(
-                r"[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+",
-                self.download_uri
-            )
+            results = re.search(r"[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+", self.download_uri)
         else:
-            results = re.search(
-                r"[0-9]+\.[0-9]+\.[0-9]+", self.download_uri
-            )
+            results = re.search(r"[0-9]+\.[0-9]+\.[0-9]+", self.download_uri)
         if not results:
             raise Exception("Wtf there's no version o.O")
         return results[0]
 
     @property
     def size(self) -> int:
         if self._size is None:
-            for key in BUCKET.objects.filter(
-                Prefix=self.download_uri.lstrip("/")
-            ):
+            for key in BUCKET.objects.filter(Prefix=self.download_uri.lstrip("/")):
                 self._size = key.size
         if self._size is None:
-            raise Exception(
-                f"No object found for prefix {self.download_uri}"
-            )
+            raise Exception(f"No object found for prefix {self.download_uri}")
         return self._size
 
     @property
     def downloads(self):
         return self.bytes_sent // self.size
 
+
 def parse_logs(log_directory: str) -> dict:
     bytes_cache = {}
-    for (dirpath, _, filenames) in os.walk(log_directory):
+    for dirpath, _, filenames in os.walk(log_directory):
         for filename in tqdm(filenames):
-            with gzip.open(os.path.join(dirpath, filename), 'r') as gf:
+            with gzip.open(os.path.join(dirpath, filename), "r") as gf:
                 string = gf.read().decode("utf-8")
                 entries = []
                 entries += string.splitlines()[2:]
                 for entry in entries:
-                    columns = entry.split('\t')
+                    columns = entry.split("\t")
                     bytes_sent = int(columns[3])
-                    download_uri = urllib.parse.unquote(
-                        urllib.parse.unquote(columns[7])
-                    )
+                    download_uri = urllib.parse.unquote(urllib.parse.unquote(columns[7]))
                     status = columns[8]
-                    if not all([
-                        status.startswith("2"),
-                        download_uri.endswith((".whl", ".zip"))
-                    ]):
+                    if not all(
+                        [status.startswith("2"), download_uri.endswith((".whl", ".zip"))]
+                    ):
                         continue
                     if not bytes_cache.get(download_uri):
                         bytes_cache[download_uri] = CacheEntry(download_uri)
                     bytes_cache[download_uri].bytes_sent += bytes_sent
     return bytes_cache
 
+
 def output_results(bytes_cache: dict) -> None:
     os_results = defaultdict(int)
     arch_results = defaultdict(int)
@@ -106,25 +98,19 @@ def output_results(bytes_cache: dict) -> None:
         try:
             os_results[val.os_type] += val.downloads
             arch_results[val.target_arch] += val.downloads
-            package_results[val.package_name][val.package_version] += (
-                val.downloads
-            )
+            package_results[val.package_name][val.package_version] += val.downloads
         except Exception:
             pass
     print("=-=-= Results =-=-=")
     print("=-=-= OS =-=-=")
     total_os_num = sum(os_results.values())
     for os_type, num in os_results.items():
-        print(
-            f"\t* {os_type}: {num} ({(num/total_os_num)*100:.2f}%)"
-        )
+        print(f"\t* {os_type}: {num} ({(num/total_os_num)*100:.2f}%)")
 
     print("=-=-= ARCH =-=-=")
     total_arch_num = sum(arch_results.values())
     for arch_type, num in arch_results.items():
-        print(
-            f"\t* {arch_type}: {num} ({(num/total_arch_num) * 100:.2f}%)"
-        )
+        print(f"\t* {arch_type}: {num} ({(num/total_arch_num) * 100:.2f}%)")
 
     print("=-=-= By Package =-=-=")
     for package_name, upper_val in package_results.items():
@@ -135,11 +121,14 @@ def output_results(bytes_cache: dict) -> None:
                 f"\t* {package_version}: {num} ({(num/total_package_num) * 100:.2f}%)"
             )
 
+
 def download_logs(log_directory: str, since: float):
     dt_now = datetime.now(timezone.utc)
     dt_end = datetime(dt_now.year, dt_now.month, dt_now.day, tzinfo=timezone.utc)
-    dt_start = dt_end - timedelta(days=1, hours=1) # Add 1 hour padding to account for potentially missed logs due to timing
-    for key in tqdm(BUCKET.objects.filter(Prefix='cflogs')):
+    dt_start = dt_end - timedelta(
+        days=1, hours=1
+    )  # Add 1 hour padding to account for potentially missed logs due to timing
+    for key in tqdm(BUCKET.objects.filter(Prefix="cflogs")):
         remote_fname = key.key
         local_fname = os.path.join(log_directory, remote_fname)
         # Only download things from yesterday
@@ -156,8 +145,8 @@ def download_logs(log_directory: str, since: float):
 
 if __name__ == "__main__":
     print("Downloading logs")
-    download_logs('cache', 1)
+    download_logs("cache", 1)
     print("Parsing logs")
-    cache = parse_logs('cache/cflogs/')
+    cache = parse_logs("cache/cflogs/")
     print("Calculating results")
     output_results(cache)
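For context on what parse_logs and output_results compute: each gzipped CloudFront log line is tab-separated, with bytes sent in column 3, a double-percent-encoded request URI in column 7, and the HTTP status in column 8; download counts are then estimated as bytes_sent // size, total bytes served for a wheel divided by its object size in S3. A minimal sketch of that estimate on a single made-up log line (the URI and sizes below are hypothetical, not real log data):

import urllib.parse

# Hypothetical CloudFront log line: column 3 is bytes sent, column 7 the
# request URI, column 8 the HTTP status.
line = "2025-01-12\t00:00:01\tIAD66\t1811939328\t1.2.3.4\tGET\td.pytorch.org\t/whl/torch-2.5.1-cp311-cp311-linux_x86_64.whl\t200"
columns = line.split("\t")

bytes_sent = int(columns[3])
# The URI is percent-encoded twice, hence the double unquote in parse_logs
download_uri = urllib.parse.unquote(urllib.parse.unquote(columns[7]))
status = columns[8]

WHEEL_SIZE = 905969664  # hypothetical size of the wheel object in S3, in bytes

if status.startswith("2") and download_uri.endswith((".whl", ".zip")):
    # Integer division: partial or resumed transfers round down to whole downloads
    downloads = bytes_sent // WHEEL_SIZE
    print(f"{download_uri}: ~{downloads} download(s)")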