Skip to content

Commit

Permalink
Merge branch 'main' into features/1457-Add_randomized_SVD
Browse files Browse the repository at this point in the history
  • Loading branch information
mrfh92 authored Aug 20, 2024
2 parents 5b8ecdb + 15c4478 commit a3611bc
Show file tree
Hide file tree
Showing 19 changed files with 239 additions and 128 deletions.
12 changes: 5 additions & 7 deletions .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ body:
description: What version of Heat are you running?
options:
- main (development branch)
- 1.4.x
- 1.3.x
- 1.2.x
validations:
required: true
- type: dropdown
Expand All @@ -44,23 +44,21 @@ body:
label: Python version
description: What Python version?
options:
- 3.12
- 3.11
- "3.10"
- 3.9
- 3.8
- type: dropdown
id: pytorch-version
attributes:
label: PyTorch version
description: What PyTorch version?
options:
- 2.4
- 2.3
- 2.2
- 2.1
- 2.0
- 1.13
- 1.12
- 1.11
- "1.10"
- '2.0'
- type: textarea
id: mpi-version
attributes:
Expand Down
2 changes: 2 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
- Implementation:
- [ ] unit tests: all split configurations tested
- [ ] unit tests: multiple dtypes tested
- [ ] benchmarks: created for new functionality
- [ ] benchmarks: performance improved or maintained
- [ ] documentation updated where needed

## Description
Expand Down
74 changes: 0 additions & 74 deletions .github/workflows/bench_report.yml

This file was deleted.

17 changes: 2 additions & 15 deletions .github/workflows/bench_trigger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
SHA: ${{ github.event.pull_request.head.sha }}
PR_NUMBER: ${{ github.event.pull_request.number }}
run: |
SHORT_SHA=$(git rev-parse --short ${{ github.event.pull_request.head.sha }})
SHORT_SHA=$(git rev-parse --short $SHA)
curl -s -X POST \
--fail-with-body \
-F "token=$PIPE_TRIGGER_TOKEN" \
Expand All @@ -45,7 +45,7 @@ jobs:
env:
AUTHOR: ${{ github.event.pull_request.assignee.login }}
PIPE_TRIGGER_TOKEN: ${{ secrets.BENCH_PIPE_TRIGGER }}
SHA: ${{ github.event.pull_request.head.sha }}
SHA: ${{ github.sha }}
run: |
SHORT_SHA=$(git rev-parse --short $GITHUB_SHA)
curl -s -X POST \
Expand All @@ -57,16 +57,3 @@ jobs:
-F "variables[BRANCH]=main" \
-F "variables[AUTHOR]=${AUTHOR:-heat_team}" \
https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline
- name: Create status
if: ${{ steps.setup_pr.outcome == 'success' || steps.setup_push.outcome == 'success'}}
env:
REPO: ${{ github.repository }}
SHA: ${{ github.event.pull_request.head.sha }}
run: |
curl -L -X POST \
--fail-with-body \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
https://api.github.com/repos/$REPO/statuses/$SHA \
-d '{ "state":"pending", "target_url":"https://codebase.helmholtz.cloud/helmholtz-analytics/cb/-/pipelines", "description":"Waiting for benchmarks to execute.", "context":"cb/report" }'
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ jobs:
- 'torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2'
- 'torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2'
- 'torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1'
- 'torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0'
exclude:
- py-version: '3.12'
pytorch-version: 'torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2'
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
uses: github/codeql-action/init@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
Expand All @@ -60,7 +60,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
uses: github/codeql-action/autobuild@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2

# ℹ️ Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
Expand All @@ -73,6 +73,6 @@ jobs:
# ./location_of_script_within_repo/buildscript.sh

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
uses: github/codeql-action/analyze@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
with:
category: "/language:${{matrix.language}}"
2 changes: 1 addition & 1 deletion .github/workflows/create-branch-on-assignment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ jobs:
egress-policy: audit

- name: Create Issue Branch
uses: robvanderleek/create-issue-branch@066a452d2aa439a992baec3360a322a49eb62e0b # main
uses: robvanderleek/create-issue-branch@941dca58430f58b198228e633954eef1699722fe # main
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
6 changes: 3 additions & 3 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4 # v3.4.0
uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
with:
driver: docker
-
Expand All @@ -49,7 +49,7 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
-
name: Build
uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1
uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0
with:
file: docker/Dockerfile.release
build-args: |
Expand All @@ -65,7 +65,7 @@ jobs:
docker run -v `pwd`:`pwd` -w `pwd` --rm test_${{ inputs.name }} pytest
-
name: Build and push
uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1
uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0
with:
file: docker/Dockerfile.release
build-args: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scorecard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,6 @@ jobs:

# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
with:
sarif_file: results.sarif
9 changes: 9 additions & 0 deletions .perun.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,12 @@ data_out = ./bench_data
[benchmarking]
rounds = 10
warmup_rounds = 1
metrics=runtime
region_metrics=runtime

[benchmarking.units]
joule = k
second =
percent =
watt =
byte = G
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repos:
- id: check-added-large-files
- id: check-toml
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.4.2
rev: 24.8.0
hooks:
- id: black
- repo: https://github.com/PyCQA/flake8
Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ Heat is a distributed tensor framework for high performance data analytics.
[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/helmholtz-analytics/heat/badge)](https://securityscorecards.dev/viewer/?uri=github.com/helmholtz-analytics/heat)
[![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7688/badge)](https://bestpractices.coreinfrastructure.org/projects/7688)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2531472.svg)](https://doi.org/10.5281/zenodo.2531472)
[![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench)
[![Benchmarks](https://img.shields.io/badge/Grafana-Benchmarks-2ea44f)](https://57bc8d92-72f2-4869-accd-435ec06365cb.ka.bw-cloud-instance.org:3000/d/adjpqduq9r7k0a/heat-cb?orgId=1)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![JuRSE Code Pick of the Month](https://img.shields.io/badge/JuRSE_Code_Pick-August_2024-blue)](https://www.fz-juelich.de/en/rse/jurse-community/jurse-code-of-the-month/august-2024)

# Table of Contents
- [What is Heat for?](#what-is-heat-for)
Expand Down Expand Up @@ -228,9 +229,10 @@ under project number ZT-I-0003 and the Helmholtz AI platform grant.*

*This project has received funding from Google Summer of Code (GSoC) in 2022.*

*This work is partially carried out under a [programme](https://activities.esa.int/index.php/4000144045) of, and funded by, the European Space Agency.
Any view expressed in this repository or related publications can in no way be taken to reflect the official opinion of the European Space Agency.*

---

<div align="center">
<a href="https://www.dlr.de/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/dlr_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="https://www.fz-juelich.de/portal/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/fzj_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="http://www.kit.edu/english/index.php"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/kit_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="https://www.helmholtz.de/en/"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/helmholtz_logo.svg" height="50px" hspace="3%" vspace="20px"></a>
</div>
<a href="https://www.dlr.de/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/dlr_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="https://www.fz-juelich.de/portal/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/fzj_logo.svg" height="40px" hspace="3%" vspace="20px"></a><a href="http://www.kit.edu/english/index.php"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/kit_logo.svg" height="40px" hspace="3%" vspace="5px"></a><a href="https://www.helmholtz.de/en/"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/helmholtz_logo.svg" height="50px" hspace="3%" vspace="5px"></a><a href="https://www.esa.int/"><img src="https://github.com/user-attachments/assets/2ee251b4-733e-44ea-8d1c-8b75928eef55" height="45px" hspace="3%" vspace="20px"></a>
18 changes: 18 additions & 0 deletions benchmarks/cb/manipulations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# flake8: noqa
import heat as ht
from typing import List
from perun import monitor


Expand All @@ -15,6 +16,13 @@ def reshape(arrays):
a = ht.reshape(array, (10000000, -1), new_split=1)


@monitor()
def resplit(array, new_split: "List[int | None]"):
    """
    Resplit ``array`` along every requested axis in turn, monitored by perun.

    Parameters
    ----------
    array
        The array handed to ``ht.resplit``.
    new_split
        Target split axes to cycle through; each entry is passed on as ``axis``.
    """
    # The annotation is quoted so it is not evaluated at definition time: the
    # ``int | None`` union syntax only works at runtime from Python 3.10 on.
    for axis in new_split:
        # A distinct loop name avoids rebinding the ``new_split`` parameter.
        resplit_array = ht.resplit(array, axis=axis)
        # Drop the result right away so only one redistributed copy stays alive.
        del resplit_array


def run_manipulation_benchmarks():
sizes = [10000, 20000, 40000]
arrays = []
Expand All @@ -30,3 +38,13 @@ def run_manipulation_benchmarks():
split = 1
arrays.append(ht.zeros((1000, size), split=split))
concatenate(arrays)

if ht.comm.size > 1:
shape = [100, 50, 50, 20, 86]
n_elements = ht.array(shape).prod().item()
mem = n_elements * 4 / 1e9
array = ht.reshape(ht.arange(0, n_elements, split=0, dtype=ht.float32), shape) * (
ht.comm.rank + 1
)

resplit(array, [None, 2, 4])
10 changes: 8 additions & 2 deletions heat/cluster/batchparallelclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,19 @@
"""


def _initialize_plus_plus(X, n_clusters, p, random_state=None):
def _initialize_plus_plus(X, n_clusters, p, random_state=None, max_samples=2**24 - 1):
"""
Auxiliary function: single-process k-means++/k-medians++ initialization in pytorch
p is the norm used for computing distances
The value max_samples=2**24 - 1 is necessary as PyTorch's multinomial currently only
supports this number of different categories.
"""
if random_state is not None:
torch.manual_seed(random_state)
if X.shape[0] > max_samples: # torch's multinomial is limited to 2^24 categories
idxs_subsampling = torch.randint(0, X.shape[0], (max_samples,))
X = X[idxs_subsampling]
# actual K-Means++
idxs = torch.zeros(n_clusters, dtype=torch.long, device=X.device)
idxs[0] = torch.randint(0, X.shape[0], (1,))
for i in range(1, n_clusters):
Expand Down Expand Up @@ -289,7 +295,7 @@ def predict(self, x: DNDarray):

local_labels = _parallel_batched_kmex_predict(
x.larray, self._cluster_centers.larray, self._p
)
).to(torch.int32)
labels = DNDarray(
local_labels,
gshape=(x.shape[0], 1),
Expand Down
6 changes: 5 additions & 1 deletion heat/cluster/tests/test_batchparallelclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from mpi4py import MPI

from ...core.tests.test_suites.basic_test import TestCase
from ..batchparallelclustering import _kmex, _BatchParallelKCluster
from ..batchparallelclustering import _kmex, _initialize_plus_plus, _BatchParallelKCluster

# test BatchParallelKCluster base class and auxiliary functions

Expand All @@ -32,6 +32,10 @@ def test_kmex(self):
init = torch.rand(2, 3)
_kmex(X, 2, 2, init, max_iter, tol)

def test_initialize_plus_plus(self):
    # 100 samples with max_samples=50 exceed the limit, so the call goes
    # through the subsampling branch before the actual k-means++ seeding.
    n_samples, n_features = 100, 3
    X = torch.rand(n_samples, n_features)
    _initialize_plus_plus(X, n_clusters=3, p=2, random_state=None, max_samples=50)

def test_BatchParallelKClustering(self):
with self.assertRaises(TypeError):
_BatchParallelKCluster(2, 10, "++", 100, 1e-2, random_state=3.14, n_procs_to_merge=None)
Expand Down
Loading

0 comments on commit a3611bc

Please sign in to comment.