Merge branch 'main' into features/1457-Add_randomized_SVD

helmholtz-analytics · Aug 20, 2024 · a3611bc · a3611bc
2 parents 5b8ecdb + 15c4478
commit a3611bc
Show file tree

Hide file tree

Showing 19 changed files with 239 additions and 128 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -34,8 +34,8 @@ body:
       description: What version of Heat are you running?
       options:
         - main (development branch)
+        - 1.4.x
         - 1.3.x
-        - 1.2.x
     validations:
       required: true
   - type: dropdown
@@ -44,23 +44,21 @@ body:
       label: Python version
       description: What Python version?
       options:
+        - 3.12
         - 3.11
         - "3.10"
         - 3.9
-        - 3.8
   - type: dropdown
     id: pytorch-version
     attributes:
       label: PyTorch version
       description: What PyTorch version?
       options:
+        - 2.4
+        - 2.3
         - 2.2
         - 2.1
-        - 2.0
-        - 1.13
-        - 1.12
-        - 1.11
-        - "1.10"
+        - '2.0'
   - type: textarea
     id: mpi-version
     attributes:

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -6,6 +6,8 @@
 - Implementation:
     - [ ] unit tests: all split configurations tested
     - [ ] unit tests: multiple dtypes tested
+    - [ ] benchmarks: created for new functionality
+    - [ ] benchmarks: performance improved or maintained
     - [ ] documentation updated where needed
 
 ## Description

diff --git a/.github/workflows/bench_report.yml b/.github/workflows/bench_report.yml
diff --git a/.github/workflows/bench_trigger.yml b/.github/workflows/bench_trigger.yml
@@ -28,7 +28,7 @@ jobs:
           SHA: ${{ github.event.pull_request.head.sha }}
           PR_NUMBER: ${{ github.event.pull_request.number }}
         run: |
-          SHORT_SHA=$(git rev-parse --short ${{ github.event.pull_request.head.sha }})
+          SHORT_SHA=$(git rev-parse --short $SHA)
           curl -s -X POST \
             --fail-with-body \
             -F "token=$PIPE_TRIGGER_TOKEN" \
@@ -45,7 +45,7 @@ jobs:
         env:
           AUTHOR: ${{ github.event.pull_request.assignee.login }}
           PIPE_TRIGGER_TOKEN: ${{ secrets.BENCH_PIPE_TRIGGER }}
-          SHA: ${{ github.event.pull_request.head.sha }}
+          SHA: ${{ github.sha }}
         run: |
           SHORT_SHA=$(git rev-parse --short $GITHUB_SHA)
           curl -s -X POST \
@@ -57,16 +57,3 @@ jobs:
             -F "variables[BRANCH]=main" \
             -F "variables[AUTHOR]=${AUTHOR:-heat_team}" \
             https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline
-      - name: Create status
-        if: ${{ steps.setup_pr.outcome == 'success' || steps.setup_push.outcome == 'success'}}
-        env:
-          REPO: ${{ github.repository }}
-          SHA: ${{ github.event.pull_request.head.sha }}
-        run: |
-          curl -L -X POST \
-            --fail-with-body \
-            -H "Accept: application/vnd.github+json" \
-            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            https://api.github.com/repos/$REPO/statuses/$SHA \
-            -d '{ "state":"pending", "target_url":"https://codebase.helmholtz.cloud/helmholtz-analytics/cb/-/pipelines", "description":"Waiting for benchmarks to execute.", "context":"cb/report" }'
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -23,6 +23,7 @@ jobs:
           - 'torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2'
           - 'torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2'
           - 'torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1'
+          - 'torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0'
         exclude:
           - py-version: '3.12'
             pytorch-version: 'torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2'

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -50,7 +50,7 @@ jobs:
 
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/init@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
         with:
           languages: ${{ matrix.language }}
           # If you wish to specify custom queries, you can do so here or in a config file.
@@ -60,7 +60,7 @@ jobs:
       # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
       # If this step fails, then you should remove it and run the build manually (see below)
       - name: Autobuild
-        uses: github/codeql-action/autobuild@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/autobuild@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
 
       # ℹ️ Command-line programs to run using the OS shell.
       # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
@@ -73,6 +73,6 @@ jobs:
       #   ./location_of_script_within_repo/buildscript.sh
 
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/analyze@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
         with:
           category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/create-branch-on-assignment.yml b/.github/workflows/create-branch-on-assignment.yml
@@ -16,6 +16,6 @@ jobs:
           egress-policy: audit
 
       - name: Create Issue Branch
-        uses: robvanderleek/create-issue-branch@066a452d2aa439a992baec3360a322a49eb62e0b # main
+        uses: robvanderleek/create-issue-branch@941dca58430f58b198228e633954eef1699722fe # main
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -37,7 +37,7 @@ jobs:
               uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0
             -
               name: Set up Docker Buildx
-              uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4 # v3.4.0
+              uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
               with:
                 driver: docker
             -
@@ -49,7 +49,7 @@ jobs:
                 password: ${{ secrets.GITHUB_TOKEN }}
             -
               name: Build
-              uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1
+              uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0
               with:
                 file: docker/Dockerfile.release
                 build-args: |
@@ -65,7 +65,7 @@ jobs:
                 docker run -v `pwd`:`pwd` -w `pwd` --rm test_${{ inputs.name }} pytest
             -
               name: Build and push
-              uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1
+              uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0
               with:
                 file: docker/Dockerfile.release
                 build-args: |

diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
@@ -72,6 +72,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
         with:
           sarif_file: results.sarif
diff --git a/.perun.ini b/.perun.ini
@@ -5,3 +5,12 @@ data_out = ./bench_data
 [benchmarking]
 rounds = 10
 warmup_rounds = 1
+metrics=runtime
+region_metrics=runtime
+
+[benchmarking.units]
+joule = k
+second =
+percent =
+watt =
+byte = G
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,7 +10,7 @@ repos:
       - id: check-added-large-files
       - id: check-toml
   - repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 24.4.2
+    rev: 24.8.0
     hooks:
     - id: black
   - repo: https://github.com/PyCQA/flake8

diff --git a/README.md b/README.md
@@ -19,8 +19,9 @@ Heat is a distributed tensor framework for high performance data analytics.
 [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/helmholtz-analytics/heat/badge)](https://securityscorecards.dev/viewer/?uri=github.com/helmholtz-analytics/heat)
 [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7688/badge)](https://bestpractices.coreinfrastructure.org/projects/7688)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2531472.svg)](https://doi.org/10.5281/zenodo.2531472)
-[![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench)
+[![Benchmarks](https://img.shields.io/badge/Grafana-Benchmarks-2ea44f)](https://57bc8d92-72f2-4869-accd-435ec06365cb.ka.bw-cloud-instance.org:3000/d/adjpqduq9r7k0a/heat-cb?orgId=1)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![JuRSE Code Pick of the Month](https://img.shields.io/badge/JuRSE_Code_Pick-August_2024-blue)](https://www.fz-juelich.de/en/rse/jurse-community/jurse-code-of-the-month/august-2024)
 
 # Table of Contents
   - [What is Heat for?](#what-is-heat-for)
@@ -228,9 +229,10 @@ under project number ZT-I-0003 and the Helmholtz AI platform grant.*
 
 *This project has received funding from Google Summer of Code (GSoC) in 2022.*
 
+*This work is partially carried out under a [programme](https://activities.esa.int/index.php/4000144045) of, and funded by, the European Space Agency.
+Any view expressed in this repository or related publications can in no way be taken to reflect the official opinion of the European Space Agency.*
 
 ---
 
 <div align="center">
-  <a href="https://www.dlr.de/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/dlr_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="https://www.fz-juelich.de/portal/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/fzj_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="http://www.kit.edu/english/index.php"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/kit_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="https://www.helmholtz.de/en/"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/helmholtz_logo.svg" height="50px" hspace="3%" vspace="20px"></a>
-</div>
+  <a href="https://www.dlr.de/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/dlr_logo.svg" height="50px" hspace="3%" vspace="20px"></a><a href="https://www.fz-juelich.de/portal/EN/Home/home_node.html"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/fzj_logo.svg" height="40px" hspace="3%" vspace="20px"></a><a href="http://www.kit.edu/english/index.php"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/kit_logo.svg" height="40px" hspace="3%" vspace="5px"></a><a href="https://www.helmholtz.de/en/"><img src="https://raw.githubusercontent.com/helmholtz-analytics/heat/main/doc/images/helmholtz_logo.svg" height="50px" hspace="3%" vspace="5px"></a><a href="https://www.esa.int/"><img src="https://github.com/user-attachments/assets/2ee251b4-733e-44ea-8d1c-8b75928eef55" height="45px" hspace="3%" vspace="20px"></a>
diff --git a/benchmarks/cb/manipulations.py b/benchmarks/cb/manipulations.py
@@ -1,5 +1,6 @@
 # flake8: noqa
 import heat as ht
+from typing import List, Optional
 from perun import monitor
 
 
@@ -15,6 +16,13 @@ def reshape(arrays):
         a = ht.reshape(array, (10000000, -1), new_split=1)
 
 
+@monitor()
+def resplit(array, new_split: List[Optional[int]]):
+    """Benchmark ``ht.resplit`` by calling it once per entry of ``new_split`` (used as ``axis``)."""
+    for axis in new_split:  # distinct loop name: do not shadow the ``new_split`` parameter
+        ht.resplit(array, axis=axis)  # result is intentionally discarded right away
+
+
 def run_manipulation_benchmarks():
     sizes = [10000, 20000, 40000]
     arrays = []
@@ -30,3 +38,13 @@ def run_manipulation_benchmarks():
             split = 1
         arrays.append(ht.zeros((1000, size), split=split))
     concatenate(arrays)
+
+    if ht.comm.size > 1:
+        shape = [100, 50, 50, 20, 86]
+        n_elements = ht.array(shape).prod().item()
+        mem = n_elements * 4 / 1e9
+        array = ht.reshape(ht.arange(0, n_elements, split=0, dtype=ht.float32), shape) * (
+            ht.comm.rank + 1
+        )
+
+        resplit(array, [None, 2, 4])
diff --git a/heat/cluster/batchparallelclustering.py b/heat/cluster/batchparallelclustering.py
@@ -19,13 +19,19 @@
 """
 
 
-def _initialize_plus_plus(X, n_clusters, p, random_state=None):
+def _initialize_plus_plus(X, n_clusters, p, random_state=None, max_samples=2**24 - 1):
     """
     Auxiliary function: single-process k-means++/k-medians++ initialization in pytorch
     p is the norm used for computing distances
+    The default max_samples=2**24 - 1 is needed because PyTorch's multinomial currently
+    supports at most this many different categories.
     """
     if random_state is not None:
         torch.manual_seed(random_state)
+    if X.shape[0] > max_samples:  # torch's multinomial is limited to 2^24 categories
+        idxs_subsampling = torch.randint(0, X.shape[0], (max_samples,))
+        X = X[idxs_subsampling]
+    # actual K-Means++
     idxs = torch.zeros(n_clusters, dtype=torch.long, device=X.device)
     idxs[0] = torch.randint(0, X.shape[0], (1,))
     for i in range(1, n_clusters):
@@ -289,7 +295,7 @@ def predict(self, x: DNDarray):
 
         local_labels = _parallel_batched_kmex_predict(
             x.larray, self._cluster_centers.larray, self._p
-        )
+        ).to(torch.int32)
         labels = DNDarray(
             local_labels,
             gshape=(x.shape[0], 1),

diff --git a/heat/cluster/tests/test_batchparallelclustering.py b/heat/cluster/tests/test_batchparallelclustering.py
@@ -7,7 +7,7 @@
 from mpi4py import MPI
 
 from ...core.tests.test_suites.basic_test import TestCase
-from ..batchparallelclustering import _kmex, _BatchParallelKCluster
+from ..batchparallelclustering import _kmex, _initialize_plus_plus, _BatchParallelKCluster
 
 # test BatchParallelKCluster base class and auxiliary functions
 
@@ -32,6 +32,10 @@ def test_kmex(self):
         init = torch.rand(2, 3)
         _kmex(X, 2, 2, init, max_iter, tol)
 
+    def test_initialize_plus_plus(self):
+        # smoke test: 100 samples with max_samples=50, so X.shape[0] > max_samples holds
+        _initialize_plus_plus(torch.rand(100, 3), 3, 2, random_state=None, max_samples=50)
+
     def test_BatchParallelKClustering(self):
         with self.assertRaises(TypeError):
             _BatchParallelKCluster(2, 10, "++", 100, 1e-2, random_state=3.14, n_procs_to_merge=None)