Merge branch 'main' into features/1104-Implement_consistent_linear_algebra_for_arrays_with_dimension_2_in_particular_matmul
helmholtz-analytics · Aug 20, 2024 · 9f9462c · 9f9462c
2 parents 3886942 + 15c4478
commit 9f9462c
Show file tree

Hide file tree

Showing 16 changed files with 222 additions and 122 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -34,8 +34,8 @@ body:
       description: What version of Heat are you running?
       options:
         - main (development branch)
+        - 1.4.x
         - 1.3.x
-        - 1.2.x
     validations:
       required: true
   - type: dropdown
@@ -44,23 +44,21 @@ body:
       label: Python version
       description: What Python version?
       options:
+        - 3.12
         - 3.11
         - "3.10"
         - 3.9
-        - 3.8
   - type: dropdown
     id: pytorch-version
     attributes:
       label: PyTorch version
       description: What PyTorch version?
       options:
+        - 2.4
+        - 2.3
         - 2.2
         - 2.1
-        - 2.0
-        - 1.13
-        - 1.12
-        - 1.11
-        - "1.10"
+        - '2.0'
   - type: textarea
     id: mpi-version
     attributes:

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -6,6 +6,8 @@
 - Implementation:
     - [ ] unit tests: all split configurations tested
     - [ ] unit tests: multiple dtypes tested
+    - [ ] benchmarks: created for new functionality
+    - [ ] benchmarks: performance improved or maintained
     - [ ] documentation updated where needed
 
 ## Description

diff --git a/.github/workflows/bench_report.yml b/.github/workflows/bench_report.yml
diff --git a/.github/workflows/bench_trigger.yml b/.github/workflows/bench_trigger.yml
@@ -28,7 +28,7 @@ jobs:
           SHA: ${{ github.event.pull_request.head.sha }}
           PR_NUMBER: ${{ github.event.pull_request.number }}
         run: |
-          SHORT_SHA=$(git rev-parse --short ${{ github.event.pull_request.head.sha }})
+          SHORT_SHA=$(git rev-parse --short $SHA)
           curl -s -X POST \
             --fail-with-body \
             -F "token=$PIPE_TRIGGER_TOKEN" \
@@ -45,7 +45,7 @@ jobs:
         env:
           AUTHOR: ${{ github.event.pull_request.assignee.login }}
           PIPE_TRIGGER_TOKEN: ${{ secrets.BENCH_PIPE_TRIGGER }}
-          SHA: ${{ github.event.pull_request.head.sha }}
+          SHA: ${{ github.sha }}
         run: |
           SHORT_SHA=$(git rev-parse --short $GITHUB_SHA)
           curl -s -X POST \
@@ -57,16 +57,3 @@ jobs:
             -F "variables[BRANCH]=main" \
             -F "variables[AUTHOR]=${AUTHOR:-heat_team}" \
             https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline
-      - name: Create status
-        if: ${{ steps.setup_pr.outcome == 'success' || steps.setup_push.outcome == 'success'}}
-        env:
-          REPO: ${{ github.repository }}
-          SHA: ${{ github.event.pull_request.head.sha }}
-        run: |
-          curl -L -X POST \
-            --fail-with-body \
-            -H "Accept: application/vnd.github+json" \
-            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            https://api.github.com/repos/$REPO/statuses/$SHA \
-            -d '{ "state":"pending", "target_url":"https://codebase.helmholtz.cloud/helmholtz-analytics/cb/-/pipelines", "description":"Waiting for benchmarks to execute.", "context":"cb/report" }'
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -23,6 +23,7 @@ jobs:
           - 'torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2'
           - 'torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2'
           - 'torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1'
+          - 'torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0'
         exclude:
           - py-version: '3.12'
             pytorch-version: 'torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2'

diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -50,7 +50,7 @@ jobs:
 
       # Initializes the CodeQL tools for scanning.
       - name: Initialize CodeQL
-        uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/init@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
         with:
           languages: ${{ matrix.language }}
           # If you wish to specify custom queries, you can do so here or in a config file.
@@ -60,7 +60,7 @@ jobs:
       # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
       # If this step fails, then you should remove it and run the build manually (see below)
       - name: Autobuild
-        uses: github/codeql-action/autobuild@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/autobuild@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
 
       # ℹ️ Command-line programs to run using the OS shell.
       # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
@@ -73,6 +73,6 @@ jobs:
       #   ./location_of_script_within_repo/buildscript.sh
 
       - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/analyze@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
         with:
           category: "/language:${{matrix.language}}"
diff --git a/.github/workflows/create-branch-on-assignment.yml b/.github/workflows/create-branch-on-assignment.yml
@@ -16,6 +16,6 @@ jobs:
           egress-policy: audit
 
       - name: Create Issue Branch
-        uses: robvanderleek/create-issue-branch@066a452d2aa439a992baec3360a322a49eb62e0b # main
+        uses: robvanderleek/create-issue-branch@941dca58430f58b198228e633954eef1699722fe # main
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -37,7 +37,7 @@ jobs:
               uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0
             -
               name: Set up Docker Buildx
-              uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4 # v3.4.0
+              uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
               with:
                 driver: docker
             -
@@ -49,7 +49,7 @@ jobs:
                 password: ${{ secrets.GITHUB_TOKEN }}
             -
               name: Build
-              uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1
+              uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0
               with:
                 file: docker/Dockerfile.release
                 build-args: |
@@ -65,7 +65,7 @@ jobs:
                 docker run -v `pwd`:`pwd` -w `pwd` --rm test_${{ inputs.name }} pytest
             -
               name: Build and push
-              uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1
+              uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0
               with:
                 file: docker/Dockerfile.release
                 build-args: |

diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
@@ -72,6 +72,6 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard.
       - name: "Upload to code-scanning"
-        uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0
+        uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2
         with:
           sarif_file: results.sarif
diff --git a/.perun.ini b/.perun.ini
@@ -5,3 +5,12 @@ data_out = ./bench_data
 [benchmarking]
 rounds = 10
 warmup_rounds = 1
+metrics=runtime
+region_metrics=runtime
+
+[benchmarking.units]
+joule = k
+second =
+percent =
+watt =
+byte = G
diff --git a/README.md b/README.md
@@ -19,8 +19,9 @@ Heat is a distributed tensor framework for high performance data analytics.
 [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/helmholtz-analytics/heat/badge)](https://securityscorecards.dev/viewer/?uri=github.com/helmholtz-analytics/heat)
 [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7688/badge)](https://bestpractices.coreinfrastructure.org/projects/7688)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2531472.svg)](https://doi.org/10.5281/zenodo.2531472)
-[![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench)
+[![Benchmarks](https://img.shields.io/badge/Grafana-Benchmarks-2ea44f)](https://57bc8d92-72f2-4869-accd-435ec06365cb.ka.bw-cloud-instance.org:3000/d/adjpqduq9r7k0a/heat-cb?orgId=1)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![JuRSE Code Pick of the Month](https://img.shields.io/badge/JuRSE_Code_Pick-August_2024-blue)](https://www.fz-juelich.de/en/rse/jurse-community/jurse-code-of-the-month/august-2024)
 
 # Table of Contents
   - [What is Heat for?](#what-is-heat-for)

diff --git a/benchmarks/cb/manipulations.py b/benchmarks/cb/manipulations.py
@@ -1,5 +1,6 @@
 # flake8: noqa
 import heat as ht
+from typing import List
 from perun import monitor
 
 
@@ -15,6 +16,13 @@ def reshape(arrays):
         a = ht.reshape(array, (10000000, -1), new_split=1)
 
 
+@monitor()
+def resplit(array, new_split: "List[int | None]"):  # quoted: a bare `int | None` raises TypeError at def time on Python 3.9
+    for axis in new_split:  # benchmark one ht.resplit call per requested target axis
+        a = ht.resplit(array, axis=axis)
+        del a  # the result is never used; drop it before the next iteration
+
+
 def run_manipulation_benchmarks():
     sizes = [10000, 20000, 40000]
     arrays = []
@@ -30,3 +38,13 @@ def run_manipulation_benchmarks():
             split = 1
         arrays.append(ht.zeros((1000, size), split=split))
     concatenate(arrays)
+
+    if ht.comm.size > 1:
+        shape = [100, 50, 50, 20, 86]
+        n_elements = ht.array(shape).prod().item()
+        mem = n_elements * 4 / 1e9
+        array = ht.reshape(ht.arange(0, n_elements, split=0, dtype=ht.float32), shape) * (
+            ht.comm.rank + 1
+        )
+
+        resplit(array, [None, 2, 4])
diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py
@@ -384,14 +384,18 @@ def __prephalo(self, start, end) -> torch.Tensor:
 
         return self.__array[ix].clone().contiguous()
 
-    def get_halo(self, halo_size: int) -> torch.Tensor:
+    def get_halo(self, halo_size: int, prev: bool = True, next: bool = True) -> torch.Tensor:
         """
         Fetch halos of size ``halo_size`` from neighboring ranks and save them in ``self.halo_next/self.halo_prev``.
 
         Parameters
         ----------
         halo_size : int
             Size of the halo.
+        prev : bool, optional
+            If True, fetch the halo from the previous rank. Default: True.
+        next : bool, optional
+            If True, fetch the halo from the next rank. Default: True.
         """
         if not isinstance(halo_size, int):
             raise TypeError(
@@ -433,25 +437,29 @@ def get_halo(self, halo_size: int) -> torch.Tensor:
             req_list = []
 
             # exchange data with next populated process
-            if rank != last_rank:
-                self.comm.Isend(a_next, next_rank)
-                res_prev = torch.zeros(
-                    a_prev.size(), dtype=a_prev.dtype, device=self.device.torch_device
-                )
-                req_list.append(self.comm.Irecv(res_prev, source=next_rank))
+            if prev:
+                if rank != last_rank:
+                    self.comm.Isend(a_next, next_rank)
+                if rank != first_rank:
+                    res_prev = torch.zeros(
+                        a_prev.size(), dtype=a_prev.dtype, device=self.device.torch_device
+                    )
+                    req_list.append(self.comm.Irecv(res_prev, source=prev_rank))
 
-            if rank != first_rank:
-                self.comm.Isend(a_prev, prev_rank)
-                res_next = torch.zeros(
-                    a_next.size(), dtype=a_next.dtype, device=self.device.torch_device
-                )
-                req_list.append(self.comm.Irecv(res_next, source=prev_rank))
+            if next:
+                if rank != first_rank:
+                    req_list.append(self.comm.Isend(a_prev, prev_rank))
+                if rank != last_rank:
+                    res_next = torch.zeros(
+                        a_next.size(), dtype=a_next.dtype, device=self.device.torch_device
+                    )
+                    req_list.append(self.comm.Irecv(res_next, source=next_rank))
 
             for req in req_list:
                 req.Wait()
 
-            self.__halo_next = res_prev
-            self.__halo_prev = res_next
+            self.__halo_next = res_next
+            self.__halo_prev = res_prev
             self.__ishalo = True
 
     def __cat_halo(self) -> torch.Tensor: