diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index e73f744681..febdbbade2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -34,8 +34,8 @@ body: description: What version of Heat are you running? options: - main (development branch) + - 1.4.x - 1.3.x - - 1.2.x validations: required: true - type: dropdown @@ -44,23 +44,21 @@ body: label: Python version description: What Python version? options: + - 3.12 - 3.11 - "3.10" - 3.9 - - 3.8 - type: dropdown id: pytorch-version attributes: label: PyTorch version description: What PyTorch version? options: + - 2.4 + - 2.3 - 2.2 - 2.1 - - 2.0 - - 1.13 - - 1.12 - - 1.11 - - "1.10" + - '2.0' - type: textarea id: mpi-version attributes: diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 83c4eaf091..b7ac0c46da 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,6 +6,8 @@ - Implementation: - [ ] unit tests: all split configurations tested - [ ] unit tests: multiple dtypes tested + - [ ] benchmarks: created for new functionality + - [ ] benchmarks: performance improved or maintained - [ ] documentation updated where needed ## Description diff --git a/.github/workflows/bench_report.yml b/.github/workflows/bench_report.yml deleted file mode 100644 index 3a6adde3a3..0000000000 --- a/.github/workflows/bench_report.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Benchmarks report -on: - workflow_dispatch: - inputs: - job_id: - description: "Gitlab job id" - required: true - type: string - author: - description: "Commit author" - required: true - type: string - -jobs: - bench_report: - name: Benchmark report - runs-on: ubuntu-latest - steps: - - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 - with: - egress-policy: audit - - - name: Checkout - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - name: "Collect Gitlab Benchmarks" - env: - GITLAB_CB_API_TOKEN: ${{ secrets.GITLAB_CB_API_TOKEN }} - run: | - curl --location \ - --fail-with-body \ - --header "PRIVATE-TOKEN: $GITLAB_CB_API_TOKEN" \ - --output benchmarks.json \ - "https://codebase.helmholtz.cloud/api/v4/projects/7930/jobs/${{ inputs.job_id }}/artifacts/heat/bench_data/benchmarks.json" - cat benchmarks.json - curl --location \ - --fail-with-body \ - --header "PRIVATE-TOKEN: $GITLAB_CB_API_TOKEN" \ - --output report.txt \ - "https://codebase.helmholtz.cloud/api/v4/projects/7930/jobs/${{ inputs.job_id }}/artifacts/heat/bench_data/report.txt" - echo "Pipeline URL: https://codebase.helmholtz.cloud/helmholtz-analytics/cb/-/jobs/${{ inputs.job_id}}" >> $GITHUB_STEP_SUMMARY - cat report.txt >> $GITHUB_STEP_SUMMARY - - name: Compare and Save Benchmark Results - id: action_bench - uses: benchmark-action/github-action-benchmark@4de1bed97a47495fc4c5404952da0499e31f5c29 # v1.20.3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - # Benchmark action input and output - tool: "customSmallerIsBetter" - output-file-path: benchmarks.json - # Alert configuration - fail-on-alert: true # Don't fail on main branch - comment-on-alert: true - alert-comment-cc-users: ${{ format('@{0}', inputs.author) }} - # Save benchmarks from the main branch - save-data-file: ${{ github.ref == 'refs/heads/main' }} - # Pages configuration - auto-push: ${{ github.ref == 'refs/heads/main' }} - gh-pages-branch: gh-pages - benchmark-data-dir-path: dev/bench - - name: Update commit status - if: always() - 
env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - if [[ "${{ steps.action_bench.outcome }}" =~ success|failure ]]; then export STEP_STATE="${{ steps.action_bench.outcome }}" && echo "then $STEP_STATE"; else export STEP_STATE=error && echo "else $STEP_STATE"; fi - echo "$STEP_STATE" - curl -L -X POST \ - --fail-with-body \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/${{ github.repository }}/statuses/${{ github.sha }} \ - -d "{ \"state\":\"$STEP_STATE\", \"target_url\":\"https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\", \"description\":\"The results are here!\", \"context\":\"cb/report\" }" diff --git a/.github/workflows/bench_trigger.yml b/.github/workflows/bench_trigger.yml index b0762d4ddf..53fa95477f 100644 --- a/.github/workflows/bench_trigger.yml +++ b/.github/workflows/bench_trigger.yml @@ -28,7 +28,7 @@ jobs: SHA: ${{ github.event.pull_request.head.sha }} PR_NUMBER: ${{ github.event.pull_request.number }} run: | - SHORT_SHA=$(git rev-parse --short ${{ github.event.pull_request.head.sha }}) + SHORT_SHA=$(git rev-parse --short $SHA) curl -s -X POST \ --fail-with-body \ -F "token=$PIPE_TRIGGER_TOKEN" \ @@ -45,7 +45,7 @@ jobs: env: AUTHOR: ${{ github.event.pull_request.assignee.login }} PIPE_TRIGGER_TOKEN: ${{ secrets.BENCH_PIPE_TRIGGER }} - SHA: ${{ github.event.pull_request.head.sha }} + SHA: ${{ github.sha }} run: | SHORT_SHA=$(git rev-parse --short $GITHUB_SHA) curl -s -X POST \ @@ -57,16 +57,3 @@ jobs: -F "variables[BRANCH]=main" \ -F "variables[AUTHOR]=${AUTHOR:-heat_team}" \ https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline - - name: Create status - if: ${{ steps.setup_pr.outcome == 'success' || steps.setup_push.outcome == 'success'}} - env: - REPO: ${{ github.repository }} - SHA: ${{ github.event.pull_request.head.sha }} - run: | - curl -L -X POST \ - --fail-with-body \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/$REPO/statuses/$SHA \ - -d '{ "state":"pending", "target_url":"https://codebase.helmholtz.cloud/helmholtz-analytics/cb/-/pipelines", "description":"Waiting for benchmarks to execute.", "context":"cb/report" }' diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f7a980fd03..daec3fac63 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,6 +23,7 @@ jobs: - 'torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2' - 'torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2' - 'torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1' + - 'torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0' exclude: - py-version: '3.12' pytorch-version: 'torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 4c87098c74..474c3aed38 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -50,7 +50,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/init@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -60,7 +60,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/autobuild@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 # ℹī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -73,6 +73,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/analyze@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/create-branch-on-assignment.yml b/.github/workflows/create-branch-on-assignment.yml index 75da22508c..3da87204f1 100644 --- a/.github/workflows/create-branch-on-assignment.yml +++ b/.github/workflows/create-branch-on-assignment.yml @@ -16,6 +16,6 @@ jobs: egress-policy: audit - name: Create Issue Branch - uses: robvanderleek/create-issue-branch@066a452d2aa439a992baec3360a322a49eb62e0b # main + uses: robvanderleek/create-issue-branch@941dca58430f58b198228e633954eef1699722fe # main env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index cbae715725..8327f935d0 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -37,7 +37,7 @@ jobs: uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4 # v3.4.0 + uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1 with: driver: docker - @@ -49,7 +49,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build - uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1 + uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0 with: file: docker/Dockerfile.release build-args: | @@ -65,7 +65,7 @@ jobs: docker run -v `pwd`:`pwd` -w `pwd` --rm test_${{ inputs.name }} pytest - name: Build and push - uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1 + uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0 with: file: docker/Dockerfile.release build-args: | diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 46e93fe28e..69e903c4ba 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: sarif_file: results.sarif diff --git a/.perun.ini b/.perun.ini index c9965dec05..0919670d6e 100644 --- a/.perun.ini +++ b/.perun.ini @@ -5,3 +5,12 @@ data_out = ./bench_data [benchmarking] rounds = 10 warmup_rounds = 1 +metrics=runtime +region_metrics=runtime + +[benchmarking.units] +joule = k +second = +percent = +watt = +byte = G diff --git a/README.md b/README.md index e673fe2777..0c9f12e5c7 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,9 @@ Heat is a distributed tensor framework for high performance data analytics. [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/helmholtz-analytics/heat/badge)](https://securityscorecards.dev/viewer/?uri=github.com/helmholtz-analytics/heat) [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7688/badge)](https://bestpractices.coreinfrastructure.org/projects/7688) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2531472.svg)](https://doi.org/10.5281/zenodo.2531472) -[![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench) +[![Benchmarks](https://img.shields.io/badge/Grafana-Benchmarks-2ea44f)](https://57bc8d92-72f2-4869-accd-435ec06365cb.ka.bw-cloud-instance.org:3000/d/adjpqduq9r7k0a/heat-cb?orgId=1) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![JuRSE Code Pick of the Month](https://img.shields.io/badge/JuRSE_Code_Pick-August_2024-blue)](https://www.fz-juelich.de/en/rse/jurse-community/jurse-code-of-the-month/august-2024) # Table of Contents - [What is Heat for?](#what-is-heat-for) diff --git a/benchmarks/cb/manipulations.py b/benchmarks/cb/manipulations.py index 0b140a8f35..0fde87cff9 100644 --- a/benchmarks/cb/manipulations.py +++ b/benchmarks/cb/manipulations.py @@ -1,5 +1,6 @@ # flake8: noqa import heat as ht +from typing import List from perun import monitor @@ -15,6 +16,13 @@ def reshape(arrays): a = ht.reshape(array, (10000000, -1), new_split=1) +@monitor() +def resplit(array, new_split: List[int | None]): + for new_split in new_split: + a = ht.resplit(array, axis=new_split) + del a + + def run_manipulation_benchmarks(): sizes = [10000, 20000, 40000] arrays = [] @@ -30,3 +38,13 @@ def run_manipulation_benchmarks(): split = 1 arrays.append(ht.zeros((1000, size), split=split)) concatenate(arrays) + + if ht.comm.size > 1: + shape = [100, 50, 50, 20, 86] + n_elements = ht.array(shape).prod().item() + mem = n_elements * 4 / 1e9 + array = ht.reshape(ht.arange(0, n_elements, split=0, dtype=ht.float32), shape) * ( + ht.comm.rank + 1 + ) + + resplit(array, [None, 2, 4]) diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py index 6416086372..9d9bda1037 100644 --- a/heat/core/dndarray.py +++ b/heat/core/dndarray.py @@ -384,7 +384,7 @@ def __prephalo(self, start, end) -> torch.Tensor: return self.__array[ix].clone().contiguous() - def get_halo(self, halo_size: int) -> torch.Tensor: + def get_halo(self, halo_size: int, prev: bool = True, next: bool = True) -> torch.Tensor: """ Fetch halos of size ``halo_size`` from neighboring ranks and save them in ``self.halo_next/self.halo_prev``. @@ -392,6 +392,10 @@ def get_halo(self, halo_size: int) -> torch.Tensor: ---------- halo_size : int Size of the halo. 
+ prev : bool, optional + If True, fetch the halo from the previous rank. Default: True. + next : bool, optional + If True, fetch the halo from the next rank. Default: True. """ if not isinstance(halo_size, int): raise TypeError( @@ -433,25 +437,29 @@ def get_halo(self, halo_size: int) -> torch.Tensor: req_list = [] # exchange data with next populated process - if rank != last_rank: - self.comm.Isend(a_next, next_rank) - res_prev = torch.zeros( - a_prev.size(), dtype=a_prev.dtype, device=self.device.torch_device - ) - req_list.append(self.comm.Irecv(res_prev, source=next_rank)) + if prev: + if rank != last_rank: + self.comm.Isend(a_next, next_rank) + if rank != first_rank: + res_prev = torch.zeros( + a_prev.size(), dtype=a_prev.dtype, device=self.device.torch_device + ) + req_list.append(self.comm.Irecv(res_prev, source=prev_rank)) - if rank != first_rank: - self.comm.Isend(a_prev, prev_rank) - res_next = torch.zeros( - a_next.size(), dtype=a_next.dtype, device=self.device.torch_device - ) - req_list.append(self.comm.Irecv(res_next, source=prev_rank)) + if next: + if rank != first_rank: + req_list.append(self.comm.Isend(a_prev, prev_rank)) + if rank != last_rank: + res_next = torch.zeros( + a_next.size(), dtype=a_next.dtype, device=self.device.torch_device + ) + req_list.append(self.comm.Irecv(res_next, source=next_rank)) for req in req_list: req.Wait() - self.__halo_next = res_prev - self.__halo_prev = res_next + self.__halo_next = res_next + self.__halo_prev = res_prev self.__ishalo = True def __cat_halo(self) -> torch.Tensor: diff --git a/heat/core/manipulations.py b/heat/core/manipulations.py index d1958c27a4..5985df65e3 100644 --- a/heat/core/manipulations.py +++ b/heat/core/manipulations.py @@ -61,6 +61,7 @@ "unique", "vsplit", "vstack", + "unfold", ] @@ -4213,3 +4214,92 @@ def mpi_topk(a, b, mpi_type): MPI_TOPK = MPI.Op.Create(mpi_topk, commute=True) + + +def unfold(a: DNDarray, axis: int, size: int, step: int = 1): + """ + Returns a DNDarray which contains all slices of size `size` along the axis `axis`. + + Behaves like [torch.Tensor.unfold](https://pytorch.org/docs/stable/generated/torch.Tensor.unfold.html) for DNDarrays. + + Parameters + ---------- + a : DNDarray + the array to unfold + axis : int + the axis along which the array is unfolded + size : int + the size of each slice that is unfolded, must be greater than 1 + step : int + the step between each slice, must be at least 1 + + Example: + ``` + >>> x = ht.arange(1., 8) + >>> x + DNDarray([1., 2., 3., 4., 5., 6., 7.], dtype=ht.float32, device=cpu:0, split=None) + >>> ht.unfold(x, 0, 2, 1) + DNDarray([[1., 2.], [2., 3.], [3., 4.], [4., 5.], [5., 6.], [6., 7.]], dtype=ht.float32, device=cpu:0, split=None) + >>> ht.unfold(x, 0, 2, 2) + DNDarray([[1., 2.], [3., 4.], [5., 6.]], dtype=ht.float32, device=cpu:0, split=None) + ``` + + Notes + ----- + If the split axis of the array is also the unfold axis, every rank must hold a local chunk of at least ``size - 1`` elements along that axis. + """ + if step < 1: + raise ValueError("step must be >= 1.") + if size <= 1: + raise ValueError("size must be > 1.") + axis = stride_tricks.sanitize_axis(a.shape, axis) + if size > a.shape[axis]: + raise ValueError( + f"maximum size for DNDarray at axis {axis} is {a.shape[axis]} but size is {size}."
+ ) + + comm = a.comm + dev = a.device + tdev = dev.torch_device + + if a.split is None or comm.size == 1 or a.split != axis: # early out + ret = factories.array( + a.larray.unfold(axis, size, step), is_split=a.split, device=dev, comm=comm + ) + + return ret + else: # comm.size > 1 and split axis == unfold axis + # index range [0:sizedim-1-(size-1)] = [0:sizedim-size] + # --> size of axis: ceil((sizedim-size+1) / step) = floor(sizedim-size) / step)) + 1 + # ret_shape = (*a_shape[:axis], int((a_shape[axis]-size)/step) + 1, a_shape[axis+1:], size) + + if (size - 1 > a.lshape_map[:, axis]).any(): + raise RuntimeError("Chunk-size needs to be at least size - 1.") + a.get_halo(size - 1, prev=False) + + counts, displs = a.counts_displs() + displs = torch.tensor(displs, device=tdev) + + # min local index in unfold axis + min_index = ((displs[comm.rank] - 1) // step + 1) * step - displs[comm.rank] + if min_index >= a.lshape[axis] or ( + comm.rank == comm.size - 1 and min_index + size > a.lshape[axis] + ): + loc_unfold_shape = list(a.lshape) + loc_unfold_shape[axis] = 0 + ret_larray = torch.zeros((*loc_unfold_shape, size), device=tdev) + else: # unfold has local data + ret_larray = a.array_with_halos[ + axis * (slice(None, None, None),) + (slice(min_index, None, None), Ellipsis) + ].unfold(axis, size, step) + + ret = factories.array(ret_larray, is_split=axis, device=dev, comm=comm) + + return ret diff --git a/heat/core/tests/test_manipulations.py b/heat/core/tests/test_manipulations.py index 81b6af1215..554293fa25 100644 --- a/heat/core/tests/test_manipulations.py +++ b/heat/core/tests/test_manipulations.py @@ -3752,3 +3752,63 @@ def test_vstack(self): b = ht.ones((12,), split=0) res = ht.vstack((a, b)) self.assertEqual(res.shape, (2, 12)) + + def test_unfold(self): + dtypes = (ht.int, ht.float) + + for dtype in dtypes: # test with different datatypes + # exceptions + n = 1000 + x = ht.arange(n, dtype=dtype) + with self.assertRaises(ValueError): # size too small + ht.unfold(x, 0, 1, 1) + with self.assertRaises(ValueError): # step too small + ht.unfold(x, 0, 2, 0) + x.resplit_(0) + min_chunk_size = x.lshape_map[:, 0].min().item() + if min_chunk_size + 2 > n: # size too large + with self.assertRaises(ValueError): + ht.unfold(x, 0, min_chunk_size + 2) + else: # size too large for chunk_size + with self.assertRaises(RuntimeError): + ht.unfold(x, 0, min_chunk_size + 2) + with self.assertRaises(ValueError): # size too large + ht.unfold(x, 0, n + 1, 1) + ht.unfold( + x, 0, min_chunk_size, min_chunk_size + 1 + ) # no fully local unfolds on some nodes + + # 2D sliding views + n = 100 + + x = torch.arange(n * n).reshape((n, n)) + y = ht.array(x, dtype) + y.resplit_(0) + + u = x.unfold(0, 3, 3) + u = u.unfold(1, 3, 3) + u = ht.array(u) + v = ht.unfold(y, 0, 3, 3) + v = ht.unfold(v, 1, 3, 3) + + self.assertTrue(ht.equal(u, v)) + + # more dimensions, different split axes + n = 53 + k = 3 # number of dimensions + shape = k * (n,) + size = n**k + + x = torch.arange(size).reshape(shape) + _y = x.clone().detach() + y = ht.array(_y, dtype) + + for split in (None, *range(k)): + y.resplit_(split) + for size in range(2, 9): + for step in range(1, 21): + for dimension in range(k): + u = ht.array(x.unfold(dimension, size, step)) + v = ht.unfold(y, dimension, size, step) + + self.assertTrue(ht.equal(u, v)) diff --git a/setup.py b/setup.py index b98401ec17..78931ab36b 100644 --- a/setup.py +++ b/setup.py @@ -35,10 +35,10 @@ install_requires=[ "mpi4py>=3.0.0, <4.0.0", "numpy>=1.22.0, <2", - "torch>=2.0.0, <2.3.2", + 
"torch>=2.0.0, <2.4.1", "scipy>=1.10.0", "pillow>=6.0.0", - "torchvision>=0.15.2", + "torchvision>=0.15.2, <0.19.1", ], extras_require={ "docutils": ["docutils>=0.16"],