diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index e73f744681..febdbbade2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -34,8 +34,8 @@ body: description: What version of Heat are you running? options: - main (development branch) + - 1.4.x - 1.3.x - - 1.2.x validations: required: true - type: dropdown @@ -44,23 +44,21 @@ body: label: Python version description: What Python version? options: + - 3.12 - 3.11 - "3.10" - 3.9 - - 3.8 - type: dropdown id: pytorch-version attributes: label: PyTorch version description: What PyTorch version? options: + - 2.4 + - 2.3 - 2.2 - 2.1 - - 2.0 - - 1.13 - - 1.12 - - 1.11 - - "1.10" + - '2.0' - type: textarea id: mpi-version attributes: diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 83c4eaf091..b7ac0c46da 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -6,6 +6,8 @@ - Implementation: - [ ] unit tests: all split configurations tested - [ ] unit tests: multiple dtypes tested + - [ ] benchmarks: created for new functionality + - [ ] benchmarks: performance improved or maintained - [ ] documentation updated where needed ## Description diff --git a/.github/workflows/bench_report.yml b/.github/workflows/bench_report.yml deleted file mode 100644 index 3a6adde3a3..0000000000 --- a/.github/workflows/bench_report.yml +++ /dev/null @@ -1,74 +0,0 @@ -name: Benchmarks report -on: - workflow_dispatch: - inputs: - job_id: - description: "Gitlab job id" - required: true - type: string - author: - description: "Commit author" - required: true - type: string - -jobs: - bench_report: - name: Benchmark report - runs-on: ubuntu-latest - steps: - - name: Harden Runner - uses: step-security/harden-runner@5c7944e73c4c2a096b17a9cb74d65b6c2bbafbde # v2.9.1 - with: - egress-policy: audit - - - name: Checkout - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 - - name: "Collect Gitlab Benchmarks" - env: - GITLAB_CB_API_TOKEN: ${{ secrets.GITLAB_CB_API_TOKEN }} - run: | - curl --location \ - --fail-with-body \ - --header "PRIVATE-TOKEN: $GITLAB_CB_API_TOKEN" \ - --output benchmarks.json \ - "https://codebase.helmholtz.cloud/api/v4/projects/7930/jobs/${{ inputs.job_id }}/artifacts/heat/bench_data/benchmarks.json" - cat benchmarks.json - curl --location \ - --fail-with-body \ - --header "PRIVATE-TOKEN: $GITLAB_CB_API_TOKEN" \ - --output report.txt \ - "https://codebase.helmholtz.cloud/api/v4/projects/7930/jobs/${{ inputs.job_id }}/artifacts/heat/bench_data/report.txt" - echo "Pipeline URL: https://codebase.helmholtz.cloud/helmholtz-analytics/cb/-/jobs/${{ inputs.job_id}}" >> $GITHUB_STEP_SUMMARY - cat report.txt >> $GITHUB_STEP_SUMMARY - - name: Compare and Save Benchmark Results - id: action_bench - uses: benchmark-action/github-action-benchmark@4de1bed97a47495fc4c5404952da0499e31f5c29 # v1.20.3 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - # Benchmark action input and output - tool: "customSmallerIsBetter" - output-file-path: benchmarks.json - # Alert configuration - fail-on-alert: true # Don't fail on main branch - comment-on-alert: true - alert-comment-cc-users: ${{ format('@{0}', inputs.author) }} - # Save benchmarks from the main branch - save-data-file: ${{ github.ref == 'refs/heads/main' }} - # Pages configuration - auto-push: ${{ github.ref == 'refs/heads/main' }} - gh-pages-branch: gh-pages - benchmark-data-dir-path: dev/bench - - name: Update commit status - if: always() - 
env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - if [[ "${{ steps.action_bench.outcome }}" =~ success|failure ]]; then export STEP_STATE="${{ steps.action_bench.outcome }}" && echo "then $STEP_STATE"; else export STEP_STATE=error && echo "else $STEP_STATE"; fi - echo "$STEP_STATE" - curl -L -X POST \ - --fail-with-body \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer $GITHUB_TOKEN" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/${{ github.repository }}/statuses/${{ github.sha }} \ - -d "{ \"state\":\"$STEP_STATE\", \"target_url\":\"https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}\", \"description\":\"The results are here!\", \"context\":\"cb/report\" }" diff --git a/.github/workflows/bench_trigger.yml b/.github/workflows/bench_trigger.yml index b0762d4ddf..53fa95477f 100644 --- a/.github/workflows/bench_trigger.yml +++ b/.github/workflows/bench_trigger.yml @@ -28,7 +28,7 @@ jobs: SHA: ${{ github.event.pull_request.head.sha }} PR_NUMBER: ${{ github.event.pull_request.number }} run: | - SHORT_SHA=$(git rev-parse --short ${{ github.event.pull_request.head.sha }}) + SHORT_SHA=$(git rev-parse --short $SHA) curl -s -X POST \ --fail-with-body \ -F "token=$PIPE_TRIGGER_TOKEN" \ @@ -45,7 +45,7 @@ jobs: env: AUTHOR: ${{ github.event.pull_request.assignee.login }} PIPE_TRIGGER_TOKEN: ${{ secrets.BENCH_PIPE_TRIGGER }} - SHA: ${{ github.event.pull_request.head.sha }} + SHA: ${{ github.sha }} run: | SHORT_SHA=$(git rev-parse --short $GITHUB_SHA) curl -s -X POST \ @@ -57,16 +57,3 @@ jobs: -F "variables[BRANCH]=main" \ -F "variables[AUTHOR]=${AUTHOR:-heat_team}" \ https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline - - name: Create status - if: ${{ steps.setup_pr.outcome == 'success' || steps.setup_push.outcome == 'success'}} - env: - REPO: ${{ github.repository }} - SHA: ${{ github.event.pull_request.head.sha }} - run: | - curl -L -X POST \ - --fail-with-body \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/repos/$REPO/statuses/$SHA \ - -d '{ "state":"pending", "target_url":"https://codebase.helmholtz.cloud/helmholtz-analytics/cb/-/pipelines", "description":"Waiting for benchmarks to execute.", "context":"cb/report" }' diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f7a980fd03..daec3fac63 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -23,6 +23,7 @@ jobs: - 'torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2' - 'torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2' - 'torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1' + - 'torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0' exclude: - py-version: '3.12' pytorch-version: 'torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 4c87098c74..474c3aed38 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -50,7 +50,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/init@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -60,7 +60,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/autobuild@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 # ℹī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -73,6 +73,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/analyze@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/create-branch-on-assignment.yml b/.github/workflows/create-branch-on-assignment.yml index 75da22508c..3da87204f1 100644 --- a/.github/workflows/create-branch-on-assignment.yml +++ b/.github/workflows/create-branch-on-assignment.yml @@ -16,6 +16,6 @@ jobs: egress-policy: audit - name: Create Issue Branch - uses: robvanderleek/create-issue-branch@066a452d2aa439a992baec3360a322a49eb62e0b # main + uses: robvanderleek/create-issue-branch@941dca58430f58b198228e633954eef1699722fe # main env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index cbae715725..8327f935d0 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -37,7 +37,7 @@ jobs: uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4 # v3.4.0 + uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1 with: driver: docker - @@ -49,7 +49,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build - uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1 + uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0 with: file: docker/Dockerfile.release build-args: | @@ -65,7 +65,7 @@ jobs: docker run -v `pwd`:`pwd` -w `pwd` --rm test_${{ inputs.name }} pytest - name: Build and push - uses: docker/build-push-action@16ebe778df0e7752d2cfcbd924afdbbd89c1a755 # v6.6.1 + uses: docker/build-push-action@5cd11c3a4ced054e52742c5fd54dca954e0edd85 # v6.7.0 with: file: docker/Dockerfile.release build-args: | diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 46e93fe28e..69e903c4ba 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -72,6 +72,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@eb055d739abdc2e8de2e5f4ba1a8b246daa779aa # v3.26.0 + uses: github/codeql-action/upload-sarif@429e1977040da7a23b6822b13c129cd1ba93dbb2 # v3.26.2 with: sarif_file: results.sarif diff --git a/.perun.ini b/.perun.ini index c9965dec05..0919670d6e 100644 --- a/.perun.ini +++ b/.perun.ini @@ -5,3 +5,12 @@ data_out = ./bench_data [benchmarking] rounds = 10 warmup_rounds = 1 +metrics=runtime +region_metrics=runtime + +[benchmarking.units] +joule = k +second = +percent = +watt = +byte = G diff --git a/README.md b/README.md index e673fe2777..0c9f12e5c7 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,9 @@ Heat is a distributed tensor framework for high performance data analytics. [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/helmholtz-analytics/heat/badge)](https://securityscorecards.dev/viewer/?uri=github.com/helmholtz-analytics/heat) [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7688/badge)](https://bestpractices.coreinfrastructure.org/projects/7688) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2531472.svg)](https://doi.org/10.5281/zenodo.2531472) -[![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench) +[![Benchmarks](https://img.shields.io/badge/Grafana-Benchmarks-2ea44f)](https://57bc8d92-72f2-4869-accd-435ec06365cb.ka.bw-cloud-instance.org:3000/d/adjpqduq9r7k0a/heat-cb?orgId=1) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![JuRSE Code Pick of the Month](https://img.shields.io/badge/JuRSE_Code_Pick-August_2024-blue)](https://www.fz-juelich.de/en/rse/jurse-community/jurse-code-of-the-month/august-2024) # Table of Contents - [What is Heat for?](#what-is-heat-for) diff --git a/benchmarks/cb/manipulations.py b/benchmarks/cb/manipulations.py index 0b140a8f35..0fde87cff9 100644 --- a/benchmarks/cb/manipulations.py +++ b/benchmarks/cb/manipulations.py @@ -1,5 +1,6 @@ # flake8: noqa import heat as ht +from typing import List from perun import monitor @@ -15,6 +16,13 @@ def reshape(arrays): a = ht.reshape(array, (10000000, -1), new_split=1) +@monitor() +def resplit(array, new_split: List[int | None]): + for new_split in new_split: + a = ht.resplit(array, axis=new_split) + del a + + def run_manipulation_benchmarks(): sizes = [10000, 20000, 40000] arrays = [] @@ -30,3 +38,13 @@ def run_manipulation_benchmarks(): split = 1 arrays.append(ht.zeros((1000, size), split=split)) concatenate(arrays) + + if ht.comm.size > 1: + shape = [100, 50, 50, 20, 86] + n_elements = ht.array(shape).prod().item() + mem = n_elements * 4 / 1e9 + array = ht.reshape(ht.arange(0, n_elements, split=0, dtype=ht.float32), shape) * ( + ht.comm.rank + 1 + ) + + resplit(array, [None, 2, 4]) diff --git a/heat/core/dndarray.py b/heat/core/dndarray.py index 6416086372..9d9bda1037 100644 --- a/heat/core/dndarray.py +++ b/heat/core/dndarray.py @@ -384,7 +384,7 @@ def __prephalo(self, start, end) -> torch.Tensor: return self.__array[ix].clone().contiguous() - def get_halo(self, halo_size: int) -> torch.Tensor: + def get_halo(self, halo_size: int, prev: bool = True, next: bool = True) -> torch.Tensor: """ Fetch halos of size ``halo_size`` from neighboring ranks and save them in ``self.halo_next/self.halo_prev``. @@ -392,6 +392,10 @@ def get_halo(self, halo_size: int) -> torch.Tensor: ---------- halo_size : int Size of the halo. 
+ prev : bool, optional + If True, fetch the halo from the previous rank. Default: True. + next : bool, optional + If True, fetch the halo from the next rank. Default: True. """ if not isinstance(halo_size, int): raise TypeError( @@ -433,25 +437,29 @@ def get_halo(self, halo_size: int) -> torch.Tensor: req_list = [] # exchange data with next populated process - if rank != last_rank: - self.comm.Isend(a_next, next_rank) - res_prev = torch.zeros( - a_prev.size(), dtype=a_prev.dtype, device=self.device.torch_device - ) - req_list.append(self.comm.Irecv(res_prev, source=next_rank)) + if prev: + if rank != last_rank: + self.comm.Isend(a_next, next_rank) + if rank != first_rank: + res_prev = torch.zeros( + a_prev.size(), dtype=a_prev.dtype, device=self.device.torch_device + ) + req_list.append(self.comm.Irecv(res_prev, source=prev_rank)) - if rank != first_rank: - self.comm.Isend(a_prev, prev_rank) - res_next = torch.zeros( - a_next.size(), dtype=a_next.dtype, device=self.device.torch_device - ) - req_list.append(self.comm.Irecv(res_next, source=prev_rank)) + if next: + if rank != first_rank: + req_list.append(self.comm.Isend(a_prev, prev_rank)) + if rank != last_rank: + res_next = torch.zeros( + a_next.size(), dtype=a_next.dtype, device=self.device.torch_device + ) + req_list.append(self.comm.Irecv(res_next, source=next_rank)) for req in req_list: req.Wait() - self.__halo_next = res_prev - self.__halo_prev = res_next + self.__halo_next = res_next + self.__halo_prev = res_prev self.__ishalo = True def __cat_halo(self) -> torch.Tensor: diff --git a/heat/core/manipulations.py b/heat/core/manipulations.py index d1958c27a4..5985df65e3 100644 --- a/heat/core/manipulations.py +++ b/heat/core/manipulations.py @@ -61,6 +61,7 @@ "unique", "vsplit", "vstack", + "unfold", ] @@ -4213,3 +4214,92 @@ def mpi_topk(a, b, mpi_type): MPI_TOPK = MPI.Op.Create(mpi_topk, commute=True) + + +def unfold(a: DNDarray, axis: int, size: int, step: int = 1): + """ + Returns a DNDarray which contains all slices of size `size` along the axis `axis`. + + Behaves like [torch.Tensor.unfold](https://pytorch.org/docs/stable/generated/torch.Tensor.unfold.html) for DNDarrays. + + Parameters + ---------- + a : DNDarray + the array to unfold + axis : int + the axis along which the array is unfolded + size : int + the size of each slice that is unfolded, must be greater than 1 + step : int + the step between each slice, must be at least 1 + + Example: + ``` + >>> x = ht.arange(1., 8) + >>> x + DNDarray([1., 2., 3., 4., 5., 6., 7.], dtype=ht.float32, device=cpu:0, split=None) + >>> ht.unfold(x, 0, 2, 1) + DNDarray([[1., 2.], [2., 3.], [3., 4.], [4., 5.], [5., 6.], [6., 7.]], dtype=ht.float32, device=cpu:0, split=None) + >>> ht.unfold(x, 0, 2, 2) + DNDarray([[1., 2.], [3., 4.], [5., 6.]], dtype=ht.float32, device=cpu:0, split=None) + ``` + + Notes + ----- + If the split axis of the array is also the unfold axis, every rank must hold a local chunk of at least ``size - 1`` elements along that axis. + """ + if step < 1: + raise ValueError("step must be >= 1.") + if size <= 1: + raise ValueError("size must be > 1.") + axis = stride_tricks.sanitize_axis(a.shape, axis) + if size > a.shape[axis]: + raise ValueError( + f"maximum size for DNDarray at axis {axis} is {a.shape[axis]} but size is {size}."
+ ) + + comm = a.comm + dev = a.device + tdev = dev.torch_device + + if a.split is None or comm.size == 1 or a.split != axis: # early out + ret = factories.array( + a.larray.unfold(axis, size, step), is_split=a.split, device=dev, comm=comm + ) + + return ret + else: # comm.size > 1 and split axis == unfold axis + # index range [0:sizedim-1-(size-1)] = [0:sizedim-size] + # --> size of axis: ceil((sizedim-size+1) / step) = floor(sizedim-size) / step)) + 1 + # ret_shape = (*a_shape[:axis], int((a_shape[axis]-size)/step) + 1, a_shape[axis+1:], size) + + if (size - 1 > a.lshape_map[:, axis]).any(): + raise RuntimeError("Chunk-size needs to be at least size - 1.") + a.get_halo(size - 1, prev=False) + + counts, displs = a.counts_displs() + displs = torch.tensor(displs, device=tdev) + + # min local index in unfold axis + min_index = ((displs[comm.rank] - 1) // step + 1) * step - displs[comm.rank] + if min_index >= a.lshape[axis] or ( + comm.rank == comm.size - 1 and min_index + size > a.lshape[axis] + ): + loc_unfold_shape = list(a.lshape) + loc_unfold_shape[axis] = 0 + ret_larray = torch.zeros((*loc_unfold_shape, size), device=tdev) + else: # unfold has local data + ret_larray = a.array_with_halos[ + axis * (slice(None, None, None),) + (slice(min_index, None, None), Ellipsis) + ].unfold(axis, size, step) + + ret = factories.array(ret_larray, is_split=axis, device=dev, comm=comm) + + return ret diff --git a/heat/core/tests/test_manipulations.py b/heat/core/tests/test_manipulations.py index 81b6af1215..554293fa25 100644 --- a/heat/core/tests/test_manipulations.py +++ b/heat/core/tests/test_manipulations.py @@ -3752,3 +3752,63 @@ def test_vstack(self): b = ht.ones((12,), split=0) res = ht.vstack((a, b)) self.assertEqual(res.shape, (2, 12)) + + def test_unfold(self): + dtypes = (ht.int, ht.float) + + for dtype in dtypes: # test with different datatypes + # exceptions + n = 1000 + x = ht.arange(n, dtype=dtype) + with self.assertRaises(ValueError): # size too small + ht.unfold(x, 0, 1, 1) + with self.assertRaises(ValueError): # step too small + ht.unfold(x, 0, 2, 0) + x.resplit_(0) + min_chunk_size = x.lshape_map[:, 0].min().item() + if min_chunk_size + 2 > n: # size too large + with self.assertRaises(ValueError): + ht.unfold(x, 0, min_chunk_size + 2) + else: # size too large for chunk_size + with self.assertRaises(RuntimeError): + ht.unfold(x, 0, min_chunk_size + 2) + with self.assertRaises(ValueError): # size too large + ht.unfold(x, 0, n + 1, 1) + ht.unfold( + x, 0, min_chunk_size, min_chunk_size + 1 + ) # no fully local unfolds on some nodes + + # 2D sliding views + n = 100 + + x = torch.arange(n * n).reshape((n, n)) + y = ht.array(x, dtype) + y.resplit_(0) + + u = x.unfold(0, 3, 3) + u = u.unfold(1, 3, 3) + u = ht.array(u) + v = ht.unfold(y, 0, 3, 3) + v = ht.unfold(v, 1, 3, 3) + + self.assertTrue(ht.equal(u, v)) + + # more dimensions, different split axes + n = 53 + k = 3 # number of dimensions + shape = k * (n,) + size = n**k + + x = torch.arange(size).reshape(shape) + _y = x.clone().detach() + y = ht.array(_y, dtype) + + for split in (None, *range(k)): + y.resplit_(split) + for size in range(2, 9): + for step in range(1, 21): + for dimension in range(k): + u = ht.array(x.unfold(dimension, size, step)) + v = ht.unfold(y, dimension, size, step) + + self.assertTrue(ht.equal(u, v)) diff --git a/setup.py b/setup.py index b98401ec17..78931ab36b 100644 --- a/setup.py +++ b/setup.py @@ -35,10 +35,10 @@ install_requires=[ "mpi4py>=3.0.0, <4.0.0", "numpy>=1.22.0, <2", - "torch>=2.0.0, <2.3.2", + 
"torch>=2.0.0, <2.4.1", "scipy>=1.10.0", "pillow>=6.0.0", - "torchvision>=0.15.2", + "torchvision>=0.15.2, <0.19.1", ], extras_require={ "docutils": ["docutils>=0.16"],