From a73dea92d5ea5e4dee58a8b58c12078e5300f998 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Mon, 17 Jun 2024 11:19:47 -0500 Subject: [PATCH] Split imperfect-loop-collapse into usm/non-usm variants This patch splits the smoke-fails/imperfect-loop-collapse test into two variants: one that requires unified shared memory and one that does not (the unified shared memory requirement is orthogonal to the main purpose of the test). It also fixes the HSA_XNACK setting for the smoke-fails/imperfect-loop-collapse-usm version, so both versions now work. --- .../imperfect-loop-collapse-usm/Makefile | 25 +++++++++++++ .../imperfect_loop_collapse_usm.cpp | 35 +++++++++++++++++++ .../imperfect-loop-collapse/Makefile | 5 +-- .../imperfect_loop_collapse.cpp | 5 ++- 4 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 test/smoke-fails/imperfect-loop-collapse-usm/Makefile create mode 100644 test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp diff --git a/test/smoke-fails/imperfect-loop-collapse-usm/Makefile b/test/smoke-fails/imperfect-loop-collapse-usm/Makefile new file mode 100644 index 0000000000..7b09b6a44c --- /dev/null +++ b/test/smoke-fails/imperfect-loop-collapse-usm/Makefile @@ -0,0 +1,25 @@ +include ../../Makefile.defs + +TESTNAME = imperfect_loop_collapse_usm +TESTSRC_MAIN = imperfect_loop_collapse_usm.cpp +TESTSRC_AUX = +TESTSRC_ALL = $(TESTSRC_MAIN) $(TESTSRC_AUX) + +CLANG ?= clang++ +OMP_BIN = $(AOMP)/bin/$(CLANG) +CC = $(OMP_BIN) $(VERBOSE) + +HSA_XNACK ?= 1 +SUPPORTED = $(SUPPORTS_USM) + +# Our "run" target gets overridden. Make sure we run with HSA_XNACK set +# appropriately. 
+RUNENV += HSA_XNACK=${HSA_XNACK} + +#-ccc-print-phases +#"-\#\#\#" + +include ../Makefile.rules + +run: + HSA_XNACK=${HSA_XNACK} ./$(TESTNAME) diff --git a/test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp b/test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp new file mode 100644 index 0000000000..4be83e0a54 --- /dev/null +++ b/test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp @@ -0,0 +1,35 @@ +#include <cstdio> + +#define N 1024 + +#pragma omp requires unified_shared_memory + +int main() { + double *a = new double[N*N]; + + #pragma omp parallel for collapse(2) + for(int i = 0; i < N; i++) + for(int j = 0; j < N; j++) + a[i*N+j] = (double) (i*N+j); + + #pragma omp target teams distribute parallel for collapse(2) + for(int i = 0; i < N; i++) { + double k = i*3.14; + for(int j = 0; j < N; j++) + a[i*N+j] += k; + } + + //check + int err = 0; + for(int i = 0; i < N; i++) { + double k = i*3.14; + for(int j = 0; j < N; j++) + if (a[i*N+j] != (double) (i*N+j) + k) { + err++; + printf("Error at (%d,%d): got %lf expected %lf\n", i, j, a[i*N+j], (double) (i*N+j) + k); + if (err > 10) return err; + } + } + + return err; +} diff --git a/test/smoke-fails/imperfect-loop-collapse/Makefile b/test/smoke-fails/imperfect-loop-collapse/Makefile index eb0665a22a..7edc5fa32e 100644 --- a/test/smoke-fails/imperfect-loop-collapse/Makefile +++ b/test/smoke-fails/imperfect-loop-collapse/Makefile @@ -9,13 +9,10 @@ CLANG ?= clang++ OMP_BIN = $(AOMP)/bin/$(CLANG) CC = $(OMP_BIN) $(VERBOSE) -HSA_XNACK ?= 1 -SUPPORTED = $(SUPPORTS_USM) - #-ccc-print-phases #"-\#\#\#" include ../Makefile.rules run: - HSA_XNACK=${HSA_XNACK} ./$(TESTNAME) + ./$(TESTNAME) diff --git a/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp b/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp index 4be83e0a54..11818bb4bb 100644 --- a/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp +++ 
b/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp @@ -2,8 +2,6 @@ #define N 1024 -#pragma omp requires unified_shared_memory - int main() { double *a = new double[N*N]; @@ -12,7 +10,8 @@ int main() { for(int j = 0; j < N; j++) a[i*N+j] = (double) (i*N+j); - #pragma omp target teams distribute parallel for collapse(2) + #pragma omp target teams distribute parallel for collapse(2) \ + map(tofrom: a[0:N*N]) for(int i = 0; i < N; i++) { double k = i*3.14; for(int j = 0; j < N; j++)