From 2ddda2a8dadb8302b556d5a6b02968df16b25318 Mon Sep 17 00:00:00 2001 From: Jeremy L Thompson Date: Thu, 11 Nov 2021 07:49:16 -0700 Subject: [PATCH] sve - initial SVE backend framework --- Makefile | 17 +++- README.md | 4 + backends/ceed-backend-list.h | 2 + backends/sve/ceed-sve-blocked.c | 45 +++++++++ backends/sve/ceed-sve-serial.c | 45 +++++++++ backends/sve/ceed-sve-tensor-f32.c | 150 +++++++++++++++++++++++++++++ backends/sve/ceed-sve-tensor-f64.c | 150 +++++++++++++++++++++++++++++ backends/sve/ceed-sve.h | 17 ++++ 8 files changed, 428 insertions(+), 2 deletions(-) create mode 100644 backends/sve/ceed-sve-blocked.c create mode 100644 backends/sve/ceed-sve-serial.c create mode 100644 backends/sve/ceed-sve-tensor-f32.c create mode 100644 backends/sve/ceed-sve-tensor-f64.c create mode 100644 backends/sve/ceed-sve.h diff --git a/Makefile b/Makefile index 29d1b7d104..12efc7d92d 100644 --- a/Makefile +++ b/Makefile @@ -236,6 +236,7 @@ blocked.c := $(sort $(wildcard backends/blocked/*.c)) ceedmemcheck.c := $(sort $(wildcard backends/memcheck/*.c)) opt.c := $(sort $(wildcard backends/opt/*.c)) avx.c := $(sort $(wildcard backends/avx/*.c)) +sve.c := $(sort $(wildcard backends/sve/*.c)) xsmm.c := $(sort $(wildcard backends/xsmm/*.c)) cuda.c := $(sort $(wildcard backends/cuda/*.c)) cuda.cpp := $(sort $(wildcard backends/cuda/*.cpp)) @@ -301,6 +302,7 @@ info: $(info ------------------------------------) $(info MEMCHK_STATUS = $(MEMCHK_STATUS)$(call backend_status,$(MEMCHK_BACKENDS))) $(info AVX_STATUS = $(AVX_STATUS)$(call backend_status,$(AVX_BACKENDS))) + $(info SVE_STATUS = $(SVE_STATUS)$(call backend_status,$(SVE_BACKENDS))) $(info XSMM_DIR = $(XSMM_DIR)$(call backend_status,$(XSMM_BACKENDS))) $(info OCCA_DIR = $(OCCA_DIR)$(call backend_status,$(OCCA_BACKENDS))) $(info MAGMA_DIR = $(MAGMA_DIR)$(call backend_status,$(MAGMA_BACKENDS))) @@ -341,7 +343,7 @@ ifeq ($(MEMCHK),1) BACKENDS_MAKE += $(MEMCHK_BACKENDS) endif -# AVX Backed +# AVX Backends AVX_STATUS = Disabled AVX_FLAG := $(if $(filter clang,$(CC_VENDOR)),+avx,-mavx) AVX := $(filter $(AVX_FLAG),$(shell $(CC) $(CFLAGS:-M%=) -v -E -x c /dev/null 2>&1)) @@ -352,6 +354,17 @@ ifneq ($(AVX),) BACKENDS_MAKE += $(AVX_BACKENDS) endif +# SVE Backends +SVE_STATUS = Disabled +SVE_FLAG := $(if $(filter clang,$(CC_VENDOR)),+sve,-msve) +SVE ?= +SVE_BACKENDS = /cpu/self/sve/serial /cpu/self/sve/blocked +ifneq ($(SVE),) + SVE_STATUS = Enabled + libceed.c += $(sve.c) + BACKENDS_MAKE += $(SVE_BACKENDS) +endif + # Collect list of libraries and paths for use in linking and pkg-config PKG_LIBS = # Stubs that will not be RPATH'd @@ -434,7 +447,7 @@ ifneq ($(HIP_LIB_DIR),) BACKENDS_MAKE += $(HIP_BACKENDS) endif -# MAGMA Backend +# MAGMA Backends ifneq ($(wildcard $(MAGMA_DIR)/lib/libmagma.*),) MAGMA_ARCH=$(shell nm -g $(MAGMA_DIR)/lib/libmagma.* | grep -c "hipblas") ifeq ($(MAGMA_ARCH), 0) #CUDA MAGMA diff --git a/README.md b/README.md index c943cd0a2c..bf0bfa97cb 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,8 @@ There are multiple supported backends, which can be selected at runtime in the e | `/cpu/self/opt/blocked` | Blocked optimized C implementation | Yes | | `/cpu/self/avx/serial` | Serial AVX implementation | Yes | | `/cpu/self/avx/blocked` | Blocked AVX implementation | Yes | +| `/cpu/self/sve/serial` | Serial SVE implementation | Yes | +| `/cpu/self/sve/blocked` | Blocked SVE implementation | Yes | || | **CPU Valgrind** | | `/cpu/self/memcheck/*` | Memcheck backends, undefined value checks | Yes | @@ -180,6 +182,8 @@ The `/cpu/self/opt/*` backends are written in pure C and use partial e-vectors t The `/cpu/self/avx/*` backends rely upon AVX instructions to provide vectorized CPU performance. +The `/cpu/self/sve/*` backends rely upon SVE instructions to provide vectorized CPU performance. + The `/cpu/self/memcheck/*` backends rely upon the [Valgrind](http://valgrind.org/) Memcheck tool to help verify that user QFunctions have no undefined values. To use, run your code with Valgrind and the Memcheck backends, e.g. `valgrind ./build/ex1 -ceed /cpu/self/ref/memcheck`. A 'development' or 'debugging' version of Valgrind with headers is required to use this backend. diff --git a/backends/ceed-backend-list.h b/backends/ceed-backend-list.h index d350f365ed..0745a3b43e 100644 --- a/backends/ceed-backend-list.h +++ b/backends/ceed-backend-list.h @@ -28,5 +28,7 @@ MACRO(CeedRegister_Opt_Blocked, 1, "/cpu/self/opt/blocked") MACRO(CeedRegister_Opt_Serial, 1, "/cpu/self/opt/serial") MACRO(CeedRegister_Ref, 1, "/cpu/self/ref/serial") MACRO(CeedRegister_Ref_Blocked, 1, "/cpu/self/ref/blocked") +MACRO(CeedRegister_Sve_Serial, 1, "/cpu/self/sve/serial") +MACRO(CeedRegister_Sve_Blocked, 1, "/cpu/self/sve/blocked") MACRO(CeedRegister_Xsmm_Blocked, 1, "/cpu/self/xsmm/blocked") MACRO(CeedRegister_Xsmm_Serial, 1, "/cpu/self/xsmm/serial") diff --git a/backends/sve/ceed-sve-blocked.c b/backends/sve/ceed-sve-blocked.c new file mode 100644 index 0000000000..50d9fe0154 --- /dev/null +++ b/backends/sve/ceed-sve-blocked.c @@ -0,0 +1,45 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include +#include +#include +#include + +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Backend Init +//------------------------------------------------------------------------------ +static int CeedInit_Sve(const char *resource, Ceed ceed) { + if (strcmp(resource, "/cpu/self") && strcmp(resource, "/cpu/self/sve") && strcmp(resource, "/cpu/self/sve/blocked")) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, "SVE backend cannot use resource: %s", resource); + // LCOV_EXCL_STOP + } + CeedCallBackend(CeedSetDeterministic(ceed, true)); + + // Create reference CEED that implementation will be dispatched + // through unless overridden + Ceed ceed_ref; + CeedInit("/cpu/self/opt/blocked", &ceed_ref); + CeedCallBackend(CeedSetDelegate(ceed, ceed_ref)); + + if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) { + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", CeedTensorContractCreate_f64_Sve)); + } else { + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", CeedTensorContractCreate_f32_Sve); + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Backend Register +//------------------------------------------------------------------------------ +CEED_INTERN int CeedRegister_Sve_Blocked(void) { return CeedRegister("/cpu/self/sve/blocked", CeedInit_Sve, 30); } +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve-serial.c b/backends/sve/ceed-sve-serial.c new file mode 100644 index 0000000000..c988164978 --- /dev/null +++ b/backends/sve/ceed-sve-serial.c @@ -0,0 +1,45 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include +#include +#include +#include + +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Backend Init +//------------------------------------------------------------------------------ +static int CeedInit_Sve(const char *resource, Ceed ceed) { + if (strcmp(resource, "/cpu/self") && strcmp(resource, "/cpu/self/sve/serial")) { + // LCOV_EXCL_START + return CeedError(ceed, CEED_ERROR_BACKEND, "SVE backend cannot use resource: %s", resource); + // LCOV_EXCL_STOP + } + CeedCallBackend(CeedSetDeterministic(ceed, true)); + + // Create reference CEED that implementation will be dispatched + // through unless overridden + Ceed ceed_ref; + CeedInit("/cpu/self/opt/serial", &ceed_ref); + CeedCallBackend(CeedSetDelegate(ceed, ceed_ref)); + + if (CEED_SCALAR_TYPE == CEED_SCALAR_FP64) { + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", CeedTensorContractCreate_f64_Sve)); + } else { + CeedCallBackend(CeedSetBackendFunction(ceed, "Ceed", ceed, "TensorContractCreate", CeedTensorContractCreate_f32_Sve)); + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Backend Register +//------------------------------------------------------------------------------ +CEED_INTERN int CeedRegister_Sve_Serial(void) { return CeedRegister("/cpu/self/sve/serial", CeedInit_Sve, 35); } +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve-tensor-f32.c b/backends/sve/ceed-sve-tensor-f32.c new file mode 100644 index 0000000000..2971db4b7c --- /dev/null +++ b/backends/sve/ceed-sve-tensor-f32.c @@ -0,0 +1,150 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include +#include +#ifdef __ARM_FEATURE_SVE +#include +#endif +#include + +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Blocked Tensor Contract +//------------------------------------------------------------------------------ +static inline int CeedTensorContract_Sve_Blocked(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const float *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const float *restrict u, float *restrict v, + const CeedInt JJ) { + CeedInt t_stride_0 = B, t_stride_1 = 1; + if (t_mode == CEED_TRANSPOSE) { + t_stride_0 = 1; + t_stride_1 = J; + } + + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + // Blocks of JJ rows + for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { + for (CeedInt jj = 0; jj < JJ; jj++) { // unroll + // C vectorization by compiler + for (int32_t c = 0; c < C; c += svcntd()) { + svbool_t pg = svwhilelt_b32(c, C); + // Load u, v into vectors + svfloat32_t u_vec = svld1(pg, &u[(a * B + b) * C + c]); + svfloat32_t v_vec = svld1(pg, &v[(a * J + j + jj) * C + c]); + // Basis matrix value + float tq = t[(j + jj) * t_stride_0 + b * t_stride_1]; + // fmadd + svst1(pg, &v[(a * J + j + jj) * C + c], svmla_x(pg, v_vec, u_vec, tq)); + } + } + } + } + } + + // Remainder of rows + CeedInt j = (J / JJ) * JJ; + if (j < J) { + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + // Blocks of JJ rows + for (CeedInt jj = 0; jj < J - j; jj++) { // not unrolled + // C vectorization by compiler + for (int32_t c = 0; c < C; c += svcntd()) { + svbool_t pg = svwhilelt_b32(c, C); + // Load u, v into vectors + svfloat32_t u_vec = svld1(pg, &u[(a * B + b) * C + c]); + svfloat32_t v_vec = svld1(pg, &v[(a * J + j + jj) * C + c]); + // Basis matrix value + float tq = t[(j + jj) * t_stride_0 + b * t_stride_1]; + // fmadd + svst1(pg, &v[(a * J + j + jj) * C + c], svmla_x(pg, v_vec, u_vec, tq)); + } + } + } + } + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Blocked Tensor Contract +//------------------------------------------------------------------------------ +static inline int CeedTensorContract_Sve_Serial(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const float *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const float *restrict u, float *restrict v, + const CeedInt JJ) { + CeedInt t_stride_0 = B, t_stride_1 = 1; + if (t_mode == CEED_TRANSPOSE) { + t_stride_0 = 1; + t_stride_1 = J; + } + + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { + for (CeedInt jj = 0; jj < JJ; jj++) { // unroll + v[a * J + (j + jj)] += t[(j + jj) * t_stride_0 + b * t_stride_1] * u[a * B + b]; + } + } + } + } + + CeedInt j = (J / JJ) * JJ; + if (j < J) { + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + for (CeedInt jj = 0; jj < J - j; jj++) { // not unrolled + v[a * J + (j + jj)] += t[(j + jj) * t_stride_0 + b * t_stride_1] * u[a * B + b]; + } + } + } + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Tensor Contract - Common Sizes +//------------------------------------------------------------------------------ +static int CeedTensorContract_Sve_Blocked_8(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const float *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const float *restrict u, float *restrict v) { + return CeedTensorContract_Sve_Blocked(contract, A, B, C, J, t, t_mode, add, u, v, 8); +} +static int CeedTensorContract_Sve_Serial_8(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const float *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const float *restrict u, float *restrict v) { + return CeedTensorContract_Sve_Serial(contract, A, B, C, J, t, t_mode, add, u, v, 8); +} + +//------------------------------------------------------------------------------ +// Tensor Contract Apply +//------------------------------------------------------------------------------ +static int CeedTensorContractApply_Sve(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const float *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const float *restrict u, float *restrict v) { + if (!add) { + for (CeedInt q = 0; q < A * J * C; q++) v[q] = (float)0.0; + } + + if (C == 1) CeedTensorContract_Sve_Serial_8(contract, A, B, C, J, t, t_mode, true, u, v); + else CeedTensorContract_Sve_Blocked_8(contract, A, B, C, J, t, t_mode, true, u, v); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Tensor Contract Create +//------------------------------------------------------------------------------ +int CeedTensorContractCreate_f32_Sve(CeedBasis basis, CeedTensorContract contract) { + Ceed ceed; + CeedCallBackend(CeedTensorContractGetCeed(contract, &ceed)); + + CeedCallBackend(CeedSetBackendFunction(ceed, "TensorContract", contract, "Apply", CeedTensorContractApply_Sve)); + + return CEED_ERROR_SUCCESS; +} +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve-tensor-f64.c b/backends/sve/ceed-sve-tensor-f64.c new file mode 100644 index 0000000000..970c57d93a --- /dev/null +++ b/backends/sve/ceed-sve-tensor-f64.c @@ -0,0 +1,150 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#include +#include +#ifdef __ARM_FEATURE_SVE +#include +#endif +#include + +#include "ceed-sve.h" + +//------------------------------------------------------------------------------ +// Blocked Tensor Contract +//------------------------------------------------------------------------------ +static inline int CeedTensorContract_Sve_Blocked(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, double *restrict v, + const CeedInt JJ) { + CeedInt t_stride_0 = B, t_stride_1 = 1; + if (t_mode == CEED_TRANSPOSE) { + t_stride_0 = 1; + t_stride_1 = J; + } + + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + // Blocks of JJ rows + for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { + for (CeedInt jj = 0; jj < JJ; jj++) { // unroll + // C vectorization by compiler + for (int32_t c = 0; c < C; c += svcntd()) { + svbool_t pg = svwhilelt_b64(c, C); + // Load u, v into vectors + svfloat64_t u_vec = svld1(pg, &u[(a * B + b) * C + c]); + svfloat64_t v_vec = svld1(pg, &v[(a * J + j + jj) * C + c]); + // Basis matrix value + double tq = t[(j + jj) * t_stride_0 + b * t_stride_1]; + // fmadd + svst1(pg, &v[(a * J + j + jj) * C + c], svmla_x(pg, v_vec, u_vec, tq)); + } + } + } + } + } + + // Remainder of rows + CeedInt j = (J / JJ) * JJ; + if (j < J) { + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + // Blocks of JJ rows + for (CeedInt jj = 0; jj < J - j; jj++) { // not unrolled + // C vectorization by compiler + for (int32_t c = 0; c < C; c += svcntd()) { + svbool_t pg = svwhilelt_b64(c, C); + // Load u, v into vectors + svfloat64_t u_vec = svld1(pg, &u[(a * B + b) * C + c]); + svfloat64_t v_vec = svld1(pg, &v[(a * J + j + jj) * C + c]); + // Basis matrix value + double tq = t[(j + jj) * t_stride_0 + b * t_stride_1]; + // fmadd + svst1(pg, &v[(a * J + j + jj) * C + c], svmla_x(pg, v_vec, u_vec, tq)); + } + } + } + } + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Blocked Tensor Contract +//------------------------------------------------------------------------------ +static inline int CeedTensorContract_Sve_Serial(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, double *restrict v, + const CeedInt JJ) { + CeedInt t_stride_0 = B, t_stride_1 = 1; + if (t_mode == CEED_TRANSPOSE) { + t_stride_0 = 1; + t_stride_1 = J; + } + + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + for (CeedInt j = 0; j < (J / JJ) * JJ; j += JJ) { + for (CeedInt jj = 0; jj < JJ; jj++) { // unroll + v[a * J + (j + jj)] += t[(j + jj) * t_stride_0 + b * t_stride_1] * u[a * B + b]; + } + } + } + } + + CeedInt j = (J / JJ) * JJ; + if (j < J) { + for (CeedInt a = 0; a < A; a++) { + for (CeedInt b = 0; b < B; b++) { + for (CeedInt jj = 0; jj < J - j; jj++) { // not unrolled + v[a * J + (j + jj)] += t[(j + jj) * t_stride_0 + b * t_stride_1] * u[a * B + b]; + } + } + } + } + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Tensor Contract - Common Sizes +//------------------------------------------------------------------------------ +static int CeedTensorContract_Sve_Blocked_8(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, double *restrict v) { + return CeedTensorContract_Sve_Blocked(contract, A, B, C, J, t, t_mode, add, u, v, 8); +} +static int CeedTensorContract_Sve_Serial_8(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, double *restrict v) { + return CeedTensorContract_Sve_Serial(contract, A, B, C, J, t, t_mode, add, u, v, 8); +} + +//------------------------------------------------------------------------------ +// Tensor Contract Apply +//------------------------------------------------------------------------------ +static int CeedTensorContractApply_Sve(CeedTensorContract contract, CeedInt A, CeedInt B, CeedInt C, CeedInt J, const double *restrict t, + CeedTransposeMode t_mode, const CeedInt add, const double *restrict u, double *restrict v) { + if (!add) { + for (CeedInt q = 0; q < A * J * C; q++) v[q] = (double)0.0; + } + + if (C == 1) CeedTensorContract_Sve_Serial_8(contract, A, B, C, J, t, t_mode, true, u, v); + else CeedTensorContract_Sve_Blocked_8(contract, A, B, C, J, t, t_mode, true, u, v); + + return CEED_ERROR_SUCCESS; +} + +//------------------------------------------------------------------------------ +// Tensor Contract Create +//------------------------------------------------------------------------------ +int CeedTensorContractCreate_f64_Sve(CeedBasis basis, CeedTensorContract contract) { + Ceed ceed; + CeedCallBackend(CeedTensorContractGetCeed(contract, &ceed)); + + CeedCallBackend(CeedSetBackendFunction(ceed, "TensorContract", contract, "Apply", CeedTensorContractApply_Sve)); + + return CEED_ERROR_SUCCESS; +} +//------------------------------------------------------------------------------ diff --git a/backends/sve/ceed-sve.h b/backends/sve/ceed-sve.h new file mode 100644 index 0000000000..b9e0be853c --- /dev/null +++ b/backends/sve/ceed-sve.h @@ -0,0 +1,17 @@ +// Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. +// All Rights Reserved. See the top-level LICENSE and NOTICE files for details. +// +// SPDX-License-Identifier: BSD-2-Clause +// +// This file is part of CEED: http://github.com/ceed + +#ifndef _ceed_sve_h +#define _ceed_sve_h + +#include +#include + +CEED_INTERN int CeedTensorContractCreate_f32_Sve(CeedBasis basis, CeedTensorContract contract); +CEED_INTERN int CeedTensorContractCreate_f64_Sve(CeedBasis basis, CeedTensorContract contract); + +#endif // _ceed_sve_h