Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SVE2 implementation for code optimization. #35

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ with older GCC versions.
* x86_64 is the main development platform and thoroughly tested. This includes
support from SSE-only up to AVX512 on Xeon Phi or Xeon CPUs.
* aarch64, arm, and ppc64le were tested and verified to work. No significant
performance evaluation was done.
performance evaluation was done. SVE2 support is in development.
* In any case, a fallback to correct execution via builtin arithmetic types is
available for all targets.

Expand Down
36 changes: 29 additions & 7 deletions experimental/bits/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@
#include <arm_neon.h>
#endif

#ifdef __ARM_FEATURE_SVE
#include <arm_sve.h>
#endif /* __ARM_FEATURE_SVE */

/* There are several closely related types, with the following naming
* convention:
* _Tp: vectorizable (arithmetic) type (or any type)
Expand Down Expand Up @@ -110,17 +114,20 @@ template <int _UsedBytes>
// ABI-tag alias templates. Each alias forwards a byte count to the
// underlying tag type (_VecBuiltin or _VecBltnBtmsk).
//
// The template parameter is a plain `int` number of bytes. It must not be
// an SVE type such as svint16_t: those are sizeless vector types, which are
// not valid non-type template parameters, cannot be defaulted from an
// integer literal, and are only declared when <arm_sve.h> is included, so
// using them here would break every non-SVE target as well.

// Tag for a vector of _Np elements of type _Tp (sizeof(_Tp) * _Np bytes).
template <typename _Tp, int _Np>
using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;

// Defaults to 16 bytes.
template <int _UsedBytes = 16>
using _Sse = _VecBuiltin<_UsedBytes>;

// Defaults to 32 bytes.
template <int _UsedBytes = 32>
using _Avx = _VecBuiltin<_UsedBytes>;

// Defaults to 64 bytes; the only alias here that maps to _VecBltnBtmsk.
template <int _UsedBytes = 64>
using _Avx512 = _VecBltnBtmsk<_UsedBytes>;

// Defaults to 16 bytes.
template <int _UsedBytes = 16>
using _Neon = _VecBuiltin<_UsedBytes>;

// Defaults to 16 bytes.
// NOTE(review): this currently aliases the same tag type as _Neon, so
// _Sve2<N> and _Neon<N> are indistinguishable to overload resolution and
// specialization -- confirm that is intended while SVE2 is in development.
template <int _UsedBytes = 16>
using _Sve2 = _VecBuiltin<_UsedBytes>;

// implementation-defined:
using __sse = _Sse<>;
Expand All @@ -129,6 +136,9 @@ using __avx512 = _Avx512<>;
// ARM ABI tag shorthands.
// _Neon<> uses the default byte count of 16, so __neon and __neon128 name
// the same type; __neon64 is the 8-byte variant.
using __neon = _Neon<>;
using __neon128 = _Neon<16>;
using __neon64 = _Neon<8>;
// _Sve2<> likewise defaults to 16 bytes, so __sve2 and __sve2_16 name the
// same type.
// NOTE(review): the NEON suffixes are bit widths (128/64) while the SVE2
// suffixes are byte counts (8/16) -- confirm the naming scheme is intended.
using __sve2 = _Sve2<>;
using __sve2_8 = _Sve2<8>;
using __sve2_16 = _Sve2<16>;

// standard:
template <typename _Tp, size_t _Np, typename...>
Expand Down Expand Up @@ -492,6 +502,8 @@ constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
// Compile-time booleans that expose the _GLIBCXX_SIMD_HAVE_* preprocessor
// flags for ARM targets as constants.
constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
// NOTE(review): unlike NEON there is no combined __have_sve2 flag here,
// only the per-architecture variants -- confirm whether one is needed.
constexpr inline bool __have_sve2_a32 = _GLIBCXX_SIMD_HAVE_SVE2_A32;
constexpr inline bool __have_sve2_a64 = _GLIBCXX_SIMD_HAVE_SVE2_A64;
constexpr inline bool __support_neon_float =
#if defined __GCC_IEC_559
__GCC_IEC_559 == 0;
Expand Down Expand Up @@ -2262,6 +2274,16 @@ template <>
{ using type = float64x2_t; };
#endif

// Intrinsic vector types for `double` on SVE2-enabled AArch64: 8 bytes maps
// to float64x1_t and 16 bytes maps to float64x2_t (both are NEON types).
//
// An explicit specialization may be defined only once. These two appear to
// repeat the <double, 8> / <double, 16> specializations that the preceding
// NEON-A64 section provides (NOTE(review): confirm against that section), so
// they are emitted only when that section is disabled. Without this guard a
// target with both flags set fails to compile with a redefinition error.
#if _GLIBCXX_SIMD_HAVE_SVE2_A64 && !_GLIBCXX_SIMD_HAVE_NEON_A64
template <>
struct __intrinsic_type<double, 8, void>
{ using type = float64x1_t; };

template <>
struct __intrinsic_type<double, 16, void>
{ using type = float64x2_t; };
#endif

#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
template <> \
struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
Expand Down
11 changes: 11 additions & 0 deletions experimental/bits/simd_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,17 @@
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
// SVE2 target detection.
//
// SVE/SVE2 exist only in the AArch64 execution state, so there is no 32-bit
// variant to detect. The A32 flag is kept (always 0) so that code referring
// to it still compiles; it previously tested __ARM_NEON, which reported
// "SVE2 available" on every NEON-capable ARMv8 target.
#define _GLIBCXX_SIMD_HAVE_SVE2_A32 0
// The ACLE feature-test macro for SVE2 is __ARM_FEATURE_SVE2; compilers do
// not predefine __ARM_SVE2, so a test on that name could never succeed.
#if defined __ARM_FEATURE_SVE2 && defined __aarch64__
#define _GLIBCXX_SIMD_HAVE_SVE2_A64 1
#else
#define _GLIBCXX_SIMD_HAVE_SVE2_A64 0
#endif

//}}}
// x86{{{
#ifdef __MMX__
Expand Down