diff --git a/oclmath/fpcontrol.h b/oclmath/fpcontrol.h index d4acf1849..12aba0a94 100644 --- a/oclmath/fpcontrol.h +++ b/oclmath/fpcontrol.h @@ -1,75 +1,128 @@ - +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// #ifndef _fpcontrol_h #define _fpcontrol_h -// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware -// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations -// in integer code, and have found this is the only way to correctly verify operation. +#include + +// In order to get tests for correctly rounded operations (e.g. multiply) to +// work properly we need to be able to set the reference hardware to FTZ mode if +// the device hardware is running in that mode. We have explored all other +// options short of writing correctly rounded operations in integer code, and +// have found this is the only way to correctly verify operation. // -// Non-Apple implementations will need to provide their own implentation for these features. If the reference hardware and device are both -// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default -// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode. -#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__) - typedef int FPU_mode_type; -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ ) - #include -#elif defined( __PPC__ ) - #include - extern __thread fpu_control_t fpu_control; -#endif - // Set the reference hardware floating point unit to FTZ mode - static inline void ForceFTZ( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr( *mode | 0x8040); -#elif defined( __PPC__ ) - *mode = fpu_control; - fpu_control |= _FPU_MASK_NI; -#elif defined ( __arm__ ) - unsigned fpscr; - __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24))); +// Non-Apple implementations will need to provide their own implentation for +// these features. If the reference hardware and device are both running in the +// same state (either FTZ or IEEE compliant modes) then these functions may be +// empty. If the device is running in non-default rounding mode (e.g. round +// toward zero), then these functions should also set the reference device into +// that rounding mode. +#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \ + || defined(__MINGW32__) +#ifdef _MSC_VER +typedef int FPU_mode_type; #else - #error ForceFTZ needs an implentation +typedef int64_t FPU_mode_type; #endif - } - - // Disable the denorm flush to zero - static inline void DisableFTZ( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - *mode = _mm_getcsr(); - _mm_setcsr( *mode & ~0x8040); -#elif defined( __PPC__ ) - *mode = fpu_control; - fpu_control &= ~_FPU_MASK_NI; -#elif defined ( __arm__ ) - unsigned fpscr; - __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); - *mode = fpscr; - __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24))); +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ + || defined(__MINGW32__) +#include +#elif defined(__PPC__) +#include +extern __thread fpu_control_t fpu_control; +#elif defined(__mips__) +#include "mips/m32c1.h" +#endif +// Set the reference hardware floating point unit to FTZ mode +inline void ForceFTZ(FPU_mode_type *mode) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ + || defined(__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr(*mode | 0x8040); +#elif defined(__PPC__) + *mode = fpu_control; + fpu_control |= _FPU_MASK_NI; +#elif defined(__arm__) + unsigned fpscr; + __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24))); + // Add 64 bit support +#elif defined(__aarch64__) + uint64_t fpscr; + __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); +#elif defined(__mips__) + fpa_bissr(FPA_CSR_FS); +#else +#error ForceFTZ needs an implentation +#endif +} + +// Disable the denorm flush to zero +inline void DisableFTZ(FPU_mode_type *mode) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ + || defined(__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr(*mode & ~0x8040); +#elif defined(__PPC__) + *mode = fpu_control; + fpu_control &= ~_FPU_MASK_NI; +#elif defined(__arm__) + unsigned fpscr; + __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24))); + // Add 64 bit support +#elif defined(__aarch64__) + uint64_t fpscr; + __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); +#elif defined(__mips__) + fpa_bicsr(FPA_CSR_FS); #else #error DisableFTZ needs an implentation -#endif - } +#endif +} - // Restore the reference hardware to floating point state indicated by *mode - static inline void RestoreFPState( FPU_mode_type *mode ) - { -#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) - _mm_setcsr( *mode ); -#elif defined( __PPC__) - fpu_control = *mode; -#elif defined (__arm__) - __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode)); +// Restore the reference hardware to floating point state indicated by *mode +inline void RestoreFPState(FPU_mode_type *mode) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ + || defined(__MINGW32__) + _mm_setcsr(*mode); +#elif defined(__PPC__) + fpu_control = *mode; +#elif defined(__arm__) + __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode)); + // Add 64 bit support +#elif defined(__aarch64__) + __asm__ volatile("msr fpcr, %0" ::"r"(*mode)); +#elif defined(__mips__) + // Mips runs by default with DAZ=1 FTZ=1 #else - #error RestoreFPState needs an implementation +#error RestoreFPState needs an implementation #endif - } +} #else - #error ForceFTZ and RestoreFPState need implentations +#error ForceFTZ and RestoreFPState need implentations #endif -#endif \ No newline at end of file +#endif diff --git a/oclmath/rounding_mode.cpp b/oclmath/rounding_mode.cpp index 399e40414..191c04d92 100644 --- a/oclmath/rounding_mode.cpp +++ b/oclmath/rounding_mode.cpp @@ -1,238 +1,263 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "rounding_mode.h" -/****************************************************************** - // - // OpenCL Conformance Tests - // - // Copyright: (c) 2008-2013 by Apple Inc. All Rights Reserved. - // - ******************************************************************/ - +#if (defined(__arm__) || defined(__aarch64__)) +#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode +#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode: -#include "rounding_mode.h" +#define _ARM_FE_FTZ 0x1000000 +#define _ARM_FE_NFTZ 0x0 +#if defined(__aarch64__) +#define _FPU_GETCW(cw) __asm__("MRS %0,FPCR" : "=r"(cw)) +#define _FPU_SETCW(cw) __asm__("MSR FPCR,%0" : : "ri"(cw)) +#else +#define _FPU_GETCW(cw) __asm__("VMRS %0,FPSCR" : "=r"(cw)) +#define _FPU_SETCW(cw) __asm__("VMSR FPSCR,%0" : : "ri"(cw)) +#endif +#endif -#if !(defined(_WIN32) && defined(_MSC_VER)) -RoundingMode set_round( RoundingMode r, Type outType ) +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +#define _ARM_FE_TONEAREST 0x0 +#define _ARM_FE_UPWARD 0x400000 +#define _ARM_FE_DOWNWARD 0x800000 +#define _ARM_FE_TOWARDZERO 0xc00000 +RoundingMode set_round(RoundingMode r, Type outType) { - static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; - static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; + static const int flt_rounds[kRoundingModeCount] = { + _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, + _ARM_FE_TOWARDZERO + }; + static const int int_rounds[kRoundingModeCount] = { + _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, + _ARM_FE_TOWARDZERO + }; const int *p = int_rounds; - if( outType == kfloat || outType == kdouble ) - p = flt_rounds; - int oldRound = fegetround(); - fesetround( p[r] ); - - switch( oldRound ) - { - case FE_TONEAREST: - return kRoundToNearestEven; - case FE_UPWARD: - return kRoundUp; - case FE_DOWNWARD: - return kRoundDown; - case FE_TOWARDZERO: - return kRoundTowardZero; - default: - abort(); // ??! - } - return kDefaultRoundingMode; //never happens + if (outType == kfloat || outType == kdouble) p = flt_rounds; + + int64_t fpscr = 0; + RoundingMode oldRound = get_round(); + + _FPU_GETCW(fpscr); + _FPU_SETCW(p[r] | (fpscr & ~FPSCR_ROUND_MASK)); + + return oldRound; } -RoundingMode get_round( void ) +RoundingMode get_round(void) { - int oldRound = fegetround(); + int64_t fpscr; + int oldRound; + + _FPU_GETCW(fpscr); + oldRound = (fpscr & FPSCR_ROUND_MASK); - switch( oldRound ) + switch (oldRound) { - case FE_TONEAREST: - return kRoundToNearestEven; - case FE_UPWARD: - return kRoundUp; - case FE_DOWNWARD: - return kRoundDown; - case FE_TOWARDZERO: - return kRoundTowardZero; + case _ARM_FE_TONEAREST: return kRoundToNearestEven; + case _ARM_FE_UPWARD: return kRoundUp; + case _ARM_FE_DOWNWARD: return kRoundDown; + case _ARM_FE_TOWARDZERO: return kRoundTowardZero; } - + return kDefaultRoundingMode; } -#elif defined( __arm__ ) && defined( __GNUC__ ) -#define _ARM_FE_TONEAREST 0x0 -#define _ARM_FE_UPWARD 0x400000 -#define _ARM_FE_DOWNWARD 0x800000 -#define _ARM_FE_TOWARDZERO 0xc00000 -#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw)) -#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw)) -RoundingMode set_round( RoundingMode r, Type outType ) +#elif !(defined(_WIN32) && defined(_MSC_VER)) +RoundingMode set_round(RoundingMode r, Type outType) { - static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST, - _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO }; - static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO, - _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO }; - const int *p = int_rounds; - if( outType == kfloat || outType == kdouble ) - p = flt_rounds; - int oldRound; - _FPU_GETCW(oldRound); - _FPU_SETCW( p[r] ); - - switch( oldRound ) - { - case _ARM_FE_TONEAREST: - return kRoundToNearestEven; - case _ARM_FE_UPWARD: - return kRoundUp; - case _ARM_FE_DOWNWARD: - return kRoundDown; - case _ARM_FE_TOWARDZERO: - return kRoundTowardZero; - default: - abort(); // ??! - } - return kDefaultRoundingMode; //never happens + static const int flt_rounds[kRoundingModeCount] = { + FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO + }; + static const int int_rounds[kRoundingModeCount] = { + FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO + }; + const int *p = int_rounds; + if (outType == kfloat || outType == kdouble) p = flt_rounds; + int oldRound = fegetround(); + fesetround(p[r]); + + switch (oldRound) + { + case FE_TONEAREST: return kRoundToNearestEven; + case FE_UPWARD: return kRoundUp; + case FE_DOWNWARD: return kRoundDown; + case FE_TOWARDZERO: return kRoundTowardZero; + default: abort(); // ??! + } + return kDefaultRoundingMode; // never happens } -RoundingMode get_round( void ) +RoundingMode get_round(void) { - int oldRound; - _FPU_GETCW(oldRound); - - switch( oldRound ) - { - case _ARM_FE_TONEAREST: - return kRoundToNearestEven; - case _ARM_FE_UPWARD: - return kRoundUp; - case _ARM_FE_DOWNWARD: - return kRoundDown; - case _ARM_FE_TOWARDZERO: - return kRoundTowardZero; - } - - return kDefaultRoundingMode; + int oldRound = fegetround(); + + switch (oldRound) + { + case FE_TONEAREST: return kRoundToNearestEven; + case FE_UPWARD: return kRoundUp; + case FE_DOWNWARD: return kRoundDown; + case FE_TOWARDZERO: return kRoundTowardZero; + } + + return kDefaultRoundingMode; } - + #else -RoundingMode set_round( RoundingMode r, Type outType ) +RoundingMode set_round(RoundingMode r, Type outType) { - static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; - static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; - const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds; + static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR, + _RC_UP, _RC_DOWN, + _RC_CHOP }; + static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR, + _RC_UP, _RC_DOWN, + _RC_CHOP }; + const int *p = + (outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds; unsigned int oldRound; - int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound - if (err) { -// vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__); - return kDefaultRoundingMode; //what else never happens + int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound + if (err) + { + vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", + __FILE__, __LINE__); + return kDefaultRoundingMode; // what else never happens } - + oldRound &= _MCW_RC; - RoundingMode old = - (oldRound == _RC_NEAR)? kRoundToNearestEven : - (oldRound == _RC_UP)? kRoundUp : - (oldRound == _RC_DOWN)? kRoundDown : - (oldRound == _RC_CHOP)? kRoundTowardZero: - kDefaultRoundingMode; + RoundingMode old = (oldRound == _RC_NEAR) + ? kRoundToNearestEven + : (oldRound == _RC_UP) ? kRoundUp + : (oldRound == _RC_DOWN) + ? kRoundDown + : (oldRound == _RC_CHOP) ? kRoundTowardZero + : kDefaultRoundingMode; - _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode - return old; //returning old rounding mode + _controlfp_s(&oldRound, p[r], _MCW_RC); // setting new rounding mode + return old; // returning old rounding mode } -RoundingMode get_round( void ) +RoundingMode get_round(void) { unsigned int oldRound; - int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound + int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound oldRound &= _MCW_RC; - return - (oldRound == _RC_NEAR)? kRoundToNearestEven : - (oldRound == _RC_UP)? kRoundUp : - (oldRound == _RC_DOWN)? kRoundDown : - (oldRound == _RC_CHOP)? kRoundTowardZero: - kDefaultRoundingMode; + return (oldRound == _RC_NEAR) + ? kRoundToNearestEven + : (oldRound == _RC_UP) ? kRoundUp + : (oldRound == _RC_DOWN) + ? kRoundDown + : (oldRound == _RC_CHOP) ? kRoundTowardZero + : kDefaultRoundingMode; } #endif // -// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in -// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in -// software by testing against FLT_MIN or DBL_MIN in that file. +// FlushToZero() sets the host processor into ftz mode. It is intended to have +// a remote effect on the behavior of the code in basic_test_conversions.c. Some +// host processors may not support this mode, which case you'll need to do some +// clamping in software by testing against FLT_MIN or DBL_MIN in that file. // -// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of -// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic +// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* +// have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to +// zero for subnormal inputs or outputs when FTZ mode is on like other basic // operators do (e.g. add, subtract, multiply, divide, etc.) // -// Configuring hardware to FTZ mode varies by platform. -// CAUTION: Some C implementations may also fail to behave properly in this mode. +// Configuring hardware to FTZ mode varies by platform. +// CAUTION: Some C implementations may also fail to behave properly in this +// mode. // // On PowerPC, it is done by setting the FPSCR into non-IEEE mode. -// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2 -// is used for floating point computation! If your OS uses x87, you'll need to figure out how -// to turn that off for the conversions code in basic_test_conversions.c so that they flush to -// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c -// in which case, these function are at liberty to do nothing. +// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- +// provided that SSE/SSE2 +// is used for floating point computation! If your OS uses x87, you'll +// need to figure out how to turn that off for the conversions code in +// basic_test_conversions.c so that they flush to zero properly. +// Otherwise, you'll need to add appropriate software clamping to +// basic_test_conversions.c in which case, these function are at +// liberty to do nothing. // -#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32) - #include -#elif defined( __PPC__ ) - #include +#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32) +#include +#elif defined(__PPC__) +#include +#elif defined(__mips__) +#include "mips/m32c1.h" #endif -void *FlushToZero( void ) +void *FlushToZero(void) { -#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) - #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) - union{ unsigned int i; void *p; }u = { _mm_getcsr() }; - _mm_setcsr( u.i | 0x8040 ); - return u.p; - #elif defined( __arm__ ) - #define _ARM_FE_FTZ 0x1000000 - #define _ARM_FE_NFTZ 0x0 - #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw)) - - static const int ftz_modes[ kRoundingModeCount ] = { _ARM_FE_FTZ, _ARM_FE_NFTZ }; - const int *f = ftz_modes; - _FPU_SETCW( f[0] ); - return NULL; - #elif defined( __PPC__ ) - fpu_control_t flags = 0; - _FPU_GETCW(flags); - flags |= _FPU_MASK_NI; - _FPU_SETCW(flags); - return NULL; - #else - #error Unknown arch - #endif +#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) + union { + unsigned int i; + void *p; + } u = { _mm_getcsr() }; + _mm_setcsr(u.i | 0x8040); + return u.p; +#elif defined(__arm__) || defined(__aarch64__) + int64_t fpscr; + _FPU_GETCW(fpscr); + _FPU_SETCW(fpscr | FPSCR_FZ); + return NULL; +#elif defined(__PPC__) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags |= _FPU_MASK_NI; + _FPU_SETCW(flags); + return NULL; +#elif defined(__mips__) + fpa_bissr(FPA_CSR_FS); + return NULL; #else - #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. +#error Unknown arch +#endif +#else +#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. #endif } -// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p. -void UnFlushToZero( void *p) +// Undo the effects of FlushToZero above, restoring the host to default +// behavior, using the information passed in p. +void UnFlushToZero(void *p) { -#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) - #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) - union{ void *p; int i; }u = { p }; - _mm_setcsr( u.i ); - #elif defined( __arm__ ) - #define _ARM_FE_FTZ 0x1000000 - #define _ARM_FE_NFTZ 0x0 - #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw)) - - static const int ftz_modes[ kRoundingModeCount ] = { _ARM_FE_FTZ, _ARM_FE_NFTZ }; - const int *f = ftz_modes; - _FPU_SETCW( f[1] ); - #elif defined( __PPC__) - fpu_control_t flags = 0; - _FPU_GETCW(flags); - flags &= ~_FPU_MASK_NI; - _FPU_SETCW(flags); - #else - #error Unknown arch - #endif +#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) +#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) + union { + void *p; + int i; + } u = { p }; + _mm_setcsr(u.i); +#elif defined(__arm__) || defined(__aarch64__) + int64_t fpscr; + _FPU_GETCW(fpscr); + _FPU_SETCW(fpscr & ~FPSCR_FZ); +#elif defined(__PPC__) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags &= ~_FPU_MASK_NI; + _FPU_SETCW(flags); +#elif defined(__mips__) + fpa_bicsr(FPA_CSR_FS); +#else +#error Unknown arch +#endif #else - #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. +#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. #endif } diff --git a/oclmath/rounding_mode.h b/oclmath/rounding_mode.h index e4cf14e97..6f52f0a00 100644 --- a/oclmath/rounding_mode.h +++ b/oclmath/rounding_mode.h @@ -1,17 +1,28 @@ - -/****************************************************************** - // - // OpenCL Conformance Tests - // - // Copyright: (c) 2008-2013 by Apple Inc. All Rights Reserved. - // - ******************************************************************/ - +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// #ifndef __ROUNDING_MODE_H__ #define __ROUNDING_MODE_H__ #include "compat.h" +#if (defined(_WIN32) && defined(_MSC_VER)) +#include "errorHelpers.h" +#include "testHarness.h" +#endif + typedef enum { kDefaultRoundingMode = 0, @@ -21,7 +32,7 @@ typedef enum kRoundTowardZero, kRoundingModeCount -}RoundingMode; +} RoundingMode; typedef enum { @@ -35,24 +46,15 @@ typedef enum kdouble = 7, kulong = 8, klong = 9, - - //This goes last - kTypeCount -}Type; -#ifdef __cplusplus -extern "C" { -#endif - -extern RoundingMode set_round( RoundingMode r, Type outType ); -extern RoundingMode get_round( void ); -extern void *FlushToZero( void ); -extern void UnFlushToZero( void *p); - -#ifdef __cplusplus -} -#endif + // This goes last + kTypeCount +} Type; +extern RoundingMode set_round(RoundingMode r, Type outType); +extern RoundingMode get_round(void); +extern void *FlushToZero(void); +extern void UnFlushToZero(void *p); #endif /* __ROUNDING_MODE_H__ */