Skip to content

Commit

Permalink
Merge branch 'main' into default_context_tests
Browse files Browse the repository at this point in the history
  • Loading branch information
bader authored Nov 8, 2024
2 parents 8f39ce4 + d0b076f commit 0f660ea
Show file tree
Hide file tree
Showing 10 changed files with 446 additions and 331 deletions.
15 changes: 4 additions & 11 deletions conformance/submission_details_template.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,11 @@ SYCL Version:
# backend, platform and device.
#
SYCL implementation and version:
Device:
Backend / platform:
Host architecture:
Host operating system and version:
Host toolchain and version:
Target SYCL backend:
Target SYCL platform:
Target SYCL device:

# Conformant products.
#
Expand All @@ -58,10 +57,11 @@ Target SYCL device:
# and device combinations.
#
SYCL implementation version(s):
Device(s):
Backend / platform(s):
Host architecture(s):
Host operating system(s) and version(s):
Host toolchain(s) and version(s):
SYCL backend/platform/device combination(s):

# Environment requirements.
#
Expand All @@ -75,13 +75,6 @@ Environment requirements:
# List of KHR extensions supported by the SYCL implementation.
# KHR extensions:

# Supported optional features.
#
# List of optional features supported by the SYCL implementation, and the
# backend/platform/device combinations for which they are supported.
#
Optional features:

# Tests version.
#
# Commit SHA (full hash) of the commit which was used to run the tests.
Expand Down
172 changes: 111 additions & 61 deletions oclmath/fpcontrol.h
Original file line number Diff line number Diff line change
@@ -1,75 +1,125 @@

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h

// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
// in integer code, and have found this is the only way to correctly verify operation.
#include <cstdint>

// In order to get tests for correctly rounded operations (e.g. multiply) to
// work properly we need to be able to set the reference hardware to FTZ mode if
// the device hardware is running in that mode. We have explored all other
// options short of writing correctly rounded operations in integer code, and
// have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
typedef int FPU_mode_type;
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#endif
// Set the reference hardware floating point unit to FTZ mode
static inline void ForceFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr( *mode | 0x8040);
#elif defined( __PPC__ )
*mode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined ( __arm__ )
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
// Non-Apple implementations will need to provide their own implementation for
// these features. If the reference hardware and device are both running in the
// same state (either FTZ or IEEE compliant modes) then these functions may be
// empty. If the device is running in non-default rounding mode (e.g. round
// toward zero), then these functions should also set the reference device into
// that rounding mode.
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) || \
defined(__MINGW32__)
#ifdef _MSC_VER
typedef int FPU_mode_type;
#else
#error ForceFTZ needs an implentation
typedef int64_t FPU_mode_type;
#endif
}

// Disable the denorm flush to zero
static inline void DisableFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
*mode = _mm_getcsr();
_mm_setcsr( *mode & ~0x8040);
#elif defined( __PPC__ )
*mode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined ( __arm__ )
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
defined(__MINGW32__)
#include <xmmintrin.h>
#elif defined(__PPC__)
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#elif defined(__mips__)
#include "mips/m32c1.h"
#endif
// Set the reference hardware floating point unit to FTZ mode.
//
// On every path except MIPS, the floating-point control value that was in
// effect before the call is stored into *mode, and the control value is then
// rewritten with additional bits OR-ed in. `mode` is dereferenced
// unconditionally on those paths, so it must point to valid storage.
//
// Exactly one architecture branch below is compiled in; an unsupported target
// fails the build via #error.
inline void ForceFTZ(FPU_mode_type* mode) {
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
defined(__MINGW32__)
// x86 / x86-64: save the value returned by _mm_getcsr(), then write it back
// with the 0x8040 bits set.
// NOTE(review): 0x8040 is presumably MXCSR FTZ (0x8000) | DAZ (0x0040) --
// confirm against the Intel SDM.
*mode = _mm_getcsr();
_mm_setcsr(*mode | 0x8040);
#elif defined(__PPC__)
// PowerPC: save the thread-local fpu_control value, then OR in _FPU_MASK_NI.
// NOTE(review): only the fpu_control variable is updated here; nothing in
// this function writes it to a hardware register -- confirm that is intended.
*mode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined(__arm__)
// 32-bit ARM: read FPSCR, save it, then write it back with bit 24 set.
// NOTE(review): bit 24 is presumably the FZ (flush-to-zero) bit -- confirm
// against the Arm Architecture Reference Manual.
unsigned fpscr;
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
#elif defined(__aarch64__)
// AArch64 (64-bit ARM): same sequence as the 32-bit ARM branch, but the
// register accessed is FPCR via mrs/msr. The local keeps the name `fpscr`
// even though it holds the FPCR value.
uint64_t fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
#elif defined(__mips__)
// MIPS: calls fpa_bissr() with FPA_CSR_FS. *mode is NOT written on this
// path, so callers must not rely on its contents afterwards.
// NOTE(review): fpa_bissr presumably sets the FS bit in the FPU control/status
// register -- confirm against mips/m32c1.h.
fpa_bissr(FPA_CSR_FS);
#else
#error ForceFTZ needs an implentation
#endif
}

// Disable the denorm flush to zero.
//
// On every path except MIPS, the floating-point control value that was in
// effect before the call is stored into *mode, and the control value is then
// rewritten with the same bits that ForceFTZ sets cleared instead. `mode` is
// dereferenced unconditionally on those paths, so it must point to valid
// storage.
//
// Exactly one architecture branch below is compiled in; an unsupported target
// fails the build via #error.
inline void DisableFTZ(FPU_mode_type* mode) {
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
defined(__MINGW32__)
// x86 / x86-64: save the value returned by _mm_getcsr(), then write it back
// with the 0x8040 bits cleared.
// NOTE(review): 0x8040 is presumably MXCSR FTZ (0x8000) | DAZ (0x0040) --
// confirm against the Intel SDM.
*mode = _mm_getcsr();
_mm_setcsr(*mode & ~0x8040);
#elif defined(__PPC__)
// PowerPC: save the thread-local fpu_control value, then clear _FPU_MASK_NI.
// NOTE(review): only the fpu_control variable is updated here; nothing in
// this function writes it to a hardware register -- confirm that is intended.
*mode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined(__arm__)
// 32-bit ARM: read FPSCR, save it, then write it back with bit 24 cleared.
// NOTE(review): bit 24 is presumably the FZ (flush-to-zero) bit -- confirm
// against the Arm Architecture Reference Manual.
unsigned fpscr;
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
#elif defined(__aarch64__)
// AArch64 (64-bit ARM): same sequence as the 32-bit ARM branch, but the
// register accessed is FPCR via mrs/msr. The local keeps the name `fpscr`
// even though it holds the FPCR value. ~(1U << 24) is a 32-bit unsigned mask
// that is zero-extended when combined with the 64-bit value, so the upper 32
// bits of the value written are zero.
uint64_t fpscr;
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
#elif defined(__mips__)
// MIPS: calls fpa_bicsr() with FPA_CSR_FS. *mode is NOT written on this
// path, so callers must not rely on its contents afterwards.
// NOTE(review): fpa_bicsr presumably clears the FS bit in the FPU
// control/status register -- confirm against mips/m32c1.h.
fpa_bicsr(FPA_CSR_FS);
#else
#error DisableFTZ needs an implentation
#endif
}
#endif
}

// Restore the reference hardware to floating point state indicated by *mode
static inline void RestoreFPState( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
_mm_setcsr( *mode );
#elif defined( __PPC__)
fpu_control = *mode;
#elif defined (__arm__)
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
// Restore the reference hardware to floating point state indicated by *mode
inline void RestoreFPState(FPU_mode_type* mode) {
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) || \
defined(__MINGW32__)
_mm_setcsr(*mode);
#elif defined(__PPC__)
fpu_control = *mode;
#elif defined(__arm__)
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
// Add 64 bit support
#elif defined(__aarch64__)
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
#elif defined(__mips__)
// Mips runs by default with DAZ=1 FTZ=1
#else
#error RestoreFPState needs an implementation
#error RestoreFPState needs an implementation
#endif
}
}
#else
#error ForceFTZ and RestoreFPState need implentations
#error ForceFTZ and RestoreFPState need implentations
#endif

#endif
#endif
Loading

0 comments on commit 0f660ea

Please sign in to comment.