diff --git a/oclmath/fpcontrol.h b/oclmath/fpcontrol.h
index d4acf1849..12aba0a94 100644
--- a/oclmath/fpcontrol.h
+++ b/oclmath/fpcontrol.h
@@ -1,75 +1,128 @@
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
 #ifndef _fpcontrol_h
 #define _fpcontrol_h
 
-// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware 
-// to FTZ mode if the device hardware is running in that mode.  We have explored all other options short of writing correctly rounded operations 
-// in integer code, and have found this is the only way to correctly verify operation.
+#include <cstdint>
+
+// In order to get tests for correctly rounded operations (e.g. multiply) to
+// work properly we need to be able to set the reference hardware to FTZ mode if
+// the device hardware is running in that mode.  We have explored all other
+// options short of writing correctly rounded operations in integer code, and
+// have found this is the only way to correctly verify operation.
 //
-// Non-Apple implementations will need to provide their own implentation for these features.  If the reference hardware and device are both 
-// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty.  If the device is running in non-default 
-// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
-#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
-    typedef int     FPU_mode_type;
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
-    #include <xmmintrin.h>
-#elif defined( __PPC__ ) 
-    #include <fpu_control.h>
-    extern __thread fpu_control_t fpu_control;
-#endif    
-    // Set the reference hardware floating point unit to FTZ mode
-    static inline void ForceFTZ( FPU_mode_type *mode )
-    {
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
-        *mode = _mm_getcsr();
-        _mm_setcsr( *mode | 0x8040);
-#elif defined( __PPC__ ) 
-        *mode = fpu_control;
-        fpu_control |= _FPU_MASK_NI;
-#elif defined ( __arm__ )
-        unsigned fpscr;
-        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
-        *mode = fpscr;
-        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
+// Non-Apple implementations will need to provide their own implementation for
+// these features.  If the reference hardware and device are both running in the
+// same state (either FTZ or IEEE compliant modes) then these functions may be
+// empty.  If the device is running in non-default rounding mode (e.g. round
+// toward zero), then these functions should also set the reference device into
+// that rounding mode.
+#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__)              \
+    || defined(__MINGW32__)
+#ifdef _MSC_VER
+typedef int FPU_mode_type;
 #else
-        #error ForceFTZ needs an implentation
+typedef int64_t FPU_mode_type;
 #endif
-    }
-    
-    // Disable the denorm flush to zero
-    static inline void DisableFTZ( FPU_mode_type *mode )
-    {
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
-        *mode = _mm_getcsr();
-        _mm_setcsr( *mode & ~0x8040);
-#elif defined( __PPC__ ) 
-        *mode = fpu_control;
-        fpu_control &= ~_FPU_MASK_NI;
-#elif defined ( __arm__ )
-        unsigned fpscr;
-        __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
-        *mode = fpscr;
-        __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
+#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
+    || defined(__MINGW32__)
+#include <xmmintrin.h>
+#elif defined(__PPC__)
+#include <fpu_control.h>
+extern __thread fpu_control_t fpu_control;
+#elif defined(__mips__)
+#include "mips/m32c1.h"
+#endif
+// Save the FP control word in *mode (not on MIPS), then enable flush-to-zero
+inline void ForceFTZ(FPU_mode_type *mode)
+{
+#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
+    || defined(__MINGW32__)
+    *mode = _mm_getcsr();
+    _mm_setcsr(*mode | 0x8040); // 0x8040: the FZ and DAZ bits of the MXCSR
+#elif defined(__PPC__)
+    *mode = fpu_control;
+    fpu_control |= _FPU_MASK_NI;
+#elif defined(__arm__)
+    unsigned fpscr;
+    __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
+    *mode = fpscr;
+    __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
+    // AArch64 variant follows: same bit 24, but in FPCR (via mrs/msr)
+#elif defined(__aarch64__)
+    uint64_t fpcr;
+    __asm__ volatile("mrs %0, fpcr" : "=r"(fpcr));
+    *mode = fpcr;
+    __asm__ volatile("msr fpcr, %0" ::"r"(fpcr | (1U << 24)));
+#elif defined(__mips__)
+    fpa_bissr(FPA_CSR_FS); // NOTE: *mode is left unwritten on this path
+#else
+#error ForceFTZ needs an implementation
+#endif
+}
+
+// Save the FP control word in *mode (not on MIPS), then disable flush-to-zero
+inline void DisableFTZ(FPU_mode_type *mode)
+{
+#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
+    || defined(__MINGW32__)
+    *mode = _mm_getcsr();
+    _mm_setcsr(*mode & ~0x8040); // 0x8040: the FZ and DAZ bits of the MXCSR
+#elif defined(__PPC__)
+    *mode = fpu_control;
+    fpu_control &= ~_FPU_MASK_NI;
+#elif defined(__arm__)
+    unsigned fpscr;
+    __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
+    *mode = fpscr;
+    __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
+    // AArch64 variant follows; its mask is 64-bit so bits 63:32 are preserved
+#elif defined(__aarch64__)
+    uint64_t fpcr;
+    __asm__ volatile("mrs %0, fpcr" : "=r"(fpcr));
+    *mode = fpcr;
+    __asm__ volatile("msr fpcr, %0" ::"r"(fpcr & ~((uint64_t)1 << 24)));
+#elif defined(__mips__)
+    fpa_bicsr(FPA_CSR_FS); // NOTE: *mode is left unwritten on this path
 #else
 #error DisableFTZ needs an implentation
-#endif  
-    }
+#endif
+}
 
-    // Restore the reference hardware to floating point state indicated by *mode
-    static inline void RestoreFPState( FPU_mode_type *mode )
-    {
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
-        _mm_setcsr( *mode );
-#elif defined( __PPC__)
-        fpu_control = *mode;
-#elif defined (__arm__)
-        __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
+// Restore the FP control word previously saved in *mode (no-op on MIPS)
+inline void RestoreFPState(FPU_mode_type *mode)
+{
+#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
+    || defined(__MINGW32__)
+    _mm_setcsr((unsigned)*mode);
+#elif defined(__PPC__)
+    fpu_control = *mode;
+#elif defined(__arm__)
+    __asm__ volatile("fmxr fpscr, %0" ::"r"((unsigned)*mode));
+    // 32-bit operand above so "r" names one register; AArch64 writes FPCR
+#elif defined(__aarch64__)
+    __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
+#elif defined(__mips__)
+    // Nothing is restored here; Mips runs by default with DAZ=1 FTZ=1
 #else
-        #error RestoreFPState needs an implementation
+#error RestoreFPState needs an implementation
 #endif
-    }
+}
 #else
-        #error ForceFTZ and RestoreFPState need implentations
+#error ForceFTZ and RestoreFPState need implentations
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/oclmath/rounding_mode.cpp b/oclmath/rounding_mode.cpp
index 399e40414..191c04d92 100644
--- a/oclmath/rounding_mode.cpp
+++ b/oclmath/rounding_mode.cpp
@@ -1,238 +1,263 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "rounding_mode.h"
 
-/******************************************************************
- //
- //  OpenCL Conformance Tests
- // 
- //  Copyright:	(c) 2008-2013 by Apple Inc. All Rights Reserved.
- //
- ******************************************************************/
-
+#if (defined(__arm__) || defined(__aarch64__))
+#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
+#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
 
-#include "rounding_mode.h"
+#define _ARM_FE_FTZ 0x1000000
+#define _ARM_FE_NFTZ 0x0
+#if defined(__aarch64__) // volatile: control-register reads must not be CSE'd
+#define _FPU_GETCW(cw) __asm__ volatile("MRS %0,FPCR" : "=r"(cw))
+#define _FPU_SETCW(cw) __asm__ volatile("MSR FPCR,%0" : : "r"(cw))
+#else // 32-bit ARM: same accessors, but on FPSCR; operands must be registers
+#define _FPU_GETCW(cw) __asm__ volatile("VMRS %0,FPSCR" : "=r"(cw))
+#define _FPU_SETCW(cw) __asm__ volatile("VMSR FPSCR,%0" : : "r"(cw))
+#endif
+#endif
 
-#if !(defined(_WIN32) && defined(_MSC_VER))
-RoundingMode set_round( RoundingMode r, Type outType )
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#define _ARM_FE_TONEAREST 0x0
+#define _ARM_FE_UPWARD 0x400000
+#define _ARM_FE_DOWNWARD 0x800000
+#define _ARM_FE_TOWARDZERO 0xc00000
+RoundingMode set_round(RoundingMode r, Type outType)
 {
-    static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
-    static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
+    static const int flt_rounds[kRoundingModeCount] = {
+        _ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
+        _ARM_FE_TOWARDZERO
+    };
+    static const int int_rounds[kRoundingModeCount] = {
+        _ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
+        _ARM_FE_TOWARDZERO
+    };
     const int *p = int_rounds;
-    if( outType == kfloat || outType == kdouble )
-        p = flt_rounds;
-    int oldRound = fegetround();
-    fesetround( p[r] );
-    
-    switch( oldRound )
-    {
-        case FE_TONEAREST:
-            return kRoundToNearestEven;
-        case FE_UPWARD:
-            return kRoundUp;
-        case FE_DOWNWARD:
-            return kRoundDown;
-        case FE_TOWARDZERO:
-            return kRoundTowardZero;
-        default:
-            abort();    // ??!
-    }
-    return kDefaultRoundingMode;    //never happens
+    if (outType == kfloat || outType == kdouble) p = flt_rounds;
+
+    int64_t fpscr = 0;
+    RoundingMode oldRound = get_round();
+
+    _FPU_GETCW(fpscr);
+    _FPU_SETCW(p[r] | (fpscr & ~FPSCR_ROUND_MASK));
+
+    return oldRound;
 }
 
-RoundingMode get_round( void )
+RoundingMode get_round(void)
 {
-    int oldRound = fegetround();
+    int64_t fpscr;
+    int oldRound;
+
+    _FPU_GETCW(fpscr);
+    oldRound = (fpscr & FPSCR_ROUND_MASK);
 
-    switch( oldRound )
+    switch (oldRound)
     {
-        case FE_TONEAREST:
-            return kRoundToNearestEven;
-        case FE_UPWARD:
-            return kRoundUp;
-        case FE_DOWNWARD:
-            return kRoundDown;
-        case FE_TOWARDZERO:
-            return kRoundTowardZero;
+        case _ARM_FE_TONEAREST: return kRoundToNearestEven;
+        case _ARM_FE_UPWARD: return kRoundUp;
+        case _ARM_FE_DOWNWARD: return kRoundDown;
+        case _ARM_FE_TOWARDZERO: return kRoundTowardZero;
     }
-    
+
     return kDefaultRoundingMode;
 }
 
-#elif defined( __arm__ ) && defined( __GNUC__ )
-#define _ARM_FE_TONEAREST           0x0
-#define _ARM_FE_UPWARD              0x400000
-#define _ARM_FE_DOWNWARD            0x800000
-#define _ARM_FE_TOWARDZERO          0xc00000
-#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
-#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
-RoundingMode set_round( RoundingMode r, Type outType )
+#elif !(defined(_WIN32) && defined(_MSC_VER))
+RoundingMode set_round(RoundingMode r, Type outType)
 {
-	static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
-														  _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
-	static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
-														  _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
-	const int *p = int_rounds;
-	if( outType == kfloat || outType == kdouble )
-		p = flt_rounds;
-	int oldRound;
-	_FPU_GETCW(oldRound);
-	_FPU_SETCW( p[r] );
-
-	switch( oldRound )
-	{
-		case _ARM_FE_TONEAREST:
-			return kRoundToNearestEven;
-		case _ARM_FE_UPWARD:
-			return kRoundUp;
-		case _ARM_FE_DOWNWARD:
-			return kRoundDown;
-		case _ARM_FE_TOWARDZERO:
-			return kRoundTowardZero;
-		default:
-			abort();    // ??!
-	}
-	return kDefaultRoundingMode;    //never happens
+    static const int flt_rounds[kRoundingModeCount] = {
+        FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
+    };
+    static const int int_rounds[kRoundingModeCount] = {
+        FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
+    };
+    const int *p = int_rounds;
+    if (outType == kfloat || outType == kdouble) p = flt_rounds;
+    int oldRound = fegetround();
+    fesetround(p[r]);
+
+    switch (oldRound)
+    {
+        case FE_TONEAREST: return kRoundToNearestEven;
+        case FE_UPWARD: return kRoundUp;
+        case FE_DOWNWARD: return kRoundDown;
+        case FE_TOWARDZERO: return kRoundTowardZero;
+        default: abort(); // ??!
+    }
+    return kDefaultRoundingMode; // never happens
 }
 
-RoundingMode get_round( void )
+RoundingMode get_round(void)
 {
-	int oldRound;
-	_FPU_GETCW(oldRound);
-
-	switch( oldRound )
-	{
-		case _ARM_FE_TONEAREST:
-			return kRoundToNearestEven;
-		case _ARM_FE_UPWARD:
-			return kRoundUp;
-		case _ARM_FE_DOWNWARD:
-			return kRoundDown;
-		case _ARM_FE_TOWARDZERO:
-			return kRoundTowardZero;
-	}
-
-	return kDefaultRoundingMode;
+    int oldRound = fegetround();
+
+    switch (oldRound)
+    {
+        case FE_TONEAREST: return kRoundToNearestEven;
+        case FE_UPWARD: return kRoundUp;
+        case FE_DOWNWARD: return kRoundDown;
+        case FE_TOWARDZERO: return kRoundTowardZero;
+    }
+
+    return kDefaultRoundingMode;
 }
- 
+
 #else
-RoundingMode set_round( RoundingMode r, Type outType )
+RoundingMode set_round(RoundingMode r, Type outType)
 {
-    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
-    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
-    const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
+    static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,
+                                                        _RC_UP, _RC_DOWN,
+                                                        _RC_CHOP };
+    static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,
+                                                        _RC_UP, _RC_DOWN,
+                                                        _RC_CHOP };
+    const int *p =
+        (outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds;
     unsigned int oldRound;
 
-    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
-    if (err) {
-//        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__); 
-        return kDefaultRoundingMode;    //what else never happens
+    int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
+    if (err)
+    {
+        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
+                   __FILE__, __LINE__);
+        return kDefaultRoundingMode; // what else never happens
     }
-   
+
     oldRound &= _MCW_RC;
 
-    RoundingMode old = 
-        (oldRound == _RC_NEAR)? kRoundToNearestEven :
-        (oldRound == _RC_UP)?   kRoundUp :
-        (oldRound == _RC_DOWN)? kRoundDown :
-        (oldRound == _RC_CHOP)? kRoundTowardZero: 
-        kDefaultRoundingMode; 
+    RoundingMode old = (oldRound == _RC_NEAR)
+        ? kRoundToNearestEven
+        : (oldRound == _RC_UP) ? kRoundUp
+                               : (oldRound == _RC_DOWN)
+                ? kRoundDown
+                : (oldRound == _RC_CHOP) ? kRoundTowardZero
+                                         : kDefaultRoundingMode;
 
-    _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
-    return old;    //returning old rounding mode
+    _controlfp_s(&oldRound, p[r], _MCW_RC); // setting new rounding mode
+    return old; // returning old rounding mode
 }
 
-RoundingMode get_round( void )
+RoundingMode get_round(void)
 {
     unsigned int oldRound;
 
-    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
+    int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
     oldRound &= _MCW_RC;
-    return 
-        (oldRound == _RC_NEAR)? kRoundToNearestEven :
-        (oldRound == _RC_UP)?   kRoundUp :
-        (oldRound == _RC_DOWN)? kRoundDown :
-        (oldRound == _RC_CHOP)? kRoundTowardZero: 
-        kDefaultRoundingMode; 
+    return (oldRound == _RC_NEAR)
+        ? kRoundToNearestEven
+        : (oldRound == _RC_UP) ? kRoundUp
+                               : (oldRound == _RC_DOWN)
+                ? kRoundDown
+                : (oldRound == _RC_CHOP) ? kRoundTowardZero
+                                         : kDefaultRoundingMode;
 }
 
 #endif
 
 //
-// FlushToZero() sets the host processor into ftz mode.  It is intended to have a remote effect on the behavior of the code in
-// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in 
-// software by testing against FLT_MIN or DBL_MIN in that file.
+// FlushToZero() sets the host processor into ftz mode.  It is intended to have
+// a remote effect on the behavior of the code in basic_test_conversions.c. Some
+// host processors may not support this mode, in which case you'll need to do
+// some clamping in software by testing against FLT_MIN or DBL_MIN in that file.
 //
-// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT* have the behavior in section 7.5.3 of 
-// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic 
+// Note: IEEE-754 says conversions are basic operations.  As such they do *NOT*
+// have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to
+// zero for subnormal inputs or outputs when FTZ mode is on like other basic
 // operators do (e.g. add, subtract, multiply, divide, etc.)
 //
-// Configuring hardware to FTZ mode varies by platform.  
-// CAUTION: Some C implementations may also fail to behave properly in this mode.
+// Configuring hardware to FTZ mode varies by platform.
+// CAUTION: Some C implementations may also fail to behave properly in this
+// mode.
 //
 //  On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
-//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2 
-//          is used for floating point computation! If your OS uses x87, you'll need to figure out how 
-//          to turn that off for the conversions code in basic_test_conversions.c so that they flush to 
-//          zero properly.  Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
-//          in which case, these function are at liberty to do nothing.
+//  On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --
+//  provided that SSE/SSE2
+//          is used for floating point computation! If your OS uses x87, you'll
+//          need to figure out how to turn that off for the conversions code in
+//          basic_test_conversions.c so that they flush to zero properly.
+//          Otherwise, you'll need to add appropriate software clamping to
+//          basic_test_conversions.c, in which case these functions are at
+//          liberty to do nothing.
 //
-#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
-    #include <xmmintrin.h>
-#elif defined( __PPC__ ) 
-    #include <fpu_control.h>
+#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
+#include <xmmintrin.h>
+#elif defined(__PPC__)
+#include <fpu_control.h>
+#elif defined(__mips__)
+#include "mips/m32c1.h"
 #endif
-void *FlushToZero( void )
+void *FlushToZero(void)
 {
-#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
-    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
-        union{ unsigned int i;  void *p; }u = { _mm_getcsr() };
-        _mm_setcsr( u.i | 0x8040 );
-        return u.p;
-    #elif defined( __arm__ )
-        #define _ARM_FE_FTZ     0x1000000
-		#define _ARM_FE_NFTZ    0x0
-        #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
-
-		static const int ftz_modes[ kRoundingModeCount ] = { _ARM_FE_FTZ, _ARM_FE_NFTZ };
-		const int *f = ftz_modes;
-		_FPU_SETCW( f[0] );
-        return NULL;
-    #elif defined( __PPC__ )
-        fpu_control_t flags = 0;
-        _FPU_GETCW(flags);
-        flags |= _FPU_MASK_NI;
-        _FPU_SETCW(flags);    
-        return NULL;
-        #else
-        #error Unknown arch
-    #endif
+#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
+#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
+    union {
+        unsigned int i;
+        void *p;
+    } u = { _mm_getcsr() };
+    _mm_setcsr(u.i | 0x8040);
+    return u.p;
+#elif defined(__arm__) || defined(__aarch64__)
+    int64_t fpscr;
+    _FPU_GETCW(fpscr);
+    _FPU_SETCW(fpscr | FPSCR_FZ);
+    return NULL;
+#elif defined(__PPC__)
+    fpu_control_t flags = 0;
+    _FPU_GETCW(flags);
+    flags |= _FPU_MASK_NI;
+    _FPU_SETCW(flags);
+    return NULL;
+#elif defined(__mips__)
+    fpa_bissr(FPA_CSR_FS);
+    return NULL;
 #else
-    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+#error Unknown arch
+#endif
+#else
+#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
 #endif
 }
 
-// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
-void UnFlushToZero( void *p)
+// Undo the effects of FlushToZero above, restoring the host to default
+// behavior, using the information passed in p.
+void UnFlushToZero(void *p)
 {
-#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
-    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
-        union{ void *p; int i;  }u = { p };
-        _mm_setcsr( u.i );
-    #elif defined( __arm__ )
-        #define _ARM_FE_FTZ     0x1000000
-		#define _ARM_FE_NFTZ    0x0
-        #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
-
-		static const int ftz_modes[ kRoundingModeCount ] = { _ARM_FE_FTZ, _ARM_FE_NFTZ };
-		const int *f = ftz_modes;
-		_FPU_SETCW( f[1] );
-    #elif defined( __PPC__)
-        fpu_control_t flags = 0;
-        _FPU_GETCW(flags);
-        flags &= ~_FPU_MASK_NI;
-        _FPU_SETCW(flags);
-        #else
-        #error Unknown arch
-    #endif
+#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
+#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
+    union {
+        void *p;
+        int i;
+    } u = { p };
+    _mm_setcsr(u.i);
+#elif defined(__arm__) || defined(__aarch64__)
+    int64_t fpscr;
+    _FPU_GETCW(fpscr);
+    _FPU_SETCW(fpscr & ~FPSCR_FZ);
+#elif defined(__PPC__)
+    fpu_control_t flags = 0;
+    _FPU_GETCW(flags);
+    flags &= ~_FPU_MASK_NI;
+    _FPU_SETCW(flags);
+#elif defined(__mips__)
+    fpa_bicsr(FPA_CSR_FS);
+#else
+#error Unknown arch
+#endif
 #else
-    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
+#error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
 #endif
 }
diff --git a/oclmath/rounding_mode.h b/oclmath/rounding_mode.h
index e4cf14e97..6f52f0a00 100644
--- a/oclmath/rounding_mode.h
+++ b/oclmath/rounding_mode.h
@@ -1,17 +1,28 @@
-
-/******************************************************************
- //
- //  OpenCL Conformance Tests
- // 
- //  Copyright:	(c) 2008-2013 by Apple Inc. All Rights Reserved.
- //
- ******************************************************************/
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
 #ifndef __ROUNDING_MODE_H__
 #define __ROUNDING_MODE_H__
 
 #include "compat.h"
 
+#if (defined(_WIN32) && defined(_MSC_VER))
+#include "errorHelpers.h"
+#include "testHarness.h"
+#endif
+
 typedef enum
 {
     kDefaultRoundingMode = 0,
@@ -21,7 +32,7 @@ typedef enum
     kRoundTowardZero,
 
     kRoundingModeCount
-}RoundingMode;
+} RoundingMode;
 
 typedef enum
 {
@@ -35,24 +46,15 @@ typedef enum
     kdouble = 7,
     kulong = 8,
     klong = 9,
-    
-    //This goes last
-    kTypeCount
-}Type;
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern RoundingMode set_round( RoundingMode r, Type outType );
-extern RoundingMode get_round( void );
-extern void *FlushToZero( void );
-extern void UnFlushToZero( void *p);
-
-#ifdef __cplusplus
-}
-#endif
+    // This goes last
+    kTypeCount
+} Type;
 
+extern RoundingMode set_round(RoundingMode r, Type outType);
+extern RoundingMode get_round(void);
+extern void *FlushToZero(void);
+extern void UnFlushToZero(void *p);
 
 
 #endif /* __ROUNDING_MODE_H__ */