diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..0bb9d45e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,44 @@ +language: cpp + +compiler: + - gcc + +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq fglrx opencl-headers libboost-program-options-dev libfftw3-dev libgtest-dev +# Uncomment below to help verify the installs above work +# - ls -la /usr/lib/libboost* +# - ls -la /usr/include/boost +# - ls -la /usr/src/gtest + +install: + - mkdir -p bin/gTest + - cd bin/gTest + - cmake -DCMAKE_BUILD_TYPE=Release /usr/src/gtest + - make + - sudo mv libg* /usr/lib + +before_script: + - cd ${TRAVIS_BUILD_DIR} + - mkdir -p bin/clFFT + - cd bin/clFFT + - cmake -DBoost_NO_SYSTEM_PATHS=OFF ../../src + +script: + - make install +# - ls -Rla package +# Run a simple test to validate that the build works; CPU device in a VM + - cd package/bin + - export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clFFT/package/lib64:${LD_LIBRARY_PATH} + - ./Client -i + +after_success: + - cd ${TRAVIS_BUILD_DIR}/bin/clFFT + - make package + +notifications: + email: + - clmath-developers@googlegroups.com + on_success: change + on_failure: always + \ No newline at end of file diff --git a/CHANGELOG b/CHANGELOG index a88cad8e..e5814efe 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -171,34 +171,3 @@ For example: ./clfft.Sample -iv - Watch for the version strings to print out; watch for 'Client Test *****PASS*****' to print out. -_______________________________________________________________________________ -(C) 2010-2013 Advanced Micro Devices, Inc. All rights reserved. AMD, the AMD -Arrow logo, ATI, the ATI logo, Radeon, FireStream, FireGL, Catalyst, and -combinations thereof are trademarks of Advanced Micro Devices, Inc. Microsoft -(R), Windows, and Windows Vista (R) are registered trademarks of Microsoft -Corporation in the U.S. and/or other jurisdictions. OpenCL and the OpenCL logo -are trademarks of Apple Inc. used by permission by Khronos. 
Other names are for -informational purposes only and may be trademarks of their respective owners. - -The contents of this document are provided in connection with Advanced Micro -Devices, Inc. ("AMD") products. AMD makes no representations or warranties with -respect to the accuracy or completeness of the contents of this publication and -reserves the right to make changes to specifications and product descriptions -at any time without notice. The information contained herein may be of a -preliminary or advance nature and is subject to change without notice. No -license, whether express, implied, arising by estoppel or otherwise, to any -intellectual property rights is granted by this publication. Except as set forth -in AMD's Standard Terms and Conditions of Sale, AMD assumes no liability -whatsoever, and disclaims any express or implied warranty, relating to its -products including, but not limited to, the implied warranty of -merchantability, fitness for a particular purpose, or infringement of any -intellectual property right. - -AMD's products are not designed, intended, authorized or warranted for use as -components in systems intended for surgical implant into the body, or in other -applications intended to support or sustain life, or in any other application -in which the failure of AMD's product could create a situation where personal -injury, death, or severe property or environmental damage may occur. AMD -reserves the right to discontinue or make changes to its products at any time -without notice. -_______________________________________________________________________________ diff --git a/README.md b/README.md index 13b73dab..00c5f79d 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,87 @@ clFFT ===== +[![Build Status](https://travis-ci.org/clMathLibraries/clFFT.png)](https://travis-ci.org/clMathLibraries/clFFT) -clMath is a software library containing FFT and BLAS functions written in OpenCL. 
In addition to GPU devices, the libraries also support running on CPU devices to facilitate debugging and multicore programming. +clMath is a software library containing FFT and BLAS functions written +in OpenCL. In addition to GPU devices, the libraries also support +running on CPU devices to facilitate debugging and multicore +programming. -APPML 1.10 is the most current generally available version of the library, and pre-built binaries are available for download on both Linux and Windows platforms. +clMath 2.1 is the latest version and is available as source only. +clMath's predecessor APPML 1.10 has pre-built binaries available for +download on both Linux and Windows platforms. ## Introduction to clFFT -The FFT is an implementation of the Discrete Fourier Transform (DFT) that makes use of symmetries in the FFT definition to reduce the mathematical intensity required from O(N2) to O(N log2( N )) when the sequence length N is the product of small prime factors. Currently, there is no standard API for FFT routines. Hardware vendors usually provide a set of high-performance FFTs optimized for their systems: no two vendors employ the same interfaces for their FFT routines. clFFT provides a set of FFT routines that are optimized for AMD graphics processors, but also are functional across CPU and other compute devices. +The FFT is an implementation of the Discrete Fourier Transform (DFT) +that makes use of symmetries in the FFT definition to reduce the +mathematical intensity required from O(N^2) to O(N log2( N )) when the +sequence length N is the product of small prime factors. Currently, +there is no standard API for FFT routines. Hardware vendors usually +provide a set of high-performance FFTs optimized for their systems: no +two vendors employ the same interfaces for their FFT routines. clFFT +provides a set of FFT routines that are optimized for AMD graphics +processors, but also are functional across CPU and other compute +devices. 
-The clFFT library is an open source OpenCL library implementation of discrete Fast Fourier Transforms. It: +The clFFT library is an open source OpenCL library implementation of +discrete Fast Fourier Transforms. It: -* Provides a fast and accurate platform for calculating discrete FFTs. -* Works on CPU or GPU backends. -* Supports in-place or out-of-place transforms. -* Supports 1D, 2D, and 3D transforms with a batch size that can be greater than 1. -* Supports planar (real and complex components in separate arrays) and interleaved (real and complex components as a pair contiguous in memory) formats. -* Supports dimension lengths that can be any mix of powers of 2, 3, and 5. -* Supports single and double precision floating point formats. +- Provides a fast and accurate platform for calculating discrete FFTs. + +- Works on CPU or GPU backends. + +- Supports in-place or out-of-place transforms. + +- Supports 1D, 2D, and 3D transforms with a batch size that can be + greater than 1. + +- Supports planar (real and complex components in separate arrays) and + interleaved (real and complex components as a pair contiguous in + memory) formats. + +- Supports dimension lengths that can be any mix of powers of 2, 3, + and 5. + +- Supports single and double precision floating point formats. 
## clFFT library user documentation -[Library and API documentation]( http://clmathlibraries.github.io/clFFT/ ) for developers is available online as a GitHub Pages website + +[Library and API documentation][] for developers is available online as +a GitHub Pages website + +### Google Groups + +Two mailing lists have been created for the clMath projects: + +- [clmath@googlegroups.com](https://groups.google.com/forum/#!forum/clmath) - group whose focus is to answer + questions on using the library or reporting issues + +- [clmath-developers@googlegroups.com](https://groups.google.com/forum/#!forum/clmath-developers) - group whose focus is for + developers interested in contributing to the library code itself ## clFFT Wiki -The [project wiki](https://github.com/clMathLibraries/clFFT/wiki) contains helpful documentation, including a [build primer](https://github.com/clMathLibraries/clFFT/wiki/Build) + +The [project wiki](https://github.com/clMathLibraries/clFFT/wiki) contains helpful +documentation, including a [build +primer](https://github.com/clMathLibraries/clFFT/wiki/Build) ## Contributing code -Please refer to and read the [Contributing](CONTRIBUTING.md) document for guidelines on how to contribute code to this open source project + +Please refer to and read the [Contributing][] document for guidelines on +how to contribute code to this open source project. The code in the +/master branch is considered to be stable, and all pull-requests should +be made against the /develop branch. 
## License -The source for clFFT is licensed under the [Apache License, Version 2.0]( http://www.apache.org/licenses/LICENSE-2.0 ) + +The source for clFFT is licensed under the [Apache License, Version +2.0][] ## Example -The simple example below shows how to use clFFT to compute an simple 1D forward transform +The simple example below shows how to use clFFT to compute a simple 1D +forward transform ```c #include @@ -122,21 +170,43 @@ int main( void ) ``` ## Build dependencies + ### Library for Windows -* Windows® 7/8 -* Visual Studio 2010 SP1, 2012 -* Latest CMake -* An OpenCL SDK, such as APP SDK 2.8 + +- Windows® 7/8 + +- Visual Studio 2010 SP1, 2012 + +- Latest CMake + +- An OpenCL SDK, such as APP SDK 2.9 ### Library for Linux -* GCC 4.6 and onwards -* Latest CMake -* An OpenCL SDK, such as APP SDK 2.8 + +- GCC 4.6 and onwards + +- Latest CMake + +- An OpenCL SDK, such as APP SDK 2.9 + +### Library for Mac OSX + +- Recommended to generate Unix makefiles with cmake ### Test infrastructure -* Latest Googletest -* Latest FFTW -* Latest Boost + +- Googletest v1.6 + +- Latest FFTW + +- Latest Boost ### Performance infrastructure -* Python \ No newline at end of file + +- Python + + [Library and API documentation]: http://clmathlibraries.github.io/clFFT/ + [clmath@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki + [clmath-developers@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki/Build + [Contributing]: CONTRIBUTING.md + [Apache License, Version 2.0]: http://www.apache.org/licenses/LICENSE-2.0 diff --git a/doc/clFFT.doxy b/doc/clFFT.doxy index 366f9c75..f771f04d 100644 --- a/doc/clFFT.doxy +++ b/doc/clFFT.doxy @@ -26,7 +26,7 @@ DOXYFILE_ENCODING = UTF-8 # identify the project. Note that if you do not use Doxywizard you need # to put quotes around the project name if it contains spaces. -PROJECT_NAME = clMathFft +PROJECT_NAME = clFFT # The PROJECT_NUMBER tag can be used to enter a project or revision number. 
# This could be handy for archiving the generated documentation or diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5589649a..f9617943 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,19 +22,35 @@ else( ) cmake_minimum_required( VERSION 2.6 ) endif( ) +if( CMAKE_GENERATOR MATCHES "NMake" ) + option( NMAKE_COMPILE_VERBOSE "Print compile and link strings to the console" OFF ) + if( NMAKE_COMPILE_VERBOSE ) + set( CMAKE_START_TEMP_FILE "" ) + set( CMAKE_END_TEMP_FILE "" ) + set( CMAKE_VERBOSE_MAKEFILE 1 ) + endif( ) +endif( ) + # This becomes the name of the solution file project( clFFT ) # Define a version for the code -set( CLFFT_VERSION_MAJOR 2 ) -set( CLFFT_VERSION_MINOR 0 ) -set( CLFFT_VERSION_PATCH 0 ) +if( NOT DEFINED CLFFT_VERSION_MAJOR ) + set( CLFFT_VERSION_MAJOR 2 ) +endif( ) + +if( NOT DEFINED CLFFT_VERSION_MINOR ) + set( CLFFT_VERSION_MINOR 2 ) +endif( ) + +if( NOT DEFINED CLFFT_VERSION_PATCH ) + set( CLFFT_VERSION_PATCH 0 ) +endif( ) + set( CLFFT_VERSION "${CLFFT_VERSION_MAJOR}.${CLFFT_VERSION_MINOR}.${CLFFT_VERSION_PATCH}") -# uncomment these to debug nmake and borland makefiles -#SET(CMAKE_START_TEMP_FILE "") -#SET(CMAKE_END_TEMP_FILE "") -#SET(CMAKE_VERBOSE_MAKEFILE 1) +# This is incremented when the ABI to the library changes +set( CLFFT_SOVERSION 2 ) set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR} ) @@ -54,17 +70,12 @@ option( BUILD_RUNTIME "Build the FFT runtime library" ON ) option( BUILD_CLIENT "Build a command line clFFT client program with a variety of configurable parameters (dependency on Boost)" ON ) option( BUILD_TEST "Build the library testing suite (dependency on google test, Boost, and FFTW)" ON ) option( BUILD_LOADLIBRARIES "Build the optional dynamic load libraries that the FFT runtime will search for" ON ) +option( BUILD_SHARED_LIBRARY "Build shared libraries." ON) # If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. 
# Otherwise, create a sensible default that the user can change if( DEFINED ENV{BOOST_ROOT} ) set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" ) -else( ) - if( UNIX ) - set( BOOST_ROOT "/usr" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) - else( ) - set( BOOST_ROOT "/Path/To/boost_x_xx_x" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) - endif() endif( ) # Currently, linux has a problem outputing both narrow and wide characters, @@ -80,8 +91,9 @@ else() endif() if( MSVC_IDE ) - set( BUILD64 ${CMAKE_CL_64} ) set_property( GLOBAL PROPERTY USE_FOLDERS TRUE ) + + set( BUILD64 ${CMAKE_CL_64} ) else() option( BUILD64 "Build a 64-bit product" ON ) @@ -93,32 +105,49 @@ else() endif() endif() +# These variables are meant to contain string which should be appended to the installation paths +# of library and executable binaries, respectively. They are meant to be user configurable/overridable. 
+set( SUFFIX_LIB_DEFAULT "" ) +set( SUFFIX_BIN_DEFAULT "" ) + # Modify the global find property to help us find libraries like Boost in the correct paths for 64-bit # Essentially, find_library calls will look for /lib64 instead of /lib; works for windows and linux if( BUILD64 ) set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE ) message( STATUS "64bit build - FIND_LIBRARY_USE_LIB64_PATHS TRUE" ) -else() + + set( SUFFIX_LIB_DEFAULT "64" ) +else( ) set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS FALSE ) message( STATUS "32bit build - FIND_LIBRARY_USE_LIB64_PATHS FALSE" ) -endif() +endif( ) + +set( SUFFIX_LIB ${SUFFIX_LIB_DEFAULT} CACHE STRING "String to append to 'lib' install path" ) +set( SUFFIX_BIN ${SUFFIX_BIN_DEFAULT} CACHE STRING "String to append to 'bin' install path" ) + +# Useful variables to configure FindBoost.cmake +# set( Boost_USE_MULTITHREADED ON ) +# set( Boost_DETAILED_FAILURE_MSG ON ) +# set( Boost_DEBUG ON ) +# set( Boost_NO_SYSTEM_PATHS ON ) # Client is built only if boost is found; on windows, we need vs10 or higher # Find Boost on the system, and configure the type of boost build we want -set( Boost_USE_MULTITHREADED ON ) -set( Boost_USE_STATIC_LIBS ON ) -set( Boost_DETAILED_FAILURE_MSG ON ) -set( Boost_DEBUG ON ) -set( Boost_ADDITIONAL_VERSIONS "1.46.1" "1.46" "1.44.0" "1.44" ) - -# On linux, the boost installed in the system always appears to override any user boost installs -if( UNIX ) - set( Boost_NO_SYSTEM_PATHS TRUE ) +if( NOT DEFINED Boost_USE_STATIC_LIBS ) + set( Boost_USE_STATIC_LIBS ON ) +endif( ) + +if( NOT DEFINED Boost_USE_STATIC_RUNTIME ) + set( Boost_USE_STATIC_RUNTIME OFF ) endif( ) # This will define Boost_FOUND find_package( Boost 1.33.0 COMPONENTS program_options ) -message( STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}" ) +if( Boost_FOUND ) + message( STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}" ) +else( ) + message( WARNING "Try setting 
Boost_DEBUG and Boost_DETAILED_FAILURE_MSG for more information" ) +endif( ) # This will define OPENCL_FOUND find_package( OpenCL ) @@ -159,24 +188,16 @@ get_filename_component( C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME_WE ) # message( "CMAKE_C_COMPILER: " ${CMAKE_C_COMPILER} ) # Set common compile and link options -if( C_COMPILER_NAME STREQUAL "cl" ) +if( MSVC ) # Following options for nMake message( STATUS "Detected MSVS Ver: " ${MSVC_VERSION} ) - if( NOT MSVC_IDE ) - message( STATUS "Using an nMake environment to build" ) - - # I can't get nmake to work because of faulty /machine:, not sure that this isn't a cmake bug - # if( BUILD64 ) - # set( CMAKE_EXE_LINKER_FLAGS "/machine:amd64 ${CMAKE_EXE_LINKER_FLAGS}" ) - # set( CMAKE_SHARED_LINKER_FLAGS "/machine:amd64 ${CMAKE_SHARED_LINKER_FLAGS}" ) - # set( CMAKE_MODULE_LINKER_FLAGS "/machine:amd64 ${CMAKE_MODULE_LINKER_FLAGS }" ) - # else( ) - # set( CMAKE_EXE_LINKER_FLAGS "/machine:i386 ${CMAKE_EXE_LINKER_FLAGS}" ) - # endif( ) - endif( ) + # CMake sets huge stack frames for windows, for whatever reason. We go with compiler default. + string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" ) + string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}" ) + string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}" ) -elseif( C_COMPILER_NAME STREQUAL "gcc" ) +elseif( CMAKE_COMPILER_IS_GNUCXX ) message( STATUS "Detected GNU fortran compiler." 
) EXEC_PROGRAM( ${CMAKE_CXX_COMPILER} ARGS --version OUTPUT_VARIABLE vnum ) STRING(REGEX REPLACE ".*([0-9])\\.([0-9])\\.([0-9]).*" "\\1\\2\\3" vnum ${vnum}) @@ -185,6 +206,9 @@ elseif( C_COMPILER_NAME STREQUAL "gcc" ) set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) endif() + set( CMAKE_CXX_FLAGS "-pthread ${CMAKE_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}" ) + if( BUILD64 ) set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" ) set( CMAKE_C_FLAGS "-m64 ${CMAKE_C_FLAGS}" ) @@ -197,12 +221,10 @@ elseif( C_COMPILER_NAME STREQUAL "gcc" ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage") endif() -else( ) - message( FATAL_ERROR "Compiler name not detected" ) endif( ) -# If UNICODE is defined, pass extra definitions into -if( UNICODE ) +# If UNICODE is defined for microsoft compilers, pass extra definitions +if( MSVC AND UNICODE ) add_definitions( "/DUNICODE /D_UNICODE" ) endif( ) @@ -214,9 +236,9 @@ message( STATUS "CMAKE_CXX_COMPILER relwithdebinfo flags: " ${CMAKE_CXX_FLAGS_RE message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} ) # configure a header file to pass the CMake version settings to the source, and package the header files in the output archive -configure_file( "${PROJECT_SOURCE_DIR}/include/version.h.in" "${PROJECT_BINARY_DIR}/include/version.h" ) +configure_file( "${PROJECT_SOURCE_DIR}/include/clFFT.version.h.in" "${PROJECT_BINARY_DIR}/include/clFFT.version.h" ) install( FILES - "${PROJECT_BINARY_DIR}/include/version.h" + "${PROJECT_BINARY_DIR}/include/clFFT.version.h" "include/clFFT.h" "include/clAmdFft.h" "include/clAmdFft.version.h" diff --git a/src/FindFFTW.cmake b/src/FindFFTW.cmake index 50a632e8..cac5fc64 100644 --- a/src/FindFFTW.cmake +++ b/src/FindFFTW.cmake @@ -33,7 +33,9 @@ # FFTW_ROOT - (as a CMake or environment variable) # The root directory of the fftw install prefix # -#----------------------- +# FIND_LIBRARY_USE_LIB64_PATHS - Global property 
that controls whether +# findFFTW should search for 64bit or 32bit libs +#----------------------------------------------- # Example Usage: # # find_package(FFTW REQUIRED) @@ -42,53 +44,52 @@ # add_executable(foo foo.cc) # target_link_libraries(foo ${FFTW_LIBRARIES}) # -#----------------------- -if( DEFINED ENV{FFTW_ROOT} ) - set( FFTW_ROOT $ENV{FFTW_ROOT} CACHE PATH "Environment variable defining the root of FFTW" ) -else( ) - set( FFTW_ROOT "/usr/lib" CACHE PATH "Environment variable defining the root of FFTW" ) -endif( ) +#----------------------------------------------- find_path(FFTW_INCLUDE_DIRS - NAMES fftw3.h + NAMES fftw3.h HINTS - ${FFTW_ROOT}/api ${FFTW_ROOT}/include + ${FFTW_ROOT}/api ${FFTW_ROOT} - $ENV{FFTW_ROOT}/api $ENV{FFTW_ROOT}/include - $ENV{FFTW_ROOT} - PATHS - /usr/include - /usr/local/include + $ENV{FFTW_ROOT}/api + ENV FFTW_ROOT + PATHS + /usr/include + /usr/local/include ) mark_as_advanced( FFTW_INCLUDE_DIRS ) find_library( FFTW_SINGLE_PRECISION_LIBRARIES - NAMES fftw3f libfftw3f-3 - HINTS - ${FFTW_ROOT} - ${FFTW_ROOT}/lib - $ENV{FFTW_ROOT} - $ENV{FFTW_ROOT}/lib - PATHS - /usr/lib - /usr/local/lib - DOC "FFTW dynamic library" + NAMES fftw3f libfftw3f-3 + HINTS + ${FFTW_ROOT}/lib + ${FFTW_ROOT}/.libs + ${FFTW_ROOT} + $ENV{FFTW_ROOT}/lib + $ENV{FFTW_ROOT}/.libs + ENV FFTW_ROOT + PATHS + /usr/lib + /usr/local/lib + DOC "FFTW dynamic library" ) mark_as_advanced( FFTW_SINGLE_PRECISION_LIBRARIES ) find_library( FFTW_DOUBLE_PRECISION_LIBRARIES - NAMES fftw3 libfftw3-3 - HINTS - ${FFTW_ROOT} - ${FFTW_ROOT}/lib - $ENV{FFTW_ROOT} - $ENV{FFTW_ROOT}/lib - PATHS - /usr/lib - /usr/local/lib - DOC "FFTW dynamic library" + NAMES fftw3 libfftw3-3 + HINTS + ${FFTW_ROOT}/lib + ${FFTW_ROOT}/.libs + ${FFTW_ROOT} + $ENV{FFTW_ROOT}/lib + $ENV{FFTW_ROOT}/.libs + ENV FFTW_ROOT + PATHS + /usr/lib + /usr/local/lib + DOC "FFTW dynamic library" ) mark_as_advanced( FFTW_DOUBLE_PRECISION_LIBRARIES ) @@ -99,6 +100,6 @@ include( FindPackageHandleStandardArgs ) 
FIND_PACKAGE_HANDLE_STANDARD_ARGS( FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDE_DIRS ) if( NOT FFTW_FOUND ) - message( STATUS "FindFFTW looked for single precision libraries named: fftw3f or libfftw3f-3" ) - message( STATUS "FindFFTW looked for double precision libraries named: fftw3 or libfftw3-3" ) + message( STATUS "FindFFTW looked for single precision libraries named: fftw3f or libfftw3f-3" ) + message( STATUS "FindFFTW looked for double precision libraries named: fftw3 or libfftw3-3" ) endif() diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake index 4dbb3d57..8725612f 100644 --- a/src/FindOpenCL.cmake +++ b/src/FindOpenCL.cmake @@ -46,21 +46,17 @@ # target_link_libraries(foo ${OPENCL_LIBRARIES}) # #----------------------- -if( DEFINED ENV{AMDAPPSDKROOT} ) - set( OPENCL_ROOT $ENV{AMDAPPSDKROOT} CACHE PATH "Environment variable defining the root of OPENCL implementation" ) -else( ) - set( OPENCL_ROOT "/usr/lib" CACHE PATH "Environment variable defining the root of OPENCL implementation" ) -endif( ) find_path(OPENCL_INCLUDE_DIRS - NAMES OpenCL/cl.h CL/cl.h + NAMES OpenCL/cl.h CL/cl.h HINTS - ${OPENCL_ROOT}/include - ENV AMDAPPSDKROOT/include - PATHS - /usr/include - /usr/local/include - DOC "OpenCL header file path" + ${OPENCL_ROOT}/include + $ENV{AMDAPPSDKROOT}/include + $ENV{CUDA_PATH}/include + PATHS + /usr/include + /usr/local/include + DOC "OpenCL header file path" ) mark_as_advanced( OPENCL_INCLUDE_DIRS ) @@ -68,23 +64,29 @@ mark_as_advanced( OPENCL_INCLUDE_DIRS ) get_property( LIB64 GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ) if( LIB64 ) - find_library( OPENCL_LIBRARIES - NAMES OpenCL - HINTS - ${OPENCL_ROOT}/lib - ENV AMDAPPSDKROOT/lib - DOC "OpenCL dynamic library path" - PATH_SUFFIXES x86_64 x64 - ) + find_library( OPENCL_LIBRARIES + NAMES OpenCL + HINTS + ${OPENCL_ROOT}/lib + $ENV{AMDAPPSDKROOT}/lib + $ENV{CUDA_PATH}/lib + DOC "OpenCL dynamic library path" + PATH_SUFFIXES x86_64 x64 + PATHS + /usr/lib + ) else( ) - find_library( 
OPENCL_LIBRARIES - NAMES OpenCL - HINTS - ${OPENCL_ROOT}/lib - ENV AMDAPPSDKROOT/lib - DOC "OpenCL dynamic library path" - PATH_SUFFIXES x86 - ) + find_library( OPENCL_LIBRARIES + NAMES OpenCL + HINTS + ${OPENCL_ROOT}/lib + $ENV{AMDAPPSDKROOT}/lib + $ENV{CUDA_PATH}/lib + DOC "OpenCL dynamic library path" + PATH_SUFFIXES x86 Win32 + PATHS + /usr/lib + ) endif( ) mark_as_advanced( OPENCL_LIBRARIES ) @@ -92,5 +94,5 @@ include( FindPackageHandleStandardArgs ) FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) if( NOT OPENCL_FOUND ) - message( STATUS "FindOpenCL looked for libraries named: OpenCL" ) + message( STATUS "FindOpenCL looked for libraries named: OpenCL" ) endif() diff --git a/src/FindclFFT.cmake b/src/FindclFFT.cmake new file mode 100644 index 00000000..92dcc829 --- /dev/null +++ b/src/FindclFFT.cmake @@ -0,0 +1,61 @@ +# - Find clFFT, AMD's OpenCL FFT library + +# This script defines the following variables: +# CLFFT_INCLUDE_DIRS - Location of clFFT's include directory. +# CLFFT_LIBRARIES - Location of clFFT's libraries +# CLFFT_FOUND - True if clFFT has been located +# +# If your clFFT installation is not in a standard installation directory, you +# may provide a hint to where it may be found. Simply set the value CLFFT_ROOT +# to the directory containing 'include/clFFT.h" prior to calling this script. +# +# By default this script will attempt to find the 32-bit version of clFFT. +# If you desire to use the 64-bit version instead, set +# set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON) +# prior to calling this script. +# +#============================================================================= +# Copyright 2014 Brian Kloppenborg +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +IF(CLFFT_INCLUDE_DIRS) + # Already in cache, be silent + set (CLFFT_FIND_QUIETLY TRUE) +ENDIF (CLFFT_INCLUDE_DIRS) + +FIND_PATH(CLFFT_ROOT_DIR + NAMES include/clFFT.h + HINTS /usr/local/ ${CLFFT_ROOT} + DOC "clFFT root directory.") + +FIND_PATH(_CLFFT_INCLUDE_DIRS + NAMES clFFT.h + HINTS ${CLFFT_ROOT_DIR}/include + DOC "clFFT Include directory") + +FIND_LIBRARY(_CLFFT_LIBRARY + NAMES clFFT + HINTS ${CLFFT_ROOT_DIR}/lib) + +SET(CLFFT_INCLUDE_DIRS ${_CLFFT_INCLUDE_DIRS}) +SET(CLFFT_LIBRARIES ${_CLFFT_LIBRARY}) + +# handle the QUIETLY and REQUIRED arguments and set CLFFT_FOUND to TRUE if +# all listed variables are TRUE +INCLUDE (FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(CLFFT DEFAULT_MSG CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS) +MARK_AS_ADVANCED(CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS) + + diff --git a/src/client/CMakeLists.pack b/src/client/CMakeLists.pack index b5903633..2fcf3ea1 100644 --- a/src/client/CMakeLists.pack +++ b/src/client/CMakeLists.pack @@ -1,6 +1,18 @@ -############################################################################# -## Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -############################################################################# +# ######################################################################## +# Copyright 2013 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ######################################################################## cmake_minimum_required( VERSION 2.6 ) project( clFFT.Sample ) diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index d9609cca..fdf209f5 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -33,10 +33,12 @@ set( Client.Files ${Client.Source} ${Client.Headers} ) set( DL_LIB "" ) if( WIN32 ) add_definitions( "/D_CONSOLE" ) -else() +elseif( APPLE ) + set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) +else( ) # To use the dlopen() and dlclose() functions, we should link with libdl set( DL_LIB "-ldl" ) -endif() +endif( ) # Include standard OpenCL headers include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include ) @@ -48,22 +50,9 @@ target_link_libraries( Client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${DL_ set_target_properties( Client PROPERTIES VERSION ${CLFFT_VERSION} ) set_target_properties( Client PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) -if( BUILD64 ) - # CPack configuration; include the executable into the package - install( TARGETS Client - RUNTIME DESTINATION bin64 - LIBRARY DESTINATION lib64 - ARCHIVE DESTINATION lib64/import - ) - -else() - # CPack configuration; include the executable into the package - install( TARGETS Client - RUNTIME DESTINATION bin32 - LIBRARY DESTINATION lib32 - ARCHIVE DESTINATION lib32/import - ) -endif() - -# configure_file( "${PROJECT_SOURCE_DIR}/client/CMakeLists.pack" - # 
"${PROJECT_BINARY_DIR}/samples/CMakeLists.txt" COPYONLY ) +# CPack configuration; include the executable into the package +install( TARGETS Client + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) diff --git a/src/client/openCL.misc.h b/src/client/openCL.misc.h index f7f6c202..4560d2b4 100644 --- a/src/client/openCL.misc.h +++ b/src/client/openCL.misc.h @@ -23,7 +23,7 @@ #include "unicode.compatibility.h" // Creating a portable defintion of countof -#if defined( _WIN32 ) +#if defined( _MSC_VER ) #define countOf _countof #else #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) diff --git a/src/include/clFFT.h b/src/include/clFFT.h index f75ded30..583a76b9 100644 --- a/src/include/clFFT.h +++ b/src/include/clFFT.h @@ -33,7 +33,7 @@ #include #endif -#include "version.h" +#include "clFFT.version.h" /*! This preprocessor definition is the standard way of making exporting APIs * from a DLL simpler. All files within this DLL are compiled with the CLFFT_EXPORTS @@ -127,6 +127,7 @@ enum clfftStatus_ CLFFT_VERSION_MISMATCH, /*!< Version conflict between client and library. */ CLFFT_INVALID_PLAN, /*!< Requested plan could not be found. */ CLFFT_DEVICE_NO_DOUBLE, /*!< Double precision not supported on this device. */ + CLFFT_DEVICE_MISMATCH, /*!< Attempt to run on a device using a plan baked for a different device. */ CLFFT_ENDSTATUS /* This value will always be last, and marks the length of clfftStatus. 
*/ }; typedef enum clfftStatus_ clfftStatus; diff --git a/src/include/version.h.in b/src/include/clFFT.version.h.in similarity index 100% rename from src/include/version.h.in rename to src/include/clFFT.version.h.in diff --git a/src/include/sharedLibrary.h b/src/include/sharedLibrary.h index 9f34b3a1..e5e65de7 100644 --- a/src/include/sharedLibrary.h +++ b/src/include/sharedLibrary.h @@ -79,7 +79,7 @@ inline void* LoadFunctionAddr( void* libHandle, std::string funcName ) #if defined( _WIN32 ) HMODULE fileHandle = reinterpret_cast< HMODULE >( libHandle ); - void* pFunc = ::GetProcAddress( fileHandle, funcName.c_str( ) ); + void* pFunc = reinterpret_cast< void* >( ::GetProcAddress( fileHandle, funcName.c_str( ) ) ); #else void* pFunc = ::dlsym( libHandle, funcName.c_str( ) ); #endif diff --git a/src/include/stdafx.h b/src/include/stdafx.h index 5a8077bf..4ab26bf2 100644 --- a/src/include/stdafx.h +++ b/src/include/stdafx.h @@ -42,8 +42,11 @@ #include #include "targetver.h" +#if !defined( NOMINMAX ) #define NOMINMAX - #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#endif + + #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers // Windows Header Files: #include #endif diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index ea7637bf..63ac0f9c 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -74,29 +74,59 @@ add_definitions( "/DCLFFT_EXPORTS" ) # Include standard OpenCL headers include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include ) -add_library( clFFT SHARED ${clFFT.Files} ) +if(BUILD_SHARED_LIBRARY) + add_library( clFFT SHARED ${clFFT.Files} ) +else() + add_library( clFFT STATIC ${clFFT.Files} ) +endif() target_link_libraries( clFFT ${OPENCL_LIBRARIES} ) set_target_properties( clFFT PROPERTIES VERSION ${CLFFT_VERSION} ) +set_target_properties( clFFT PROPERTIES SOVERSION ${CLFFT_SOVERSION} ) set_target_properties( clFFT PROPERTIES 
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) -if( UNIX ) - # Right now, linux has problems compiling dynamic_cast, but the flag below doesn't help - # set_target_properties( clFFT PROPERTIES COMPILE_FLAGS "-frtti" ) +if( CMAKE_COMPILER_IS_GNUCC ) + configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/clFFT.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc @ONLY ) + + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc + DESTINATION lib${SUFFIX_LIB}/pkgconfig ) endif( ) -if( BUILD64 ) - # CPack configuration; include the executable into the package - install( TARGETS clFFT - RUNTIME DESTINATION bin64 - LIBRARY DESTINATION lib64 - ARCHIVE DESTINATION lib64/import - ) -else() - # CPack configuration; include the executable into the package - install( TARGETS clFFT - RUNTIME DESTINATION bin32 - LIBRARY DESTINATION lib32 - ARCHIVE DESTINATION lib32/import - ) -endif() +# CPack configuration; include the executable into the package +install( TARGETS clFFT + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) + +# For debug builds, include the debug runtimes into the package for testing on non-developer machines +set( CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP true ) +set( CMAKE_INSTALL_DEBUG_LIBRARIES true ) +set( CMAKE_INSTALL_DEBUG_LIBRARIES_ONLY true ) + +if( WIN32 ) + set( CLFFT_RUNTIME_DESTINATION bin${SUFFIX_BIN} ) +else( ) + set( CLFFT_RUNTIME_DESTINATION lib${SUFFIX_LIB} ) +endif( ) + +include( InstallRequiredSystemLibraries ) + +# Install necessary runtime files for debug builds +install( PROGRAMS ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} + CONFIGURATIONS Debug + DESTINATION ${CLFFT_RUNTIME_DESTINATION} ) + +# Install all *.pdb files for debug builds +install( DIRECTORY ${PROJECT_BINARY_DIR}/staging/ + DESTINATION ${CLFFT_RUNTIME_DESTINATION} + OPTIONAL + CONFIGURATIONS Debug + FILES_MATCHING PATTERN "*.pdb" ) + +# Install a snapshot of the source as it was for this build; useful for the .pdb's 
+install( DIRECTORY ${PROJECT_SOURCE_DIR} + DESTINATION ${CLFFT_RUNTIME_DESTINATION} + OPTIONAL + CONFIGURATIONS Debug ) diff --git a/src/library/clFFT.pc.in b/src/library/clFFT.pc.in new file mode 100644 index 00000000..63dc0ea6 --- /dev/null +++ b/src/library/clFFT.pc.in @@ -0,0 +1,12 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix}/bin@SUFFIX_BIN@ +includedir=${prefix}/include +libdir=${prefix}/lib@SUFFIX_LIB@ + +Name: clFFT +Description: Open source OpenCL FFT library +Version: @CLFFT_VERSION@ +URL: https://github.com/clMathLibraries/clFFT + +Cflags: -I${includedir} +Libs: -L${libdir} -lclFFT diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp index e839ed8a..b88adaef 100644 --- a/src/library/generator.copy.cpp +++ b/src/library/generator.copy.cpp @@ -445,30 +445,35 @@ clfftStatus FFTPlan::GetMax1DLengthPvt (size_t * longest) const using namespace CopyGenerator; template<> -clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const +clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const { - FFTKernelGenKeyParams params; - OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); + FFTKernelGenKeyParams params; + OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); + + std::string programCode; + Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE; + switch(pr) + { + case P_SINGLE: + { + CopyKernel kernel(params); + kernel.GenerateKernel(programCode); + } break; + case P_DOUBLE: + { + CopyKernel kernel(params); + kernel.GenerateKernel(programCode); + } break; + } - std::string programCode; - Precision pr = (params.fft_precision == CLFFT_SINGLE) ? 
P_SINGLE : P_DOUBLE; - switch(pr) - { - case P_SINGLE: - { - CopyKernel kernel(params); - kernel.GenerateKernel(programCode); - } break; - case P_DOUBLE: - { - CopyKernel kernel(params); - kernel.GenerateKernel(programCode); - } break; - } + cl_int status = CL_SUCCESS; + cl_context QueueContext = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL); + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); - OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode ), _T( "fftRepo.setclString() failed!" ) ); - OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); + OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) ); + OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); - return CLFFT_SUCCESS; + return CLFFT_SUCCESS; } diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp index 8a6f5a60..f6f7241e 100644 --- a/src/library/generator.stockham.cpp +++ b/src/library/generator.stockham.cpp @@ -2348,7 +2348,7 @@ namespace StockhamGenerator } - void GenerateKernel(std::string &str) + void GenerateKernel(std::string &str, cl_device_id Dev_ID) { std::string twType = RegBaseType(2); std::string rType = RegBaseType(1); @@ -2501,8 +2501,19 @@ namespace StockhamGenerator else str += "fft_back"; str += "("; - // TODO : address this kludge - str += "__constant cb_t *cb __attribute__((max_constant_size(32))), "; + // TODO : address this kludge + size_t SizeParam_ret = 0; + clGetDeviceInfo(Dev_ID, CL_DEVICE_VENDOR, 0, NULL, &SizeParam_ret); + char* nameVendor = new char[SizeParam_ret]; + clGetDeviceInfo(Dev_ID, CL_DEVICE_VENDOR, SizeParam_ret, nameVendor, NULL); + + //nv compiler doesn't support __constant kernel argument + if (strncmp(nameVendor, 
"NVIDIA",6)!=0) + str += "__constant cb_t *cb __attribute__((max_constant_size(32))), "; + else + str += "__global cb_t *cb, "; + + delete [] nameVendor; // Function attributes if(params.fft_placeness == CLFFT_INPLACE) @@ -3218,11 +3229,22 @@ clfftStatus FFTPlan::GetMax1DLengthPvt (size_t * longest) const } template<> -clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const +clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const { FFTKernelGenKeyParams params; OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); + cl_int status = CL_SUCCESS; + cl_device_id Device = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL); + + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + + cl_context QueueContext = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL); + + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + std::string programCode; Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE; switch(pr) @@ -3230,12 +3252,12 @@ clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const case P_SINGLE: { Kernel kernel(params); - kernel.GenerateKernel(programCode); + kernel.GenerateKernel(programCode, Device); } break; case P_DOUBLE: { Kernel kernel(params); - kernel.GenerateKernel(programCode); + kernel.GenerateKernel(programCode, Device); } break; } @@ -3243,8 +3265,8 @@ clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const ReadKernelFromFile(programCode); #endif - OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode ), _T( "fftRepo.setclString() failed!" ) ); - OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); + OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" 
) ); + OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); return CLFFT_SUCCESS; } diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp index 3b12504f..0615b99b 100644 --- a/src/library/generator.transpose.cpp +++ b/src/library/generator.transpose.cpp @@ -822,7 +822,7 @@ clfftStatus FFTPlan::GetWorkSizesPvt (std::vector & globalWS, // OpenCL does not take unicode strings as input, so this routine returns only ASCII strings // Feed this generator the FFTPlan, and it returns the generated program as a string template<> -clfftStatus FFTPlan::GenerateKernelPvt ( FFTRepo& fftRepo ) const +clfftStatus FFTPlan::GenerateKernelPvt ( FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const { FFTKernelGenKeyParams params; OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); @@ -830,8 +830,14 @@ clfftStatus FFTPlan::GenerateKernelPvt ( FFTRepo& fftRepo ) const std::string programCode; OPENCL_V( GenerateTransposeKernel( params, programCode ), _T( "GenerateTransposeKernel() failed!" ) ); - OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode ), _T( "fftRepo.setclString() failed!" ) ); - OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); + cl_int status = CL_SUCCESS; + cl_context QueueContext = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL); + + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + + OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) ); + OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans",QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" 
) ); return CLFFT_SUCCESS; } diff --git a/src/library/generator.transpose.h b/src/library/generator.transpose.h index b08e3d3a..12ad7014 100644 --- a/src/library/generator.transpose.h +++ b/src/library/generator.transpose.h @@ -14,10 +14,6 @@ * limitations under the License. * ************************************************************************/ -//////////////////////////////////////////// -// Copyright (C) 2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - #pragma once #if !defined( AMD_CLFFT_generator_transpose_H ) #define AMD_CLFFT_generator_transpose_H diff --git a/src/library/mainpage.h b/src/library/mainpage.h index 326ad7a1..70e014c7 100644 --- a/src/library/mainpage.h +++ b/src/library/mainpage.h @@ -538,6 +538,12 @@ In-place transforms:
  • \c CLFFT_HERMITIAN_INTERLEAVED to \c CLFFT_REAL +@subsection ExplicitStrides Setting strides + +The library currently requires the user to explicitly set input and output strides for real transforms. See +the following examples to understand what values to use for input and output strides under different scenarios. The +examples only show typical usages. The user has flexibility in allocating their buffers and laying out data according +to their needs. @subsection RealExamples Examples diff --git a/src/library/plan.cpp b/src/library/plan.cpp index ec87b2d4..5a750d16 100644 --- a/src/library/plan.cpp +++ b/src/library/plan.cpp @@ -194,7 +194,7 @@ clfftStatus clfftCreateDefaultPlan( clfftPlanHandle* plHandle, cl_context contex } // Read the kernels that this plan uses from file, and store into the plan -clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams ) +clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams, const cl_context& context ) { FFTRepo& fftRepo = FFTRepo::getInstance( ); @@ -220,7 +220,7 @@ clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators g } std::string kernel; - OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel ), _T( "fftRepo.getProgramCode failed." ) ); + OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel, context ), _T( "fftRepo.getProgramCode failed." 
) ); kernelFile.get( ) << kernel << std::endl; @@ -250,16 +250,16 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan OPENCL_V( fftPlan->GetKernelGenKey( fftParams ), _T("GetKernelGenKey() failed!") ); cl_program program; - if( fftRepo.getclProgram( gen, fftParams, program ) == CLFFT_INVALID_PROGRAM ) + if( fftRepo.getclProgram( gen, fftParams, program, fftPlan->context ) == CLFFT_INVALID_PROGRAM ) { // If the user wishes us to write the kernels out to disk, we do so if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS ) { - OPENCL_V( WriteKernel( plHandle, gen, fftParams ), _T( "WriteKernel failed." ) ); + OPENCL_V( WriteKernel( plHandle, gen, fftParams, fftPlan->context ), _T( "WriteKernel failed." ) ); } std::string programCode; - OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode ), _T( "fftRepo.getProgramCode failed." ) ); + OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode, fftPlan->context ), _T( "fftRepo.getProgramCode failed." ) ); const char* source = programCode.c_str(); program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status ); @@ -317,7 +317,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; - OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) ); + OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." 
) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); @@ -331,7 +331,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; - OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) ); + OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); @@ -494,6 +494,9 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma return CLFFT_SUCCESS; } + // Store the device for which we are baking + clGetCommandQueueInfo(*commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &fftPlan->bakeDevice, NULL); + //find product of lengths size_t pLength = 1; switch(fftPlan->dim) @@ -543,7 +546,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma if(fftPlan->gen == Copy) { - OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) ); + OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) ); OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) ); fftPlan->baked = true; return CLFFT_SUCCESS; @@ -1505,13 +1508,27 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma //break; if (fftPlan->transflag) //Transpose for 2D { - OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateTransposeProgram() failed" ) ); + OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateTransposeProgram() failed" ) ); OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( 
"CompileKernels() failed" ) ); fftPlan->baked = true; return CLFFT_SUCCESS; } + // TODO : Check for a better way to do this. + bool isnvidia = false; + for (size_t Idx = 0; !isnvidia && Idx < numQueues; Idx++) + { + cl_command_queue QIdx = commQueueFFT[Idx]; + cl_device_id Device; + clGetCommandQueueInfo(QIdx, CL_QUEUE_DEVICE, sizeof(Device), &Device, NULL); + char Vendor[256]; + clGetDeviceInfo(Device, CL_DEVICE_VENDOR, sizeof(Vendor), &Vendor, NULL); + isnvidia |= (strncmp(Vendor, "NVIDIA", 6) == 0); + } + // nvidia gpus are failing when doing transpose for 2D FFTs + if (isnvidia) break; + if (fftPlan->length.size() != 2) break; if (!(IsPo2(fftPlan->length[0])) || !(IsPo2(fftPlan->length[1]))) break; @@ -2445,7 +2462,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma } // For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels - OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) ); + OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) ); // For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) ); @@ -3265,13 +3282,13 @@ clfftStatus FFTPlan::GetKernelGenKey (FFTKernelGenKeyParams & params) const } } -clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo) const +clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const { switch(gen) { - case Stockham: return GenerateKernelPvt(fftRepo); - case Transpose: return GenerateKernelPvt(fftRepo); - case Copy: return GenerateKernelPvt(fftRepo); + case Stockham: return GenerateKernelPvt(fftRepo, commQueueFFT); + case Transpose: return GenerateKernelPvt(fftRepo, commQueueFFT); + case Copy: return GenerateKernelPvt(fftRepo, commQueueFFT); default: assert(false); return 
CLFFT_NOTIMPLEMENTED; } } diff --git a/src/library/plan.h b/src/library/plan.h index ec96fadb..acafa070 100644 --- a/src/library/plan.h +++ b/src/library/plan.h @@ -202,7 +202,7 @@ class FFTPlan clfftStatus GetKernelGenKeyPvt (FFTKernelGenKeyParams & params) const; template - clfftStatus GenerateKernelPvt (FFTRepo& fftRepo) const; + clfftStatus GenerateKernelPvt (FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const; template clfftStatus GetMax1DLengthPvt (size_t *longest ) const; @@ -223,6 +223,10 @@ class FFTPlan size_t iDist, oDist; size_t batchsize; + // Note the device passed to BakePlan, assuming we are baking for one device + // TODO, change this logic for handling multiple GPUs/devices + cl_device_id bakeDevice; + // Devices that the user specified in the context passed to the create function std::vector< cl_device_id > devices; @@ -338,7 +342,7 @@ class FFTPlan clfftStatus GetWorkSizes (std::vector & globalws, std::vector & localws) const; clfftStatus GetKernelGenKey (FFTKernelGenKeyParams & params) const; - clfftStatus GenerateKernel (FFTRepo & fftRepo) const; + clfftStatus GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const; clfftStatus GetMax1DLength (size_t *longest ) const; void ResetBinarySizes(); diff --git a/src/library/private.h b/src/library/private.h index fe3770d7..5a31d2f2 100644 --- a/src/library/private.h +++ b/src/library/private.h @@ -46,13 +46,15 @@ #endif // Creating a portable defintion of countof -#if defined( _WIN32 ) +// This excludes mingw compilers; mingw32 does not have _countof +#if defined( _MSC_VER ) #define countOf _countof #else #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) #endif -#if defined( _WIN32 ) +// This excludes mingw compilers; mingw32 does not have +#if defined( _MSC_VER ) #include #if defined( _WIN64 ) diff --git a/src/library/repo.cpp b/src/library/repo.cpp index 018dff5c..0b6e532a 100644 --- a/src/library/repo.cpp +++ b/src/library/repo.cpp @@ -34,6 +34,9 @@ 
size_t FFTRepo::planCount = 1; void* FFTRepo::timerHandle = NULL; GpuStatTimer* FFTRepo::pStatTimer = NULL; + + + clfftStatus FFTRepo::releaseResources( ) { scopedLock sLock( lockRepo, _T( "releaseResources" ) ); @@ -88,16 +91,34 @@ clfftStatus FFTRepo::releaseResources( ) return CLFFT_SUCCESS; } -clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel ) +clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "setProgramCode" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); + // Prefix copyright statement at the top of generated kernels std::stringstream ss; - ss << std::endl << std::endl; - ss << "// Copyright (C) 2010-2013 Advanced Micro Devices, Inc. All Rights Reserved." 
<< std::endl << std::endl; + ss << + "/* ************************************************************************\n" + " * Copyright 2013 Advanced Micro Devices, Inc.\n" + " *\n" + " * Licensed under the Apache License, Version 2.0 (the \"License\");\n" + " * you may not use this file except in compliance with the License.\n" + " * You may obtain a copy of the License at\n" + " *\n" + " * http://www.apache.org/licenses/LICENSE-2.0\n" + " *\n" + " * Unless required by applicable law or agreed to in writing, software\n" + " * distributed under the License is distributed on an \"AS IS\" BASIS,\n" + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" + " * See the License for the specific language governing permissions and\n" + " * limitations under the License.\n" + " * ************************************************************************/" + << std::endl << std::endl; + std::string prefixCopyright = ss.str(); mapFFTs[ key ].ProgramString = prefixCopyright + kernel; @@ -105,26 +126,28 @@ clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelG return CLFFT_SUCCESS; } -clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel ) +clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "getProgramCode" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key); if( pos == mapFFTs.end( ) ) return CLFFT_FILE_NOT_FOUND; - kernel = pos->second.ProgramString; + kernel = pos->second.ProgramString; return CLFFT_SUCCESS; } clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, - const char * 
kernel_fwd, const char * kernel_back ) + const char * kernel_fwd, const char * kernel_back, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "setProgramEntryPoints" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); fftRepoValue& fft = mapFFTs[ key ]; fft.EntryPoint_fwd = kernel_fwd; @@ -134,11 +157,12 @@ clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFT } clfftStatus FFTRepo::getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, - clfftDirection dir, std::string& kernel ) + clfftDirection dir, std::string& kernel, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "getProgramEntryPoint" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key ); if( pos == mapFFTs.end( ) ) @@ -166,7 +190,14 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen { scopedLock sLock( lockRepo, _T( "setclProgram" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + cl_int status = CL_SUCCESS; + cl_context ProgramContext = NULL; + status = clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgramContext, NULL); + + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + + std::pair Params = std::make_pair(fftParam, ProgramContext); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key ); if( pos == mapFFTs.end( ) ) @@ -182,11 +213,12 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen return CLFFT_SUCCESS; } -clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& 
fftParam, cl_program& prog ) +clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog, const cl_context& PlanContext ) { scopedLock sLock( lockRepo, _T( "getclProgram" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, PlanContext); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key ); if( pos == mapFFTs.end( ) ) @@ -194,6 +226,11 @@ clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGen prog = pos->second.clProgram; if (NULL == prog) return CLFFT_INVALID_PROGRAM; + + cl_context ProgContext; + clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgContext, NULL); + if (PlanContext!=ProgContext) + return CLFFT_INVALID_PROGRAM; return CLFFT_SUCCESS; } diff --git a/src/library/repo.h b/src/library/repo.h index f68242de..f2619e79 100644 --- a/src/library/repo.h +++ b/src/library/repo.h @@ -22,6 +22,7 @@ #include "private.h" #include "plan.h" #include "lock.h" + #include "../statTimer/statisticalTimer.GPU.h" @@ -48,10 +49,14 @@ class FFTRepo // Map structure to map parameters that a generator uses to a specific set of kernels that the generator // has created - typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey; + //typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey; + + typedef std::pair< clfftGenerators, std::pair > fftRepoKey; typedef std::map< fftRepoKey, fftRepoValue > fftRepoType; typedef fftRepoType::iterator fftRepo_iterator; + + fftRepoType mapFFTs; struct fftKernels { @@ -134,15 +139,15 @@ class FFTRepo clfftStatus releaseResources( ); - clfftStatus setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel ); - clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel ); + clfftStatus setProgramCode( 
const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel, const cl_context& context); + clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel, const cl_context& context ); clfftStatus setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, - const char * kernel_fwd, const char * kernel_back ); - clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel ); + const char * kernel_fwd, const char * kernel_back, const cl_context& context ); + clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel , const cl_context& context); clfftStatus setclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const cl_program& kernel ); - clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel ); + clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel, const cl_context& PlanContext ); clfftStatus setclKernel ( cl_program prog, clfftDirection dir, const cl_kernel& kernel ); clfftStatus getclKernel ( cl_program prog, clfftDirection dir, cl_kernel& kernel ); @@ -150,9 +155,9 @@ class FFTRepo clfftStatus createPlan( clfftPlanHandle* plHandle, FFTPlan*& fftPlan ); clfftStatus getPlan( clfftPlanHandle plHandle, FFTPlan*& fftPlan, lockRAII*& planLock ); clfftStatus deletePlan( clfftPlanHandle* plHandle ); + }; - #endif diff --git a/src/library/transform.cpp b/src/library/transform.cpp index e53e830f..56a50dde 100644 --- a/src/library/transform.cpp +++ b/src/library/transform.cpp @@ -63,6 +63,16 @@ clfftStatus clfftEnqueueTransform( OPENCL_V( clfftBakePlan( plHandle, numQueuesAndEvents, commQueues, NULL, NULL ), _T( "Failed to bake plan" ) ); } + + // get the device information + 
cl_device_id q_device; + clGetCommandQueueInfo(*commQueues, CL_QUEUE_DEVICE, sizeof(cl_device_id), &q_device, NULL); + + // verify if the current device is the same as the one used for baking the plan + if(q_device != fftPlan->bakeDevice) + return CLFFT_DEVICE_MISMATCH; + + if (fftPlan->inputLayout == CLFFT_REAL) dir = CLFFT_FORWARD; else if (fftPlan->outputLayout == CLFFT_REAL) dir = CLFFT_BACKWARD; @@ -1152,7 +1162,7 @@ clfftStatus clfftEnqueueTransform( cl_program prog; cl_kernel kern; - OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog ), _T( "fftRepo.getclProgram failed" ) ); + OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog, fftPlan->context ), _T( "fftRepo.getclProgram failed" ) ); OPENCL_V( fftRepo.getclKernel( prog, dir, kern ), _T( "fftRepo.getclKernels failed" ) ); diff --git a/src/scripts/perf/CMakeLists.txt b/src/scripts/perf/CMakeLists.txt index 95add5a7..37337893 100644 --- a/src/scripts/perf/CMakeLists.txt +++ b/src/scripts/perf/CMakeLists.txt @@ -21,10 +21,4 @@ set(GRAPHING_SCRIPTS measurePerformance.py performanceUtility.py ) -if( BUILD64 ) - set( BIN_DIR bin64 ) -else() - set( BIN_DIR bin32 ) -endif() - -install( FILES ${GRAPHING_SCRIPTS} DESTINATION ${BIN_DIR} ) \ No newline at end of file +install( FILES ${GRAPHING_SCRIPTS} DESTINATION bin${SUFFIX_BIN} ) \ No newline at end of file diff --git a/src/statTimer/CMakeLists.txt b/src/statTimer/CMakeLists.txt index 0b5d75f8..a635bf12 100644 --- a/src/statTimer/CMakeLists.txt +++ b/src/statTimer/CMakeLists.txt @@ -63,28 +63,24 @@ add_definitions( "/DSTATTIMER_EXPORTS" ) # Include standard OpenCL headers include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include ) -add_library( StatTimer SHARED ${StatTimer.Files} ) +if(BUILD_SHARED_LIBRARY) + add_library( StatTimer SHARED ${StatTimer.Files} ) +else() + add_library( StatTimer STATIC ${StatTimer.Files} ) +endif() set_target_properties( StatTimer PROPERTIES VERSION ${CLFFT_VERSION} ) 
+set_target_properties( StatTimer PROPERTIES SOVERSION ${CLFFT_SOVERSION} ) set_target_properties( StatTimer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) target_link_libraries( StatTimer ${OPENCL_LIBRARIES} ) -if( UNIX ) +if( UNIX AND NOT APPLE ) # This library dependency is brought in by the high precision timer available in linux target_link_libraries( StatTimer -lrt ) endif( ) -if( BUILD64 ) - # CPack configuration; include the executable into the package - install( TARGETS StatTimer - RUNTIME DESTINATION bin64 - LIBRARY DESTINATION lib64 - ARCHIVE DESTINATION lib64/import - ) -else() - # CPack configuration; include the executable into the package - install( TARGETS StatTimer - RUNTIME DESTINATION bin32 - LIBRARY DESTINATION lib32 - ARCHIVE DESTINATION lib32/import - ) -endif() +# CPack configuration; include the executable into the package +install( TARGETS StatTimer + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) diff --git a/src/statTimer/statisticalTimer.CPU.h b/src/statTimer/statisticalTimer.CPU.h index 876250e1..0c7af3a2 100644 --- a/src/statTimer/statisticalTimer.CPU.h +++ b/src/statTimer/statisticalTimer.CPU.h @@ -47,7 +47,8 @@ class CpuStatTimer : public baseStatTimer cl_ulong clkFrequency; // For linux; the resolution of a high-precision timer -#if defined( __GNUC__ ) + // Mingw32 does not define timespec; can use windows timers +#if !defined( _WIN32 ) timespec res; #endif diff --git a/src/statTimer/stdafx.h b/src/statTimer/stdafx.h index 09875f9d..774fef75 100644 --- a/src/statTimer/stdafx.h +++ b/src/statTimer/stdafx.h @@ -41,7 +41,10 @@ // #include #include "targetver.h" +#if !defined( NOMINMAX ) #define NOMINMAX +#endif + #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers // Windows Header Files: #include diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 87510efb..8ee80a35 100644 --- 
a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -49,20 +49,17 @@ set( clFFT.Test.Headers set( clFFT.Test.Files ${clFFT.Test.Source} ${clFFT.Test.Headers} ) - - -if( BUILD64 ) - set( BIN_DIR bin64 ) - set( LIB_DIR lib64 ) -else() - set( BIN_DIR bin32 ) - set( LIB_DIR lib32 ) -endif() - set( LD_PTHREAD "" ) -if( CMAKE_COMPILER_IS_GNUCXX ) +if( MINGW ) + # -std=c++0x causes g++ to go into strict ANSI mode, which doesn't declare non-standard functions + # Googletest for mingw appears to have a dependency on _stricmp and off64_t + set( CMAKE_CXX_FLAGS "-std=gnu++0x ${CMAKE_CXX_FLAGS}" ) +elseif( CMAKE_COMPILER_IS_GNUCXX ) set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) set( LD_PTHREAD "-lpthread" ) +elseif( APPLE ) + set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) + add_definitions( -DGTEST_USE_OWN_TR1_TUPLE ) endif( ) # Include standard OpenCL headers @@ -103,7 +100,19 @@ set_target_properties( Test PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINAR # CPack configuration; include the executable into the package install( TARGETS Test - RUNTIME DESTINATION ${BIN_DIR} - LIBRARY DESTINATION ${LIB_DIR} - ARCHIVE DESTINATION ${LIB_DIR}/import - ) + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) + +get_target_property( testLocation Test LOCATION ) + +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/copyTestDependencies.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/copyTestDependencies.cmake" + @ONLY +) + +# Register script at run at install time to analyze the executable and copy dependencies into package +install( SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/copyTestDependencies.cmake") + \ No newline at end of file diff --git a/src/tests/copyTestDependencies.cmake.in b/src/tests/copyTestDependencies.cmake.in new file mode 100644 index 00000000..9397bcf4 --- /dev/null +++ b/src/tests/copyTestDependencies.cmake.in @@ -0,0 +1,100 @@ +# Customized install script for fftw 
test program; analyzes all the shared library dependencies and installs +# the dependencies into the package +include( GetPrerequisites ) + +# message( testLocation ": @testLocation@" ) + +# The Microsoft IDE presents a challenge because the full configuration is not known at cmake time +# This logic allows us to 'substitute' the proper configuration at install time +if( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Debug" ) + string( REPLACE "\$(Configuration)" "Debug" fixedTestLocation "@testLocation@" ) +elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Release" ) + string( REPLACE "\$(Configuration)" "Release" fixedTestLocation "@testLocation@" ) +elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "MinSizeRel" ) + string( REPLACE "\$(Configuration)" "MinSizeRel" fixedTestLocation "@testLocation@" ) +elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "RelWithDebInfo" ) + string( REPLACE "\$(Configuration)" "RelWithDebInfo" fixedTestLocation "@testLocation@" ) +endif( ) + +# message( fixedTestLocation ": ${fixedTestLocation}" ) +# Get the directory that the test executable resides in; this helps get_prerequisites( ) find dependent libraries +get_filename_component( testDir "${fixedTestLocation}" PATH ) +# message( testDir ": ${testDir}" ) + +set( installPath "" ) +if( WIN32 ) + set( installPath "${CMAKE_INSTALL_PREFIX}/bin@SUFFIX_BIN@" ) +else( ) + set( installPath "${CMAKE_INSTALL_PREFIX}/lib@SUFFIX_LIB@" ) +endif( ) + +# Only search for dependencies that have ROOT defined +set( depList "" ) + +#This logic assumes that FindFFTW.cmake has been called +get_filename_component( fftwDirSingle "@FFTW_SINGLE_PRECISION_LIBRARIES@" PATH ) +get_filename_component( fftwDirDouble "@FFTW_DOUBLE_PRECISION_LIBRARIES@" PATH ) + +if( EXISTS "${fftwDirSingle}" ) + list( APPEND depList "${fftwDirSingle}" ) +# message( "fftwDirSingle: ${fftwDirSingle}" ) +endif( ) + +string( COMPARE NOTEQUAL "${fftwDirSingle}" "${fftwDirDouble}" fftwDiffDirs ) +if( ${fftwDiffDirs} AND EXISTS "${fftwDirDouble}" ) + 
list( APPEND depList "${fftwDirDouble}" ) +# message( "fftwDirDouble: ${fftwDirDouble}" ) +endif( ) + +#This logic assumes that FindGTest.cmake has been called +get_filename_component( gtestDir "@GTEST_LIBRARY@" PATH ) +get_filename_component( gtestDirDebug "@GTEST_LIBRARY_DEBUG@" PATH ) + +if( EXISTS "${gtestDir}" ) + list( APPEND depList "${gtestDir}" ) +# message( "gtestDir: ${gtestDir}" ) +endif( ) + +string( COMPARE NOTEQUAL "${gtestDir}" "${gtestDirDebug}" gtestDiffDirs ) +if( ${gtestDiffDirs} AND EXISTS "${gtestDirDebug}" ) + list( APPEND depList "${gtestDirDebug}" ) +# message( "gtestDirDebug: ${gtestDirDebug}" ) +endif( ) + +#This logic assumes that FindOpenCL.cmake has been called +get_filename_component( openclDir "@OPENCL_LIBRARIES@" PATH ) + +if( EXISTS "${openclDir}" ) + list( APPEND depList "${openclDir}" ) +# message( "openclDir: ${openclDir}" ) +endif( ) + +if( EXISTS "${testDir}" ) + list( APPEND depList "${testDir}" ) +endif( ) + +# message( "depList: ${depList}" ) + +# This retrieves a list of shared library dependencies from the target; they are not full path names +# Skip system dependencies and skip recursion +get_prerequisites( ${fixedTestLocation} testDependencies 1 0 "" "${depList}" ) + +# Loop on queried library dependencies and copy them into package +foreach( dep ${testDependencies} ) + # This converts the dependency into a full path + gp_resolve_item( "${fixedTestLocation}" "${dep}" "" "${depList}" dep_test_path ) + + # In linux, the dep_test_path may point to a symbolic link, we also need to copy real file + get_filename_component( dep_realpath "${dep_test_path}" REALPATH ) + get_filename_component( dep_name "${dep_test_path}" NAME ) + # message( STATUS "depName: ${dep_name}" ) + # message( STATUS "depFullPath: ${dep_test_path}" ) + # message( STATUS "dep_realpath: ${dep_realpath}" ) + + if( NOT EXISTS ${installPath}/${dep_name} ) + file( INSTALL ${dep_test_path} ${dep_realpath} + USE_SOURCE_PERMISSIONS + DESTINATION ${installPath} + 
) + endif( ) +endforeach( ) diff --git a/src/tests/gtest_main.cpp b/src/tests/gtest_main.cpp index 6e29b90e..b30c3eac 100644 --- a/src/tests/gtest_main.cpp +++ b/src/tests/gtest_main.cpp @@ -18,7 +18,7 @@ #include #include #include "clFFT.h" -#include "version.h" +#include "clFFT.version.h" #include "test_constants.h" #include "../client/openCL.misc.h" #include "unicode.compatibility.h" @@ -29,8 +29,10 @@ time_t random_test_parameter_seed; float tolerance; bool verbose; -#if defined( _WIN32 ) -#define NOMINMAX +#if defined( _MSC_VER ) +#if !defined( NOMINMAX ) + #define NOMINMAX +#endif #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers #include diff --git a/src/tests/test_constants.cpp b/src/tests/test_constants.cpp index d4ca1e57..cdc1ad60 100644 --- a/src/tests/test_constants.cpp +++ b/src/tests/test_constants.cpp @@ -14,7 +14,6 @@ * limitations under the License. * ************************************************************************/ - #include "test_constants.h" #include #include