From 88f668af43ece2084dd9de903ecfa9a0c7f9eca2 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Tue, 13 Aug 2013 13:17:46 -0500 Subject: [PATCH 01/32] Adding link to our API documentation on GitHub Pages --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 328a3aa5..13b73dab 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,11 @@ The clFFT library is an open source OpenCL library implementation of discrete Fa * Supports dimension lengths that can be any mix of powers of 2, 3, and 5. * Supports single and double precision floating point formats. +## clFFT library user documentation +[Library and API documentation]( http://clmathlibraries.github.io/clFFT/ ) for developers is available online as a GitHub Pages website + ## clFFT Wiki -The [project wiki](https://github.com/kknox/clFFT/wiki) contains helpful documentation, including a [build primer](https://github.com/kknox/clFFT/wiki/Build) +The [project wiki](https://github.com/clMathLibraries/clFFT/wiki) contains helpful documentation, including a [build primer](https://github.com/clMathLibraries/clFFT/wiki/Build) ## Contributing code Please refer to and read the [Contributing](CONTRIBUTING.md) document for guidelines on how to contribute code to this open source project From 07a668e82e17c9cc0715148b4b024bd1227990ad Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Wed, 14 Aug 2013 16:08:06 -0500 Subject: [PATCH 02/32] Fixed project name for doxy file --- doc/clFFT.doxy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/clFFT.doxy b/doc/clFFT.doxy index 366f9c75..f771f04d 100644 --- a/doc/clFFT.doxy +++ b/doc/clFFT.doxy @@ -26,7 +26,7 @@ DOXYFILE_ENCODING = UTF-8 # identify the project. Note that if you do not use Doxywizard you need # to put quotes around the project name if it contains spaces. -PROJECT_NAME = clMathFft +PROJECT_NAME = clFFT # The PROJECT_NUMBER tag can be used to enter a project or revision number. 
# This could be handy for archiving the generated documentation or From 881788eeae5c47c02e1877e3d71a44e409950468 Mon Sep 17 00:00:00 2001 From: Matthias Vogelgesang Date: Mon, 19 Aug 2013 15:59:27 +0200 Subject: [PATCH 03/32] Add pkg-config support --- src/library/CMakeLists.txt | 7 +++++++ src/library/clFFT.pc.in | 11 +++++++++++ 2 files changed, 18 insertions(+) create mode 100644 src/library/clFFT.pc.in diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index ea7637bf..212feb3e 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -83,6 +83,13 @@ set_target_properties( clFFT PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINA if( UNIX ) # Right now, linux has problems compiling dynamic_cast, but the flag below doesn't help # set_target_properties( clFFT PROPERTIES COMPILE_FLAGS "-frtti" ) + + configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/clFFT.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc @ONLY IMMEDIATE ) + + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc + DESTINATION lib/pkgconfig + ) endif( ) if( BUILD64 ) diff --git a/src/library/clFFT.pc.in b/src/library/clFFT.pc.in new file mode 100644 index 00000000..85cd8829 --- /dev/null +++ b/src/library/clFFT.pc.in @@ -0,0 +1,11 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix} +libdir=${exec_prefix}/lib +includedir=${prefix}/include + +Name: clFFT +Description: OpenCL FFT library +Version: @CLFFT_VERSION@ + +Libs: -L${libdir} -lclFFT +Cflags: -I${includedir} From b37940112f2b05a88d9df16437a88908e89a228d Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Sun, 25 Aug 2013 23:54:14 -0500 Subject: [PATCH 04/32] First attempt at changing install suffixes based upon cmake cache variables --- src/CMakeLists.txt | 17 ++++++++++++++--- src/client/CMakeLists.txt | 25 ++++++------------------- src/library/CMakeLists.txt | 21 ++++++--------------- src/scripts/perf/CMakeLists.txt | 8 +------- src/statTimer/CMakeLists.txt | 21 ++++++--------------- src/tests/CMakeLists.txt | 18 
++++-------------- 6 files changed, 37 insertions(+), 73 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 549590e0..30e341df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -80,8 +80,9 @@ else() endif() if( MSVC_IDE ) - set( BUILD64 ${CMAKE_CL_64} ) set_property( GLOBAL PROPERTY USE_FOLDERS TRUE ) + + set( BUILD64 ${CMAKE_CL_64} ) else() option( BUILD64 "Build a 64-bit product" ON ) @@ -93,15 +94,25 @@ else() endif() endif() +# These variables are meant to contain string which should be appended to the installation paths +# of library and executable binaries, respectively. They are meant to be user configurable/overridable. +set( SUFFIX_LIB_DEFAULT "" ) +set( SUFFIX_BIN_DEFAULT "" ) + # Modify the global find property to help us find libraries like Boost in the correct paths for 64-bit # Essentially, find_library calls will look for /lib64 instead of /lib; works for windows and linux if( BUILD64 ) set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE ) message( STATUS "64bit build - FIND_LIBRARY_USE_LIB64_PATHS TRUE" ) -else() + + set( SUFFIX_LIB_DEFAULT "64" ) +else( ) set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS FALSE ) message( STATUS "32bit build - FIND_LIBRARY_USE_LIB64_PATHS FALSE" ) -endif() +endif( ) + +set( SUFFIX_LIB ${SUFFIX_LIB_DEFAULT} CACHE STRING "String to append to 'lib' install path" ) +set( SUFFIX_BIN ${SUFFIX_BIN_DEFAULT} CACHE STRING "String to append to 'bin' install path" ) # Client is built only if boost is found; on windows, we need vs10 or higher # Find Boost on the system, and configure the type of boost build we want diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index d9609cca..99a89d31 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -48,22 +48,9 @@ target_link_libraries( Client clFFT ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} ${DL_ set_target_properties( Client PROPERTIES VERSION ${CLFFT_VERSION} ) set_target_properties( Client 
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) -if( BUILD64 ) - # CPack configuration; include the executable into the package - install( TARGETS Client - RUNTIME DESTINATION bin64 - LIBRARY DESTINATION lib64 - ARCHIVE DESTINATION lib64/import - ) - -else() - # CPack configuration; include the executable into the package - install( TARGETS Client - RUNTIME DESTINATION bin32 - LIBRARY DESTINATION lib32 - ARCHIVE DESTINATION lib32/import - ) -endif() - -# configure_file( "${PROJECT_SOURCE_DIR}/client/CMakeLists.pack" - # "${PROJECT_BINARY_DIR}/samples/CMakeLists.txt" COPYONLY ) +# CPack configuration; include the executable into the package +install( TARGETS Client + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index ea7637bf..2bd83c6e 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -85,18 +85,9 @@ if( UNIX ) # set_target_properties( clFFT PROPERTIES COMPILE_FLAGS "-frtti" ) endif( ) -if( BUILD64 ) - # CPack configuration; include the executable into the package - install( TARGETS clFFT - RUNTIME DESTINATION bin64 - LIBRARY DESTINATION lib64 - ARCHIVE DESTINATION lib64/import - ) -else() - # CPack configuration; include the executable into the package - install( TARGETS clFFT - RUNTIME DESTINATION bin32 - LIBRARY DESTINATION lib32 - ARCHIVE DESTINATION lib32/import - ) -endif() +# CPack configuration; include the executable into the package +install( TARGETS clFFT + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) diff --git a/src/scripts/perf/CMakeLists.txt b/src/scripts/perf/CMakeLists.txt index 95add5a7..37337893 100644 --- a/src/scripts/perf/CMakeLists.txt +++ b/src/scripts/perf/CMakeLists.txt @@ -21,10 +21,4 @@ set(GRAPHING_SCRIPTS measurePerformance.py performanceUtility.py ) -if( BUILD64 
) - set( BIN_DIR bin64 ) -else() - set( BIN_DIR bin32 ) -endif() - -install( FILES ${GRAPHING_SCRIPTS} DESTINATION ${BIN_DIR} ) \ No newline at end of file +install( FILES ${GRAPHING_SCRIPTS} DESTINATION bin${SUFFIX_BIN} ) \ No newline at end of file diff --git a/src/statTimer/CMakeLists.txt b/src/statTimer/CMakeLists.txt index 0b5d75f8..c2d55bb3 100644 --- a/src/statTimer/CMakeLists.txt +++ b/src/statTimer/CMakeLists.txt @@ -73,18 +73,9 @@ if( UNIX ) target_link_libraries( StatTimer -lrt ) endif( ) -if( BUILD64 ) - # CPack configuration; include the executable into the package - install( TARGETS StatTimer - RUNTIME DESTINATION bin64 - LIBRARY DESTINATION lib64 - ARCHIVE DESTINATION lib64/import - ) -else() - # CPack configuration; include the executable into the package - install( TARGETS StatTimer - RUNTIME DESTINATION bin32 - LIBRARY DESTINATION lib32 - ARCHIVE DESTINATION lib32/import - ) -endif() +# CPack configuration; include the executable into the package +install( TARGETS StatTimer + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 87510efb..29e9bf86 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -49,16 +49,6 @@ set( clFFT.Test.Headers set( clFFT.Test.Files ${clFFT.Test.Source} ${clFFT.Test.Headers} ) - - -if( BUILD64 ) - set( BIN_DIR bin64 ) - set( LIB_DIR lib64 ) -else() - set( BIN_DIR bin32 ) - set( LIB_DIR lib32 ) -endif() - set( LD_PTHREAD "" ) if( CMAKE_COMPILER_IS_GNUCXX ) set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) @@ -103,7 +93,7 @@ set_target_properties( Test PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINAR # CPack configuration; include the executable into the package install( TARGETS Test - RUNTIME DESTINATION ${BIN_DIR} - LIBRARY DESTINATION ${LIB_DIR} - ARCHIVE DESTINATION ${LIB_DIR}/import - ) + RUNTIME DESTINATION bin${SUFFIX_BIN} + LIBRARY DESTINATION 
lib${SUFFIX_LIB} + ARCHIVE DESTINATION lib${SUFFIX_LIB}/import + ) From 4a22f0b75671de190b990c1635e6034e5d96696b Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Mon, 26 Aug 2013 14:49:52 -0500 Subject: [PATCH 05/32] Removing /stack from windows builds; adding printscreen support for nmake builds --- src/CMakeLists.txt | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 30e341df..446988ab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,15 @@ else( ) cmake_minimum_required( VERSION 2.6 ) endif( ) +if( CMAKE_GENERATOR MATCHES "NMake" ) + option( NMAKE_COMPILE_VERBOSE "Print compile and link strings to the console" OFF ) + if( NMAKE_COMPILE_VERBOSE ) + set( CMAKE_START_TEMP_FILE "" ) + set( CMAKE_END_TEMP_FILE "" ) + set( CMAKE_VERBOSE_MAKEFILE 1 ) + endif( ) +endif( ) + # This becomes the name of the solution file project( clFFT ) @@ -30,11 +39,6 @@ set( CLFFT_VERSION_MAJOR 2 ) set( CLFFT_VERSION_MINOR 1 ) set( CLFFT_VERSION_PATCH 0 ) set( CLFFT_VERSION "${CLFFT_VERSION_MAJOR}.${CLFFT_VERSION_MINOR}.${CLFFT_VERSION_PATCH}") - -# uncomment these to debug nmake and borland makefiles -#SET(CMAKE_START_TEMP_FILE "") -#SET(CMAKE_END_TEMP_FILE "") -#SET(CMAKE_VERBOSE_MAKEFILE 1) set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR} ) @@ -173,19 +177,11 @@ get_filename_component( C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME_WE ) if( C_COMPILER_NAME STREQUAL "cl" ) # Following options for nMake message( STATUS "Detected MSVS Ver: " ${MSVC_VERSION} ) - if( NOT MSVC_IDE ) - message( STATUS "Using an nMake environment to build" ) - - # I can't get nmake to work because of faulty /machine:, not sure that this isn't a cmake bug - # if( BUILD64 ) - # set( CMAKE_EXE_LINKER_FLAGS "/machine:amd64 ${CMAKE_EXE_LINKER_FLAGS}" ) - # set( CMAKE_SHARED_LINKER_FLAGS "/machine:amd64 ${CMAKE_SHARED_LINKER_FLAGS}" ) - # set( CMAKE_MODULE_LINKER_FLAGS "/machine:amd64 
${CMAKE_MODULE_LINKER_FLAGS }" ) - # else( ) - # set( CMAKE_EXE_LINKER_FLAGS "/machine:i386 ${CMAKE_EXE_LINKER_FLAGS}" ) - # endif( ) - endif( ) + # CMake sets huge stack frames for windows, for whatever reason. We go with compiler default. + string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}" ) + string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}" ) + string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}" ) elseif( C_COMPILER_NAME STREQUAL "gcc" ) message( STATUS "Detected GNU fortran compiler." ) From 6d37dbad3ca4ed1e561be36bc2a3e2588387c24b Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Wed, 4 Sep 2013 09:15:36 -0500 Subject: [PATCH 06/32] Merging Matzes pull request, with modifications for the new install procedure --- src/library/CMakeLists.txt | 10 +++------- src/library/clFFT.pc.in | 9 +++++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index bab71d3d..85f2aaa8 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -80,16 +80,12 @@ target_link_libraries( clFFT ${OPENCL_LIBRARIES} ) set_target_properties( clFFT PROPERTIES VERSION ${CLFFT_VERSION} ) set_target_properties( clFFT PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) -if( UNIX ) - # Right now, linux has problems compiling dynamic_cast, but the flag below doesn't help - # set_target_properties( clFFT PROPERTIES COMPILE_FLAGS "-frtti" ) - +if( CMAKE_COMPILER_IS_GNUCC ) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/clFFT.pc.in - ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc @ONLY IMMEDIATE ) + ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc @ONLY ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/clFFT.pc - DESTINATION lib/pkgconfig - ) + DESTINATION lib${SUFFIX_LIB}/pkgconfig ) endif( ) # CPack configuration; include the executable into the package diff --git a/src/library/clFFT.pc.in 
b/src/library/clFFT.pc.in index 85cd8829..21ef20ab 100644 --- a/src/library/clFFT.pc.in +++ b/src/library/clFFT.pc.in @@ -1,11 +1,12 @@ prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix} -libdir=${exec_prefix}/lib +exec_prefix=${prefix}/bin@SUFFIX_BIN@ includedir=${prefix}/include +libdir=${exec_prefix}/lib@SUFFIX_LIB@ Name: clFFT -Description: OpenCL FFT library +Description: Open source OpenCL FFT library Version: @CLFFT_VERSION@ +URL: https://github.com/clMathLibraries/clFFT -Libs: -L${libdir} -lclFFT Cflags: -I${includedir} +Libs: -L${libdir} -lclFFT From b43d7683e9d634739b35705a9f1d123a7ec0c1c7 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Thu, 5 Sep 2013 18:51:01 -0400 Subject: [PATCH 07/32] Fixed compiler name check on Linux - Default compiler name on most distros is c++ - Check for the flag "CMAKE_COMPILER_IS_GNUCXX instead --- src/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 446988ab..02d4d695 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -183,7 +183,7 @@ if( C_COMPILER_NAME STREQUAL "cl" ) string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}" ) string( REGEX REPLACE "/STACK:[0-9]+" "" CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS}" ) -elseif( C_COMPILER_NAME STREQUAL "gcc" ) +elseif( CMAKE_COMPILER_IS_GNUCXX ) message( STATUS "Detected GNU fortran compiler." 
) EXEC_PROGRAM( ${CMAKE_CXX_COMPILER} ARGS --version OUTPUT_VARIABLE vnum ) STRING(REGEX REPLACE ".*([0-9])\\.([0-9])\\.([0-9]).*" "\\1\\2\\3" vnum ${vnum}) @@ -205,7 +205,7 @@ elseif( C_COMPILER_NAME STREQUAL "gcc" ) set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage") endif() else( ) - message( FATAL_ERROR "Compiler name not detected" ) + message( FATAL_ERROR "Compiler not supported or not detected" ) endif( ) # If UNICODE is defined, pass extra definitions into From 943fae247322432728921601f93c4487718c54c8 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Sun, 8 Sep 2013 14:51:38 -0500 Subject: [PATCH 08/32] Adding mingw32 support as a compilation platform --- src/CMakeLists.txt | 4 ++-- src/client/openCL.misc.h | 2 +- src/include/sharedLibrary.h | 2 +- src/include/stdafx.h | 5 ++++- src/library/private.h | 6 ++++-- src/statTimer/statisticalTimer.CPU.h | 3 ++- src/statTimer/stdafx.h | 3 +++ src/tests/CMakeLists.txt | 6 +++++- src/tests/gtest_main.cpp | 6 ++++-- src/tests/test_constants.cpp | 1 - 10 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 446988ab..2e8c7a6c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -208,8 +208,8 @@ else( ) message( FATAL_ERROR "Compiler name not detected" ) endif( ) -# If UNICODE is defined, pass extra definitions into -if( UNICODE ) +# If UNICODE is defined for microsoft compilers, pass extra definitions +if( MSVC AND UNICODE ) add_definitions( "/DUNICODE /D_UNICODE" ) endif( ) diff --git a/src/client/openCL.misc.h b/src/client/openCL.misc.h index f7f6c202..4560d2b4 100644 --- a/src/client/openCL.misc.h +++ b/src/client/openCL.misc.h @@ -23,7 +23,7 @@ #include "unicode.compatibility.h" // Creating a portable defintion of countof -#if defined( _WIN32 ) +#if defined( _MSC_VER ) #define countOf _countof #else #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) diff --git a/src/include/sharedLibrary.h b/src/include/sharedLibrary.h index 9f34b3a1..e5e65de7 
100644 --- a/src/include/sharedLibrary.h +++ b/src/include/sharedLibrary.h @@ -79,7 +79,7 @@ inline void* LoadFunctionAddr( void* libHandle, std::string funcName ) #if defined( _WIN32 ) HMODULE fileHandle = reinterpret_cast< HMODULE >( libHandle ); - void* pFunc = ::GetProcAddress( fileHandle, funcName.c_str( ) ); + void* pFunc = reinterpret_cast< void* >( ::GetProcAddress( fileHandle, funcName.c_str( ) ) ); #else void* pFunc = ::dlsym( libHandle, funcName.c_str( ) ); #endif diff --git a/src/include/stdafx.h b/src/include/stdafx.h index 5a8077bf..4ab26bf2 100644 --- a/src/include/stdafx.h +++ b/src/include/stdafx.h @@ -42,8 +42,11 @@ #include #include "targetver.h" +#if !defined( NOMINMAX ) #define NOMINMAX - #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#endif + + #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers // Windows Header Files: #include #endif diff --git a/src/library/private.h b/src/library/private.h index fe3770d7..5a31d2f2 100644 --- a/src/library/private.h +++ b/src/library/private.h @@ -46,13 +46,15 @@ #endif // Creating a portable defintion of countof -#if defined( _WIN32 ) +// This excludes mingw compilers; mingw32 does not have _countof +#if defined( _MSC_VER ) #define countOf _countof #else #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) #endif -#if defined( _WIN32 ) +// This excludes mingw compilers; mingw32 does not have +#if defined( _MSC_VER ) #include #if defined( _WIN64 ) diff --git a/src/statTimer/statisticalTimer.CPU.h b/src/statTimer/statisticalTimer.CPU.h index 876250e1..0c7af3a2 100644 --- a/src/statTimer/statisticalTimer.CPU.h +++ b/src/statTimer/statisticalTimer.CPU.h @@ -47,7 +47,8 @@ class CpuStatTimer : public baseStatTimer cl_ulong clkFrequency; // For linux; the resolution of a high-precision timer -#if defined( __GNUC__ ) + // Mingw32 does not define timespec; can use windows timers +#if !defined( _WIN32 ) timespec res; #endif diff --git 
a/src/statTimer/stdafx.h b/src/statTimer/stdafx.h index 09875f9d..774fef75 100644 --- a/src/statTimer/stdafx.h +++ b/src/statTimer/stdafx.h @@ -41,7 +41,10 @@ // #include #include "targetver.h" +#if !defined( NOMINMAX ) #define NOMINMAX +#endif + #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers // Windows Header Files: #include diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 29e9bf86..868f402d 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -50,7 +50,11 @@ set( clFFT.Test.Headers set( clFFT.Test.Files ${clFFT.Test.Source} ${clFFT.Test.Headers} ) set( LD_PTHREAD "" ) -if( CMAKE_COMPILER_IS_GNUCXX ) +if( MINGW ) + # -std=c++0x causes g++ to go into strict ANSI mode, which doesn't declare non-standard functions + # Googletest for mingw appears to have a dependency on _stricmp and off64_t + set( CMAKE_CXX_FLAGS "-std=gnu++0x ${CMAKE_CXX_FLAGS}" ) +elseif( CMAKE_COMPILER_IS_GNUCXX ) set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) set( LD_PTHREAD "-lpthread" ) endif( ) diff --git a/src/tests/gtest_main.cpp b/src/tests/gtest_main.cpp index 6e29b90e..ec22e98c 100644 --- a/src/tests/gtest_main.cpp +++ b/src/tests/gtest_main.cpp @@ -29,8 +29,10 @@ time_t random_test_parameter_seed; float tolerance; bool verbose; -#if defined( _WIN32 ) -#define NOMINMAX +#if defined( MSVC_VER ) +#if !defined( NOMINMAX ) + #define NOMINMAX +#endif #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers #include diff --git a/src/tests/test_constants.cpp b/src/tests/test_constants.cpp index d4ca1e57..cdc1ad60 100644 --- a/src/tests/test_constants.cpp +++ b/src/tests/test_constants.cpp @@ -14,7 +14,6 @@ * limitations under the License. 
* ************************************************************************/ - #include "test_constants.h" #include #include From 0dfdb07abef645402b4b66af73d3fc1f112b4da5 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Tue, 10 Sep 2013 13:20:05 -0500 Subject: [PATCH 09/32] Fixed bug in clFFT.pc.in where lib directory was set relative to bin directory Added SOVERSION 2 to the clFFT library --- src/CMakeLists.txt | 3 +++ src/library/CMakeLists.txt | 1 + src/library/clFFT.pc.in | 2 +- src/statTimer/CMakeLists.txt | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 446988ab..7f1431dd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,6 +40,9 @@ set( CLFFT_VERSION_MINOR 1 ) set( CLFFT_VERSION_PATCH 0 ) set( CLFFT_VERSION "${CLFFT_VERSION_MAJOR}.${CLFFT_VERSION_MINOR}.${CLFFT_VERSION_PATCH}") +# This is incremented when the ABI to the library changes +set( CLFFT_SOVERSION 2 ) + set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR} ) # On windows, it's convenient to change the default install prefix such that it does NOT point to 'program files' diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index 85f2aaa8..6a3ea696 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -78,6 +78,7 @@ add_library( clFFT SHARED ${clFFT.Files} ) target_link_libraries( clFFT ${OPENCL_LIBRARIES} ) set_target_properties( clFFT PROPERTIES VERSION ${CLFFT_VERSION} ) +set_target_properties( clFFT PROPERTIES SOVERSION ${CLFFT_SOVERSION} ) set_target_properties( clFFT PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) if( CMAKE_COMPILER_IS_GNUCC ) diff --git a/src/library/clFFT.pc.in b/src/library/clFFT.pc.in index 21ef20ab..63dc0ea6 100644 --- a/src/library/clFFT.pc.in +++ b/src/library/clFFT.pc.in @@ -1,7 +1,7 @@ prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix}/bin@SUFFIX_BIN@ includedir=${prefix}/include -libdir=${exec_prefix}/lib@SUFFIX_LIB@ 
+libdir=${prefix}/lib@SUFFIX_LIB@ Name: clFFT Description: Open source OpenCL FFT library diff --git a/src/statTimer/CMakeLists.txt b/src/statTimer/CMakeLists.txt index c2d55bb3..6de01a9c 100644 --- a/src/statTimer/CMakeLists.txt +++ b/src/statTimer/CMakeLists.txt @@ -65,6 +65,7 @@ include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../inc add_library( StatTimer SHARED ${StatTimer.Files} ) set_target_properties( StatTimer PROPERTIES VERSION ${CLFFT_VERSION} ) +set_target_properties( StatTimer PROPERTIES SOVERSION ${CLFFT_SOVERSION} ) set_target_properties( StatTimer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) target_link_libraries( StatTimer ${OPENCL_LIBRARIES} ) From 242ba476f8afffb445a452e82338ce18fd987e7e Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 11 Sep 2013 18:23:47 -0400 Subject: [PATCH 10/32] Adding CUDA_PATH as an option for OPENCL_ROOT --- src/FindOpenCL.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake index 4dbb3d57..374cf0d5 100644 --- a/src/FindOpenCL.cmake +++ b/src/FindOpenCL.cmake @@ -48,6 +48,8 @@ #----------------------- if( DEFINED ENV{AMDAPPSDKROOT} ) set( OPENCL_ROOT $ENV{AMDAPPSDKROOT} CACHE PATH "Environment variable defining the root of OPENCL implementation" ) +elseif( DEFINED ENV{CUDA_PATH} ) + set( OPENCL_ROOT $ENV{CUDA_PATH} CACHE PATH "Environment variable defining the root of OPENCL implementation" ) else( ) set( OPENCL_ROOT "/usr/lib" CACHE PATH "Environment variable defining the root of OPENCL implementation" ) endif( ) From 2b1227f01fe78cafd4a0bb623ed078dca1fc7725 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Wed, 11 Sep 2013 18:23:58 -0400 Subject: [PATCH 11/32] CUDA uses Win32 to store 32 bit libraries --- src/FindOpenCL.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake index 374cf0d5..ab58252b 100644 --- a/src/FindOpenCL.cmake +++ 
b/src/FindOpenCL.cmake @@ -85,7 +85,7 @@ else( ) ${OPENCL_ROOT}/lib ENV AMDAPPSDKROOT/lib DOC "OpenCL dynamic library path" - PATH_SUFFIXES x86 + PATH_SUFFIXES x86 Win32 ) endif( ) mark_as_advanced( OPENCL_LIBRARIES ) From 137cf48c699aae10c3e4e65feb8769b0b2da3f1d Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Thu, 12 Sep 2013 17:29:50 -0500 Subject: [PATCH 12/32] Cleanup of txt not in the spirit of the clMath Apache license --- CHANGELOG | 31 ------------------------------- src/client/CMakeLists.pack | 18 +++++++++++++++--- src/library/generator.transpose.h | 4 ---- src/library/repo.cpp | 20 ++++++++++++++++++-- 4 files changed, 33 insertions(+), 40 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a88cad8e..e5814efe 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -171,34 +171,3 @@ For example: ./clfft.Sample -iv - Watch for the version strings to print out; watch for 'Client Test *****PASS*****' to print out. -_______________________________________________________________________________ -(C) 2010-2013 Advanced Micro Devices, Inc. All rights reserved. AMD, the AMD -Arrow logo, ATI, the ATI logo, Radeon, FireStream, FireGL, Catalyst, and -combinations thereof are trademarks of Advanced Micro Devices, Inc. Microsoft -(R), Windows, and Windows Vista (R) are registered trademarks of Microsoft -Corporation in the U.S. and/or other jurisdictions. OpenCL and the OpenCL logo -are trademarks of Apple Inc. used by permission by Khronos. Other names are for -informational purposes only and may be trademarks of their respective owners. - -The contents of this document are provided in connection with Advanced Micro -Devices, Inc. ("AMD") products. AMD makes no representations or warranties with -respect to the accuracy or completeness of the contents of this publication and -reserves the right to make changes to specifications and product descriptions -at any time without notice. 
The information contained herein may be of a -preliminary or advance nature and is subject to change without notice. No -license, whether express, implied, arising by estoppel or otherwise, to any -intellectual property rights is granted by this publication. Except as set forth -in AMD's Standard Terms and Conditions of Sale, AMD assumes no liability -whatsoever, and disclaims any express or implied warranty, relating to its -products including, but not limited to, the implied warranty of -merchantability, fitness for a particular purpose, or infringement of any -intellectual property right. - -AMD's products are not designed, intended, authorized or warranted for use as -components in systems intended for surgical implant into the body, or in other -applications intended to support or sustain life, or in any other application -in which the failure of AMD's product could create a situation where personal -injury, death, or severe property or environmental damage may occur. AMD -reserves the right to discontinue or make changes to its products at any time -without notice. -_______________________________________________________________________________ diff --git a/src/client/CMakeLists.pack b/src/client/CMakeLists.pack index b5903633..2fcf3ea1 100644 --- a/src/client/CMakeLists.pack +++ b/src/client/CMakeLists.pack @@ -1,6 +1,18 @@ -############################################################################# -## Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. -############################################################################# +# ######################################################################## +# Copyright 2013 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ######################################################################## cmake_minimum_required( VERSION 2.6 ) project( clFFT.Sample ) diff --git a/src/library/generator.transpose.h b/src/library/generator.transpose.h index b08e3d3a..12ad7014 100644 --- a/src/library/generator.transpose.h +++ b/src/library/generator.transpose.h @@ -14,10 +14,6 @@ * limitations under the License. * ************************************************************************/ -//////////////////////////////////////////// -// Copyright (C) 2011 Advanced Micro Devices, Inc. All Rights Reserved. -//////////////////////////////////////////// - #pragma once #if !defined( AMD_CLFFT_generator_transpose_H ) #define AMD_CLFFT_generator_transpose_H diff --git a/src/library/repo.cpp b/src/library/repo.cpp index 018dff5c..b41d6578 100644 --- a/src/library/repo.cpp +++ b/src/library/repo.cpp @@ -96,8 +96,24 @@ clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelG // Prefix copyright statement at the top of generated kernels std::stringstream ss; - ss << std::endl << std::endl; - ss << "// Copyright (C) 2010-2013 Advanced Micro Devices, Inc. All Rights Reserved." 
<< std::endl << std::endl; + ss << + "/* ************************************************************************\n" + " * Copyright 2013 Advanced Micro Devices, Inc.\n" + " *\n" + " * Licensed under the Apache License, Version 2.0 (the \"License\");\n" + " * you may not use this file except in compliance with the License.\n" + " * You may obtain a copy of the License at\n" + " *\n" + " * http://www.apache.org/licenses/LICENSE-2.0\n" + " *\n" + " * Unless required by applicable law or agreed to in writing, software\n" + " * distributed under the License is distributed on an \"AS IS\" BASIS,\n" + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" + " * See the License for the specific language governing permissions and\n" + " * limitations under the License.\n" + " * ************************************************************************/" + << std::endl << std::endl; + std::string prefixCopyright = ss.str(); mapFFTs[ key ].ProgramString = prefixCopyright + kernel; From d7b95efc4e8560d1009775e540767c14fe0baf4d Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Tue, 17 Sep 2013 09:56:06 -0500 Subject: [PATCH 13/32] Add configuration file and cmake modifications to automate build on Travis. 
--- .travis.yml | 44 ++++++++++++++++++++++++++++++++++++++++++++ README.md | 1 + src/CMakeLists.txt | 12 +++--------- 3 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..0bb9d45e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,44 @@ +language: cpp + +compiler: + - gcc + +before_install: + - sudo apt-get update -qq + - sudo apt-get install -qq fglrx opencl-headers libboost-program-options-dev libfftw3-dev libgtest-dev +# Uncomment below to help verify the installs above work +# - ls -la /usr/lib/libboost* +# - ls -la /usr/include/boost +# - ls -la /usr/src/gtest + +install: + - mkdir -p bin/gTest + - cd bin/gTest + - cmake -DCMAKE_BUILD_TYPE=Release /usr/src/gtest + - make + - sudo mv libg* /usr/lib + +before_script: + - cd ${TRAVIS_BUILD_DIR} + - mkdir -p bin/clFFT + - cd bin/clFFT + - cmake -DBoost_NO_SYSTEM_PATHS=OFF ../../src + +script: + - make install +# - ls -Rla package +# Run a simple test to validate that the build works; CPU device in a VM + - cd package/bin + - export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clFFT/package/lib64:${LD_LIBRARY_PATH} + - ./Client -i + +after_success: + - cd ${TRAVIS_BUILD_DIR}/bin/clFFT + - make package + +notifications: + email: + - clmath-developers@googlegroups.com + on_success: change + on_failure: always + \ No newline at end of file diff --git a/README.md b/README.md index 13b73dab..84c80107 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ clMath is a software library containing FFT and BLAS functions written in OpenCL APPML 1.10 is the most current generally available version of the library, and pre-built binaries are available for download on both Linux and Windows platforms. 
+[![Build Status](https://travis-ci.org/kknox/clFFT.png)](https://travis-ci.org/kknox/clFFT) ## Introduction to clFFT The FFT is an implementation of the Discrete Fourier Transform (DFT) that makes use of symmetries in the FFT definition to reduce the mathematical intensity required from O(N2) to O(N log2( N )) when the sequence length N is the product of small prime factors. Currently, there is no standard API for FFT routines. Hardware vendors usually provide a set of high-performance FFTs optimized for their systems: no two vendors employ the same interfaces for their FFT routines. clFFT provides a set of FFT routines that are optimized for AMD graphics processors, but also are functional across CPU and other compute devices. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 450a3834..6c68791c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -66,12 +66,6 @@ option( BUILD_LOADLIBRARIES "Build the optional dynamic load libraries that the # Otherwise, create a sensible default that the user can change if( DEFINED ENV{BOOST_ROOT} ) set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" ) -else( ) - if( UNIX ) - set( BOOST_ROOT "/usr" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) - else( ) - set( BOOST_ROOT "/Path/To/boost_x_xx_x" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) - endif() endif( ) # Currently, linux has a problem outputing both narrow and wide characters, @@ -129,9 +123,9 @@ set( Boost_DETAILED_FAILURE_MSG ON ) set( Boost_DEBUG ON ) set( Boost_ADDITIONAL_VERSIONS "1.46.1" "1.46" "1.44.0" "1.44" ) -# On linux, the boost installed in the system always appears to override any user boost installs -if( UNIX ) - set( Boost_NO_SYSTEM_PATHS TRUE ) +# Default Boost_NO_SYSTEM_PATHS to TRUE if the user does not specify themselves +if( NOT DEFINED Boost_NO_SYSTEM_PATHS AND UNIX ) + set( Boost_NO_SYSTEM_PATHS ON ) 
endif( ) # This will define Boost_FOUND From 0def5e1870238103cb76f1a4c724e169ef058886 Mon Sep 17 00:00:00 2001 From: Jenkins SEE Anna7 Date: Wed, 2 Oct 2013 14:24:10 -0500 Subject: [PATCH 14/32] Adding the ability to specify build numbers through cmake command line parameters --- src/CMakeLists.txt | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6c68791c..1a76989f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,9 +35,18 @@ endif( ) project( clFFT ) # Define a version for the code -set( CLFFT_VERSION_MAJOR 2 ) -set( CLFFT_VERSION_MINOR 1 ) -set( CLFFT_VERSION_PATCH 0 ) +if( NOT DEFINED CLFFT_VERSION_MAJOR ) + set( CLFFT_VERSION_MAJOR 2 ) +endif( ) + +if( NOT DEFINED CLFFT_VERSION_MINOR ) + set( CLFFT_VERSION_MINOR 1 ) +endif( ) + +if( NOT DEFINED CLFFT_VERSION_PATCH ) + set( CLFFT_VERSION_PATCH 0 ) +endif( ) + set( CLFFT_VERSION "${CLFFT_VERSION_MAJOR}.${CLFFT_VERSION_MINOR}.${CLFFT_VERSION_PATCH}") # This is incremented when the ABI to the library changes From fd063c24278660d1217f1c6df1f20d730eb5e02c Mon Sep 17 00:00:00 2001 From: AMD-FirePro Date: Fri, 11 Oct 2013 09:41:43 +0100 Subject: [PATCH 15/32] workaround to make clfft run on NV --- src/library/generator.stockham.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp index 8a6f5a60..fdf36442 100644 --- a/src/library/generator.stockham.cpp +++ b/src/library/generator.stockham.cpp @@ -2348,7 +2348,7 @@ namespace StockhamGenerator } - void GenerateKernel(std::string &str) + void GenerateKernel(std::string &str, cl_device_id Dev_ID) { std::string twType = RegBaseType(2); std::string rType = RegBaseType(1); @@ -2501,8 +2501,19 @@ namespace StockhamGenerator else str += "fft_back"; str += "("; - // TODO : address this kludge - str += "__constant cb_t *cb __attribute__((max_constant_size(32))), "; + // TODO : 
address this kludge + size_t SizeParam_ret = 0; + clGetDeviceInfo(Dev_ID, CL_DEVICE_VENDOR, 0, NULL, &SizeParam_ret); + char* nameVendor = new char[SizeParam_ret]; + clGetDeviceInfo(Dev_ID, CL_DEVICE_VENDOR, SizeParam_ret, nameVendor, NULL); + + //nv compiler doesn't support __constant kernel argument + if (strncmp(nameVendor, "NVIDIA",6)!=0) + str += "__constant cb_t *cb __attribute__((max_constant_size(32))), "; + else + str += "__global cb_t *cb, "; + + delete [] nameVendor; // Function attributes if(params.fft_placeness == CLFFT_INPLACE) @@ -3230,12 +3241,12 @@ clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const case P_SINGLE: { Kernel kernel(params); - kernel.GenerateKernel(programCode); + kernel.GenerateKernel(programCode, devices[0]); } break; case P_DOUBLE: { Kernel kernel(params); - kernel.GenerateKernel(programCode); + kernel.GenerateKernel(programCode, devices[0]); } break; } From 24ce704491c0e1960a6e82bc820e8536deb101cf Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Sun, 13 Oct 2013 16:57:01 -0500 Subject: [PATCH 16/32] Everything builds with xcode 5.0 except for the googletest executable --- src/CMakeLists.txt | 4 ++-- src/statTimer/CMakeLists.txt | 2 +- src/tests/CMakeLists.txt | 4 ++++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1a76989f..54ade577 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -210,8 +210,8 @@ elseif( CMAKE_COMPILER_IS_GNUCXX ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage") endif() -else( ) - message( FATAL_ERROR "Compiler not supported or not detected" ) +# elseif( APPLE ) +# set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) endif( ) # If UNICODE is defined for microsoft compilers, pass extra definitions diff --git a/src/statTimer/CMakeLists.txt b/src/statTimer/CMakeLists.txt index 6de01a9c..4976b30d 100644 --- a/src/statTimer/CMakeLists.txt +++ 
b/src/statTimer/CMakeLists.txt @@ -69,7 +69,7 @@ set_target_properties( StatTimer PROPERTIES SOVERSION ${CLFFT_SOVERSION} ) set_target_properties( StatTimer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) target_link_libraries( StatTimer ${OPENCL_LIBRARIES} ) -if( UNIX ) +if( UNIX AND NOT APPLE ) # This library dependency is brought in by the high precision timer available in linux target_link_libraries( StatTimer -lrt ) endif( ) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 868f402d..cf6ab6e1 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -57,6 +57,10 @@ if( MINGW ) elseif( CMAKE_COMPILER_IS_GNUCXX ) set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) set( LD_PTHREAD "-lpthread" ) +elseif( APPLE ) + set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) +# set( CMAKE_CXX_FLAGS "-stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) + add_definitions( -DGTEST_USE_OWN_TR1_TUPLE ) endif( ) # Include standard OpenCL headers From cdb291951cde68df711ded457a7c627ae1693e9c Mon Sep 17 00:00:00 2001 From: AMD-FirePro Date: Mon, 14 Oct 2013 10:23:35 +0100 Subject: [PATCH 17/32] NV workaround. 
Using the command queue to get the device id --- src/library/generator.copy.cpp | 2 +- src/library/generator.stockham.cpp | 12 +++++++++--- src/library/generator.transpose.cpp | 2 +- src/library/plan.cpp | 14 +++++++------- src/library/plan.h | 4 ++-- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp index e839ed8a..5afe3cd2 100644 --- a/src/library/generator.copy.cpp +++ b/src/library/generator.copy.cpp @@ -445,7 +445,7 @@ clfftStatus FFTPlan::GetMax1DLengthPvt (size_t * longest) const using namespace CopyGenerator; template<> -clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const +clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const { FFTKernelGenKeyParams params; OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp index fdf36442..a1b7969a 100644 --- a/src/library/generator.stockham.cpp +++ b/src/library/generator.stockham.cpp @@ -3229,11 +3229,17 @@ clfftStatus FFTPlan::GetMax1DLengthPvt (size_t * longest) const } template<> -clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const +clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const { FFTKernelGenKeyParams params; OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); + cl_int status = CL_SUCCESS; + cl_device_id Device = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL); + + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + std::string programCode; Precision pr = (params.fft_precision == CLFFT_SINGLE) ? 
P_SINGLE : P_DOUBLE; switch(pr) @@ -3241,12 +3247,12 @@ clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo ) const case P_SINGLE: { Kernel kernel(params); - kernel.GenerateKernel(programCode, devices[0]); + kernel.GenerateKernel(programCode, Device); } break; case P_DOUBLE: { Kernel kernel(params); - kernel.GenerateKernel(programCode, devices[0]); + kernel.GenerateKernel(programCode, Device); } break; } diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp index 3b12504f..2c83b8a3 100644 --- a/src/library/generator.transpose.cpp +++ b/src/library/generator.transpose.cpp @@ -822,7 +822,7 @@ clfftStatus FFTPlan::GetWorkSizesPvt (std::vector & globalWS, // OpenCL does not take unicode strings as input, so this routine returns only ASCII strings // Feed this generator the FFTPlan, and it returns the generated program as a string template<> -clfftStatus FFTPlan::GenerateKernelPvt ( FFTRepo& fftRepo ) const +clfftStatus FFTPlan::GenerateKernelPvt ( FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const { FFTKernelGenKeyParams params; OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); diff --git a/src/library/plan.cpp b/src/library/plan.cpp index ec87b2d4..61ae195e 100644 --- a/src/library/plan.cpp +++ b/src/library/plan.cpp @@ -543,7 +543,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma if(fftPlan->gen == Copy) { - OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) ); + OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) ); OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) ); fftPlan->baked = true; return CLFFT_SUCCESS; @@ -1505,7 +1505,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma //break; if (fftPlan->transflag) //Transpose for 2D { - OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( 
"GenerateTransposeProgram() failed" ) ); + OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateTransposeProgram() failed" ) ); OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) ); fftPlan->baked = true; @@ -2445,7 +2445,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma } // For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels - OPENCL_V( fftPlan->GenerateKernel( fftRepo ), _T( "GenerateKernel() failed" ) ); + OPENCL_V( fftPlan->GenerateKernel( fftRepo, *commQueueFFT ), _T( "GenerateKernel() failed" ) ); // For the radices that we have factored, we need to load/compile and build the appropriate OpenCL kernels OPENCL_V( CompileKernels( *commQueueFFT, plHandle, fftPlan->gen, fftPlan ), _T( "CompileKernels() failed" ) ); @@ -3265,13 +3265,13 @@ clfftStatus FFTPlan::GetKernelGenKey (FFTKernelGenKeyParams & params) const } } -clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo) const +clfftStatus FFTPlan::GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const { switch(gen) { - case Stockham: return GenerateKernelPvt(fftRepo); - case Transpose: return GenerateKernelPvt(fftRepo); - case Copy: return GenerateKernelPvt(fftRepo); + case Stockham: return GenerateKernelPvt(fftRepo, commQueueFFT); + case Transpose: return GenerateKernelPvt(fftRepo, commQueueFFT); + case Copy: return GenerateKernelPvt(fftRepo, commQueueFFT); default: assert(false); return CLFFT_NOTIMPLEMENTED; } } diff --git a/src/library/plan.h b/src/library/plan.h index ec96fadb..11319e7e 100644 --- a/src/library/plan.h +++ b/src/library/plan.h @@ -202,7 +202,7 @@ class FFTPlan clfftStatus GetKernelGenKeyPvt (FFTKernelGenKeyParams & params) const; template - clfftStatus GenerateKernelPvt (FFTRepo& fftRepo) const; + clfftStatus GenerateKernelPvt (FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const; template 
clfftStatus GetMax1DLengthPvt (size_t *longest ) const; @@ -338,7 +338,7 @@ class FFTPlan clfftStatus GetWorkSizes (std::vector & globalws, std::vector & localws) const; clfftStatus GetKernelGenKey (FFTKernelGenKeyParams & params) const; - clfftStatus GenerateKernel (FFTRepo & fftRepo) const; + clfftStatus GenerateKernel (FFTRepo & fftRepo, const cl_command_queue commQueueFFT) const; clfftStatus GetMax1DLength (size_t *longest ) const; void ResetBinarySizes(); From 3a56ebff1dc2e5493724778f877e595c66ecbb33 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Mon, 14 Oct 2013 23:36:19 -0500 Subject: [PATCH 18/32] Everything compiles on MacOSX with the Unix makefile generator --- src/CMakeLists.txt | 4 +--- src/client/CMakeLists.txt | 6 ++++-- src/tests/CMakeLists.txt | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 54ade577..a791b3ff 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -180,7 +180,7 @@ get_filename_component( C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME_WE ) # message( "CMAKE_C_COMPILER: " ${CMAKE_C_COMPILER} ) # Set common compile and link options -if( C_COMPILER_NAME STREQUAL "cl" ) +if( MSVC ) # Following options for nMake message( STATUS "Detected MSVS Ver: " ${MSVC_VERSION} ) @@ -210,8 +210,6 @@ elseif( CMAKE_COMPILER_IS_GNUCXX ) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage") set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --coverage") endif() -# elseif( APPLE ) -# set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) endif( ) # If UNICODE is defined for microsoft compilers, pass extra definitions diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index 99a89d31..fdf209f5 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -33,10 +33,12 @@ set( Client.Files ${Client.Source} ${Client.Headers} ) set( DL_LIB "" ) if( WIN32 ) add_definitions( "/D_CONSOLE" ) -else() +elseif( APPLE ) + set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ 
${CMAKE_CXX_FLAGS}" ) +else( ) # To use the dlopen() and dlclose() functions, we should link with libdl set( DL_LIB "-ldl" ) -endif() +endif( ) # Include standard OpenCL headers include_directories( ${Boost_INCLUDE_DIRS} ${OPENCL_INCLUDE_DIRS} ../../../common ${PROJECT_BINARY_DIR}/include ../include ) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index cf6ab6e1..2fdf49de 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -59,7 +59,6 @@ elseif( CMAKE_COMPILER_IS_GNUCXX ) set( LD_PTHREAD "-lpthread" ) elseif( APPLE ) set( CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) -# set( CMAKE_CXX_FLAGS "-stdlib=libc++ ${CMAKE_CXX_FLAGS}" ) add_definitions( -DGTEST_USE_OWN_TR1_TUPLE ) endif( ) From 3c94e569e92826a6fdd23e489c2210f80ed1fed8 Mon Sep 17 00:00:00 2001 From: Pavan Yalamanchili Date: Thu, 31 Oct 2013 17:06:10 -0400 Subject: [PATCH 19/32] Workaround for 2D FFT failures on NVIDIA GPUs - Issue: https://github.com/clMathLibraries/clFFT/issues/25 - In-place transposes were being used for power-of-2 dimensions - If the device is from NVIDIA, an alternative path is taken --- src/library/plan.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/library/plan.cpp b/src/library/plan.cpp index 61ae195e..b988ecd0 100644 --- a/src/library/plan.cpp +++ b/src/library/plan.cpp @@ -1512,6 +1512,20 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma return CLFFT_SUCCESS; } + // TODO : Check for a better way to do this. 
+ bool isnvidia = false; + for (size_t Idx = 0; !isnvidia && Idx < numQueues; Idx++) + { + cl_command_queue QIdx = commQueueFFT[Idx]; + cl_device_id Device; + clGetCommandQueueInfo(QIdx, CL_QUEUE_DEVICE, sizeof(Device), &Device, NULL); + char Vendor[256]; + clGetDeviceInfo(Device, CL_DEVICE_VENDOR, sizeof(Vendor), &Vendor, NULL); + isnvidia |= (strncmp(Vendor, "NVIDIA", 6) == 0); + } + // nvidia gpus are failing when doing transpose for 2D FFTs + if (isnvidia) break; + if (fftPlan->length.size() != 2) break; if (!(IsPo2(fftPlan->length[0])) || !(IsPo2(fftPlan->length[1]))) break; From 08130dc6d23d469aaf672bbe9234ba393d0b69c2 Mon Sep 17 00:00:00 2001 From: bragadeesh Date: Mon, 11 Nov 2013 14:15:27 -0600 Subject: [PATCH 20/32] adding error return to handle mismatched device usage between BakePlan and EnqueueTransform --- src/include/clFFT.h | 1 + src/library/plan.cpp | 3 +++ src/library/plan.h | 4 ++++ src/library/transform.cpp | 10 ++++++++++ 4 files changed, 18 insertions(+) diff --git a/src/include/clFFT.h b/src/include/clFFT.h index f75ded30..738141fd 100644 --- a/src/include/clFFT.h +++ b/src/include/clFFT.h @@ -127,6 +127,7 @@ enum clfftStatus_ CLFFT_VERSION_MISMATCH, /*!< Version conflict between client and library. */ CLFFT_INVALID_PLAN, /*!< Requested plan could not be found. */ CLFFT_DEVICE_NO_DOUBLE, /*!< Double precision not supported on this device. */ + CLFFT_DEVICE_MISMATCH, /*!< Attempt to run on a device using a plan baked for a different device. */ CLFFT_ENDSTATUS /* This value will always be last, and marks the length of clfftStatus. 
*/ }; typedef enum clfftStatus_ clfftStatus; diff --git a/src/library/plan.cpp b/src/library/plan.cpp index 61ae195e..f824976d 100644 --- a/src/library/plan.cpp +++ b/src/library/plan.cpp @@ -494,6 +494,9 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma return CLFFT_SUCCESS; } + // Store the device for which we are baking + clGetCommandQueueInfo(*commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &fftPlan->bakeDevice, NULL); + //find product of lengths size_t pLength = 1; switch(fftPlan->dim) diff --git a/src/library/plan.h b/src/library/plan.h index 11319e7e..acafa070 100644 --- a/src/library/plan.h +++ b/src/library/plan.h @@ -223,6 +223,10 @@ class FFTPlan size_t iDist, oDist; size_t batchsize; + // Note the device passed to BakePlan, assuming we are baking for one device + // TODO, change this logic for handling multiple GPUs/devices + cl_device_id bakeDevice; + // Devices that the user specified in the context passed to the create function std::vector< cl_device_id > devices; diff --git a/src/library/transform.cpp b/src/library/transform.cpp index e53e830f..067de7b1 100644 --- a/src/library/transform.cpp +++ b/src/library/transform.cpp @@ -63,6 +63,16 @@ clfftStatus clfftEnqueueTransform( OPENCL_V( clfftBakePlan( plHandle, numQueuesAndEvents, commQueues, NULL, NULL ), _T( "Failed to bake plan" ) ); } + + // get the device information + cl_device_id q_device; + clGetCommandQueueInfo(*commQueues, CL_QUEUE_DEVICE, sizeof(cl_device_id), &q_device, NULL); + + // verify if the current device is the same as the one used for baking the plan + if(q_device != fftPlan->bakeDevice) + return CLFFT_DEVICE_MISMATCH; + + if (fftPlan->inputLayout == CLFFT_REAL) dir = CLFFT_FORWARD; else if (fftPlan->outputLayout == CLFFT_REAL) dir = CLFFT_BACKWARD; From 192773076e5e27c94a44512b1636ac4268a0a481 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Thu, 31 Oct 2013 12:53:19 -0500 Subject: [PATCH 21/32] Logic added to packaging step to query 
dependencies for test executables, and package the dependencies in addition to the test executables if they are built. In addition, only for debug builds, debug runtimes, debug symbols and a snapshot of the code is packaged. Fixing the TravisCI build badge on the main readme.md file. --- README.md | 2 +- src/library/CMakeLists.txt | 31 +++++++++++ src/tests/CMakeLists.txt | 12 +++++ src/tests/copyTestDependencies.cmake.in | 72 +++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 src/tests/copyTestDependencies.cmake.in diff --git a/README.md b/README.md index 84c80107..e2fbd72f 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ clFFT ===== +[![Build Status](https://travis-ci.org/clMathLibraries/clFFT.png)](https://travis-ci.org/clMathLibraries/clFFT) clMath is a software library containing FFT and BLAS functions written in OpenCL. In addition to GPU devices, the libraries also support running on CPU devices to facilitate debugging and multicore programming. APPML 1.10 is the most current generally available version of the library, and pre-built binaries are available for download on both Linux and Windows platforms. -[![Build Status](https://travis-ci.org/kknox/clFFT.png)](https://travis-ci.org/kknox/clFFT) ## Introduction to clFFT The FFT is an implementation of the Discrete Fourier Transform (DFT) that makes use of symmetries in the FFT definition to reduce the mathematical intensity required from O(N2) to O(N log2( N )) when the sequence length N is the product of small prime factors. Currently, there is no standard API for FFT routines. Hardware vendors usually provide a set of high-performance FFTs optimized for their systems: no two vendors employ the same interfaces for their FFT routines. clFFT provides a set of FFT routines that are optimized for AMD graphics processors, but also are functional across CPU and other compute devices. 
diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index 6a3ea696..35475c2f 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -95,3 +95,34 @@ install( TARGETS clFFT LIBRARY DESTINATION lib${SUFFIX_LIB} ARCHIVE DESTINATION lib${SUFFIX_LIB}/import ) + +# For debug builds, include the debug runtimes into the package for testing on non-developer machines +set( CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_SKIP true ) +set( CMAKE_INSTALL_DEBUG_LIBRARIES true ) +set( CMAKE_INSTALL_DEBUG_LIBRARIES_ONLY true ) + +if( WIN32 ) + set( CLFFT_RUNTIME_DESTINATION bin${SUFFIX_BIN} ) +else( ) + set( CLFFT_RUNTIME_DESTINATION lib${SUFFIX_LIB} ) +endif( ) + +include( InstallRequiredSystemLibraries ) + +# Install necessary runtime files for debug builds +install( PROGRAMS ${CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS} + CONFIGURATIONS Debug + DESTINATION ${CLFFT_RUNTIME_DESTINATION} ) + +# Install all *.pdb files for debug builds +install( DIRECTORY ${PROJECT_BINARY_DIR}/staging/ + DESTINATION ${CLFFT_RUNTIME_DESTINATION} + OPTIONAL + CONFIGURATIONS Debug + FILES_MATCHING PATTERN "*.pdb" ) + +# Install a snapshot of the source as it was for this build; useful for the .pdb's +install( DIRECTORY ${PROJECT_SOURCE_DIR} + DESTINATION ${CLFFT_RUNTIME_DESTINATION} + OPTIONAL + CONFIGURATIONS Debug ) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 2fdf49de..8ee80a35 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -104,3 +104,15 @@ install( TARGETS Test LIBRARY DESTINATION lib${SUFFIX_LIB} ARCHIVE DESTINATION lib${SUFFIX_LIB}/import ) + +get_target_property( testLocation Test LOCATION ) + +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/copyTestDependencies.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/copyTestDependencies.cmake" + @ONLY +) + +# Register script at run at install time to analyze the executable and copy dependencies into package +install( SCRIPT "${CMAKE_CURRENT_BINARY_DIR}/copyTestDependencies.cmake") + \ No 
newline at end of file diff --git a/src/tests/copyTestDependencies.cmake.in b/src/tests/copyTestDependencies.cmake.in new file mode 100644 index 00000000..b6d13df6 --- /dev/null +++ b/src/tests/copyTestDependencies.cmake.in @@ -0,0 +1,72 @@ +# Customized install script for fftw test program; analyzes all the shared library dependencies and installs +# the dependencies into the package +include( GetPrerequisites ) + +# message( testLocation ": @testLocation@" ) + +# The Microsoft IDE presents a challenge because the full configuration is not known at cmake time +# This logic allows us to 'substitute' the proper confguration at install time +if( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Debug" ) + string( REPLACE "\$(Configuration)" "Debug" fixedTestLocation "@testLocation@" ) +elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "Release" ) + string( REPLACE "\$(Configuration)" "Release" fixedTestLocation "@testLocation@" ) +elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "MinSizeRel" ) + string( REPLACE "\$(Configuration)" "MinSizeRel" fixedTestLocation "@testLocation@" ) +elseif( "${CMAKE_INSTALL_CONFIG_NAME}" MATCHES "RelwithDebInfo" ) + string( REPLACE "\$(Configuration)" "RelwithDebInfo" fixedTestLocation "@testLocation@" ) +endif( ) + +# message( fixedTestLocation ": ${fixedTestLocation}" ) +# Get the directory that the test executable resides in; this helps get_prerequisites( ) find dependent libraries +get_filename_component( testName "${fixedTestLocation}" NAME ) +string( REPLACE ${testName} "" testDir ${fixedTestLocation} ) +string( REGEX REPLACE "/+$" "" testDir ${testDir} ) +# message( testDir ": ${testDir}" ) + +set( installPath "" ) +if( WIN32 ) + set( installPath "${CMAKE_INSTALL_PREFIX}/bin@SUFFIX_BIN@" ) +else( ) + set( installPath "${CMAKE_INSTALL_PREFIX}/lib@SUFFIX_LIB@" ) +endif( ) + +# Only search for dependencies that have ROOT defined +set( depList "" ) + +if( EXISTS "@FFTW_ROOT@" ) + list( APPEND depList "@FFTW_ROOT@/lib@SUFFIX_LIB@" ) +endif( ) + 
+if( EXISTS "@GTEST_ROOT@" ) + list( APPEND depList "@GTEST_ROOT@/lib@SUFFIX_LIB@" ) +endif( ) + +if( EXISTS "${testDir}" ) + list( APPEND depList "${testDir}" ) +endif( ) + +# message( STATUS "depList: ${depList}" ) + +# This retrieves a list of shared library dependencies from the target; they are not full path names +# Skip system dependencies and skip recursion +get_prerequisites( ${fixedTestLocation} testDependencies 1 0 "" "${depList}" ) + +# Loop on queried library dependencies and copy them into package +foreach( dep ${testDependencies} ) + # This converts the dependency into a full path + gp_resolve_item( "${fixedTestLocation}" "${dep}" "" "${depList}" dep_test_path ) + + # In linux, the dep_test_path may point to a symbolic link, we also need to copy real file + get_filename_component( dep_realpath "${dep_test_path}" REALPATH ) + get_filename_component( dep_name "${dep_test_path}" NAME ) + # message( STATUS "depName: ${dep_name}" ) + # message( STATUS "depFullPath: ${dep_test_path}" ) + # message( STATUS "dep_realpath: ${dep_realpath}" ) + + if( NOT EXISTS ${installPath}/${dep_name} ) + file( INSTALL ${dep_test_path} ${dep_realpath} + USE_SOURCE_PERMISSIONS + DESTINATION ${installPath} + ) + endif( ) +endforeach( ) From ed38aa4d92de9c63c5d1e34478715593457fb5e1 Mon Sep 17 00:00:00 2001 From: BenjaminCoquelle Date: Wed, 4 Dec 2013 18:33:18 +0000 Subject: [PATCH 22/32] fix bug when working with multiple devices. For the moment, the lib requires working with multiple contexts in that case. 
Therefore we need to get one kernel object per context --- src/library/generator.copy.cpp | 45 ++++++++++++++++------------- src/library/generator.stockham.cpp | 9 ++++-- src/library/generator.transpose.cpp | 10 +++++-- src/library/plan.cpp | 14 ++++----- src/library/repo.cpp | 45 +++++++++++++++++++++-------- src/library/repo.h | 19 +++++++----- src/library/transform.cpp | 2 +- 7 files changed, 93 insertions(+), 51 deletions(-) diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp index 5afe3cd2..b88adaef 100644 --- a/src/library/generator.copy.cpp +++ b/src/library/generator.copy.cpp @@ -447,28 +447,33 @@ using namespace CopyGenerator; template<> clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo, const cl_command_queue commQueueFFT ) const { - FFTKernelGenKeyParams params; - OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); + FFTKernelGenKeyParams params; + OPENCL_V( this->GetKernelGenKeyPvt (params), _T("GetKernelGenKey() failed!") ); + + std::string programCode; + Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE; + switch(pr) + { + case P_SINGLE: + { + CopyKernel kernel(params); + kernel.GenerateKernel(programCode); + } break; + case P_DOUBLE: + { + CopyKernel kernel(params); + kernel.GenerateKernel(programCode); + } break; + } - std::string programCode; - Precision pr = (params.fft_precision == CLFFT_SINGLE) ? P_SINGLE : P_DOUBLE; - switch(pr) - { - case P_SINGLE: - { - CopyKernel kernel(params); - kernel.GenerateKernel(programCode); - } break; - case P_DOUBLE: - { - CopyKernel kernel(params); - kernel.GenerateKernel(programCode); - } break; - } + cl_int status = CL_SUCCESS; + cl_context QueueContext = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL); + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); - OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode ), _T( "fftRepo.setclString() failed!" 
) ); - OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); + OPENCL_V( fftRepo.setProgramCode( Copy, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) ); + OPENCL_V( fftRepo.setProgramEntryPoints( Copy, params, "copy_c2h", "copy_h2c", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); - return CLFFT_SUCCESS; + return CLFFT_SUCCESS; } diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp index a1b7969a..f6f7241e 100644 --- a/src/library/generator.stockham.cpp +++ b/src/library/generator.stockham.cpp @@ -3238,6 +3238,11 @@ clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo, const cl_comm cl_device_id Device = NULL; status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, NULL); + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + + cl_context QueueContext = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL); + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); std::string programCode; @@ -3260,8 +3265,8 @@ clfftStatus FFTPlan::GenerateKernelPvt(FFTRepo& fftRepo, const cl_comm ReadKernelFromFile(programCode); #endif - OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode ), _T( "fftRepo.setclString() failed!" ) ); - OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); + OPENCL_V( fftRepo.setProgramCode( Stockham, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) ); + OPENCL_V( fftRepo.setProgramEntryPoints( Stockham, params, "fft_fwd", "fft_back", QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" 
) ); return CLFFT_SUCCESS; } diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp index 2c83b8a3..0615b99b 100644 --- a/src/library/generator.transpose.cpp +++ b/src/library/generator.transpose.cpp @@ -830,8 +830,14 @@ clfftStatus FFTPlan::GenerateKernelPvt ( FFTRepo& fftRepo, const cl_c std::string programCode; OPENCL_V( GenerateTransposeKernel( params, programCode ), _T( "GenerateTransposeKernel() failed!" ) ); - OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode ), _T( "fftRepo.setclString() failed!" ) ); - OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans" ), _T( "fftRepo.setProgramEntryPoint() failed!" ) ); + cl_int status = CL_SUCCESS; + cl_context QueueContext = NULL; + status = clGetCommandQueueInfo(commQueueFFT, CL_QUEUE_CONTEXT, sizeof(cl_context), &QueueContext, NULL); + + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + + OPENCL_V( fftRepo.setProgramCode( Transpose, params, programCode, QueueContext ), _T( "fftRepo.setclString() failed!" ) ); + OPENCL_V( fftRepo.setProgramEntryPoints( Transpose, params, "fft_trans", "fft_trans",QueueContext ), _T( "fftRepo.setProgramEntryPoint() failed!" 
) ); return CLFFT_SUCCESS; } diff --git a/src/library/plan.cpp b/src/library/plan.cpp index 61ae195e..c4064e4f 100644 --- a/src/library/plan.cpp +++ b/src/library/plan.cpp @@ -194,7 +194,7 @@ clfftStatus clfftCreateDefaultPlan( clfftPlanHandle* plHandle, cl_context contex } // Read the kernels that this plan uses from file, and store into the plan -clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams ) +clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators gen, const FFTKernelGenKeyParams& fftParams, const cl_context& context ) { FFTRepo& fftRepo = FFTRepo::getInstance( ); @@ -220,7 +220,7 @@ clfftStatus WriteKernel( const clfftPlanHandle plHandle, const clfftGenerators g } std::string kernel; - OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel ), _T( "fftRepo.getProgramCode failed." ) ); + OPENCL_V( fftRepo.getProgramCode( gen, fftParams, kernel, context ), _T( "fftRepo.getProgramCode failed." ) ); kernelFile.get( ) << kernel << std::endl; @@ -250,16 +250,16 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan OPENCL_V( fftPlan->GetKernelGenKey( fftParams ), _T("GetKernelGenKey() failed!") ); cl_program program; - if( fftRepo.getclProgram( gen, fftParams, program ) == CLFFT_INVALID_PROGRAM ) + if( fftRepo.getclProgram( gen, fftParams, program, fftPlan->context ) == CLFFT_INVALID_PROGRAM ) { // If the user wishes us to write the kernels out to disk, we do so if( fftRepo.setupData.debugFlags & CLFFT_DUMP_PROGRAMS ) { - OPENCL_V( WriteKernel( plHandle, gen, fftParams ), _T( "WriteKernel failed." ) ); + OPENCL_V( WriteKernel( plHandle, gen, fftParams, fftPlan->context ), _T( "WriteKernel failed." ) ); } std::string programCode; - OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode ), _T( "fftRepo.getProgramCode failed." 
) ); + OPENCL_V( fftRepo.getProgramCode( gen, fftParams, programCode, fftPlan->context ), _T( "fftRepo.getProgramCode failed." ) ); const char* source = programCode.c_str(); program = clCreateProgramWithSource( fftPlan->context, 1, &source, NULL, &status ); @@ -317,7 +317,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan if( fftRepo.getclKernel( program, CLFFT_FORWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; - OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) ); + OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_FORWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." ) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); @@ -331,7 +331,7 @@ clfftStatus CompileKernels( const cl_command_queue commQueueFFT, const clfftPlan if( fftRepo.getclKernel( program, CLFFT_BACKWARD, kernel ) == CLFFT_INVALID_KERNEL ) { std::string entryPoint; - OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint ), _T( "fftRepo.getProgramEntryPoint failed." ) ); + OPENCL_V( fftRepo.getProgramEntryPoint( gen, fftParams, CLFFT_BACKWARD, entryPoint, fftPlan->context ), _T( "fftRepo.getProgramEntryPoint failed." 
) ); kernel = clCreateKernel( program, entryPoint.c_str( ), &status ); OPENCL_V( status, _T( "clCreateKernel failed" ) ); diff --git a/src/library/repo.cpp b/src/library/repo.cpp index b41d6578..0b6e532a 100644 --- a/src/library/repo.cpp +++ b/src/library/repo.cpp @@ -34,6 +34,9 @@ size_t FFTRepo::planCount = 1; void* FFTRepo::timerHandle = NULL; GpuStatTimer* FFTRepo::pStatTimer = NULL; + + + clfftStatus FFTRepo::releaseResources( ) { scopedLock sLock( lockRepo, _T( "releaseResources" ) ); @@ -88,11 +91,13 @@ clfftStatus FFTRepo::releaseResources( ) return CLFFT_SUCCESS; } -clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel ) +clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const std::string& kernel, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "setProgramCode" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); + // Prefix copyright statement at the top of generated kernels std::stringstream ss; @@ -121,26 +126,28 @@ clfftStatus FFTRepo::setProgramCode( const clfftGenerators gen, const FFTKernelG return CLFFT_SUCCESS; } -clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel ) +clfftStatus FFTRepo::getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, std::string& kernel, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "getProgramCode" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key); if( pos == mapFFTs.end( ) ) return CLFFT_FILE_NOT_FOUND; - kernel = 
pos->second.ProgramString; + kernel = pos->second.ProgramString; return CLFFT_SUCCESS; } clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, - const char * kernel_fwd, const char * kernel_back ) + const char * kernel_fwd, const char * kernel_back, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "setProgramEntryPoints" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); fftRepoValue& fft = mapFFTs[ key ]; fft.EntryPoint_fwd = kernel_fwd; @@ -150,11 +157,12 @@ clfftStatus FFTRepo::setProgramEntryPoints( const clfftGenerators gen, const FFT } clfftStatus FFTRepo::getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, - clfftDirection dir, std::string& kernel ) + clfftDirection dir, std::string& kernel, const cl_context& context ) { scopedLock sLock( lockRepo, _T( "getProgramEntryPoint" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, context); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key ); if( pos == mapFFTs.end( ) ) @@ -182,7 +190,14 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen { scopedLock sLock( lockRepo, _T( "setclProgram" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + cl_int status = CL_SUCCESS; + cl_context ProgramContext = NULL; + status = clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgramContext, NULL); + + OPENCL_V( status, _T( "clGetCommandQueueInfo failed" ) ); + + std::pair Params = std::make_pair(fftParam, ProgramContext); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key ); if( pos == mapFFTs.end( ) ) @@ -198,11 
+213,12 @@ clfftStatus FFTRepo::setclProgram( const clfftGenerators gen, const FFTKernelGen return CLFFT_SUCCESS; } -clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog ) +clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& prog, const cl_context& PlanContext ) { scopedLock sLock( lockRepo, _T( "getclProgram" ) ); - std::pair< clfftGenerators, FFTKernelGenKeyParams > key = std::make_pair( gen, fftParam ); + std::pair Params = std::make_pair(fftParam, PlanContext); + fftRepoKey key = std::make_pair( gen, Params ); fftRepo_iterator pos = mapFFTs.find( key ); if( pos == mapFFTs.end( ) ) @@ -210,6 +226,11 @@ clfftStatus FFTRepo::getclProgram( const clfftGenerators gen, const FFTKernelGen prog = pos->second.clProgram; if (NULL == prog) return CLFFT_INVALID_PROGRAM; + + cl_context ProgContext; + clGetProgramInfo(prog, CL_PROGRAM_CONTEXT, sizeof(cl_context), &ProgContext, NULL); + if (PlanContext!=ProgContext) + return CLFFT_INVALID_PROGRAM; return CLFFT_SUCCESS; } diff --git a/src/library/repo.h b/src/library/repo.h index f68242de..f2619e79 100644 --- a/src/library/repo.h +++ b/src/library/repo.h @@ -22,6 +22,7 @@ #include "private.h" #include "plan.h" #include "lock.h" + #include "../statTimer/statisticalTimer.GPU.h" @@ -48,10 +49,14 @@ class FFTRepo // Map structure to map parameters that a generator uses to a specific set of kernels that the generator // has created - typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey; + //typedef std::pair< clfftGenerators, FFTKernelGenKeyParams > fftRepoKey; + + typedef std::pair< clfftGenerators, std::pair > fftRepoKey; typedef std::map< fftRepoKey, fftRepoValue > fftRepoType; typedef fftRepoType::iterator fftRepo_iterator; + + fftRepoType mapFFTs; struct fftKernels { @@ -134,15 +139,15 @@ class FFTRepo clfftStatus releaseResources( ); - clfftStatus setProgramCode( const clfftGenerators 
gen, const FFTKernelGenKeyParams&, const std::string& kernel ); - clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel ); + clfftStatus setProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, const std::string& kernel, const cl_context& context); + clfftStatus getProgramCode( const clfftGenerators gen, const FFTKernelGenKeyParams&, std::string& kernel, const cl_context& context ); clfftStatus setProgramEntryPoints( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, - const char * kernel_fwd, const char * kernel_back ); - clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel ); + const char * kernel_fwd, const char * kernel_back, const cl_context& context ); + clfftStatus getProgramEntryPoint( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, clfftDirection dir, std::string& kernel , const cl_context& context); clfftStatus setclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, const cl_program& kernel ); - clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel ); + clfftStatus getclProgram( const clfftGenerators gen, const FFTKernelGenKeyParams& fftParam, cl_program& kernel, const cl_context& PlanContext ); clfftStatus setclKernel ( cl_program prog, clfftDirection dir, const cl_kernel& kernel ); clfftStatus getclKernel ( cl_program prog, clfftDirection dir, cl_kernel& kernel ); @@ -150,9 +155,9 @@ class FFTRepo clfftStatus createPlan( clfftPlanHandle* plHandle, FFTPlan*& fftPlan ); clfftStatus getPlan( clfftPlanHandle plHandle, FFTPlan*& fftPlan, lockRAII*& planLock ); clfftStatus deletePlan( clfftPlanHandle* plHandle ); + }; - #endif diff --git a/src/library/transform.cpp b/src/library/transform.cpp index e53e830f..b1546f0c 100644 --- a/src/library/transform.cpp +++ b/src/library/transform.cpp @@ 
-1152,7 +1152,7 @@ clfftStatus clfftEnqueueTransform( cl_program prog; cl_kernel kern; - OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog ), _T( "fftRepo.getclProgram failed" ) ); + OPENCL_V( fftRepo.getclProgram( fftPlan->gen, fftParams, prog, fftPlan->context ), _T( "fftRepo.getclProgram failed" ) ); OPENCL_V( fftRepo.getclKernel( prog, dir, kern ), _T( "fftRepo.getclKernels failed" ) ); From 2bc3b58a68d9a35cf09833ff021ff4bc8c0b7ac4 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Fri, 24 Jan 2014 17:51:00 -0600 Subject: [PATCH 23/32] Fixes #14 Renamed version.h to clFFT.version.h If the clBLAS and clFFT projects are installed into the same directory, there is a filename collision with version.h. Only the last file to be installed survived, and would break the other project. --- src/CMakeLists.txt | 4 ++-- src/include/clFFT.h | 2 +- src/include/{version.h.in => clFFT.version.h.in} | 0 src/tests/gtest_main.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename src/include/{version.h.in => clFFT.version.h.in} (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a791b3ff..ad2484b1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -225,9 +225,9 @@ message( STATUS "CMAKE_CXX_COMPILER relwithdebinfo flags: " ${CMAKE_CXX_FLAGS_RE message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} ) # configure a header file to pass the CMake version settings to the source, and package the header files in the output archive -configure_file( "${PROJECT_SOURCE_DIR}/include/version.h.in" "${PROJECT_BINARY_DIR}/include/version.h" ) +configure_file( "${PROJECT_SOURCE_DIR}/include/clFFT.version.h.in" "${PROJECT_BINARY_DIR}/include/clFFT.version.h" ) install( FILES - "${PROJECT_BINARY_DIR}/include/version.h" + "${PROJECT_BINARY_DIR}/include/clFFT.version.h" "include/clFFT.h" "include/clAmdFft.h" "include/clAmdFft.version.h" diff --git a/src/include/clFFT.h b/src/include/clFFT.h index 738141fd..583a76b9 100644 --- 
a/src/include/clFFT.h +++ b/src/include/clFFT.h @@ -33,7 +33,7 @@ #include #endif -#include "version.h" +#include "clFFT.version.h" /*! This preprocessor definition is the standard way of making exporting APIs * from a DLL simpler. All files within this DLL are compiled with the CLFFT_EXPORTS diff --git a/src/include/version.h.in b/src/include/clFFT.version.h.in similarity index 100% rename from src/include/version.h.in rename to src/include/clFFT.version.h.in diff --git a/src/tests/gtest_main.cpp b/src/tests/gtest_main.cpp index ec22e98c..b30c3eac 100644 --- a/src/tests/gtest_main.cpp +++ b/src/tests/gtest_main.cpp @@ -18,7 +18,7 @@ #include #include #include "clFFT.h" -#include "version.h" +#include "clFFT.version.h" #include "test_constants.h" #include "../client/openCL.misc.h" #include "unicode.compatibility.h" From 1572a59c09de463b3bfdc1c4db10d0ddbc1f3d67 Mon Sep 17 00:00:00 2001 From: Andrey Gursky Date: Wed, 5 Feb 2014 14:42:24 +0100 Subject: [PATCH 24/32] Adding missing '-pthread' to the C/C++ compiler flags, since the library makes use of pthreads --- src/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ad2484b1..9c6f1b05 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -197,6 +197,9 @@ elseif( CMAKE_COMPILER_IS_GNUCXX ) # we only want c++0x if we're using gcc 4.5.2 set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) endif() + + set( CMAKE_CXX_FLAGS "-pthread ${CMAKE_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "-pthread ${CMAKE_C_FLAGS}" ) if( BUILD64 ) set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" ) From 1396b7fd1ee67cbefc792a96893c0132e18a3e97 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Fri, 7 Feb 2014 17:21:25 -0600 Subject: [PATCH 25/32] Changing the behavior of linking to Boost such that system directories are searched on linux. This can still be disabled by setting the flag Boost_NO_SYSTEM_PATHS. 
--- src/CMakeLists.txt | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ad2484b1..f3431564 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -124,22 +124,29 @@ endif( ) set( SUFFIX_LIB ${SUFFIX_LIB_DEFAULT} CACHE STRING "String to append to 'lib' install path" ) set( SUFFIX_BIN ${SUFFIX_BIN_DEFAULT} CACHE STRING "String to append to 'bin' install path" ) +# Useful variables to configure FindBoost.cmake +# set( Boost_USE_MULTITHREADED ON ) +# set( Boost_DETAILED_FAILURE_MSG ON ) +# set( Boost_DEBUG ON ) +# set( Boost_NO_SYSTEM_PATHS ON ) + # Client is built only if boost is found; on windows, we need vs10 or higher # Find Boost on the system, and configure the type of boost build we want -set( Boost_USE_MULTITHREADED ON ) -set( Boost_USE_STATIC_LIBS ON ) -set( Boost_DETAILED_FAILURE_MSG ON ) -set( Boost_DEBUG ON ) -set( Boost_ADDITIONAL_VERSIONS "1.46.1" "1.46" "1.44.0" "1.44" ) - -# Default Boost_NO_SYSTEM_PATHS to TRUE if the user does not specify themselves -if( NOT DEFINED Boost_NO_SYSTEM_PATHS AND UNIX ) - set( Boost_NO_SYSTEM_PATHS ON ) +if( NOT DEFINED Boost_USE_STATIC_LIBS ) + set( Boost_USE_STATIC_LIBS ON ) +endif( ) + +if( NOT DEFINED Boost_USE_STATIC_RUNTIME ) + set( Boost_USE_STATIC_RUNTIME OFF ) endif( ) # This will define Boost_FOUND find_package( Boost 1.33.0 COMPONENTS program_options ) -message( STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}" ) +if( Boost_FOUND ) + message( STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}" ) +else( ) + message( WARNING "Try setting Boost_DEBUG and Boost_DETAILED_FAILURE_MSG for more information" ) +endif( ) # This will define OPENCL_FOUND find_package( OpenCL ) From 3c62d97113182eb43936d0f3f7549571f698c171 Mon Sep 17 00:00:00 2001 From: Brian Kloppenborg Date: Fri, 7 Feb 2014 18:56:03 -0500 Subject: [PATCH 26/32] Added CMake find script.
The script searches in /usr/local by default. The user may specify CLFFT_ROOT if clFFT is installed in a non-standard location. --- src/FindclFFT.cmake | 61 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 src/FindclFFT.cmake diff --git a/src/FindclFFT.cmake b/src/FindclFFT.cmake new file mode 100644 index 00000000..92dcc829 --- /dev/null +++ b/src/FindclFFT.cmake @@ -0,0 +1,61 @@ +# - Find clFFT, AMD's OpenCL FFT library + +# This script defines the following variables: +# CLFFT_INCLUDE_DIRS - Location of clFFT's include directory. +# CLFFT_LIBRARIES - Location of clFFT's libraries +# CLFFT_FOUND - True if clFFT has been located +# +# If your clFFT installation is not in a standard installation directory, you +# may provide a hint to where it may be found. Simply set the value CLFFT_ROOT +# to the directory containing 'include/clFFT.h' prior to calling this script. +# +# By default this script will attempt to find the 32-bit version of clFFT. +# If you desire to use the 64-bit version instead, set +# set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON) +# prior to calling this script. +# +#============================================================================= +# Copyright 2014 Brian Kloppenborg +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +IF(CLFFT_INCLUDE_DIRS) + # Already in cache, be silent + set (CLFFT_FIND_QUIETLY TRUE) +ENDIF (CLFFT_INCLUDE_DIRS) + +FIND_PATH(CLFFT_ROOT_DIR + NAMES include/clFFT.h + HINTS /usr/local/ ${CLFFT_ROOT} + DOC "clFFT root directory.") + +FIND_PATH(_CLFFT_INCLUDE_DIRS + NAMES clFFT.h + HINTS ${CLFFT_ROOT_DIR}/include + DOC "clFFT Include directory") + +FIND_LIBRARY(_CLFFT_LIBRARY + NAMES clFFT + HINTS ${CLFFT_ROOT_DIR}/lib) + +SET(CLFFT_INCLUDE_DIRS ${_CLFFT_INCLUDE_DIRS}) +SET(CLFFT_LIBRARIES ${_CLFFT_LIBRARY}) + +# handle the QUIETLY and REQUIRED arguments and set CLFFT_FOUND to TRUE if +# all listed variables are TRUE +INCLUDE (FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(CLFFT DEFAULT_MSG CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS) +MARK_AS_ADVANCED(CLFFT_LIBRARIES CLFFT_INCLUDE_DIRS) + + From b4484e9c22975c596da2f32e949296a690aae4d1 Mon Sep 17 00:00:00 2001 From: bragadeesh Date: Thu, 13 Feb 2014 11:36:11 -0600 Subject: [PATCH 27/32] fixing documentation --- README.md | 2 +- src/library/mainpage.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e2fbd72f..304bcd13 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ clFFT clMath is a software library containing FFT and BLAS functions written in OpenCL. In addition to GPU devices, the libraries also support running on CPU devices to facilitate debugging and multicore programming. -APPML 1.10 is the most current generally available version of the library, and pre-built binaries are available for download on both Linux and Windows platforms. +clMath 2.1 is the latest version and is available as source only. clMath's predecessor APPML 1.10 has pre-built binaries available for download on both Linux and Windows platforms. 
## Introduction to clFFT diff --git a/src/library/mainpage.h b/src/library/mainpage.h index 326ad7a1..70e014c7 100644 --- a/src/library/mainpage.h +++ b/src/library/mainpage.h @@ -538,6 +538,12 @@ In-place transforms:
  • \c CLFFT_HERMITIAN_INTERLEAVED to \c CLFFT_REAL +@subsection ExplicitStrides Setting strides + +The library currently requires the user to explicitly set input and output strides for real transforms. See +the following examples to understand what values to use for input and output strides under different scenarios. The +examples only show typical usages. The user has flexibility in allocating their buffers and laying out data according +to their needs. @subsection RealExamples Examples From 4dee2d5852be7353a9d90d8c44454d785b2201f0 Mon Sep 17 00:00:00 2001 From: Dominic Meiser Date: Fri, 21 Mar 2014 12:57:01 -0600 Subject: [PATCH 28/32] Add an option to control shared vs static library builds. --- src/CMakeLists.txt | 1 + src/library/CMakeLists.txt | 6 +++++- src/statTimer/CMakeLists.txt | 6 +++++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9bdb8516..6f201b54 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -70,6 +70,7 @@ option( BUILD_RUNTIME "Build the FFT runtime library" ON ) option( BUILD_CLIENT "Build a command line clFFT client program with a variety of configurable parameters (dependency on Boost)" ON ) option( BUILD_TEST "Build the library testing suite (dependency on google test, Boost, and FFTW)" ON ) option( BUILD_LOADLIBRARIES "Build the optional dynamic load libraries that the FFT runtime will search for" ON ) +option( BUILD_SHARED_LIBRARY "Build shared libraries." ON) # If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. 
# Otherwise, create a sensible default that the user can change diff --git a/src/library/CMakeLists.txt b/src/library/CMakeLists.txt index 35475c2f..63ac0f9c 100644 --- a/src/library/CMakeLists.txt +++ b/src/library/CMakeLists.txt @@ -74,7 +74,11 @@ add_definitions( "/DCLFFT_EXPORTS" ) # Include standard OpenCL headers include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include ) -add_library( clFFT SHARED ${clFFT.Files} ) +if(BUILD_SHARED_LIBRARY) + add_library( clFFT SHARED ${clFFT.Files} ) +else() + add_library( clFFT STATIC ${clFFT.Files} ) +endif() target_link_libraries( clFFT ${OPENCL_LIBRARIES} ) set_target_properties( clFFT PROPERTIES VERSION ${CLFFT_VERSION} ) diff --git a/src/statTimer/CMakeLists.txt b/src/statTimer/CMakeLists.txt index 4976b30d..a635bf12 100644 --- a/src/statTimer/CMakeLists.txt +++ b/src/statTimer/CMakeLists.txt @@ -63,7 +63,11 @@ add_definitions( "/DSTATTIMER_EXPORTS" ) # Include standard OpenCL headers include_directories( ${OPENCL_INCLUDE_DIRS} ${PROJECT_BINARY_DIR}/include ../include ) -add_library( StatTimer SHARED ${StatTimer.Files} ) +if(BUILD_SHARED_LIBRARY) + add_library( StatTimer SHARED ${StatTimer.Files} ) +else() + add_library( StatTimer STATIC ${StatTimer.Files} ) +endif() set_target_properties( StatTimer PROPERTIES VERSION ${CLFFT_VERSION} ) set_target_properties( StatTimer PROPERTIES SOVERSION ${CLFFT_SOVERSION} ) set_target_properties( StatTimer PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" ) From 0db6e07afbf2b1cafed401e6b9d46ab2d503f22f Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Thu, 27 Mar 2014 14:50:38 -0500 Subject: [PATCH 29/32] Improved detection of the OpenCL dependencies during the 'copy' step A little refactoring of the FindOpenCL & FindFFTW to make it more standard --- src/FindFFTW.cmake | 73 +++++++++++++------------ src/FindOpenCL.cmake | 62 ++++++++++----------- src/tests/copyTestDependencies.cmake.in | 8 +++ 3 files changed, 76 insertions(+), 67 
deletions(-) diff --git a/src/FindFFTW.cmake b/src/FindFFTW.cmake index 50a632e8..cac5fc64 100644 --- a/src/FindFFTW.cmake +++ b/src/FindFFTW.cmake @@ -33,7 +33,9 @@ # FFTW_ROOT - (as a CMake or environment variable) # The root directory of the fftw install prefix # -#----------------------- +# FIND_LIBRARY_USE_LIB64_PATHS - Global property that controls whether +# findFFTW should search for 64bit or 32bit libs +#----------------------------------------------- # Example Usage: # # find_package(FFTW REQUIRED) @@ -42,53 +44,52 @@ # add_executable(foo foo.cc) # target_link_libraries(foo ${FFTW_LIBRARIES}) # -#----------------------- -if( DEFINED ENV{FFTW_ROOT} ) - set( FFTW_ROOT $ENV{FFTW_ROOT} CACHE PATH "Environment variable defining the root of FFTW" ) -else( ) - set( FFTW_ROOT "/usr/lib" CACHE PATH "Environment variable defining the root of FFTW" ) -endif( ) +#----------------------------------------------- find_path(FFTW_INCLUDE_DIRS - NAMES fftw3.h + NAMES fftw3.h HINTS - ${FFTW_ROOT}/api ${FFTW_ROOT}/include + ${FFTW_ROOT}/api ${FFTW_ROOT} - $ENV{FFTW_ROOT}/api $ENV{FFTW_ROOT}/include - $ENV{FFTW_ROOT} - PATHS - /usr/include - /usr/local/include + $ENV{FFTW_ROOT}/api + ENV FFTW_ROOT + PATHS + /usr/include + /usr/local/include ) mark_as_advanced( FFTW_INCLUDE_DIRS ) find_library( FFTW_SINGLE_PRECISION_LIBRARIES - NAMES fftw3f libfftw3f-3 - HINTS - ${FFTW_ROOT} - ${FFTW_ROOT}/lib - $ENV{FFTW_ROOT} - $ENV{FFTW_ROOT}/lib - PATHS - /usr/lib - /usr/local/lib - DOC "FFTW dynamic library" + NAMES fftw3f libfftw3f-3 + HINTS + ${FFTW_ROOT}/lib + ${FFTW_ROOT}/.libs + ${FFTW_ROOT} + $ENV{FFTW_ROOT}/lib + $ENV{FFTW_ROOT}/.libs + ENV FFTW_ROOT + PATHS + /usr/lib + /usr/local/lib + DOC "FFTW dynamic library" ) mark_as_advanced( FFTW_SINGLE_PRECISION_LIBRARIES ) find_library( FFTW_DOUBLE_PRECISION_LIBRARIES - NAMES fftw3 libfftw3-3 - HINTS - ${FFTW_ROOT} - ${FFTW_ROOT}/lib - $ENV{FFTW_ROOT} - $ENV{FFTW_ROOT}/lib - PATHS - /usr/lib - /usr/local/lib - DOC "FFTW dynamic library" 
+ NAMES fftw3 libfftw3-3 + HINTS + ${FFTW_ROOT}/lib + ${FFTW_ROOT}/.libs + ${FFTW_ROOT} + $ENV{FFTW_ROOT}/lib + $ENV{FFTW_ROOT}/.libs + ENV FFTW_ROOT + PATHS + /usr/lib + /usr/local/lib + DOC "FFTW dynamic library" ) mark_as_advanced( FFTW_DOUBLE_PRECISION_LIBRARIES ) @@ -99,6 +100,6 @@ include( FindPackageHandleStandardArgs ) FIND_PACKAGE_HANDLE_STANDARD_ARGS( FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDE_DIRS ) if( NOT FFTW_FOUND ) - message( STATUS "FindFFTW looked for single precision libraries named: fftw3f or libfftw3f-3" ) - message( STATUS "FindFFTW looked for double precision libraries named: fftw3 or libfftw3-3" ) + message( STATUS "FindFFTW looked for single precision libraries named: fftw3f or libfftw3f-3" ) + message( STATUS "FindFFTW looked for double precision libraries named: fftw3 or libfftw3-3" ) endif() diff --git a/src/FindOpenCL.cmake b/src/FindOpenCL.cmake index ab58252b..8725612f 100644 --- a/src/FindOpenCL.cmake +++ b/src/FindOpenCL.cmake @@ -46,23 +46,17 @@ # target_link_libraries(foo ${OPENCL_LIBRARIES}) # #----------------------- -if( DEFINED ENV{AMDAPPSDKROOT} ) - set( OPENCL_ROOT $ENV{AMDAPPSDKROOT} CACHE PATH "Environment variable defining the root of OPENCL implementation" ) -elseif( DEFINED ENV{CUDA_PATH} ) - set( OPENCL_ROOT $ENV{CUDA_PATH} CACHE PATH "Environment variable defining the root of OPENCL implementation" ) -else( ) - set( OPENCL_ROOT "/usr/lib" CACHE PATH "Environment variable defining the root of OPENCL implementation" ) -endif( ) find_path(OPENCL_INCLUDE_DIRS - NAMES OpenCL/cl.h CL/cl.h + NAMES OpenCL/cl.h CL/cl.h HINTS - ${OPENCL_ROOT}/include - ENV AMDAPPSDKROOT/include - PATHS - /usr/include - /usr/local/include - DOC "OpenCL header file path" + ${OPENCL_ROOT}/include + $ENV{AMDAPPSDKROOT}/include + $ENV{CUDA_PATH}/include + PATHS + /usr/include + /usr/local/include + DOC "OpenCL header file path" ) mark_as_advanced( OPENCL_INCLUDE_DIRS ) @@ -70,23 +64,29 @@ mark_as_advanced( OPENCL_INCLUDE_DIRS ) get_property( LIB64 
GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ) if( LIB64 ) - find_library( OPENCL_LIBRARIES - NAMES OpenCL - HINTS - ${OPENCL_ROOT}/lib - ENV AMDAPPSDKROOT/lib - DOC "OpenCL dynamic library path" - PATH_SUFFIXES x86_64 x64 - ) + find_library( OPENCL_LIBRARIES + NAMES OpenCL + HINTS + ${OPENCL_ROOT}/lib + $ENV{AMDAPPSDKROOT}/lib + $ENV{CUDA_PATH}/lib + DOC "OpenCL dynamic library path" + PATH_SUFFIXES x86_64 x64 + PATHS + /usr/lib + ) else( ) - find_library( OPENCL_LIBRARIES - NAMES OpenCL - HINTS - ${OPENCL_ROOT}/lib - ENV AMDAPPSDKROOT/lib - DOC "OpenCL dynamic library path" - PATH_SUFFIXES x86 Win32 - ) + find_library( OPENCL_LIBRARIES + NAMES OpenCL + HINTS + ${OPENCL_ROOT}/lib + $ENV{AMDAPPSDKROOT}/lib + $ENV{CUDA_PATH}/lib + DOC "OpenCL dynamic library path" + PATH_SUFFIXES x86 Win32 + PATHS + /usr/lib + ) endif( ) mark_as_advanced( OPENCL_LIBRARIES ) @@ -94,5 +94,5 @@ include( FindPackageHandleStandardArgs ) FIND_PACKAGE_HANDLE_STANDARD_ARGS( OPENCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS ) if( NOT OPENCL_FOUND ) - message( STATUS "FindOpenCL looked for libraries named: OpenCL" ) + message( STATUS "FindOpenCL looked for libraries named: OpenCL" ) endif() diff --git a/src/tests/copyTestDependencies.cmake.in b/src/tests/copyTestDependencies.cmake.in index b6d13df6..e4f24a61 100644 --- a/src/tests/copyTestDependencies.cmake.in +++ b/src/tests/copyTestDependencies.cmake.in @@ -41,6 +41,14 @@ if( EXISTS "@GTEST_ROOT@" ) list( APPEND depList "@GTEST_ROOT@/lib@SUFFIX_LIB@" ) endif( ) +if( EXISTS "@OPENCL_LIBRARIES@" ) + get_filename_component( clLibName "@OPENCL_LIBRARIES@" NAME ) + string( REPLACE ${clLibName} "" clLibDir "@OPENCL_LIBRARIES@" ) + string( REGEX REPLACE "/+$" "" clLibDir ${clLibDir} ) + + list( APPEND depList "${clLibDir}" ) +endif( ) + if( EXISTS "${testDir}" ) list( APPEND depList "${testDir}" ) endif( ) From ee5da40d949f3858af3ff3757c0c31094bcaacee Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Tue, 1 Apr 2014 13:53:38 -0500 Subject: 
[PATCH 30/32] Update to the cmake script to copy dependencies into packages This should make the logic more robust in windows --- src/tests/copyTestDependencies.cmake.in | 46 ++++++++++++++++++------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/tests/copyTestDependencies.cmake.in b/src/tests/copyTestDependencies.cmake.in index e4f24a61..2c5e4471 100644 --- a/src/tests/copyTestDependencies.cmake.in +++ b/src/tests/copyTestDependencies.cmake.in @@ -18,9 +18,7 @@ endif( ) # message( fixedTestLocation ": ${fixedTestLocation}" ) # Get the directory that the test executable resides in; this helps get_prerequisites( ) find dependent libraries -get_filename_component( testName "${fixedTestLocation}" NAME ) -string( REPLACE ${testName} "" testDir ${fixedTestLocation} ) -string( REGEX REPLACE "/+$" "" testDir ${testDir} ) +get_filename_component( testDir "${fixedTestLocation}" DIRECTORY ) # message( testDir ": ${testDir}" ) set( installPath "" ) @@ -33,27 +31,49 @@ endif( ) # Only search for dependencies that have ROOT defined set( depList "" ) -if( EXISTS "@FFTW_ROOT@" ) - list( APPEND depList "@FFTW_ROOT@/lib@SUFFIX_LIB@" ) +#This logic assumes that FindFFTW.cmake has been called +get_filename_component( fftwDirSingle "@FFTW_SINGLE_PRECISION_LIBRARIES@" DIRECTORY ) +get_filename_component( fftwDirDouble "@FFTW_DOUBLE_PRECISION_LIBRARIES@" DIRECTORY ) + +if( EXISTS "${fftwDirSingle}" ) + list( APPEND depList "${fftwDirSingle}" ) +# message( "fftwDirSingle: ${fftwDirSingle}" ) +endif( ) + +string( COMPARE NOTEQUAL "${fftwDirSingle}" "${fftwDirDouble}" fftwDiffDirs ) +if( ${fftwDiffDirs} AND EXISTS "${fftwDirDouble}" ) + list( APPEND depList "${fftwDirDouble}" ) +# message( "fftwDirDouble: ${fftwDirDouble}" ) +endif( ) + +#This logic assumes that FindGTest.cmake has been called +get_filename_component( gtestDir "@GTEST_LIBRARY@" DIRECTORY ) +get_filename_component( gtestDirDebug "@GTEST_LIBRARY_DEBUG@" DIRECTORY ) + +if( EXISTS "${gtestDir}" ) + list( 
APPEND depList "${gtestDir}" ) +# message( "gtestDir: ${gtestDir}" ) endif( ) -if( EXISTS "@GTEST_ROOT@" ) - list( APPEND depList "@GTEST_ROOT@/lib@SUFFIX_LIB@" ) +string( COMPARE NOTEQUAL "${gtestDir}" "${gtestDirDebug}" gtestDiffDirs ) +if( ${gtestDiffDirs} AND EXISTS "${gtestDirDebug}" ) + list( APPEND depList "${gtestDirDebug}" ) +# message( "gtestDirDebug: ${gtestDirDebug}" ) endif( ) -if( EXISTS "@OPENCL_LIBRARIES@" ) - get_filename_component( clLibName "@OPENCL_LIBRARIES@" NAME ) - string( REPLACE ${clLibName} "" clLibDir "@OPENCL_LIBRARIES@" ) - string( REGEX REPLACE "/+$" "" clLibDir ${clLibDir} ) +#This logic assumes that FindOpenCL.cmake has been called +get_filename_component( openclDir "@OPENCL_LIBRARIES@" DIRECTORY ) - list( APPEND depList "${clLibDir}" ) +if( EXISTS "${openclDir}" ) + list( APPEND depList "${openclDir}" ) +# message( "openclDir: ${openclDir}" ) endif( ) if( EXISTS "${testDir}" ) list( APPEND depList "${testDir}" ) endif( ) -# message( STATUS "depList: ${depList}" ) +# message( "depList: ${depList}" ) # This retrieves a list of shared library dependencies from the target; they are not full path names # Skip system dependencies and skip recursion From 02f07f4ea9464a0fb14f943dc7e10ca39585ee67 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Tue, 1 Apr 2014 15:38:47 -0500 Subject: [PATCH 31/32] Travis fix: The DIRECTORY tag on get_filename_component( ) was introduced in cmake 2.8.10.2, which Travis CI does not have by default. Revert to the old name PATH. 
--- src/tests/copyTestDependencies.cmake.in | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tests/copyTestDependencies.cmake.in b/src/tests/copyTestDependencies.cmake.in index 2c5e4471..9397bcf4 100644 --- a/src/tests/copyTestDependencies.cmake.in +++ b/src/tests/copyTestDependencies.cmake.in @@ -18,7 +18,7 @@ endif( ) # message( fixedTestLocation ": ${fixedTestLocation}" ) # Get the directory that the test executable resides in; this helps get_prerequisites( ) find dependent libraries -get_filename_component( testDir "${fixedTestLocation}" DIRECTORY ) +get_filename_component( testDir "${fixedTestLocation}" PATH ) # message( testDir ": ${testDir}" ) set( installPath "" ) @@ -32,8 +32,8 @@ endif( ) set( depList "" ) #This logic assumes that FindFFTW.cmake has been called -get_filename_component( fftwDirSingle "@FFTW_SINGLE_PRECISION_LIBRARIES@" DIRECTORY ) -get_filename_component( fftwDirDouble "@FFTW_DOUBLE_PRECISION_LIBRARIES@" DIRECTORY ) +get_filename_component( fftwDirSingle "@FFTW_SINGLE_PRECISION_LIBRARIES@" PATH ) +get_filename_component( fftwDirDouble "@FFTW_DOUBLE_PRECISION_LIBRARIES@" PATH ) if( EXISTS "${fftwDirSingle}" ) list( APPEND depList "${fftwDirSingle}" ) @@ -47,8 +47,8 @@ if( ${fftwDiffDirs} AND EXISTS "${fftwDirDouble}" ) endif( ) #This logic assumes that FindGTest.cmake has been called -get_filename_component( gtestDir "@GTEST_LIBRARY@" DIRECTORY ) -get_filename_component( gtestDirDebug "@GTEST_LIBRARY_DEBUG@" DIRECTORY ) +get_filename_component( gtestDir "@GTEST_LIBRARY@" PATH ) +get_filename_component( gtestDirDebug "@GTEST_LIBRARY_DEBUG@" PATH ) if( EXISTS "${gtestDir}" ) list( APPEND depList "${gtestDir}" ) @@ -62,7 +62,7 @@ if( ${gtestDiffDirs} AND EXISTS "${gtestDirDebug}" ) endif( ) #This logic assumes that FindOpenCL.cmake has been called -get_filename_component( openclDir "@OPENCL_LIBRARIES@" DIRECTORY ) +get_filename_component( openclDir "@OPENCL_LIBRARIES@" PATH ) if( EXISTS "${openclDir}" ) list( 
APPEND depList "${openclDir}" ) From 2217b686b11283539766aaea47451f20d420e983 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Thu, 3 Apr 2014 16:46:29 -0500 Subject: [PATCH 32/32] Updates to the main README.md file to incorporate google group links, and updates to the build dependencies section. --- README.md | 275 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 172 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index 304bcd13..b5664b57 100644 --- a/README.md +++ b/README.md @@ -2,39 +2,86 @@ clFFT ===== [![Build Status](https://travis-ci.org/clMathLibraries/clFFT.png)](https://travis-ci.org/clMathLibraries/clFFT) -clMath is a software library containing FFT and BLAS functions written in OpenCL. In addition to GPU devices, the libraries also support running on CPU devices to facilitate debugging and multicore programming. +clMath is a software library containing FFT and BLAS functions written +in OpenCL. In addition to GPU devices, the libraries also support +running on CPU devices to facilitate debugging and multicore +programming. -clMath 2.1 is the latest version and is available as source only. clMath's predecessor APPML 1.10 has pre-built binaries available for download on both Linux and Windows platforms. +clMath 2.1 is the latest version and is available as source only. +clMath's predecessor APPML 1.10 has pre-built binaries available for +download on both Linux and Windows platforms. ## Introduction to clFFT -The FFT is an implementation of the Discrete Fourier Transform (DFT) that makes use of symmetries in the FFT definition to reduce the mathematical intensity required from O(N2) to O(N log2( N )) when the sequence length N is the product of small prime factors. Currently, there is no standard API for FFT routines. Hardware vendors usually provide a set of high-performance FFTs optimized for their systems: no two vendors employ the same interfaces for their FFT routines. 
clFFT provides a set of FFT routines that are optimized for AMD graphics processors, but also are functional across CPU and other compute devices. +The FFT is an implementation of the Discrete Fourier Transform (DFT) +that makes use of symmetries in the FFT definition to reduce the +mathematical intensity required from O(N^2) to O(N log2( N )) when the +sequence length N is the product of small prime factors. Currently, +there is no standard API for FFT routines. Hardware vendors usually +provide a set of high-performance FFTs optimized for their systems: no +two vendors employ the same interfaces for their FFT routines. clFFT +provides a set of FFT routines that are optimized for AMD graphics +processors, but also are functional across CPU and other compute +devices. -The clFFT library is an open source OpenCL library implementation of discrete Fast Fourier Transforms. It: +The clFFT library is an open source OpenCL library implementation of +discrete Fast Fourier Transforms. It: -* Provides a fast and accurate platform for calculating discrete FFTs. -* Works on CPU or GPU backends. -* Supports in-place or out-of-place transforms. -* Supports 1D, 2D, and 3D transforms with a batch size that can be greater than 1. -* Supports planar (real and complex components in separate arrays) and interleaved (real and complex components as a pair contiguous in memory) formats. -* Supports dimension lengths that can be any mix of powers of 2, 3, and 5. -* Supports single and double precision floating point formats. +- Provides a fast and accurate platform for calculating discrete FFTs. + +- Works on CPU or GPU backends. + +- Supports in-place or out-of-place transforms. + +- Supports 1D, 2D, and 3D transforms with a batch size that can be + greater than 1. + +- Supports planar (real and complex components in separate arrays) and + interleaved (real and complex components as a pair contiguous in + memory) formats.
+ +- Supports dimension lengths that can be any mix of powers of 2, 3, + and 5. + +- Supports single and double precision floating point formats. ## clFFT library user documentation -[Library and API documentation]( http://clmathlibraries.github.io/clFFT/ ) for developers is available online as a GitHub Pages website + +[Library and API documentation][] for developers is available online as +a GitHub Pages website + +### Google Groups + +Two mailing lists have been created for the clMath projects: + +- <clmath@googlegroups.com> - group whose focus is to answer + questions on using the library or reporting issues + +- <clmath-developers@googlegroups.com> - group whose focus is for + developers interested in contributing to the library code itself ## clFFT Wiki -The [project wiki](https://github.com/clMathLibraries/clFFT/wiki) contains helpful documentation, including a [build primer](https://github.com/clMathLibraries/clFFT/wiki/Build) + +The [project wiki](https://github.com/clMathLibraries/clFFT/wiki) contains helpful +documentation, including a [build +primer](https://github.com/clMathLibraries/clFFT/wiki/Build) ## Contributing code -Please refer to and read the [Contributing](CONTRIBUTING.md) document for guidelines on how to contribute code to this open source project + +Please refer to and read the [Contributing][] document for guidelines on +how to contribute code to this open source project. The code in the +/master branch is considered to be stable, and all pull-requests should +be made against the /develop branch.
## License -The source for clFFT is licensed under the [Apache License, Version 2.0]( http://www.apache.org/licenses/LICENSE-2.0 ) + +The source for clFFT is licensed under the [Apache License, Version +2.0][] ## Example -The simple example below shows how to use clFFT to compute an simple 1D forward transform +The simple example below shows how to use clFFT to compute a simple 1D +forward transform ```c #include @@ -43,101 +90,123 @@ The simple example below shows how to use clFFT to compute an simple 1D forward int main( void ) { - cl_int err; - cl_platform_id platform = 0; - cl_device_id device = 0; - cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 }; - cl_context ctx = 0; - cl_command_queue queue = 0; - cl_mem bufX; - float *X; - cl_event event = NULL; - int ret = 0; - size_t N = 16; - - /* FFT library realted declarations */ - clfftPlanHandle planHandle; - clfftDim dim = CLFFT_1D; - size_t clLengths[1] = {N}; - - /* Setup OpenCL environment. */ - err = clGetPlatformIDs( 1, &platform, NULL ); - err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL ); - - props[1] = (cl_context_properties)platform; - ctx = clCreateContext( props, 1, &device, NULL, NULL, &err ); - queue = clCreateCommandQueue( ctx, device, 0, &err ); - - /* Setup clFFT. */ - clfftSetupData fftSetup; - err = clfftInitSetupData(&fftSetup); - err = clfftSetup(&fftSetup); - - /* Allocate host & initialize data. */ - /* Only allocation shown for simplicity. */ - X = (float *)malloc(N * 2 * sizeof(*X)); - - /* Prepare OpenCL memory objects and place data inside them. */ - bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err ); - - err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, - N * 2 * sizeof( *X ), X, 0, NULL, NULL ); - - /* Create a default plan for a complex FFT. */ - err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths); - - /* Set plan parameters. 
*/ - err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE); - err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED); - err = clfftSetResultLocation(planHandle, CLFFT_INPLACE); - - /* Bake the plan. */ - err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL); - - /* Execute the plan. */ - err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL); - - /* Wait for calculations to be finished. */ - err = clFinish(queue); - - /* Fetch results of calculations. */ - err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL ); - - /* Release OpenCL memory objects. */ - clReleaseMemObject( bufX ); - - free(X); - - /* Release the plan. */ - err = clfftDestroyPlan( &planHandle ); - - /* Release clFFT library. */ - clfftTeardown( ); - - /* Release OpenCL working objects. */ - clReleaseCommandQueue( queue ); - clReleaseContext( ctx ); - - return ret; + cl_int err; + cl_platform_id platform = 0; + cl_device_id device = 0; + cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 }; + cl_context ctx = 0; + cl_command_queue queue = 0; + cl_mem bufX; + float *X; + cl_event event = NULL; + int ret = 0; + size_t N = 16; + + /* FFT library related declarations */ + clfftPlanHandle planHandle; + clfftDim dim = CLFFT_1D; + size_t clLengths[1] = {N}; + + /* Setup OpenCL environment. */ + err = clGetPlatformIDs( 1, &platform, NULL ); + err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL ); + + props[1] = (cl_context_properties)platform; + ctx = clCreateContext( props, 1, &device, NULL, NULL, &err ); + queue = clCreateCommandQueue( ctx, device, 0, &err ); + + /* Setup clFFT. */ + clfftSetupData fftSetup; + err = clfftInitSetupData(&fftSetup); + err = clfftSetup(&fftSetup); + + /* Allocate host & initialize data. */ + /* Only allocation shown for simplicity. 
*/ + X = (float *)malloc(N * 2 * sizeof(*X)); + + /* Prepare OpenCL memory objects and place data inside them. */ + bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err ); + + err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, + N * 2 * sizeof( *X ), X, 0, NULL, NULL ); + + /* Create a default plan for a complex FFT. */ + err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths); + + /* Set plan parameters. */ + err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE); + err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED); + err = clfftSetResultLocation(planHandle, CLFFT_INPLACE); + + /* Bake the plan. */ + err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL); + + /* Execute the plan. */ + err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL); + + /* Wait for calculations to be finished. */ + err = clFinish(queue); + + /* Fetch results of calculations. */ + err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL ); + + /* Release OpenCL memory objects. */ + clReleaseMemObject( bufX ); + + free(X); + + /* Release the plan. */ + err = clfftDestroyPlan( &planHandle ); + + /* Release clFFT library. */ + clfftTeardown( ); + + /* Release OpenCL working objects. 
*/ + clReleaseCommandQueue( queue ); + clReleaseContext( ctx ); + + return ret; } ``` ## Build dependencies + ### Library for Windows -* Windows® 7/8 -* Visual Studio 2010 SP1, 2012 -* Latest CMake -* An OpenCL SDK, such as APP SDK 2.8 + +- Windows® 7/8 + +- Visual Studio 2010 SP1, 2012 + +- Latest CMake + +- An OpenCL SDK, such as APP SDK 2.9 ### Library for Linux -* GCC 4.6 and onwards -* Latest CMake -* An OpenCL SDK, such as APP SDK 2.8 + +- GCC 4.6 and onwards + +- Latest CMake + +- An OpenCL SDK, such as APP SDK 2.9 + +### Library for Mac OSX + +- Recommended to generate Unix makefiles with cmake ### Test infrastructure -* Latest Googletest -* Latest FFTW -* Latest Boost + +- Googletest v1.6 + +- Latest FFTW + +- Latest Boost ### Performance infrastructure -* Python \ No newline at end of file + +- Python + + [Library and API documentation]: http://clmathlibraries.github.io/clFFT/ + [clmath@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki + [clmath-developers@googlegroups.com]: https://github.com/clMathLibraries/clFFT/wiki/Build + [Contributing]: CONTRIBUTING.md + [Apache License, Version 2.0]: http://www.apache.org/licenses/LICENSE-2.0