diff --git a/README.md b/README.md index e90fd339..ad8a3321 100644 --- a/README.md +++ b/README.md @@ -116,15 +116,15 @@ int main( void ) cl_context ctx = 0; cl_command_queue queue = 0; cl_mem bufX; - float *X; + float *X; cl_event event = NULL; int ret = 0; - size_t N = 16; + size_t N = 16; - /* FFT library realted declarations */ - clfftPlanHandle planHandle; - clfftDim dim = CLFFT_1D; - size_t clLengths[1] = {N}; + /* FFT library related declarations */ + clfftPlanHandle planHandle; + clfftDim dim = CLFFT_1D; + size_t clLengths[1] = {N}; /* Setup OpenCL environment. */ err = clGetPlatformIDs( 1, &platform, NULL ); @@ -135,47 +135,47 @@ int main( void ) queue = clCreateCommandQueue( ctx, device, 0, &err ); /* Setup clFFT. */ - clfftSetupData fftSetup; - err = clfftInitSetupData(&fftSetup); - err = clfftSetup(&fftSetup); + clfftSetupData fftSetup; + err = clfftInitSetupData(&fftSetup); + err = clfftSetup(&fftSetup); - /* Allocate host & initialize data. */ - /* Only allocation shown for simplicity. */ - X = (float *)malloc(N * 2 * sizeof(*X)); + /* Allocate host & initialize data. */ + /* Only allocation shown for simplicity. */ + X = (float *)malloc(N * 2 * sizeof(*X)); /* Prepare OpenCL memory objects and place data inside them. */ bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err ); err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, - N * 2 * sizeof( *X ), X, 0, NULL, NULL ); + N * 2 * sizeof( *X ), X, 0, NULL, NULL ); - /* Create a default plan for a complex FFT. */ - err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths); + /* Create a default plan for a complex FFT. */ + err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths); - /* Set plan parameters. 
*/ + err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE); + err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED); + err = clfftSetResultLocation(planHandle, CLFFT_INPLACE); /* Bake the plan. */ - err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL); + err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL); - /* Execute the plan. */ - err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL); + /* Execute the plan. */ + err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL); - /* Wait for calculations to be finished. */ - err = clFinish(queue); + /* Wait for calculations to be finished. */ + err = clFinish(queue); - /* Fetch results of calculations. */ - err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL ); + /* Fetch results of calculations. */ + err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL ); /* Release OpenCL memory objects. */ clReleaseMemObject( bufX ); - free(X); + free(X); - /* Release the plan. */ - err = clfftDestroyPlan( &planHandle ); + /* Release the plan. */ + err = clfftDestroyPlan( &planHandle ); /* Release clFFT library. 
*/ clfftTeardown( ); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f0a7b26b..e1d9b17c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,6 +16,8 @@ cmake_minimum_required( VERSION 3.1 ) +option (CMAKE_SUPPRESS_REGENERATION "Suppress the cmake macro that causes regeneration of build scripts during build" ON) + if( CMAKE_GENERATOR MATCHES "NMake" ) option( NMAKE_COMPILE_VERBOSE "Print compile and link strings to the console" OFF ) if( NMAKE_COMPILE_VERBOSE ) diff --git a/src/client/client.cpp b/src/client/client.cpp index 052d326e..f71a007d 100644 --- a/src/client/client.cpp +++ b/src/client/client.cpp @@ -55,6 +55,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride // this from the lengths that the user specifies for X, Y, Z. A length of one means that // The user does not want that dimension. + const size_t max_dimensions = 3; size_t strides[ 4 ]; size_t o_strides[ 4 ]; @@ -159,6 +160,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride break; } + + + + // Fill the input buffers switch( in_layout ) { @@ -383,6 +388,25 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride break; } + + + std::cout << "Doing test transform" << std::endl; + std::cout << "Lengths: " << lengths[ 0 ] << ", " << lengths[ 1 ] << ", " << lengths[ 2 ] << "."; + std::cout << "Input Mem buffers: " << reinterpret_cast(input_cl_mem_buffers[ 0 ]) << ", " << reinterpret_cast(input_cl_mem_buffers[ 1 ]) << o_strides[ 3 ] << "."; + std::cout << "Output Mem buffers: " << reinterpret_cast(output_cl_mem_buffers[ 0 ]) << ", " << reinterpret_cast(output_cl_mem_buffers[ 1 ]) << o_strides[ 3 ] << "."; + + std::cout << "Layouts: " << static_cast(in_layout) << ", " << static_cast(out_layout) << std::endl; + std::cout << "number_of_output_buffers: " << number_of_output_buffers << std::endl; + std::cout << "fftVectorSize: " << fftVectorSize << std::endl; + std::cout << "fftVectorSizePadded: 
" << fftVectorSizePadded << std::endl; + std::cout << "fftBatchSize: " << fftBatchSize << std::endl; + std::cout << "outfftVectorSize: " << outfftVectorSize << std::endl; + std::cout << "outfftVectorSizePadded: " << outfftVectorSizePadded << std::endl; + std::cout << "outfftBatchSize: " << outfftBatchSize << std::endl; + + std::cout << "Input buffer size (bytes): " << size_of_input_buffers_in_bytes << "." << std::endl; + std::cout << "Output buffer size (bytes): " << size_of_output_buffers_in_bytes << "." << std::endl; + // Discover and load the timer module if present void* timerLibHandle = LoadSharedLibrary( "lib", "StatTimer", false ); if( timerLibHandle == NULL ) @@ -404,8 +428,11 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride OPENCL_V_THROW( clfftSetPlanBatchSize( plan_handle, batch_size ), "clfftSetPlanBatchSize failed" ); OPENCL_V_THROW( clfftSetPlanPrecision( plan_handle, precision ), "clfftSetPlanPrecision failed" ); + std::cout << "Input Strides: " << dim << " -> " << strides[ 0 ] << ", " << strides[ 1 ] << ", " << strides[ 2 ] << ", " << strides[ 3 ] << "." << std::endl; OPENCL_V_THROW (clfftSetPlanInStride ( plan_handle, dim, strides ), "clfftSetPlanInStride failed" ); + std::cout << "Output Strides: " << dim << " -> " << o_strides[ 0 ] << ", " << o_strides[ 1 ] << ", " << o_strides[ 2 ] << ", " << o_strides[ 3 ] << "." << std::endl; OPENCL_V_THROW (clfftSetPlanOutStride ( plan_handle, dim, o_strides ), "clfftSetPlanOutStride failed" ); + std::cout << "Plan Distance: " << strides[ 3 ] << ", " << o_strides[ 3 ] << "." 
<< std::endl; OPENCL_V_THROW (clfftSetPlanDistance ( plan_handle, strides[ 3 ], o_strides[ 3 ]), "clfftSetPlanDistance failed" ); // Set backward scale factor to 1.0 for non real FFTs to do correct output checks @@ -507,7 +534,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride "clfftEnqueueTransform failed" ); OPENCL_V_THROW( clFinish( queue ), "clFinish failed" ); - + // Create and initialize our timer class, if the external timer shared library loaded baseStatTimer* timer = NULL; @@ -526,7 +553,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride if(profile_count > 1) { - Timer tr; + Timer tr; tr.Start(); for( cl_uint i = 0; i < profile_count; ++i ) { @@ -795,7 +822,7 @@ int _tmain( int argc, _TCHAR* argv[] ) #endif /* MEMORYREPORT */ - // OpenCL state + // OpenCL state cl_device_type deviceType = CL_DEVICE_TYPE_ALL; cl_int deviceId = 0; cl_int platformId = 0; @@ -1028,9 +1055,39 @@ int _tmain( int argc, _TCHAR* argv[] ) } if( precision == CLFFT_SINGLE ) - transform( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, printInfo, command_queue_flags, profile_count, setupData ); + transform( lengths, + iStrides, + oStrides, + batchSize, + inLayout, + outLayout, + place, + precision, + dir, + deviceType, + deviceId, + platformId, + printInfo, + command_queue_flags, + profile_count, + setupData ); else - transform( lengths, iStrides, oStrides, batchSize, inLayout, outLayout, place, precision, dir, deviceType, deviceId, platformId, printInfo, command_queue_flags, profile_count, setupData ); + transform( lengths, + iStrides, + oStrides, + batchSize, + inLayout, + outLayout, + place, + precision, + dir, + deviceType, + deviceId, + platformId, + printInfo, + command_queue_flags, + profile_count, + setupData ); } catch( std::exception& e ) {