Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…ivate into develop
  • Loading branch information
bragadeesh committed Aug 7, 2015
2 parents 8489d30 + 81be7cd commit c03d7bb
Show file tree
Hide file tree
Showing 8 changed files with 285 additions and 47 deletions.
98 changes: 60 additions & 38 deletions src/client/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
std::vector< cl_device_id > device_id;
cl_context context;
cl_command_queue queue;
cl_event outEvent = NULL;
clfftPlanHandle plan_handle;

for (unsigned u = 0; u < max_dimensions; ++u) {
Expand Down Expand Up @@ -204,7 +203,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride


OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &input[ 0 ],
0, NULL, &outEvent ),
0, NULL, NULL ),
"clEnqueueWriteBuffer failed" );

}
Expand Down Expand Up @@ -252,10 +251,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride


OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
0, NULL, &outEvent ),
0, NULL, NULL ),
"clEnqueueWriteBuffer failed" );
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
0, NULL, &outEvent ),
0, NULL, NULL ),
"clEnqueueWriteBuffer failed" );
}
break;
Expand Down Expand Up @@ -289,7 +288,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride


OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &input[ 0 ],
0, NULL, &outEvent ),
0, NULL, NULL ),
"clEnqueueWriteBuffer failed" );
}
break;
Expand Down Expand Up @@ -325,10 +324,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride


OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
0, NULL, &outEvent ),
0, NULL, NULL ),
"clEnqueueWriteBuffer failed" );
OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 1 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &imag[ 0 ],
0, NULL, &outEvent ),
0, NULL, NULL ),
"clEnqueueWriteBuffer failed" );
}
break;
Expand Down Expand Up @@ -373,7 +372,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride


OPENCL_V_THROW( clEnqueueWriteBuffer( queue, input_cl_mem_buffers[ 0 ], CL_TRUE, 0, size_of_input_buffers_in_bytes, &real[ 0 ],
0, NULL, &outEvent ),
0, NULL, NULL ),
"clEnqueueWriteBuffer failed" );
}
break;
Expand All @@ -391,22 +390,10 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
terr << _T( "Could not find the external timing library; timings disabled" ) << std::endl;
}


// Timer module discovered and loaded successfully
// Initialize function pointers to call into the shared module
PFGETSTATTIMER get_timer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( timerLibHandle, "getStatTimer" ) );

// Create and initialize our timer class, if the external timer shared library loaded
baseStatTimer* timer = NULL;
size_t clFFTID = 0;
if( get_timer )
{
timer = get_timer( CLFFT_GPU );
timer->Reserve( 1, profile_count );
timer->setNormalize( true );

clFFTID = timer->getUniqueID( "clFFT", 0 );
}

OPENCL_V_THROW( clfftSetup( setupData.get( ) ), "clfftSetup failed" );
OPENCL_V_THROW( clfftCreateDefaultPlan( &plan_handle, context, dim, lengths ), "clfftCreateDefaultPlan failed" );
Expand Down Expand Up @@ -511,37 +498,64 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
}
}

// Loop as many times as the user specifies to average out the timings
//

cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &output_cl_mem_buffers[ 0 ];

Timer tr;
tr.Start();
// Execute once for basic functional test
OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, NULL,
&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
"clfftEnqueueTransform failed" );

for( cl_uint i = 0; i < profile_count; ++i )
{
if( timer ) timer->Start( clFFTID );
OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );


OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent,
&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
"clfftEnqueueTransform failed" );
// Create and initialize our timer class, if the external timer shared library loaded
baseStatTimer* timer = NULL;
size_t clFFTID = 0;
if( get_timer )
{
timer = get_timer( CLFFT_GPU );
timer->Reserve( 1, profile_count );
timer->setNormalize( true );

if( timer ) timer->Stop( clFFTID );
clFFTID = timer->getUniqueID( "clFFT", 0 );
}
OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );
if(clMedBuffer) clReleaseMemObject(clMedBuffer);

double wtime = tr.Sample()/((double)profile_count);
size_t totalLen = 1;
for(int i=0; i<dim; i++) totalLen *= lengths[i];
double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);
cl_event *outEvent = new cl_event[profile_count];
for( cl_uint i = 0; i < profile_count; ++i ) outEvent[i] = 0;

if(profile_count > 1)
{
Timer tr;
tr.Start();
for( cl_uint i = 0; i < profile_count; ++i )
{
if( timer ) timer->Start( clFFTID );

OPENCL_V_THROW( clfftEnqueueTransform( plan_handle, dir, 1, &queue, 0, NULL, &outEvent[i],
&input_cl_mem_buffers[ 0 ], BuffersOut, clMedBuffer ),
"clfftEnqueueTransform failed" );

if( timer ) timer->Stop( clFFTID );
}
OPENCL_V_THROW( clWaitForEvents ( profile_count, outEvent ), "clWaitForEvents failed" );

double wtime = tr.Sample()/((double)profile_count);

OPENCL_V_THROW( clFinish( queue ), "clFinish failed" );

size_t totalLen = 1;
for(int i=0; i<dim; i++) totalLen *= lengths[i];
double opsconst = 5.0 * (double)totalLen * log((double)totalLen) / log(2.0);


tout << "\nExecution wall time: " << 1000.0*wtime << " ms" << std::endl;
tout << "Execution gflops: " << ((double)batch_size * opsconst)/(1000000000.0*wtime) << std::endl;

}

if(clMedBuffer) clReleaseMemObject(clMedBuffer);

if( timer && (command_queue_flags & CL_QUEUE_PROFILING_ENABLE) )
{
// Remove all timings that are outside of 2 stddev (keep 65% of samples); we ignore outliers to get a more consistent result
Expand All @@ -553,6 +567,14 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
/*****************/
FreeSharedLibrary( timerLibHandle );

for( cl_uint i = 0; i < profile_count; ++i )
{
if(outEvent[i])
clReleaseEvent(outEvent[i]);
}

delete[] outEvent;

// Read and check output data
// This check is not valid if the FFT is executed multiple times inplace.
//
Expand Down Expand Up @@ -725,7 +747,7 @@ int transform( size_t* lengths, const size_t *inStrides, const size_t *outStride
OPENCL_V_THROW( clfftDestroyPlan( &plan_handle ), "clfftDestroyPlan failed" );
OPENCL_V_THROW( clfftTeardown( ), "clfftTeardown failed" );

cleanupCL( &context, &queue, countOf( input_cl_mem_buffers ), input_cl_mem_buffers, countOf( output_cl_mem_buffers ), output_cl_mem_buffers, &outEvent );
cleanupCL( &context, &queue, countOf( input_cl_mem_buffers ), input_cl_mem_buffers, countOf( output_cl_mem_buffers ), output_cl_mem_buffers, NULL );
return 0;
}

Expand Down
7 changes: 5 additions & 2 deletions src/client/openCL.misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,11 @@ std::vector< cl_device_id > initializeCL( cl_device_type deviceType,
int cleanupCL( cl_context* context, cl_command_queue* commandQueue,
const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent )
{
if( *outEvent != NULL )
OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" );
if(outEvent != NULL)
{
if( *outEvent != NULL )
OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" );
}

releaseOpenCLMemBuffer( numBuffersIn, inputBuffer);
releaseOpenCLMemBuffer( numBuffersOut, outputBuffer);
Expand Down
Loading

0 comments on commit c03d7bb

Please sign in to comment.