From 3c5d6f1249b2fe70a25b2e74b3e506f23add2dde Mon Sep 17 00:00:00 2001 From: Duksu Kim Date: Thu, 14 Oct 2021 13:14:31 +0900 Subject: [PATCH] update lecture6 --- CUDA_Programming.sln | 14 + .../ThreadCounting_AtomicOp/DS_definitions.h | 197 ++++++++++ .../ThreadCounting_AtomicOp/DS_timer.cpp | 336 ++++++++++++++++++ .../ThreadCounting_AtomicOp/DS_timer.h | 95 +++++ .../ThreadCounting_AtomicOp.cu | 92 +++++ .../ThreadCounting_AtomicOp.vcxproj | 164 +++++++++ .../ThreadCounting_AtomicOp.vcxproj.filters | 13 + 7 files changed, 911 insertions(+) create mode 100644 Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_definitions.h create mode 100644 Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.cpp create mode 100644 Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.h create mode 100644 Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.cu create mode 100644 VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj create mode 100644 VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj.filters diff --git a/CUDA_Programming.sln b/CUDA_Programming.sln index b0779a6..e4959ab 100644 --- a/CUDA_Programming.sln +++ b/CUDA_Programming.sln @@ -51,6 +51,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MatMul_MemAccessPattern", " EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MatMul_bank_conflict", "VS2019\Intermediate_Class\Lecture5\MatMul_bank_conflict\MatMul_bank_conflict.vcxproj", "{8D608736-D0A2-4D23-BED7-641B87D5F4C1}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Lecture6", "Lecture6", "{3DEBB45B-FF8B-4776-BFF5-D68A48058E79}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ThreadCounting_AtomicOp", "VS2019\Intermediate_Class\Lecture6\ThreadCounting_AtomicOp\ThreadCounting_AtomicOp.vcxproj", "{E36C456D-0161-426E-B305-E119C40B480F}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -149,6 +153,14 @@ Global {8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Release|x64.Build.0 = Release|x64 {8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Release|x86.ActiveCfg = Release|Win32 {8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Release|x86.Build.0 = Release|Win32 + {E36C456D-0161-426E-B305-E119C40B480F}.Debug|x64.ActiveCfg = Debug|x64 + {E36C456D-0161-426E-B305-E119C40B480F}.Debug|x64.Build.0 = Debug|x64 + {E36C456D-0161-426E-B305-E119C40B480F}.Debug|x86.ActiveCfg = Debug|Win32 + {E36C456D-0161-426E-B305-E119C40B480F}.Debug|x86.Build.0 = Debug|Win32 + {E36C456D-0161-426E-B305-E119C40B480F}.Release|x64.ActiveCfg = Release|x64 + {E36C456D-0161-426E-B305-E119C40B480F}.Release|x64.Build.0 = Release|x64 + {E36C456D-0161-426E-B305-E119C40B480F}.Release|x86.ActiveCfg = Release|Win32 + {E36C456D-0161-426E-B305-E119C40B480F}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -176,6 +188,8 @@ Global {EF9A42B1-29F7-4349-827D-08B3B5D95ECA} = {B3430C43-00B8-443C-A7C2-DCF639AEADD1} {78E9C712-3A19-4402-A9F6-50A621258682} = {EF9A42B1-29F7-4349-827D-08B3B5D95ECA} {8D608736-D0A2-4D23-BED7-641B87D5F4C1} = {EF9A42B1-29F7-4349-827D-08B3B5D95ECA} + {3DEBB45B-FF8B-4776-BFF5-D68A48058E79} = {B3430C43-00B8-443C-A7C2-DCF639AEADD1} + {E36C456D-0161-426E-B305-E119C40B480F} = {3DEBB45B-FF8B-4776-BFF5-D68A48058E79} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {52DE253B-6715-4A93-BA27-7131973681F9} diff --git a/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_definitions.h b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_definitions.h new file mode 100644 index 0000000..e7a6816 --- /dev/null +++ b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_definitions.h @@ -0,0 +1,197 @@ +#pragma once + +#include +#include +#include +#include + +#define OS_WINDOWS 0 +#define OS_LINUX 1 + +#ifdef _WIN32 +#define _TARGET_OS OS_WINDOWS +#else + #ifndef nullptr + #define nullptr NULL + #endif + #define _TARGET_OS OS_LINUX +#endif + +/************************************************************************/ +/* OS dependet function */ +/************************************************************************/ +#if _TARGET_OS == OS_WINDOWS +// #define _SPRINT sprintf_s +#define _STRTOK strtok_s + +#define EXIT_WIHT_KEYPRESS {std::cout << "Press any key to exit..."; getchar(); exit(0);} + +#define SPLIT_PATH(_path,_result) \ + _splitpath_s(_path, _result.drive, 255, _result.dir, 255, _result.filename, 255, _result.ext, 255) + + +#elif _TARGET_OS == OS_LINUX +#include +#include + +#define _STRTOK strtok_r + +#define EXIT_WIHT_KEYPRESS {std::cout << "Program was terminated!"; exit(0);} + +#define sprintf_s sprintf +#define scanf_s scanf +#define fprintf_s fprintf + +#define __int64 int64_t + +#define fopen_s(fp, name, mode) (*fp = fopen(name, mode)) + +#endif + +/************************************************************************/ +/* Defines */ +/************************************************************************/ + +// *********** data size +#define _1K_ 1024 +#define _1M_ (_1K_*_1K_) +#define _1G_ (_1M_*_1K_) + +#define CHAR_STRING_SIZE 255 + +/************************************************************************/ +/* Type definitions */ +/************************************************************************/ +typedef unsigned int UINT ; + +/************************************************************************/ +/* Macro functions */ +/************************************************************************/ +#define DS_MEM_DELETE(a) \ + if (a != NULL) { \ + delete a ; \ + a = NULL ; \ + } + +#define DS_MEM_DELETE_ARRAY(a) \ + if (a != NULL) { \ + delete [] a ; \ + a = NULL ; \ + } + +#define RANGE_MIN 0 +#define RANGE_MAX 1 + +#define MATCHED_STRING 0 + +#ifndef VTK_RANGE_MIN +#define VTK_RANGE_MIN 0 +#define VTK_RANGE_MAX 1 +#endif + +// Print +#define PRINT_LINE_INFO printf("%s, line %d", __FILE__, __LINE__) +#define PRINT_ERROR_MSG(_msg) {PRINT_LINE_INFO; printf(" at "); printf(_msg);} + +// Single loops +#define LOOP_I(a) for(int i=0; i +void SWAP(T &a, T &b){ + T tmp = a; + a = b; + b = tmp; +} +#endif + +// +#ifndef MIN +#define MIN(a,b) (a > b ? b : a) +#endif + +#ifndef MAX +#define MAX(a,b) (a > b ? a : b) +#endif + +// Index converter + +#define INDEX2X(_ID,_W) (_ID%_W) +#define INDEX2Y(_ID,_W) (_ID/_W) +#define INDEX2ID(_ID,_X,_Y,_W) {_X=INDEX2X(_ID,_W);_Y=INDEX2Y(_ID_,_W);} +#define ID2INDEX(_W,_X,_Y) (_Y*_W+_X) +#define PTR2ID(_type, _target, _base) ((_type*)_target - (_type*)_base) + +// Memory allocation and release +#ifndef SAFE_DELETE +#define SAFE_DELETE(p) {if(p!=NULL) delete p; p=NULL;} +#endif + +#ifndef SAFE_DELETE_ARR +#define SAFE_DELETE_ARR(p) {if(p!=NULL) delete [] p; p=NULL;} +#endif + +#define SAFE_NEW(p, type, size) {\ + try {p = new type[size];} \ + catch(std::bad_alloc& exc) \ + { printf("[%s, line %d] fail to memory allocation - %.2f MB requested\n", __FILE__, __LINE__, (float)(sizeof(type)*size)/_1M_); \ + EXIT_WIHT_KEYPRESS }\ + } + +template +void memsetZero(T** p, long long size = 0) { + if (*p != NULL) + memset(*p, 0, sizeof(T)*size); +} + +template +void allocNinitMem(T** p, long long size, double *memUsage = NULL) { + *p = new T[size]; + //SAFE_NEW(*p, T, size); + memset(*p, 0, sizeof(T)*size); + + if (memUsage != NULL) { + *memUsage += sizeof(T)*size; + } +} + +#define SAFE_MEMCPY(_dst, _src, _type, _size){ \ + if(_dst == nullptr || _src == nullptr ) \ + printf("[%s, line %d] fail to memcpy (dst = %x, src = %x)\n", __FILE__, __LINE__, _dst, _src); \ + exit(-1); \ + memcpy(_dst, _src, sizeof(_type)*_size);\ +} + +// VTK related +#ifndef SAFE_DELETE_VTK +#define SAFE_DELETE_VTK(p) {if(p!=NULL) p->Delete(); p=NULL;} +#endif + +#ifndef VTK_IS_NOERROR +//#include "DS_common_def.h" +#define VTK_IS_NOERROR(p) (p->GetErrorCode()==vtkErrorCode::NoError ? true : false) +#endif + +/************************************************************************/ +/* Data structures */ +/************************************************************************/ +typedef struct { + std::string input; + std::string output; +} nameMatch; + +typedef struct { + char drive[255]; + char dir[255]; + char filename[255]; + char ext[255]; +} filePathSplit; diff --git a/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.cpp b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.cpp new file mode 100644 index 0000000..876a2c0 --- /dev/null +++ b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.cpp @@ -0,0 +1,336 @@ +#include "DS_timer.h" +#include "DS_definitions.h" +#include + +//#ifndef _WIN32 +//#define fprintf_s fprintf +//void fopen_s(FILE** fp, char* name, char* mode) +//{ +// *fp = fopen(name, mode); +//} +//#endif + +/************************************************************************/ +/* Constructor & Destructor */ +/************************************************************************/ +DS_timer::DS_timer(int _numTimer /* =0 */, int _numCount /* =0 */, bool _trunOn /* =true */){ + + turnOn = _trunOn ; + start_ticks = NULL ; end_ticks = NULL ; totalTicks = NULL ; + counters = NULL ; + timerStates = NULL ; + timerName = NULL ; + + numTimer = numCounter = 0 ; + + setTimerTitle((char*)"DS_timer Report") ; + + setTimer(_numTimer ) ; + setCounter( _numTimer ) ; + +#ifdef _WIN32 + // For windows + QueryPerformanceFrequency(&ticksPerSecond) ; +#endif +} + +DS_timer::~DS_timer(void){ + releaseTimers() ; + releaseCounters() ; +} + +/************************************************************************/ +/* Set & Get configurations */ +/************************************************************************/ + +// Memory allocation +void DS_timer::memAllocCounters( void ) { + releaseCounters() ; + counters = new UINT[numCounter] ; + initCounters() ; +} + +void DS_timer::memAllocTimers( void ) { + releaseTimers() ; + + timerStates = new bool[numTimer] ; + start_ticks = new TIME_VAL[numTimer]; + end_ticks = new TIME_VAL[numTimer]; + totalTicks = new TIME_VAL[numTimer]; + + // Initialize + memset(timerStates, 0, sizeof(bool)*numTimer); + memset(start_ticks, 0, sizeof(TIME_VAL)*numTimer); + memset(end_ticks, 0, sizeof(TIME_VAL)*numTimer); + memset(totalTicks, 0, sizeof(TIME_VAL)*numTimer); + + timerName = new std::string[numTimer] ; + for ( UINT i = 0 ; i < numTimer ; i++ ) { + timerName[i].clear() ; + timerName[i].resize(255) ; + } +} + +// Memory release +void DS_timer::releaseCounters( void ) { DS_MEM_DELETE_ARRAY(counters) ; } +void DS_timer::releaseTimers( void ) { + //DS_MEM_DELETE(start_ticks) ; DS_MEM_DELETE(end_ticks) ; DS_MEM_DELETE(totalTicks) ; + DS_MEM_DELETE_ARRAY(timerStates) ; + DS_MEM_DELETE_ARRAY(start_ticks) ; + DS_MEM_DELETE_ARRAY(end_ticks) ; + DS_MEM_DELETE_ARRAY(totalTicks) ; + DS_MEM_DELETE_ARRAY(timerName) ; +} + +// Getters +UINT DS_timer::getNumTimer( void ) { return numTimer ; } +UINT DS_timer::getNumCounter( void ) { return numCounter ; } + +// Setters +UINT DS_timer::setTimer( UINT _numTimer ) { + if ( _numTimer == 0 ) + return 0 ; + + if (_numTimer <= numTimer) + return numTimer; + + if ( numTimer != 0 ) { + + // Backup + UINT oldNumTimer = numTimer ; + TIME_VAL *oldTotalTicks = new TIME_VAL[oldNumTimer]; + memcpy(oldTotalTicks, totalTicks, sizeof(TIME_VAL)*oldNumTimer); + + numTimer = _numTimer ; + memAllocTimers() ; + + memcpy(totalTicks, oldTotalTicks, sizeof(TIME_VAL)* oldNumTimer); + delete oldTotalTicks ; + } else { + numTimer = _numTimer ; + memAllocTimers() ; + } + + return _numTimer ; +} + +UINT DS_timer::setCounter( UINT _numCounter ) { + + if (_numCounter == 0 ) + return 0 ; + + if (_numCounter <= numCounter) + return numCounter; + + if ( numCounter != 0 ) { + + // Backup + int numOldCounter = numCounter ; + UINT *oldCounters = new UINT[numOldCounter] ; + memcpy(oldCounters, counters, sizeof(UINT)*numOldCounter) ; + + numCounter = _numCounter ; + memAllocCounters() ; + + // Restore + memcpy(counters, oldCounters, sizeof(UINT)*numOldCounter) ; + delete oldCounters ; + + } else { + numCounter = _numCounter ; + memAllocCounters() ; + } + + return numCounter ; + +} + +/************************************************************************/ +/* Timer */ +/************************************************************************/ +void DS_timer::initTimer( UINT id ) { + timerStates[id] = TIMER_OFF ; +#ifdef _WIN32 + totalTicks[id].QuadPart = 0 ; +#else + totalTicks[id].tv_sec = 0; + totalTicks[id].tv_usec = 0; +#endif +} + +void DS_timer::initTimers( void ) { + for ( UINT i = 0 ; i < numTimer ; i++ ) { + initTimer(i); + } +} + +void DS_timer::onTimer( UINT id ) { + if ( turnOn == false ) + return ; + + if ( timerStates[id] == TIMER_ON ) + return ; +#ifdef _WIN32 + QueryPerformanceCounter(&start_ticks[id]) ; +#else + gettimeofday(&start_ticks[id], NULL); +#endif + + timerStates[id] = TIMER_ON ; +} + +void DS_timer::offTimer( UINT id ) { + if ( turnOn == false ) + return ; + + if ( timerStates[id] == TIMER_OFF ) + return ; + +#ifdef _WIN32 + QueryPerformanceCounter(&end_ticks[id]) ; + totalTicks[id].QuadPart = totalTicks[id].QuadPart + (end_ticks[id].QuadPart - start_ticks[id].QuadPart) ; +#else + gettimeofday(&end_ticks[id], NULL); + TIME_VAL period, previous; + timersub(&end_ticks[id], &start_ticks[id], &period); + previous = totalTicks[id]; + timeradd(&previous, &period, &totalTicks[id]); +#endif + + timerStates[id] = TIMER_OFF ; +} + +double DS_timer::getTimer_ms( UINT id ) { +#ifdef _WIN32 + return ((double)totalTicks[id].QuadPart/(double)ticksPerSecond.QuadPart * 1000) ; +#else + return (double)(totalTicks[id].tv_sec * 1000 + totalTicks[id].tv_usec / 1000.0); +#endif +} + +/************************************************************************/ +/* Counter */ +/************************************************************************/ +void DS_timer::incCounter( UINT id ) { + if ( turnOn == false ) + return ; + + counters[id]++ ; +} + +void DS_timer::initCounters( void ) { + if ( turnOn == false ) + return ; + + for ( UINT i = 0 ; i < numCounter ; i++ ) + counters[i] = 0 ; +} + +void DS_timer::initCounter( UINT id ) { + if ( turnOn == false ) + return ; + + counters[id] = 0 ; +} + +void DS_timer::add2Counter( UINT id, UINT num ) { + if ( turnOn == false ) + return ; + + counters[id] += num ; +} + +UINT DS_timer::getCounter( UINT id ) { + if ( turnOn == false ) + return 0; + + return counters[id] ; +} + +/************************************************************************/ +/* */ +/************************************************************************/ +void DS_timer::printTimer( float _denominator){ + + if ( turnOn == false ) + return ; + + //printf("\n*\t DS_timer Report \t*\n") ; + printf("\n*\t %s \t*\n", timerTitle) ; + printf("* The number of timer = %d, counter = %d\n", numTimer, numCounter ) ; + printf("**** Timer report ****\n") ; + + for ( UINT i = 0 ; i < numTimer ; i++ ) { + if ( getTimer_ms(i) == 0 ) + continue ; + if ( timerName[i].c_str()[0] == 0 ) + printf("Timer %d : %.5f ms\n", i, getTimer_ms(i) ) ; + else + printf("%s : %.5f ms\n", timerName[i].c_str(), getTimer_ms(i) ) ; + } + + printf("**** Counter report ****\n") ; + for ( UINT i = 0 ; i < numCounter ;i++ ) { + if ( counters[i] == 0 ) + continue ; + printf("Counter %d : %.3f (%d) \n",i, counters[i]/_denominator, counters[i] ) ; + } + + printf("*\t End of the report \t*\n") ; +} + +void DS_timer::printToFile( char* fileName, int _id ) +{ + if ( turnOn == false ) + return ; + + FILE *fp ; + + if ( fileName == NULL) + fopen_s(&fp, "DS_timer_report.txt", "a") ; + else { + fopen_s(&fp, fileName, "a") ; + } + + if ( _id >= 0 ) + fprintf_s(fp,"%d\t", _id) ; + + for ( UINT i = 0 ; i < numTimer ; i++ ) { + if ( getTimer_ms(i) == 0 ) + continue ; + fprintf_s(fp, "%s: %.9f\n", timerName[i].c_str(), getTimer_ms(i) ) ; + } + + for ( UINT i = 0 ; i < numCounter ;i++ ) { + if ( counters[i] == 0 ) + continue ; + fprintf_s(fp, "%s: %d\n", timerName[i].c_str(), counters[i] ) ; + } + + fprintf_s(fp, "\n") ; + + fclose(fp) ; +} + +void DS_timer::printTimerNameToFile( char* fileName ) +{ + if ( turnOn == false ) + return ; + + FILE *fp ; + + if ( fileName == NULL) + fopen_s(&fp, "DS_timer_name.txt", "a") ; + else { + fopen_s(&fp, fileName, "a") ; + } + + + for ( UINT i = 0 ; i < numTimer ; i++ ) { + if ( timerName[i].empty() ) + continue ; + fprintf_s(fp, "%s\t", timerName[i].c_str() ) ; + } + fprintf_s(fp, "\n") ; + fclose(fp) ; +} diff --git a/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.h b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.h new file mode 100644 index 0000000..d1c3391 --- /dev/null +++ b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/DS_timer.h @@ -0,0 +1,95 @@ +//#pragma once +#ifndef _DS_TIMER_H +#define _DS_TIMER_H + +#include // std string + +#ifndef UINT +typedef unsigned int UINT; +#endif + +#ifdef _WIN32 + // For windows + #include + typedef LARGE_INTEGER TIME_VAL; +#else + // For Unix/Linux + #include + #include + #include + #include // c string + typedef struct timeval TIME_VAL; +#endif + +#define TIMER_ON true +#define TIMER_OFF false + +class DS_timer +{ +private : + + bool turnOn ; + + UINT numTimer ; + UINT numCounter ; + + // For timers + bool* timerStates ; + TIME_VAL ticksPerSecond; + TIME_VAL *start_ticks; + TIME_VAL *end_ticks; + TIME_VAL *totalTicks; + + char timerTitle[255] ; + std::string *timerName ; + + // For counters + UINT *counters ; + + void memAllocCounters ( void ) ; + void memAllocTimers ( void ) ; + void releaseCounters ( void ) ; + void releaseTimers ( void ) ; + +public: + DS_timer(int _numTimer = 1, int _numCount = 1, bool _trunOn = true ); + ~DS_timer(void); + + // For configurations + inline void timerOn ( void ) { turnOn = TIMER_ON ; } + inline void timerOff ( void ) { turnOn = TIMER_OFF ; } + + UINT getNumTimer( void ) ; + UINT getNumCounter ( void ) ; + UINT setTimer ( UINT _numTimer ) ; + UINT setCounter ( UINT _numCounter ) ; + + // For timers + + void initTimer(UINT id) ; + void initTimers ( void ); + void onTimer(UINT id) ; + void offTimer(UINT id ) ; + double getTimer_ms(UINT id) ; + + void setTimerTitle ( char* _name ) { memset(timerTitle, 0, sizeof(char)*255) ; memcpy(timerTitle, _name, strlen(_name)) ; } + + void setTimerName (UINT id, std::string &_name) { timerName[id] = _name ; } + void setTimerName (UINT id, char* _name) { timerName[id] = _name ;} + + // For counters + + void incCounter(UINT id) ; + void initCounters( void ) ; + void initCounter(UINT id) ; + void add2Counter( UINT id, UINT num ) ; + UINT getCounter ( UINT id ) ; + + // For reports + + void printTimer ( float _denominator = 1 ) ; + void printToFile ( char* fileName, int _id = -1 ) ; + void printTimerNameToFile ( char* fileName ) ; +} ; + +#endif \ No newline at end of file diff --git a/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.cu b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.cu new file mode 100644 index 0000000..92354f3 --- /dev/null +++ b/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.cu @@ -0,0 +1,92 @@ +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include "DS_timer.h" +#include +#include +#include + +#define NUM_BLOCK 10240 +#define NUM_T_IN_B 512 + +__global__ void threadCounting_noSync(int* a) +{ + (*a)++; +} + +__global__ void threadCounting_atomicGlobal(int* a) +{ + atomicAdd(a, 1); +} + +__global__ void threadCounting_atomicShared(int* a) +{ + __shared__ int sa; + + if (threadIdx.x == 0) + sa = 0; + __syncthreads(); + + atomicAdd(&sa, 1); + __syncthreads(); + + if (threadIdx.x == 0) + atomicAdd(a, sa); +} + +int main(void) { + DS_timer timer(10); + timer.setTimerName(0, (char*)"No atoimc"); + timer.setTimerName(1, (char*)"AtomicGlobal"); + timer.setTimerName(2, (char*)"AtomicShared"); + + int a = 0; + int* d1, * d2, * d3; + + //cudaSetDevice(1); + + cudaMalloc((void**)&d1, sizeof(int)); + cudaMemset(d1, 0, sizeof(int) * 0); + + cudaMalloc((void**)&d2, sizeof(int)); + cudaMemset(d2, 0, sizeof(int) * 0); + + cudaMalloc((void**)&d3, sizeof(int)); + cudaMemset(d3, 0, sizeof(int) * 0); + + // warp-up + threadCounting_noSync << > > (d1); + cudaDeviceSynchronize(); + + timer.onTimer(0); + threadCounting_noSync << > > (d1); + cudaDeviceSynchronize(); + timer.offTimer(0); + + cudaMemcpy(&a, d1, sizeof(int), cudaMemcpyDeviceToHost); + printf("[NoAtomic] # of threads = %d\n", a); + + timer.onTimer(1); + threadCounting_atomicGlobal << > > (d2); + cudaDeviceSynchronize(); + timer.offTimer(1); + + cudaMemcpy(&a, d2, sizeof(int), cudaMemcpyDeviceToHost); + printf("[AtomicGlobal] # of threads = %d\n", a); + + timer.onTimer(2); + threadCounting_atomicShared << > > (d3); + cudaDeviceSynchronize(); + timer.offTimer(2); + + cudaMemcpy(&a, d3, sizeof(int), cudaMemcpyDeviceToHost); + printf("[AtomicShared] # of threads = %d\n", a); + + cudaFree(d1); + cudaFree(d2); + cudaFree(d3); + + timer.printTimer(); + + return 0; +} \ No newline at end of file diff --git a/VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj b/VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj new file mode 100644 index 0000000..ef36f87 --- /dev/null +++ b/VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj @@ -0,0 +1,164 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 16.0 + Win32Proj + {e36c456d-0161-426e-b305-e119c40b480f} + ThreadCountingAtomicOp + 10.0 + + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + Application + true + v142 + Unicode + + + Application + false + v142 + true + Unicode + + + + + + + + + + + + + + + + + + + + + + true + + + false + + + true + + + false + + + + Level3 + true + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + + + + + Level3 + true + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + + + Level3 + true + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + + + compute_52,sm_52;compute_86,sm_86 + + + + + Level3 + true + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + + + Console + true + true + true + + + compute_52,sm_52;compute_86,sm_86 + + + + + Document + + + + + + + + + + + + + + \ No newline at end of file diff --git a/VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj.filters b/VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj.filters new file mode 100644 index 0000000..7638b4c --- /dev/null +++ b/VS2019/Intermediate_Class/Lecture6/ThreadCounting_AtomicOp/ThreadCounting_AtomicOp.vcxproj.filters @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file