-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
1,299 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
#include "cuda_runtime.h" | ||
#include "device_launch_parameters.h" | ||
|
||
#include "DS_timer.h" | ||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
|
||
#define ARRAY_SIZE (64*1024*1024) | ||
#define B_SIZE (1024) | ||
|
||
#define NUM_STREAMS 1 | ||
|
||
/*
 * Per-element kernel: each thread reads one value from _in, folds it 250
 * times through acc = (acc + value * 5) % 10, and writes the result to _out.
 *
 * Launch layout: 1-D grid of 1-D blocks; the global thread index selects the
 * element. There is no bounds guard, so the launch must provide exactly one
 * thread per element (gridDim.x * blockDim.x == number of elements).
 * No shared memory is used.
 */
__global__ void myKernel(int* _in, int* _out)
{
    const int gid = blockDim.x * blockIdx.x + threadIdx.x;

    // The input element does not change inside the loop, so read it from
    // global memory once instead of on every iteration.
    const int value = _in[gid];

    int acc = 0;
    for (int round = 0; round < 250; round++) {
        acc = (acc + value * 5) % 10;
    }

    _out[gid] = acc;
}
|
||
/* Prints the failing call, its source location and the CUDA error text, then exits. */
static void checkCudaCall(cudaError_t status, const char* what, const char* file, int line)
{
    if (status == cudaSuccess)
        return;
    fprintf(stderr, "[%s, line %d] %s failed: %s\n", file, line, what, cudaGetErrorString(status));
    exit(EXIT_FAILURE);
}

/* Wraps a CUDA runtime call so that its return code is never silently dropped. */
#define CHECK_CUDA_CALL(call) checkCudaCall((call), #call, __FILE__, __LINE__)

/*
 * Splits ARRAY_SIZE ints into NUM_STREAMS chunks and, per chunk, enqueues
 * host-to-device copy -> myKernel -> device-to-host copy on that chunk's
 * stream. The whole pipeline is timed on the host with DS_timer and per
 * stream with a pair of CUDA events.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main(void)
{
    // myKernel has no bounds guard, so every chunk must be covered by an
    // exact number of full blocks.
    if (ARRAY_SIZE % (NUM_STREAMS * B_SIZE) != 0) {
        fprintf(stderr, "ARRAY_SIZE must be a multiple of NUM_STREAMS * B_SIZE\n");
        return EXIT_FAILURE;
    }

    DS_timer timer(1);
    timer.setTimerName(0, "Total");

    const size_t totalBytes = sizeof(int) * ARRAY_SIZE;

    // Host buffers are allocated with cudaMallocHost because they are the
    // source/destination of the cudaMemcpyAsync calls below.
    int* in = NULL, * out = NULL, * out2 = NULL;

    CHECK_CUDA_CALL(cudaMallocHost(&in, totalBytes));
    memset(in, 0, totalBytes);

    CHECK_CUDA_CALL(cudaMallocHost(&out, totalBytes));
    memset(out, 0, totalBytes);

    CHECK_CUDA_CALL(cudaMallocHost(&out2, totalBytes));
    memset(out2, 0, totalBytes);

    int* dIn = NULL, * dOut = NULL;
    CHECK_CUDA_CALL(cudaMalloc(&dIn, totalBytes));
    CHECK_CUDA_CALL(cudaMalloc(&dOut, totalBytes));

    for (int i = 0; i < ARRAY_SIZE; i++)
        in[i] = rand() % 10;

    // Multiple stream version
    cudaStream_t stream[NUM_STREAMS];
    cudaEvent_t start[NUM_STREAMS], end[NUM_STREAMS];

    for (int i = 0; i < NUM_STREAMS; i++) {
        CHECK_CUDA_CALL(cudaStreamCreate(&stream[i]));
        CHECK_CUDA_CALL(cudaEventCreate(&start[i]));
        CHECK_CUDA_CALL(cudaEventCreate(&end[i]));
    }

    const int chunkSize = ARRAY_SIZE / NUM_STREAMS;
    const size_t chunkBytes = sizeof(int) * chunkSize;

    timer.onTimer(0);
    for (int i = 0; i < NUM_STREAMS; i++)
    {
        const int offset = chunkSize * i;
        CHECK_CUDA_CALL(cudaEventRecord(start[i], stream[i]));

        CHECK_CUDA_CALL(cudaMemcpyAsync(dIn + offset, in + offset, chunkBytes, cudaMemcpyHostToDevice, stream[i]));
        myKernel <<<chunkSize / B_SIZE, B_SIZE, 0, stream[i]>>> (dIn + offset, dOut + offset);
        // A kernel launch returns nothing; launch-configuration errors are
        // only visible through cudaGetLastError().
        CHECK_CUDA_CALL(cudaGetLastError());
        CHECK_CUDA_CALL(cudaMemcpyAsync(out2 + offset, dOut + offset, chunkBytes, cudaMemcpyDeviceToHost, stream[i]));

        CHECK_CUDA_CALL(cudaEventRecord(end[i], stream[i]));
    }

    // Wait for all enqueued work; errors raised during asynchronous execution
    // are reported by this call.
    CHECK_CUDA_CALL(cudaDeviceSynchronize());
    timer.offTimer(0);
    timer.printTimer();

    for (int i = 0; i < NUM_STREAMS; i++) {
        float time = 0;
        CHECK_CUDA_CALL(cudaEventElapsedTime(&time, start[i], end[i]));
        printf("Stream[%d] : %f ms\n", i, time);
    }

    // NOTE(review): `out` is zero-filled above and never written again in this
    // file, so this loop compares the device results against zeros. Confirm
    // whether a host-side reference pass was meant to fill it.
    for (int i = 0; i < ARRAY_SIZE; i++) {
        if (out[i] != out2[i])
            printf("!");
    }

    for (int i = 0; i < NUM_STREAMS; i++) {
        CHECK_CUDA_CALL(cudaStreamDestroy(stream[i]));
        CHECK_CUDA_CALL(cudaEventDestroy(start[i]));
        CHECK_CUDA_CALL(cudaEventDestroy(end[i]));
    }

    CHECK_CUDA_CALL(cudaFree(dIn));
    CHECK_CUDA_CALL(cudaFree(dOut));

    CHECK_CUDA_CALL(cudaFreeHost(in));
    CHECK_CUDA_CALL(cudaFreeHost(out));
    CHECK_CUDA_CALL(cudaFreeHost(out2));

    return 0;
}
197 changes: 197 additions & 0 deletions
197
Intermediate_Class/Lecture8/CUDA_Event/DS_definitions.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
#pragma once | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <iostream> | ||
|
||
// Identifiers for the supported target platforms.
#define OS_WINDOWS 0
#define OS_LINUX 1

// Pick the target platform from the compiler's predefined macros:
// _WIN32 selects Windows, everything else is treated as Linux.
#ifdef _WIN32
#define _TARGET_OS OS_WINDOWS
#else
// nullptr is a keyword from C++11 onward, never a macro, so "#ifndef nullptr"
// alone is always true. Only supply the NULL fallback for pre-C++11 compilers.
#if defined(__cplusplus) && (__cplusplus < 201103L) && !defined(__GXX_EXPERIMENTAL_CXX0X__) && !defined(nullptr)
#define nullptr NULL
#endif
#define _TARGET_OS OS_LINUX
#endif
|
||
/************************************************************************/ | ||
/* OS dependent function */ | ||
/************************************************************************/ | ||
#if _TARGET_OS == OS_WINDOWS
// #define _SPRINT sprintf_s
#define _STRTOK strtok_s

// Prompts, waits for one character on stdin, then terminates with status 0.
#define EXIT_WIHT_KEYPRESS {std::cout << "Press any key to exit..."; getchar(); exit(0);}

// Splits _path into the drive/dir/filename/ext members of _result
// (each member is passed with a capacity of 255).
#define SPLIT_PATH(_path,_result) \
	_splitpath_s(_path, _result.drive, 255, _result.dir, 255, _result.filename, 255, _result.ext, 255)

#elif _TARGET_OS == OS_LINUX
#include <libgen.h>
#include <inttypes.h>

#define _STRTOK strtok_r

// Prints a termination message, then terminates with status 0.
#define EXIT_WIHT_KEYPRESS {std::cout << "Program was terminated!"; exit(0);}

// Name-only substitutes for the *_s functions.
// NOTE(review): these rename the function but do not adapt the argument list;
// call sites that pass an explicit buffer-size argument will not translate.
// Verify against callers.
#define sprintf_s sprintf
#define scanf_s scanf
#define fprintf_s fprintf

#define __int64 int64_t

// Stores the opened stream in *fp and evaluates to 0 on success and non-zero
// on failure, so the same "!= 0 means error" check works on both platforms.
// (The previous expansion evaluated to the FILE* itself, i.e. non-zero on success.)
#define fopen_s(fp, name, mode) ((*(fp) = fopen((name), (mode))) == NULL)

#endif
|
||
/************************************************************************/ | ||
/* Defines */ | ||
/************************************************************************/ | ||
|
||
// *********** data size | ||
#define _1K_ 1024 | ||
#define _1M_ (_1K_*_1K_) | ||
#define _1G_ (_1M_*_1K_) | ||
|
||
#define CHAR_STRING_SIZE 255 | ||
|
||
/************************************************************************/ | ||
/* Type definitions */ | ||
/************************************************************************/ | ||
typedef unsigned int UINT ; | ||
|
||
/************************************************************************/ | ||
/* Macro functions */ | ||
/************************************************************************/ | ||
// Deletes a single object allocated with new and resets the pointer to NULL.
// The expansion is a braced block so that an `else` following the macro
// cannot attach to the macro's internal `if`.
#define DS_MEM_DELETE(a) \
	{ \
		if ((a) != NULL) { \
			delete (a) ; \
			(a) = NULL ; \
		} \
	}

// Array counterpart of DS_MEM_DELETE: releases with delete [] and resets the
// pointer to NULL.
#define DS_MEM_DELETE_ARRAY(a) \
	{ \
		if ((a) != NULL) { \
			delete [] (a) ; \
			(a) = NULL ; \
		} \
	}
|
||
#define RANGE_MIN 0 | ||
#define RANGE_MAX 1 | ||
|
||
#define MATCHED_STRING 0 | ||
|
||
#ifndef VTK_RANGE_MIN | ||
#define VTK_RANGE_MIN 0 | ||
#define VTK_RANGE_MAX 1 | ||
#endif | ||
|
||
// Prints "<file>, line <n>" for the location where the macro is expanded.
#define PRINT_LINE_INFO printf("%s, line %d", __FILE__, __LINE__)
// Prints the location followed by " at " and the message. The message is
// passed as data to a fixed "%s" format, so a '%' inside it is printed as-is
// instead of being interpreted as a conversion specifier.
#define PRINT_ERROR_MSG(_msg) {PRINT_LINE_INFO; printf(" at "); printf("%s", (_msg));}
|
||
// Single loops | ||
// Each macro expands to a for-header counting from 0 (or `start`) up to, but
// not including, the bound. Bounds are parenthesized so that operator
// expressions such as `n | 1` or `c ? a : b` are compared as a whole.
#define LOOP_I(a) for(int i=0; i<(a); i++)
#define LOOP_J(a) for(int j=0; j<(a); j++)
#define LOOP_K(a) for(int k=0; k<(a); k++)
#define LOOP_INDEX(index, end) for (int index = 0 ; index < (end) ; index++)
#define LOOP_INDEX_START_END(index, start, end) for (int index = (start) ; index < (end) ; index++)

// Multiple loops: the first argument bounds the outermost loop and the last
// argument bounds the innermost loop (i).
#define LOOP_J_I(b, a) LOOP_J(b) LOOP_I(a)
#define LOOP_K_J_I(c,b,a) for(int k=0; k<(c); k++) LOOP_J_I(b,a)
|
||
// | ||
#ifndef SWAP
// Exchanges the values of a and b through one temporary copy.
template<class T>
void SWAP(T &a, T &b){
	T held = a;
	a = b;
	b = held;
}
#endif
|
||
// | ||
// Smaller of a and b. Arguments are parenthesized so that operand expressions
// keep their own precedence; each argument may still be evaluated twice, so
// avoid side effects in the arguments.
#ifndef MIN
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif

// Larger of a and b. Same evaluation caveat as MIN.
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
|
||
// Index converter | ||
|
||
// Column of linear index _ID in a row-major layout of width _W.
#define INDEX2X(_ID,_W) ((_ID)%(_W))
// Row of linear index _ID in a row-major layout of width _W.
#define INDEX2Y(_ID,_W) ((_ID)/(_W))
// Writes the column and row of _ID into _X and _Y.
// (The row computation previously referenced the undefined name _ID_.)
#define INDEX2ID(_ID,_X,_Y,_W) {(_X)=INDEX2X(_ID,_W);(_Y)=INDEX2Y(_ID,_W);}
// Linear index of column _X, row _Y in a row-major layout of width _W.
#define ID2INDEX(_W,_X,_Y) ((_Y)*(_W)+(_X))
// Element offset of _target from _base, both viewed as _type*.
#define PTR2ID(_type, _target, _base) ((_type*)(_target) - (_type*)(_base))
|
||
// Memory allocation and release | ||
#ifndef SAFE_DELETE | ||
#define SAFE_DELETE(p) {if(p!=NULL) delete p; p=NULL;} | ||
#endif | ||
|
||
#ifndef SAFE_DELETE_ARR | ||
#define SAFE_DELETE_ARR(p) {if(p!=NULL) delete [] p; p=NULL;} | ||
#endif | ||
|
||
// Allocates `size` elements of `type` with new[] and stores the pointer in p.
// On std::bad_alloc it prints the requested size in MB together with the
// source location, then runs EXIT_WIHT_KEYPRESS.
// `size` is parenthesized so an expression argument is multiplied as a whole.
#define SAFE_NEW(p, type, size) {\
	try {(p) = new type[(size)];} \
	catch(const std::bad_alloc&) \
	{ printf("[%s, line %d] fail to memory allocation - %.2f MB requested\n", __FILE__, __LINE__, (float)(sizeof(type)*(size))/_1M_); \
	EXIT_WIHT_KEYPRESS }\
}
|
||
// Zero-fills `size` elements of type T starting at *p.
// Does nothing when *p is NULL.
template<class T>
void memsetZero(T** p, long long size = 0) {
	T* target = *p;
	if (target == NULL)
		return;
	memset(target, 0, sizeof(T)*size);
}
|
||
// Allocates `size` elements of T with new[], zero-fills them with memset and
// stores the pointer in *p. When memUsage is given, the allocated byte count
// is added to *memUsage.
template<class T>
void allocNinitMem(T** p, long long size, double *memUsage = NULL) {
	// Plain new[] is used here; the SAFE_NEW variant stays disabled.
	//SAFE_NEW(*p, T, size);
	T* block = new T[size];
	memset(block, 0, sizeof(T)*size);
	*p = block;

	if (memUsage == NULL)
		return;

	*memUsage += sizeof(T)*size;
}
|
||
// Copies _size elements of _type from _src to _dst.
// If either pointer is null, prints both pointers with the source location
// and exits with status -1. The check is braced so that exit() runs only on
// that failure path (it previously ran unconditionally, making the memcpy
// unreachable). Pointers are printed with %p.
#define SAFE_MEMCPY(_dst, _src, _type, _size){ \
	if((_dst) == nullptr || (_src) == nullptr ) { \
		printf("[%s, line %d] fail to memcpy (dst = %p, src = %p)\n", __FILE__, __LINE__, (const void*)(_dst), (const void*)(_src)); \
		exit(-1); \
	} \
	memcpy((_dst), (_src), sizeof(_type)*(_size));\
}
|
||
// VTK related | ||
#ifndef SAFE_DELETE_VTK | ||
#define SAFE_DELETE_VTK(p) {if(p!=NULL) p->Delete(); p=NULL;} | ||
#endif | ||
|
||
#ifndef VTK_IS_NOERROR | ||
//#include "DS_common_def.h" | ||
#define VTK_IS_NOERROR(p) (p->GetErrorCode()==vtkErrorCode::NoError ? true : false) | ||
#endif | ||
|
||
/************************************************************************/ | ||
/* Data structures */ | ||
/************************************************************************/ | ||
// Pair of strings: an input name and the output name associated with it.
struct nameMatch {
	std::string input;
	std::string output;
};
|
||
// Four fixed-size character buffers (255 bytes each) holding the drive,
// directory, file name and extension parts of a path.
struct filePathSplit {
	char drive[255];
	char dir[255];
	char filename[255];
	char ext[255];
};
Oops, something went wrong.