Skip to content

Commit

Permalink
Update lec5
Browse files Browse the repository at this point in the history
  • Loading branch information
bluekds committed Oct 12, 2021
1 parent 76b7943 commit 78595af
Show file tree
Hide file tree
Showing 8 changed files with 1,073 additions and 6 deletions.
13 changes: 12 additions & 1 deletion CUDA_Programming.sln
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Lecture5", "Lecture5", "{EF
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MatMul_MemAccessPattern", "VS2019\Intermediate_Class\Lecture5\MatMul_MemAccessPattern\MatMul_MemAccessPattern.vcxproj", "{78E9C712-3A19-4402-A9F6-50A621258682}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MatMul_bank_conflict", "VS2019\Intermediate_Class\Lecture5\MatMul_bank_conflict\MatMul_bank_conflict.vcxproj", "{8D608736-D0A2-4D23-BED7-641B87D5F4C1}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
Expand Down Expand Up @@ -139,6 +141,14 @@ Global
{78E9C712-3A19-4402-A9F6-50A621258682}.Release|x64.Build.0 = Release|x64
{78E9C712-3A19-4402-A9F6-50A621258682}.Release|x86.ActiveCfg = Release|Win32
{78E9C712-3A19-4402-A9F6-50A621258682}.Release|x86.Build.0 = Release|Win32
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Debug|x64.ActiveCfg = Debug|x64
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Debug|x64.Build.0 = Debug|x64
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Debug|x86.ActiveCfg = Debug|Win32
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Debug|x86.Build.0 = Debug|Win32
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Release|x64.ActiveCfg = Release|x64
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Release|x64.Build.0 = Release|x64
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Release|x86.ActiveCfg = Release|Win32
{8D608736-D0A2-4D23-BED7-641B87D5F4C1}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand All @@ -154,7 +164,7 @@ Global
{F7C0847C-FB6E-4EEA-9F41-6496E6A9D1D1} = {694AE413-7EF6-4E09-A6FB-F0DB09C6C958}
{847E94AE-AF96-4A3B-A751-FC11A7D5E0B9} = {694AE413-7EF6-4E09-A6FB-F0DB09C6C958}
{CE6F28BA-60F6-4E0E-ACA0-D6D96E0B0351} = {BBC3837C-5486-4B81-92F4-2ADAE8696667}
{02359BC0-0118-4B16-B441-36814F5E71BB} = {BBC3837C-5486-4B81-92F4-2ADAE8696667}
{02359BC0-0118-4B16-B441-36814F5E71BB} = {94C2BE3E-F051-43B9-9C46-C56D1D2D5B0D}
{94C2BE3E-F051-43B9-9C46-C56D1D2D5B0D} = {BBC3837C-5486-4B81-92F4-2ADAE8696667}
{9D888272-7A7A-4696-92CF-4AAD7B154C99} = {CE6F28BA-60F6-4E0E-ACA0-D6D96E0B0351}
{3EFA4D0E-3D17-4785-AC1A-DA9CE3CD7E2B} = {CE6F28BA-60F6-4E0E-ACA0-D6D96E0B0351}
Expand All @@ -165,6 +175,7 @@ Global
{22D65404-3842-48CD-BCA3-FABACECE205F} = {FF3FE1FF-AC94-4E2C-9C52-FD2318512EE2}
{EF9A42B1-29F7-4349-827D-08B3B5D95ECA} = {B3430C43-00B8-443C-A7C2-DCF639AEADD1}
{78E9C712-3A19-4402-A9F6-50A621258682} = {EF9A42B1-29F7-4349-827D-08B3B5D95ECA}
{8D608736-D0A2-4D23-BED7-641B87D5F4C1} = {EF9A42B1-29F7-4349-827D-08B3B5D95ECA}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {52DE253B-6715-4A93-BA27-7131973681F9}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,18 +92,19 @@ int main(int argc, char* argv[])
cudaMemcpy(dA, A, sizeA * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(dB, B, sizeB * sizeof(int), cudaMemcpyHostToDevice);

dim3 gridDim(ceil((float)m / BLOCK_SIZE), ceil((float)n / BLOCK_SIZE));
dim3 blockDim(BLOCK_SIZE, BLOCK_SIZE);

// Row = X-dim version
timer.onTimer(1);
MatMul_xRow<<<gridDim, blockDim>>> (dA, dB, dC, m, n, k);
dim3 gridDim_xRow(ceil((float)m / BLOCK_SIZE), ceil((float)n / BLOCK_SIZE));
dim3 blockDim_xRow(BLOCK_SIZE, BLOCK_SIZE);
MatMul_xRow<<<gridDim_xRow, blockDim_xRow >>> (dA, dB, dC, m, n, k);
cudaDeviceSynchronize();
timer.offTimer(1);

// Row = Y-dim version
timer.onTimer(2);
MatMul_yRow<< <gridDim, blockDim >> > (dA, dB, dC, m, n, k);
dim3 gridDim_yRow(ceil((float)n / BLOCK_SIZE), ceil((float)m / BLOCK_SIZE));
dim3 blockDim_yRow(BLOCK_SIZE, BLOCK_SIZE);
MatMul_yRow<<<gridDim_yRow, blockDim_yRow >>> (dA, dB, dC, m, n, k);
cudaDeviceSynchronize();
timer.offTimer(2);

Expand Down
197 changes: 197 additions & 0 deletions Intermediate_Class/Lecture5/MatMul_back_conflict/DS_definitions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#pragma once

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>

// Identifiers for the operating systems this header distinguishes.
#define OS_WINDOWS 0
#define OS_LINUX 1

// Select the target OS: builds that define _WIN32 are treated as Windows,
// every other build is treated as Linux.
#ifdef _WIN32
    #define _TARGET_OS OS_WINDOWS
#else
    // nullptr is a keyword (not a macro) from C++11 on, so a plain
    // `#ifndef nullptr` test is always true and would replace the keyword with
    // NULL. Only provide the fallback when the compiler predates C++11.
    #if !defined(nullptr) && (!defined(__cplusplus) || __cplusplus < 201103L)
        #define nullptr NULL
    #endif
    #define _TARGET_OS OS_LINUX
#endif

/************************************************************************/
/* OS-dependent functions */
/************************************************************************/
#if _TARGET_OS == OS_WINDOWS
// #define _SPRINT sprintf_s
// Tokenizer function name used on this platform.
#define _STRTOK strtok_s

// Print a prompt, wait for one character on stdin, then exit with code 0.
#define EXIT_WIHT_KEYPRESS \
    { \
        std::cout << "Press any key to exit..."; \
        getchar(); \
        exit(0); \
    }

// Split _path into the drive / dir / filename / ext members of _result.
// Each destination buffer is declared to _splitpath_s as 255 bytes long.
#define SPLIT_PATH(_path, _result) \
    _splitpath_s(_path, \
                 _result.drive, 255, \
                 _result.dir, 255, \
                 _result.filename, 255, \
                 _result.ext, 255)


#elif _TARGET_OS == OS_LINUX
#include <libgen.h>
#include <inttypes.h>

// Tokenizer function name used on this platform.
#define _STRTOK strtok_r

// Print a termination notice and exit with code 0 (no key press is awaited).
#define EXIT_WIHT_KEYPRESS {std::cout << "Program was terminated!"; exit(0);}

// Name-only aliases for the *_s functions. Arguments are passed through
// unchanged, so only call forms whose argument list matches the plain
// function are translated correctly.
// NOTE(review): calls that pass an explicit buffer-size argument would not map
// onto sprintf -- confirm call sites.
#define sprintf_s sprintf
#define scanf_s scanf
#define fprintf_s fprintf

// Map the __int64 type name onto the fixed-width type from <inttypes.h>.
#define __int64 int64_t

// fopen_s shim: stores the opened stream in *fp and evaluates to 0 on success
// or a non-zero value on failure, so `if (fopen_s(&f, ...) != 0)` error checks
// behave the same way as with the real fopen_s. (The previous definition
// evaluated to the FILE*, i.e. non-zero on success.)
#define fopen_s(fp, name, mode) ((*(fp) = fopen((name), (mode))) != NULL ? 0 : 1)

#endif

/************************************************************************/
/* Defines */
/************************************************************************/

// *********** data size
// Multipliers built from 1024: _1K_ = 2^10, _1M_ = 2^20, _1G_ = 2^30.
#define _1K_ (1 << 10)
#define _1M_ (_1K_ * _1K_)
#define _1G_ (_1K_ * _1M_)

// Size constant for character strings (usage is not visible in this header
// section -- presumably the length of fixed char buffers; confirm).
#define CHAR_STRING_SIZE 255

/************************************************************************/
/* Type definitions */
/************************************************************************/
// Short alias for the built-in unsigned integer type.
typedef unsigned UINT;

/************************************************************************/
/* Macro functions */
/************************************************************************/
// Release a single object allocated with `new` and reset the pointer to NULL.
// The expansion is a braced block (same shape as SAFE_DELETE) so that an
// `else` written after the macro can no longer attach to the macro's inner
// `if`, and the argument is parenthesized so expression arguments expand
// correctly. Works with or without a trailing semicolon.
#define DS_MEM_DELETE(a) \
    { \
        if ((a) != NULL) { \
            delete (a); \
            (a) = NULL; \
        } \
    }

// Array counterpart of DS_MEM_DELETE: releases memory allocated with `new[]`
// and resets the pointer to NULL.
#define DS_MEM_DELETE_ARRAY(a) \
    { \
        if ((a) != NULL) { \
            delete [] (a); \
            (a) = NULL; \
        } \
    }

// Constants 0 and 1 for the "min" and "max" slots of a range.
// NOTE(review): presumably used as indices into a two-element [min, max]
// array -- confirm against callers.
#define RANGE_MIN 0
#define RANGE_MAX 1

// Value 0 meaning "strings matched".
// NOTE(review): presumably compared against a strcmp-style result -- confirm.
#define MATCHED_STRING 0

// Same 0/1 pair under VTK_-prefixed names; only defined here when
// VTK_RANGE_MIN has not already been defined elsewhere.
#ifndef VTK_RANGE_MIN
#define VTK_RANGE_MIN 0
#define VTK_RANGE_MAX 1
#endif

// Print
// Writes "<file>, line <n>" for the place where the macro is expanded
// (no trailing newline). Expands to a single printf call expression.
#define PRINT_LINE_INFO \
    printf("%s, line %d", __FILE__, __LINE__)

// Writes the expansion site, then " at ", then _msg. _msg is handed to printf
// as the format string itself.
#define PRINT_ERROR_MSG(_msg) \
    { \
        PRINT_LINE_INFO; \
        printf(" at "); \
        printf(_msg); \
    }

// Single loops
// Each macro expands to a `for` header counting from 0 (or `start`) up to, but
// not including, the given bound; the statement that follows is the loop body.
// Bounds are parenthesized so expression arguments such as `c ? 5 : 3` or
// `n & mask` are compared as a whole instead of being split by precedence.
#define LOOP_I(a) for (int i = 0; i < (a); i++)
#define LOOP_J(a) for (int j = 0; j < (a); j++)
#define LOOP_K(a) for (int k = 0; k < (a); k++)
#define LOOP_INDEX(index, end) for (int index = 0; index < (end); index++)
#define LOOP_INDEX_START_END(index, start, end) for (int index = (start); index < (end); index++)

// Multiple loops
// Nested loops; the first argument is the outermost bound and `i` is always
// the innermost counter.
#define LOOP_J_I(b, a) LOOP_J(b) LOOP_I(a)
#define LOOP_K_J_I(c, b, a) LOOP_K(c) LOOP_J_I(b, a)

//
#ifndef SWAP
// Exchanges the values of `a` and `b` through a temporary copy.
// Skipped entirely when a macro named SWAP is already defined.
template <typename T>
void SWAP(T &a, T &b)
{
    T previousA = a;
    a = b;
    b = previousA;
}
#endif

//
// Smaller of two values. Every use of an argument is parenthesized so that
// operands containing lower-precedence operators (e.g. `x & mask`) are
// compared as a whole. Arguments are still evaluated more than once, so avoid
// passing expressions with side effects.
#ifndef MIN
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif

// Larger of two values; same parenthesization and multiple-evaluation caveat
// as MIN.
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

// Index converter

// Conversions between a flat index and (x, y) coordinates for a row width _W.
// All macro arguments are parenthesized so expression arguments expand
// correctly.

// x coordinate of flat index _ID.
#define INDEX2X(_ID,_W) ((_ID) % (_W))
// y coordinate of flat index _ID.
#define INDEX2Y(_ID,_W) ((_ID) / (_W))
// Stores both coordinates of flat index _ID into _X and _Y.
// (Previously referenced a non-existent `_ID_` token and could not compile.)
#define INDEX2ID(_ID,_X,_Y,_W) {(_X)=INDEX2X(_ID,_W);(_Y)=INDEX2Y(_ID,_W);}
// Flat index of coordinates (_X, _Y).
#define ID2INDEX(_W,_X,_Y) ((_Y)*(_W)+(_X))
// Element offset of _target from _base, both viewed as pointers to _type.
#define PTR2ID(_type, _target, _base) ((_type*)(_target) - (_type*)(_base))

// Memory allocation and release
#ifndef SAFE_DELETE
// Deletes the single object `p` points to when `p` is not NULL, then sets
// `p` to NULL in every case.
#define SAFE_DELETE(p) \
    { \
        if (p != NULL) \
            delete p; \
        p = NULL; \
    }
#endif

#ifndef SAFE_DELETE_ARR
// Array form: uses delete[] when `p` is not NULL, then sets `p` to NULL.
#define SAFE_DELETE_ARR(p) \
    { \
        if (p != NULL) \
            delete [] p; \
        p = NULL; \
    }
#endif

// Allocates `size` elements of `type` with new[] and stores the result in `p`.
// If the allocation throws std::bad_alloc, prints the source location and the
// requested size in MB, then runs EXIT_WIHT_KEYPRESS.
// `size` is parenthesized so that an expression argument (e.g. `n + 1`) is
// multiplied as a whole when the requested size is reported, and the unused
// exception variable is dropped.
#define SAFE_NEW(p, type, size) {\
    try {(p) = new type[(size)];} \
    catch(const std::bad_alloc&) \
    { printf("[%s, line %d] fail to memory allocation - %.2f MB requested\n", __FILE__, __LINE__, (float)(sizeof(type)*(size))/_1M_); \
    EXIT_WIHT_KEYPRESS }\
}

// Fills `size` elements of type T starting at *p with zero bytes.
// Does nothing when *p is NULL. With the default size of 0 no bytes are
// written.
template <typename T>
void memsetZero(T** p, long long size = 0)
{
    T* target = *p;
    if (target == NULL)
        return;

    memset(target, 0, sizeof(T) * size);
}

// Allocates `size` elements of T with new[], zero-fills them with memset and
// stores the pointer in *p. When memUsage is given, the allocated byte count
// (sizeof(T) * size) is added to *memUsage.
template <typename T>
void allocNinitMem(T** p, long long size, double *memUsage = NULL)
{
    T* fresh = new T[size];
    memset(fresh, 0, sizeof(T) * size);
    *p = fresh;

    if (memUsage == NULL)
        return;

    *memUsage += sizeof(T) * size;
}

// Copies `_size` elements of `_type` from `_src` to `_dst` with memcpy.
// If either pointer is null, prints the source location and both pointer
// values, then terminates the program with exit code -1.
// The null-check body is braced so exit(-1) runs only on the error path
// (previously it ran unconditionally and the memcpy was never reached), and
// pointers are printed with %p instead of %x.
#define SAFE_MEMCPY(_dst, _src, _type, _size){ \
    if((_dst) == nullptr || (_src) == nullptr ) { \
        printf("[%s, line %d] fail to memcpy (dst = %p, src = %p)\n", __FILE__, __LINE__, (const void*)(_dst), (const void*)(_src)); \
        exit(-1); \
    } \
    memcpy((_dst), (_src), sizeof(_type)*(_size));\
}

// VTK related
#ifndef SAFE_DELETE_VTK
// Calls p->Delete() when `p` is not NULL, then sets `p` to NULL in every case.
#define SAFE_DELETE_VTK(p) \
    { \
        if (p != NULL) \
            p->Delete(); \
        p = NULL; \
    }
#endif

#ifndef VTK_IS_NOERROR
//#include "DS_common_def.h"
// True when p->GetErrorCode() equals vtkErrorCode::NoError, false otherwise.
#define VTK_IS_NOERROR(p) (p->GetErrorCode() == vtkErrorCode::NoError)
#endif

/************************************************************************/
/* Data structures */
/************************************************************************/
// Pair of strings labelled `input` and `output`.
// NOTE(review): presumably maps an input name to its output name -- confirm
// against callers.
struct nameMatch {
    std::string input;
    std::string output;
};

// Four fixed 255-byte character buffers holding the drive, directory,
// file name and extension parts of a path.
struct filePathSplit {
    char drive[255];
    char dir[255];
    char filename[255];
    char ext[255];
};
Loading

0 comments on commit 78595af

Please sign in to comment.