Skip to content

Commit

Permalink
Merge pull request #2 from ROCmSoftwarePlatform/amd
Browse files Browse the repository at this point in the history
Amd RNN
  • Loading branch information
Mayank Daga authored Mar 30, 2018
2 parents b49d1bb + 3a25b28 commit e569133
Show file tree
Hide file tree
Showing 6 changed files with 413 additions and 78 deletions.
7 changes: 6 additions & 1 deletion code/amd/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,20 @@ CONV_PATH?=$

INC_DIR=${SOURCE_DIR}/../kernels

all: conv gemm
all: conv rnn gemm

#OPT=-g -O0 -fsanitize=undefined -fno-omit-frame-pointer
OPT=-O3
TARGETS=

conv:
$(MKDIR) $(BIN_DIR)
$(HIPCC) ${SOURCE_DIR}/conv_bench_rocm.cpp -o $(BIN_DIR)/conv_bench -I$(ROCM_PATH)/include -I$(INC_DIR) -l$(CONV_LIBRARY) $(OPT) -std=c++11 --amdgpu-target=gfx900

rnn:
$(MKDIR) $(BIN_DIR)
$(HIPCC) ${SOURCE_DIR}/rnn_bench_rocm.cpp -o $(BIN_DIR)/rnn_bench -I$(ROCM_PATH)/include -I$(INC_DIR) -l$(CONV_LIBRARY) $(OPT) -std=c++11 --amdgpu-target=gfx900

gemm:
$(MKDIR) $(BIN_DIR)
$(HIPCC) ${SOURCE_DIR}/gemm_bench.cpp -o $(BIN_DIR)/gemm_bench -I$(ROCM_PATH)/include -I$(INC_DIR) -l$(ROCBLAS_LIB) $(OPT) -std=c++11 --amdgpu-target=gfx900
Expand Down
26 changes: 13 additions & 13 deletions code/amd/conv_bench_rocm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class miopenCNN {

output_dims_ = {out_w, out_h, out_c, out_n};

h = zeros(output_dims_);
h = zeros<float>(output_dims_);


// Set fwd workspace size
Expand All @@ -79,7 +79,7 @@ class miopenCNN {

std::vector<int> u = std::vector<int>{static_cast<int>(fwd_workspace_size_ / sizeof(float)), 1};

fwd_workspace_ = zeros(u);
fwd_workspace_ = zeros<float>(u);

const int requestAlgoCount = 1;
int returnedAlgoCount;
Expand Down Expand Up @@ -113,7 +113,7 @@ class miopenCNN {
w_desc_.desc(),
&bwd_params_workspace_size_));
u = std::vector<int>{static_cast<int>(bwd_params_workspace_size_ / sizeof(float)), 1};
bwd_params_workspace_ = zeros(u);
bwd_params_workspace_ = zeros<float>(u);

CHECK_MIOPEN_ERROR(miopenFindConvolutionBackwardWeightsAlgorithm(
miopen_handle_.handle(),
Expand Down Expand Up @@ -143,7 +143,7 @@ class miopenCNN {
&bwd_inputs_workspace_size_));

u = std::vector<int>{static_cast<int>(bwd_inputs_workspace_size_ / sizeof(float)), 1};
bwd_inputs_workspace_ = zeros(u);
bwd_inputs_workspace_ = zeros<float>(u);

CHECK_MIOPEN_ERROR(miopenFindConvolutionBackwardDataAlgorithm(
miopen_handle_.handle(),
Expand Down Expand Up @@ -257,10 +257,10 @@ std::tuple<int, int, int, std::string> time_cnn(


// Allocate memory for filter
auto filter = rand(std::vector<int>{r, s, c, k});
auto filter = rand<float>(std::vector<int>{r, s, c, k});

// Allocate memory for input
auto input = rand(std::vector<int>{w, h, c, n});
auto input = rand<float>(std::vector<int>{w, h, c, n});
miopenCNN cnn(w, h, c, n, k, r, s, pad_w, pad_h, wstride, hstride, input, filter);

// Allocate memory for output tensor
Expand All @@ -283,8 +283,8 @@ std::tuple<int, int, int, std::string> time_cnn(
int fwd_time = static_cast<int>(std::chrono::duration<double, std::micro>(end - start).count() / num_repeats);

// Allocate memory for backward pass wrt weights
auto delta = rand(cnn.get_output_dims());
auto dW = zeros(std::vector<int>{r, s, c, k});
auto delta = rand<float>(cnn.get_output_dims());
auto dW = zeros<float>(std::vector<int>{r, s, c, k});

// Warm up backward
cnn.backward_params(input, delta, dW);
Expand All @@ -303,7 +303,7 @@ std::tuple<int, int, int, std::string> time_cnn(
int bwd_params_time = static_cast<int>(std::chrono::duration<double, std::micro>(end - start).count() / num_repeats);

//Allocate memory for backward pass wrt inputs
auto dX = zeros(std::vector<int>{w, h, c, n});
auto dX = zeros<float>(std::vector<int>{w, h, c, n});

//Warm up backward inputs
cnn.backward_inputs(filter, delta, dX);
Expand All @@ -329,7 +329,7 @@ std::tuple<int, int, int, std::string> time_cnn(

int main(int argc, char **argv) {

int num_repeats = 100;
int num_repeats = 300;

hipFree(0);

Expand All @@ -339,15 +339,15 @@ int main(int argc, char **argv) {
std::cout << std::setw(30) << "Times" << std::endl;
std::cout << std::setfill('-') << std::setw(190) << "-" << std::endl;
std::cout << std::setfill(' ');
std::cout << " w h c n k r s pad_w pad_h stride_w stride_h fwd_time (usec) bwd_inputs_time (usec) bwd_params_time (usec) total_time (usec) fwd_algo " << std::endl;
std::cout << " w h c n k f_w f_h pad_w pad_h stride_w stride_h fwd_time (usec) bwd_inputs_time (usec) bwd_params_time (usec) total_time (usec) fwd_algo " << std::endl;
std::cout << std::setfill('-') << std::setw(190) << "-" << std::endl;
std::cout << std::setfill(' ');

int total_fwd_time=0, total_bwd_inputs_time=0, total_bwd_params_time=0;
for (const auto &problem : training_set) {

// Filter parameters
int k, c, r, s;
int k, c, r, s; // r - filter_h (f_h), s - filter_w (f_w)

// Input parameters
int n, w, h;
Expand All @@ -371,8 +371,8 @@ int main(int argc, char **argv) {
std::cout << std::setw(7) << c;
std::cout << std::setw(7) << n;
std::cout << std::setw(7) << k;
std::cout << std::setw(7) << r;
std::cout << std::setw(7) << s;
std::cout << std::setw(7) << r;
std::cout << std::setw(7) << pad_w;
std::cout << std::setw(8) << pad_h;
std::cout << std::setw(10) << wstride;
Expand Down
6 changes: 3 additions & 3 deletions code/amd/gemm_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ int main(int argc, char **argv) {
bool a_t, b_t;
std::tie(m, n, k, a_t, b_t) = problem;

auto a = rand({a_t ? k : m, a_t ? m : k});
auto b = rand({b_t ? n : k, b_t ? k : n});
auto c = zeros({m, n});
auto a = rand<float>({a_t ? k : m, a_t ? m : k});
auto b = rand<float>({b_t ? n : k, b_t ? k : n});
auto c = zeros<float>({m, n});

std::cout << std::setw(7) << m;
std::cout << std::setw(7) << n;
Expand Down
121 changes: 73 additions & 48 deletions code/amd/miopen_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <vector>

#include <miopen/miopen.h>
#include <half.hpp>

#include "hip_helper.h"

Expand Down Expand Up @@ -43,43 +44,53 @@ class MIOpenHandle {
};

template<typename T>
class TensorDescriptorNd {
class TensorDescriptor {
std::shared_ptr<miopenTensorDescriptor_t> desc_;

struct TensorDescriptorNdDeleter {
struct TensorDescriptorDeleter {
void operator()(miopenTensorDescriptor_t * desc) {
miopenDestroyTensorDescriptor(*desc);
delete desc;
}
};

public:
TensorDescriptor()
{
miopenTensorDescriptor_t * desc = new miopenTensorDescriptor_t;
CHECK_MIOPEN_ERROR(miopenCreateTensorDescriptor(desc));

desc_.reset(desc, TensorDescriptorDeleter());
}

TensorDescriptorNd(const std::vector<int>& dim,
const std::vector<int>& stride) {
TensorDescriptor(std::vector<int> lens,
std::vector<int> strides) {
miopenDataType_t type;
if (std::is_same<T, float>::value)
type = miopenFloat;
else if (std::is_same<T, half_float::half>::value)
type = miopenHalf;
else
throw std::runtime_error("Unknown type");

miopenTensorDescriptor_t * desc = new miopenTensorDescriptor_t;

CHECK_MIOPEN_ERROR(miopenCreateTensorDescriptor(desc));
CHECK_MIOPEN_ERROR(miopenSet4dTensorDescriptor(*desc, type, dim[0], dim[1], dim[2], dim[3]));
CHECK_MIOPEN_ERROR(miopenSetTensorDescriptor(*desc, type, static_cast<int>(lens.size()), &lens[0], &strides[0]));

desc_.reset(desc, TensorDescriptorNdDeleter());
desc_.reset(desc, TensorDescriptorDeleter());
}

miopenTensorDescriptor_t desc() const { return *desc_; }

};

template<typename T>
class TensorDescriptorNdArray {
class TensorDescriptorArray
{
std::shared_ptr<miopenTensorDescriptor_t> desc_array_;

struct ArrayDeleter {
struct ArrayDeleter
{
int num_;
ArrayDeleter(int num) : num_(num) {}

Expand All @@ -92,23 +103,27 @@ class TensorDescriptorNdArray {
}
};

public:
public:

TensorDescriptorNdArray(std::vector<int> dim,
std::vector<int> stride,
int num) {
TensorDescriptorArray(std::vector<int> lens,
std::vector<int> strides,
int num)
{
miopenDataType_t type;
if (std::is_same<T, float>::value)
type = miopenFloat;
else if (std::is_same<T, half_float::half>::value)
type = miopenHalf;
else
throw std::runtime_error("Unknown type");

miopenTensorDescriptor_t * desc_array = new miopenTensorDescriptor_t[num];

for (int i = 0; i < num; ++i) {
for (int i = 0; i < num; ++i)
{
CHECK_MIOPEN_ERROR(miopenCreateTensorDescriptor(&desc_array[i]));
CHECK_MIOPEN_ERROR(miopenSet4dTensorDescriptor(desc_array[i], type,
dim[0], dim[1], dim[2], dim[3]));
CHECK_MIOPEN_ERROR(miopenSetTensorDescriptor(desc_array[i], type, lens.size(),
&lens[0], &strides[0]) );
}

desc_array_.reset(desc_array, ArrayDeleter(num));
Expand All @@ -117,38 +132,6 @@ class TensorDescriptorNdArray {
miopenTensorDescriptor_t * ptr() const { return desc_array_.get(); }
};

template<typename T>
class FilterDescriptorNd {
std::shared_ptr<miopenTensorDescriptor_t> desc_;

struct FilterDescriptorNdDeleter {
void operator()(miopenTensorDescriptor_t * desc) {
miopenDestroyTensorDescriptor(*desc);
delete desc;
}
};

public:

FilterDescriptorNd() {}

FilterDescriptorNd(const std::vector<int> dim) {
miopenDataType_t type;
if (std::is_same<T, float>::value)
type = miopenFloat;
else
throw std::runtime_error("Unknown type");

miopenTensorDescriptor_t * desc = new miopenTensorDescriptor_t;
CHECK_MIOPEN_ERROR(miopenCreateTensorDescriptor(desc));
CHECK_MIOPEN_ERROR(miopenSet4dTensorDescriptor(*desc, type, dim[0], dim[1], dim[2], dim[3]));

desc_.reset(desc, FilterDescriptorNdDeleter());
}

miopenTensorDescriptor_t desc() { return *desc_; }
};

template<typename T>
class TensorDescriptor4d {
std::shared_ptr<miopenTensorDescriptor_t> desc_;
Expand All @@ -167,6 +150,8 @@ class TensorDescriptor4d {
miopenDataType_t type;
if (std::is_same<T, float>::value)
type = miopenFloat;
else if (std::is_same<T, half_float::half>::value)
type = miopenHalf;
else
throw std::runtime_error("Unknown type");

Expand Down Expand Up @@ -202,6 +187,8 @@ class FilterDescriptor4d {
miopenDataType_t type;
if (std::is_same<T, float>::value)
type = miopenFloat;
else if (std::is_same<T, half_float::half>::value)
type = miopenHalf;
else
throw std::runtime_error("Unknown type");

Expand Down Expand Up @@ -246,3 +233,41 @@ class ConvolutionDescriptor {

};

class RNNDescriptor {
std::shared_ptr<miopenRNNDescriptor_t> desc_;

struct RNNDescriptorDeleter {
void operator()(miopenRNNDescriptor_t * desc) {
miopenDestroyRNNDescriptor(*desc);
delete desc;
}
};
public:

RNNDescriptor() {}

RNNDescriptor(const int hsize,
const int nlayers,
miopenRNNInputMode_t inMode,
miopenRNNDirectionMode_t direction,
miopenRNNMode_t rnnMode,
miopenRNNBiasMode_t biasMode,
miopenRNNAlgo_t algo,
miopenDataType_t dataType) :
desc_(new miopenRNNDescriptor_t, RNNDescriptorDeleter())
{
CHECK_MIOPEN_ERROR(miopenCreateRNNDescriptor(desc_.get()));
CHECK_MIOPEN_ERROR(miopenSetRNNDescriptor(*desc_,
hsize,
nlayers,
inMode,
direction,
rnnMode,
biasMode,
algo,
dataType));
}

miopenRNNDescriptor_t desc() const { return *desc_; };
};

Loading

0 comments on commit e569133

Please sign in to comment.