forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRelu.cpp
76 lines (59 loc) · 2.52 KB
/
Relu.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/Config.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/relu_native.h> // for mkldnn_relu, mkldnn_relu_
#include <ATen/ops/threshold_backward_native.h> // for mkldnn_relu_backward
#endif
#if !AT_MKLDNN_ENABLED()
namespace at::native {
// Stub used when ATen is built without MKLDNN (AT_MKLDNN_ENABLED() is false).
// It never produces a result: the unconditional check below always fails.
Tensor mkldnn_relu(const Tensor& input) {
  TORCH_CHECK(
      false,
      "mkldnn_relu: ATen not compiled with MKLDNN support");
}
// In-place variant of the stub for builds without MKLDNN
// (AT_MKLDNN_ENABLED() is false). The unconditional check always fails, so
// `input` is never touched.
Tensor& mkldnn_relu_(Tensor& input) {
  TORCH_CHECK(
      false,
      "mkldnn_relu_: ATen not compiled with MKLDNN support");
}
// Backward stub for builds without MKLDNN (AT_MKLDNN_ENABLED() is false).
// None of the arguments are read; the unconditional check always fails.
Tensor mkldnn_relu_backward(
    const Tensor& grad_output,
    const Tensor& input,
    const Scalar& threshold) {
  TORCH_CHECK(
      false,
      "mkldnn_relu_backward: ATen not compiled with MKLDNN support");
}
}
#else // AT_MKLDNN_ENABLED
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
namespace at::native {
// Out-of-place ReLU for an MKLDNN tensor.
//
// Runs ideep's eltwise_forward with the eltwise_relu algorithm (alpha = 0.0,
// forward_training prop kind) into a fresh ideep tensor and wraps that result
// in a new Tensor carrying the dtype and device taken from `input.options()`.
//
// Fails via TORCH_CHECK when `input` is BFloat16 and
// mkldnn_bf16_device_check() reports false.
Tensor mkldnn_relu(const Tensor& input) {
  // Short-circuit keeps the device check from running for non-bf16 inputs.
  TORCH_CHECK(
      input.scalar_type() != ScalarType::BFloat16 || mkldnn_bf16_device_check(),
      "mkldnn_relu: bf16 path needs the cpu support avx512bw, avx512vl and avx512dq");

  const ideep::tensor& src = itensor_from_mkldnn(input);
  ideep::tensor dst;
  ideep::eltwise_forward::compute(
      src,
      dst,
      ideep::algorithm::eltwise_relu,
      ideep::prop_kind::forward_training,
      /*alpha*/ 0.0);

  const auto out_dtype = optTypeMetaToScalarType(input.options().dtype_opt());
  const auto out_device = input.options().device_opt();
  return new_with_itensor_mkldnn(std::move(dst), out_dtype, out_device);
}
// In-place ReLU for an MKLDNN tensor.
//
// Passes the same ideep tensor as both source and destination to
// eltwise_forward (eltwise_relu, alpha = 0.0, forward_training prop kind) and
// returns `input` itself.
//
// Fails via TORCH_CHECK when `input` is BFloat16 and
// mkldnn_bf16_device_check() reports false.
Tensor& mkldnn_relu_(Tensor& input) {
  // Short-circuit keeps the device check from running for non-bf16 inputs.
  TORCH_CHECK(
      input.scalar_type() != ScalarType::BFloat16 || mkldnn_bf16_device_check(),
      "mkldnn_relu_: bf16 path needs the cpu support avx512bw, avx512vl and avx512dq");

  ideep::tensor& self_itensor = itensor_from_mkldnn(input);
  ideep::eltwise_forward::compute(
      self_itensor,
      self_itensor,
      ideep::algorithm::eltwise_relu,
      ideep::prop_kind::forward_training,
      /*alpha*/ 0.0);
  return input;
}
// Backward pass of ReLU for MKLDNN tensors.
//
// Runs ideep's eltwise_backward with the eltwise_relu algorithm (alpha = 0.0)
// on the forward `input` and the incoming `grad_output`, and wraps the
// resulting gradient in a new Tensor carrying the dtype and device taken from
// `grad_output.options()`.
//
// NOTE(review): `threshold` is never read in this body; the computation always
// uses alpha = 0.0. Confirm callers only reach this kernel with a zero
// threshold.
Tensor mkldnn_relu_backward(const Tensor& grad_output, const Tensor& input, const Scalar& threshold) {
  ideep::tensor& x = itensor_from_mkldnn(input);
  // Bind by reference, exactly like `x`: the helper hands back a reference, so
  // copy-initializing a local ideep::tensor here was an unnecessary copy.
  ideep::tensor& grady = itensor_from_mkldnn(grad_output);
  ideep::tensor gradx;
  ideep::eltwise_backward::compute(
      x, grady, gradx, ideep::algorithm::eltwise_relu, /*alpha*/ 0.0);
  return new_with_itensor_mkldnn(
      std::move(gradx),
      optTypeMetaToScalarType(grad_output.options().dtype_opt()),
      grad_output.options().device_opt());
}
}
#endif // AT_MKLDNN_ENABLED