// convert_f32_to_dlf16.cpp
/*
* SPDX-License-Identifier: Apache-2.0
*/
//===----------------------- DLF16 Conversion -----------------------------===//
// Extracted from NNP1 class in DLF16Conversion.hpp in onnx-mlir.
//===----------------------------------------------------------------------===//
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
// Macros for FPFormat.
// `one` is the value 1 in the storage type, `exp` the exponent width in bits
// and `frac` the fraction width in bits. Every argument is parenthesized so
// that compound expressions (e.g. `a | b`, `n - 1`) expand correctly.
// Mask of the sign bit: the single bit directly above exponent and fraction.
#define SIGN(one, exp, frac) ((one) << (exp) << (frac))
// Mask of the exponent field: `exp` one-bits placed above the fraction bits.
#define EXPO(one, exp, frac) ((((one) << (exp)) - 1) << (frac))
// Exponent bias, 2^(exp-1) - 1. `frac` is unused; it is kept so that all
// three format macros share the same parameter list.
#define EXPO_BIAS(one, exp, frac) (((one) << ((exp) - 1)) - 1)
// Mask of the fraction field: the low `frac` bits.
#define FRAC(one, frac) (((one) << (frac)) - 1)
// Macros for conversion.
// Lowest set bit of `mask` (`1 + ~mask` is the two's-complement negation).
#define BFLAST(mask) ((mask) & (1 + ~(mask)))
// Extracts the field of `w` selected by `mask`, shifted down to bit 0.
// Dividing by the lowest mask bit performs the right shift.
#define BFGET(w, mask) (((w) & (mask)) / BFLAST(mask))
// Stores `value` into the field of `w` selected by `mask`, leaving the other
// bits of `w` untouched. Bits of `value` that do not fit the field are dropped.
// Note: `w` and `mask` are evaluated more than once.
#define BFPUT(w, mask, value)                                                  \
  ((w) = ((w) & ~(mask)) | (((value) * BFLAST(mask)) & (mask)))
// Converts the float given as the first command-line argument to DLFloat16
// (uint16_t storage: 1 sign bit, 6 exponent bits, 9 fraction bits) and returns
// the 16-bit pattern as the process exit status.
//
// Example from the original notes: input 0.0123456789 yields 12585.
//
// argv[1]: decimal floating-point text; trailing characters after the number
//          are ignored, as they were with atof().
// Returns: the DLFloat16 bit pattern, or EXIT_FAILURE (with a message on
//          stderr) when the argument is missing or is not a number.
// NOTE: many hosts keep only the low 8 bits of an exit status (e.g. POSIX
// wait()), so the value seen by the parent process may be truncated.
int main(int argc, char *argv[]) {
  // Guard against a missing argument: argv[argc] is a null pointer.
  if (argc < 2) {
    std::fprintf(stderr, "usage: %s <float>\n",
        argc > 0 ? argv[0] : "convert_f32_to_dlf16");
    return EXIT_FAILURE;
  }
  // strtod() parses exactly like atof() but reports where parsing stopped.
  char *parse_end = nullptr;
  float fp = static_cast<float>(std::strtod(argv[1], &parse_end));
  if (parse_end == argv[1]) {
    std::fprintf(stderr, "error: '%s' is not a number\n", argv[1]);
    return EXIT_FAILURE;
  }

  //--------------------------------------------------------------------------//
  // f32 to dlf16
  //--------------------------------------------------------------------------//

  // From the constructor of the parent FPFormat: `FPFormat<uint16_t, 6, 9>`.
  static constexpr unsigned DLF16_EXPONENT_BITS = 6;
  static constexpr unsigned DLF16_FRACTION_BITS = 9;
  static constexpr uint16_t DLF16_One = 1;
  static constexpr uint16_t DLF16_SIGN =
      SIGN(DLF16_One, DLF16_EXPONENT_BITS, DLF16_FRACTION_BITS);
  static constexpr uint16_t DLF16_EXPONENT =
      EXPO(DLF16_One, DLF16_EXPONENT_BITS, DLF16_FRACTION_BITS);
  static constexpr signed DLF16_EXPONENT_BIAS =
      EXPO_BIAS(DLF16_One, DLF16_EXPONENT_BITS, DLF16_FRACTION_BITS);
  static constexpr uint16_t DLF16_FRACTION =
      FRAC(DLF16_One, DLF16_FRACTION_BITS);
  // Exponent and fraction all ones: the pattern used for out-of-range inputs.
  static constexpr uint16_t DLF16_NINF = DLF16_EXPONENT | DLF16_FRACTION;

  // From the constructor of FP32: `FPFormat<uint32_t, 8, 23>`.
  static constexpr uint32_t FP32_One = 1;
  static constexpr unsigned FP32_EXPONENT_BITS = 8;
  static constexpr unsigned FP32_FRACTION_BITS = 23;
  static constexpr uint32_t FP32_SIGN =
      SIGN(FP32_One, FP32_EXPONENT_BITS, FP32_FRACTION_BITS);
  static constexpr uint32_t FP32_EXPONENT =
      EXPO(FP32_One, FP32_EXPONENT_BITS, FP32_FRACTION_BITS);
  static constexpr signed FP32_EXPONENT_BIAS =
      EXPO_BIAS(FP32_One, FP32_EXPONENT_BITS, FP32_FRACTION_BITS);
  static constexpr uint32_t FP32_FRACTION = FRAC(FP32_One, FP32_FRACTION_BITS);
  // Half of the lowest DLF16 fraction bit, expressed in fp32 fraction bits;
  // adding it before truncation rounds the magnitude to nearest.
  static constexpr uint32_t FP32_DLF16_ROUND =
      1 << (FP32_FRACTION_BITS - DLF16_FRACTION_BITS - 1);
  // Largest fp32 magnitude that is not mapped to DLF16_NINF: the top DLF16
  // exponent (re-biased for fp32), the fraction one step below all-ones, and
  // every fp32 bit beneath the rounding position set.
  static constexpr uint32_t FP32_DLF16_NMAX =
      (((1 << DLF16_EXPONENT_BITS) - 1 + FP32_EXPONENT_BIAS -
           DLF16_EXPONENT_BIAS)
          << FP32_FRACTION_BITS) |
      (((1 << DLF16_FRACTION_BITS) - 2)
          << (FP32_FRACTION_BITS - DLF16_FRACTION_BITS)) |
      (FP32_DLF16_ROUND - 1);

  // Conversion.
  // Reinterpret the float's bits without violating aliasing rules.
  uint32_t fp32;
  std::memcpy(&fp32, &fp, sizeof(fp32));

  // Re-bias the exponent from fp32 to DLF16. The extracted field is 0..255,
  // so the arithmetic is done explicitly in int and may go negative.
  int nnp1_biased_exponent = static_cast<int>(BFGET(fp32, FP32_EXPONENT)) -
                             FP32_EXPONENT_BIAS + DLF16_EXPONENT_BIAS;

  // Round the fraction; a carry out of the fraction field bumps the exponent.
  uint32_t fraction = BFGET(fp32, FP32_FRACTION) + FP32_DLF16_ROUND;
  if (fraction > FP32_FRACTION) {
    fraction = 0;
    nnp1_biased_exponent++;
  }

  // Start with only the sign bit; a negative re-biased exponent leaves the
  // result as a signed zero.
  uint16_t dlf16 = DLF16_SIGN * BFGET(fp32, FP32_SIGN);
  if (nnp1_biased_exponent >= 0) {
    if ((fp32 & ~FP32_SIGN) <= FP32_DLF16_NMAX) {
      BFPUT(dlf16, DLF16_EXPONENT, nnp1_biased_exponent);
      BFPUT(dlf16, DLF16_FRACTION,
          fraction >> (FP32_FRACTION_BITS - DLF16_FRACTION_BITS));
    } else {
      // Magnitude too large (this also covers fp32 infinities and NaNs).
      dlf16 |= DLF16_NINF;
    }
  }
  return dlf16;
}