-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbenchmark.hpp
149 lines (124 loc) · 3.48 KB
/
benchmark.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#pragma once
/**
* FFT benchmarking tool (http://kfrlib.com)
* Copyright (C) 2016 D Levin
* Benchmark source code is dual-licensed under MIT and GPL 2 or later
* See LICENSE.txt for details
*/
#include <algorithm>
#include <random>
#include <stdint.h>
#include <stdlib.h>
#include <vector>
/// Raw cycle-counter reading, as returned by tick().
using tick_value = unsigned long long;
/// Wall-clock reading, as returned by now(); frequency() gives the number of counts per second.
using time_value = unsigned long long;
/**
 * Allocates uninitialized storage for `size` objects of type T at an address that is a
 * multiple of `alignment`.
 *
 * The storage comes from malloc(). The pointer malloc() returned is saved in the
 * pointer-sized slot immediately before the aligned address, which is where aligned_free()
 * looks for it; the result must therefore be released with aligned_free(), not free().
 *
 * @param size       number of T elements (not bytes)
 * @param alignment  requested alignment in bytes; the mask arithmetic assumes a power of two
 * @return the aligned pointer, or nullptr if the byte count does not fit in size_t or
 *         malloc() fails
 */
template <typename T>
static T* aligned_malloc(size_t size, size_t alignment = 64)
{
    // Worst-case padding needed to reach an aligned address, plus the bookkeeping slot.
    const size_t overhead = (alignment - 1) + sizeof(void*);

    // Refuse requests whose byte count would wrap around: the wrapped value would make
    // malloc() succeed with a block far smaller than the caller believes it owns.
    if (size > (static_cast<size_t>(-1) - overhead) / sizeof(T))
        return nullptr;

    void* const raw = malloc(size * sizeof(T) + overhead);
    if (raw == nullptr)
        return nullptr;

    // Step past the bookkeeping slot, then round down to a multiple of alignment; the
    // result always leaves at least sizeof(void*) bytes between raw and the aligned address.
    const uintptr_t mask         = ~static_cast<uintptr_t>(alignment - 1);
    const uintptr_t aligned_addr = (reinterpret_cast<uintptr_t>(raw) + overhead) & mask;
    void* const aligned_ptr      = reinterpret_cast<void*>(aligned_addr);

    // Remember the pointer that has to be handed back to free().
    static_cast<void**>(aligned_ptr)[-1] = raw;
    return static_cast<T*>(aligned_ptr);
}
/**
 * Releases a block obtained from aligned_malloc().
 *
 * The pointer originally returned by malloc() is read from the pointer-sized slot just
 * before `aligned_ptr` and passed to free(). A null `aligned_ptr` is ignored, so the result
 * of a failed aligned_malloc() can be passed here safely, mirroring free().
 */
static void aligned_free(void* aligned_ptr)
{
    if (aligned_ptr == nullptr)
        return;
    free(static_cast<void**>(aligned_ptr)[-1]);
}
/**
 * Executes an `mfence` instruction. The "memory" clobber also stops the compiler from
 * moving memory accesses across this point.
 */
static void full_barrier()
{
    __asm__ __volatile__("mfence" : : : "memory");
}
/**
 * Passes the pointer variable `in` to an empty asm statement as a read/write memory
 * operand, so the compiler has to treat the pointer value as used.
 *
 * NOTE(review): there is no "memory" clobber, so presumably only the pointer itself, not
 * the data it points to, is covered - confirm this is sufficient for the callers.
 */
static void dont_optimize(const void* in)
{
    __asm__ __volatile__("" : "+m"(in));
}
#if defined(_WIN32) || defined(WIN32)
#include <windows.h>
namespace detail
{
/**
 * Reads the performance-counter frequency via QueryPerformanceFrequency().
 * The call's return value is not checked.
 */
static time_value get_frequency()
{
    LARGE_INTEGER counts_per_second;
    QueryPerformanceFrequency(&counts_per_second);
    return static_cast<time_value>(counts_per_second.QuadPart);
}
} // namespace detail
static void set_affinity() { SetProcessAffinityMask(GetCurrentProcess(), 1); }
/**
 * Suspends the calling thread for `us` microseconds.
 * Sleep() takes milliseconds, so the duration is rounded up to the next whole millisecond.
 */
static void sleep(long long us)
{
    const long long whole_ms = (us + 999) / 1000;
    Sleep(static_cast<DWORD>(whole_ms));
}
/**
 * Returns the current QueryPerformanceCounter() reading.
 * A full_barrier() is issued before the counter is read. Divide differences by
 * frequency() to obtain seconds.
 */
static time_value now()
{
    full_barrier();
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);
    return static_cast<time_value>(counter.QuadPart);
}
/**
 * Returns the value of detail::get_frequency().
 * It is queried on the first call and kept in a function-local static for later calls.
 */
static time_value frequency()
{
    static const time_value cached_frequency = detail::get_frequency();
    return cached_frequency;
}
#else
#include <sys/time.h>
/**
 * Returns the current gettimeofday() time in microseconds.
 * A full_barrier() is issued before the clock is read. The return value of gettimeofday()
 * is not checked.
 */
static time_value now()
{
    timeval val;
    full_barrier();
    gettimeofday(&val, nullptr);
    // Widen before multiplying so the seconds part cannot overflow a narrower time_t.
    return static_cast<time_value>(val.tv_sec) * 1000000 + static_cast<time_value>(val.tv_usec);
}
/**
 * Requests that the current process run only on CPU 0.
 * The result of sched_setaffinity() is not checked.
 */
static void set_affinity()
{
    cpu_set_t first_cpu_only;
    CPU_ZERO(&first_cpu_only);
    CPU_SET(0, &first_cpu_only);
    sched_setaffinity(getpid(), sizeof(first_cpu_only), &first_cpu_only);
}
/**
 * Suspends the calling thread for `us` microseconds by forwarding to usleep().
 * The value is converted to usleep()'s parameter type; its result is not checked.
 */
static void sleep(long long us)
{
    usleep(static_cast<useconds_t>(us));
}
/**
 * Number of now() counts per second.
 * This branch's now() reports microseconds, so the value is one million.
 */
static time_value frequency()
{
    const time_value microseconds_per_second = 1000000;
    return microseconds_per_second;
}
#endif
/**
 * Returns the current cycle-counter reading from __builtin_readcyclecounter().
 * A full_barrier() is issued before the counter is read.
 *
 * NOTE(review): __builtin_readcyclecounter is a compiler builtin (documented by Clang) -
 * confirm availability on every compiler this header must build with.
 */
static tick_value tick()
{
    full_barrier();
    const tick_value cycles = __builtin_readcyclecounter();
    return cycles;
}
/**
 * Converts the difference `val1 - val2` of two now() readings to seconds.
 * The subtraction is done in long double, so the result is negative when val1 < val2
 * instead of wrapping around.
 */
static double time_between(time_value val1, time_value val2)
{
    const long double first             = static_cast<long double>(val1);
    const long double second            = static_cast<long double>(val2);
    const long double counts_per_second = static_cast<long double>(frequency());
    return static_cast<double>((first - second) / counts_per_second);
}
/**
 * Returns the arithmetic mean of `measures`, computed in T.
 *
 * The element count is converted to T before dividing, so a negative sum of a signed
 * integer type is not silently converted to unsigned. For integer T the result is the
 * truncated quotient.
 *
 * @param measures  values to average
 * @return the mean, or T() when `measures` is empty
 */
template <typename T>
static T get_average(const std::vector<T>& measures)
{
    // Nothing to average; also avoids dividing by zero.
    if (measures.empty())
        return T();

    T sum = T();
    for (const T& value : measures)
        sum += value;
    return sum / static_cast<T>(measures.size());
}
/**
 * Returns the smallest element of `measures`.
 *
 * @param measures  values to search
 * @return the minimum, or T() when `measures` is empty (dereferencing the end iterator
 *         would otherwise be undefined behaviour)
 */
template <typename T>
static T get_minimum(const std::vector<T>& measures)
{
    if (measures.empty())
        return T();
    return *std::min_element(measures.begin(), measures.end());
}
/**
 * Returns the median of `measures`.
 *
 * The vector is sorted in place as a side effect. For an odd count the middle element is
 * returned; for an even count the result is the sum of the two middle elements divided by
 * two, computed in T.
 *
 * @param measures  values to examine; left sorted in ascending order on return
 * @return the median, or T() when `measures` is empty
 */
template <typename T>
static T get_median(std::vector<T>& measures)
{
    // An empty vector has no middle element; indexing it would read out of bounds.
    if (measures.empty())
        return T();

    std::sort(measures.begin(), measures.end());
    const size_t middle = measures.size() / 2;
    if (measures.size() % 2 == 1)
        return measures[middle];
    return static_cast<T>((measures[middle - 1] + measures[middle]) / 2);
}
/**
 * Measures the tick difference between two back-to-back tick() calls 1000 times and
 * returns the median of those differences.
 * Presumably callers subtract this from measured intervals as the timer's own overhead -
 * confirm at the call sites.
 */
static tick_value calibrate_correction()
{
    const size_t sample_count = 1000;

    std::vector<tick_value> overheads;
    overheads.reserve(sample_count);
    for (size_t sample = 0; sample != sample_count; ++sample)
    {
        const tick_value before = tick();
        const tick_value after  = tick();
        overheads.push_back(after - before);
    }
    return get_median(overheads);
}
/**
 * Fills `data[0 .. size)` with pseudo-random values in the range [-1, 1].
 *
 * A std::mt19937 engine is seeded with `seed`, so the same seed always yields the same
 * sequence. Each 32-bit output is divided by 4294967295.0 (2^32 - 1) to give a value in
 * [0, 1], which is then scaled and shifted to [-1, 1].
 *
 * Declared static like the other helpers in this header, so that including the header
 * from several translation units does not produce duplicate definitions.
 *
 * @param seed  seed for the generator
 * @param data  destination buffer of at least `size` elements; `real` is declared
 *              outside this header
 * @param size  number of elements to write
 */
static void fill_random(uint32_t seed, real* data, size_t size)
{
    std::mt19937 rnd(seed);
    for (size_t i = 0; i < size; i++)
        data[i] = (rnd() / 4294967295.0) * 2.0 - 1.0;
}