-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathmica_memstackdist.cpp
424 lines (339 loc) · 11.9 KB
/
mica_memstackdist.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
/*
* This file is part of MICA, a Pin tool to collect
* microarchitecture-independent program characteristics using the Pin
* instrumentation framework.
*
* Please see the README.txt file distributed with the MICA release for more
* information.
*/
#include "pin.H"
/* MICA includes */
#include "mica_utils.h"
#include "mica_memstackdist.h"
/* Global variables */
extern INT64 interval_size;
extern INT64 interval_ins_count;
extern INT64 interval_ins_count_for_hpc_alignment;
extern INT64 total_ins_count;
extern INT64 total_ins_count_for_hpc_alignment;
extern UINT32 _block_size;
static UINT32 memstackdist_block_size;
static ofstream output_file_memstackdist;
/* A single entry of the cache line reference stack.
* below points to the entry below us in the stack
* above points to the entry above us in the stack
* block_addr is the cache line index of this entry
* bucket is the number of the stack depth bucket where this entry belongs
*/
typedef struct stack_entry_type {
struct stack_entry_type* below;
struct stack_entry_type* above;
ADDRINT block_addr;
INT32 bucket;
} stack_entry;
/* A single entry of the hash table, contains an array of stack entries referenced by part of cache line index. */
typedef struct block_type_fast {
ADDRINT id;
stack_entry* stack_entries[MAX_MEM_ENTRIES];
struct block_type_fast* next;
} block_fast;
static stack_entry* stack_top;
static UINT64 stack_size;
static block_fast* hashTableCacheBlocks_fast[MAX_MEM_TABLE_ENTRIES];
static INT64 mem_ref_cnt;
static INT64 cold_refs;
/* Counters of accesses into each bucket. */
static INT64 buckets[BUCKET_CNT];
/* References to stack entries that are the oldest entries belonging to the particular bucket.
* This is used to update bucket attributes of stack entries efficiently. Since the last
* bucket is overflow bucket, last borderline entry should never be set. */
static stack_entry* borderline_stack_entries[BUCKET_CNT];
/* initializing */
void init_memstackdist(){
int i;
/* initialize */
cold_refs = 0;
for(i=0; i < BUCKET_CNT; i++){
buckets[i] = 0;
borderline_stack_entries[i] = NULL;
}
mem_ref_cnt = 0;
/* hash table */
for (i = 0; i < MAX_MEM_TABLE_ENTRIES; i++) {
hashTableCacheBlocks_fast[i] = NULL;
}
/* access stack */
/* a dummy entry is inserted on the stack top to save some checks later */
/* since the dummy entry is not in the hash table, it should never be used */
stack_top = (stack_entry*) checked_malloc(sizeof(stack_entry));
stack_top->block_addr = 0;
stack_top->above = NULL;
stack_top->below = NULL;
stack_top->bucket = 0;
stack_size = 1;
memstackdist_block_size = _block_size;
if(interval_size != -1){
output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::trunc);
output_file_memstackdist.close();
}
}
/*VOID memstackdist_instr_full(){
// counting instructions is done in all_instr_full()
}*/
static ADDRINT memstackdist_instr_intervals(){
/* counting instructions is done in all_instr_intervals() */
return (ADDRINT)(interval_ins_count_for_hpc_alignment == interval_size);
}
VOID memstackdist_instr_interval_output(){
int i;
output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::app);
output_file_memstackdist << mem_ref_cnt << " " << cold_refs;
for(i=0; i < BUCKET_CNT; i++){
output_file_memstackdist << " " << buckets[i];
}
output_file_memstackdist << endl;
output_file_memstackdist.close();
}
VOID memstackdist_instr_interval_reset(){
int i;
mem_ref_cnt = 0;
cold_refs = 0;
for(i=0; i < BUCKET_CNT; i++){
buckets[i] = 0;
}
}
static VOID memstackdist_instr_interval(){
memstackdist_instr_interval_output();
memstackdist_instr_interval_reset();
interval_ins_count = 0;
interval_ins_count_for_hpc_alignment = 0;
}
/* hash table support */
/** entry_lookup
*
* Finds an arrray of stack entry references for a given address key (upper part of address) in a hash table.
*/
stack_entry** entry_lookup(block_fast** table, ADDRINT key){
block_fast* b;
for (b = table[key % MAX_MEM_TABLE_ENTRIES]; b != NULL; b = b->next){
if(b->id == key)
return b->stack_entries;
}
return NULL;
}
/** entry_install
*
* Installs a new array of stack entry references for a given address key (upper part of address) in a hash table.
*/
static stack_entry** entry_install(block_fast** table, ADDRINT key){
block_fast* b;
ADDRINT index = key % MAX_MEM_TABLE_ENTRIES;
b = table[index];
if(b == NULL) {
b = (block_fast*)checked_malloc(sizeof(block_fast));
table[index] = b;
}
else{
while(b->next != NULL){
b = b->next;
}
b->next = (block_fast*)checked_malloc(sizeof(block_fast));
b = b->next;
}
b->next = NULL;
b->id = key;
for(ADDRINT i = 0; i < MAX_MEM_ENTRIES; i++){
b->stack_entries[i] = NULL;
}
return b->stack_entries;
}
/* stack support */
#if 0
/** stack_sanity_check
*
* Checks whether the stack structure is internally consistent.
*/
static VOID stack_sanity_check(){
UINT64 position = 0;
INT32 bucket = 0;
stack_entry *e = stack_top;
if (e->above != NULL){
ERROR_MSG("Item above top of stack.");
exit(1);
}
while (e != NULL){
// Check whether the stack entry has a correct bucket.
if (e->bucket != bucket){
ERROR_MSG("Stack entry with invalid bucket.");
exit(1);
}
// Check whether the stack entry is linked correctly.
if (e->above && (e->above->below != e)){
ERROR_MSG("Incorrectly linked stack.");
exit(1);
}
if (e->below && (e->below->above != e)){
ERROR_MSG("Incorrectly linked stack.");
exit(1);
}
// Calculate which bucket we should be in next.
// Never spill over the overflow bucket though.
if (bucket < BUCKET_CNT - 1)
{
UINT64 borderline = ((UINT64) 1) << bucket;
if (position == borderline){
if (borderline_stack_entries [bucket] != e){
ERROR_MSG("Incorrect bucket borderline.");
exit(1);
}
bucket ++;
}
}
// Go on through the entire stack.
e = e->below;
position++;
}
}
#endif
/** move_to_top_fast
*
* Moves the stack entry e corresponding to the address a to the top of stack.
* The stack entry can be NULL, in which case a new stack entry is created.
*/
static VOID move_to_top_fast(stack_entry *e, ADDRINT a){
INT32 bucket;
/* check if entry was accessed before */
if(e != NULL){
/* check to see if we already are at top of stack */
if(e->above != NULL){
// disconnect the entry from its current position on the stack
if (e->below != NULL) e->below->above = e->above;
e->above->below = e->below;
// adjust all borderline entries above the entry touched (note that we can be sure those entries exist)
// a borderline entry is an entry whose bucket will change when an item is inserted above it on the stack
for(bucket=0; bucket < BUCKET_CNT && bucket < e->bucket; bucket++){
borderline_stack_entries[bucket]->bucket++;
borderline_stack_entries[bucket] = borderline_stack_entries[bucket]->above;
}
// if the entry touched was a borderline entry, new borderline entry is the one above the touched one
if(e == borderline_stack_entries[e->bucket]){
borderline_stack_entries[e->bucket] = borderline_stack_entries[e->bucket]->above;
}
// place new entry on top of LRU stack
e->below = stack_top;
e->above = NULL;
stack_top->above = e;
stack_top = e;
e->bucket = 0;
}
/* else: if top of stack was referenced again, nothing to do! */
}
else{
// allocate memory for new stack entry
stack_entry* e = (stack_entry*) checked_malloc(sizeof(stack_entry));
// initialize with address and refer prev to top of stack
e->block_addr = a;
e->above = NULL;
e->below = stack_top;
e->bucket = 0;
// adjust top of stack
stack_top->above = e;
stack_top = e;
stack_size++;
// adjust all borderline entries that exist up until the overflow bucket
// (which really has no borderline entry since there is no next bucket)
// we retain the number of the first free bucket for next code
for(bucket=0; bucket < BUCKET_CNT - 1; bucket++){
if (borderline_stack_entries[bucket] == NULL) break;
borderline_stack_entries[bucket]->bucket++;
borderline_stack_entries[bucket] = borderline_stack_entries[bucket]->above;
}
// if the stack size has reached a boundary of a bucket, set the boundary entry for this bucket
// the variable types are chosen deliberately large for overflow safety
// at least they should not overflow sooner than stack_size anyway
// overflow bucket boundar is never set
if (bucket < BUCKET_CNT - 1)
{
UINT64 borderline_distance = ((UINT64) 2) << bucket;
if(stack_size == borderline_distance){
// find the bottom of the stack by traversing from somewhere close to it
stack_entry *stack_bottom;
if (bucket) stack_bottom = borderline_stack_entries [bucket-1];
else stack_bottom = stack_top;
while (stack_bottom->below) stack_bottom = stack_bottom->below;
// the new borderline is the bottom of the stack
borderline_stack_entries [bucket] = stack_bottom;
}
}
}
// stack_sanity_check();
}
/* determine reuse distance (= number of unique cache blocks referenced since last time this cache was referenced)
* reuse distance is tracked in move_to_top_fast (by climbing up the LRU stack entry-by-entry until top of stack is reached),
* this function only returns the reuse distance calculated by move_to_top_fast */
static INT64 det_reuse_dist_bucket(stack_entry* e){
if(e != NULL)
return e->bucket;
else
return -1;
}
/* register memory access (either read of write) determine which cache lines are touched */
VOID memstackdist_memRead(ADDRINT effMemAddr, ADDRINT size){
ADDRINT a, endAddr, addr, upperAddr, indexInChunk;
stack_entry** chunk;
stack_entry* entry_for_addr;
/* Calculate index in cache addresses. The calculation does not
* handle address overflows but those are unlikely to happen. */
addr = effMemAddr >> memstackdist_block_size;
endAddr = (effMemAddr + size - 1) >> memstackdist_block_size;
/* The hit is counted for all cache lines involved. */
for(a = addr; a <= endAddr; a++){
/* split the cache line address into hash key of chunk and index in chunk */
upperAddr = a >> LOG_MAX_MEM_ENTRIES;
indexInChunk = a & MASK_MAX_MEM_ENTRIES;
chunk = entry_lookup(hashTableCacheBlocks_fast, upperAddr);
if(chunk == NULL) chunk = entry_install(hashTableCacheBlocks_fast, upperAddr);
entry_for_addr = chunk[indexInChunk];
/* determine reuse distance for this access (if it has been accessed before) */
INT64 b = det_reuse_dist_bucket(entry_for_addr);
if(b < 0)
cold_refs++;
else
buckets[b]++;
/* adjust LRU stack */
/* as a side effect, can allocate new entry, which could have been NULL so far */
move_to_top_fast(entry_for_addr, a);
/* update hash table for new cache blocks */
if(chunk[indexInChunk] == NULL) chunk[indexInChunk] = stack_top;
mem_ref_cnt++;
}
}
VOID instrument_memstackdist(INS ins, VOID *v){
if( INS_IsMemoryRead(ins) ){
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_memRead, IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
if( INS_HasMemoryRead2(ins) )
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_memRead, IARG_MEMORYREAD2_EA, IARG_MEMORYREAD_SIZE, IARG_END);
}
if(interval_size != -1){
INS_InsertIfCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_instr_intervals,IARG_END);
/* only called if interval is 'full' */
INS_InsertThenCall(ins, IPOINT_BEFORE, (AFUNPTR)memstackdist_instr_interval,IARG_END);
}
}
/* finishing... */
VOID fini_memstackdist(INT32 code, VOID* v){
int i;
if(interval_size == -1){
output_file_memstackdist.open(mkfilename("memstackdist_full_int"), ios::out|ios::trunc);
}
else{
output_file_memstackdist.open(mkfilename("memstackdist_phases_int"), ios::out|ios::app);
}
output_file_memstackdist << mem_ref_cnt << " " << cold_refs;
for(i=0; i < BUCKET_CNT; i++){
output_file_memstackdist << " " << buckets[i];
}
//output_file_memstackdist << endl << "number of instructions: " << total_ins_count_for_hpc_alignment << endl;
output_file_memstackdist << " ";
output_file_memstackdist.close();
}