// graphlet_utils.h
/**
============================================================================
Name : Parallel Parameterized Graphlet Decomposition (PGD) Library
Author : Nesreen K. Ahmed, ([email protected]),
Ryan A. Rossi ([email protected])
Description : A general high-performance parallel framework for computing
the graphlet decomposition. The library is designed to be fast
for both large sparse graphs as well as dense graphs.
Copyright (C) 2012-2015,
Nesreen K. Ahmed (http://nesreenahmed.com), All rights reserved.
Please cite the following paper:
Nesreen K. Ahmed, Jennifer Neville, Ryan A. Rossi, Nick Duffield,
Efficient Graphlet Counting for Large Networks, IEEE International
Conference on Data Mining (ICDM), pages 10, 2015.
Download PDF: http://www.nesreenahmed.com/publications/ahmed-et-al-icdm2015.pdf
@inproceedings{ahmed2015icdm,
title={Efficient Graphlet Counting for Large Networks},
author={Nesreen K. Ahmed and Jennifer Neville and Ryan A. Rossi and Nick Duffield},
booktitle={ICDM},
pages={1--10},
year={2015}
}
See http://nesreenahmed.com/graphlets for more information.
============================================================================
*/
#ifndef GRAPHLET_UTILS_H_
#define GRAPHLET_UTILS_H_
#ifdef WIN32
#else
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <dirent.h>
#endif
#include <cstddef>
#include <iostream>
#include "assert.h"
#include <errno.h>
#include <string>
#include <set>
#include <vector>
#include "graphlet_headers.h"
#include "graphlet_rand.h"
using namespace std;
// Prototypes for general-purpose helpers. Only the declarations appear in
// this header, so the summaries below are inferred from names and signatures.
// NOTE(review): confirm each summary against the corresponding definition.

// Presumably reports whether the file at `filename` exists -- TODO confirm.
bool fexists(const char *filename);
// Presumably prints command-line help; `argv0` looks like the program name -- TODO confirm.
void usage(char *argv0);
// Returns a time value as a double; units and epoch are not visible here.
double get_time();
// Presumably starts a timer and returns the start value -- TODO confirm.
double tic();
// Takes the start value by non-const reference, so it may update `start`;
// presumably reports elapsed time -- TODO confirm.
void toc(double & start);
// Returns a string; presumably a description of current memory use -- TODO confirm.
string memory_usage();
// Presumably reports `msg` when `condition` is false; whether it aborts is
// not visible here -- TODO confirm.
void validate(bool condition, const string& msg);
// Two overloads: by level only, or by level plus a caller-supplied string.
// Presumably emit indentation for nested output -- TODO confirm.
void indent(int level);
void indent(int level, string str);
// Defaults are n = 80 and sym = "-"; presumably prints `sym` repeated `n`
// times as a separator -- TODO confirm.
void print_line(int n = 80, string sym = "-");
// `remove_ext` defaults to true; presumably returns the file-name part of the
// path `fn`, optionally without its extension -- TODO confirm.
string extract_filename(string fn, bool remove_ext = true);
// Presumably returns `fn` without its trailing extension -- TODO confirm.
string remove_file_extension(string fn);
// `files` is an output parameter (non-const reference); presumably filled with
// the entries of directory `dir`. Meaning of the int result is not visible
// here -- TODO confirm.
int getdir (string dir, vector<string> &files);
/**
 * Flush the text staged in `str_stream` to `myfile` and reset the stream so
 * it can be reused for the next batch.
 *
 * @param myfile      Output file stream that receives the staged text.
 * @param str_stream  In-memory staging buffer; on return its contents are
 *                    empty and its state flags are cleared.
 */
inline
void write_buffer(ofstream & myfile, ostringstream & str_stream) {
    myfile << str_stream.str();
    str_stream.clear();
    str_stream.str("");
}

/**
 * Write every element of `data` to `filename`, one per line, staging the text
 * in memory and flushing it to the file whenever the staged text grows past
 * `buffer_size` characters.
 *
 * The file is opened with the default ofstream mode. If it cannot be opened
 * nothing is written and no error is reported (unchanged from before).
 *
 * @param data         Values to write; each is formatted with operator<<.
 * @param filename     Path of the output file.
 * @param output_id    When true each line is "<index>,<value>", otherwise
 *                     just "<value>".
 * @param buffer_size  Flush threshold, in characters of staged text.
 */
template<typename T>
void write_results_batch(std::vector<T> &data, string &filename, bool output_id = false,
        unsigned int buffer_size = 128000000) {
    // c_str() already yields the const char* that the stream expects.
    ofstream myfile(filename.c_str());
    ostringstream str_stream;

    const std::streamoff flush_threshold = static_cast<std::streamoff>(buffer_size);
    const std::size_t num_values = data.size();
    for (std::size_t e = 0; e < num_values; ++e) {
        if (output_id) { str_stream << e << ","; }
        str_stream << data[e] << "\n";
        // tellp() gives the number of staged characters directly; calling
        // str().size() here would copy the whole staged buffer per element.
        if (static_cast<std::streamoff>(str_stream.tellp()) > flush_threshold) {
            write_buffer(myfile, str_stream);
        }
    }

    // Write whatever is still staged after the last element.
    write_buffer(myfile, str_stream);
    myfile.close();
}
/**
 * Write every element of `data` to `filename`, one value per line, streaming
 * directly to the file.
 *
 * The file is opened with the default ofstream mode. If it cannot be opened
 * nothing is written and no error is reported (unchanged from before).
 *
 * @param data       Values to write; each is formatted with operator<<.
 * @param filename   Path of the output file.
 * @param output_id  When true each line is "<index>,<value>", otherwise just
 *                   "<value>".
 */
template<typename T>
void write_results(std::vector<T> &data, string filename, bool output_id = false) {
    // c_str() already yields the const char* that the stream expects, so the
    // const-stripping cast is not needed.
    ofstream myfile(filename.c_str());

    // Unsigned index matches the type of size() and avoids a signed/unsigned
    // comparison; the printed digits are the same.
    const std::size_t num_values = data.size();
    for (std::size_t e = 0; e < num_values; ++e) {
        if (output_id) { myfile << e << ","; }
        myfile << data[e] << "\n";
    }
    myfile.close();
}
/**
 * Write all elements of `data` to `filename` on a single line.
 *
 * Each element is followed by `delim` (including the last one), and the line
 * is terminated with "\n". The file is opened with the default ofstream mode.
 * If it cannot be opened nothing is written and no error is reported.
 *
 * @param data      Values to write; each is formatted with operator<<.
 * @param filename  Path of the output file.
 * @param delim     Separator written after every element; defaults to a tab.
 */
template<typename T>
void write_results_line(std::vector<T> &data, string filename, string delim = "\t") {
    ofstream myfile(filename.c_str());
    const std::size_t num_values = data.size();
    for (std::size_t e = 0; e < num_values; ++e) {
        // Honour the caller's delimiter; it used to be ignored in favour of a
        // hard-coded tab.
        myfile << data[e] << delim;
    }
    myfile << "\n";
    myfile.close();
}
// Declared here; the definition is not in this header. Presumably writes the
// contents of `data` to the file named `filename` -- TODO confirm.
// NOTE(review): `data` is taken by non-const reference; check whether the
// definition actually modifies it.
void write_string(string &data, string filename);
/**
 * Accumulate a histogram of `data` into `bin`.
 *
 * Each element is converted to int and the counter at that index in `bin` is
 * incremented. `bin` is not resized or cleared, so existing counts are kept.
 * Values that are negative or not less than bin.size() are skipped instead of
 * being written out of bounds.
 *
 * @param data  Values to bin; each must be convertible to int.
 * @param bin   Counters indexed by value, sized by the caller.
 */
template<typename T>
void bin_values(std::vector<T> &data, vector<int> & bin) {
    const std::size_t num_bins = bin.size();
    const std::size_t num_values = data.size();
    for (std::size_t v = 0; v < num_values; ++v) {
        const int val = data[v];
        // Guard the index: an unchecked bin[val] is undefined behaviour for
        // values outside [0, bin.size()).
        if (val < 0 || static_cast<std::size_t>(val) >= num_bins) { continue; }
        bin[val]++;
    }
}
// Declared here; presumably writes `data` to an output whose name is derived
// from `suffix` -- TODO confirm.
// NOTE(review): no definition of this template appears in this header as
// shown. A function template needs its definition visible where it is
// instantiated, or an explicit instantiation elsewhere -- confirm one exists.
template<typename T>
void write_vector(std::vector<T> &data, string suffix);
// Declared here; the definition is not in this header. Presumably draws an
// index in [0, num_vals) according to the weights in `dist_tmp` -- TODO confirm.
// NOTE(review): `dist_tmp` is taken by non-const reference; check whether the
// definition modifies it.
int sample_rand_dist(int num_vals, vector<double> & dist_tmp);
#endif