forked from viralshah/carbon-multi-resize
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcarbon-multi-resize.py
executable file
·252 lines (185 loc) · 7.61 KB
/
carbon-multi-resize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#!/usr/bin/env python
import os
from os.path import dirname, exists, join, realpath
import re
import subprocess
import sys
from carbon.conf import OrderedConfigParser
from carbon.util import pickle
import whisper
# Root of the Graphite installation, taken from the GRAPHITE_ROOT environment
# variable. Every other path below is derived from it.
ROOT_DIR = os.environ.get('GRAPHITE_ROOT')
if ROOT_DIR is None:
    # Without this guard the join() calls below receive None and fail with
    # an error that does not mention the missing variable.
    sys.exit("The GRAPHITE_ROOT environment variable is not set; "
             "point it at the Graphite installation directory.")

STORAGE_DIR = join(ROOT_DIR, 'storage')
WHITELISTS_DIR = join(STORAGE_DIR, 'lists')      # pickled metric lists read by ListSchema
LOCAL_DATA_DIR = join(STORAGE_DIR, 'whisper')    # tree of .wsp files walked by this script
WHISPER_BIN = join(ROOT_DIR, 'bin')              # directory holding whisper-resize.py
STORAGE_SCHEMAS_CONFIG = join(ROOT_DIR, 'conf', 'storage-schemas.conf')
STORAGE_AGGREGATION_CONFIG = join(ROOT_DIR, 'conf', 'storage-aggregation.conf')
class Schema:
    """Base class for rules that decide whether a metric name is covered.

    Subclasses supply test(); callers ask matches(), which reduces the
    result of test() to a plain boolean.
    """

    def matches(self, metric):
        """Return True when test() gives a truthy result for *metric*."""
        verdict = self.test(metric)
        return True if verdict else False

    def test(self, metric):
        """Subclass hook; the base implementation always raises NotImplementedError."""
        raise NotImplementedError()
class DefaultSchema(Schema):
    """Catch-all schema: its test() accepts every metric name."""

    def __init__(self, name, archives):
        # `archives` is stored as given; no validation happens here.
        self.archives = archives
        self.name = name

    def test(self, metric):
        # Unconditional match, whatever the metric is.
        return True
class PatternSchema(Schema):
    """Schema selected by searching a regular expression in the metric name."""

    def __init__(self, name, pattern, archives):
        self.name, self.pattern = name, pattern
        # Compiled once here; test() reuses it for every metric.
        self.regex = re.compile(pattern)
        self.archives = archives

    def test(self, metric):
        """Return the match object from regex.search(metric), or None."""
        found = self.regex.search(metric)
        return found
class ListSchema(Schema):
    """Schema matching metrics that are members of a pickled list file.

    The list is read from join(WHITELISTS_DIR, listName). test() re-reads
    it whenever the file's modification time has moved forward since the
    last successful load.
    """

    def __init__(self, name, listName, archives):
        self.name = name
        self.listName = listName
        self.archives = archives
        self.path = join(WHITELISTS_DIR, listName)

        if exists(self.path):
            self.mtime = os.stat(self.path).st_mtime
            self.members = self._load_members()
        else:
            # No list file yet: match nothing until one shows up.
            self.mtime = 0
            self.members = frozenset()

    def _load_members(self):
        """Unpickle and return the collection stored at self.path."""
        # NOTE(security): unpickling can execute arbitrary code, so the list
        # file must only ever be written by a trusted process.
        # The with-block closes the handle even when pickle.load() raises.
        with open(self.path, 'rb') as fh:
            return pickle.load(fh)

    def test(self, metric):
        """Return True if *metric* is in the (possibly refreshed) member list."""
        if exists(self.path):
            current_mtime = os.stat(self.path).st_mtime
            if current_mtime > self.mtime:
                # Load first and only then record the new mtime, so that a
                # failed reload is attempted again on the next call.
                self.members = self._load_members()
                self.mtime = current_mtime
        return metric in self.members
class Archive:
    """One retention level: a sampling interval and a number of points to keep."""

    def __init__(self, secondsPerPoint, points):
        # Both values go through int(), so numeric strings are accepted.
        self.secondsPerPoint, self.points = int(secondsPerPoint), int(points)

    def __str__(self):
        template = "Archive = (Seconds per point: %d, Datapoints to save: %d)"
        values = (self.secondsPerPoint, self.points)
        return template % values

    def getTuple(self):
        """Return the archive as a (secondsPerPoint, points) tuple."""
        pair = (self.secondsPerPoint, self.points)
        return pair

    @staticmethod
    def fromString(retentionDef):
        """Build an Archive from the pair returned by whisper.parseRetentionDef."""
        precision, count = whisper.parseRetentionDef(retentionDef)
        return Archive(precision, count)
def loadStorageSchemas():
    """Read STORAGE_SCHEMAS_CONFIG and return the list of storage schemas.

    Sections are processed in the order config.sections() yields them. A
    section with a truthy ``match-all`` option becomes a DefaultSchema, one
    with a ``pattern`` option becomes a PatternSchema; both carry the Archive
    objects parsed from the comma-separated ``retentions`` option.

    A section is reported on stdout and skipped when it has neither
    ``match-all`` nor ``pattern``, or when whisper.validateArchiveList
    rejects its archives. The module-level defaultSchema is always appended
    as the last entry.

    Raises:
        KeyError: if a section has no ``retentions`` option.
    """
    schemaList = []
    config = OrderedConfigParser()
    config.read(STORAGE_SCHEMAS_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        matchAll = options.get('match-all')
        pattern = options.get('pattern')

        retentions = options['retentions'].split(',')
        archives = [Archive.fromString(s) for s in retentions]

        if matchAll:
            mySchema = DefaultSchema(section, archives)
        elif pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            # Without this branch the schema built for the previous section
            # would be appended again (or a NameError raised on the first
            # section), silently mis-assigning retentions.
            print("Invalid schemas found in %s: neither 'match-all' nor 'pattern' is set" % section)
            continue

        archiveList = [a.getTuple() for a in archives]
        try:
            whisper.validateArchiveList(archiveList)
        except whisper.InvalidConfiguration as e:
            print("Invalid schemas found in %s: %s" % (section, e))
        else:
            schemaList.append(mySchema)

    # Fallback entry so the returned list always ends with a match-everything schema.
    schemaList.append(defaultSchema)
    return schemaList
def loadAggregationSchemas():
    """Read STORAGE_AGGREGATION_CONFIG and return the list of aggregation schemas.

    Each accepted section becomes a DefaultSchema (truthy ``match-all``) or a
    PatternSchema (``pattern``) whose ``archives`` attribute holds the tuple
    (xFilesFactor, aggregationMethod); either element is None when the
    option is absent.

    A section is reported on stdout and skipped when ``xfilesfactor`` is not
    a number in [0, 1], when ``aggregationmethod`` is not listed in
    whisper.aggregationMethods, or when it has neither ``match-all`` nor
    ``pattern``. The module-level defaultAggregation is always appended as
    the last entry.
    """
    # NOTE: This reuses the Schema classes to carry aggregation settings in
    # their `archives` attribute, and should probably be refactored.
    schemaList = []
    config = OrderedConfigParser()

    try:
        config.read(STORAGE_AGGREGATION_CONFIG)
    except IOError:
        print("%s not found, ignoring." % STORAGE_AGGREGATION_CONFIG)

    for section in config.sections():
        options = dict(config.items(section))
        matchAll = options.get('match-all')
        pattern = options.get('pattern')
        xFilesFactor = options.get('xfilesfactor')
        aggregationMethod = options.get('aggregationmethod')

        # Explicit checks instead of assert: asserts vanish under `python -O`,
        # and a bare except would also swallow KeyboardInterrupt.
        valid = True
        if xFilesFactor is not None:
            try:
                xFilesFactor = float(xFilesFactor)
            except ValueError:
                valid = False
            else:
                valid = 0 <= xFilesFactor <= 1
        if valid and aggregationMethod is not None:
            valid = aggregationMethod in whisper.aggregationMethods
        if not valid:
            print("Invalid schemas found in %s." % section)
            continue

        archives = (xFilesFactor, aggregationMethod)

        if matchAll:
            mySchema = DefaultSchema(section, archives)
        elif pattern:
            mySchema = PatternSchema(section, pattern, archives)
        else:
            # Without this branch the schema from the previous section would
            # be appended again (or a NameError raised on the first section).
            print("Invalid schemas found in %s." % section)
            continue

        schemaList.append(mySchema)

    # Fallback entry so the returned list always ends with a match-everything schema.
    schemaList.append(defaultAggregation)
    return schemaList
# Fallback entries used by the two loader functions below.
defaultArchive = Archive(60, 60 * 24 * 7)  # default retention for unclassified data (7 days of minutely data)
defaultSchema = DefaultSchema('default', [defaultArchive])
# (None, None) stands for "no xFilesFactor and no aggregationMethod configured".
defaultAggregation = DefaultSchema('default', (None, None))

# Announce each file before it is parsed, so that the message is visible even
# when parsing fails and any "Invalid schemas" output appears after it.
print("Loading storage-schemas configuration from: '%s'" % STORAGE_SCHEMAS_CONFIG)
schemas = loadStorageSchemas()

print("Loading storage-aggregation configuration from: '%s'" % STORAGE_AGGREGATION_CONFIG)
agg_schemas = loadAggregationSchemas()
def get_archive_config(metric):
    """Look up the storage and aggregation settings configured for *metric*.

    Returns a tuple (archiveConfig, xFilesFactor, aggregationMethod):
    archiveConfig is the list of getTuple() results for the archives of the
    first entry in `schemas` that matches; the other two values are the
    `archives` pair of the first matching entry in `agg_schemas`, or None
    and None when nothing matches.

    Raises Exception when no storage schema produced a non-empty archive list.
    """
    # First storage schema accepting the metric, if any.
    storage_hit = next((s for s in schemas if s.matches(metric)), None)
    if storage_hit is None:
        archiveConfig = None
    else:
        archiveConfig = [entry.getTuple() for entry in storage_hit.archives]

    # First aggregation schema accepting the metric, if any.
    agg_hit = next((s for s in agg_schemas if s.matches(metric)), None)
    if agg_hit is None:
        xFilesFactor, aggregationMethod = None, None
    else:
        xFilesFactor, aggregationMethod = agg_hit.archives

    if archiveConfig:
        return (archiveConfig, xFilesFactor, aggregationMethod)
    raise Exception("No storage schema matched the metric '%s', check your storage-schemas.conf file." % metric)
def diff_file_conf(metric, filepath):
    """
    Returns True if the actual file has parameters different from those in the
    configuration files, False otherwise.

    metric   -- metric name used to look up the configured settings
    filepath -- path of the whisper file whose header is inspected

    The file differs when a configured xFilesFactor or aggregationMethod does
    not match the file, when the number of archives differs, or when any
    archive's secondsPerPoint or points differs.
    """
    (archiveConfig, xFilesFactor, aggregationMethod) = get_archive_config(metric)
    info = whisper.info(filepath)

    # A None setting means nothing is configured for it. The resize command
    # is built without the corresponding option in that case, so treating
    # None as a difference would flag the same file again on every run.
    if xFilesFactor is not None:
        # NOTE(review): whisper appears to keep xFilesFactor as a 32-bit float
        # in the file header, so the value read back may differ slightly from
        # the configured one -- confirm. A small tolerance avoids flagging
        # such files forever.
        if abs(info['xFilesFactor'] - xFilesFactor) > 1e-6:
            return True
    if aggregationMethod is not None and info['aggregationMethod'] != aggregationMethod:
        return True

    fileArchives = info['archives']
    # zip() stops at the shorter sequence, so an added or removed archive
    # must be detected explicitly.
    if len(fileArchives) != len(archiveConfig):
        return True

    for archivefile, (secondsPerPoint, points) in zip(fileArchives, archiveConfig):
        if archivefile['secondsPerPoint'] != secondsPerPoint or archivefile['points'] != points:
            return True

    return False
# Patterns used to turn a file path into a metric name. The directory prefix
# and the path separator are escaped so that regex metacharacters in them
# (e.g. '.' in a directory name, or '\' as separator) are matched literally.
wsp_regex = re.compile(r'\.wsp$')
root_dir_regex = re.compile('^' + re.escape(LOCAL_DATA_DIR + os.sep))
dir_sep_regex = re.compile(re.escape(os.sep))

# Walk every .wsp file under LOCAL_DATA_DIR and run whisper-resize.py on those
# whose on-disk parameters differ from the configuration.
for root, dirs, files in os.walk(LOCAL_DATA_DIR):
    for filename in files:
        if not wsp_regex.search(filename):
            continue

        filepath = join(root, filename)
        # <LOCAL_DATA_DIR>/a/b/c.wsp -> a.b.c
        metric = dir_sep_regex.sub('.', wsp_regex.sub('', root_dir_regex.sub('', filepath)))
        print("Processing {0}".format(filepath))

        if not diff_file_conf(metric, filepath):
            continue

        # There is a difference and we need to resize the whisper file.
        (archiveConfig, xFilesFactor, aggregationMethod) = get_archive_config(metric)

        command_args = [join(WHISPER_BIN, 'whisper-resize.py'), filepath]
        command_args.extend(
            "{0}:{1}".format(secondsPerPoint, points)
            for (secondsPerPoint, points) in archiveConfig
        )
        command_args.append('--nobackup')
        # Aggregation options are only passed when the configuration sets them.
        if aggregationMethod:
            command_args.append('--aggregationMethod={0}'.format(aggregationMethod))
        if xFilesFactor is not None:
            command_args.append('--xFilesFactor={0}'.format(xFilesFactor))

        # check_output raises CalledProcessError on a non-zero exit status,
        # which stops the run at the first file that fails to resize.
        subprocess.check_output(command_args)