From 30136eac64db214940d4f2d221d787fbc9d5fc53 Mon Sep 17 00:00:00 2001 From: Orivej Desh Date: Sun, 14 Jul 2019 06:37:00 +0000 Subject: [PATCH] Fix callgrind output compression The callgrind format allows repeated strings to be compressed into one. yappi was using the syntax for compression but did not compress much. This change makes yappi actually compress repeated strings. kcachegrind merges functions with the same compressed ID into one (as if they were inlined) even if they were defined in different modules. Therefore this change assigns different IDs to different functions with the same name. --- yappi.py | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/yappi.py b/yappi.py index 6ab35b0..e163c55 100644 --- a/yappi.py +++ b/yappi.py @@ -14,6 +14,7 @@ except ImportError: from threading import get_ident # Python 3 +from collections import defaultdict from contextlib import contextmanager class YappiError(Exception): pass @@ -642,26 +643,43 @@ def _save_as_CALLGRIND(self, path): lines = [header] - # add function definitions - file_ids = [''] - func_ids = [''] + # Each function has a distinct number even if its name already has a + # number because kcachegrind merges functions with the same number. 
+ numbers_seq = enumerate(iter(int, None)) + names_seq = iter(lambda: defaultdict(lambda: next(numbers_seq)[0]), None) + modules_seq = enumerate(iter(lambda: defaultdict(lambda: next(names_seq)), None)) + modules = defaultdict(lambda: next(modules_seq)) + # modules = {'file.py': [module_index, {'func': {line: func_index}}]} + fl = lambda x: modules[x.module][0] + fn = lambda x: modules[x.module][1][x.name][x.lineno] + + # enumerate modules and functions for func_stat in self: - file_ids += [ 'fl=(%d) %s' % (func_stat.index, func_stat.module) ] - func_ids += [ 'fn=(%d) %s %s:%s' % (func_stat.index, func_stat.name, func_stat.module, func_stat.lineno) ] + fn(func_stat) + for child in func_stat.children: + fn(child) - lines += file_ids + func_ids + # add function definitions + for module in sorted(modules): + lines += ['', 'fl=(%d) %s' % (modules[module][0], module)] + for func, defs in sorted(modules[module][1].items()): + suffix = '' + for line in sorted(defs): + if len(defs) > 1: # disambiguate redefined functions + suffix = ' +' + str(line) + lines += ['fn=(%d) %s%s' % (defs[line], func, suffix)] # add stats for each function we have a record of for func_stat in self: func_stats = [ '', - 'fl=(%d)' % func_stat.index, - 'fn=(%d)' % func_stat.index] + 'fl=(%d)' % fl(func_stat), + 'fn=(%d)' % fn(func_stat)] func_stats += [ '%s %s' % (func_stat.lineno, int(func_stat.tsub * 1e6)) ] # children functions stats for child in func_stat.children: - func_stats += [ 'cfl=(%d)' % child.index, - 'cfn=(%d)' % child.index, + func_stats += [ 'cfl=(%d)' % fl(child), + 'cfn=(%d)' % fn(child), 'calls=%d 0' % child.ncall, '0 %d' % int(child.ttot * 1e6) ]