-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrq2.py
executable file
·60 lines (53 loc) · 2.08 KB
/
rq2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
# %%
import matplotlib.pyplot as plt
import pandas as pd
import common
# %%
# rq2 output format:
# o[project_url][file_path][feature_name] = feature_count
# Load the de-duplicated RQ2 data, using the Parquet caches when they can be
# read and rebuilding (then re-caching) each stage that cannot.
#
# Order of preference:
#   1. data/parquet/rq2-nodupes.parquet  (already de-duplicated)
#   2. data/parquet/rq2.parquet          (filtered, still has duplicates)
#   3. data/csv/rq2.output.csv           (raw output, loaded via common.get_data)
try:
    dfnodupes = pd.read_parquet('data/parquet/rq2-nodupes.parquet')
except Exception:
    # Any ordinary read failure falls back to rebuilding the cache.
    # `except Exception` (rather than a bare `except:`) lets
    # KeyboardInterrupt / SystemExit propagate instead of being swallowed.
    try:
        df = pd.read_parquet('data/parquet/rq2.parquet')
    except Exception:
        df = common.get_data(
            'data/csv/rq2.output.csv',
            ['var', 'project', 'file', 'feature', 'counts'],
            ['var'],
            common.get_counts(),
        )
        df = common.filter_projects(df)
        df.to_parquet('data/parquet/rq2.parquet', compression='gzip')
    dfnodupes = common.remove_dupes(df)
    dfnodupes.to_parquet('data/parquet/rq2-nodupes.parquet', compression='gzip')
# %% [markdown]
# # Generate Table(s)
# %%
# Number of rows in the de-duplicated data for each feature.
df2 = dfnodupes.groupby('feature').size()

# Collapse every feature whose name contains 'func-' into a single
# 'Library functions' row holding the sum of their counts.
library_features = [feature for feature in df2.index if 'func-' in feature]
library_total = df2.loc[library_features].sum()
df2 = df2.drop(library_features)
df2.loc['Library functions'] = library_total

df2 = df2.sort_values(ascending=False).to_frame('count')

# Map raw feature names to the LaTeX row labels used in the table.
# The labels MUST be raw strings: in a normal literal '\r' (as in \rowcolor)
# is a carriage return and '\t' (as in \texttt) is a tab, which would corrupt
# the emitted LaTeX commands.
rowcolor = 'gray!15'
df2 = df2.rename({
    'method': rf'\rowcolor{{{rowcolor}}} method declarations',
    'FOREACH': 'for-each',
    'RAISE': rf'\rowcolor{{{rowcolor}}} \texttt{{raise}}',
    'TRY': rf'\rowcolor{{{rowcolor}}} \texttt{{try}}',
    'CATCH': rf'\rowcolor{{{rowcolor}}} \texttt{{except}}',
    'IN': r'\texttt{in}',
    'ARRAY_COMPREHENSION': 'array comprehensions',
    'NOT_IN': r'\texttt{not in}',
    'WITH': rf'\rowcolor{{{rowcolor}}} \texttt{{with}}',
    'LAMBDA': r'\texttt{lambda}',
    'YIELD': r'\texttt{yield}',
    'FINALLY': rf'\rowcolor{{{rowcolor}}} \texttt{{finally}}',
    'method-decorator': 'method decorators',
    'class-decorator': 'class decorators',
    'class': rf'\rowcolor{{{rowcolor}}} class declarations',
    'inherits': rf'\rowcolor{{{rowcolor}}} class inheritance',
    'higher-order-func': 'higher-order functions',
    'GENERATOR': 'generators',
    'iterable': 'iterable',
    'Library functions': 'built-in functions (functools/itertools)',
})

# Counts are cast to float64 before being handed to the table writer, which
# is asked for zero decimals. escape=False presumably keeps the LaTeX markup
# in the labels from being escaped -- confirm against common.save_table.
df2 = df2.astype({'count': 'float64'})
common.save_table(df2, 'rq2-project', decimals=0, escape=False)