-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathYDJ monthly stats call.py
168 lines (139 loc) · 5.97 KB
/
YDJ monthly stats call.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""
by Tomasz Mrozewski [email protected]
last updated November 29, 2022
As of then, everything works
"""
# import required modules
import os
import pandas as pd
import requests
import json
import datetime
# set the working directory
my_dir = ' ' # wd set here
os.chdir(my_dir)
# open the CSV file with credentials and endpoints for all journals
my_keys = pd.read_csv(" ") # file name here
# set the value for last month in YY-MM format
# used later as a lookup value to identify's last month statistics in the certain API calls
# also used to identify the month of the stats pull
today = datetime.date.today()
first = today.replace(day=1)
lastMonth = first - datetime.timedelta(days=1)
monthLookup = lastMonth.strftime("%Y-%m")
# create empty lists for final output
# hese will be populated by the code in the for loop
# each datapoint is described in metric_list and the value is given in value_list
journal_list = []
month_list = []
metric_list = []
value_list = []
# iterate through the credentials file
# query all endpoints for each journal, using the API token
# order of queries is:
# 1. published submissions (total)
# 2. published issues (total)
# 3. abstract views (last month)
# 4. galley views (last month)
# query looks at total published submissions and total issues because published back issues would not be returned if we queried by date
for ind in my_keys.index:
# assign journal ID (jabbr), endpoint, and token values from CSV to variables
abbr = my_keys['jabbr'][ind]
subs = my_keys['subs_endpoint'][ind]
issues = my_keys['issues_endpoint'][ind]
abviews = my_keys['abstractViews_endpoint'][ind]
galviews = my_keys['galleyViews_endpoint'][ind]
key = my_keys['token'][ind]
# 1. PUBLISHED SUBMISSIONS (TOTAL)
# call for submissions data, using submissions endpoint and API token
# status:3 means that we're only requesting published submissions
subs_call = requests.get(
subs,
params={'apiToken':key,
'status':'3'}
)
# read the JSON file from the submissions data call
y = json.dumps(subs_call.json())
z = json.loads(y)
# extract the desired datapoint (itemsMax) as variable x
x = z["itemsMax"]
#write the journal identifier, month, metric type, and value to the lists
journal_list.append(abbr)
month_list.append(monthLookup)
metric_list.append ('published submissions')
value_list.append(x)
# 2. PUBLISHED ISSUES (TOTAL)
# call for submissions data, using submissions endpoint and API token
# isPublished:true means that we're only requesting published issues
issues_call = requests.get(
issues,
params={'apiToken':key,
'isPublished':'true'}
)
# read the JSON file from the submissions data call
q = json.dumps(issues_call.json())
r = json.loads(q)
# extract the desired datapoint (itemsMax) as variable p
p = r["itemsMax"]
#write the journal identifier, month, metric type, and value to the lists
journal_list.append(abbr)
month_list.append(monthLookup)
metric_list.append ('published issues')
value_list.append(p)
# 3. ABSTRACT VIEWS (LAST MONTH)
# call for abstract views data, using views endpoint and API token
# pulls for views back to 2001 by default, which is as far back as the API goes
# could update this later to maybe only pull last month's stats
abviews_call = requests.get(
abviews,
params={'apiToken':key,
'dateStart':'2001-01-01'}
)
# read the JSON file from the abstract views call
# use the monthLookup variable (defined earlier) to extract only last month's data
# get the month and
b = json.dumps(abviews_call.json())
c = json.loads(b)
d = next(item for item in c if item["date"] == monthLookup)
# extract the desired datapoint (value) as variable a
a = d.get('value')
#write the journal identifier, month, metric type, and value to the lists
journal_list.append(abbr)
month_list.append(monthLookup)
metric_list.append ('abstract views')
value_list.append(a)
# 4. GALLEY VIEWS (LAST MONTH)
# call for galley views data, using views endpoint and API token
# pulls for views back to 2001 by default, which is as far back as the API goes
# could update this later to maybe only pull last month's stats
galviews_call = requests.get(
galviews,
params={'apiToken':key,
'dateStart':'2001-01-01'}
)
# read the JSON file from the galley views call
# use the monthLookup variable (defined earlier) to extract only last month's data
# get the month and
f = json.dumps(galviews_call.json())
g = json.loads(f)
h = next(item for item in g if item["date"] == monthLookup)
# extract the desired datapoint (value) as variable a
e = h.get('value')
# write the journal identifier, month, metric type, and value to the lists
journal_list.append(abbr)
month_list.append(monthLookup)
metric_list.append ('galley views')
value_list.append(e)
# print which journal just finished, to help debugging
print(abbr)
# create dataframe df by zipping the lists that were populated by the lookups
# again, each datapoint is described in metric_list and the value is given in value_list
df = pd.DataFrame(list(zip(journal_list,month_list,metric_list, value_list)),
columns = ['journal','month','metric', 'value'])
# flatten the dataframe by using a pivot function
# one line for each journal for the month, containing all values. metrics as column titles
pivoted = pd.pivot_table(df, values='value', index=['journal','month'], columns='metric')
# write output to CSV in working directory
pivoted.to_csv(monthLookup +'.csv')
# print message for successful completion of script
print('If you\'re reading this, the script probably worked!')