-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
129 lines (107 loc) · 3.43 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from flask import Flask, jsonify, request
import elasticsearch
# Shared Elasticsearch client used by the route handlers and helpers below.
ES = elasticsearch.Elasticsearch("http://sibils-es.lan.text-analytics.ch:9200")
# Version suffix of the indices queried by search_field_values; index names
# look like "sibils_<collection>_<version>".
SIBILS_VERSION = "v4.0.5.1"
# Flask application on which the routes below are registered.
app = Flask(__name__)
@app.route('/')
def index():
    """Serve the HTML landing page that documents the available endpoints.

    Returns:
        An HTML string describing the ``/facets`` and ``/collections`` entry
        points, their query parameters and a few example URLs.
    """
    # The text must match the registered routes: facets live under
    # /facets/<collection>/<field> and the paging parameter is "limit".
    # Angle brackets and ampersands are HTML-escaped so that browsers display
    # them instead of parsing them as markup.
    return """<h1>Bienvenue dans l'app3</h1>
<h2>Entry point /facets/&lt;collection&gt;/&lt;field&gt;</h2>
<h3>Parameters</h3>
<ul>
<li><pre>offset</pre></li>
<li><pre>limit</pre></li>
</ul>
<h3>Examples</h3>
<ul>
<li><pre>/facets/med24/journal</pre></li>
<li><pre>/facets/med24/affiliations</pre></li>
<li><pre>/facets/med24/pubyear?offset=200&amp;limit=10</pre></li>
</ul>
<h2>Entry point /collections</h2>
Return the collections per SIBiLS versions
"""
@app.route("/collections")
def collections():
    """Return the document count of each SIBiLS collection, grouped by version.

    Only indices named ``sibils_<collection>_<version>`` (for example
    ``sibils_med24_v4.0.5.1``) are reported. Indices that do not start with
    ``sibils_`` or that lack a version part are skipped.

    Returns:
        A JSON response shaped like ``{"v4.0.5.1": {"med24": 123456}}``.
    """
    # get all ES indices; only the index names (the keys) are needed
    indices = ES.indices.get_alias(index="*")

    result = {}
    for index_name in indices.keys():
        if not index_name.startswith("sibils_"):
            continue
        # "sibils_med24_v4.0.5.1" --> ["sibils", "med24", "v4.0.5.1"]
        parts = index_name.split("_")
        if len(parts) < 3:
            # e.g. "sibils_tmp": there is no version part, so reading parts[2]
            # would raise IndexError and fail the whole request.
            continue
        collection_name, version = parts[1], parts[2]
        # fetch from ES the document count of this index
        count = ES.count(index=index_name)['count']
        # following the example above: result["v4.0.5.1"]["med24"] = count
        result.setdefault(version, {})[collection_name] = count

    # return a response in the JSON format
    return jsonify(result)
def _facet_page(collection, field):
    """Build the paginated facet response shared by the /facets routes.

    Reads the ``offset`` (default 0) and ``limit`` (default 10) query
    parameters of the current request, fetches the values of ``field`` for
    ``collection`` and returns the requested slice.

    Returns:
        A JSON response with the keys ``offset``, ``limit``, ``size`` (number
        of values before slicing) and ``values`` (the requested slice), or a
        JSON error with HTTP status 400 when ``offset`` or ``limit`` is not a
        non-negative integer.
    """
    # retrieve the query parameters from the URL
    try:
        offset = int(request.args.get("offset", 0))
        limit = int(request.args.get("limit", 10))
    except ValueError:
        # a non-numeric value is a client error, not a server failure
        return jsonify({"error": "offset and limit must be integers"}), 400
    if offset < 0 or limit < 0:
        # negative numbers would make the slice below wrap around the list
        return jsonify({"error": "offset and limit must not be negative"}), 400

    # search the values on ES
    values = search_field_values(collection, field)

    # only a slice of the values is returned so the browser renders quickly
    return jsonify({
        "offset": offset,
        "limit": limit,
        "size": len(values),
        "values": values[offset:offset + limit],
    })


@app.route('/facets/<collection>/<field>')
def aggregate(collection, field):
    """Return one page of the values of ``field`` in ``collection``."""
    return _facet_page(collection, field)


@app.route('/facets/all/<field>')
def aggregate_all(field):
    """Return one page of the values of ``field``.

    NOTE(review): despite the "all" in the URL, the collection is hardcoded
    to "med24" -- confirm whether every collection should be queried.
    """
    # hardcoded
    collection = "med24"
    return _facet_page(collection, field)
def search_field_values(collection, field, max_values=10000):
    """Fetch from Elasticsearch the distinct values of a field with their counts.

    Runs a ``terms`` aggregation on ``<field>.keyword`` against the index
    ``sibils_<collection>_<SIBILS_VERSION>``.

    Args:
        collection: collection part of the index name, e.g. ``"med24"``.
        field: name of the document field; its ``.keyword`` sub-field is
            the one aggregated.
        max_values: maximum number of buckets requested from Elasticsearch.
            Defaults to 10000, the value that used to be hard-coded, so at
            most that many values are returned.

    Returns:
        The list of aggregation buckets, something like::

            [
                {"doc_count": 292823, "key": "PloS one"},
                {"doc_count": 205106, "key": "Scientific reports"}
            ]
    """
    query = {
        # "size": 0 -> no search hits are wanted, only the aggregation
        "size": 0,
        "aggs": {
            "unique_values": {
                "terms": {
                    "field": f"{field}.keyword",
                    "size": max_values,
                },
            },
        },
    }
    index_name = f"sibils_{collection}_{SIBILS_VERSION}"
    response = ES.search(index=index_name, body=query)
    return response['aggregations']['unique_values']['buckets']
if __name__ == '__main__':
    import os

    # debug=True enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server run arbitrary Python code. Debug mode stays on
    # by default for local development, but setting FLASK_DEBUG to 0, false
    # or no now turns it off.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1").lower() not in ("0", "false", "no")
    app.run(debug=debug_enabled)