-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathqueries
executable file
·211 lines (184 loc) · 4.86 KB
/
queries
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/usr/bin/env python
import argparse
import datetime
import json
import os
import rdflib
# Default Turtle dump location: datadumps/dialled_<today's ISO date>/dialled.ttl
today = datetime.date.today().isoformat()
dump_dir = "dialled_%s" % today
source = os.path.join("datadumps", dump_dir, "dialled.ttl")

# Command-line options; ArgumentDefaultsHelpFormatter shows the defaults in --help.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
    "--libraries",
    help="JSON list of libraries",
    default="libraries_all.json",
)
parser.add_argument(
    "--source",
    help="Source file containing library LOD",
    default=source,
)
args = parser.parse_args()

# Report how many entries the libraries JSON file contains.
with open(args.libraries, 'r') as handle:
    crawled = json.load(handle)
crawled_total = len(crawled)
print("COUNT: Total number of libraries crawled: %d" % crawled_total)

# Load the Turtle dump into the graph that every query below runs against.
g = rdflib.ConjunctiveGraph()
g.parse(args.source, format='turtle')
# SPARQL PREFIX declarations shared by the queries in this script that
# interpolate this string through a leading %s placeholder.
# NOTE(review): no `ogp:` prefix is declared here, yet the final loop of the
# script queries `ogp:title` -- presumably that prefix has to come from the
# namespaces bound on the parsed graph; confirm against the source data.
prefixes = """
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX md: <http://www.w3.org/ns/md#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX schema: <http://schema.org/>
PREFIX twitter: <twitter:>
"""
# Distinct IRI-identified resources typed schema:Library, ordered by IRI.
# NOTE(review): the query carries LIMIT 100, so the count printed below can
# never exceed 100 -- confirm that the cap is intentional.
library_query = """
%s
SELECT DISTINCT ?lib
WHERE {
?lib a schema:Library .
OPTIONAL {?lib schema:name ?name}
OPTIONAL {?lib schema:url ?url}
FILTER (isIRI(?lib))
}
ORDER BY ?lib
LIMIT 100
""" % prefixes
libraries = g.query(library_query)
library_total = len(libraries)
print("COUNT: schema.org/Library = %d" % library_total)
for library_row in libraries:
    # The one-column row (?lib) is passed whole as the format argument.
    print("schema.org/Library %s" % library_row)
# Distinct IRI-identified resources typed schema:Event (no ordering requested).
event_query = """
%s
SELECT DISTINCT ?lib
WHERE {
?lib a schema:Event .
FILTER (isIRI(?lib))
}
""" % prefixes
events = g.query(event_query)
event_total = len(events)
print("COUNT: schema.org/Event = %d" % event_total)
for event_row in events:
    # The one-column row (?lib) is passed whole as the format argument.
    print("schema.org/Event %s" % event_row)
# Distinct IRI-identified resources typed schema:Organization, ordered by IRI.
organization_query = """
%s
SELECT DISTINCT ?lib
WHERE {
?lib a schema:Organization .
FILTER (isIRI(?lib))
}
ORDER BY ?lib
""" % prefixes
orgs = g.query(organization_query)
organization_total = len(orgs)
print("COUNT: schema.org/Organization = %d" % organization_total)
for organization_row in orgs:
    # The one-column row (?lib) is passed whole as the format argument.
    print("schema.org/Organization %s" % organization_row)
# Distinct IRI-identified resources typed schema:LocalBusiness, ordered by IRI.
# The result is stored under the name `orgs`, replacing any earlier value.
local_business_query = """
%s
SELECT DISTINCT ?lib
WHERE {
?lib a schema:LocalBusiness .
FILTER (isIRI(?lib))
}
ORDER BY ?lib
""" % prefixes
orgs = g.query(local_business_query)
local_business_total = len(orgs)
print("COUNT: schema.org/LocalBusiness= %d" % local_business_total)
for business_row in orgs:
    # The one-column row (?lib) is passed whole as the format argument.
    print("schema.org/LocalBusiness %s" % business_row)
# Pairs each IRI subject with the value reached by following schema:address
# and then one or more schema:streetAddress hops (SPARQL property path).
street_address_query = """
%s
SELECT DISTINCT ?lib ?add
WHERE {
{
?lib schema:address/schema:streetAddress+ ?add .
}
FILTER (isIRI(?lib))
}
ORDER BY ?lib
""" % prefixes
address = g.query(street_address_query)
street_address_total = len(address)
print("COUNT: schema.org/address= %d" % street_address_total)
for address_row in address:
    # The two-column row (?lib, ?add) fills both %s placeholders.
    print("schema.org/Address1 %s %s" % address_row)
# Distinct IRI subjects associated with a schema:PostalAddress in any of four
# ways: directly via schema:address, via the schema:branchOf target's address,
# via a schema:location's address, or by being typed schema:PostalAddress
# themselves.
# NOTE(review): in the last UNION branch ?add is not bound by any required
# pattern, so its OPTIONAL is unconstrained. It cannot change the DISTINCT
# ?lib result, but it looks like ?lib was intended -- confirm.
addresses = g.query("""
%s
SELECT DISTINCT ?lib
WHERE {
{
?lib schema:address ?add .
?add a schema:PostalAddress .
OPTIONAL {?add schema:streetAddress ?street}
}
UNION
{
?lib schema:branchOf ?lib2 .
?lib2 schema:address ?add .
?add a schema:PostalAddress .
OPTIONAL {?add schema:streetAddress ?street}
}
UNION
{
?lib schema:location ?loc .
?loc schema:address ?add .
?add a schema:PostalAddress .
OPTIONAL {?add schema:streetAddress ?street}
}
UNION
{
?lib a schema:PostalAddress .
OPTIONAL {?add schema:streetAddress ?street}
}
FILTER (isIRI(?lib))
}
ORDER BY ?lib
""" % (prefixes))
# Count this query's own result set; the original counted `address`, the
# result of the preceding street-address query, and so reported its size.
print("COUNT: schema.org/PostalAddress= %d" % (len(addresses)))
for row in addresses:
    # The one-column row (?lib) is passed whole as the format argument.
    print("schema.org/PostalAddress %s" % row)
# Every (library, name) pair for resources typed schema:Library. This query
# declares its own prefixes and applies neither DISTINCT nor an isIRI filter.
library_name_query = """
# Common Bibliographic Vocabularies
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX dct: <http://purl.org/dc/terms/>
# W3C
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX schema: <http://schema.org/>
SELECT ?lib ?name {
?lib rdf:type schema:Library .
?lib schema:name ?name
}
"""
libs = g.query(library_name_query)
named_total = len(libs)
print("COUNT: schema.org/Library name = %d" % named_total)
for name_row in libs:
    # The two-column row (?lib, ?name) fills both %s placeholders.
    print("%s %s" % name_row)
# Distinct (item, creator) pairs stated through any of three predicates:
# dct:creator, sioc:has_creator or twitter:creator. The result is stored under
# the name `libs`, replacing any earlier value.
creator_query = """
# Common Bibliographic Vocabularies
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX bibo: <http://purl.org/ontology/bibo/>
PREFIX dct: <http://purl.org/dc/terms/>
# W3C
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX sioc: <http://rdfs.org/sioc/ns#>
PREFIX twitter: <twitter:>
PREFIX schema: <http://schema.org/>
SELECT DISTINCT ?item ?creator
WHERE {
{
?item dct:creator ?creator
}
UNION {
?item sioc:has_creator ?creator
}
UNION {
?item twitter:creator ?creator
}
}
"""
libs = g.query(creator_query)
creator_total = len(libs)
print("COUNT: creator = %d" % creator_total)
for creator_row in libs:
    # The two-column row (?item, ?creator) fills both %s placeholders.
    print("%s %s" % creator_row)
# Query for "required" properties of common vocabularies: for each property,
# count and list the distinct IRI subjects that carry it.
# NOTE(review): `ogp:` is not declared in `prefixes`; presumably it must be
# bound on the parsed graph for the `ogp:title` query to resolve -- confirm.
for ont in ('schema:name', 'ogp:title', 'twitter:card'):
    res = g.query("""
%s
SELECT DISTINCT ?lib
WHERE {
?lib %s ?name .
FILTER (isIRI(?lib))
}
ORDER BY ?lib
""" % (prefixes, ont))
    print("COUNT: %s = %d" % (ont, len(res)))
    for row in res:
        # Print the bound ?lib value itself. Formatting the whole row next to
        # `ont` would render the row's tuple repr instead of the bare IRI that
        # every other listing in this script prints.
        print("%s %s" % (ont, row[0]))