This repository has been archived by the owner on Jan 14, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspjson.py
69 lines (60 loc) · 1.98 KB
/
spjson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
import sqlite3
# Normalised ranks are spread over 0..RANK_SCALE for the visualisation.
RANK_SCALE = 19

# Row layout produced by this query: (inbound, old_rank, new_rank, id, url).
NODE_QUERY = '''SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url
    FROM Pages JOIN Links ON Pages.id = Links.to_id
    WHERE html IS NOT NULL AND ERROR IS NULL
    GROUP BY id ORDER BY id,inbound'''

LINK_QUERY = '''SELECT DISTINCT from_id, to_id FROM Links'''


def _select_nodes(rows, howmany):
    """Return at most ``howmany`` rows taken from the front of ``rows``."""
    nodes = []
    for row in rows:
        # Test the limit *before* appending; testing afterwards with ``>``
        # let one extra node through.
        if len(nodes) >= howmany:
            break
        nodes.append(row)
    return nodes


def _rank_bounds(nodes):
    """Return ``(minrank, maxrank)`` of the non-NULL new_rank values.

    Returns ``(None, None)`` when no selected node carries a rank.
    """
    ranks = [row[2] for row in nodes if row[2] is not None]
    if not ranks:
        return None, None
    return min(ranks), max(ranks)


def _scale_rank(rank, minrank, maxrank):
    """Map ``rank`` linearly from [minrank, maxrank] onto [0, RANK_SCALE].

    A missing rank (``None``) is reported as 0. The caller guarantees
    ``maxrank != minrank``.
    """
    if rank is None:
        return 0
    return RANK_SCALE * ((rank - minrank) / (maxrank - minrank))


def _format_node(row, minrank, maxrank):
    """Render one node row as a JSON object literal."""
    inbound, _old_rank, new_rank, page_id, url = row
    # json.dumps quotes and escapes the URL so that quotes, backslashes or
    # control characters in it cannot corrupt the generated file.
    return '{"weight":%s,"rank":%s, "id":%s, "url":%s}' % (
        inbound,
        _scale_rank(new_rank, minrank, maxrank),
        page_id,
        json.dumps(url),
    )


def main(db_path='spider.sqlite', out_path='spider.js', howmany=None):
    """Export pages and links from the spider database as ``spiderJson``.

    Args:
        db_path: SQLite database holding the ``Pages`` and ``Links`` tables.
        out_path: File that receives the ``spiderJson = {...};`` assignment.
        howmany: Maximum number of nodes to export. When ``None`` the value
            is read interactively from the user.

    Raises:
        SystemExit: When the selected nodes have no rank, or all share the
            same rank, so the ranks cannot be normalised. The output file is
            not touched in that case.
    """
    print("Creating JSON output on " + out_path + "...")
    if howmany is None:
        howmany = int(input("How many nodes? "))

    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        cur.execute(NODE_QUERY)
        nodes = _select_nodes(cur, howmany)

        minrank, maxrank = _rank_bounds(nodes)
        if minrank is None or maxrank == minrank:
            print("Error - please run sprank.py to compute page rank")
            # quit() is provided by the optional ``site`` module; raising
            # SystemExit is the portable equivalent with the same exit status.
            raise SystemExit

        # Links refer to nodes by their position in the "nodes" array,
        # so remember where each page id ended up.
        index_of = {}
        node_lines = []
        for position, row in enumerate(nodes):
            node_lines.append(_format_node(row, minrank, maxrank))
            index_of[row[3]] = position

        cur.execute(LINK_QUERY)
        # Keep only links whose two endpoints were both exported.
        link_lines = [
            '{"source":%s,"target":%s,"value":3}'
            % (index_of[from_id], index_of[to_id])
            for from_id, to_id in cur
            if from_id in index_of and to_id in index_of
        ]
        cur.close()
    finally:
        conn.close()

    # The file is opened only after validation, so a failed run never
    # truncates a previously generated output file.
    with open(out_path, 'w', encoding='utf-8') as fhand:
        fhand.write('spiderJson = {"nodes":[\n')
        fhand.write(',\n'.join(node_lines))
        fhand.write('],\n')
        fhand.write('"links":[\n')
        fhand.write(',\n'.join(link_lines))
        fhand.write(']};')

    print("Open force.html in a browser to view the visualization")


if __name__ == "__main__":
    main()