forked from tanjiti/sec_profile
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_sec_domain.py
88 lines (66 loc) · 2.06 KB
/
get_sec_domain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
import sys

# Python 2 only: force the process-wide default str<->unicode codec to UTF-8.
# reload(sys) is needed because the interpreter's startup removes
# sys.setdefaultencoding from the module. On Python 3 the default is already
# UTF-8 and `reload` is not a builtin, so the whole hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf8')
import logging
import re
from mills import SQLiteOper
from mills import get_title
def update_domain(so, retry=3, timeout=10, proxy=None):
    """
    Fill in missing ``domain_name`` values using a title fetched per domain.

    For each of the tables ``secwiki_detail`` and ``xuanwu_detail``, select the
    distinct ``domain`` values whose ``domain_name`` is NULL or empty, obtain a
    title for each one via ``get_title`` and write it back with an UPDATE.
    Domains for which no title is obtained are left untouched.

    :param so: database helper exposing ``query(sql)`` and ``execute(sql)``
    :param retry: forwarded unchanged to ``get_title``
    :param timeout: forwarded unchanged to ``get_title``
    :param proxy: forwarded unchanged to ``get_title``
    :return: None
    """
    for table in ["secwiki_detail", "xuanwu_detail"]:
        # '' (not "") is the SQL string literal; SQLite only accepts the
        # double-quoted form through a legacy compatibility quirk.
        sql = "select distinct domain " \
              "from {table} " \
              "where domain_name is null or domain_name='' order by domain ".format(table=table)

        # get_domain_name in this file treats the query result as possibly
        # falsy, so do the same here instead of iterating None.
        for item in so.query(sql) or []:
            domain = item[0]
            if not domain:
                continue

            title = get_title(domain, proxy=proxy, retry=retry, timeout=timeout)
            if not title:
                continue

            # Both quote characters are dropped from the title so it cannot
            # terminate the SQL literal it is embedded in below.
            title = title.replace('"', '').replace("'", '')

            update_sql = "update {table} set domain_name='{title}' where domain='{domain}';".format(
                table=table,
                title=title,
                # The domain has to match the stored value exactly, so escape
                # single quotes by doubling them rather than stripping them.
                domain=domain.replace("'", "''"),
            )

            # Best effort: a failed update is logged and the loop moves on to
            # the next domain.
            try:
                so.execute(update_sql)
                print(update_sql)
            except Exception as e:
                logging.error("[update_sql]: %s str(%s)", update_sql, str(e))
def get_domain_name(so, source="secwiki", topn=5000):
    """
    Print the domains of one source table, most referenced first.

    Rows are read from ``<source>_detail``, grouped by ``domain`` and ordered
    by ``count(path)`` descending. At most ``topn`` rows are printed, one per
    line, as ``domain domain_name count``. Nothing is printed when the query
    yields no rows.

    :param so: database helper exposing ``query(sql)``
    :param source: table-name prefix; the table queried is ``<source>_detail``
    :param topn: maximum number of rows to print
    :return: None
    """
    sql = "select domain,domain_name,count(path) as c from %s_detail group by domain order by c desc" % source
    ret = so.query(sql)
    if not ret:
        return

    for rank, row in enumerate(ret):
        if rank >= topn:
            break
        domain, domain_name, c = row
        # A single pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("%s %s %s" % (domain, domain_name, c))
if __name__ == "__main__":
    # Script entry point: first back-fill missing domain names in the scrape
    # database, then print the ranked domain list for each source.
    proxy = None
    so = SQLiteOper("data/scrap.db")

    update_domain(so, proxy=proxy, retry=1, timeout=10)

    # Print the ranked list of security sites for every source.
    for source in ("secwiki", "xuanwu"):
        get_domain_name(so, topn=50, source=source)