forked from x4nth055/pythoncode-tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpage_ranking.py
46 lines (44 loc) · 1.7 KB
/
page_ranking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import requests
import urllib.parse as p
import sys
# get the API KEY here: https://developers.google.com/custom-search/v1/overview
API_KEY = "<INSERT_YOUR_API_KEY_HERE>"
# get your Search Engine ID on your CSE control panel
SEARCH_ENGINE_ID = "<INSERT_YOUR_SEARCH_ENGINE_ID_HERE>"
# target domain you want to track
target_domain = sys.argv[1]
# target keywords
query = sys.argv[2]
for page in range(1, 11):
print("[*] Going for page:", page)
# calculating start
start = (page - 1) * 10 + 1
# make API request
url = f"https://www.googleapis.com/customsearch/v1?key={API_KEY}&cx={SEARCH_ENGINE_ID}&q={query}&start={start}"
data = requests.get(url).json()
search_items = data.get("items")
# a boolean that indicates whether `target_domain` is found
found = False
for i, search_item in enumerate(search_items, start=1):
# get the page title
title = search_item.get("title")
# page snippet
snippet = search_item.get("snippet")
# alternatively, you can get the HTML snippet (bolded keywords)
html_snippet = search_item.get("htmlSnippet")
# extract the page url
link = search_item.get("link")
# extract the domain name from the URL
domain_name = p.urlparse(link).netloc
if domain_name.endswith(target_domain):
# get the page rank
rank = i + start - 1
print(f"[+] {target_domain} is found on rank #{rank} for keyword: '{query}'")
print("[+] Title:", title)
print("[+] Snippet:", snippet)
print("[+] URL:", link)
# target domain is found, exit out of the program
found = True
break
if found:
break