ajihunter.py
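"""Ajihunter scrapes the JavaScript files referenced by a page and flags
strings that look like sensitive data (emails, API keys, tokens, IPs).
Findings are appended to output/<domain>/<domain>_js_data.txt.
"""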
import requests
from bs4 import BeautifulSoup
import os
import re
import argparse
# Opening greeting ASCII art, kept as a raw string so the backslashes print literally
def print_welcome_message():
    welcome_message = r"""
    _     _ _       _ ____    _   _             _
   / \   (_|_)     | / ___|  | | | |_   _ _ __ | |_ ___ _ __
  / _ \  | | |  _  | \___ \  | |_| | | | | '_ \| __/ _ \ '__|
 / ___ \ | | | | |_| |___) | |  _  | |_| | | | | ||  __/ |
/_/   \_\/ |_|  \___/|____/  |_| |_|\__,_|_| |_|\__\___|_|
       |__/"""
    print("\033[36m" + welcome_message)  # Cyan
    print("\033[37m" + "[INFO] Welcome to Ajihunter - Scraper for JavaScript Files!")  # White
# Function to ensure the URL has a scheme (defaults to http:// when none is given)
def ensure_http(url):
    if not url.startswith(("http://", "https://")):
        return "http://" + url
    return url
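# For example, ensure_http("example.com") returns "http://example.com",
# while a URL that already carries a scheme is returned unchanged.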
# Function to check and display sensitive data
def find_sensitive_data(js_content):
    patterns = {
        'email': r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',  # Email
        'phone': r'\+?\d{1,4}[\s\-]?\(?\d{1,3}\)?[\s\-]?\d{3}[\s\-]?\d{4,6}',  # Phone numbers
        'api_key': r'(?i)\b(?:api[_-]?key|apikey|auth[_-]?token|access[_-]?token)[=\s]?[\'"]?([a-zA-Z0-9_-]+)[\'"]?\b',  # API Key
        'jwt_token': r'eyJ[a-zA-Z0-9\-_\.]+',  # JWT Token
        'db_credential': r'(?i)\b(?:db[_-]?user|db[_-]?password|db[_-]?host|db[_-]?port|db[_-]?name)[=\s]?[\'"]?([a-zA-Z0-9_-]+)[\'"]?\b',  # Database Credentials
        'aws_secret': r'(?i)\b(?:aws[_-]?secret[_-]?key)[=\s]?[\'"]?([a-zA-Z0-9/+=]+)[\'"]?\b',  # AWS Secret Key
        'stripe_key': r'(?i)\b(?:stripe[_-]?secret[_-]?key)[=\s]?[\'"]?([a-zA-Z0-9_-]+)[\'"]?\b',  # Stripe API Key
        'github_token': r'(?i)\b(?:github[_-]?token)[=\s]?[\'"]?([a-zA-Z0-9_-]+)[\'"]?\b',  # GitHub Token
        'google_api_key': r'(?i)\b(?:google[_-]?api[_-]?key)[=\s]?[\'"]?([a-zA-Z0-9_-]+)[\'"]?\b',  # Google API Key
        'ip_address': r'\b(?:\d{1,3}\.){3}\d{1,3}\b',  # IP Address (IPv4)
        'private_key': r'(?i)\b(?:private[_-]?key)[=\s]?[\'"]?([a-zA-Z0-9/+_=-]+)[\'"]?\b',  # Private Key
        'oauth_token': r'(?i)\b(?:oauth[_-]?token)[=\s]?[\'"]?([a-zA-Z0-9_-]+)[\'"]?\b',  # OAuth Token
        'slack_token': r'(?i)\b(?:slack[_-]?token)[=\s]?[\'"]?([a-zA-Z0-9_-]+)[\'"]?\b',  # Slack Token
    }
    matches = {}
    for key, pattern in patterns.items():
        matches[key] = re.findall(pattern, js_content)
    return matches
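# Quick sanity check with a hypothetical snippet:
#   find_sensitive_data('api_key="abc123"')['api_key'] == ['abc123']
# Categories whose pattern matches nothing map to empty lists.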
# Function to scrape JS files from a URL and save the results in an output folder
def scrape_js_from_url(url, output_dir="output"):
    url = ensure_http(url)  # Ensure URL has a scheme
    domain_name = url.split("//")[-1].split("/")[0]
    # Create the output folder if it does not exist
    domain_folder = os.path.join(output_dir, domain_name)
    os.makedirs(domain_folder, exist_ok=True)
    output_file = os.path.join(domain_folder, f"{domain_name}_js_data.txt")
    try:
        # Request the main page
        response = requests.get(url)
        response.raise_for_status()
        # Parse the HTML
        soup = BeautifulSoup(response.text, "html.parser")
        # Find all <script> tags with a src attribute
        script_tags = soup.find_all("script", src=True)
        print("\033[32m" + f"[INFO] Found {len(script_tags)} JS files from {url}")  # Green
        with open(output_file, "a", encoding="utf-8") as f:
            # Write the domain URL
            f.write(f"[INFO] Scraping JS files from: {url}\n")
            # Loop through each <script src="...">
            for script in script_tags:
                js_url = script["src"]
                # Handle relative JS URLs (convert to absolute)
                if not js_url.startswith("http"):
                    js_url = requests.compat.urljoin(url, js_url)
                # Write the JS URL found
                f.write(f"JS URL: {js_url}\n")
                print("\033[32m" + f"[DOWNLOAD] {js_url}")  # Green
                # Download the JS file
                js_content = requests.get(js_url).text
                # Display sensitive data if found
                sensitive_data = find_sensitive_data(js_content)
                for category, items in sensitive_data.items():
                    if items:
                        f.write(f"[WARNING] Found {category} in {js_url}:\n")
                        for item in items:
                            f.write(f"  - {item}\n")
                        print(f"\033[31m[WARNING] Found {category} in {js_url}:")  # Red
                        for item in items:
                            print(f"  - {item}")
        # Display completion message and file save location
        print(f"\033[32m[INFO] Scraping completed. Data saved in {output_file}")  # Green
    except Exception as e:
        print("\033[31m" + f"[ERROR] {e}")  # Red
# Function to scrape from a file containing a list of domains
def scrape_from_file(file_path, output_dir="output"):
    with open(file_path, "r") as f:
        domains = f.read().splitlines()
    for domain in domains:
        if domain.strip():  # Skip blank lines
            scrape_js_from_url(domain.strip(), output_dir=output_dir)
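# The domain list is plain text, one domain per line, e.g.:
#   example.com
#   sub.example.org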
# Main Program
if __name__ == "__main__":
    # Display the opening message
    print_welcome_message()
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Scrape all .js files from a website or list of domains.")
    parser.add_argument("-d", "--domain", help="Single domain to scrape.")
    parser.add_argument("-f", "--file", help="File containing list of domains (one per line).")
    args = parser.parse_args()
    # Check which command argument is present
    if args.domain:
        scrape_js_from_url(args.domain)
    elif args.file:
        scrape_from_file(args.file)
    else:
        # Display usage instructions with clearer formatting
        print("\033[32m" + "[USAGE] Please use one of the following options:")  # Green
        print("\033[32m" + "  -d : Specify a single domain to scrape.")  # Green
        print("\033[32m" + "  -f : Specify a file containing a list of domains (one per line).")  # Green
    print("\033[32m" + "[INFO] Program finished.")  # Green