# Patch summary (review notes placed ahead of the first "diff --git" header):
#   .gitignore    - adds the `venv` and `.vscode` ignore patterns, plus one blank
#                   line after `ttyrecord`.
#   riko/utils.py - imports `random`; adds random_useragent(), which returns
#                   random.choice() over 13 hard-coded browser User-Agent strings;
#                   renames default_user_agent() to default_useragent(); adds
#                   get_useragent(random=False), which returns random_useragent()
#                   when `random` is truthy and default_useragent() otherwise;
#                   the Request built in open() now gets its User-Agent header
#                   from get_useragent().
# NOTE(review): open() calls get_useragent() with no argument, so that call site
#   still sends the '<name>/<version>' agent produced by default_useragent().
# NOTE(review): the `random` parameter of get_useragent() shadows the `random`
#   module inside that function; its body only calls random_useragent(), so the
#   module lookup in random_useragent() is unaffected.
# NOTE(review): default_user_agent is renamed with no alias left behind -- confirm
#   nothing outside these hunks still imports the old name.
# NOTE(review): this patch was received with its line breaks and indentation
#   collapsed; the layout below was rebuilt from the hunk line counts. Placement
#   of blank context lines in the .gitignore hunk and the first riko/utils.py
#   hunk is inferred -- verify against the original commit before applying.
diff --git a/.gitignore b/.gitignore
index d9098d3..fd6fcb2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,7 +52,14 @@ output/*/index.html
 docs/_build
 build/*
 
+# Virtualenv
+venv
+
+# VSCode
+.vscode
+
 # Misc
 *.pstats
 callgraph.svg
 ttyrecord
+
diff --git a/riko/utils.py b/riko/utils.py
index 8277f22..4cac823 100644
--- a/riko/utils.py
+++ b/riko/utils.py
@@ -9,6 +9,7 @@
 import sys
 import itertools as it
 import fcntl
+import random
 
 from riko import __version__
 from math import isnan
@@ -71,7 +72,39 @@ def make_blocking(f):
 make_blocking(sys.stderr)
 
 
-def default_user_agent(name="riko"):
+def random_useragent():
+    # useragents found here - https://techblog.willshouse.com/2012/01/03/most-common-user-agents/
+    return random.choice([
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"),
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"),
+        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 "
+         "(KHTML, like Gecko) Version/13.1 Safari/605.1.15"),
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"),
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"),
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) "
+         "Gecko/20100101 Firefox/75.0"),
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"),
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36"),
+        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36"),
+        ("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) "
+         "Gecko/20100101 Firefox/76.0"),
+        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 "
+         "(KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"),
+        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:75.0) "
+         "Gecko/20100101 Firefox/75.0"),
+        ("Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:75.0) "
+         "Gecko/20100101 Firefox/75.0"),
+    ])
+
+
+def default_useragent(name="riko"):
     """
     Return a string representing the default user agent.
     :rtype: str
@@ -79,6 +112,10 @@ def default_user_agent(name="riko"):
     return '%s/%s' % (name, __version__)
 
 
+def get_useragent(random=False):
+    return random_useragent() if random else default_useragent()
+
+
 class Chainable(object):
     def __init__(self, data, method=None):
         self.data = data
@@ -209,7 +246,7 @@ def open(self, url, **params):
             r.raw.decode_content = self.decode
             response = r.text if self.cache_type else r.raw
         else:
-            req = Request(url, headers={'User-Agent': default_user_agent()})
+            req = Request(url, headers={'User-Agent': get_useragent()})
             try:
                 r = urlopen(req, context=self.context, timeout=self.timeout)
             except TypeError: