diff --git a/CHANGELOG.md b/CHANGELOG.md index 8687a16..06d9ff3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Changed - Require Python 3.8+. +- Add new functions for user-agent header value parsing: `batch_parse_user_agents`, `origin_parse_user_agent`, `parse_user_agent`. - API key is passed as header value and no longer as query parameter. - Client library method are now wrapped in a new _ApiResponse_ object that includes a mean to retrieve metadata about _credits_ and _throttling_ in addition to _data_. diff --git a/README.md b/README.md index 7ab585d..8dbf246 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,16 @@ response = client.origin_lookup_ip() print(response.data) ``` +#### User-Agent Parsing + +```python +from ipregistry import IpregistryClient + +client = IpregistryClient("YOUR_API_KEY") +response = client.parse_user_agent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36') +print(response.data) +``` + More advanced examples are available in the [samples](https://github.com/ipregistry/ipregistry-python/tree/master/samples) folder. @@ -108,9 +118,9 @@ A manner to proceed is to identify bots using the `User-Agent` header. 
To ease this process, the library includes a utility method: ```python -from ipregistry import UserAgent +from ipregistry import UserAgents -is_bot = UserAgent.is_bot('YOUR_USER_AGENT_HEADER_VALUE_HERE') +is_bot = UserAgents.is_bot('YOUR_USER_AGENT_HEADER_VALUE_HERE') ``` ## Other Libraries diff --git a/ipregistry/core.py b/ipregistry/core.py index 842e506..629e6c4 100644 --- a/ipregistry/core.py +++ b/ipregistry/core.py @@ -31,21 +31,27 @@ def __init__(self, key_or_config, **kwargs): raise ValueError("Given request handler instance is not of type IpregistryRequestHandler") def batch_lookup_ips(self, ips, **options): - sparse_cache = [None] * len(ips) + return self.batch_request(ips, self._requestHandler.batch_lookup_ips, **options) + + def batch_parse_user_agents(self, user_agents, **options): + return self.batch_request(user_agents, self._requestHandler.batch_parse_user_agents, **options) + + def batch_request(self, items, request_handler_func, **options): + sparse_cache = [None] * len(items) cache_misses = [] - for i in range(0, len(ips)): - ip = ips[i] - cache_key = self.__build_cache_key(ip, options) + for i in range(len(items)): + item = items[i] + cache_key = self.__build_cache_key(item, options) cache_value = self._cache.get(cache_key) if cache_value is None: - cache_misses.append(ip) + cache_misses.append(item) else: sparse_cache[i] = cache_value - result = [None] * len(ips) + result = [None] * len(items) if len(cache_misses) > 0: - response = self._requestHandler.batch_lookup_ips(cache_misses, options) + response = request_handler_func(cache_misses, options) else: response = ApiResponse( ApiResponseCredits(), @@ -53,18 +59,18 @@ def batch_lookup_ips(self, ips, **options): ApiResponseThrottling() ) - fresh_ip_info = response.data + fresh_item_info = response.data j = 0 k = 0 - for cached_ip_info in sparse_cache: - if cached_ip_info is None: - if not isinstance(fresh_ip_info[k], LookupError): - self._cache.put(self.__build_cache_key(ips[j], options), 
fresh_ip_info[k]) - result[j] = fresh_ip_info[k] + for cached_item_info in sparse_cache: + if cached_item_info is None: + if not isinstance(fresh_item_info[k], LookupError): + self._cache.put(self.__build_cache_key(items[j], options), fresh_item_info[k]) + result[j] = fresh_item_info[k] k += 1 else: - result[j] = cached_ip_info + result[j] = cached_item_info j += 1 response.data = result @@ -80,6 +86,9 @@ def lookup_ip(self, ip='', **options): def origin_lookup_ip(self, **options): return self.__lookup_ip('', options) + def origin_parse_user_agent(self, **options): + return self._requestHandler.origin_parse_user_agent(options) + def __lookup_ip(self, ip, options): cache_key = self.__build_cache_key(ip, options) cache_value = self._cache.get(cache_key) @@ -96,9 +105,14 @@ def __lookup_ip(self, ip, options): ApiResponseThrottling() ) + def parse_user_agent(self, user_agent, **options): + response = self.batch_parse_user_agents([user_agent], **options) + response.data = response.data[0] + return response + @staticmethod - def __build_cache_key(ip, options): - result = ip + def __build_cache_key(key, options): + result = key for key, value in options.items(): if isinstance(value, bool): diff --git a/ipregistry/json.py b/ipregistry/json.py index 6afe94e..fc97d88 100644 --- a/ipregistry/json.py +++ b/ipregistry/json.py @@ -267,8 +267,10 @@ class IpInfo(BaseModel): class RequesterAutonomousSystem(AutonomousSystem): pass - class RequesterIpInfo(IpInfo): user_agent: Optional[UserAgent] = None - model_config = ConfigDict(extra='ignore') \ No newline at end of file + model_config = ConfigDict(extra='ignore') + +class RequesterUserAgent(UserAgent): + pass \ No newline at end of file diff --git a/ipregistry/request.py b/ipregistry/request.py index 9e08a89..ddfba65 100644 --- a/ipregistry/request.py +++ b/ipregistry/request.py @@ -24,7 +24,7 @@ from .__init__ import __version__ from .model import (ApiError, ApiResponse, ApiResponseCredits, ApiResponseThrottling, ClientError, 
IpInfo, - LookupError, RequesterIpInfo) + LookupError, RequesterIpInfo, RequesterUserAgent, UserAgent) class IpregistryRequestHandler(ABC): @@ -35,6 +35,10 @@ def __init__(self, config): def batch_lookup_ips(self, ips, options): pass + @abstractmethod + def batch_parse_user_agents(self, user_agents, options): + pass + @abstractmethod def lookup_ip(self, ip, options): pass @@ -43,8 +47,12 @@ def lookup_ip(self, ip, options): def origin_lookup_ip(self, options): pass - def _build_base_url(self, ip, options): - result = self._config.base_url + "/" + ip + @abstractmethod + def origin_parse_user_agent(self, options): + pass + + def _build_base_url(self, resource, options): + result = self._config.base_url + "/" + resource i = 0 for key, value in options.items(): @@ -80,6 +88,29 @@ def batch_lookup_ips(self, ips, options): except Exception as e: raise ClientError(e) + def batch_parse_user_agents(self, user_agents, options): + response = None + try: + response = requests.post( + self._build_base_url('user_agent', options), + data=json.dumps(user_agents), + headers=self.__headers(), + timeout=self._config.timeout + ) + response.raise_for_status() + results = response.json().get('results', []) + + parsed_results = [ + LookupError(data) if 'code' in data else UserAgent(**data) + for data in results + ] + + return self.build_api_response(response, parsed_results) + except requests.HTTPError: + self.__create_api_error(response) + except Exception as e: + raise ClientError(e) + def lookup_ip(self, ip, options): response = None try: @@ -104,6 +135,26 @@ def lookup_ip(self, ip, options): def origin_lookup_ip(self, options): return self.lookup_ip('', options) + def origin_parse_user_agent(self, options): + response = None + try: + response = requests.get( + self._build_base_url('user_agent', options), + headers=self.__headers(), + timeout=self._config.timeout + ) + response.raise_for_status() + json_response = response.json() + + return self.build_api_response( + response, + 
RequesterUserAgent(**json_response) + ) + except requests.HTTPError: + self.__create_api_error(response) + except Exception as err: + raise ClientError(err) + @staticmethod def build_api_response(response, data): throttling_limit = DefaultRequestHandler.__convert_to_int(response.headers.get('x-rate-limit-limit')) diff --git a/samples/batch-parse-user-agents.py b/samples/batch-parse-user-agents.py new file mode 100644 index 0000000..eb1a7a2 --- /dev/null +++ b/samples/batch-parse-user-agents.py @@ -0,0 +1,32 @@ +""" + Copyright 2019 Ipregistry (https://ipregistry.co). + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from ipregistry import ApiError, ClientError, IpregistryClient + +try: + api_key = "tryout" + client = IpregistryClient(api_key) + response = client.batch_parse_user_agents([ + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36', + 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36' + ]) + print(response.data) +except ApiError as e: + print("API error", e) +except ClientError as e: + print("Client error", e) +except Exception as e: + print("Unexpected error", e) diff --git a/samples/parse-user-agent.py b/samples/parse-user-agent.py new file mode 100644 index 0000000..8a39895 --- /dev/null +++ b/samples/parse-user-agent.py @@ -0,0 +1,29 @@ +""" + Copyright 2019 Ipregistry (https://ipregistry.co). 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" + +from ipregistry import ApiError, ClientError, IpregistryClient + +try: + api_key = "tryout" + client = IpregistryClient(api_key) + response = client.parse_user_agent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36') + print(response.data) +except ApiError as e: + print("API error", e) +except ClientError as e: + print("Client error", e) +except Exception as e: + print("Unexpected error", e) diff --git a/tests/test_client.py b/tests/test_client.py index 8668f39..7f3aaf1 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -17,7 +17,7 @@ import os import unittest -from ipregistry import ApiError, IpInfo, LookupError, ClientError +from ipregistry import ApiError, IpInfo, LookupError, ClientError, UserAgent from ipregistry.cache import InMemoryCache, NoCache from ipregistry.core import IpregistryClient, IpregistryConfig @@ -36,6 +36,21 @@ def test_batch_lookup_ips(self): self.assertEqual('INVALID_IP_ADDRESS', response.data[1].code) self.assertEqual(True, isinstance(response.data[2], IpInfo)) + def test_batch_parse_user_agents(self): + """ + Test batch parse user agents + """ + client = IpregistryClient(os.getenv('IPREGISTRY_API_KEY')) + response = client.batch_parse_user_agents([ + 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36', + 'Mozilla/5.0 (Linux; Android 13; SM-S901U) AppleWebKit/537.36 
(KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36', + 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36' + ]) + + self.assertEqual(4, len(response.data)) + self.assertEqual(5, response.credits.consumed) + def test_client_cache_default(self): """ Test that default cache is an instance of NoCache @@ -68,15 +83,6 @@ def test_client_cache_inmemory_batch_ips_lookup(self): batch_ips_response2 = client.batch_lookup_ips(['1.1.1.1', '1.1.1.3']) self.assertEqual(0, batch_ips_response2.credits.consumed) - def test_origin_lookup_ip(self): - """ - Test that a simple origin IP lookup returns data - """ - client = IpregistryClient(os.getenv('IPREGISTRY_API_KEY')) - response = client.lookup_ip() - self.assertIsNotNone(response.data.ip) - self.assertIsNotNone(response.data.user_agent) - def test_lookup_ip(self): """ Test that a simple IP lookup returns data @@ -107,6 +113,44 @@ def test_lookup_ip_cache(self): self.assertIsNotNone(response.data.ip) self.assertIsNotNone(response.data.company.domain) + def test_lookup_timeout(self): + """ + Test a client error is raised upon connection timeout + """ + client = IpregistryClient(IpregistryConfig(os.getenv('IPREGISTRY_API_KEY'), "https://api.ipregistry.co", + 0.0001)) + with self.assertRaises(ClientError): + client.lookup_ip('1.1.1.1') + + def test_origin_lookup_ip(self): + """ + Test that a simple origin IP lookup returns data + """ + client = IpregistryClient(os.getenv('IPREGISTRY_API_KEY')) + response = client.lookup_ip() + self.assertIsNotNone(response.data.ip) + self.assertIsNotNone(response.data.user_agent) + + def test_origin_parse_user_agent(self): + """ + Test origin parse user agent + """ + client = IpregistryClient(os.getenv('IPREGISTRY_API_KEY')) + response = client.origin_parse_user_agent() + + self.assertIsInstance(response.data, UserAgent) + self.assertEqual(1, 
response.credits.consumed) + + def test_parse_user_agent(self): + """ + Test user agent parsing + """ + client = IpregistryClient(os.getenv('IPREGISTRY_API_KEY')) + response = client.parse_user_agent('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36') + + self.assertIsInstance(response.data, UserAgent) + self.assertEqual(1, response.credits.consumed) + def test_response_metadata(self): """ Test metadata returned for each successful response @@ -121,15 +165,6 @@ def test_response_metadata(self): self.assertIsNotNone(batch_ips_lookup_response.credits.remaining) self.assertIsNotNone(lookup_ip_response.credits.remaining) - def test_lookup_timeout(self): - """ - Test a client error is raised upon connection timeout - """ - client = IpregistryClient(IpregistryConfig(os.getenv('IPREGISTRY_API_KEY'), "https://api.ipregistry.co", - 0.0001)) - with self.assertRaises(ClientError): - client.lookup_ip('1.1.1.1') - if __name__ == '__main__': unittest.main()