From 66cbbd9f118cf229fc2af379f4baacc967326196 Mon Sep 17 00:00:00 2001 From: Rath Rene Date: Wed, 22 Nov 2023 21:33:08 +0100 Subject: [PATCH] lua-only option, removed shell-backend --- README.md | 92 +++++++------ .../geoip_lookup.py | 26 +++- geoip_lookup_backend_shell.py | 80 ----------- .../geoip_lookup_w_backend.lua | 17 ++- lua/geoip_lookup_w_lib.lua | 128 ++++++++++++++++++ test/{test_requests.sh => requests.sh} | 3 +- test/test.sh | 75 +++++----- 7 files changed, 253 insertions(+), 168 deletions(-) rename geoip_lookup_backend_lib.py => backend/geoip_lookup.py (64%) delete mode 100755 geoip_lookup_backend_shell.py rename geoip_lookup.lua => lua/geoip_lookup_w_backend.lua (71%) create mode 100644 lua/geoip_lookup_w_lib.lua rename test/{test_requests.sh => requests.sh} (92%) diff --git a/README.md b/README.md index 3cadb1f..171f273 100644 --- a/README.md +++ b/README.md @@ -6,19 +6,36 @@ Data linking requests to its origin country and ASN/ISP can be very useful when This allows you also to handle requests from specific countries and ASNs (p.e. datacenters/hosting providers) differently than others. +NOTE: This functionality is covered by the [HAProxy Enterprise Maxmind-Module](https://www.haproxy.com/documentation/hapee/latest/load-balancing/geolocation/maxmind/)! Only use this implementation if you are limited to the community edition. + ## Topology +You can implement this in two ways: + +* Use a custom backend to do this lookups +* UNTESTED: Use the [resty-maxminddb LUA library](https://raw.githubusercontent.com/anjia0532/lua-resty-maxminddb/master/lib/resty/maxminddb.lua) to query the MMDB databases directly from LUA + +### Lookup via Backend + 1. Request hits HAProxy 2. 
HAProxy calls LUA script to delegate GeoIP-database lookup - I would prefer to use [io.popen](https://www.lua.org/manual/5.1/manual.html#pdf-io.popen) so LUA directly executes the query, but this [is blocked by HAProxy](https://discourse.haproxy.org/t/haproxy-1-8-update-to-2-7-3-lua-issues/8454) - -3. LUA calls a minimal web-service on localhost that queries the GeoIP-database +3. LUA calls a minimal web-service on localhost that queries the GeoIP-database(s) In this case we use a basic Python3 HTTP-Server - + + + +I might add a Golang backend later on. + +### Lookup via Library + +1. Request hits HAProxy + +2. HAProxy calls LUA script for querying the GeoIP-database(s) + ---- @@ -48,40 +65,45 @@ You will have to download some MMDB GeoIP databases. Per example from [ipinfo.io](https://ipinfo.io/account/data-downloads) or [maxmind](https://maxmind.com)! -### Lookup-Backend +### Lookup -This repository shows two different backend-implementations. +#### via Backend -One calls a shell-util, the other one uses [a library](https://github.com/maxmind/MaxMind-DB-Reader-python). - -#### Shell-Util - -To query the MMDB databases, you will have to install the `mmdblookup` util: +To query the MMDB databases, you will have to install the [maxminddb python-module](https://github.com/maxmind/MaxMind-DB-Reader-python): ```bash -apt install mmdb-bin +python3 -m pip install maxminddb ``` -You will have to update the paths to your database-files in the `geoip_lookup_backend_shell.py` file! +You will have to update the paths to your database-files in the `backend/geoip_lookup.py` file! -#### Library +You need to use the `lua/geoip_lookup_w_backend.lua` script. -To query the MMDB databases, you will have to install the `maxminddb` python-module: +#### via Library -```bash -python3 -m pip install maxminddb -``` +WARNING: UNTESTED -You will have to update the paths to your database-files in the `geoip_lookup_backend_lib.py` file! 
+To query the MMDB databases, you will have to install the [resty-maxminddb LUA library](https://raw.githubusercontent.com/anjia0532/lua-resty-maxminddb/master/lib/resty/maxminddb.lua) and its dependencies. +You need to use the `lua/geoip_lookup_w_lib.lua` script. ---- ## Run +### With LUA-Library + +```bash +# initialize the haproxy map(s) +touch /tmp/haproxy_geoip_country.map +# start haproxy +haproxy -W -f haproxy_example.cfg +``` + +### With Lookup-Backend ```bash # start the web-service -python3 geoip_lookup_backend.py & +python3 backend/geoip_lookup.py & # initialize the haproxy map(s) touch /tmp/haproxy_geoip_country.map # start haproxy @@ -104,30 +126,24 @@ At least IPInfo OR MaxMind databases need to exist! ```bash cd test bash test.sh -> +> +> CLEANUP +> > WARN: UNABLE TO TEST MaxMind databases as they are missing! -> +> > STARTING HAPROXY -> -> TESTING BACKEND with Lookup-Util -> LINKING IPInfo databases -> 127.0.0.1 - - [20/Nov/2023 19:10:14] "GET /?lookup=country&ip=1.1.1.1 HTTP/1.1" 200 - -> 127.0.0.1 - - [20/Nov/2023 19:10:14] "GET /?lookup=continent&ip=1.1.1.1 HTTP/1.1" 200 - -> 127.0.0.1 - - [20/Nov/2023 19:10:14] "GET /?lookup=asn&ip=1.1.1.1 HTTP/1.1" 200 - -> 127.0.0.1 - - [20/Nov/2023 19:10:14] "GET /?lookup=asname&ip=1.1.1.1 HTTP/1.1" 200 - -> REQUEST TIMES: 0.03 => 0.00 (cached) -> -> TESTING BACKEND with Lookup-Lib +> +> TESTING with PYTHON-BACKEND > LINKING IPInfo databases -> 127.0.0.1 - - [20/Nov/2023 19:10:22] "GET /?lookup=country&ip=1.1.1.1 HTTP/1.1" 200 - -> 127.0.0.1 - - [20/Nov/2023 19:10:22] "GET /?lookup=continent&ip=1.1.1.1 HTTP/1.1" 200 - -> 127.0.0.1 - - [20/Nov/2023 19:10:22] "GET /?lookup=asn&ip=1.1.1.1 HTTP/1.1" 200 - -> 127.0.0.1 - - [20/Nov/2023 19:10:22] "GET /?lookup=asname&ip=1.1.1.1 HTTP/1.1" 200 - -> REQUEST TIMES: 0.03 => 0.00 (cached) +> 127.0.0.1 - - [22/Nov/2023 21:21:00] "GET /?lookup=country&ip=1.1.1.1 HTTP/1.1" 200 - +> 127.0.0.1 - - [22/Nov/2023 21:21:00] "GET /?lookup=continent&ip=1.1.1.1 HTTP/1.1" 200 - +> 
127.0.0.1 - - [22/Nov/2023 21:21:00] "GET /?lookup=asn&ip=1.1.1.1 HTTP/1.1" 200 - +> 127.0.0.1 - - [22/Nov/2023 21:21:00] "GET /?lookup=asname&ip=1.1.1.1 HTTP/1.1" 200 - +> REQUEST TIMES: 0.01 => 0.00 (cached) > > STOPPING HAPROXY > > FINISHED - exiting ``` -Feel free to [contribute more test-cases](https://github.com/superstes/haproxy-geoip-lua/blob/latest/test/test_requests.sh)! +Feel free to [contribute more test-cases](https://github.com/superstes/haproxy-geoip/blob/latest/test/requests.sh)! diff --git a/geoip_lookup_backend_lib.py b/backend/geoip_lookup.py similarity index 64% rename from geoip_lookup_backend_lib.py rename to backend/geoip_lookup.py index 4f9d773..1a37c07 100755 --- a/geoip_lookup_backend_lib.py +++ b/backend/geoip_lookup.py @@ -10,14 +10,28 @@ PORT = 6970 -# https://ipinfo.io/account/data-downloads +# for data schema see: +# ipinfo: https://github.com/ipinfo/sample-database +# maxmind: https://github.com/maxmind/MaxMind-DB/tree/main/source-data + +# ipinfo - https://ipinfo.io/account/data-downloads DATABASES = { 'country': {'file': '/tmp/country.mmdb', 'attr': 'country', 'fallback': '00'}, 'continent': {'file': '/tmp/country.mmdb', 'attr': 'continent', 'fallback': '00'}, + 'city': {'file': '/tmp/city.mmdb', 'attr': 'city', 'fallback': '-'}, 'asn': {'file': '/tmp/asn.mmdb', 'attr': 'asn', 'fallback': '0'}, 'asname': {'file': '/tmp/asn.mmdb', 'attr': 'name', 'fallback': '-'}, } +# maxmind +# DATABASES = { +# 'country': {'file': '/tmp/country.mmdb', 'attr': 'country.iso_code', 'fallback': '00'}, +# 'continent': {'file': '/tmp/country.mmdb', 'attr': 'continent.code', 'fallback': '00'}, +# 'city': {'file': '/tmp/city.mmdb', 'attr': 'city.names.en', 'fallback': '-'}, +# 'asn': {'file': '/tmp/asn.mmdb', 'attr': 'autonomous_system_number', 'fallback': '0'}, +# 'asname': {'file': '/tmp/asn.mmdb', 'attr': 'autonomous_system_organization', 'fallback': '-'}, +# } + def _lookup_mmdb(db: dict, ip: str) -> str: try: @@ -25,7 +39,15 @@ def _lookup_mmdb(db: 
dict, ip: str) -> str: return db['fallback'] with open_database(db['file']) as db_reader: - return db_reader.get(ip)[db['attr']] + data = db_reader.get(ip) + for attr in db['attr'].split('.'): + if isinstance(data, dict) and attr in data: + data = data[attr] + + else: + return db['fallback'] + + return str(data) except (RuntimeError, KeyError): return db['fallback'] diff --git a/geoip_lookup_backend_shell.py b/geoip_lookup_backend_shell.py deleted file mode 100755 index 24603e1..0000000 --- a/geoip_lookup_backend_shell.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/python3 - -import subprocess -from pathlib import Path -from http.server import HTTPServer, BaseHTTPRequestHandler -from urllib.parse import parse_qs, urlparse - -PORT = 6970 - -# https://ipinfo.io/account/data-downloads -DATABASES = { - 'country': {'file': '/tmp/country.mmdb', 'attr': 'country', 'fallback': '00'}, - 'continent': {'file': '/tmp/country.mmdb', 'attr': 'continent', 'fallback': '00'}, - 'asn': {'file': '/tmp/asn.mmdb', 'attr': 'asn', 'fallback': '0'}, - 'asname': {'file': '/tmp/asn.mmdb', 'attr': 'name', 'fallback': '-'}, -} - - -def _lookup_mmdb(db: dict, ip: str) -> str: - try: - if not Path(db['file']).is_file(): - return db['fallback'] - - with subprocess.Popen( - ['mmdblookup', '-f', db['file'], '-i', ip, db['attr']], - shell=False, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) as p: - b_stdout, _ = p.communicate(timeout=2) - stdout_raw = b_stdout.decode('utf-8').strip() - if stdout_raw.find('"') != -1: - return stdout_raw.split('"')[1] # "US" - - if stdout_raw == '': - return db['fallback'] - - return stdout_raw - - except (subprocess.TimeoutExpired, subprocess.SubprocessError, subprocess.CalledProcessError, - OSError, IOError, KeyError): - return db['fallback'] - - -def _ensure_str(data: (str, list)) -> str: - if isinstance(data, list): - if len(data) > 0: - return data[0] - - return '' - - return data - - -class WebRequestHandler(BaseHTTPRequestHandler): - def do_GET(self): - q = 
parse_qs(urlparse(self.path).query) - - if 'lookup' not in q or _ensure_str(q['lookup']) not in DATABASES: - self.send_response(400) - self.end_headers() - self.wfile.write('Got unsupported lookup'.encode('utf-8')) - - if 'ip' not in q: - self.send_response(400) - self.end_headers() - self.wfile.write('No IP provided'.encode('utf-8')) - - lookup = _ensure_str(q['lookup']) - ip = _ensure_str(q['ip']) - data = _lookup_mmdb(DATABASES[lookup], ip) - print(f"{lookup} | {ip} => {data}") - self.send_response(200) - self.end_headers() - self.wfile.write(data.encode('utf-8')) - - -if __name__ == '__main__': - server = HTTPServer(('127.0.0.1', PORT), WebRequestHandler) - server.serve_forever() diff --git a/geoip_lookup.lua b/lua/geoip_lookup_w_backend.lua similarity index 71% rename from geoip_lookup.lua rename to lua/geoip_lookup_w_backend.lua index abf7745..849ce62 100644 --- a/geoip_lookup.lua +++ b/lua/geoip_lookup_w_backend.lua @@ -1,8 +1,8 @@ local function http_request(lookup, src) local s = core.tcp() - s:connect("127.0.0.1:6970") - s:send("GET /?lookup=" .. lookup .. "&ip=" .. src .. " HTTP/1.1\r\n\r\n") + s:connect('127.0.0.1:6970') + s:send('GET /?lookup=' .. lookup .. '&ip=' .. src .. 
' HTTP/1.1\r\n\r\n') while true do local line = s:receive('*l') if not line then break end @@ -10,29 +10,28 @@ local function http_request(lookup, src) end local res_body = s:receive('*a') if res_body == nil then - return "00" + return '00' end return res_body end local function lookup_geoip_country(txn) - country_code = http_request("country", txn.f:src()) - txn:set_var('txn.geoip_country', country_code) - + country_code = http_request('country', txn.f:src()) + txn:set_var('txn.geoip_country', country_code) end local function lookup_geoip_continent(txn) - continent_code = http_request("continent", txn.f:src()) + continent_code = http_request('continent', txn.f:src()) txn:set_var('txn.geoip_continent', continent_code) end local function lookup_geoip_asn(txn) - asn = http_request("asn", txn.f:src()) + asn = http_request('asn', txn.f:src()) txn:set_var('txn.geoip_asn', asn) end local function lookup_geoip_asname(txn) - asname = http_request("asname", txn.f:src()) + asname = http_request('asname', txn.f:src()) txn:set_var('txn.geoip_asname', asname) end diff --git a/lua/geoip_lookup_w_lib.lua b/lua/geoip_lookup_w_lib.lua new file mode 100644 index 0000000..39fba8c --- /dev/null +++ b/lua/geoip_lookup_w_lib.lua @@ -0,0 +1,128 @@ +-- WARNING: UNTESTED! 
+ +-- use with: https://raw.githubusercontent.com/anjia0532/lua-resty-maxminddb/master/lib/resty/maxminddb.lua + +-- for data schema see: +-- ipinfo: https://github.com/ipinfo/sample-database +-- maxmind: https://github.com/maxmind/MaxMind-DB/tree/main/source-data + +local file_geoip_country = '/tmp/country.mmdb' +local file_geoip_city = '/tmp/city.mmdb' +local file_geoip_asn = '/tmp/asn.mmdb' +local query_lang = 'en' + +local geoDB_country = require 'maxminddb' +local geoDB_city = require 'maxminddb' +local geoDB_asn = require 'maxminddb' + +local function query_db(src, geoDB) + local res,err = geoDB.lookup(src) + if err then core.Debug(tostring(err)) end + if not res then + return {} + end + return res +end + +local function query_db_country(src) + if not geoDB_country.initted() then + geoDB_country.init(file_geoip_country) + end + return query_db(src, geoDB_country) +end + +local function query_db_city(src) + if not geoDB_city.initted() then + geoDB_city.init(file_geoip_city) + end + return query_db(src, geoDB_city) +end + +local function query_db_asn(src) + if not geoDB_asn.initted() then + geoDB_asn.init(file_geoip_asn) + end + return query_db(src, geoDB_asn) +end + +local function lookup_geoip_country_base(txn, data) + -- ipinfo.io + txn:set_var('txn.geoip_country', data['country'] or '00') + -- maxmind + -- txn:set_var('txn.geoip_country', data['country']['iso_code'] or '00') +end + +local function lookup_geoip_country(txn) + local data = query_db_country(txn.f:src()) + lookup_geoip_country_base(txn, data) +end + +local function lookup_geoip_continent_base(txn, data) + -- ipinfo.io + txn:set_var('txn.geoip_continent', data['continent'] or '00') + -- maxmind + -- txn:set_var('txn.geoip_continent', data['continent']['code'] or '00') + -- OR + -- txn:set_var('txn.geoip_continent', data['continent']['names'][query_lang] or '00') +end + +local function lookup_geoip_continent(txn) + local data = query_db_country(txn.f:src()) + lookup_geoip_continent_base(txn, data) +end + +local function 
lookup_geoip_city_base(txn, data) + -- ipinfo.io + txn:set_var('txn.geoip_city', data['city'] or '-') + -- maxmind + -- txn:set_var('txn.geoip_city', data['city']['names'][query_lang] or '-') +end + +local function lookup_geoip_city(txn) + local data = query_db_city(txn.f:src()) + lookup_geoip_city_base(txn, data) +end + +local function lookup_geoip_country_all(txn) + local data = query_db_country(txn.f:src()) + lookup_geoip_country_base(txn, data) + lookup_geoip_continent_base(txn, data) +end + +local function lookup_geoip_city_all(txn) + -- p.e. maxmind 'city' database includes all those infos + local data = query_db_city(txn.f:src()) + lookup_geoip_country_base(txn, data) + lookup_geoip_continent_base(txn, data) + lookup_geoip_city_base(txn, data) +end + +local function lookup_geoip_asn_base(txn, data) + -- ipinfo + txn:set_var('txn.geoip_asn', data['asn'] or '0') + -- maxmind + -- txn:set_var('txn.geoip_asn', data['autonomous_system_number'] or '0') +end + +local function lookup_geoip_asn(txn) + local data = query_db_asn(txn.f:src()) + lookup_geoip_asn_base(txn, data) +end + +local function lookup_geoip_asname_base(txn, data) + -- ipinfo + txn:set_var('txn.geoip_asname', data['name'] or '-') + -- maxmind + -- txn:set_var('txn.geoip_asname', data['autonomous_system_organization'] or '-') +end + +local function lookup_geoip_asname(txn) + local data = query_db_asn(txn.f:src()) + lookup_geoip_asname_base(txn, data) +end + +core.register_action('lookup_geoip_country', {'tcp-req', 'http-req'}, lookup_geoip_country, 0) +core.register_action('lookup_geoip_continent', {'tcp-req', 'http-req'}, lookup_geoip_continent, 0) +core.register_action('lookup_geoip_city', {'tcp-req', 'http-req'}, lookup_geoip_city, 0) +core.register_action('lookup_geoip_asn', {'tcp-req', 'http-req'}, lookup_geoip_asn, 0) +core.register_action('lookup_geoip_asname', {'tcp-req', 'http-req'}, lookup_geoip_asname, 0) diff --git a/test/test_requests.sh b/test/requests.sh similarity index 92% rename from test/test_requests.sh rename to 
test/requests.sh index 41ce2fa..77a4dd4 100644 --- a/test/test_requests.sh +++ b/test/requests.sh @@ -12,5 +12,4 @@ then echo "ERROR: REQUEST 1 - CACHE NOT HIT" fi -/bin/kill -USR2 "$TEST_PROXY_PID" -sleep 1 \ No newline at end of file +reload_haproxy diff --git a/test/test.sh b/test/test.sh index 51d4572..d2c3a36 100644 --- a/test/test.sh +++ b/test/test.sh @@ -1,14 +1,33 @@ #!/bin/bash -set -euo pipefail - -cd "$(dirname "$0")" - DB_MM_COUNTRY='/tmp/maxmind_country.mmdb' DB_MM_ASN='/tmp/maxmind_asn.mmdb' DB_II_COUNTRY='/tmp/ipinfo_country.mmdb' DB_II_ASN='/tmp/ipinfo_asn.mmdb' +set -euo pipefail + +function cleanup_process() { + search="$1" + pkill -f "$search" --uid "$UID" 2> /dev/null || true +} + +function reload_haproxy() { + pid="$(grep 'worker' < '/tmp/haproxy_test_err.log' | head -n 1 | cut -d '(' -f2 | cut -d ')' -f1)" + /bin/kill -USR2 "$pid" + sleep 1 +} + +echo '' +echo 'CLEANUP' +rm -f /tmp/haproxy_* +cleanup_process 'haproxy' +cleanup_process 'geoip_lookup.py' +cleanup_process 'geoip_lookup_golang' +sleep 1 + +cd "$(dirname "$0")" + TEST_PROXY='http://localhost:6969' TEST_HDR='TEST-SRC' TEST_MM=1 @@ -67,17 +86,16 @@ touch '/tmp/haproxy_geoip_country.map' touch '/tmp/haproxy_geoip_continent.map' touch '/tmp/haproxy_geoip_asn.map' touch '/tmp/haproxy_geoip_asname.map' -ln -sf "$(pwd)/../geoip_lookup.lua" '/tmp/haproxy_geoip_lookup.lua' echo 'STARTING HAPROXY' +ln -sf "$(pwd)/../lua/geoip_lookup_w_backend.lua" '/tmp/haproxy_geoip_lookup.lua' haproxy -W -f haproxy_test.cfg > '/tmp/haproxy_test.log' 2> '/tmp/haproxy_test_err.log' & -sleep 1 -TEST_PROXY_PID="$(grep 'worker' < '/tmp/haproxy_test_err.log' | head -n 1 | cut -d '(' -f2 | cut -d ')' -f1)" set +e +sleep 2 echo '' -echo 'TESTING BACKEND with Lookup-Util' -python3 "$(pwd)/../geoip_lookup_backend_shell.py" > '/tmp/haproxy_geoip_backend_shell.log' & +echo 'TESTING with PYTHON-BACKEND' +python3 "$(pwd)/../backend/geoip_lookup.py" > '/tmp/haproxy_geoip_backend.log' & sleep 2 if [[ "$TEST_MM" == "1" ]] 
@@ -86,7 +104,7 @@ then ln -sf "$DB_MM_COUNTRY" '/tmp/country.mmdb' ln -sf "$DB_MM_ASN" '/tmp/asn.mmdb' - source ./test_requests.sh + source ./requests.sh fi if [[ "$TEST_II" == "1" ]] @@ -95,40 +113,23 @@ then ln -sf "$DB_II_COUNTRY" '/tmp/country.mmdb' ln -sf "$DB_II_ASN" '/tmp/asn.mmdb' - source ./test_requests.sh + source ./requests.sh fi -kill "$(pgrep -f 'geoip_lookup_backend_shell.py')" -sleep 5 - -echo '' -echo 'TESTING BACKEND with Lookup-Lib' -python3 "$(pwd)/../geoip_lookup_backend_lib.py" > '/tmp/haproxy_geoip_backend_lib.log' & +cleanup_process 'geoip_lookup.py' sleep 2 -if [[ "$TEST_MM" == "1" ]] -then - echo 'LINKING MaxMind databases' - ln -sf "$DB_MM_COUNTRY" '/tmp/country.mmdb' - ln -sf "$DB_MM_ASN" '/tmp/asn.mmdb' - - source ./test_requests.sh -fi - -if [[ "$TEST_II" == "1" ]] -then - echo 'LINKING IPInfo databases' - ln -sf "$DB_II_COUNTRY" '/tmp/country.mmdb' - ln -sf "$DB_II_ASN" '/tmp/asn.mmdb' - - source ./test_requests.sh -fi - -kill "$(pgrep -f 'geoip_lookup_backend_lib.py')" +# echo '' +# echo 'TESTING with GOLANG-BACKEND' +# todo: build binary +# "$(pwd)/geoip_lookup_golang" > '/tmp/haproxy_geoip_backend.log' & +# sleep 2 +# cleanup_process 'geoip_lookup_golang' +# sleep 2 echo '' echo 'STOPPING HAPROXY' -pkill 'haproxy' +cleanup_process 'haproxy' sleep 5 echo ''