diff --git a/.circleci/config.yml b/.circleci/config.yml index 6982031..dd759cf 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -27,6 +27,7 @@ workflows: matrix: parameters: image: + - cimg/node:16.20 - cimg/node:18.19 - cimg/node:lts - cimg/node:current @@ -102,7 +103,7 @@ jobs: - run: node -v - attach_workspace: at: . - - run: npm t + - run: npm t -- spec publish: working_directory: ~/project docker: diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d621ef..9c31f3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## [4.0.1](https://github.com/omrilotan/isbot/compare/v4.0.0...v4.0.1) + +- Pattern optimisation (performance improvement) + ## [4.0.0](https://github.com/omrilotan/isbot/compare/v3.7.1...v4.0.0) ### Breaking changes diff --git a/fixtures/browsers.yml b/fixtures/browsers.yml index da39e85..25712b6 100644 --- a/fixtures/browsers.yml +++ b/fixtures/browsers.yml @@ -45,8 +45,9 @@ AOL: - Mozilla/4.0 (compatible; MSIE 6.0; AOL 9.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322) - Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 ADG/11.0.2566 AOLBUILD/11.0.2566 Safari/537.36 Apple TV: - - AppleTV5,3/9.1.1 - - AppleTV6,2/11.1 + - AppName/1 CFNetwork/758.1.2 Darwin/15.0.0 + - AppName/1 CFNetwork/758.1.3 AppleTV/16.0.0 + - Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Mobile/15J380 Arora: - "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Arora/0.10.1 (Git: 1329 e5385f3) Safari/532.1" - Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/533.3 (KHTML, like Gecko) Arora/0.11.0 Safari/533.3 @@ -478,7 +479,6 @@ QtWebEngine: QuickTime: - QuickTime/7.6.6 (qtver=7.6.6;cpu=IA32;os=Mac 10.6.8) RadiosNet: - - Lavf/57.83.100, RadiosNet/2.6.2 (Java; Android) - RadiosNet/2.1.1 (Java; Android) - RadiosNet/2.2.1 (Java; Android) - RadiosNet/2.2.3 (Java; Android) @@ -675,9 +675,7 @@ ZZZ Glitches and Misidentified Browsers - These browsers are legit user agent ev - ; Android 11) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/96.0.4664.45 Mobile DuckDuckGo/5 Safari/537.36 - Android 11) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/86.0.4240.185 Mobile DuckDuckGo/5 Safari/537.36 - Android 13) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/114.0.5735.196 Mobile DuckDuckGo/5 Safari/537.36 - - daumoa,damoa,daum,daumos,duamoa,duam,duamos - GoogleApp/14.35.18.28.arm64 - - Hello, world - LWPrCa2l') OR 941=(SELECT 941 FROM PG_SLEEP(15))-- - Mozilla / 5.0 (Linux; Android 7.0; GHIA_ZEUS_3G) AppleWebKit / 537.36 (KHTML, como Gecko) Chrome / 87.0.4280.101 Mobile Safari / 537.36 - Mozilla/5.0 (compatible; Lucidworks-Anda/2.0/0.10; +; ) diff --git a/package.json b/package.json index 9cc5bf4..5d9a185 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "isbot", - "version": "4.0.0", + "version": "4.0.1", "description": "🤖 Recognise bots/crawlers/spiders using the user agent string.", "keywords": [ "bot", diff --git a/scripts/prepare/build/index.js b/scripts/prepare/build/index.js index 9e3038e..e9ade0a 100644 --- a/scripts/prepare/build/index.js +++ b/scripts/prepare/build/index.js @@ -57,12 +57,15 @@ async function crawlers({ fixturesDirectory, downloadsDirectory }) { // Ignore } } - return crawlers - .concat(downloaded.flat()) - .filter((ua) => !ua.startsWith("#")) // Remove comments - .filter( - (ua = "") => !/ucweb|cubot/i.test(ua), // I don't know why it's in so many crawler lists - ) - .filter((ua) => !browsersList.includes(ua)) // Remove browsers manually added to browsers.yml - .filter((ua = "") => ua.length < 4e3); // Remove very long user agent strings + return crawlers.concat( + // Filter the downloaded crawlers lists + downloaded + .flat() + .filter((ua) => !ua.startsWith("#")) // Remove comments + .filter( + (ua = "") => !/ucweb|cubot/i.test(ua), // I don't know why it's in so many crawler lists + ) + .filter((ua) => !browsersList.includes(ua)) // Remove browsers manually added to browsers.yml + .filter((ua = "") => ua.length < 4e3), // Remove very long user agent strings + ); } diff --git a/src/patterns.json b/src/patterns.json index 1086123..ce9f5b3 100644 --- a/src/patterns.json +++ b/src/patterns.json @@ -14,19 +14,16 @@ "\\[at\\][a-z]", "^12345", "^<", - "^[\\w \\.\\-\\(\\)]+(/v?\\d+(\\.\\d+)?(\\.\\d{1,10})?)?$", + "^[\\w \\.\\-\\(?:\\):]+(?:/v?\\d+(\\.\\d+)?(?:\\.\\d{1,10})?)?(?:,|$)", "^[^ ]{50,}$", "^active", "^ad muncher", "^amaya", "^anglesharp/", - "^anonymous", "^avsdevicesdk/", - "^axios/", "^bidtellect/", "^biglotron", "^btwebclient/", - "^castro", "^clamav[ /]", "^client/", "^cobweb/", @@ -49,7 +46,6 @@ "^jetty/", "^jigsaw", "^linkdex", - "^lwp[-: ]", "^metauri", "^microsoft bits", "^movabletype", @@ -70,10 +66,9 @@ "^svn", "^swcd ", "^taringa", - "^test certificate info", "^thumbor/", "^tumblr/", - "^user-agent:mozilla", + "^user-agent:", "^valid", "^venus/fedoraplanet", "^w3c", @@ -115,7 +110,7 @@ "firephp", "freesafeip", "gomezagent", - "headlesschrome/", + "headless", "httrack", "hubspot marketing grader", "hydra", @@ -129,7 +124,6 @@ "mail\\.ru/", "manager", "monitor", - "morningscore/", "neustar wpm", "nutch", "offbyone", @@ -145,12 +139,15 @@ "proxy", "ptst[ /]\\d", "reader", + "reputation", + "resolver", + "retriever", "rexx;", "rigor", "rss", "scan", + "score", "scrape", - "serp ?reputation ?management", "server", "sogou", "sparkler/", @@ -165,14 +162,12 @@ "torrent", "tracemyfile", "transcoder", - "trendsmapresolver", "twingly recon", "url", "virtuoso", "wappalyzer", "webglance", "webkit2png", - "websitemetadataretriever", "whatcms/", "wordpress", "zgrab"