From f16d6fbe966258f85ed5b878a74a434174740cde Mon Sep 17 00:00:00 2001 From: almostSouji Date: Mon, 22 Jul 2024 16:55:25 +0200 Subject: [PATCH] feat(node): autocomplete node docs with best effort fallback as before resolves #122 --- package.json | 3 +- .../autocomplete/nodeAutoComplete.ts | 86 ++++++++++ src/functions/node.ts | 63 ++++++-- src/handling/handleApplicationCommand.ts | 4 +- .../handleApplicationCommandAutocomplete.ts | 10 +- src/index.ts | 1 - src/interactions/node.ts | 23 +-- src/util/constants.ts | 1 + yarn.lock | 147 ++++++++++++++++-- 9 files changed, 288 insertions(+), 50 deletions(-) create mode 100644 src/functions/autocomplete/nodeAutoComplete.ts diff --git a/package.json b/package.json index 164f1fe..4f5da82 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "@ltd/j-toml": "^1.38.0", "@vercel/postgres": "^0.9.0", "algoliasearch": "^4.19.1", + "cheerio": "^1.0.0-rc.12", "discord-api-types": "^0.37.83", "dotenv": "^16.3.1", "he": "^1.2.0", @@ -45,7 +46,7 @@ "readdirp": "^3.6.0", "reflect-metadata": "^0.2.2", "turndown": "^7.1.2", - "undici": "^5.28.3" + "undici": "^6.19.3" }, "devDependencies": { "@commitlint/cli": "^17.7.1", diff --git a/src/functions/autocomplete/nodeAutoComplete.ts b/src/functions/autocomplete/nodeAutoComplete.ts new file mode 100644 index 0000000..5099961 --- /dev/null +++ b/src/functions/autocomplete/nodeAutoComplete.ts @@ -0,0 +1,86 @@ +import process from 'node:process'; +import { stringify } from 'node:querystring'; +import { InteractionResponseType } from 'discord-api-types/v10'; +import type { Response } from 'polka'; +import { fetch } from 'undici'; +import { API_BASE_ORAMA, AUTOCOMPLETE_MAX_ITEMS } from '../../util/constants.js'; +import { prepareHeader } from '../../util/respond.js'; +import { truncate } from '../../util/truncate.js'; + +type OramaDocument = { + id: string; + pageSectionTitle: string; + pageTitle: string; + path: string; + siteSection: string; +}; + +type OramaHit = { + document: 
OramaDocument; + id: string; + score: number; +}; + +type OramaResult = { + count: number; + elapsed: { formatted: string; raw: number }; + facets: { siteSection: { count: number; values: { docs: number } } }; + hits: OramaHit[]; +}; + +function autoCompleteMap(elements: OramaDocument[]) { + return elements.map((element) => { + const cleanSectionTitle = element.pageSectionTitle.replaceAll('`', ''); + const name = truncate(`${element.pageTitle} > ${cleanSectionTitle}`, 90, ''); + if (element.path.length > 100) { + return { + name: truncate(`[path too long] ${element.pageTitle} > ${cleanSectionTitle}`, 100, ''), + value: element.pageTitle, + }; + } + + return { + name, + // we cannot use the full url with the node api base appended here, since discord only allows string values of length 100 + // some of `crypto` results are longer, if prefixed + value: element.path, + }; + }); +} + +export async function nodeAutoComplete(res: Response, query: string): Promise { + const full = `${API_BASE_ORAMA}/indexes/${process.env.ORAMA_CONTAINER}/search?api-key=${process.env.ORAMA_KEY}`; + + const result = (await fetch(full, { + method: 'post', + body: stringify({ + version: '1.3.2', + id: process.env.ORAMA_ID, + // eslint-disable-next-line id-length + q: JSON.stringify({ + term: query, + mode: 'fulltext', + limit: 25, + threshold: 0, + boost: { pageSectionTitle: 4, pageSectionContent: 2.5, pageTitle: 1.5 }, + facets: { siteSection: {} }, + returning: ['path', 'pageSectionTitle', 'pageTitle', 'path', 'siteSection'], + }), + }), + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + }, + }).then(async (res) => res.json())) as OramaResult; + + prepareHeader(res); + res.write( + JSON.stringify({ + data: { + choices: autoCompleteMap(result.hits?.slice(0, AUTOCOMPLETE_MAX_ITEMS - 1).map((hit) => hit.document) ?? 
[]), + }, + type: InteractionResponseType.ApplicationCommandAutocompleteResult, + }), + ); + + return res; +} diff --git a/src/functions/node.ts b/src/functions/node.ts index 77ae486..e3ae249 100644 --- a/src/functions/node.ts +++ b/src/functions/node.ts @@ -2,7 +2,9 @@ /* eslint-disable @typescript-eslint/no-unsafe-member-access */ /* eslint-disable @typescript-eslint/no-unsafe-assignment */ +import { URL } from 'node:url'; import { bold, hideLinkEmbed, hyperlink, inlineCode, italic, underscore, userMention } from '@discordjs/builders'; +import * as cheerio from 'cheerio'; import type { Response } from 'polka'; import TurndownService from 'turndown'; import { fetch } from 'undici'; @@ -10,6 +12,8 @@ import type { NodeDocs } from '../types/NodeDocs.js'; import { API_BASE_NODE, EMOJI_ID_NODE } from '../util/constants.js'; import { logger } from '../util/logger.js'; import { prepareErrorResponse, prepareResponse } from '../util/respond.js'; +import { truncate } from '../util/truncate.js'; +import { urlOption } from '../util/url.js'; const td = new TurndownService({ codeBlockStyle: 'fenced' }); @@ -66,27 +70,68 @@ function docsUrl(version: string, source: string, anchorTextRaw: string) { return `${API_BASE_NODE}/docs/${version}/api/${parsePageFromSource(source)}.html#${formatAnchorText(anchorTextRaw)}`; } -const cache: Map = new Map(); +const jsonCache: Map = new Map(); +const docsCache: Map = new Map(); + +export async function nodeAutoCompleteResolve(res: Response, query: string, ephemeral?: boolean) { + const url = urlOption(`${API_BASE_NODE}/${query}`); + + if (!url || !query.startsWith('docs')) { + return nodeSearch(res, query, undefined, ephemeral); + } + + const key = `${url.origin}${url.pathname}`; + let html = docsCache.get(key); + + if (!html) { + const data = await fetch(url.toString()).then(async (response) => response.text()); + docsCache.set(key, data); + html = data; + } + + const $ = cheerio.load(html); + + const possible = ['h1', 'h2', 'h3', 'h4', 
'h5', 'h6']; + + const headingBaseSelectorParts = possible.map((prefix) => `${prefix}:has(${url.hash})`); + const heaidngSelector = headingBaseSelectorParts.join(', '); + const headingCodeSelector = headingBaseSelectorParts.map((part) => `${part} > code`).join(', '); + const paragraphSelector = headingBaseSelectorParts.join(', '); + + const heading = $(heaidngSelector).text().replaceAll('#', ''); + const headingCode = $(headingCodeSelector).text(); + const paragraph = $(paragraphSelector).nextUntil('h4', 'p'); + + const text = paragraph.text(); + const fullSentence = text.split('. ')?.[0]; + const partSentence = text.split('.')?.[0]; + + prepareResponse( + res, + [ + `<:node:${EMOJI_ID_NODE}> ${hyperlink(inlineCode(headingCode.length ? headingCode : heading), url.toString())}`, + `${fullSentence ?? partSentence ?? `${truncate(text, 20, '')}..`}.`, + ].join('\n'), + ephemeral ?? false, + ); + + return res; +} export async function nodeSearch( res: Response, query: string, - version = 'latest-v18.x', + version = 'latest-v20.x', ephemeral?: boolean, ): Promise { const trimmedQuery = query.trim(); try { const url = `${API_BASE_NODE}/dist/${version}/docs/api/all.json`; - let allNodeData = cache.get(url); + let allNodeData = jsonCache.get(url); if (!allNodeData) { - // Get the data for this version const data = (await fetch(url).then(async (response) => response.json())) as NodeDocs; - - // Set it to the map for caching - cache.set(url, data); - - // Set the local parameter for further processing + jsonCache.set(url, data); allNodeData = data; } diff --git a/src/handling/handleApplicationCommand.ts b/src/handling/handleApplicationCommand.ts index 6d8a47f..2b3227c 100644 --- a/src/handling/handleApplicationCommand.ts +++ b/src/handling/handleApplicationCommand.ts @@ -8,7 +8,7 @@ import { algoliaResponse } from '../functions/algoliaResponse.js'; import { resolveOptionsToDocsAutoComplete } from '../functions/autocomplete/docsAutoComplete.js'; import { djsDocs } from 
'../functions/docs.js'; import { mdnSearch } from '../functions/mdn.js'; -import { nodeSearch } from '../functions/node.js'; +import { nodeAutoCompleteResolve } from '../functions/node.js'; import type { Tag } from '../functions/tag.js'; import { showTag, reloadTags } from '../functions/tag.js'; import { testTag } from '../functions/testtag.js'; @@ -117,7 +117,7 @@ export async function handleApplicationCommand( case 'node': { const castArgs = args as ArgumentsOf; - await nodeSearch(res, castArgs.query, castArgs.version, castArgs.hide); + await nodeAutoCompleteResolve(res, castArgs.query, castArgs.hide); break; } diff --git a/src/handling/handleApplicationCommandAutocomplete.ts b/src/handling/handleApplicationCommandAutocomplete.ts index 56d42a3..22336b9 100644 --- a/src/handling/handleApplicationCommandAutocomplete.ts +++ b/src/handling/handleApplicationCommandAutocomplete.ts @@ -5,14 +5,16 @@ import type { Response } from 'polka'; import { algoliaAutoComplete } from '../functions/autocomplete/algoliaAutoComplete.js'; import { djsAutoComplete } from '../functions/autocomplete/docsAutoComplete.js'; import { mdnAutoComplete } from '../functions/autocomplete/mdnAutoComplete.js'; +import { nodeAutoComplete } from '../functions/autocomplete/nodeAutoComplete.js'; import { tagAutoComplete } from '../functions/autocomplete/tagAutoComplete.js'; import type { Tag } from '../functions/tag.js'; import type { DTypesCommand } from '../interactions/discordtypes.js'; import type { GuideCommand } from '../interactions/guide.js'; +import type { NodeCommand } from '../interactions/node.js'; import type { MDNIndexEntry } from '../types/mdn.js'; import { transformInteraction } from '../util/interactionOptions.js'; -type CommandAutoCompleteName = 'discorddocs' | 'docs' | 'dtypes' | 'guide' | 'mdn' | 'tag'; +type CommandAutoCompleteName = 'discorddocs' | 'docs' | 'dtypes' | 'guide' | 'mdn' | 'node' | 'tag'; export async function handleApplicationCommandAutocomplete( res: Response, @@ 
-23,6 +25,12 @@ export async function handleApplicationCommandAutocomplete( const data = message.data; const name = data.name as CommandAutoCompleteName; switch (name) { + case 'node': { + const args = transformInteraction(data.options); + await nodeAutoComplete(res, args.query); + break; + } + case 'docs': { await djsAutoComplete(res, data.options); break; diff --git a/src/index.ts b/src/index.ts index 45435c4..4aa9edb 100644 --- a/src/index.ts +++ b/src/index.ts @@ -122,7 +122,6 @@ process.on('uncaughtException', (err, origin) => { }); process.on('unhandledRejection', (reason, promise) => { - // eslint-disable-next-line no-console logger.error('Unhandled Rejection at:', promise, 'reason:', reason); }); diff --git a/src/interactions/node.ts b/src/interactions/node.ts index 7d51922..6180ab9 100644 --- a/src/interactions/node.ts +++ b/src/interactions/node.ts @@ -7,28 +7,9 @@ export const NodeCommand = { { type: ApplicationCommandOptionType.String, name: 'query', - description: 'Class, method or event to search for', + description: 'Phrase to search for', required: true, - }, - { - type: ApplicationCommandOptionType.String, - name: 'version', - description: 'Node.js version to search documentation for', - required: false, - choices: [ - { - name: 'v16', - value: 'latest-v16.x', - }, - { - name: 'v18 (default)', - value: 'latest-v18.x', - }, - { - name: 'v20 (current)', - value: 'latest-v20.x', - }, - ], + autocomplete: true, }, { type: ApplicationCommandOptionType.Boolean, diff --git a/src/util/constants.ts b/src/util/constants.ts index 8141df4..bd1f287 100644 --- a/src/util/constants.ts +++ b/src/util/constants.ts @@ -28,6 +28,7 @@ export const API_BASE_MDN = 'https://developer.mozilla.org' as const; export const API_BASE_NODE = 'https://nodejs.org' as const; export const API_BASE_ALGOLIA = 'algolia.net' as const; export const API_BASE_DISCORD = 'https://discord.com/api/v9' as const; +export const API_BASE_ORAMA = 'https://cloud.orama.run/v1' as const; export const 
AUTOCOMPLETE_MAX_ITEMS = 25; export const MAX_MESSAGE_LENGTH = 4_000; export const REMOTE_TAG_URL = 'https://raw.githubusercontent.com/discordjs/discord-utils-bot/main/tags' as const; diff --git a/yarn.lock b/yarn.lock index 822f7a7..d1f0b92 100644 --- a/yarn.lock +++ b/yarn.lock @@ -737,13 +737,6 @@ __metadata: languageName: node linkType: hard -"@fastify/busboy@npm:^2.0.0": - version: 2.1.1 - resolution: "@fastify/busboy@npm:2.1.1" - checksum: 42c32ef75e906c9a4809c1e1930a5ca6d4ddc8d138e1a8c8ba5ea07f997db32210617d23b2e4a85fe376316a41a1a0439fc6ff2dedf5126d96f45a9d80754fb2 - languageName: node - linkType: hard - "@hapi/boom@npm:^10.0.1": version: 10.0.1 resolution: "@hapi/boom@npm:10.0.1" @@ -2195,6 +2188,35 @@ __metadata: languageName: node linkType: hard +"cheerio-select@npm:^2.1.0": + version: 2.1.0 + resolution: "cheerio-select@npm:2.1.0" + dependencies: + boolbase: ^1.0.0 + css-select: ^5.1.0 + css-what: ^6.1.0 + domelementtype: ^2.3.0 + domhandler: ^5.0.3 + domutils: ^3.0.1 + checksum: 843d6d479922f28a6c5342c935aff1347491156814de63c585a6eb73baf7bb4185c1b4383a1195dca0f12e3946d737c7763bcef0b9544c515d905c5c44c5308b + languageName: node + linkType: hard + +"cheerio@npm:^1.0.0-rc.12": + version: 1.0.0-rc.12 + resolution: "cheerio@npm:1.0.0-rc.12" + dependencies: + cheerio-select: ^2.1.0 + dom-serializer: ^2.0.0 + domhandler: ^5.0.3 + domutils: ^3.0.1 + htmlparser2: ^8.0.1 + parse5: ^7.0.0 + parse5-htmlparser2-tree-adapter: ^7.0.0 + checksum: 5d4c1b7a53cf22d3a2eddc0aff70cf23cbb30d01a4c79013e703a012475c02461aa1fcd99127e8d83a02216386ed6942b2c8103845fd0812300dd199e6e7e054 + languageName: node + linkType: hard + "chevrotain@npm:^11.0.1": version: 11.0.3 resolution: "chevrotain@npm:11.0.3" @@ -2442,6 +2464,26 @@ __metadata: languageName: node linkType: hard +"css-select@npm:^5.1.0": + version: 5.1.0 + resolution: "css-select@npm:5.1.0" + dependencies: + boolbase: ^1.0.0 + css-what: ^6.1.0 + domhandler: ^5.0.2 + domutils: ^3.0.1 + nth-check: ^2.0.1 + checksum: 
2772c049b188d3b8a8159907192e926e11824aea525b8282981f72ba3f349cf9ecd523fdf7734875ee2cb772246c22117fc062da105b6d59afe8dcd5c99c9bda + languageName: node + linkType: hard + +"css-what@npm:^6.1.0": + version: 6.1.0 + resolution: "css-what@npm:6.1.0" + checksum: b975e547e1e90b79625918f84e67db5d33d896e6de846c9b584094e529f0c63e2ab85ee33b9daffd05bff3a146a1916bec664e18bb76dd5f66cbff9fc13b2bbe + languageName: node + linkType: hard + "cssesc@npm:^3.0.0": version: 3.0.0 resolution: "cssesc@npm:3.0.0" @@ -2636,6 +2678,33 @@ __metadata: languageName: node linkType: hard +"dom-serializer@npm:^2.0.0": + version: 2.0.0 + resolution: "dom-serializer@npm:2.0.0" + dependencies: + domelementtype: ^2.3.0 + domhandler: ^5.0.2 + entities: ^4.2.0 + checksum: cd1810544fd8cdfbd51fa2c0c1128ec3a13ba92f14e61b7650b5de421b88205fd2e3f0cc6ace82f13334114addb90ed1c2f23074a51770a8e9c1273acbc7f3e6 + languageName: node + linkType: hard + +"domelementtype@npm:^2.3.0": + version: 2.3.0 + resolution: "domelementtype@npm:2.3.0" + checksum: ee837a318ff702622f383409d1f5b25dd1024b692ef64d3096ff702e26339f8e345820f29a68bcdcea8cfee3531776b3382651232fbeae95612d6f0a75efb4f6 + languageName: node + linkType: hard + +"domhandler@npm:^5.0.2, domhandler@npm:^5.0.3": + version: 5.0.3 + resolution: "domhandler@npm:5.0.3" + dependencies: + domelementtype: ^2.3.0 + checksum: 0f58f4a6af63e6f3a4320aa446d28b5790a009018707bce2859dcb1d21144c7876482b5188395a188dfa974238c019e0a1e610d2fc269a12b2c192ea2b0b131c + languageName: node + linkType: hard + "domino@npm:^2.1.6": version: 2.1.6 resolution: "domino@npm:2.1.6" @@ -2643,6 +2712,17 @@ __metadata: languageName: node linkType: hard +"domutils@npm:^3.0.1": + version: 3.1.0 + resolution: "domutils@npm:3.1.0" + dependencies: + dom-serializer: ^2.0.0 + domelementtype: ^2.3.0 + domhandler: ^5.0.3 + checksum: e5757456ddd173caa411cfc02c2bb64133c65546d2c4081381a3bafc8a57411a41eed70494551aa58030be9e58574fcc489828bebd673863d39924fb4878f416 + languageName: node + linkType: hard + 
"dot-prop@npm:^5.1.0": version: 5.3.0 resolution: "dot-prop@npm:5.3.0" @@ -2699,6 +2779,13 @@ __metadata: languageName: node linkType: hard +"entities@npm:^4.2.0, entities@npm:^4.4.0": + version: 4.5.0 + resolution: "entities@npm:4.5.0" + checksum: 853f8ebd5b425d350bffa97dd6958143179a5938352ccae092c62d1267c4e392a039be1bae7d51b6e4ffad25f51f9617531fedf5237f15df302ccfb452cbf2d7 + languageName: node + linkType: hard + "env-cmd@npm:^10.1.0": version: 10.1.0 resolution: "env-cmd@npm:10.1.0" @@ -4099,6 +4186,18 @@ __metadata: languageName: node linkType: hard +"htmlparser2@npm:^8.0.1": + version: 8.0.2 + resolution: "htmlparser2@npm:8.0.2" + dependencies: + domelementtype: ^2.3.0 + domhandler: ^5.0.3 + domutils: ^3.0.1 + entities: ^4.4.0 + checksum: 29167a0f9282f181da8a6d0311b76820c8a59bc9e3c87009e21968264c2987d2723d6fde5a964d4b7b6cba663fca96ffb373c06d8223a85f52a6089ced942700 + languageName: node + linkType: hard + "http-cache-semantics@npm:^4.1.1": version: 4.1.1 resolution: "http-cache-semantics@npm:4.1.1" @@ -5992,7 +6091,7 @@ __metadata: languageName: node linkType: hard -"nth-check@npm:^2.1.1": +"nth-check@npm:^2.0.1, nth-check@npm:^2.1.1": version: 2.1.1 resolution: "nth-check@npm:2.1.1" dependencies: @@ -6249,6 +6348,25 @@ __metadata: languageName: node linkType: hard +"parse5-htmlparser2-tree-adapter@npm:^7.0.0": + version: 7.0.0 + resolution: "parse5-htmlparser2-tree-adapter@npm:7.0.0" + dependencies: + domhandler: ^5.0.2 + parse5: ^7.0.0 + checksum: fc5d01e07733142a1baf81de5c2a9c41426c04b7ab29dd218acb80cd34a63177c90aff4a4aee66cf9f1d0aeecff1389adb7452ad6f8af0a5888e3e9ad6ef733d + languageName: node + linkType: hard + +"parse5@npm:^7.0.0": + version: 7.1.2 + resolution: "parse5@npm:7.1.2" + dependencies: + entities: ^4.4.0 + checksum: 59465dd05eb4c5ec87b76173d1c596e152a10e290b7abcda1aecf0f33be49646ea74840c69af975d7887543ea45564801736356c568d6b5e71792fd0f4055713 + languageName: node + linkType: hard + "path-exists@npm:^4.0.0": version: 4.0.0 resolution: 
"path-exists@npm:4.0.0" @@ -7204,6 +7322,7 @@ __metadata: "@typescript-eslint/parser": ^6.5.0 "@vercel/postgres": ^0.9.0 algoliasearch: ^4.19.1 + cheerio: ^1.0.0-rc.12 discord-api-types: ^0.37.83 dotenv: ^16.3.1 env-cmd: ^10.1.0 @@ -7226,7 +7345,7 @@ __metadata: tsyringe: ^4.8.0 turndown: ^7.1.2 typescript: ^5.2.2 - undici: ^5.28.3 + undici: ^6.19.3 languageName: unknown linkType: soft @@ -7940,12 +8059,10 @@ __metadata: languageName: node linkType: hard -"undici@npm:^5.28.3": - version: 5.28.4 - resolution: "undici@npm:5.28.4" - dependencies: - "@fastify/busboy": ^2.0.0 - checksum: a8193132d84540e4dc1895ecc8dbaa176e8a49d26084d6fbe48a292e28397cd19ec5d13bc13e604484e76f94f6e334b2bdc740d5f06a6e50c44072818d0c19f9 +"undici@npm:^6.19.3": + version: 6.19.3 + resolution: "undici@npm:6.19.3" + checksum: 9fa76d751ab4012b9a1cbcb7ddab85eab67bd05b2f67a44676032a042619a096a91d41747467556b6e81bbf51b470231e2d1958d7d5fbecfc4dc3dc6248c8370 languageName: node linkType: hard