From 7d14962a640a0b7c4a308372ed4649cdaf901392 Mon Sep 17 00:00:00 2001 From: lihbr Date: Tue, 16 Jul 2024 16:05:53 +0200 Subject: [PATCH] feat: update `htmlAsRichText` API - remove `sync` helpers - rename `converter` option to `serializer` - add `direction`, `include` options - remove `silent` option - allow deep selectors in the `serializer` map - refactor helpers return type - refactor `rehypeRichText` plugin - refactor tests --- package-lock.json | 145 +----- package.json | 4 +- src/richtext/htmlAsRichText.ts | 28 +- src/richtext/index.ts | 7 +- src/richtext/markdownAsRichText.ts | 35 +- src/richtext/types.ts | 7 +- src/richtext/unified/rehypeRichText.ts | 113 ----- src/richtext/utils/RichTextFieldBuilder.ts | 3 +- .../{unified => utils}/hastUtilToRichText.ts | 76 ++- src/richtext/utils/rehypeRichText.ts | 162 ++++++ src/types/value/richText.ts | 1 + .../__snapshots__/htmlAsRichText.test.ts.snap | 462 +++++++++++------ .../__testutils__/testAsRichTextHelper.ts | 85 ++++ test/richtext/htmlAsRichText.test.ts | 472 +++++++++--------- 14 files changed, 856 insertions(+), 744 deletions(-) delete mode 100644 src/richtext/unified/rehypeRichText.ts rename src/richtext/{unified => utils}/hastUtilToRichText.ts (80%) create mode 100644 src/richtext/utils/rehypeRichText.ts create mode 100644 test/richtext/__testutils__/testAsRichTextHelper.ts diff --git a/package-lock.json b/package-lock.json index c8eb3483..b44cd027 100644 --- a/package-lock.json +++ b/package-lock.json @@ -20,9 +20,9 @@ "remark-rehype": "^11.1.0", "unified": "^11.0.5", "unist-util-remove": "^4.0.0", + "unist-util-visit": "^5.0.0", "unist-util-visit-parents": "^6.0.1", - "vfile": "^6.0.1", - "vfile-reporter": "^8.1.1" + "vfile": "^6.0.1" }, "devDependencies": { "@prismicio/mock": "^0.3.1", @@ -1289,12 +1289,6 @@ "@types/node": "*" } }, - "node_modules/@types/supports-color": { - "version": "8.1.3", - "resolved": "https://registry.npmjs.org/@types/supports-color/-/supports-color-8.1.3.tgz", - "integrity": 
"sha512-Hy6UMpxhE3j1tLpl27exp1XqHD7n8chAiNPzWfz16LPZoMMoSc4dzLl6w9qijkEb/r5O1ozdu1CWGA2L83ZeZg==", - "license": "MIT" - }, "node_modules/@types/unist": { "version": "2.0.8", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.8.tgz", @@ -3061,12 +3055,6 @@ "node": ">=4" } }, - "node_modules/eastasianwidth": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", - "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "license": "MIT" - }, "node_modules/emoji-regex": { "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", @@ -9355,135 +9343,6 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/vfile-reporter": { - "version": "8.1.1", - "resolved": "https://registry.npmjs.org/vfile-reporter/-/vfile-reporter-8.1.1.tgz", - "integrity": "sha512-qxRZcnFSQt6pWKn3PAk81yLK2rO2i7CDXpy8v8ZquiEOMLSnPw6BMSi9Y1sUCwGGl7a9b3CJT1CKpnRF7pp66g==", - "license": "MIT", - "dependencies": { - "@types/supports-color": "^8.0.0", - "string-width": "^6.0.0", - "supports-color": "^9.0.0", - "unist-util-stringify-position": "^4.0.0", - "vfile": "^6.0.0", - "vfile-message": "^4.0.0", - "vfile-sort": "^4.0.0", - "vfile-statistics": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/vfile-reporter/node_modules/@types/unist": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.2.tgz", - "integrity": "sha512-dqId9J8K/vGi5Zr7oo212BGii5m3q5Hxlkwy3WpYuKPklmBEvsbMYYyLxAQpSffdLl/gdW0XUpKWFvYmyoWCoQ==", - "license": "MIT" - }, - "node_modules/vfile-reporter/node_modules/ansi-regex": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", - "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", - "license": 
"MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/vfile-reporter/node_modules/emoji-regex": { - "version": "10.3.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-10.3.0.tgz", - "integrity": "sha512-QpLs9D9v9kArv4lfDEgg1X/gN5XLnf/A6l9cs8SPZLRZR3ZkY9+kwIQTxm+fsSej5UMYGE8fdoaZVIBlqG0XTw==", - "license": "MIT" - }, - "node_modules/vfile-reporter/node_modules/string-width": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-6.1.0.tgz", - "integrity": "sha512-k01swCJAgQmuADB0YIc+7TuatfNvTBVOoaUWJjTB9R4VJzR5vNWzf5t42ESVZFPS8xTySF7CAdV4t/aaIm3UnQ==", - "license": "MIT", - "dependencies": { - "eastasianwidth": "^0.2.0", - "emoji-regex": "^10.2.1", - "strip-ansi": "^7.0.1" - }, - "engines": { - "node": ">=16" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/vfile-reporter/node_modules/strip-ansi": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", - "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "license": "MIT", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/vfile-reporter/node_modules/supports-color": { - "version": "9.4.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-9.4.0.tgz", - "integrity": "sha512-VL+lNrEoIXww1coLPOmiEmK/0sGigko5COxI09KzHc2VJXJsQ37UaQ+8quuxjDeA7+KnLGTWRyOXSLLR2Wb4jw==", - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/supports-color?sponsor=1" - } - }, - "node_modules/vfile-reporter/node_modules/unist-util-stringify-position": { - "version": "4.0.0", - "resolved": 
"https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", - "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", - "license": "MIT", - "dependencies": { - "@types/unist": "^3.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/vfile-sort": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/vfile-sort/-/vfile-sort-4.0.0.tgz", - "integrity": "sha512-lffPI1JrbHDTToJwcq0rl6rBmkjQmMuXkAxsZPRS9DXbaJQvc642eCg6EGxcX2i1L+esbuhq+2l9tBll5v8AeQ==", - "license": "MIT", - "dependencies": { - "vfile": "^6.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, - "node_modules/vfile-statistics": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/vfile-statistics/-/vfile-statistics-3.0.0.tgz", - "integrity": "sha512-/qlwqwWBWFOmpXujL/20P+Iuydil0rZZNglR+VNm6J0gpLHwuVM5s7g2TfVoswbXjZ4HuIhLMySEyIw5i7/D8w==", - "license": "MIT", - "dependencies": { - "vfile": "^6.0.0", - "vfile-message": "^4.0.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" - } - }, "node_modules/vfile/node_modules/@types/unist": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.2.tgz", diff --git a/package.json b/package.json index 3f8810f2..4e07bc36 100644 --- a/package.json +++ b/package.json @@ -90,9 +90,9 @@ "remark-rehype": "^11.1.0", "unified": "^11.0.5", "unist-util-remove": "^4.0.0", + "unist-util-visit": "^5.0.0", "unist-util-visit-parents": "^6.0.1", - "vfile": "^6.0.1", - "vfile-reporter": "^8.1.1" + "vfile": "^6.0.1" }, "devDependencies": { "@prismicio/mock": "^0.3.1", diff --git a/src/richtext/htmlAsRichText.ts b/src/richtext/htmlAsRichText.ts index 75f77c5c..12299505 100644 --- a/src/richtext/htmlAsRichText.ts +++ 
b/src/richtext/htmlAsRichText.ts @@ -3,10 +3,7 @@ import { unified } from "unified"; import { AsRichTextConfig, AsRichTextReturnType } from "./types"; -import { rehypeRichText } from "./unified/rehypeRichText"; - -const htmlProcessor = (config?: AsRichTextConfig) => - unified().use(rehypeParse).use(rehypeRichText, config); +import { rehypeRichText } from "./utils/rehypeRichText"; /** * Converts an HTML string to a rich text field. @@ -17,25 +14,14 @@ const htmlProcessor = (config?: AsRichTextConfig) => * * @returns Rich text field equivalent of the provided HTML string. */ -export const htmlAsRichText = ( +export const htmlAsRichText = async ( html: string, config?: AsRichTextConfig, ): Promise => { - return htmlProcessor(config).process(html); -}; + const { result, messages } = await unified() + .use(rehypeParse, { emitParseErrors: true }) + .use(rehypeRichText, config) + .process(html); -/** - * Converts an HTML string to a rich text field synchronously. - * - * @param html - An HTML string - * @param config - Configuration that determines the output of - * `htmlAsRichTextSync()` - * - * @returns Rich text field equivalent of the provided HTML string. 
- */ -export const htmlAsRichTextSync = ( - html: string, - config?: AsRichTextConfig, -): AsRichTextReturnType => { - return htmlProcessor(config).processSync(html); + return { result, warnings: messages }; }; diff --git a/src/richtext/index.ts b/src/richtext/index.ts index a7a629a6..856c7b0b 100644 --- a/src/richtext/index.ts +++ b/src/richtext/index.ts @@ -7,11 +7,8 @@ export { composeSerializers } from "./composeSerializers"; export { RichTextNodeType as Element } from "../types/value/richText"; -export { htmlAsRichText, htmlAsRichTextSync } from "./htmlAsRichText"; -export { - markdownAsRichText, - markdownAsRichTextSync, -} from "./markdownAsRichText"; +export { htmlAsRichText } from "./htmlAsRichText"; +export { markdownAsRichText } from "./markdownAsRichText"; export type { AsRichTextConfig, diff --git a/src/richtext/markdownAsRichText.ts b/src/richtext/markdownAsRichText.ts index debcf5e9..4c3182a4 100644 --- a/src/richtext/markdownAsRichText.ts +++ b/src/richtext/markdownAsRichText.ts @@ -4,14 +4,11 @@ import { unified } from "unified"; import { AsRichTextConfig, AsRichTextReturnType } from "./types"; -import { rehypeRichText } from "./unified/rehypeRichText"; +import { rehypeRichText } from "./utils/rehypeRichText"; // Used for TSDocs only. // eslint-disable-next-line @typescript-eslint/no-unused-vars -import type { htmlAsRichText, htmlAsRichTextSync } from "./htmlAsRichText"; - -const markdownProcessor = (config?: AsRichTextConfig) => - unified().use(remarkParse).use(remarkRehype).use(rehypeRichText, config); +import type { htmlAsRichText } from "./htmlAsRichText"; /** * Converts a markdown string to a rich text field. @@ -26,29 +23,15 @@ const markdownProcessor = (config?: AsRichTextConfig) => * * @returns Rich text field equivalent of the provided markdown string. 
*/ -export const markdownAsRichText = ( +export const markdownAsRichText = async ( markdown: string, config?: AsRichTextConfig, ): Promise => { - return markdownProcessor(config).process(markdown); -}; + const { result, messages } = await unified() + .use(remarkParse) + .use(remarkRehype) + .use(rehypeRichText, config) + .process(markdown); -/** - * Converts an markdown string to a rich text field synchronously. - * - * @remarks - * To convert markdown to a rich text field, this function first converts it to - * HTML. It's essentially a sugar above {@link htmlAsRichTextSync}. - * - * @param markdown - An markdown string - * @param config - Configuration that determines the output of - * `markdownAsRichTextSync()` - * - * @returns Rich text field equivalent of the provided markdown string. - */ -export const markdownAsRichTextSync = ( - markdown: string, - config?: AsRichTextConfig, -): AsRichTextReturnType => { - return markdownProcessor(config).processSync(markdown); + return { result, warnings: messages }; }; diff --git a/src/richtext/types.ts b/src/richtext/types.ts index 8a182bcd..9f0046a6 100644 --- a/src/richtext/types.ts +++ b/src/richtext/types.ts @@ -25,7 +25,7 @@ import { RichTextNodeType, } from "../types/value/richText"; -import { RehypeRichTextConfig } from "./unified/rehypeRichText"; +import { RehypeRichTextConfig } from "./utils/rehypeRichText"; // Serializers @@ -165,4 +165,7 @@ export type AsRichTextConfig = RehypeRichTextConfig; /** * The return type of `*AsRichText` functions. 
*/ -export type AsRichTextReturnType = VFile & { result: RichTextField }; +export type AsRichTextReturnType = { + result: RichTextField; + warnings: VFile["messages"]; +}; diff --git a/src/richtext/unified/rehypeRichText.ts b/src/richtext/unified/rehypeRichText.ts deleted file mode 100644 index e86b9c84..00000000 --- a/src/richtext/unified/rehypeRichText.ts +++ /dev/null @@ -1,113 +0,0 @@ -import { Element, Root } from "hast"; -import { select, selectAll } from "hast-util-select"; -import rehypeMinifyWhitespace from "rehype-minify-whitespace"; -import { Plugin, Processor } from "unified"; -import { remove } from "unist-util-remove"; -import { VFile } from "vfile"; -import reporter from "vfile-reporter"; - -import { RichTextField } from "../../types/value/richText"; - -import { - HastUtilToRichTextConfig, - hastUtilToRichText, -} from "./hastUtilToRichText"; - -export type RehypeRichTextConfig = { - /** - * A CSS selector for the container to extract the rich text from. - * - * @defaultValue The top-level element of the document. - */ - container?: string; - - /** - * A list of CSS selectors to exclude from the document to process. - * - * @defaultValue `[]` - */ - exclude?: string[]; - - /** - * Silent warning messages. - * - * @defaultValue `false` - */ - silent?: boolean; -} & HastUtilToRichTextConfig; - -// unified requires the function to be typed directly with the -// Plugin type to properly infer return types on processors. 
-export const rehypeRichText: Plugin< - [config?: RehypeRichTextConfig], - Root, - RichTextField -> = function rehypeRichText(config) { - // This is a bit dirty, but it seems like that's how rehype intends - // it to be due to JSDocs limitations(?), see: - // https://github.com/rehypejs/rehype/blob/f6912ac680704f1ef4b558ac57cbf0dd62ed0892/packages/rehype-stringify/lib/index.js#L18-L20 - // eslint-disable-next-line @typescript-eslint/no-this-alias - const self = this as unknown as Processor< - undefined, - undefined, - undefined, - Root, - RichTextField - >; - - // We need to exclude nodes _before_ we minify the tree as excluding - // nodes could end up in more whitespaces to trim. - self.use(() => { - return (tree: Root) => { - // Remove excluded nodes if any are specified - if (config?.exclude) { - // We join selector to only run one query - const nodesToExclude = selectAll(config.exclude.join(", "), tree); - - remove(tree, (node) => nodesToExclude.includes(node as Element)); - } - }; - }); - - // `rehypeRichText` _depends_ on `rehypeMinifyWhitespace`, that's why it's - // registered within the plugin rather than on the processor directly. - self.use(rehypeMinifyWhitespace); - - self.compiler = compiler; - - function compiler(tree: Root, file: VFile): RichTextField { - let convertedTree: Root | Element = tree; - - // Extract container node if any is specified. This cannot be done - // before because `tree` cannot be reasigned by a plugin. 
- if (config?.container) { - const element = select(config.container, tree); - - if (!element) { - throw new Error( - `No container matching \`${config?.container}\` could be found in the input AST.`, - ); - } - - convertedTree = element; - } - - const richText = hastUtilToRichText(convertedTree, file, config); - - if (!config?.silent) { - const report = reporter(file, { quiet: true }); - if (report) { - console.warn(report); - } - } - - return richText; - } -}; - -declare module "unified" { - // Register unified processor the result type. - interface CompileResultMap { - RichTextField: RichTextField; - } -} diff --git a/src/richtext/utils/RichTextFieldBuilder.ts b/src/richtext/utils/RichTextFieldBuilder.ts index 939bf4d1..dcac443b 100644 --- a/src/richtext/utils/RichTextFieldBuilder.ts +++ b/src/richtext/utils/RichTextFieldBuilder.ts @@ -29,11 +29,12 @@ export class RichTextFieldBuilder { return this; } - appendTextNode(type: RTTextNodeTypes): this { + appendTextNode(type: RTTextNodeTypes, direction?: "ltr" | "rtl"): this { return this.appendNode({ type: type, text: "", spans: [], + direction: direction || "ltr", }); } diff --git a/src/richtext/unified/hastUtilToRichText.ts b/src/richtext/utils/hastUtilToRichText.ts similarity index 80% rename from src/richtext/unified/hastUtilToRichText.ts rename to src/richtext/utils/hastUtilToRichText.ts index b57360d3..04359f99 100644 --- a/src/richtext/unified/hastUtilToRichText.ts +++ b/src/richtext/utils/hastUtilToRichText.ts @@ -1,5 +1,4 @@ import { Element, Root } from "hast"; -import { matches } from "hast-util-select"; import { toHtml } from "hast-util-to-html"; import { whitespace } from "hast-util-whitespace"; import { toString } from "mdast-util-to-string"; @@ -23,14 +22,10 @@ import { RTNodeTypes, RTTextNodeTypes } from "../utils/isNodeType"; * {@link RichTextNodeType | rich text node types}. * * @remarks - * You can only use "shallow" CSS selectors like `p.primary`, `h1#title`, - * `div[data-type="content"]`. 
Selectors like `p > a` or `h1 + p` are not - * supported. - * @remarks * The `o-list-item` rich text node type is not available. Use the `list-item` * type for any kind of list item instead. The correct list item type will be * inferred on whether the parent is considered a `group-list-item` or - * `group-o-list-item` by the converter. + * `group-o-list-item` by the serializer. * @remarks * The `label` rich text node type is not available as is. Use an object * containing your label name to convert to label nodes instead. For example: @@ -39,7 +34,7 @@ import { RTNodeTypes, RTTextNodeTypes } from "../utils/isNodeType"; * The `span` rich text node type is not available as it is not relevant in the * context of going from HTML to Prismic rich text. */ -type RichTextHTMLMapConverter = Record< +type RichTextHTMLMapSerializer = Record< string, | Exclude< (typeof RichTextNodeType)[keyof typeof RichTextNodeType], @@ -48,7 +43,7 @@ type RichTextHTMLMapConverter = Record< | { label: string } >; -const DEFAULT_CONVERTER: RichTextHTMLMapConverter = { +const DEFAULT_SERIALIZER: RichTextHTMLMapSerializer = { h1: "heading1", h2: "heading2", h3: "heading3", @@ -77,48 +72,33 @@ const VFILE_SOURCE = "prismic"; export type HastUtilToRichTextConfig = { /** - * An optional HTML to rich text converter. Will be merged with the default - * HTML to rich text converter. + * An optional HTML to rich text serializer. Will be merged with the default + * HTML to rich text serializer. */ - converter?: RichTextHTMLMapConverter; -}; + serializer?: RichTextHTMLMapSerializer; -const createFindType = (converter: RichTextHTMLMapConverter) => { - // We separate tag name converters from CSS selector converters to - // attempt to match on tag names first as it's much more performant - // than matching CSS selectors. 
- const tagNameConverter: RichTextHTMLMapConverter = {}; - const cssSelectorConverter: [string, RichTextHTMLMapConverter[string]][] = []; - - Object.entries(converter).forEach(([key, value]) => { - // HTML tag names are single word, lowercase strings, a-z and 1-6 (headings). - // See: https://regex101.com/r/LILLWH/1 - if (key.match(/^[a-z]+[1-6]?$/)) { - tagNameConverter[key] = value; - } else { - cssSelectorConverter.push([key, value]); - } - }); + /** + * The direction of the processed text, left-to-right or right-to-left. + * + * @defaultValue `"ltr"` + */ + direction?: "ltr" | "rtl"; +}; +const createFindType = (serializer: RichTextHTMLMapSerializer) => { return ( node: Element, ): - | { type: Extract } + | { type: Extract } | Pick | null => { - let match: RichTextHTMLMapConverter[string] | null = null; + let match: RichTextHTMLMapSerializer[string] | null = null; - if (node.tagName in tagNameConverter) { - match = tagNameConverter[node.tagName]; + // We give priority to CSS selectors over tag names. + if (node.matchesSerializer && node.matchesSerializer in serializer) { + match = serializer[node.matchesSerializer]; } else { - for (let i = 0; i < cssSelectorConverter.length; i++) { - const [selector, value] = cssSelectorConverter[i]; - - if (matches(selector, node)) { - match = value; - break; - } - } + match = serializer[node.tagName]; } if (typeof match === "string") { @@ -138,12 +118,12 @@ export const hastUtilToRichText = ( ): RichTextField => { const builder = new RichTextFieldBuilder(); - const converter = { - ...DEFAULT_CONVERTER, - ...config?.converter, + const serializer = { + ...DEFAULT_SERIALIZER, + ...config?.serializer, }; - const findType = createFindType(converter); + const findType = createFindType(serializer); // Keep track of the last node type to append text nodes to in case // of an image or an embed node is present inside a paragraph. @@ -189,9 +169,9 @@ export const hastUtilToRichText = ( ? 
RichTextNodeType.oListItem : RichTextNodeType.listItem; - builder.appendTextNode(listItemType); + builder.appendTextNode(listItemType, config?.direction); } else { - builder.appendTextNode(type); + builder.appendTextNode(type, config?.direction); } } else if (isNodeType.image(type)) { // TODO: handle image @@ -291,7 +271,7 @@ export const hastUtilToRichText = ( // Happens when we extract an image/embed node inside an RTTextNode. if (!isNodeType.rtText(lastRTNodeType) && lastRTTextNodeType) { lastRTNodeType = lastRTTextNodeType; - builder.appendTextNode(lastRTTextNodeType); + builder.appendTextNode(lastRTTextNodeType, config?.direction); } if (type === "strong" || type === "em") { @@ -335,7 +315,7 @@ export const hastUtilToRichText = ( // Happens when we extract an image/embed node inside an RTTextNode. if (lastRTTextNodeType) { lastRTNodeType = lastRTTextNodeType; - builder.appendTextNode(lastRTTextNodeType); + builder.appendTextNode(lastRTTextNodeType, config?.direction); builder.appendText(node.value); } else { throw error; diff --git a/src/richtext/utils/rehypeRichText.ts b/src/richtext/utils/rehypeRichText.ts new file mode 100644 index 00000000..728cff1e --- /dev/null +++ b/src/richtext/utils/rehypeRichText.ts @@ -0,0 +1,162 @@ +import { Element, Root } from "hast"; +import { select, selectAll } from "hast-util-select"; +import rehypeMinifyWhitespace from "rehype-minify-whitespace"; +import { Plugin, Processor } from "unified"; +import { remove } from "unist-util-remove"; +import { SKIP, visit } from "unist-util-visit"; +import { VFile } from "vfile"; + +import { RichTextField } from "../../types/value/richText"; + +import { + HastUtilToRichTextConfig, + hastUtilToRichText, +} from "./hastUtilToRichText"; + +export type RehypeRichTextConfig = { + /** + * A CSS selector that targets the section of the document to convert to rich + * text. + * + * @example `div.post` + * + * @defaultValue The top-level element of the document. 
+ */ + container?: string; + + /** + * A list of CSS selectors to exclude matching nodes from the document to + * process. + * + * @example `[".hidden", "aside"]` + * + * @defaultValue `[]` - No nodes are excluded. + */ + exclude?: string[]; + + /** + * A list of CSS selectors to include only matching nodes from the document to + * process. + * + * @example `["p", "img"]` + * + * @defaultValue All nodes are included. + */ + include?: string[]; +} & HastUtilToRichTextConfig; + +// unified requires the function to be typed directly with the +// Plugin type to properly infer return types on processors. +export const rehypeRichText: Plugin< + [config?: RehypeRichTextConfig], + Root, + RichTextField +> = function rehypeRichText(config) { + // This is a bit dirty, but it seems like that's how rehype intends + // it to be due to JSDocs limitations(?), see: + // https://github.com/rehypejs/rehype/blob/f6912ac680704f1ef4b558ac57cbf0dd62ed0892/packages/rehype-stringify/lib/index.js#L18-L20 + // eslint-disable-next-line @typescript-eslint/no-this-alias + const self = this as unknown as Processor< + undefined, + undefined, + undefined, + Root, + RichTextField + >; + + // We need to exclude nodes _before_ we minify the tree as excluding + // nodes could end up in more whitespaces to trim. + self.use(() => { + return (tree: Root) => { + // Extract container node if any is specified. + if (config?.container) { + const element = select(config.container, tree); + + if (!element) { + throw new Error( + `No container matching \`${config?.container}\` could be found in the input AST.`, + ); + } + + // We cannot reassign the tree itself, so we instead replace + // its children with the found element. 
+ tree.children = [element]; + } + + // Remove excluded nodes if any are specified + if (config?.exclude) { + // We join selectors to only run one query + const nodesToExclude = selectAll(config.exclude.join(", "), tree); + + remove(tree, (node) => nodesToExclude.includes(node as Element)); + } + + // Keep only the nodes matching the `include` selectors + if (config?.include) { + const nodesToInclude: Element[] = []; + + // We join selectors to only run one query + const selector = config.include.join(", "); + const rawNodesToInclude = selectAll(selector, tree); + + // We walk the tree to exclude matching nodes that are children of other matching nodes. + visit(tree, (node) => { + if (rawNodesToInclude.includes(node as Element)) { + nodesToInclude.push(node as Element); + + // Stop traversing this part of the tree since we found its matching parent node. + return SKIP; + } + }); + + // We cannot reassign the tree itself, so we instead replace + // its children with the matching nodes. + tree.children = nodesToInclude; + } + + // Mark nodes matching CSS selectors + if (config?.serializer) { + for (const key in config.serializer) { + // HTML tag names are single word, lowercase strings, a-z and 1-6 (headings). + // Here we want to match anything that's not a valid HTML tag name and treat + // it as a CSS selector. See: https://regex101.com/r/LILLWH/1 + if (!/^[a-z]+[1-6]?$/.test(key)) { + const matches = selectAll(key, tree); + + for (let i = 0; i < matches.length; i++) { + matches[i].matchesSerializer = key; + } + } + } + } + }; + }); + + // `rehypeRichText` _depends_ on `rehypeMinifyWhitespace`, that's why it's + // registered within the plugin rather than on the processor directly. + self.use(rehypeMinifyWhitespace); + + self.compiler = compiler; + + function compiler(tree: Root, file: VFile): RichTextField { + return hastUtilToRichText(tree, file, config); + } +}; + +declare module "unified" { + // Register the result type with the unified processor. 
+ interface CompileResultMap { + RichTextField: RichTextField; + } +} + +declare module "hast" { + // Extend hast node with a rich text value. + interface Element { + /** + * A serializer this node matches to. Nodes are marked with this property + * when they match a CSS selector from the serializer map. + */ + matchesSerializer?: string; + } +} diff --git a/src/types/value/richText.ts b/src/types/value/richText.ts index cf9a378c..96e5d7ab 100644 --- a/src/types/value/richText.ts +++ b/src/types/value/richText.ts @@ -40,6 +40,7 @@ export const RichTextNodeType = { export interface RTTextNodeBase { text: string; spans: RTInlineNode[]; + direction?: "ltr" | "rtl"; } /** diff --git a/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap b/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap index 84b6ba20..badb2157 100644 --- a/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap +++ b/test/richtext/__snapshots__/htmlAsRichText.test.ts.snap @@ -1,203 +1,212 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html -exports[`transforms HTML to rich text ('compacts directly adjacent identical spans (hyperlink)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > basic > empty 1`] = `[]`; + +exports[`transforms HTML to rich text > basic > multiple tags 1`] = ` [ { - "spans": [ - { - "data": { - "link_type": "Web", - "target": undefined, - "url": "https://prismic.io", - }, - "end": 17, - "start": 6, - "type": "hyperlink", - }, - ], + "direction": "ltr", + "spans": [], "text": "lorem ipsum dolor sit amet", + "type": "heading1", + }, + { + "direction": "ltr", + "spans": [], + "text": "consectetur adipiscing elit", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('compacts directly adjacent identical spans (label)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > basic > single tag 1`] = ` [ { - "spans": [ - { - "data": { - "label": "underline", - }, - "end": 17, - "start": 6, 
- "type": "label", - }, - ], + "direction": "ltr", + "spans": [], "text": "lorem ipsum dolor sit amet", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('compacts directly adjacent identical spans (strong, em)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > container > converts only the given container 1`] = ` [ { - "spans": [ - { - "end": 17, - "start": 6, - "type": "strong", - }, - { - "end": 38, - "start": 22, - "type": "em", - }, - ], - "text": "lorem ipsum dolor sit amet consectetur adipiscing elit", + "direction": "ltr", + "spans": [], + "text": "consectetur adipiscing elit", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('compacts nested directly adjacent identical spans') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > direction > marks text as left-to-right 1`] = ` [ { - "spans": [ - { - "end": 11, - "start": 6, - "type": "strong", - }, - { - "end": 17, - "start": 9, - "type": "em", - }, - ], + "direction": "ltr", + "spans": [], "text": "lorem ipsum dolor sit amet", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('converts only the given container') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > direction > marks text as right-to-left 1`] = ` [ { + "direction": "rtl", "spans": [], - "text": "consectetur adipiscing elit", + "text": "lorem ipsum dolor sit amet", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('does not compact directly adjacent different spans (hyperlink)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > exclude > excludes the given complex selectors 1`] = ` [ { + "direction": "ltr", + "spans": [], + "text": "ipsum dolor sit amet", + "type": "heading1", + }, + { + "direction": "ltr", "spans": [ { "data": { "link_type": "Web", "target": undefined, - "url": 
"https://prismic.io", + "url": "#", }, "end": 11, - "start": 6, - "type": "hyperlink", - }, - { - "data": { - "link_type": "Web", - "target": undefined, - "url": "https://google.com", - }, - "end": 17, - "start": 11, + "start": 0, "type": "hyperlink", }, ], - "text": "lorem ipsum dolor sit amet", + "text": "consectetur adipiscing elit", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('does not compact directly adjacent different spans (label)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > exclude > excludes the given selectors 1`] = ` [ { - "spans": [ - { - "data": { - "label": "underline", - }, - "end": 11, - "start": 6, - "type": "label", - }, - { - "data": { - "label": "strikethrough", - }, - "end": 17, - "start": 11, - "type": "label", - }, - ], + "direction": "ltr", + "spans": [], + "text": "consectetur adipiscing elit", + "type": "paragraph", + }, +] +`; + +exports[`transforms HTML to rich text > configuration > include > dedupes matches that are child of other matches 1`] = ` +[ + { + "direction": "ltr", + "spans": [], "text": "lorem ipsum dolor sit amet", "type": "paragraph", }, + { + "direction": "ltr", + "spans": [], + "text": "consectetur adipiscing elit", + "type": "paragraph", + }, ] `; -exports[`transforms HTML to rich text ('embed') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > include > includes only the given complex selectors 1`] = ` [ { - "oembed": { - "embed_url": "https://www.youtube.com/embed/wkS1bf7BLjs?feature=oembed", - "height": 150, - "html": "", - "title": "幾田りら「ハミング」Official Music Video", - "type": "rich", - "version": "1.0", - "width": 200, - }, - "type": "embed", + "direction": "ltr", + "spans": [], + "text": "consectetur adipiscing elit", + "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('empty') > produces valid rich text field 1`] = `[]`; +exports[`transforms HTML to rich text > 
configuration > include > includes only the given selectors 1`] = ` +[ + { + "direction": "ltr", + "spans": [], + "text": "lorem ipsum dolor sit amet", + "type": "heading1", + }, +] +`; -exports[`transforms HTML to rich text ('excludes the given selectors') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > serializer > complex selector 1`] = ` [ { + "direction": "ltr", "spans": [], - "text": "ipsum dolor sit amet", + "text": "lorem ipsum dolor sit amet", "type": "heading1", }, { - "spans": [ - { - "data": { - "link_type": "Web", - "target": undefined, - "url": "#", - }, - "end": 11, - "start": 0, - "type": "hyperlink", - }, - ], + "direction": "ltr", + "spans": [], + "text": "consectetur adipiscing elit", + "type": "paragraph", + }, +] +`; + +exports[`transforms HTML to rich text > configuration > serializer > selector 1`] = ` +[ + { + "direction": "ltr", + "spans": [], + "text": "lorem ipsum dolor sit amet", + "type": "heading1", + }, + { + "direction": "ltr", + "spans": [], "text": "consectetur adipiscing elit", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('extracts images from rich text text nodes and resumes with the same text node (adjacent spans)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > configuration > serializer > tag name 1`] = ` +[ + { + "direction": "ltr", + "spans": [], + "text": "lorem ipsum dolor sit amet", + "type": "heading1", + }, +] +`; + +exports[`transforms HTML to rich text > embed > iframe 1`] = ` +[ + { + "oembed": { + "embed_url": "https://www.youtube.com/embed/wkS1bf7BLjs?feature=oembed", + "height": 150, + "html": "", + "title": "幾田りら「ハミング」Official Music Video", + "type": "rich", + "version": "1.0", + "width": 200, + }, + "type": "embed", + }, +] +`; + +exports[`transforms HTML to rich text > image > extracts image in text nodes and resume previous text node > adjacent spans 1`] = ` [ { + "direction": "ltr", "spans": [ { "end": 
11, @@ -226,6 +235,7 @@ exports[`transforms HTML to rich text ('extracts images from rich text text node "url": "https://example.com/foo.png", }, { + "direction": "ltr", "spans": [ { "end": 5, @@ -239,9 +249,44 @@ exports[`transforms HTML to rich text ('extracts images from rich text text node ] `; -exports[`transforms HTML to rich text ('extracts images from rich text text nodes and resumes with the same text node (spans)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > image > extracts image in text nodes and resume previous text node > basic 1`] = ` +[ + { + "direction": "ltr", + "spans": [], + "text": "lorem ipsum", + "type": "paragraph", + }, + { + "alt": "bar", + "copyright": null, + "dimensions": { + "height": 0, + "width": 0, + }, + "edit": { + "background": "transparent", + "x": 0, + "y": 0, + "zoom": 1, + }, + "id": "", + "type": "image", + "url": "https://example.com/foo.png", + }, + { + "direction": "ltr", + "spans": [], + "text": "dolor sit amet", + "type": "paragraph", + }, +] +`; + +exports[`transforms HTML to rich text > image > extracts image in text nodes and resume previous text node > spans 1`] = ` [ { + "direction": "ltr", "spans": [ { "end": 5, @@ -270,6 +315,7 @@ exports[`transforms HTML to rich text ('extracts images from rich text text node "url": "https://example.com/foo.png", }, { + "direction": "ltr", "spans": [ { "end": 9, @@ -283,15 +329,10 @@ exports[`transforms HTML to rich text ('extracts images from rich text text node ] `; -exports[`transforms HTML to rich text ('extracts images from rich text text nodes and resumes with the same text node') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > image > non-prismic 1`] = ` [ { - "spans": [], - "text": "lorem ipsum", - "type": "paragraph", - }, - { - "alt": "bar", + "alt": "foo", "copyright": null, "dimensions": { "height": 0, @@ -307,15 +348,10 @@ exports[`transforms HTML to rich text ('extracts images from rich text 
text node "type": "image", "url": "https://example.com/foo.png", }, - { - "spans": [], - "text": "dolor sit amet", - "type": "paragraph", - }, ] `; -exports[`transforms HTML to rich text ('image (prismic)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > image > prismic 1`] = ` [ { "alt": "foo", @@ -337,46 +373,74 @@ exports[`transforms HTML to rich text ('image (prismic)') > produces valid rich ] `; -exports[`transforms HTML to rich text ('image') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > spans > directly adjacent spans > compacts similar > hyperlink 1`] = ` [ { - "alt": "foo", - "copyright": null, - "dimensions": { - "height": 0, - "width": 0, - }, - "edit": { - "background": "transparent", - "x": 0, - "y": 0, - "zoom": 1, - }, - "id": "", - "type": "image", - "url": "https://example.com/foo.png", + "direction": "ltr", + "spans": [ + { + "data": { + "link_type": "Web", + "target": undefined, + "url": "https://prismic.io", + }, + "end": 17, + "start": 6, + "type": "hyperlink", + }, + ], + "text": "lorem ipsum dolor sit amet", + "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('multiple tags') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > spans > directly adjacent spans > compacts similar > label 1`] = ` [ { - "spans": [], + "direction": "ltr", + "spans": [ + { + "data": { + "label": "underline", + }, + "end": 17, + "start": 6, + "type": "label", + }, + ], "text": "lorem ipsum dolor sit amet", - "type": "heading1", + "type": "paragraph", }, +] +`; + +exports[`transforms HTML to rich text > spans > directly adjacent spans > compacts similar > nested spans 1`] = ` +[ { - "spans": [], - "text": "consectetur adipiscing elit", + "direction": "ltr", + "spans": [ + { + "end": 11, + "start": 6, + "type": "strong", + }, + { + "end": 17, + "start": 9, + "type": "em", + }, + ], + "text": "lorem ipsum dolor sit amet", "type": "paragraph", }, ] `; 
-exports[`transforms HTML to rich text ('nested spans') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > spans > directly adjacent spans > compacts similar > strong, em 1`] = ` [ { + "direction": "ltr", "spans": [ { "end": 17, @@ -384,18 +448,40 @@ exports[`transforms HTML to rich text ('nested spans') > produces valid rich tex "type": "strong", }, { - "end": 17, - "start": 9, + "end": 38, + "start": 22, "type": "em", }, + ], + "text": "lorem ipsum dolor sit amet consectetur adipiscing elit", + "type": "paragraph", + }, +] +`; + +exports[`transforms HTML to rich text > spans > directly adjacent spans > does not compact different > hyperlink 1`] = ` +[ + { + "direction": "ltr", + "spans": [ { "data": { "link_type": "Web", "target": undefined, "url": "https://prismic.io", }, + "end": 11, + "start": 6, + "type": "hyperlink", + }, + { + "data": { + "link_type": "Web", + "target": undefined, + "url": "https://google.com", + }, "end": 17, - "start": 12, + "start": 11, "type": "hyperlink", }, ], @@ -405,19 +491,38 @@ exports[`transforms HTML to rich text ('nested spans') > produces valid rich tex ] `; -exports[`transforms HTML to rich text ('single tag') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > spans > directly adjacent spans > does not compact different > label 1`] = ` [ { - "spans": [], + "direction": "ltr", + "spans": [ + { + "data": { + "label": "underline", + }, + "end": 11, + "start": 6, + "type": "label", + }, + { + "data": { + "label": "strikethrough", + }, + "end": 17, + "start": 11, + "type": "label", + }, + ], "text": "lorem ipsum dolor sit amet", "type": "paragraph", }, ] `; -exports[`transforms HTML to rich text ('spans (hyperlink)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > spans > hyperlink 1`] = ` [ { + "direction": "ltr", "spans": [ { "data": { @@ -446,9 +551,10 @@ exports[`transforms HTML to rich text ('spans (hyperlink)') > produces valid 
ric ] `; -exports[`transforms HTML to rich text ('spans (label)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > spans > label 1`] = ` [ { + "direction": "ltr", "spans": [ { "data": { @@ -465,9 +571,42 @@ exports[`transforms HTML to rich text ('spans (label)') > produces valid rich te ] `; -exports[`transforms HTML to rich text ('spans (strong, em)') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > spans > nested spans 1`] = ` +[ + { + "direction": "ltr", + "spans": [ + { + "end": 17, + "start": 6, + "type": "strong", + }, + { + "end": 17, + "start": 9, + "type": "em", + }, + { + "data": { + "link_type": "Web", + "target": undefined, + "url": "https://prismic.io", + }, + "end": 17, + "start": 12, + "type": "hyperlink", + }, + ], + "text": "lorem ipsum dolor sit amet", + "type": "paragraph", + }, +] +`; + +exports[`transforms HTML to rich text > spans > strong, em 1`] = ` [ { + "direction": "ltr", "spans": [ { "end": 11, @@ -486,9 +625,10 @@ exports[`transforms HTML to rich text ('spans (strong, em)') > produces valid ri ] `; -exports[`transforms HTML to rich text ('strips complex indentation') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > whistespaces > strips complex indentation 1`] = ` [ { + "direction": "ltr", "spans": [], "text": "lorem ipsum dolor sit amet consectetur adipiscing elit", "type": "paragraph", @@ -496,9 +636,10 @@ exports[`transforms HTML to rich text ('strips complex indentation') > produces ] `; -exports[`transforms HTML to rich text ('strips indentation') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > whistespaces > strips indentation 1`] = ` [ { + "direction": "ltr", "spans": [], "text": "lorem ipsum dolor sit amet", "type": "paragraph", @@ -506,9 +647,10 @@ exports[`transforms HTML to rich text ('strips indentation') > produces valid ri ] `; -exports[`transforms HTML to rich text ('treats \`
\` as new lines') > produces valid rich text field 1`] = ` +exports[`transforms HTML to rich text > whistespaces > treats \`
\` as new lines 1`] = ` [ { + "direction": "ltr", "spans": [], "text": "lorem ipsum dolor sit amet consectetur adipiscing elit", diff --git a/test/richtext/__testutils__/testAsRichTextHelper.ts b/test/richtext/__testutils__/testAsRichTextHelper.ts new file mode 100644 index 00000000..769ee55a --- /dev/null +++ b/test/richtext/__testutils__/testAsRichTextHelper.ts @@ -0,0 +1,85 @@ +import { expect, it } from "vitest"; + +import { LinkType, asHTML } from "../../../src"; +import { + AsRichTextConfig, + htmlAsRichText, + markdownAsRichText, +} from "../../../src/richtext"; + +type TestAsRichTextHelperArgs = { + input: string; + + config?: AsRichTextConfig; + + /** + * The rich text format is a lossy representation of HTML. Namely it does not + * preserves indentation and applies some optimizations to the output such as + * merging directly adjacent identical spans. + * + * By default, the test suite will expect the HTML representation of the + * output to exactly match the input. This flag can be used to tell it to + * expect the output to not match the input instead. + */ + expectAsHTMLNotToMatchInput?: boolean; +}; + +const testAsRichTextHelperFactory = ( + description: string, + args: TestAsRichTextHelperArgs, + helper: typeof htmlAsRichText | typeof markdownAsRichText, +): void => { + it(description, async () => { + const output = await helper(args.input, args.config); + + expect(output.result).toMatchSnapshot(); + + const outputAsHTML = asHTML(output.result, { + serializer: { + // A simplified hyperlink serializer so that we don't have + // to append `rel="noopener noreferrer"` to every link in + // the test cases. + hyperlink: ({ node, children }) => { + const maybeTarget = + node.data.link_type === LinkType.Web && node.data.target + ? ` target="${node.data.target}"` + : ""; + + return `${children}`; + }, + // A simplified image serializer so that we don't have to + // wrap the images in a `div` element like the default + // serializer does. 
+ image: ({ node }) => `${node.alt}`, + // A simplified embed serializer so that we don't have to + // wrap the embeds in a `div` element with the various data + // attributes the default serializer applies. + embed: ({ node }) => node.oembed.html, + }, + }); + + if (!args.expectAsHTMLNotToMatchInput) { + expect(outputAsHTML).toBe(args.input); + } else { + expect(outputAsHTML).not.toBe(args.input); + } + }); +}; + +export const testHTMLAsRichTextHelper = ( + description: string, + args: TestAsRichTextHelperArgs, +): void => { + testAsRichTextHelperFactory(description, args, htmlAsRichText); +}; + +export const testMarkdownAsRichTextHelper = ( + description: string, + args: TestAsRichTextHelperArgs, +): void => { + testAsRichTextHelperFactory( + description, + { ...args, expectAsHTMLNotToMatchInput: true }, + markdownAsRichText, + ); +}; diff --git a/test/richtext/htmlAsRichText.test.ts b/test/richtext/htmlAsRichText.test.ts index 5091a01a..a1c3498f 100644 --- a/test/richtext/htmlAsRichText.test.ts +++ b/test/richtext/htmlAsRichText.test.ts @@ -1,252 +1,278 @@ -import { describe, expect, it, vi } from "vitest"; +import { describe, expect, it } from "vitest"; -import { LinkType, asHTML } from "../../src"; -import { - AsRichTextConfig, - htmlAsRichText, - htmlAsRichTextSync, -} from "../../src/richtext"; +import { testHTMLAsRichTextHelper } from "./__testutils__/testAsRichTextHelper"; -type Case = { - case: string; +import { htmlAsRichText } from "../../src/richtext"; - input: string; +describe("transforms HTML to rich text", () => { + describe("basic", () => { + testHTMLAsRichTextHelper("empty", { + input: /* html */ ``, + }); - config?: AsRichTextConfig; - - /** - * The rich text format is a lossy representation of HTML. Namely it does not - * preserves indentation and applies some optimizations to the output such as - * merging directly adjacent identical spans. 
- * - * By default, the test suite will expect the HTML representation of the - * output to exactly match the input. This flag can be used to tell it to - * expect the output to not match the input instead. - */ - expectAsHTMLNotToMatchInput?: true; -}; + testHTMLAsRichTextHelper("single tag", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + }); -describe.each([ - { - case: "empty", - input: /* html */ ``, - }, - { - case: "single tag", - input: /* html */ `

lorem ipsum dolor sit amet

`, - }, - { - case: "multiple tags", - input: /* html */ `

lorem ipsum dolor sit amet

consectetur adipiscing elit

`, - }, - { - case: "spans (strong, em)", - input: /* html */ `

lorem ipsum dolor sit amet

`, - }, - { - case: "spans (label)", - input: /* html */ `

lorem ipsum dolor sit amet

`, - config: { converter: { "span.underline": { label: "underline" } } }, - }, - { - case: "spans (hyperlink)", - input: /* html */ `

lorem ipsum dolor sit amet

`, - }, - { - case: "nested spans", - input: /* html */ `

lorem ipsum dolor sit amet

`, - }, - { - case: "compacts directly adjacent identical spans (strong, em)", - input: /* html */ `

lorem ipsum dolor sit amet consectetur adipiscing elit

`, - // `strong` and `em` tags will be merged into single ones. - expectAsHTMLNotToMatchInput: true, - }, - { - case: "compacts directly adjacent identical spans (hyperlink)", - input: /* html */ `

lorem ipsum dolor sit amet

`, - // `a` tags will be merged into single ones. - expectAsHTMLNotToMatchInput: true, - }, - { - case: "compacts directly adjacent identical spans (label)", - input: /* html */ `

lorem ipsum dolor sit amet

`, - config: { converter: { "span.underline": { label: "underline" } } }, - // `span.underline` tags will be merged into single ones. - expectAsHTMLNotToMatchInput: true, - }, - { - case: "does not compact directly adjacent different spans (hyperlink)", - input: /* html */ `

lorem ipsum dolor sit amet

`, - }, - { - case: "does not compact directly adjacent different spans (label)", - input: /* html */ `

lorem ipsum dolor sit amet

`, - config: { - converter: { - "span.underline": { label: "underline" }, - "span.strikethrough": { label: "strikethrough" }, - }, - }, - }, - { - case: "compacts nested directly adjacent identical spans", - input: /* html */ `

lorem ipsum dolor sit amet

`, - }, - { - case: "image", - input: /* html */ `foo`, - }, - { - case: "image (prismic)", - input: /* html */ `foo`, - }, - { - case: "embed", - input: /* html */ ``, - }, - { - case: "extracts images from rich text text nodes and resumes with the same text node", - input: /* html */ `

lorem ipsum bar dolor sit amet

`, - expectAsHTMLNotToMatchInput: true, - }, - { - case: "extracts images from rich text text nodes and resumes with the same text node (spans)", - input: /* html */ `

lorem ipsum bar dolor sit amet

`, - expectAsHTMLNotToMatchInput: true, - }, - { - case: "extracts images from rich text text nodes and resumes with the same text node (adjacent spans)", - input: /* html */ `

lorem ipsum bar dolor sit amet

`, - expectAsHTMLNotToMatchInput: true, - }, - { - case: "converts only the given container", - input: /* html */ ` -

lorem ipsum dolor sit amet

-

consectetur adipiscing elit

`, - config: { container: "article#bar" }, - expectAsHTMLNotToMatchInput: true, - }, - { - case: "excludes the given selectors", - input: /* html */ ` -

lorem ipsum dolor sit amet

-

consectetur adipiscing elit

`, - config: { exclude: ["h1 > a"] }, - expectAsHTMLNotToMatchInput: true, - }, - { - case: "treats `
` as new lines", - input: /* html */ `

lorem ipsum dolor sit amet
consectetur adipiscing elit

`, - }, - { - case: "strips indentation", - input: /* html */ ` -

- lorem ipsum dolor sit amet -

- `, - expectAsHTMLNotToMatchInput: true, - }, - { - case: "strips complex indentation", - input: /* html */ ` -

- lorem ipsum dolor sit amet - consectetur adipiscing elit -

- `, - expectAsHTMLNotToMatchInput: true, - }, -])( - "transforms HTML to rich text ($case)", - ({ input, config, expectAsHTMLNotToMatchInput }) => { - it("produces the same output with sync and async helpers", async () => { - const asyncOutput = await htmlAsRichText(input, config); - const syncOutput = htmlAsRichTextSync(input, config); + testHTMLAsRichTextHelper("multiple tags", { + input: /* html */ `

lorem ipsum dolor sit amet

consectetur adipiscing elit

`, + }); + }); + + describe("spans", () => { + testHTMLAsRichTextHelper("strong, em", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + }); + + testHTMLAsRichTextHelper("label", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + config: { serializer: { "span.underline": { label: "underline" } } }, + }); + + testHTMLAsRichTextHelper("hyperlink", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + }); - expect(asyncOutput).toStrictEqual(syncOutput); + testHTMLAsRichTextHelper("nested spans", { + input: /* html */ `

lorem ipsum dolor sit amet

`, }); - it("produces valid rich text field", async () => { - const output = await htmlAsRichText(input, config); + describe("directly adjacent spans", () => { + describe("compacts similar", () => { + testHTMLAsRichTextHelper("strong, em", { + input: /* html */ `

lorem ipsum dolor sit amet consectetur adipiscing elit

`, + // `strong` and `em` tags will be merged into single ones. + expectAsHTMLNotToMatchInput: true, + }); - expect(output.result).toMatchSnapshot(); + testHTMLAsRichTextHelper("hyperlink", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + // `a` tags will be merged into single ones. + expectAsHTMLNotToMatchInput: true, + }); - const outputAsHTML = asHTML(output.result, { - serializer: { - // A simplified hyperlink serializer so that we don't have - // to append `rel="noopener noreferrer"` to every link in - // the test cases. - hyperlink: ({ node, children }) => { - const maybeTarget = - node.data.link_type === LinkType.Web && node.data.target - ? ` target="${node.data.target}"` - : ""; + testHTMLAsRichTextHelper("label", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + config: { serializer: { "span.underline": { label: "underline" } } }, + // `span.underline` tags will be merged into single ones. + expectAsHTMLNotToMatchInput: true, + }); - return `${children}`; + testHTMLAsRichTextHelper("nested spans", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + }); + }); + + describe("does not compact different", () => { + testHTMLAsRichTextHelper("hyperlink", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + }); + + testHTMLAsRichTextHelper("label", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + config: { + serializer: { + "span.underline": { label: "underline" }, + "span.strikethrough": { label: "strikethrough" }, + }, + }, + }); + }); + }); + }); + + describe("image", () => { + testHTMLAsRichTextHelper("non-prismic", { + input: /* html */ `foo`, + }); + + testHTMLAsRichTextHelper("prismic", { + input: /* html */ `foo`, + }); + + describe("extracts image in text nodes and resume previous text node", () => { + testHTMLAsRichTextHelper("basic", { + input: /* html */ `

lorem ipsum bar dolor sit amet

`, + expectAsHTMLNotToMatchInput: true, + }); + + testHTMLAsRichTextHelper("spans", { + input: /* html */ `

lorem ipsum bar dolor sit amet

`, + expectAsHTMLNotToMatchInput: true, + }); + + testHTMLAsRichTextHelper("adjacent spans", { + input: /* html */ `

lorem ipsum bar dolor sit amet

`, + expectAsHTMLNotToMatchInput: true, + }); + }); + }); + + describe("embed", () => { + testHTMLAsRichTextHelper("iframe", { + input: /* html */ ``, + }); + }); + + describe("configuration", () => { + describe("serializer", () => { + testHTMLAsRichTextHelper("tag name", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + config: { + serializer: { + p: "heading1", + }, + }, + expectAsHTMLNotToMatchInput: true, + }); + + testHTMLAsRichTextHelper("selector", { + input: /* html */ `

lorem ipsum dolor sit amet

consectetur adipiscing elit

`, + config: { + serializer: { + "#foo": "heading1", }, - // A simplified image serializer so that we don't have to - // wrap the images in a `div` element like the default - // serializer does. - image: ({ node }) => `${node.alt}`, - // A simplified embed serializer so that we don't have to - // wrap the embeds in a `div` element with the various data - // attributes the default serializer applies. - embed: ({ node }) => node.oembed.html, }, + expectAsHTMLNotToMatchInput: true, }); - if (!expectAsHTMLNotToMatchInput) { - expect(outputAsHTML).toBe(input); - } else { - expect(outputAsHTML).not.toBe(input); - } + testHTMLAsRichTextHelper("complex selector", { + input: /* html */ ` +

lorem ipsum dolor sit amet

+

consectetur adipiscing elit

`, + config: { + serializer: { + "article#foo > p": "heading1", + }, + }, + expectAsHTMLNotToMatchInput: true, + }); }); - }, -); -it("warns about missing `src` attribute in `image` elements", async () => { - const consoleWarnSpy = vi - .spyOn(console, "warn") - .mockImplementation(() => void 0); + describe("container", () => { + testHTMLAsRichTextHelper("converts only the given container", { + input: /* html */ ` +

lorem ipsum dolor sit amet

+

consectetur adipiscing elit

`, + config: { container: "article#bar" }, + expectAsHTMLNotToMatchInput: true, + }); + + it("throws when the container cannot be found", async () => { + await expect( + htmlAsRichText("", { container: "article#baz" }), + ).rejects.toThrowErrorMatchingInlineSnapshot( + '"No container matching `article#baz` could be found in the input AST."', + ); + }); + }); - await htmlAsRichText(/* html */ ``); + describe("exclude", () => { + testHTMLAsRichTextHelper("excludes the given selectors", { + input: /* html */ ` +

lorem ipsum dolor sit amet

+

consectetur adipiscing elit

`, + config: { exclude: ["h1"] }, + expectAsHTMLNotToMatchInput: true, + }); - expect(consoleWarnSpy).toHaveBeenCalledWith( - expect.stringMatching(/missing-image-src/i), - ); + testHTMLAsRichTextHelper("excludes the given complex selectors", { + input: /* html */ ` +

lorem ipsum dolor sit amet

+

consectetur adipiscing elit

`, + config: { exclude: ["h1 > a"] }, + expectAsHTMLNotToMatchInput: true, + }); + }); - consoleWarnSpy.mockRestore(); -}); + describe("include", () => { + testHTMLAsRichTextHelper("includes only the given selectors", { + input: /* html */ ` +

lorem ipsum dolor sit amet

+

consectetur adipiscing elit

`, + config: { include: ["h1"] }, + expectAsHTMLNotToMatchInput: true, + }); -it("warns about missing `src` attribute in `embed` elements", async () => { - const consoleWarnSpy = vi - .spyOn(console, "warn") - .mockImplementation(() => void 0); + testHTMLAsRichTextHelper("includes only the given complex selectors", { + input: /* html */ ` +

lorem ipsum dolor sit amet

+

consectetur adipiscing elit

`, + config: { include: ["article#bar > p"] }, + expectAsHTMLNotToMatchInput: true, + }); - await htmlAsRichText( - /* html */ ``, - ); + testHTMLAsRichTextHelper( + "dedupes matches that are child of other matches", + { + input: /* html */ ` +

lorem ipsum dolor sit amet

+

consectetur adipiscing elit

`, + config: { include: ["article#bar", "p"] }, + expectAsHTMLNotToMatchInput: true, + }, + ); + }); - expect(consoleWarnSpy).toHaveBeenCalledWith( - expect.stringMatching(/missing-embed-src/i), - ); + describe("direction", () => { + testHTMLAsRichTextHelper("marks text as left-to-right", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + config: { direction: "ltr" }, + }); - consoleWarnSpy.mockRestore(); -}); + testHTMLAsRichTextHelper("marks text as right-to-left", { + input: /* html */ `

lorem ipsum dolor sit amet

`, + config: { direction: "rtl" }, + }); + }); + }); -it("warns about missing `href` attribute in `hyperlink` elements", async () => { - const consoleWarnSpy = vi - .spyOn(console, "warn") - .mockImplementation(() => void 0); + describe("whistespaces", () => { + testHTMLAsRichTextHelper("treats `
` as new lines", { + input: /* html */ `

lorem ipsum dolor sit amet
consectetur adipiscing elit

`, + }); + + testHTMLAsRichTextHelper("strips indentation", { + input: /* html */ ` +

+ lorem ipsum dolor sit amet +

+ `, + expectAsHTMLNotToMatchInput: true, + }); - await htmlAsRichText(/* html */ `

missing-hyperlink-href

`); + testHTMLAsRichTextHelper("strips complex indentation", { + input: /* html */ ` +

+ lorem ipsum dolor sit amet + consectetur adipiscing elit +

+ `, + expectAsHTMLNotToMatchInput: true, + }); + }); +}); + +type WarnCase = { + name: string; + input: string; +}; - expect(consoleWarnSpy).toHaveBeenCalledWith( - expect.stringMatching(/missing-hyperlink-href/i), - ); +it.each([ + { + name: "element of type `img` is missing an `src` attribute", + input: /* html */ ``, + }, + { + name: "element of type `embed` is missing an `src` attribute", + input: /* html */ ``, + }, + { + name: "element of type `hyperlink` is missing an `href` attribute", + input: /* html */ `

missing-hyperlink-href

`, + }, +])("warns on unprocessable elements ($name)", async ({ name, input }) => { + const output = await htmlAsRichText(input); - consoleWarnSpy.mockRestore(); + expect(output.warnings.toString()).toMatch(new RegExp(name, "i")); });