Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow custom cmark options and extensions when converting Markdown #23

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 26 additions & 6 deletions Sources/Markdown/Base/Document.swift
Original file line number Diff line number Diff line change
Expand Up @@ -38,32 +38,52 @@ public extension Document {
/// Parse a string into a `Document`.
///
/// - parameter string: the input Markdown text to parse.
/// - parameter options: options for parsing Markdown text.
/// - parameter options: options for parsing Markdown text, including
/// Commonmark-specific options and extensions.
/// - parameter source: an explicit source URL from which the input `string` came for marking source locations.
/// This need not be a file URL.
init(parsing string: String, source: URL? = nil, options: ParseOptions = []) {
if options.contains(.parseBlockDirectives) {
init(parsing string: String, source: URL? = nil, convertOptions options: ConvertOptions) {
    // Without block-directive parsing the text goes straight to the
    // CommonMark-based parser.
    guard options.parseOptions.contains(.parseBlockDirectives) else {
        self = MarkupParser.parseString(string, source: source, options: options)
        return
    }
    // Block directives were requested: let the directive parser drive.
    self = BlockDirectiveParser.parse(string, source: source, options: options)
}

/// Parse a string into a `Document` using only `ParseOptions`.
///
/// The parse options are wrapped with `ConvertOptions(fromParseOptions:)` and
/// forwarded to `init(parsing:source:convertOptions:)`.
///
/// - parameter string: the input Markdown text to parse.
/// - parameter source: an explicit source URL from which the input `string` came for marking source locations.
///   This need not be a file URL.
/// - parameter options: options for parsing Markdown text.
init(parsing string: String, source: URL? = nil, options: ParseOptions = []) {
    let convertOptions = ConvertOptions(fromParseOptions: options)
    self.init(parsing: string, source: source, convertOptions: convertOptions)
}

/// Parse a file's contents into a `Document`.
///
/// - parameter file: a file URL from which to load Markdown text to parse.
/// - parameter options: options for parsing Markdown text.
init(parsing file: URL, options: ParseOptions = []) throws {
/// - parameter options: options for parsing Markdown text, including
/// Commonmark-specific options and extensions.
init(parsing file: URL, convertOptions options: ConvertOptions) throws {
    // Reading the file is the only step that can throw.
    let string = try String(contentsOf: file)
    // Reuse the string-based initializer so the choice between the block
    // directive parser and the plain CommonMark parser lives in one place.
    // The file URL doubles as the source for source locations.
    self.init(parsing: string, source: file, convertOptions: options)
}

/// Parse a file's contents into a `Document` using only `ParseOptions`.
///
/// The parse options are wrapped with `ConvertOptions(fromParseOptions:)` and
/// forwarded to `init(parsing:convertOptions:)`; any error thrown there is
/// propagated to the caller.
///
/// - parameter file: a file URL from which to load Markdown text to parse.
/// - parameter options: options for parsing Markdown text.
init(parsing file: URL, options: ParseOptions = []) throws {
    let convertOptions = ConvertOptions(fromParseOptions: options)
    try self.init(parsing: file, convertOptions: convertOptions)
}

/// Create a document from a sequence of block markup elements.
init<Children: Sequence>(_ children: Children) where Children.Element == BlockMarkup {
try! self.init(.document(parsedRange: nil, children.map { $0.raw.markup }))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,7 @@
### Options

- ``MarkupDumpOptions``
- ``ParseOptions``
- ``ConvertOptions``

<!-- Copyright (c) 2021-2022 Apple Inc and the Swift Project authors. All Rights Reserved. -->
8 changes: 4 additions & 4 deletions Sources/Markdown/Parser/BlockDirectiveParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,7 @@ private enum ParseContainer: CustomStringConvertible {
/// Convert this container to the corresponding ``RawMarkup`` node.
func convertToRawMarkup(ranges: inout RangeTracker,
parent: ParseContainer?,
options: ParseOptions) -> [RawMarkup] {
options: ConvertOptions) -> [RawMarkup] {
switch self {
case let .root(children):
let rawChildren = children.flatMap {
Expand Down Expand Up @@ -945,7 +945,7 @@ extension Document {
///
/// - Precondition: The `rootContainer` must be the `.root` case.
fileprivate init(converting rootContainer: ParseContainer, from source: URL?,
options: ParseOptions) {
options: ConvertOptions) {
guard case .root = rootContainer else {
fatalError("Tried to convert a non-root container to a `Document`")
}
Expand All @@ -968,14 +968,14 @@ extension Document {
}

struct BlockDirectiveParser {
static func parse(_ input: URL, options: ParseOptions = []) throws -> Document {
static func parse(_ input: URL, options: ConvertOptions = .init()) throws -> Document {
let string = try String(contentsOf: input, encoding: .utf8)
return parse(string, source: input, options: options)
}

/// Parse the input.
static func parse(_ input: String, source: URL?,
options: ParseOptions = []) -> Document {
options: ConvertOptions = .init()) -> Document {
// Phase 0: Split the input into lines lazily, keeping track of
// line numbers, consecutive blank lines, and start positions on each line where indentation ends.
// These trim points may be used to adjust the indentation seen by the CommonMark parser when
Expand Down
18 changes: 7 additions & 11 deletions Sources/Markdown/Parser/CommonMarkConverter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -578,22 +578,18 @@ struct MarkupParser {
return MarkupConversion(state: childConversion.state.next(), result: .tableCell(parsedRange: parsedRange, colspan: colspan, rowspan: rowspan, childConversion.result))
}

static func parseString(_ string: String, source: URL?, options: ParseOptions) -> Document {
static func parseString(_ string: String, source: URL?, options: ConvertOptions) -> Document {
cmark_gfm_core_extensions_ensure_registered()

var cmarkOptions = CMARK_OPT_TABLE_SPANS
if !options.contains(.disableSmartOpts) {
cmarkOptions |= CMARK_OPT_SMART
let parser = cmark_parser_new(options.commonmarkOptions.rawValue)

for ext in options.commonmarkExtensions {
    // Extension names are caller-supplied. `cmark_find_syntax_extension`
    // yields nil when nothing is registered under `ext`; skip such names
    // instead of handing a null extension pointer to cmark.
    guard let syntaxExtension = cmark_find_syntax_extension(ext) else {
        continue
    }
    cmark_parser_attach_syntax_extension(parser, syntaxExtension)
}

let parser = cmark_parser_new(cmarkOptions)

cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("table"))
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("strikethrough"))
cmark_parser_attach_syntax_extension(parser, cmark_find_syntax_extension("tasklist"))

cmark_parser_feed(parser, string, string.utf8.count)
let rawDocument = cmark_parser_finish(parser)
let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options, headerSeen: false, pendingTableBody: nil).next()
let initialState = MarkupConverterState(source: source, iterator: cmark_iter_new(rawDocument), event: CMARK_EVENT_NONE, node: nil, options: options.parseOptions, headerSeen: false, pendingTableBody: nil).next()
precondition(initialState.event == CMARK_EVENT_ENTER)
precondition(initialState.nodeType == .document)
let conversion = convertAnyElement(initialState)
Expand Down
128 changes: 128 additions & 0 deletions Sources/Markdown/Parser/ConvertOptions.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
This source file is part of the Swift.org open source project

Copyright (c) 2021 Apple Inc. and the Swift project authors
Licensed under Apache License v2.0 with Runtime Library Exception

See https://swift.org/LICENSE.txt for license information
See https://swift.org/CONTRIBUTORS.txt for Swift project authors
*/

import cmark_gfm

/// The full set of knobs used when converting Markdown: the library's own
/// `ParseOptions` plus the options and extension names handed to Commonmark.
public struct ConvertOptions {
    // MARK: Defaults

    /// The parse options used by `init()`: none.
    public static let defaultParseOptions: ParseOptions = []

    /// The Commonmark options used unless the caller supplies their own.
    public static let defaultCommonmarkOptions: CommonmarkOptions = [.smart, .tableSpans]

    /// The names of the Commonmark extensions used unless the caller supplies their own.
    public static let defaultCommonmarkExtensions: [String] = ["table", "strikethrough", "tasklist"]

    // MARK: Stored values

    /// Options interpreted by this library's parsers.
    public let parseOptions: ParseOptions

    /// Options passed to the Commonmark parser.
    public let commonmarkOptions: CommonmarkOptions

    /// Names of the Commonmark syntax extensions to attach to the parser.
    public let commonmarkExtensions: [String]

    // MARK: Initializers

    /// Creates options from explicitly chosen values; nothing is defaulted or adjusted.
    public init(parseOptions: ParseOptions, commonmarkOptions: CommonmarkOptions, extensions: [String]) {
        self.parseOptions = parseOptions
        self.commonmarkOptions = commonmarkOptions
        commonmarkExtensions = extensions
    }

    /// Creates options from `ParseOptions` alone, using the default Commonmark
    /// options and extensions.
    ///
    /// When `options` contains `.disableSmartOpts`, `.smart` is left out of the
    /// Commonmark options.
    public init(fromParseOptions options: ParseOptions) {
        let defaults = ConvertOptions.defaultCommonmarkOptions
        let smartDisabled = options.contains(.disableSmartOpts)
        self.init(
            parseOptions: options,
            commonmarkOptions: smartDisabled ? defaults.subtracting(.smart) : defaults,
            extensions: ConvertOptions.defaultCommonmarkExtensions
        )
    }

    /// Creates options in which every value is the corresponding default.
    public init() {
        self.init(fromParseOptions: ConvertOptions.defaultParseOptions)
    }
}

/// An option set wrapping the `CMARK_OPT_*` flags given to the Commonmark converter.
public struct CommonmarkOptions: OptionSet {
    /// The raw `CMARK_OPT_*` bit mask.
    public var rawValue: Int32

    /// Wraps an existing bit mask without validating it.
    public init(rawValue: Int32) {
        self.rawValue = rawValue
    }

    /// No special options: the default Commonmark behavior.
    public static let `default`: CommonmarkOptions = .init(rawValue: CMARK_OPT_DEFAULT)

    /// Include a `data-sourcepos` attribute on all block elements.
    public static let sourcepos: CommonmarkOptions = .init(rawValue: CMARK_OPT_SOURCEPOS)

    /// Render `softbreak` elements as hard line breaks.
    public static let hardBreaks: CommonmarkOptions = .init(rawValue: CMARK_OPT_HARDBREAKS)

    /// Render raw HTML and unsafe links.
    ///
    /// Unsafe links are `javascript:`, `vbscript:`, `file:`, and `data:`
    /// links, with the exception of `data:` links whose MIME type is
    /// `image/png`, `image/gif`, `image/jpeg` or `image/webp`. When this
    /// option is not set, raw HTML is replaced by a placeholder HTML comment
    /// and unsafe links are replaced by empty strings.
    public static let unsafe: CommonmarkOptions = .init(rawValue: CMARK_OPT_UNSAFE)

    /// Render `softbreak` elements as spaces.
    public static let noBreaks: CommonmarkOptions = .init(rawValue: CMARK_OPT_NOBREAKS)

    /// Validate UTF-8 in the input before parsing; illegal sequences are
    /// replaced with the replacement character `U+FFFD`.
    public static let validateUtf8: CommonmarkOptions = .init(rawValue: CMARK_OPT_VALIDATE_UTF8)

    /// Convert straight quotes to curly quotes, `---` to em dashes and `--`
    /// to en dashes.
    public static let smart: CommonmarkOptions = .init(rawValue: CMARK_OPT_SMART)

    /// Use GitHub-style `<pre lang="x">` tags for code blocks in place of
    /// `<pre><code class="language-x">`.
    public static let githubPreLang: CommonmarkOptions = .init(rawValue: CMARK_OPT_GITHUB_PRE_LANG)

    /// Be liberal in interpreting inline HTML tags.
    public static let liberalHtmlTag: CommonmarkOptions = .init(rawValue: CMARK_OPT_LIBERAL_HTML_TAG)

    /// Parse footnotes.
    public static let footnotes: CommonmarkOptions = .init(rawValue: CMARK_OPT_FOOTNOTES)

    /// Only parse strikethroughs that are surrounded by exactly two tildes.
    ///
    /// This does not enable strikethrough parsing by itself: the
    /// `"strikethrough"` extension still has to be enabled.
    public static let strikethroughDoubleTilde: CommonmarkOptions = .init(rawValue: CMARK_OPT_STRIKETHROUGH_DOUBLE_TILDE)

    /// Align table cells with style attributes instead of align attributes.
    public static let tablePreferStyleAttributes: CommonmarkOptions = .init(rawValue: CMARK_OPT_TABLE_PREFER_STYLE_ATTRIBUTES)

    /// Include the remainder of a code block's info string in a separate
    /// attribute.
    public static let fullInfoString: CommonmarkOptions = .init(rawValue: CMARK_OPT_FULL_INFO_STRING)

    /// Parse only inline markdown directives. Block directives are not parsed;
    /// their literal representations remain in the output.
    public static let inlineOnly: CommonmarkOptions = .init(rawValue: CMARK_OPT_INLINE_ONLY)

    // FIXME: `CMARK_OPT_PRESERVE_WHITESPACE` itself does not seem to be visible to the
    // Swift compiler, so its value is spelled out by hand below.
    /// Parse the markdown input without removing preceding/trailing whitespace
    /// and without converting newline characters to breaks.
    ///
    /// The value includes the `CMARK_OPT_INLINE_ONLY` bit, so using this
    /// option also enables `inlineOnly`.
    public static let preserveWhitespace: CommonmarkOptions = .init(rawValue: CMARK_OPT_INLINE_ONLY | (1 << 19))

    /// Enable the row- and column-span syntax in the tables extension.
    public static let tableSpans: CommonmarkOptions = .init(rawValue: CMARK_OPT_TABLE_SPANS)

    /// In the tables extension, indicate row-spans with a "ditto mark" (`"`)
    /// instead of a caret (`^`).
    public static let tableRowspanDitto: CommonmarkOptions = .init(rawValue: CMARK_OPT_TABLE_ROWSPAN_DITTO)
}
18 changes: 16 additions & 2 deletions Sources/markdown-tool/Commands/DumpTreeCommand.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,27 @@ extension MarkdownCommand {
@Flag<Bool>(inversion: .prefixedNo, exclusivity: .chooseLast, help: "Parse block directives")
var parseBlockDirectives: Bool = false

@Option(help: "Additional Commonmark extensions to enable")
var `extension`: [String] = []

@Flag<Bool>(help: "Don't enable the default Commonmark extensions (\(ConvertOptions.defaultCommonmarkExtensions.joined(separator: ", ")))")
var noDefaultExtensions: Bool = false

func run() throws {
let parseOptions: ParseOptions = parseBlockDirectives ? [.parseBlockDirectives] : []
var commonmarkExts = noDefaultExtensions ? [] : ConvertOptions.defaultCommonmarkExtensions
commonmarkExts.append(contentsOf: `extension`)
let convertOptions = ConvertOptions.init(
parseOptions: parseOptions,
commonmarkOptions: ConvertOptions.defaultCommonmarkOptions,
extensions: commonmarkExts
)

let document: Document
if let inputFilePath = inputFilePath {
(_, document) = try MarkdownCommand.parseFile(at: inputFilePath, options: parseOptions)
(_, document) = try MarkdownCommand.parseFile(at: inputFilePath, options: convertOptions)
} else {
(_, document) = try MarkdownCommand.parseStandardInput(options: parseOptions)
(_, document) = try MarkdownCommand.parseStandardInput(options: convertOptions)
}
var dumpOptions = MarkupDumpOptions()
if sourceLocations {
Expand Down
17 changes: 15 additions & 2 deletions Sources/markdown-tool/Commands/FormatCommand.swift
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ extension MarkdownCommand {
@Argument(help: "Input file (default: standard input)")
var inputFilePath: String?

@Option(help: "Additional Commonmark extensions to enable")
var `extension`: [String] = []

@Flag<Bool>(help: "Don't enable the default Commonmark extensions (\(ConvertOptions.defaultCommonmarkExtensions.joined(separator: ", ")))")
var noDefaultExtensions: Bool = false

/// Search for an executable with a given base name.
func findExecutable(named name: String) throws -> String? {
let which = Process()
Expand Down Expand Up @@ -196,12 +202,19 @@ extension MarkdownCommand {
if parseSymbolLinks {
parseOptions.insert(.parseSymbolLinks)
}
var commonmarkExts = noDefaultExtensions ? [] : ConvertOptions.defaultCommonmarkExtensions
commonmarkExts.append(contentsOf: `extension`)
let convertOptions = ConvertOptions.init(
parseOptions: parseOptions,
commonmarkOptions: ConvertOptions.defaultCommonmarkOptions,
extensions: commonmarkExts
)
let source: String
let document: Document
if let inputFilePath = inputFilePath {
(source, document) = try MarkdownCommand.parseFile(at: inputFilePath, options: parseOptions)
(source, document) = try MarkdownCommand.parseFile(at: inputFilePath, options: convertOptions)
} else {
(source, document) = try MarkdownCommand.parseStandardInput(options: parseOptions)
(source, document) = try MarkdownCommand.parseStandardInput(options: convertOptions)
}

guard let emphasisMarker = MarkupFormatter.Options.EmphasisMarker(argument: emphasisMarker) else {
Expand Down
8 changes: 4 additions & 4 deletions Sources/markdown-tool/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ struct MarkdownCommand: ParsableCommand {
Format.self,
])

static func parseFile(at path: String, options: ParseOptions) throws -> (source: String, parsed: Document) {
static func parseFile(at path: String, options: ConvertOptions) throws -> (source: String, parsed: Document) {
let data = try Data(contentsOf: URL(fileURLWithPath: path))
guard let inputString = String(data: data, encoding: .utf8) else {
throw Error.couldntDecodeInputAsUTF8
}
return (inputString, Document(parsing: inputString, options: options))
return (inputString, Document(parsing: inputString, convertOptions: options))
}

static func parseStandardInput(options: ParseOptions) throws -> (source: String, parsed: Document) {
static func parseStandardInput(options: ConvertOptions) throws -> (source: String, parsed: Document) {
let stdinData: Data
if #available(macOS 10.15.4, *) {
stdinData = try FileHandle.standardInput.readToEnd() ?? Data()
Expand All @@ -47,7 +47,7 @@ struct MarkdownCommand: ParsableCommand {
guard let stdinString = String(data: stdinData, encoding: .utf8) else {
throw Error.couldntDecodeInputAsUTF8
}
return (stdinString, Document(parsing: stdinString, options: options))
return (stdinString, Document(parsing: stdinString, convertOptions: options))
}
}

Expand Down
27 changes: 27 additions & 0 deletions Tests/MarkdownTests/Parsing/CommonMarkConverterTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,31 @@ class CommonMarkConverterTests: XCTestCase {
let document = Document(parsing: text, source: nil, options: [.parseBlockDirectives, .parseSymbolLinks])
XCTAssertEqual(expectedDump, document.debugDescription(options: .printSourceLocations))
}

/// Test using a custom set of Commonmark options to convert Markdown.
///
/// The input is parsed with `.strikethroughDoubleTilde` as the only Commonmark
/// option and with the default extension list, and the resulting debug dump
/// (including source locations) is compared with the expected tree.
func testCustomOpts() {
// The input holds a single-tilde span, a literal `--`, and a double-tilde span.
let text = "~This is not strikethrough~ -- but ~~this is strikethrough~~."

// Because the "smart" option is not set, the `--` should not be converted
// to an en-dash.
// The single-tilde span is expected to stay inside the first `Text` node;
// only the double-tilde span is expected to become a `Strikethrough` node.
let expectedDump = """
Document @1:1-1:62
└─ Paragraph @1:1-1:62
├─ Text @1:1-1:36 "~This is not strikethrough~ -- but "
├─ Strikethrough @1:36-1:61
│ └─ Text @1:38-1:59 "this is strikethrough"
└─ Text @1:61-1:62 "."
"""

// The memberwise initializer takes the Commonmark options as given, so passing
// `.strikethroughDoubleTilde` alone leaves the default `.smart` and
// `.tableSpans` options unset; parse options and extensions stay at their defaults.
let document = Document(
parsing: text,
source: nil,
convertOptions: .init(
parseOptions: ConvertOptions.defaultParseOptions,
commonmarkOptions: .strikethroughDoubleTilde,
extensions: ConvertOptions.defaultCommonmarkExtensions
)
)
XCTAssertEqual(expectedDump, document.debugDescription(options: .printSourceLocations))
}
}