diff --git a/rc/filetype/markdown.kak b/rc/filetype/markdown.kak index 2a5755b9fa..278c71a0fc 100644 --- a/rc/filetype/markdown.kak +++ b/rc/filetype/markdown.kak @@ -41,71 +41,86 @@ provide-module markdown %{ # Highlighters # ‾‾‾‾‾‾‾‾‾‾‾‾ -add-highlighter shared/markdown regions -add-highlighter shared/markdown/inline default-region regions -add-highlighter shared/markdown/inline/text default-region group - -add-highlighter shared/markdown/listblock region ^\h*[-*]\s ^(?=\S) regions -add-highlighter shared/markdown/listblock/g default-region group -add-highlighter shared/markdown/listblock/g/ ref markdown/inline -add-highlighter shared/markdown/listblock/g/marker regex ^\h*([-*])\s 1:bullet - -add-highlighter shared/markdown/codeblock region -match-capture \ - ^(\h*)```\h* \ - ^(\h*)```\h*$ \ - regions -add-highlighter shared/markdown/codeblock/ default-region fill meta -add-highlighter shared/markdown/listblock/codeblock region -match-capture \ - ^(\h*)```\h* \ - ^(\h*)```\h*$ \ - regions -add-highlighter shared/markdown/listblock/codeblock/ default-region fill meta -add-highlighter shared/markdown/codeline region "^( {4}|\t)" "$" fill meta - -# https://spec.commonmark.org/0.29/#link-destination -add-highlighter shared/markdown/angle_bracket_url region (?<=<)([a-z]+://|(mailto|magnet|xmpp):) (?!\\).(?=>)|\n fill link -add-highlighter shared/markdown/inline/url region -recurse \( (\b[a-z]+://|(mailto|magnet|xmpp):) (?!\\).(?=\))|\s fill link -add-highlighter shared/markdown/listblock/angle_bracket_url region (?<=<)(\b[a-z]+://|(mailto|magnet|xmpp):) (?!\\).(?=>)|\n fill link - try %{ - require-module html - add-highlighter shared/markdown/inline/tag region (?i))) > ref html/tag -} - -add-highlighter shared/markdown/inline/code region -match-capture (`+) (`+) fill mono - -# Setext-style header -add-highlighter shared/markdown/inline/text/ regex (\A|^\n)[^\n]+\n={2,}\h*\n\h*$ 0:title -add-highlighter shared/markdown/inline/text/ regex (\A|^\n)[^\n]+\n-{2,}\h*\n\h*$ 
0:header - -# Atx-style header -add-highlighter shared/markdown/inline/text/ regex ^#[^\n]* 0:header + require-module tree-sitter + tree-sitter-load-highlighter shared/markdown markdown %opt{tree_sitter_default_faces} + + define-command markdown-load-languages -params 1 %{ + evaluate-commands -draft %{ try %{ + execute-keys "%arg{1}s```\h*\{?[.=]?\K\w+" # } + evaluate-commands -itersel %{ try %{ + require-module %val{selection} + add-highlighter "shared/markdown/%val{selection}" tree-sitter-injection ref %val{selection} + }} + }} + } +} catch %{ + add-highlighter shared/markdown regions + add-highlighter shared/markdown/inline default-region regions + add-highlighter shared/markdown/inline/text default-region group + + add-highlighter shared/markdown/listblock region ^\h*[-*]\s ^(?=\S) regions + add-highlighter shared/markdown/listblock/g default-region group + add-highlighter shared/markdown/listblock/g/ ref markdown/inline + add-highlighter shared/markdown/listblock/g/marker regex ^\h*([-*])\s 1:bullet + + add-highlighter shared/markdown/codeblock region -match-capture \ + ^(\h*)```\h* \ + ^(\h*)```\h*$ \ + regions + add-highlighter shared/markdown/codeblock/ default-region fill meta + add-highlighter shared/markdown/listblock/codeblock region -match-capture \ + ^(\h*)```\h* \ + ^(\h*)```\h*$ \ + regions + add-highlighter shared/markdown/listblock/codeblock/ default-region fill meta + add-highlighter shared/markdown/codeline region "^( {4}|\t)" "$" fill meta + + # https://spec.commonmark.org/0.29/#link-destination + add-highlighter shared/markdown/angle_bracket_url region (?<=<)([a-z]+://|(mailto|magnet|xmpp):) (?!\\).(?=>)|\n fill link + add-highlighter shared/markdown/inline/url region -recurse \( (\b[a-z]+://|(mailto|magnet|xmpp):) (?!\\).(?=\))|\s fill link + add-highlighter shared/markdown/listblock/angle_bracket_url region (?<=<)(\b[a-z]+://|(mailto|magnet|xmpp):) (?!\\).(?=>)|\n fill link + + try %{ + require-module html + add-highlighter 
shared/markdown/inline/tag region (?i))) > ref html/tag + } -add-highlighter shared/markdown/inline/text/ regex (?\h*)+ 0:comment -add-highlighter shared/markdown/inline/text/ regex "\H( {2,})$" 1:+r@meta + add-highlighter shared/markdown/inline/code region -match-capture (`+) (`+) fill mono + + # Setext-style header + add-highlighter shared/markdown/inline/text/ regex (\A|^\n)[^\n]+\n={2,}\h*\n\h*$ 0:title + add-highlighter shared/markdown/inline/text/ regex (\A|^\n)[^\n]+\n-{2,}\h*\n\h*$ 0:header + + # Atx-style header + add-highlighter shared/markdown/inline/text/ regex ^#[^\n]* 0:header + + add-highlighter shared/markdown/inline/text/ regex (?\h*)+ 0:comment + add-highlighter shared/markdown/inline/text/ regex "\H( {2,})$" 1:+r@meta + + define-command markdown-load-languages -params 1 %{ + evaluate-commands -draft %{ try %{ + execute-keys "%arg{1}s```\h*\{?[.=]?\K\w+" # } + evaluate-commands -itersel %{ try %{ + require-module %val{selection} + add-highlighter "shared/markdown/codeblock/%val{selection}" region -match-capture "^(\h*)```\h*(%val{selection}\b|\{[.=]?%val{selection}\})" ^(\h*)``` regions + add-highlighter "shared/markdown/codeblock/%val{selection}/" default-region fill meta + add-highlighter "shared/markdown/codeblock/%val{selection}/inner" region \A\h*```[^\n]*\K (?=```) ref %val{selection} + add-highlighter "shared/markdown/listblock/codeblock/%val{selection}" region -match-capture "^(\h*)```\h*(%val{selection}\b|\{[.=]?%val{selection}\})" ^(\h*)``` regions + add-highlighter "shared/markdown/listblock/codeblock/%val{selection}/" default-region fill meta + add-highlighter "shared/markdown/listblock/codeblock/%val{selection}/inner" region \A\h*```[^\n]*\K (?=```) ref %val{selection} + }} + }} + } +} # Commands # ‾‾‾‾‾‾‾‾ -define-command markdown-load-languages -params 1 %{ - evaluate-commands -draft %{ try %{ - execute-keys "%arg{1}s```\h*\{?[.=]?\K\w+" # } - evaluate-commands -itersel %{ try %{ - require-module %val{selection} - add-highlighter 
"shared/markdown/codeblock/%val{selection}" region -match-capture "^(\h*)```\h*(%val{selection}\b|\{[.=]?%val{selection}\})" ^(\h*)``` regions - add-highlighter "shared/markdown/codeblock/%val{selection}/" default-region fill meta - add-highlighter "shared/markdown/codeblock/%val{selection}/inner" region \A\h*```[^\n]*\K (?=```) ref %val{selection} - add-highlighter "shared/markdown/listblock/codeblock/%val{selection}" region -match-capture "^(\h*)```\h*(%val{selection}\b|\{[.=]?%val{selection}\})" ^(\h*)``` regions - add-highlighter "shared/markdown/listblock/codeblock/%val{selection}/" default-region fill meta - add-highlighter "shared/markdown/listblock/codeblock/%val{selection}/inner" region \A\h*```[^\n]*\K (?=```) ref %val{selection} - }} - }} -} - define-command -hidden markdown-trim-indent %{ evaluate-commands -no-hooks -draft -itersel %{ execute-keys x diff --git a/rc/filetype/rust.kak b/rc/filetype/rust.kak index 920ed2b4e5..4bca8cdefa 100644 --- a/rc/filetype/rust.kak +++ b/rc/filetype/rust.kak @@ -30,79 +30,84 @@ provide-module rust %§ # Highlighters # ‾‾‾‾‾‾‾‾‾‾‾‾ -add-highlighter shared/rust regions -add-highlighter shared/rust/code default-region group -add-highlighter shared/rust/string region %{(?|<|%)=? 0:operator -add-highlighter shared/rust/code/operators_as regex \bas\b 0:operator -add-highlighter shared/rust/code/ref_ref regex (&\h+[&~@*])[^)=\s\t\r\n] 1:type -add-highlighter shared/rust/code/ref regex ([&~@*])[^)=\s\t\r\n] 1:type -add-highlighter shared/rust/code/operators_logic regex &&|\|\| 0:operator - -add-highlighter shared/rust/code/lifetime_or_loop_label regex ('([a-zA-Z]\w+|_\w+))\b 1:meta -add-highlighter shared/rust/code/namespace regex \b[a-zA-Z](\w+)?(\h+)?(?=::) 0:module -add-highlighter shared/rust/code/mod_path_sep regex :: 0:meta -add-highlighter shared/rust/code/question_mark regex \? 
0:meta -# the language keywords are defined here, but many of them are reserved and unused yet: -# https://doc.rust-lang.org/reference/keywords.html -add-highlighter shared/rust/code/function_call regex _?[a-zA-Z]\w*\s*(?=\() 0:function -add-highlighter shared/rust/code/generic_function_call regex _?[a-zA-Z]\w*\s*(?=::<) 0:function -add-highlighter shared/rust/code/function_declaration regex (?:fn\h+)(_?\w+)(?:<[^>]+?>)?\( 1:function -add-highlighter shared/rust/code/keywords regex \b(?:as|break|continue|crate|else|enum|extern|false|fn|for|if|impl|in|let|loop|match|mod|pub|return|self|Self|struct|super|trait|true|type|union|unsafe|use|where|while|async|await|dyn|abstract|become|box|do|try)\b 0:keyword -add-highlighter shared/rust/code/storage regex \b(move|mut|ref|static|const)\b 0:type -add-highlighter shared/rust/code/pub_with_scope regex \b(pub)\h*(\()\h*(crate|super|self|in\h+[\w:]+)\h*(\)) 1:keyword 2:meta 4:meta -# after let can be an arbitrary pattern match -add-highlighter shared/rust/code/macro regex \b\w+! 
0:meta -# the number literals syntax is defined here: -# https://doc.rust-lang.org/reference/tokens.html#numb ers -add-highlighter shared/rust/code/values regex \b(?:self|true|false|[0-9][_0-9]*(?:\.[0-9][_0-9]*|(?:\.[0-9][_0-9]*)?E[\+\-][_0-9]+)(?:f(?:32|64))?|(?:0x[_0-9a-fA-F]+|0o[_0-7]+|0b[_01]+|[0-9][_0-9]*)(?:(?:i|u|f)(?:8|16|32|64|128|size))?)\b 0:value -add-highlighter shared/rust/code/char_character regex "'([^\\]|\\(.|x[0-9a-fA-F]{2}|u\{[0-9a-fA-F]{1,6}\}))'" 0:green -# TODO highlight error for unicode or single escape by te character -add-highlighter shared/rust/code/byte_character regex b'([\x00-\x5B\x5D-\x7F]|\\(.|x[0-9a-fA-F]{2}))' 0:yellow -add-highlighter shared/rust/code/builtin_types regex \b(?:u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize|f32|f64|bool|char|str|Self)\b 0:type -add-highlighter shared/rust/code/return regex \breturn\b 0:meta - -add-highlighter shared/rust/code/enum regex \b(Option|Result)\b 0:type -add-highlighter shared/rust/code/enum_variant regex \b(Some|None|Ok|Err)\b 0:value -add-highlighter shared/rust/code/std_traits regex \b(Copy|Send|Sized|Sync|Drop|Fn|FnMut|FnOnce|Box|ToOwned|Clone|PartialEq|PartialOrd|Eq|Ord|AsRef|AsMut|Into|From|Default|Iterator|Extend|IntoIterator|DoubleEndedIterator|ExactSizeIterator|SliceConcatExt|String|ToString|Vec)\b 0:type +try %{ + require-module tree-sitter + tree-sitter-load-highlighter shared/rust rust %opt{tree_sitter_default_faces} +} catch %{ + add-highlighter shared/rust regions + add-highlighter shared/rust/code default-region group + add-highlighter shared/rust/string region %{(?|<|%)=? 
0:operator + add-highlighter shared/rust/code/operators_as regex \bas\b 0:operator + add-highlighter shared/rust/code/ref_ref regex (&\h+[&~@*])[^)=\s\t\r\n] 1:type + add-highlighter shared/rust/code/ref regex ([&~@*])[^)=\s\t\r\n] 1:type + add-highlighter shared/rust/code/operators_logic regex &&|\|\| 0:operator + + add-highlighter shared/rust/code/lifetime_or_loop_label regex ('([a-zA-Z]\w+|_\w+))\b 1:meta + add-highlighter shared/rust/code/namespace regex \b[a-zA-Z](\w+)?(\h+)?(?=::) 0:module + add-highlighter shared/rust/code/mod_path_sep regex :: 0:meta + add-highlighter shared/rust/code/question_mark regex \? 0:meta + # the language keywords are defined here, but many of them are reserved and unused yet: + # https://doc.rust-lang.org/reference/keywords.html + add-highlighter shared/rust/code/function_call regex _?[a-zA-Z]\w*\s*(?=\() 0:function + add-highlighter shared/rust/code/generic_function_call regex _?[a-zA-Z]\w*\s*(?=::<) 0:function + add-highlighter shared/rust/code/function_declaration regex (?:fn\h+)(_?\w+)(?:<[^>]+?>)?\( 1:function + add-highlighter shared/rust/code/keywords regex \b(?:as|break|continue|crate|else|enum|extern|false|fn|for|if|impl|in|let|loop|match|mod|pub|return|self|Self|struct|super|trait|true|type|union|unsafe|use|where|while|async|await|dyn|abstract|become|box|do|try)\b 0:keyword + add-highlighter shared/rust/code/storage regex \b(move|mut|ref|static|const)\b 0:type + add-highlighter shared/rust/code/pub_with_scope regex \b(pub)\h*(\()\h*(crate|super|self|in\h+[\w:]+)\h*(\)) 1:keyword 2:meta 4:meta + # after let can be an arbitrary pattern match + add-highlighter shared/rust/code/macro regex \b\w+! 
0:meta + # the number literals syntax is defined here: + # https://doc.rust-lang.org/reference/tokens.html#numb ers + add-highlighter shared/rust/code/values regex \b(?:self|true|false|[0-9][_0-9]*(?:\.[0-9][_0-9]*|(?:\.[0-9][_0-9]*)?E[\+\-][_0-9]+)(?:f(?:32|64))?|(?:0x[_0-9a-fA-F]+|0o[_0-7]+|0b[_01]+|[0-9][_0-9]*)(?:(?:i|u|f)(?:8|16|32|64|128|size))?)\b 0:value + add-highlighter shared/rust/code/char_character regex "'([^\\]|\\(.|x[0-9a-fA-F]{2}|u\{[0-9a-fA-F]{1,6}\}))'" 0:green + # TODO highlight error for unicode or single escape by te character + add-highlighter shared/rust/code/byte_character regex b'([\x00-\x5B\x5D-\x7F]|\\(.|x[0-9a-fA-F]{2}))' 0:yellow + add-highlighter shared/rust/code/builtin_types regex \b(?:u8|u16|u32|u64|u128|usize|i8|i16|i32|i64|i128|isize|f32|f64|bool|char|str|Self)\b 0:type + add-highlighter shared/rust/code/return regex \breturn\b 0:meta + + add-highlighter shared/rust/code/enum regex \b(Option|Result)\b 0:type + add-highlighter shared/rust/code/enum_variant regex \b(Some|None|Ok|Err)\b 0:value + add-highlighter shared/rust/code/std_traits regex \b(Copy|Send|Sized|Sync|Drop|Fn|FnMut|FnOnce|Box|ToOwned|Clone|PartialEq|PartialOrd|Eq|Ord|AsRef|AsMut|Into|From|Default|Iterator|Extend|IntoIterator|DoubleEndedIterator|ExactSizeIterator|SliceConcatExt|String|ToString|Vec)\b 0:type +} # Commands # ‾‾‾‾‾‾‾‾ diff --git a/rc/tools/tree-sitter.kak b/rc/tools/tree-sitter.kak new file mode 100644 index 0000000000..f4f041cb1d --- /dev/null +++ b/rc/tools/tree-sitter.kak @@ -0,0 +1,69 @@ +declare-option str-list tree_sitter_grammar_directories \ + "%val{runtime}/grammars" \ + "%val{config}/grammars" \ + +declare-option str-list tree_sitter_default_faces \ + text.literal:comment \ + text.reference:variable \ + text.title:title \ + text.uri:+u \ + text.underline:+u \ + text.todo:meta \ + comment:comment \ + punctuation.special:meta \ + \ + constant.builtin:keyword \ + constant.macro:meta \ + define:meta \ + macro:meta \ + string:string \ + 
string.escape:operator \ + string.special:operator \ + character:string \ + character.special:operator \ + number:value \ + boolean:value \ + float:value \ + \ + keyword:keyword \ + function:function \ + function.builtin:builtin \ + function.macro:meta \ + parameter:variable \ + method:function \ + field:variable \ + property:variable \ + constructor:function \ + \ + variable:variable \ + type:type \ + type.definition:type \ + storageclass:type \ + structure:type \ + namespace:type \ + include:type \ + preproc:meta \ + debug:error \ + tag:attribute \ + +provide-module tree-sitter %{ + define-command tree-sitter-load-highlighter -params 2.. -docstring %{tree-sitter-load-highlighter <path> <lang> [<capture>:<face>...]: add a tree-sitter highlighter for <lang> at <path>, using the first directory of tree_sitter_grammar_directories that provides the grammar; fails when none does + } %{ evaluate-commands %sh{ + path="$1" + lang="$2" + shift 2 + faces="$*" + + escape() { + printf "'%s'" "$(printf "%s" "$1" | sed "s|'|''|g")" # substitution is quoted so a path with whitespace stays one argument + } + + eval set -- "$kak_quoted_opt_tree_sitter_grammar_directories" + for directory in "$@" ; do + grammar="$directory/$lang" + [ -e "$grammar" ] || continue + printf "%s\n" "add-highlighter $path tree-sitter $lang $(escape "$grammar") $faces" && exit # first matching directory wins, a second add-highlighter would be a duplicate id + done + printf "%s\n" "fail $(escape "no tree-sitter grammar found for $lang")" # no try around this block: errors must reach the caller so its catch can install the regex fallback + }} +} diff --git a/src/Makefile b/src/Makefile index bb0f32e6ea..2e44c58e90 100644 --- a/src/Makefile +++ b/src/Makefile @@ -3,6 +3,7 @@ static ?= no gzip_man ?= yes # to get format compatible with GitHub archive use "gzip -S .gz" here compress_bin ?= bzip2 +tree_sitter ?= yes ifneq ($(gzip_man),yes) ifneq ($(gzip_man),no) @@ -23,6 +24,16 @@ else endif endif +ifeq ($(tree_sitter),yes) + CPPFLAGS += -DKAK_TREE_SITTER + CXXFLAGS += `pkg-config --cflags tree-sitter` + LDFLAGS += `pkg-config --libs tree-sitter` +else + ifneq ($(tree_sitter),no) + $(error tree_sitter should be either yes or no) + endif +endif + ifneq (,$(findstring address,$(sanitize))) CXXFLAGS += -fsanitize=address LDFLAGS += -lasan diff --git a/src/highlighter.hh b/src/highlighter.hh index 925dcaa7d6..e20c364824 100644 --- a/src/highlighter.hh +++ b/src/highlighter.hh @@ -83,6 +83,11 @@ private: const HighlightPass m_passes; }; +struct
HighlighterDelegate : public Highlighter { + using Highlighter::Highlighter; + virtual Highlighter& delegate() const = 0; +}; + using HighlighterParameters = ConstArrayView; using HighlighterFactory = std::unique_ptr (*)(HighlighterParameters params, Highlighter* parent); diff --git a/src/highlighters.cc b/src/highlighters.cc index 7e6bd1c0cb..b862e452ce 100644 --- a/src/highlighters.cc +++ b/src/highlighters.cc @@ -16,6 +16,7 @@ #include "regex.hh" #include "register_manager.hh" #include "string.hh" +#include "tree_sitter.hh" #include "utf8.hh" #include "utf8_iterator.hh" #include "window.hh" @@ -2027,8 +2028,8 @@ struct RegionsHighlighter : public Highlighter { if (dynamic_cast(parent)) return true; - if (auto* region = dynamic_cast(parent)) - return is_regions(®ion->delegate()); + if (auto* highlighter = dynamic_cast(parent)) + return is_regions(&highlighter->delegate()); return false; } @@ -2078,12 +2079,12 @@ struct RegionsHighlighter : public Highlighter } private: - struct RegionHighlighter : public Highlighter + struct RegionHighlighter : public HighlighterDelegate { RegionHighlighter(std::unique_ptr&& delegate, String begin, String end, String recurse, bool match_capture) - : Highlighter{delegate->passes()}, + : HighlighterDelegate{delegate->passes()}, m_delegate{std::move(delegate)}, m_begin{std::move(begin)}, m_end{std::move(end)}, m_recurse{std::move(recurse)}, m_match_capture{match_capture} @@ -2091,7 +2092,7 @@ struct RegionsHighlighter : public Highlighter } RegionHighlighter(std::unique_ptr&& delegate) - : Highlighter{delegate->passes()}, m_delegate{std::move(delegate)}, m_default{true} + : HighlighterDelegate{delegate->passes()}, m_delegate{std::move(delegate)}, m_default{true} { } @@ -2134,7 +2135,7 @@ struct RegionsHighlighter : public Highlighter bool match_capture() const { return m_match_capture; } bool is_default() const { return m_default; } - Highlighter& delegate() { return *m_delegate; } + Highlighter& delegate() const override { return 
*m_delegate; } // private: std::unique_ptr m_delegate; @@ -2504,6 +2505,12 @@ void register_highlighters() registry.insert({ "show-whitespaces", { ShowWhitespacesHighlighter::create, &show_whitespace_desc } }); + registry.insert({ + "tree-sitter", + { create_tree_sitter_highlighter, &tree_sitter_desc } }); + registry.insert({ + "tree-sitter-injection", + { create_tree_sitter_injection_highlighter, &tree_sitter_injection_desc } }); registry.insert({ "wrap", { WrapHighlighter::create, &wrap_desc } }); diff --git a/src/tree_sitter.cc b/src/tree_sitter.cc new file mode 100644 index 0000000000..853dd8f94a --- /dev/null +++ b/src/tree_sitter.cc @@ -0,0 +1,540 @@ +#include "tree_sitter.hh" + +#ifdef KAK_TREE_SITTER + +#include "buffer.hh" +#include "changes.hh" +#include "context.hh" +#include "coord.hh" +#include "exception.hh" +#include "face.hh" +#include "face_registry.hh" +#include "file.hh" + +#include +#include + +namespace Kakoune +{ + +using DlPtr = std::unique_ptr; +using TSParserPtr = std::unique_ptr; +using TSTreePtr = std::unique_ptr; +using TSQueryPtr = std::unique_ptr; +using TSQueryCursorPtr = std::unique_ptr; + +static TSPoint coord_to_point(BufferCoord coord) +{ + return { + .row = static_cast((int) coord.line), + .column = static_cast((int) coord.column) }; +} + +static BufferCoord point_to_coord(TSPoint point) +{ + return { static_cast(point.row), static_cast(point.column) }; +} + +static void highlight_node(DisplayBuffer& display_buffer, TSNode const& node, Face const& face) +{ + auto begin = point_to_coord(ts_node_start_point(node)); + auto end = point_to_coord(ts_node_end_point(node)); + + for (auto& line : display_buffer.lines()) + { + auto& range = line.range(); + if (range.end <= begin or end < range.begin) + continue; + + for (auto atom_it = line.begin(); atom_it != line.end(); ++atom_it) + { + bool is_replaced = atom_it->type() == DisplayAtom::ReplacedRange; + + if (not atom_it->has_buffer_range() or + end <= atom_it->begin() or begin >= 
atom_it->end()) + continue; + + if (not is_replaced and begin > atom_it->begin()) + atom_it = ++line.split(atom_it, begin); + + if (not is_replaced and end < atom_it->end()) + { + atom_it = line.split(atom_it, end); + atom_it->face = merge_faces(atom_it->face, face); + ++atom_it; + } + else + atom_it->face = merge_faces(atom_it->face, face); + } + } +} + +struct InjectionHighlighterApplier +{ + DisplayBuffer& display_buffer; + HighlightContext& context; + DisplayLineList::iterator cur_line = display_buffer.lines().begin(); + DisplayLineList::iterator end_line = display_buffer.lines().end(); + DisplayLine::iterator cur_atom = cur_line->begin(); + DisplayBuffer region_display{}; + + void operator()(BufferCoord begin, BufferCoord end, Highlighter& highlighter) + { + if (begin == end) + return; + + auto first_line = std::find_if(cur_line, end_line, [&](auto&& line) { return line.range().end > begin; }); + if (first_line != cur_line and first_line != end_line) + cur_atom = first_line->begin(); + cur_line = first_line; + if (cur_line == end_line or cur_line->range().begin >= end) + return; + + auto& region_lines = region_display.lines(); + region_lines.clear(); + Vector> insert_pos; + while (cur_line != end_line and cur_line->range().begin < end) + { + auto& line = *cur_line; + auto first = std::find_if(cur_atom, line.end(), [&](auto&& atom) { return atom.has_buffer_range() and atom.end() > begin; }); + if (first != line.end() and first->type() == DisplayAtom::Range and first->begin() < begin) + first = ++line.split(first, begin); + auto idx = first - line.begin(); + + auto last = std::find_if(first, line.end(), [&](auto&& atom) { return atom.has_buffer_range() and atom.end() > end; }); + if (last != line.end() and last->type() == DisplayAtom::Range and last->begin() < end) + last = ++line.split(last, end); + + if (line.begin() + idx != last) + { + insert_pos.emplace_back(cur_line, idx); + region_lines.push_back(line.extract(line.begin() + idx, last)); + } + + if (idx != 
line.atoms().size()) + break; + else if (++cur_line != end_line) + cur_atom = cur_line->begin(); + } + + if (region_lines.empty()) + return; + + region_display.compute_range(); + highlighter.highlight(context, region_display, {begin, end}); + + for (size_t i = 0; i < insert_pos.size(); ++i) + { + auto& [line_it, idx] = insert_pos[i]; + auto& atoms = region_lines[i].atoms(); + auto it = line_it->insert( + line_it->begin() + idx, + std::move_iterator(atoms.begin()), + std::move_iterator(atoms.end())); + + if (line_it == cur_line) + cur_atom = it + atoms.size(); + } + } +}; + +struct TreeSitterInjectionHighlighter : public HighlighterDelegate +{ + TreeSitterInjectionHighlighter(std::unique_ptr&& delegate) + : HighlighterDelegate{delegate->passes()} + , m_delegate{std::move(delegate)} + { + } + + bool has_children() const override + { + return m_delegate->has_children(); + } + + Highlighter& get_child(StringView path) override + { + return m_delegate->get_child(path); + } + + void add_child(String name, std::unique_ptr&& hl, bool override) override + { + return m_delegate->add_child(name, std::move(hl), override); + } + + void remove_child(StringView id) override + { + return m_delegate->remove_child(id); + } + + Completions complete_child(StringView path, ByteCount cursor_pos, bool group) const override + { + return m_delegate->complete_child(path, cursor_pos, group); + } + + void fill_unique_ids(Vector& unique_ids) const override + { + return m_delegate->fill_unique_ids(unique_ids); + } + + void do_highlight(HighlightContext context, DisplayBuffer& display_buffer, BufferRange range) override + { + return m_delegate->highlight(context, display_buffer, range); + } + + Highlighter& delegate() const override + { + return *m_delegate; + } + +private: + std::unique_ptr m_delegate; +}; + +struct TreeSitterHighlighter : public Highlighter +{ +public: + using InjectionsMap = HashMap, MemoryDomain::Highlight>; + using FacesSpec = Vector, MemoryDomain::Highlight>; + + TSTreePtr 
parse(Buffer& buffer, BufferRange range) + { + auto start_byte = buffer.distance({0, 0}, range.begin); + auto end_byte = start_byte + buffer.distance(range.begin, range.end); + + TSRange ts_range = { + .start_point = coord_to_point(range.begin), + .end_point = coord_to_point(range.end), + .start_byte = static_cast(int{start_byte}), + .end_byte = static_cast(int{end_byte}), + }; + ts_parser_set_included_ranges(m_parser.get(), &ts_range, 1); + + auto read = [](void* ptr, uint32_t, TSPoint point, uint32_t *bytes_read) + { + auto& buffer = *reinterpret_cast(ptr); + + auto coord = point_to_coord(point); + if (coord.line >= buffer.line_count() or + coord.column >= buffer[coord.line].length()) { + *bytes_read = 0; + return ""; + } + + auto string = buffer[coord.line].substr(coord.column); + *bytes_read = static_cast(int{string.length()}); + return string.data(); + }; + + auto tree = TSTreePtr{ts_parser_parse(m_parser.get(), nullptr, { + .payload = reinterpret_cast(&buffer), + .read = read, + .encoding = TSInputEncodingUTF8 })}; + + if (not tree) + ts_parser_reset(m_parser.get()); + + return tree; + } + + void execute_queries(HighlightContext context, DisplayBuffer& display_buffer, BufferRange range, TSTree* tree) + { + uint32_t length; + + if (not tree) + return; + + auto display_range = display_buffer.range(); + auto root = ts_tree_root_node(tree); + ts_query_cursor_set_point_range( + m_cursor.get(), + coord_to_point(display_range.begin), + coord_to_point(display_range.end)); + + if (m_highlights_query) { + ts_query_cursor_exec(m_cursor.get(), m_highlights_query.get(), root); + TSQueryMatch match; + uint32_t capture_index; + while (ts_query_cursor_next_capture(m_cursor.get(), &match, &capture_index)) { + auto& capture = match.captures[capture_index]; + char const* data = ts_query_capture_name_for_id(m_highlights_query.get(), + capture.index, &length); + + auto name = StringView{data, static_cast(length)}; + auto it = find_if(m_faces, + [name](const auto& pair) + { return 
name.starts_with(pair.first); }); + + if (it == m_faces.end()) + continue; + + auto&[id, spec] = *it; + highlight_node(display_buffer, capture.node, context.context.faces()[spec]); + } + } + + if (m_injections_query) { + TSQueryMatch match; + InjectionHighlighterApplier apply_highlighter{display_buffer, context}; + ts_query_cursor_exec(m_cursor.get(), m_injections_query.get(), root); + while (ts_query_cursor_next_match(m_cursor.get(), &match)) { + Optional language{}; + Optional content{}; + + for (uint16_t capture_index = 0; capture_index < match.capture_count; ++capture_index) { + auto& capture = match.captures[capture_index]; + char const* data = ts_query_capture_name_for_id(m_injections_query.get(), + capture.index, &length); + + auto capture_name = StringView{data, static_cast(length)}; + auto capture_begin = point_to_coord(ts_node_start_point(capture.node)); + auto capture_end = point_to_coord(ts_node_end_point(capture.node)); + + if (capture_name == "injection.language") + language = context.context.buffer().string(capture_begin, capture_end); + else if (capture_name == "injection.content") + content = {{capture_begin, capture_end}}; + } + + if (not language or not content) + continue; + + if (m_injections.contains(*language)) + apply_highlighter(content->begin, content->end, + m_injections[*language]->delegate()); + } + } + } + + TreeSitterHighlighter( + DlPtr lib, + TSParserPtr parser, + TSQueryPtr highlights, + TSQueryPtr injections, + FacesSpec faces) + : Highlighter(HighlightPass::Colorize) + , m_lib{std::move(lib)} + , m_parser{std::move(parser)} + , m_highlights_query{std::move(highlights)} + , m_injections_query{std::move(injections)} + , m_faces{std::move(faces)} + , m_cursor{ts_query_cursor_new()} + { + std::sort(m_faces.begin(), m_faces.end(), + [](auto&& lhs, auto&& rhs) { return lhs.first > rhs.first; }); + } + + void do_highlight(HighlightContext context, DisplayBuffer& display_buffer, BufferRange range) override + { + auto tree = 
parse(context.context.buffer(), range); + execute_queries(context, display_buffer, range, tree.get()); + } + + bool has_children() const override { return true; } + + Highlighter& get_child_impl(StringView path) const + { + auto sep_it = find(path, '/'); + StringView id(path.begin(), sep_it); + auto it = m_injections.find(id); + if (it == m_injections.end()) + throw child_not_found(format("no such id: {}", id)); + if (sep_it == path.end()) + return *it->value; + else + return it->value->get_child({sep_it+1, path.end()}); + } + + Highlighter& get_child(StringView path) override + { + return get_child_impl(path); + } + + void add_child(String name, std::unique_ptr&& hl, bool override) override + { + if (not dynamic_cast(hl.get())) + throw runtime_error{"only tree-sitter-injection highlighter can be added as child of a tree-sitter highlighter"}; + auto it = m_injections.find(name); + if (not override and it != m_injections.end()) + throw runtime_error{format("duplicate id: '{}'", name)}; + + std::unique_ptr injection_hl{dynamic_cast(hl.release())}; + if (it != m_injections.end()) + it->value = std::move(injection_hl); + else + m_injections.insert({std::move(name), std::move(injection_hl)}); + } + + void remove_child(StringView id) override + { + m_injections.remove(id); + } + + Completions complete_child(StringView path, ByteCount cursor_pos, bool group) const override + { + auto sep_it = find(path, '/'); + if (sep_it != path.end()) + { + ByteCount offset = sep_it+1 - path.begin(); + Highlighter& hl = get_child_impl({path.begin(), sep_it}); + return offset_pos(hl.complete_child(path.substr(offset), cursor_pos - offset, group), offset); + } + + auto container = m_injections | transform(&decltype(m_injections)::Item::key); + auto completions_flags = group ? 
Completions::Flags::None : Completions::Flags::Menu; + return { 0, 0, complete(path, cursor_pos, container), completions_flags }; + } + + static bool is_tree_sitter(Highlighter* parent) + { + if (dynamic_cast(parent)) + return true; + if (auto* highlighter = dynamic_cast(parent)) + return is_tree_sitter(&highlighter->delegate()); + return false; + } + + static std::unique_ptr create(HighlighterParameters params, Highlighter*) + { + uint32_t error_offset; + TSQueryError error_type; + + if (params.size() < 2) + throw runtime_error{"wrong parameter count"}; + + auto lang = params[0]; + auto dir = String{params[1]}; + auto parser_path = dir + "/parser"; + auto highlights_query_path = dir + "/queries/highlights.scm"; + auto injections_query_path = dir + "/queries/injections.scm"; + + auto lib = DlPtr{dlopen(parser_path.c_str(), RTLD_LAZY | RTLD_LOCAL)}; + if (not lib) + throw runtime_error{format( + "could not load {} parser at {}", + lang, dlerror())}; + + auto sym = String{"tree_sitter_"}; + sym += lang; + + auto get_language = reinterpret_cast(dlsym(lib.get(), sym.c_str())); + if (not get_language) + throw runtime_error{format( + "could not load {} parser at {}", + lang, dlerror())}; + + auto language = get_language(); + auto parser = TSParserPtr{ts_parser_new()}; + if (not ts_parser_set_language(parser.get(), language)) + throw runtime_error{format( + "could not load {} parser at {}: incompatible ABI version {}, expected at least {}", + lang, dir, ts_language_version(language), + TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION)}; + + TSQueryPtr highlights_query{nullptr}; + if (file_exists(highlights_query_path)) + { + auto highlights_query_str = read_file(highlights_query_path); + highlights_query = TSQueryPtr{ts_query_new( + language, + highlights_query_str.data(), + int{highlights_query_str.length()}, + &error_offset, + &error_type)}; + } + + TSQueryPtr injections_query{nullptr}; + if (file_exists(injections_query_path)) + { + auto injections_query_str = 
read_file(injections_query_path); + injections_query = TSQueryPtr{ts_query_new( + language, + injections_query_str.data(), + int{injections_query_str.length()}, + &error_offset, + &error_type)}; + } + + if (not highlights_query and not injections_query) + throw runtime_error{format( + "could not load {} parser at {}: missing queries for parser", + lang, dir)}; + + FacesSpec faces; + for (auto& spec : params.subrange(2)) + { + auto colon = find(spec, ':'); + if (colon == spec.end()) + throw runtime_error(format("wrong face spec: '{}' expected :", spec)); + StringView id{spec.begin(), colon}; + StringView face{colon+1, spec.end()}; + faces.emplace_back(id, parse_face(face)); + } + + return std::make_unique( + std::move(lib), + std::move(parser), + std::move(highlights_query), + std::move(injections_query), + std::move(faces)); + } + + static std::unique_ptr create_injection(HighlighterParameters params, Highlighter* parent) + { + if (not is_tree_sitter(parent)) + throw runtime_error{"tree-sitter-injection highlighter can only be added to a tree-sitter parent"}; + + if (params.empty()) + throw runtime_error{"wrong parameter count"}; + + const auto& type = params[0]; + auto& registry = HighlighterRegistry::instance(); + auto it = registry.find(type); + if (it == registry.end()) + throw runtime_error(format("no such highlighter type: '{}'", type)); + + auto delegate = it->value.factory(params.subrange(1), nullptr); + return std::make_unique(std::move(delegate)); + } + +private: + DlPtr m_lib; + TSParserPtr m_parser; + TSQueryPtr m_highlights_query; + TSQueryPtr m_injections_query; + FacesSpec m_faces; + TSQueryCursorPtr m_cursor; + InjectionsMap m_injections{}; +}; + +std::unique_ptr create_tree_sitter_highlighter(HighlighterParameters params, Highlighter* parent) { + return TreeSitterHighlighter::create(params, parent); +} + +std::unique_ptr create_tree_sitter_injection_highlighter(HighlighterParameters params, Highlighter* parent) { + return 
TreeSitterHighlighter::create_injection(params, parent); +} + +} + +#else // KAK_TREE_SITTER + +#include "exception.hh" + +namespace Kakoune { + +std::unique_ptr create_tree_sitter_highlighter(HighlighterParameters params, Highlighter* parent) { + throw runtime_error{"This binary was compiled without tree-sitter support"}; +} + +std::unique_ptr create_tree_sitter_injection_highlighter(HighlighterParameters params, Highlighter* parent) { + throw runtime_error{"This binary was compiled without tree-sitter support"}; +} + +} + +#endif // KAK_TREE_SITTER diff --git a/src/tree_sitter.hh b/src/tree_sitter.hh new file mode 100644 index 0000000000..939ef923ae --- /dev/null +++ b/src/tree_sitter.hh @@ -0,0 +1,28 @@ +#ifndef tree_sitter_hh_INCLUDED +#define tree_sitter_hh_INCLUDED + +#include "highlighter.hh" + +namespace Kakoune +{ + +const HighlighterDesc tree_sitter_injection_desc = { + "Parameters: ...\n" + "Apply the given delegate highlighter as defined by and \n" + "to tree-sitter injection.content nodes where the injection.language is\n" + ".", + {} +}; +std::unique_ptr create_tree_sitter_injection_highlighter(HighlighterParameters params, Highlighter* parent); + +const HighlighterDesc tree_sitter_desc = { + "Parameters: : :...\n" + "Highlight the tree-sitter nodes by id with the given faces.\n" + "The ids will be sorted and matched by longest prefix.", + {} +}; +std::unique_ptr create_tree_sitter_highlighter(HighlighterParameters params, Highlighter* parent); + +} + +#endif // tree_sitter_hh_INCLUDED