Merge pull request #659 from pulsar-edit/add-markdown-tree-sitter-grammar

Add the Tree-sitter Markdown grammar
This commit is contained in:
confused_techie 2023-08-15 17:13:26 -07:00 committed by GitHub
commit 9e0db5b054
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 484 additions and 19 deletions

View File

@ -242,12 +242,17 @@
color: #555;
}
.syntax--punctuation.syntax--definition.syntax--list-item {
color: #C6C5FE;
}
.syntax--variable.syntax--list,
.syntax--support.syntax--quote {
color: #555;
}
.syntax--link .syntax--entity {
.syntax--link .syntax--entity,
.syntax--meta.syntax--link.syntax--text {
color: #ddd;
}

View File

@ -202,11 +202,6 @@
color: #eee;
}
// - item
&.syntax--list {
color: #555;
}
// > quote
&.syntax--quote {
color: #555;
@ -226,6 +221,14 @@
&.syntax--alt {
color: #ddd;
}
// Bold Markdown text. `bold` is a value of `font-weight`; it is not a valid
// `font-style` value, so the previous declaration was ignored by the browser.
&.syntax--bold {
  font-weight: bold;
}
&.syntax--italic {
font-style: italic;
}
}
// /* comment */

View File

@ -170,9 +170,14 @@
color: #888;
}
.syntax--variable.syntax--list {
.syntax--variable.syntax--list,
.syntax--punctuation.syntax--definition.syntax--list-item {
color: #888;
}
.syntax--meta.syntax--link.syntax--text {
color: #555;
}
}
.syntax--markdown {

View File

@ -190,6 +190,14 @@
&.syntax--link {
color: #888;
}
// Bold Markdown text. `bold` is a value of `font-weight`; it is not a valid
// `font-style` value, so the previous declaration was ignored by the browser.
&.syntax--bold {
  font-weight: bold;
}
&.syntax--italic {
font-style: italic;
}
}
// /* comment */

View File

@ -274,6 +274,20 @@
-webkit-font-smoothing: auto;
}
.syntax--meta.syntax--link.syntax--text {
color: @cyan;
}
.syntax--punctuation.syntax--definition.syntax--list-item {
color: @red;
}
.syntax--markup.syntax--heading {
.syntax--punctuation {
color: inherit;
}
}
.syntax--link .syntax--entity {
color: @cyan;
}

View File

@ -258,6 +258,13 @@
color: @gray;
}
}
&.syntax--quote.syntax--blockquote {
color: @gray;
.syntax--punctuation.syntax--definition.syntax--blockquote {
color: @gray;
}
}
}
// /* comment */

View File

@ -274,6 +274,20 @@
-webkit-font-smoothing: auto;
}
.syntax--meta.syntax--link.syntax--text {
color: @cyan;
}
.syntax--punctuation.syntax--definition.syntax--list-item {
color: @red;
}
.syntax--markup.syntax--heading {
.syntax--punctuation {
color: inherit;
}
}
.syntax--link .syntax--entity {
color: @cyan;
}

View File

@ -258,6 +258,13 @@
color: @gray;
}
}
&.syntax--quote.syntax--blockquote {
color: @gray;
.syntax--punctuation.syntax--definition.syntax--blockquote {
color: @gray;
}
}
}
// /* comment */

View File

@ -0,0 +1,30 @@
# Top-level grammar for Markdown files (scope `source.gfm`). Its parser is the
# front-matter parser, not the Markdown parser itself.
name: 'GitHub Markdown'
scopeName: 'source.gfm'
type: 'modern-tree-sitter'
# Generated from `savetheclocktower/tree-sitter-frontmatter`.
parser: 'tree-sitter-frontmatter'
# Paths to the compiled parser and its highlights query.
treeSitter:
grammar: 'tree-sitter/tree-sitter-frontmatter.wasm'
highlightsQuery: 'tree-sitter/tree-sitter-frontmatter/highlights.scm'
# File extensions associated with this grammar.
fileTypes: [
'markdown'
'md'
'mdown'
'mdwn'
'mkd'
'mkdn'
'mkdown'
'rmd'
'ron'
'workbook'
]
# Matches a first line consisting solely of `---`.
# NOTE(review): presumably the opening fence of YAML front matter, used to
# pick this grammar from file content; confirm against grammar selection.
firstLineRegex: '^---$'
# HTML comment delimiters, given for both the line and the block form.
comments:
start: '<!--'
end: '-->'
blockStart: '<!--'
blockEnd: '-->'

View File

@ -0,0 +1,31 @@
# This grammar doesn't have its own name because it's only meant to be injected.
scopeName: 'source.gfm.embedded'
type: 'modern-tree-sitter'
parser: 'tree-sitter-markdown'
# Matches a requested language name that ends in one of these four spellings.
injectionRegex: '(MARKDOWN|markdown|GFM|gfm)$'
# Paths to the compiled parser and its highlights, folds and tags queries.
treeSitter:
grammar: 'tree-sitter/tree-sitter-markdown.wasm'
highlightsQuery: 'tree-sitter/tree-sitter-markdown/highlights.scm'
foldsQuery: 'tree-sitter/tree-sitter-markdown/folds.scm'
tagsQuery: 'tree-sitter/tree-sitter-markdown/tags.scm'
# NOTE(review): this is the same extension list as the top-level `source.gfm`
# grammar, although this grammar is described as injection-only. Confirm the
# two grammars cannot compete for the same files.
fileTypes: [
'markdown'
'md'
'mdown'
'mdwn'
'mkd'
'mkdn'
'mkdown'
'rmd'
'ron'
'workbook'
]
# HTML comment delimiters, given for both the line and the block form.
comments:
start: '<!--'
end: '-->'
blockStart: '<!--'
blockEnd: '-->'

View File

@ -0,0 +1 @@
; Scope the whole front matter node as an embedded block.
(front_matter) @meta.embedded.block.front-matter.gfm

View File

@ -0,0 +1,22 @@
; TODO: Folds in Markdown files will have to wait until we can add "tags" to
; divided folds. We want an H1 section to be able to fold up everything until
; the next H1 in the file, an H2 to fold up everything until the next H2 _or_
; H1, an H3 to fold up everything until the next H3 _or_ H2 _or_ H1… but this
; is not currently possible.
; (atx_heading (atx_h1_marker)) @fold.start.h1 @fold.end.h1 @fold.end.h2 @fold.end.h3 @fold.end.h4 @fold.end.h5 @fold.end.h6
;
; (atx_heading (atx_h2_marker)) @fold.start.h2 @fold.end.h2 @fold.end.h3 @fold.end.h4 @fold.end.h5 @fold.end.h6
;
; (atx_heading (atx_h3_marker)) @fold.start.h3 @fold.end.h3 @fold.end.h4 @fold.end.h5 @fold.end.h6
;
; (atx_heading (atx_h4_marker)) @fold.start.h4 @fold.end.h4 @fold.end.h5 @fold.end.h6
;
; (atx_heading (atx_h5_marker)) @fold.start.h5 @fold.end.h5 @fold.end.h6
;
; (atx_heading (atx_h6_marker)) @fold.start.h6 @fold.end.h6
;
; ; [(atx_heading) (setext_heading)] @fold.end @fold.start
; Until heading folds are possible, list items are the only nodes captured
; as folds.
; NOTE(review): `fold.endAt endPosition` presumably ends the fold at the list
; item's own end position; confirm against the fold-query documentation.
((list_item) @fold
(#set! fold.endAt endPosition))

View File

@ -0,0 +1,187 @@
; HEADINGS
; ========
(setext_heading
(heading_content) @markup.heading.heading-1.gfm
(setext_h1_underline) @punctuation.definition.heading-underline.gfm)
(setext_heading
(heading_content) @markup.heading.heading-2.gfm
(setext_h2_underline) @punctuation.definition.heading-underline.gfm)
(atx_heading
(atx_h1_marker) @punctuation.definition.heading.gfm
) @markup.heading.heading-1.gfm
(atx_heading
(atx_h2_marker) @punctuation.definition.heading.gfm
) @markup.heading.heading-2.gfm
(atx_heading
(atx_h3_marker) @punctuation.definition.heading.gfm
) @markup.heading.heading-3.gfm
(atx_heading
(atx_h4_marker) @punctuation.definition.heading.gfm
) @markup.heading.heading-4.gfm
(atx_heading
(atx_h5_marker) @punctuation.definition.heading.gfm
) @markup.heading.heading-5.gfm
(atx_heading
(atx_h6_marker) @punctuation.definition.heading.gfm
) @markup.heading.heading-6.gfm
; SECTIONS
; ========
(paragraph) @markup.paragraph.gfm
(thematic_break) @punctuation.definition.horizontal-rule.gfm
(block_quote) @markup.quote.blockquote.gfm
((block_quote) @punctuation.definition.blockquote.gfm
(#set! adjust.endAfterFirstMatchOf ">"))
; LISTS
; =====
; `markup.list` gets applied to individual list items, unintuitively. So let's
; scope the entire list. “Tight” vs “Loose” has to do with whether each `<li>`
; has one or more implicit `<p>` tags around it.
[(tight_list) (loose_list)] @meta.list.gfm
((list_item
(list_marker) @punctuation.definition.list-item.gfm) @markup.list.unnumbered
; Instead of matching bullet or minus or plus, any not-digit here is
; guaranteed to be an unordered list.
(#not-match? @punctuation.definition.list-item.gfm "^\\d"))
((list_item
(list_marker) @punctuation.definition.list-item.gfm) @markup.list.numbered
(#match? @punctuation.definition.list-item.gfm "^\\d"))
((task_list_item
(list_marker) @punctuation.definition.list-item.gfm) @markup.list.unnumbered
; Instead of matching bullet or minus or plus, any not-digit here is
; guaranteed to be an unordered list.
(#not-match? @punctuation.definition.list-item.gfm "^\\d"))
((task_list_item
(list_marker) @punctuation.definition.list-item.gfm) @markup.list.numbered
(#match? @punctuation.definition.list-item.gfm "^\\d"))
; INLINE/REPLACED
; ===============
; The text inside []s in anchors/image syntax.
[(link_text) (image_description)] @string.unquoted.gfm @meta.link.text
(link_label (text) @meta.link.text)
; A URL between ()s in anchor syntax.
(link_destination) @markup.underline.link.gfm
((link) @punctuation.definition.begin.link.bracket.round.gfm
(#set! adjust.startAndEndAroundFirstMatchOf "(?<=\\])\\("))
((link) @punctuation.definition.end.link.bracket.round.gfm
(#set! adjust.startAndEndAroundFirstMatchOf "\\)$"))
((link) @punctuation.definition.begin.link.bracket.square.gfm
(#set! adjust.endAfterFirstMatchOf "^\\["))
((link) @punctuation.definition.end.link.bracket.square.gfm
(#set! adjust.startAndEndAroundFirstMatchOf "\\](?=\\(|\\[)"))
; Punctuation of a link reference definition, e.g. `[label]: destination`.
; The opening bracket is the `[` at the very start of the node.
((link_reference_definition) @punctuation.definition.begin.link.bracket.square.gfm
  (#set! adjust.endAfterFirstMatchOf "^\\["))

; The closing bracket of the label is followed by the `:` separator, not by
; `(` or `[` as in an inline or reference link, so look ahead for the colon.
((link_reference_definition) @punctuation.definition.end.link.bracket.square.gfm
  (#set! adjust.startAndEndAroundFirstMatchOf "\\](?=:)"))

; The colon that separates the label from the destination.
((link_reference_definition) @punctuation.separator.link.colon.gfm
  (#set! adjust.startAndEndAroundFirstMatchOf ":"))
; A URL between <>s in autolink syntax.
(uri_autolink (text) @markup.underline.link.gfm)
((uri_autolink) @punctuation.definition.link.begin.bracket.angle.gfm
(#set! adjust.endAfterFirstMatchOf "^<"))
((uri_autolink) @punctuation.definition.link.end.bracket.angle.gfm
(#set! adjust.startBeforeFirstMatchOf ">$"))
; A link title: `[foo](http://example.com "Example web site")`
((link_title) @string.quoted.double.link-title.gfm
(#match? @string.quoted.double.link-title.gfm "^\"")
(#set! capture.final true))
((link_title) @punctuation.definition.string.begin.gfm
(#match? @punctuation.definition.string.begin.gfm "^\"")
(#set! adjust.endAfterFirstMatchOf "^\""))
((link_title) @punctuation.definition.string.end.gfm
(#match? @punctuation.definition.string.end.gfm "\"$")
(#set! adjust.startBeforeFirstMatchOf "\"$"))
; Out of laziness, let's throw all other kinds of link title into the generic
; bin — they are all delimited _somehow_, right?
(link_title) @string.quoted.link-title.gfm
; Link labels in `[foo][bar]` syntax, where `bar` is associated with a URL via
; a subsequent footnote, actually work correctly when one runs "Link: Open" in
; Pulsar, so these should be treated like links.
(link_label) @markup.underline.link.link-label.gfm
(image) @meta.image.gfm
; CODE BLOCKS
; ===========
(code_span) @meta.embedded.line.inline-code.gfm @markup.raw.inline.gfm
(info_string) @storage.modifier.language._TEXT_.gfm
(fenced_code_block) @markup.code.fenced.gfm @meta.embedded.block.fenced-code.gfm
(indented_code_block) @markup.code.indented.gfm @meta.embedded.block.indented-code.gfm
; BOLD/ITALIC/OTHER
; =================
(emphasis) @markup.italic.gfm
(strong_emphasis) @markup.bold.gfm
(strikethrough) @markup.strike.gfm
((emphasis) @punctuation.delimiter.emphasis.begin.gfm
(#set! adjust.startAndEndAroundFirstMatchOf "^(\\*|_)"))
((emphasis) @punctuation.delimiter.emphasis.end.gfm
(#set! adjust.startAndEndAroundFirstMatchOf "(\\*|_)$"))
((strong_emphasis) @punctuation.delimiter.emphasis.begin.gfm
(#set! adjust.startAndEndAroundFirstMatchOf "^(\\*{2}|_{2})"))
((strong_emphasis) @punctuation.delimiter.emphasis.end.gfm
(#set! adjust.startAndEndAroundFirstMatchOf "(\\*{2}|_{2})$"))
; Opening and closing `~~` delimiters of struck-through text. The leading pair
; is scoped as `begin` and the trailing pair as `end`, mirroring the emphasis
; delimiters.
((strikethrough) @punctuation.delimiter.strike.begin.gfm
  (#set! adjust.startAndEndAroundFirstMatchOf "^~~"))
((strikethrough) @punctuation.delimiter.strike.end.gfm
  (#set! adjust.startAndEndAroundFirstMatchOf "~~$"))
; HTML
; ====
(html_comment) @comment.block.html
; MISC
; ====
(table) @markup.other.table.gfm
(table_header_row (table_cell) @markup.other.table-cell.header.gfm)
(table_data_row (table_cell) @markup.other.table-cell.data.gfm)
(table_delimiter_row (table_column_alignment) @punctuation.separator.table-row.gfm)
(backslash_escape) @constant.character.escape.gfm

View File

@ -0,0 +1,3 @@
; Intentionally empty indents.scm. By and large, indentation level should be
; manually controlled by the user in Markdown; the best thing we can do is stay
; out of the user's way.

View File

@ -0,0 +1,48 @@
; ATX headings (`#` through `######`). Each level captures the heading content
; as @name and the whole heading as @definition.heading.
;
; `symbol.strip` is given a regex matching leading and trailing whitespace, and
; `symbol.prepend` is given one middle dot per heading level plus a space.
; NOTE(review): presumably these trim the symbol name and prefix it to show
; nesting depth; confirm against the tags-query documentation.
((atx_heading
(atx_h1_marker)
(heading_content) @name) @definition.heading
(#set! symbol.strip "(^\\s*|\\s*$)")
(#set! symbol.prepend "· "))
((atx_heading
(atx_h2_marker)
(heading_content) @name) @definition.heading
(#set! symbol.strip "(^\\s*|\\s*$)")
(#set! symbol.prepend "·· "))
((atx_heading
(atx_h3_marker)
(heading_content) @name) @definition.heading
(#set! symbol.strip "(^\\s*|\\s*$)")
(#set! symbol.prepend "··· "))
((atx_heading
(atx_h4_marker)
(heading_content) @name) @definition.heading
(#set! symbol.strip "(^\\s*|\\s*$)")
(#set! symbol.prepend "···· "))
((atx_heading
(atx_h5_marker)
(heading_content) @name) @definition.heading
(#set! symbol.strip "(^\\s*|\\s*$)")
(#set! symbol.prepend "····· "))
((atx_heading
(atx_h6_marker)
(heading_content) @name) @definition.heading
(#set! symbol.strip "(^\\s*|\\s*$)")
(#set! symbol.prepend "······ "))
; Setext headings (text underlined with `===` or `---`). The underline node is
; a child of `setext_heading`, so it must appear inside the heading pattern;
; written outside, it is required as a following sibling and never matches.
;
; An `=` underline is a level-1 heading and gets one dot.
((setext_heading
  (heading_content) @name
  (setext_h1_underline)) @definition.heading
  (#set! symbol.strip "(^\\s*|\\s*$)")
  (#set! symbol.prepend "· "))

; A `-` underline is a level-2 heading and gets two dots.
((setext_heading
  (heading_content) @name
  (setext_h2_underline)) @definition.heading
  (#set! symbol.strip "(^\\s*|\\s*$)")
  (#set! symbol.prepend "·· "))

View File

@ -0,0 +1,89 @@
exports.activate = () => {
// The top-level tree-sitter parser for `source.gfm` simply divides the text
// into front matter (if it exists) and the remainder, which is directly
// parsed as Markdown.
//
// We do this because the `ikatyang/tree-sitter-markdown` parser does not
// recognize YAML front matter, but is otherwise a very strong Markdown
// parser. If the `MDeiml/tree-sitter-markdown` parser became more stable,
// we could consider switching, and then we wouldn't need this extra parser.
// Hand off the front matter to the YAML injection.
atom.grammars.addInjectionPoint('source.gfm', {
type: 'front_matter',
language: () => 'yaml',
content(node) {
return node.descendantsOfType('text');
}
});
// Hand off everything else to the Markdown injection.
atom.grammars.addInjectionPoint('source.gfm', {
type: 'remainder',
language: () => 'markdown',
content: (node) => node,
languageScope: null
});
// The markdown injection has a scope name of `source.gfm.embedded` so we can
// target it for the rest of these injections, but you can see above that we
// suppress that scope name when we inject it into a document.
// Highlight HTML blocks.
atom.grammars.addInjectionPoint('source.gfm.embedded', {
type: 'html_block',
language: () => 'html',
content: (node) => node,
includeChildren: true
});
// Highlight inline HTML within paragraphs.
atom.grammars.addInjectionPoint('source.gfm.embedded', {
type: 'paragraph',
language(node) {
let html = node.descendantsOfType([
'html_open_tag',
'html_close_tag',
'html_self_closing_tag'
]);
if (html.length === 0) { return null; }
return 'html';
},
content(node) {
let html = node.descendantsOfType([
'html_open_tag',
'html_close_tag',
'html_self_closing_tag'
]);
return html;
},
includeChildren: true
});
// All code blocks of the form
//
// ```foo
// (code goes here)
// ```
//
// get injections on the theory that some grammar's `injectionRegex` will
// match `foo`.
atom.grammars.addInjectionPoint('source.gfm.embedded', {
type: 'fenced_code_block',
language(node) {
let language = node?.firstNamedChild;
if (language?.type === 'info_string')
return language.text;
return null;
},
content(node) {
return node.descendantsOfType('code_fence_content');
},
languageScope: (grammar) => `${grammar.scopeName}.embedded`,
includeChildren: true
});
};

View File

@ -1,6 +1,7 @@
{
"name": "language-gfm",
"version": "0.90.8",
"main": "lib/main",
"description": "Syntax highlighting and snippets for GitHub Flavored Markdown (GFM).",
"repository": "https://github.com/pulsar-edit/pulsar",
"license": "MIT",

View File

@ -129,11 +129,6 @@
color: @mono-1;
}
&.syntax--heading,
&.syntax--identity {
color: @hue-2;
}
&.syntax--bold {
color: @hue-6-2;
font-weight: bold;
@ -299,7 +294,7 @@
color: @hue-5;
.syntax--punctuation.syntax--definition.syntax--heading {
color: @hue-2;
color: @hue-5;
}
}

View File

@ -129,11 +129,6 @@
color: @mono-1;
}
&.syntax--heading,
&.syntax--identity {
color: @hue-2;
}
&.syntax--bold {
color: @hue-6-2;
font-weight: bold;
@ -299,7 +294,7 @@
color: @hue-5;
.syntax--punctuation.syntax--definition.syntax--heading {
color: @hue-2;
color: @hue-5;
}
}