Merge branch 'tests-for-ruby' into feature/modernize-tree-sitter

This commit is contained in:
Maurício Szabo 2023-02-25 16:54:25 -03:00
commit cf9abdb463
12 changed files with 612 additions and 56 deletions

View File

@ -145,6 +145,11 @@ jobs:
- name: Checkout the latest code
uses: actions/checkout@v2
- name: Setup node
uses: actions/setup-node@v2-beta
with:
node-version: 16
- name: Install Dependencies
run: yarn install || yarn install

View File

@ -0,0 +1,98 @@
name: 'Ruby'
scopeName: 'source.ruby'
type: 'tree-sitter-2'
parser: 'tree-sitter-ruby'
injectionRegex: 'rb|ruby'
treeSitter:
grammar: 'ts/grammar.wasm'
syntaxQuery: 'ts/highlights.scm'
localsQuery: 'ts/locals.scm'
firstLineRegex: [
  # shebang line, e.g. `#!/usr/bin/env ruby` or `#!/usr/bin/jruby`.
  # Every regex escape must be written with a doubled backslash here: a single
  # backslash before `w` is consumed by the string literal and leaves a plain
  # `w` in the pattern, which stops prefixed interpreters (jruby, …) matching.
  '^#!.*\\b(\\w*ruby|rake)\\r?\\n'
  # vim modeline
  'vim\\b.*\\bset\\b.*\\b(filetype|ft|syntax)=ruby'
]
fileTypes: [
'rb',
'rake',
'Podfile',
'Brewfile',
'Rakefile',
'Gemfile'
]
comments:
start: '# '
folds: [
{
type: ['block', 'do_block']
start: {type: 'block_parameters'}
end: {index: -1}
}
{
type: 'begin'
start: {index: 0}
end: {type: 'rescue'}
}
{
type: 'heredoc_body',
end: {type: 'heredoc_end'}
}
{
type: [
'hash'
'array'
'begin'
'block'
'do_block'
]
start: {index: 0}
end: {index: -1}
}
{
type: 'argument_list'
start: {index: 0, type: '('}
end: {index: -1}
}
{
type: 'class'
start: {type: 'superclass'}
end: {index: -1}
}
{
type: 'class'
start: {index: 1}
end: {index: -1}
}
{
type: ['method', 'singleton_method']
start: {type: 'method_parameters'}
end: {index: -1}
}
{
type: ['method', 'singleton_method']
start: {index: 1}
end: {index: -1}
}
{
type: 'then',
start: {index: 0, type: '"then"'}
}
{
type: 'then'
}
{
type: 'case',
end: {index: -1}
}
{
type: 'else'
start: {index: 0}
}
]

Binary file not shown.

View File

@ -0,0 +1,146 @@
; Keywords
[
"alias"
"and"
"begin"
"break"
"case"
"class"
"def"
"do"
"else"
"elsif"
"end"
"ensure"
"for"
"if"
"in"
"module"
"next"
"or"
"rescue"
"retry"
"return"
"then"
"unless"
"until"
"when"
"while"
"yield"
] @keyword
((identifier) @keyword
(#match? @keyword "^(private|protected|public)$"))
; Function calls
((identifier) @function.method.builtin
(#eq? @function.method.builtin "require"))
"defined?" @function.method.builtin
(call
method: [(identifier) (constant)] @function.method)
; Function definitions
(alias (identifier) @function.method)
(setter (identifier) @function.method)
(method name: [(identifier) (constant)] @function.method)
(singleton_method name: [(identifier) (constant)] @function.method)
; Identifiers
[
(class_variable)
(instance_variable)
] @property
((identifier) @constant.builtin
(#match? @constant.builtin "^__(FILE|LINE|ENCODING)__$"))
((constant) @constant
(#match? @constant "^[A-Z\\d_]+$"))
(constant) @constructor
(self) @variable.builtin
(super) @variable.builtin
(block_parameter (identifier) @variable.parameter)
(block_parameters (identifier) @variable.parameter)
(destructured_parameter (identifier) @variable.parameter)
(hash_splat_parameter (identifier) @variable.parameter)
(lambda_parameters (identifier) @variable.parameter)
(method_parameters (identifier) @variable.parameter)
(splat_parameter (identifier) @variable.parameter)
(keyword_parameter name: (identifier) @variable.parameter)
(optional_parameter name: (identifier) @variable.parameter)
((identifier) @function.method
(#is-not? local))
(identifier) @variable
; Literals
[
(string)
(bare_string)
(subshell)
(heredoc_body)
(heredoc_beginning)
] @string
[
(simple_symbol)
(delimited_symbol)
(hash_key_symbol)
(bare_symbol)
] @string.special.symbol
(regex) @string.special.regex
(escape_sequence) @escape
[
(integer)
(float)
] @number
[
(nil)
(true)
(false)
]@constant.builtin
(interpolation
"#{" @punctuation.special
"}" @punctuation.special) @embedded
(comment) @comment
; Operators
[
"="
"=>"
"->"
] @operator
[
","
";"
"."
] @punctuation.delimiter
[
"("
")"
"["
"]"
"{"
"}"
"%w("
"%i("
] @punctuation.bracket

View File

@ -0,0 +1,27 @@
((method) @local.scope
(#set! local.scope-inherits false))
[
(lambda)
(block)
(do_block)
] @local.scope
(block_parameter (identifier) @local.definition)
(block_parameters (identifier) @local.definition)
(destructured_parameter (identifier) @local.definition)
(hash_splat_parameter (identifier) @local.definition)
(lambda_parameters (identifier) @local.definition)
(method_parameters (identifier) @local.definition)
(splat_parameter (identifier) @local.definition)
(keyword_parameter name: (identifier) @local.definition)
(optional_parameter name: (identifier) @local.definition)
(identifier) @local.reference
(assignment left: (identifier) @local.definition)
(operator_assignment left: (identifier) @local.definition)
(left_assignment_list (identifier) @local.definition)
(rest_assignment (identifier) @local.definition)
(destructured_left_assignment (identifier) @local.definition)

View File

@ -0,0 +1,64 @@
; Method definitions
(
(comment)* @doc
.
[
(method
name: (_) @name) @definition.method
(singleton_method
name: (_) @name) @definition.method
]
(#strip! @doc "^#\\s*")
(#select-adjacent! @doc @definition.method)
)
(alias
name: (_) @name) @definition.method
(setter
(identifier) @ignore)
; Class definitions
(
(comment)* @doc
.
[
(class
name: [
(constant) @name
(scope_resolution
name: (_) @name)
]) @definition.class
(singleton_class
value: [
(constant) @name
(scope_resolution
name: (_) @name)
]) @definition.class
]
(#strip! @doc "^#\\s*")
(#select-adjacent! @doc @definition.class)
)
; Module definitions
(
(module
name: [
(constant) @name
(scope_resolution
name: (_) @name)
]) @definition.module
)
; Calls
(call method: (identifier) @name) @reference.call
(
[(identifier) (constant)] @name @reference.call
(#is-not? local)
(#not-match? @name "^(lambda|load|require|require_relative|__FILE__|__LINE__)$")
)

View File

@ -0,0 +1,33 @@
require "a"
# ^ function.method.builtin
class Car < Vehicle
# <- keyword
# ^ constructor
def init(id)
# <- keyword
# ^ function.method
@id = id
# <- property
# ^ variable.parameter
yield
# <- keyword
return
# <- keyword
next
# <- keyword
end
private
# ^ keyword
public
# ^ keyword
protected
# ^ keyword
end
# <- keyword

View File

@ -0,0 +1,70 @@
const dedent = require('dedent');
const path = require('path');
const { Point } = require('atom');
// Checks the scopes produced by the WASM tree-sitter Ruby grammar against
// assertions embedded as comments in a fixture file.
//
// Each comment line in the fixture refers to the closest preceding
// non-comment row and uses one of two markers:
//
//   # ^ scope.name    asserts the scope at the column of the caret
//   # <- scope.name   asserts the scope at the column of the comment's `#`
describe('WASM Tree-sitter Ruby grammar', () => {
  // Single source of truth for the fixture name, so failure messages always
  // point at the file that was actually opened.
  const FIXTURE = 'classes-wasm-ts.rb';

  beforeEach(async () => {
    await atom.packages.activatePackage('language-ruby');
    atom.config.set('core.languageParser', 'wasm-tree-sitter');
  });

  it('tokenizes symbols', async () => {
    const editor = await openDocument(FIXTURE);

    // Collect every line that the grammar scopes as a comment, paired with
    // the last row that was NOT a comment (the row under test).
    const allMatches = [];
    let lastNonComment = 0;
    editor.getBuffer().getLines().forEach((text, i) => {
      const hash = text.match(/#/);
      if (hash) {
        const { scopes } = editor.scopeDescriptorForBufferPosition([i, hash.index]);
        if (scopes.some(scope => /comment/.test(scope))) {
          allMatches.push({ row: lastNonComment, text, col: hash.index });
          return;
        }
      }
      lastNonComment = i;
    });

    // If no comment was recognised at all, the grammar never tokenized.
    expect(allMatches).toSatisfy((matches, reason) => {
      reason("Tokenizer wasn't able to run");
      return matches.length > 0;
    });

    allMatches.forEach(({ text, row, col }) => {
      const caret = text.match(/\^\s+(.*)/);
      const arrow = caret ? null : text.match(/<-\s+(.*)/);
      const marker = caret || arrow;
      // A comment without either marker carries no assertion; skip it instead
      // of failing with a TypeError on a null match.
      if (!marker) return;

      const column = caret ? caret.index : col;
      const expected = marker[1];
      expect(
        editor.scopeDescriptorForBufferPosition([row, column]).scopes
      ).toSatisfy((scopes, reason) => {
        reason(dedent`
          Expected to find scope "${expected}" but found "${scopes}"
          at ${FIXTURE}:${row + 1}:${column + 1}
        `);
        return scopes.includes(expected);
      });
    });
  });
});
/**
 * Opens a file from this spec's `fixtures` directory in the workspace and
 * waits for the editor's language mode to signal that it is ready.
 *
 * @param {string} fileName - File name relative to the `fixtures` directory.
 * @returns {Promise<Object>} The editor returned by `atom.workspace.open`.
 */
async function openDocument(fileName) {
  const editor = await atom.workspace.open(
    path.join(__dirname, 'fixtures', fileName)
  );
  await editor.languageMode.ready;
  return editor;
}

View File

@ -7,6 +7,7 @@ const TextMateLanguageMode = require('./text-mate-language-mode');
const NodeTreeSitterLanguageMode = require('./tree-sitter-language-mode');
const WASMTreeSitterLanguageMode = require('./wasm-tree-sitter-language-mode');
const TreeSitterGrammar = require('./tree-sitter-grammar');
const WASMTreeSitterGrammar = require('./wasm-tree-sitter-grammar');
const ScopeDescriptor = require('./scope-descriptor');
const Token = require('./token');
const fs = require('fs-plus');
@ -30,6 +31,7 @@ module.exports = class GrammarRegistry {
clear() {
this.textmateRegistry.clear();
this.wasmTreeSitterGrammarsById = {};
this.treeSitterGrammarsById = {};
if (this.subscriptions) this.subscriptions.dispose();
this.subscriptions = new CompositeDisposable();
@ -197,8 +199,12 @@ module.exports = class GrammarRegistry {
}
languageModeForGrammarAndBuffer(grammar, buffer) {
if(grammar === 'tree-sitter') {
return new WASMTreeSitterLanguageMode(buffer, this.config);
if (grammar instanceof WASMTreeSitterGrammar) {
return new WASMTreeSitterLanguageMode(
buffer,
this.config,
grammar
);
} else if (grammar instanceof TreeSitterGrammar) {
return new NodeTreeSitterLanguageMode({
grammar,
@ -227,18 +233,15 @@ module.exports = class GrammarRegistry {
selectGrammarWithScore(filePath, fileContents) {
let bestMatch = null;
let highestScore = -Infinity;
if(this.config.get('core.languageParser') === 'wasm-tree-sitter') {
return {grammar: "tree-sitter", score: 10}
} else {
this.forEachGrammar(grammar => {
const score = this.getGrammarScore(grammar, filePath, fileContents);
if (score > highestScore || bestMatch == null) {
bestMatch = grammar;
highestScore = score;
}
});
return { grammar: bestMatch, score: highestScore };
}
this.forEachGrammar(grammar => {
const score = this.getGrammarScore(grammar, filePath, fileContents);
if (score > highestScore || bestMatch == null) {
bestMatch = grammar;
highestScore = score;
}
});
return { grammar: bestMatch, score: highestScore };
// }
}
// Extended: Returns a {Number} representing how well the grammar matches the
@ -255,11 +258,23 @@ module.exports = class GrammarRegistry {
// If multiple grammars match by one of the above criteria, break ties.
if (score > 0) {
const isTreeSitter = grammar instanceof TreeSitterGrammar;
const isNewTreeSitter = grammar instanceof WASMTreeSitterGrammar;
const isOldTreeSitter = grammar instanceof TreeSitterGrammar;
const isTreeSitter = isNewTreeSitter || isOldTreeSitter;
const config = () =>
this.config.get('core.languageParser', {
scope: new ScopeDescriptor({ scopes: [grammar.scopeName] })
})
// Prefer either TextMate or Tree-sitter grammars based on the user's settings.
if (isTreeSitter) {
if (this.shouldUseOldTreeSitterParser(grammar.scopeName)) {
if (isNewTreeSitter) {
if( config() === 'wasm-tree-sitter') {
score += 0.1;
} else {
score = -Infinity
}
} else if (isTreeSitter) {
if ( config() === 'node-tree-sitter' ) {
score += 0.1;
} else {
return -Infinity;
@ -362,7 +377,16 @@ module.exports = class GrammarRegistry {
grammarForId(languageId) {
if (!languageId) return null;
if (this.shouldUseOldTreeSitterParser(languageId)) {
const config = this.config.get('core.languageParser', {
scope: new ScopeDescriptor({ scopes: [languageId] })
})
if ( config === 'wasm-tree-sitter') {
return (
this.wasmTreeSitterGrammarsById[languageId] ||
this.textmateRegistry.grammarForScopeName(languageId)
);
} else if ( config === 'node-tree-sitter' ) {
return (
this.treeSitterGrammarsById[languageId] ||
this.textmateRegistry.grammarForScopeName(languageId)
@ -370,6 +394,7 @@ module.exports = class GrammarRegistry {
} else {
return (
this.textmateRegistry.grammarForScopeName(languageId) ||
this.wasmTreeSitterGrammarsById[languageId] ||
this.treeSitterGrammarsById[languageId]
);
}
@ -520,7 +545,19 @@ module.exports = class GrammarRegistry {
}
addGrammar(grammar) {
if (grammar instanceof TreeSitterGrammar) {
if (grammar instanceof WASMTreeSitterGrammar) {
const existingParams =
this.wasmTreeSitterGrammarsById[grammar.scopeName] || {};
if (grammar.scopeName)
this.wasmTreeSitterGrammarsById[grammar.scopeName] = grammar;
if (existingParams.injectionPoints) {
for (const injectionPoint of existingParams.injectionPoints) {
grammar.addInjectionPoint(injectionPoint);
}
}
this.grammarAddedOrUpdated(grammar);
return new Disposable(() => this.removeGrammar(grammar));
} else if (grammar instanceof TreeSitterGrammar) {
const existingParams =
this.treeSitterGrammarsById[grammar.scopeName] || {};
if (grammar.scopeName)
@ -538,7 +575,9 @@ module.exports = class GrammarRegistry {
}
removeGrammar(grammar) {
if (grammar instanceof TreeSitterGrammar) {
if (grammar instanceof WASMTreeSitterGrammar) {
delete this.wasmTreeSitterGrammarsById[grammar.scopeName];
} else if (grammar instanceof TreeSitterGrammar) {
delete this.treeSitterGrammarsById[grammar.scopeName];
} else {
return this.textmateRegistry.removeGrammar(grammar);
@ -604,10 +643,12 @@ module.exports = class GrammarRegistry {
grammarPath,
CSON.readFileSync(grammarPath) || {}
);
}
}
createGrammar(grammarPath, params) {
if (params.type === 'tree-sitter') {
if (params.type === 'tree-sitter-2') {
return new WASMTreeSitterGrammar(this, grammarPath, params)
} else if (params.type === 'tree-sitter') {
return new TreeSitterGrammar(this, grammarPath, params);
} else {
if (
@ -633,16 +674,20 @@ module.exports = class GrammarRegistry {
let tmGrammars = this.textmateRegistry.getGrammars();
if (!(params && params.includeTreeSitter)) return tmGrammars;
const tsGrammars2 = Object.values(this.wasmTreeSitterGrammarsById).filter(
g => g.scopeName
);
const tsGrammars = Object.values(this.treeSitterGrammarsById).filter(
g => g.scopeName
);
return tmGrammars.concat(tsGrammars); // NullGrammar is expected to be first
return tmGrammars.concat(tsGrammars).concat(tsGrammars2); // NullGrammar is expected to be first
}
scopeForId(id) {
return this.textmateRegistry.scopeForId(id);
}
// TODO: why is this being used? Can we remove it soon?
treeSitterGrammarForLanguageString(languageString) {
let longestMatchLength = 0;
let grammarWithLongestMatch = null;
@ -663,7 +708,6 @@ module.exports = class GrammarRegistry {
}
shouldUseOldTreeSitterParser(languageId) {
return false
return this.config.get('core.languageParser', {
scope: new ScopeDescriptor({ scopes: [languageId] })
}) === 'node-tree-sitter';

View File

@ -0,0 +1,43 @@
const fs = require('fs');
const path = require('path');
const Parser = require('web-tree-sitter');
module.exports = class WASMTreeSitterGrammar {
constructor(registry, grammarPath, params) {
this.scopeName = params.scopeName
const dirName = path.dirname(grammarPath)
const qPath = path.join(dirName, params.treeSitter.syntaxQuery)
this.syntaxQuery = fs.readFileSync(qPath, 'utf-8')
if(params.treeSitter.localsQuery) {
const lPath = path.join(dirName, params.treeSitter.localsQuery)
this.localsQuery = fs.readFileSync(lPath, 'utf-8')
}
this.grammarPath = path.join(dirName, params.treeSitter.grammar)
this.contentRegex = buildRegex(params.contentRegex);
this.firstLineRegex = buildRegex(params.firstLineRegex);
this.fileTypes = params.fileTypes || [];
this.registry = registry
}
// TODO: Why is this here?
activate() {
this.registration = this.registry.addGrammar(this);
}
// TODO: Why is this here?
deactivate() {
this.registration?.dispose();
}
inspect() {
return `TreeSitterGrammar {scopeName: ${this.scopeName}}`;
}
}
/**
 * Builds a RegExp from a pattern given in a grammar file.
 *
 * An array is accepted for readability of the grammar file: each entry is
 * wrapped in a group and the groups are joined as alternatives.
 *
 * @param {string|string[]|*} value - Pattern source, or a list of alternatives.
 * @returns {RegExp|null} The compiled pattern, or null when `value` is neither
 *   a string nor a non-empty array.
 */
function buildRegex(value) {
  let source = value;
  if (Array.isArray(value)) {
    // An empty list would join to '' and compile to a pattern that matches
    // every input; treat it the same as "no pattern given".
    if (value.length === 0) return null;
    source = value.map(alternative => `(${alternative})`).join('|');
  }
  return typeof source === 'string' ? new RegExp(source) : null;
}

View File

@ -9,7 +9,7 @@ createTree = require("./rb-tree")
const VAR_ID = 257
class WASMTreeSitterLanguageMode {
constructor( buffer, config) {
constructor(buffer, config, grammar) {
this.emitter = new Emitter();
this.lastId = 259
this.scopeNames = new Map([["variable", VAR_ID]])
@ -19,16 +19,16 @@ class WASMTreeSitterLanguageMode {
this.injectionsMarkerLayer = buffer.addMarkerLayer();
this.newRanges = []
this.oldNodeTexts = new Set()
let resolve
this.ready = new Promise(r => resolve = r)
initPromise.then(() =>
Parser.Language.load('/tmp/grammars/ruby/grammar.wasm')
Parser.Language.load(grammar.grammarPath)
).then(lang => {
const syntaxQuery = fs.readFileSync('/tmp/grammars/ruby/queries/highlights.scm', 'utf-8')
if(fs.existsSync('/tmp/grammars/ruby/queries/locals.scm')) {
const localsQuery = fs.readFileSync('/tmp/grammars/ruby/queries/locals.scm', 'utf-8')
this.localsQuery = lang.query(localsQuery)
this.syntaxQuery = lang.query(grammar.syntaxQuery)
if(grammar.localsQuery) {
this.localsQuery = lang.query(grammar.localsQuery)
}
this.syntaxQuery = lang.query(syntaxQuery)
this.parser = new Parser()
this.parser.setLanguage(lang)
@ -38,11 +38,12 @@ class WASMTreeSitterLanguageMode {
const startRange = new Range([0, 0], [0, 0])
const range = buffer.getRange()
buffer.emitDidChangeEvent({oldRange: startRange, newRange: range, oldText: ""})
resolve(true)
global.mode = this
})
this.rootScopeDescriptor = new ScopeDescriptor({
scopes: ['ruby']
scopes: [grammar.scopeName]
});
}
@ -103,43 +104,45 @@ class WASMTreeSitterLanguageMode {
}
oldScopes = oldScopes || []
syntax.forEach(capture => {
const node = capture.node
const names = capture.name.split('.')
syntax.forEach(({node, name}) => {
// const node = capture.node
// const names = capture.name.split('.')
names.forEach(name => {
if(!this.scopeNames.get(name)) {
this.lastId += 2
const newId = this.lastId;
this.scopeNames.set(name, newId)
this.scopeIds.set(newId, name)
}
})
// names.forEach(name => {
let id = this.scopeNames.get(name)
if(!id) {
this.lastId += 2
id = this.lastId
const newId = this.lastId;
this.scopeNames.set(name, newId)
this.scopeIds.set(newId, name)
}
// })
const ids = names.map(name => this.scopeNames.get(name))
// const ds = names.map(name => this.scopeNames.get(name))
let old = this.boundaries.get(node.startPosition)
if(old) {
old.openNode = node
if(old.openScopeIds.length === 0) {
old.openScopeIds = [...ids]
old.openScopeIds = [id]
}
} else {
this.boundaries = this.boundaries.insert(node.startPosition, {
closeScopeIds: [...oldScopes],
openScopeIds: [...ids],
openScopeIds: [id],
openNode: node,
position: node.startPosition
})
oldScopes = ids
oldScopes = [id]
}
old = this.boundaries.get(node.endPosition)
if(old) {
old.closeNode = node
if(old.closeScopeIds.length === 0) old.closeScopeIds = ids.reverse()
if(old.closeScopeIds.length === 0) old.closeScopeIds = [id]
} else {
this.boundaries = this.boundaries.insert(node.endPosition, {
closeScopeIds: ids.reverse(),
closeScopeIds: [id],
openScopeIds: [],
closeNode: node,
position: node.endPosition
@ -284,7 +287,6 @@ class WASMTreeSitterLanguageMode {
},
seek(start, endRow) {
// debugger
const end = {row: endRow + 1, column: 0}
iterator = updateBoundaries(start, end).ge(start)
return []
@ -293,19 +295,36 @@ class WASMTreeSitterLanguageMode {
}
classNameForScopeId(scopeId) {
// console.log('classNameForScopeId', scopeId, this.scopeIds)
const scope = this.scopeIds.get(scopeId)
if(scope) return `syntax--${scope}`
// // console.log("classNameForScopeId", scopeId)
// if(scopeId === 259) {
// return "syntax--keyword"
// }
if(scope) return `syntax--${scope.replace(/\./g, ' syntax--')}`
}
scopeForId(scopeId) {
return this.scopeIds[scopeId]
}
scopeDescriptorForPosition(position) {
if(!this.tree) return new ScopeDescriptor({scopes: ['text']})
const current = Point.fromObject(position)
let begin = Point.fromObject(position)
begin.column = 0
const end = Point.fromObject([begin.row+1, 0])
this._updateBoundaries(begin, end)
const it = this.boundaries.ge(begin)
if(!it.value) return new ScopeDescriptor({scopes: ['text']})
let scopeIds = []
while(comparePoints(it.key, current) <= 0) {
const closing = new Set(it.value.closeScopeIds)
scopeIds = scopeIds.filter(s => !closing.has(s))
scopeIds.push(...it.value.openScopeIds)
if(!it.hasNext) break
it.next()
}
const scopes = scopeIds.map(id => this.classNameForScopeId(id).replace(/^syntax--/, '').replace(/\s?syntax--/g, '.'))
return new ScopeDescriptor({scopes})
}
}
module.exports = WASMTreeSitterLanguageMode;

7
vendor/jasmine.js vendored
View File

@ -1448,6 +1448,13 @@ jasmine.Matchers.prototype.toContain = function(expected) {
return this.env.contains_(this.actual, expected);
};
/**
 * Matcher that delegates the check to a predicate.
 *
 * The predicate is called with the actual value and a callback; calling the
 * callback with a string sets that string as the matcher's message. The
 * predicate's return value is the result of the matcher.
 *
 * @param {Function} fn predicate invoked as fn(actual, setMessage)
 */
jasmine.Matchers.prototype.toSatisfy = function(fn) {
  const matcher = this;
  function setMessage(text) {
    matcher.message = function() {
      return text;
    };
  }
  return fn(matcher.actual, setMessage);
};
/**
* Matcher that checks that the expected item is NOT an element in the actual Array.
*