Add support for back-references in TextMate grammars

This commit is contained in:
Corey Johnson & Nathan Sobo 2012-09-07 15:27:13 -07:00
parent b00830cbc8
commit 12e2fbdb14
2 changed files with 85 additions and 52 deletions

View File

@ -50,30 +50,6 @@ describe "TextMateGrammar", ->
expect(tokens[5]).toEqual value: 'foo.bar.Baz', scopes: ['source.coffee', 'meta.class.instance.constructor', 'entity.name.type.instance.coffee']
expect(tokens[6]).toEqual value: ' ', scopes: ['source.coffee']
describe "when the line matches a begin/end pattern", ->
it "returns tokens based on the beginCaptures, endCaptures and the child scope", ->
{tokens} = grammar.getLineTokens("'''single-quoted heredoc'''")
expect(tokens.length).toBe 3
expect(tokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
expect(tokens[1]).toEqual value: "single-quoted heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
expect(tokens[2]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
describe "when begin/end pattern spans multiple lines", ->
it "uses the currentRule returned by the first line to parse the second line", ->
{tokens: firstTokens, stack} = grammar.getLineTokens("'''single-quoted")
{tokens: secondTokens, stack} = grammar.getLineTokens("heredoc'''", stack)
expect(firstTokens.length).toBe 2
expect(secondTokens.length).toBe 2
expect(firstTokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
expect(firstTokens[1]).toEqual value: "single-quoted", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
expect(secondTokens[0]).toEqual value: "heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
expect(secondTokens[1]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
describe "when the line matches a pattern with optional capture groups", ->
it "only returns tokens for capture groups that matched", ->
{tokens} = grammar.getLineTokens("class Quicksort")
@ -94,17 +70,6 @@ describe "TextMateGrammar", ->
expect(tokens[4]).toEqual(value: ' ', scopes: ["source.coffee"])
expect(tokens[5]).toEqual(value: '->', scopes: ["source.coffee", "storage.type.function.coffee"])
describe "when the line matches a begin/end pattern that contains sub-patterns", ->
it "returns tokens within the begin/end scope based on the sub-patterns", ->
{tokens} = grammar.getLineTokens('"""heredoc with character escape \\t"""')
expect(tokens.length).toBe 4
expect(tokens[0]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
expect(tokens[1]).toEqual value: "heredoc with character escape ", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee']
expect(tokens[2]).toEqual value: "\\t", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'constant.character.escape.coffee']
expect(tokens[3]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.end.coffee']
describe "when the line matches a pattern that includes a rule", ->
it "returns tokens based on the included rule", ->
{tokens} = grammar.getLineTokens("7777777")
@ -152,9 +117,54 @@ describe "TextMateGrammar", ->
it "creates tokens without adding a new scope", ->
grammar = TextMateBundle.grammarsByFileType["rb"]
{tokens} = grammar.getLineTokens('%w|oh \\look|')
expect(tokens[0]).toEqual value: '%w', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
expect(tokens[1]).toEqual value: '|', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
expect(tokens[2]).toEqual value: 'oh ', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
expect(tokens[3]).toEqual value: '\\l', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
expect(tokens[4]).toEqual value: 'ook|', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
expect(tokens.length).toBe 5
expect(tokens[0]).toEqual value: '%w|', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
expect(tokens[1]).toEqual value: 'oh ', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
expect(tokens[2]).toEqual value: '\\l', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
expect(tokens[3]).toEqual value: 'ook', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
# A begin/end rule that opens and closes on the same line: the opening
# delimiter, the content and the closing delimiter are each expected as a
# separate token.
describe "when the line matches a begin/end pattern", ->
it "returns tokens based on the beginCaptures, endCaptures and the child scope", ->
{tokens} = grammar.getLineTokens("'''single-quoted heredoc'''")
expect(tokens.length).toBe 3
# Delimiter tokens carry the heredoc scope plus a begin/end punctuation scope;
# the content token carries only the heredoc scope.
expect(tokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
expect(tokens[1]).toEqual value: "single-quoted heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
expect(tokens[2]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
# A begin/end rule whose end delimiter is on a later line than its begin
# delimiter.
describe "when the pattern spans multiple lines", ->
it "uses the currentRule returned by the first line to parse the second line", ->
# The stack returned for the first line is passed back in for the second
# line, so the second line is tokenized with the heredoc rule still open.
{tokens: firstTokens, stack} = grammar.getLineTokens("'''single-quoted")
{tokens: secondTokens, stack} = grammar.getLineTokens("heredoc'''", stack)
expect(firstTokens.length).toBe 2
expect(secondTokens.length).toBe 2
expect(firstTokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
expect(firstTokens[1]).toEqual value: "single-quoted", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
# The second line starts inside the heredoc scope without a new begin token.
expect(secondTokens[0]).toEqual value: "heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
expect(secondTokens[1]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
# A begin/end rule with nested patterns: text matched by a sub-pattern gets the
# sub-pattern's scope stacked on top of the enclosing heredoc scope.
describe "when pattern contains sub-patterns", ->
it "returns tokens within the begin/end scope based on the sub-patterns", ->
{tokens} = grammar.getLineTokens('"""heredoc with character escape \\t"""')
expect(tokens.length).toBe 4
expect(tokens[0]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
expect(tokens[1]).toEqual value: "heredoc with character escape ", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee']
# The escape sequence is split out as its own token with an extra scope.
expect(tokens[2]).toEqual value: "\\t", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'constant.character.escape.coffee']
expect(tokens[3]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.end.coffee']
# Covers begin/end rules whose end pattern refers back to text captured by the
# begin pattern. NOTE(review): presumably the rb grammar's %w rule ends on a
# back reference to the delimiter captured at the start — confirm against the
# Ruby grammar file.
describe "when the end pattern contains a back reference", ->
  it "ends the rule at the delimiter captured by the begin pattern", ->
    grammar = TextMateBundle.grammarsByFileType["rb"]
    {tokens} = grammar.getLineTokens('%w|oh|,')
    expect(tokens.length).toBe 4
    expect(tokens[0]).toEqual value: '%w|', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
    expect(tokens[1]).toEqual value: 'oh', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
    # The second '|' must be recognised as the end delimiter ...
    expect(tokens[2]).toEqual value: '|', scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.end.ruby"]
    # ... and the ',' after it no longer carries the string scope, which shows
    # the string rule was closed.
    expect(tokens[3]).toEqual value: ',', scopes: ["source.ruby", "punctuation.separator.object.ruby"]

View File

@ -67,26 +67,24 @@ class Rule
grammar: null
scopeName: null
patterns: null
endPattern: null
createEndPattern: null
constructor: (@grammar, {@scopeName, patterns, @endPattern}) ->
constructor: (@grammar, {@scopeName, patterns, @createEndPattern}) ->
patterns ?= []
@patterns = []
@patterns.push(@endPattern) if @endPattern
@patterns.push((patterns.map (pattern) => new Pattern(grammar, pattern))...)
getAllPatterns: (included=[]) ->
getIncludedPatterns: (included=[]) ->
return [] if _.include(included, this)
included.push(this)
allPatterns = []
allPatterns.push(@endPattern.getIncludedPatterns()...) if @endPattern
for pattern in @patterns
allPatterns.push(pattern.getIncludedPatterns(included)...)
allPatterns
getNextTokens: (stack, line, position) ->
patterns = @getAllPatterns()
patterns = @getIncludedPatterns()
{index, captureIndices} = OnigRegExp.captureIndices(line, position, patterns.map (p) -> p.regex )
return {} unless index?
@ -108,6 +106,13 @@ class Rule
{ match: nextMatch, pattern: matchedPattern }
addEndPattern: (backReferences) ->
  # Build an end pattern from the given back-reference texts via
  # @createEndPattern and put it at the front of @patterns.
  @patterns.unshift(@createEndPattern(backReferences))

removeEndPattern: ->
  # Drop the first entry of @patterns.
  # NOTE(review): presumably the end pattern added by addEndPattern — this
  # relies on add/remove calls being strictly paired; confirm at call sites.
  @patterns.shift()
class Pattern
grammar: null
pushRule: null
@ -115,6 +120,7 @@ class Pattern
scopeName: null
regex: null
captures: null
backReferences: null
constructor: (@grammar, { name, contentName, @include, match, begin, end, captures, beginCaptures, endCaptures, patterns, @popRule}) ->
@scopeName = name ? contentName # TODO: We need special treatment of contentName
@ -124,14 +130,18 @@ class Pattern
else if begin
@regex = new OnigRegExp(begin)
@captures = beginCaptures ? captures
endPattern = new Pattern(@grammar, { match: end, captures: endCaptures ? captures, popRule: true})
@pushRule = new Rule(@grammar, { @scopeName, patterns, endPattern })
# Builds the end Pattern for one activation of this begin/end rule.
#
# backReferences - Array of captured strings from the begin match, indexed by
#                  capture number (index 0 is the whole match).
#
# Every `\N` in the `end` source is replaced by the regex-escaped text of
# capture N. If capture N is missing, the escaped text `\N` is substituted
# instead.
#
# Returns a new Pattern with popRule set. `@grammar` resolves against the
# object this function is invoked on (the Rule, through @createEndPattern).
createEndPattern = (backReferences) ->
  # Substitute into a fresh local on every call. CoffeeScript does not shadow
  # outer variables, so assigning the result back to `end` would overwrite the
  # constructor's `end`: the first activation would bake its captured text into
  # the template and later activations would find no `\N` left to replace.
  resolvedEnd = end.replace /(\\\d+)/g, (match) ->
    index = parseInt(match[1..], 10)
    _.escapeRegExp(backReferences[index] ? "\\#{index}")
  new Pattern(@grammar, { match: resolvedEnd, captures: endCaptures ? captures, popRule: true})
@pushRule = new Rule(@grammar, { @scopeName, patterns, createEndPattern })
getIncludedPatterns: (included) ->
if @include
rule = @grammar.ruleForInclude(@include)
# console.log "Could not find rule for include #{@include} in #{@grammar.name} grammar" unless rule
rule?.getAllPatterns(included) ? []
rule?.getIncludedPatterns(included) ? []
else
[this]
@ -147,7 +157,7 @@ class Pattern
scopes.push(@scopeName) if @scopeName and not @popRule
if @captures
tokens = @getTokensForCaptureIndices(line, captureIndices, scopes)
tokens = @getTokensForCaptureIndices(line, _.clone(captureIndices), scopes)
else
[start, end] = captureIndices[1..2]
zeroLengthMatch = end == start
@ -157,12 +167,23 @@ class Pattern
tokens = [{ value: line[start...end], scopes: scopes }]
if @pushRule
@pushRule.addEndPattern(@backreferencesForCaptureIndices(line, captureIndices))
stack.push(@pushRule)
else if @popRule
stack.pop()
rule = stack.pop()
rule.removeEndPattern()
tokens
# Extracts the text of each capture from `line`.
#
# line           - The String that was matched.
# captureIndices - Flat Array read in groups of three; the second and third
#                  entries of each group are used as the start and end
#                  offsets into `line`.
#
# Returns an Array with one substring per group, in group order.
backreferencesForCaptureIndices: (line, captureIndices) ->
  for offset in [0...captureIndices.length] by 3
    line[captureIndices[offset + 1]...captureIndices[offset + 2]]
getTokensForCaptureIndices: (line, captureIndices, scopes) ->
[parentCaptureIndex, parentCaptureStart, parentCaptureEnd] = shiftCapture(captureIndices)
@ -174,7 +195,9 @@ class Pattern
while captureIndices.length and captureIndices[1] < parentCaptureEnd
[childCaptureIndex, childCaptureStart, childCaptureEnd] = captureIndices
if childCaptureEnd - childCaptureStart == 0 # An empty capture, so it can't contain any tokens
emptyCapture = childCaptureEnd - childCaptureStart == 0
captureHasNoScope = not @captures[childCaptureIndex]
if emptyCapture or captureHasNoScope
shiftCapture(captureIndices)
continue