TextMateGrammar handles nested capture groups and trailing lookahead captures

Our previous implementation only allowed for a single layer of capture groups. Now we can have captures within captures. I achieved this by converting the match into a tree before generating tokens. If there are any capture scopes specified, then we will always emit a token for every capture group in the match. This may create some redundant tokens (a series of 2 or more tokens with the same scopes), but it will at least be technically correct. I think the overhead of removing these redundancies exceeds the cost of maintaining them for now.
This commit is contained in:
Nathan Sobo 2012-08-06 16:35:33 -06:00
parent 0d6d16a438
commit 7bc6a5f24b
2 changed files with 36 additions and 40 deletions

View File

@ -72,18 +72,21 @@ describe "TextMateGrammar", ->
it "only returns tokens for capture groups that matched", ->
{tokens} = grammar.getLineTokens("class Quicksort")
expect(tokens.length).toBe 3
expect(token[0].value).toBe "class"
expect(token[1].value).toBe " "
expect(token[2].value).toBe "Quicksort"
expect(tokens[0].value).toBe "class"
expect(tokens[1].value).toBe " "
expect(tokens[2].value).toBe "Quicksort"
describe "when the line matches a rule with nested capture groups and lookahead capture groups beyond the scope of the overall match", ->
fit "creates distinct tokens for nested captures and does not return tokens beyond the scope of the overall capture", ->
it "creates distinct tokens for nested captures and does not return tokens beyond the scope of the overall capture", ->
{tokens} = grammar.getLineTokens(" destroy: ->")
for token in tokens
console.log token.value, token.scopes.join(' ')
expect(tokens.length).toBe 5
expect(tokens.length).toBe 6
expect(tokens[0]).toEqual(value: ' ', scopes: ["source.coffee", "meta.function.coffee"])
expect(tokens[1]).toEqual(value: 'destro', scopes: ["source.coffee", "meta.function.coffee", "entity.name.function.coffee"])
# this dangling 'y' with a duplicated scope looks wrong, but textmate yields the same behavior. probably a quirk in the coffee grammar.
expect(tokens[2]).toEqual(value: 'y', scopes: ["source.coffee", "meta.function.coffee", "entity.name.function.coffee", "entity.name.function.coffee"])
expect(tokens[3]).toEqual(value: ':', scopes: ["source.coffee", "keyword.operator.coffee"])
expect(tokens[4]).toEqual(value: ' ', scopes: ["source.coffee"])
expect(tokens[5]).toEqual(value: '->', scopes: ["source.coffee", "storage.type.function.coffee"])
describe "when the line matches a begin/end pattern that contains sub-patterns", ->
it "returns tokens within the begin/end scope based on the sub-patterns", ->

View File

@ -70,11 +70,10 @@ class Rule
{ match, pattern } = @getNextMatch(line, position)
return {} unless match
tokens = pattern.handleMatch(stack, match)
nextTokens = pattern.handleMatch(stack, match)
tokensStartPosition = match.position
tokensEndPosition = tokensStartPosition + match.text.length
nextTokens = tokens
tokensStartPosition = match.index
tokensEndPosition = tokensStartPosition + match[0].length
{ nextTokens, tokensStartPosition, tokensEndPosition }
getNextMatch: (line, position) ->
@ -84,7 +83,7 @@ class Rule
for pattern in @patterns
{ pattern, match } = pattern.getNextMatch(line, position)
if match
if !nextMatch or match.index < nextMatch.index
if !nextMatch or match.position < nextMatch.position
nextMatch = match
matchedPattern = pattern
@ -114,16 +113,16 @@ class Pattern
rule = @grammar.ruleForInclude(@include)
rule.getNextMatch(line, position)
else
{ match: @regex.search(line, position), pattern: this }
{ match: @regex.getCaptureTree(line, position), pattern: this }
handleMatch: (stack, match) ->
scopes = _.pluck(stack, "scopeName")
scopes.push(@scopeName) unless @popRule
if @captures
tokens = @getTokensForMatchWithCaptures(match, scopes)
tokens = @getTokensForCaptureTree(match, scopes)
else
tokens = [{ value: match[0], scopes: scopes }]
tokens = [{ value: match.text, scopes: scopes }]
if @pushRule
stack.push(@pushRule)
@ -132,35 +131,29 @@ class Pattern
tokens
getTokensForMatchWithCaptures: (match, scopes) ->
getTokensForCaptureTree: (tree, scopes) ->
tokens = []
if scope = @captures[tree.index]?.name
scopes = scopes.concat(scope)
previousCaptureEndPosition = 0
if tree.captures
for capture in tree.captures
continue unless capture.text.length
console.log match
console.log match.indices
console.log @captures
currentCaptureStartPosition = capture.position - tree.position
if previousCaptureEndPosition < currentCaptureStartPosition
tokens.push
value: tree.text[previousCaptureEndPosition...currentCaptureStartPosition]
scopes: scopes
for captureIndex in _.keys(@captures)
currentCaptureText = match[captureIndex]
continue unless currentCaptureText.length
currentCaptureStartPosition = match.indices[captureIndex] - match.index
currentCaptureScopeName = @captures[captureIndex].name
if previousCaptureEndPosition < currentCaptureStartPosition
tokens.push
value: match[0][previousCaptureEndPosition...currentCaptureStartPosition]
scopes: scopes
captureTokens = @getTokensForCaptureTree(capture, scopes)
tokens.push(captureTokens...)
previousCaptureEndPosition = currentCaptureStartPosition + capture.text.length
if previousCaptureEndPosition < tree.text.length
tokens.push
value: currentCaptureText
scopes: scopes.concat(currentCaptureScopeName)
previousCaptureEndPosition = currentCaptureStartPosition + currentCaptureText.length
if previousCaptureEndPosition < match[0].length
tokens.push
value: match[0][previousCaptureEndPosition...match[0].length]
value: tree.text[previousCaptureEndPosition...tree.text.length]
scopes: scopes
tokens