TextMateGrammar handles nested capture groups and trailing lookahead captures

Our previous implementation only supported a single layer of capture groups. Now we can have captures within captures. I achieved this by converting the match into a tree before generating tokens. If any capture scopes are specified, we always emit a token for every capture group in the match. This may create some redundant tokens (a series of 2 or more consecutive tokens with the same scopes), but the output will at least be technically correct. I think the overhead of removing these redundant tokens exceeds the cost of keeping them for now.
2025-01-02 11:52:28 +03:00 · 2012-08-06 16:35:33 -06:00 · 2012-08-06 16:35:33 -06:00 · 7bc6a5f24b
commit 7bc6a5f24b
parent 0d6d16a438
2 changed files with 36 additions and 40 deletions
--- a/spec/app/text-mate-grammar-spec.coffee
+++ b/spec/app/text-mate-grammar-spec.coffee
@ -72,18 +72,21 @@ describe "TextMateGrammar", ->
      it "only returns tokens for capture groups that matched", ->
        {tokens} = grammar.getLineTokens("class Quicksort")
        expect(tokens.length).toBe 3
-        expect(token[0].value).toBe "class"
-        expect(token[1].value).toBe " "
-        expect(token[2].value).toBe "Quicksort"
+        expect(tokens[0].value).toBe "class"
+        expect(tokens[1].value).toBe " "
+        expect(tokens[2].value).toBe "Quicksort"

    describe "when the line matches a rule with nested capture groups and lookahead capture groups beyond the scope of the overall match", ->
-      fit "creates distinct tokens for nested captures and does not return tokens beyond the scope of the overall capture", ->
+      it "creates distinct tokens for nested captures and does not return tokens beyond the scope of the overall capture", ->
        {tokens} = grammar.getLineTokens("  destroy: ->")
-
-        for token in tokens
-          console.log token.value, token.scopes.join(' ')
-
-        expect(tokens.length).toBe 5
+        expect(tokens.length).toBe 6
+        expect(tokens[0]).toEqual(value: '  ', scopes: ["source.coffee", "meta.function.coffee"])
+        expect(tokens[1]).toEqual(value: 'destro', scopes: ["source.coffee", "meta.function.coffee", "entity.name.function.coffee"])
+        # this dangling 'y' with a duplicated scope looks wrong, but textmate yields the same behavior. probably a quirk in the coffee grammar.
+        expect(tokens[2]).toEqual(value: 'y', scopes: ["source.coffee", "meta.function.coffee", "entity.name.function.coffee", "entity.name.function.coffee"])
+        expect(tokens[3]).toEqual(value: ':', scopes: ["source.coffee", "keyword.operator.coffee"])
+        expect(tokens[4]).toEqual(value: ' ', scopes: ["source.coffee"])
+        expect(tokens[5]).toEqual(value: '->', scopes: ["source.coffee", "storage.type.function.coffee"])

   describe "when the line matches a begin/end pattern that contains sub-patterns", ->
     it "returns tokens within the begin/end scope based on the sub-patterns", ->
--- a/src/app/text-mate-grammar.coffee
+++ b/src/app/text-mate-grammar.coffee
@ -70,11 +70,10 @@ class Rule
    { match, pattern } = @getNextMatch(line, position)
    return {} unless match

-    tokens = pattern.handleMatch(stack, match)
+    nextTokens = pattern.handleMatch(stack, match)
+    tokensStartPosition = match.position
+    tokensEndPosition = tokensStartPosition + match.text.length

-    nextTokens = tokens
-    tokensStartPosition = match.index
-    tokensEndPosition = tokensStartPosition + match[0].length
    { nextTokens, tokensStartPosition, tokensEndPosition }

  getNextMatch: (line, position) ->
@ -84,7 +83,7 @@ class Rule
    for pattern in @patterns
      { pattern, match } = pattern.getNextMatch(line, position)
      if match
-        if !nextMatch or match.index < nextMatch.index
+        if !nextMatch or match.position < nextMatch.position
          nextMatch = match
          matchedPattern = pattern

@ -114,16 +113,16 @@ class Pattern
      rule = @grammar.ruleForInclude(@include)
      rule.getNextMatch(line, position)
    else
-      { match: @regex.search(line, position), pattern: this }
+      { match: @regex.getCaptureTree(line, position), pattern: this }

  handleMatch: (stack, match) ->
    scopes = _.pluck(stack, "scopeName")
    scopes.push(@scopeName) unless @popRule

    if @captures
-      tokens = @getTokensForMatchWithCaptures(match, scopes)
+      tokens = @getTokensForCaptureTree(match, scopes)
    else
-      tokens = [{ value: match[0], scopes: scopes }]
+      tokens = [{ value: match.text, scopes: scopes }]

    if @pushRule
      stack.push(@pushRule)
@ -132,35 +131,29 @@ class Pattern

    tokens

-  getTokensForMatchWithCaptures: (match, scopes) ->
+  getTokensForCaptureTree: (tree, scopes) ->
    tokens = []
+    if scope = @captures[tree.index]?.name
+      scopes = scopes.concat(scope)
+
    previousCaptureEndPosition = 0
+    if tree.captures
+      for capture in tree.captures
+        continue unless capture.text.length

-    console.log match
-    console.log match.indices
-    console.log @captures
+        currentCaptureStartPosition = capture.position - tree.position
+        if previousCaptureEndPosition < currentCaptureStartPosition
+          tokens.push
+            value: tree.text[previousCaptureEndPosition...currentCaptureStartPosition]
+            scopes: scopes

-    for captureIndex in _.keys(@captures)
-      currentCaptureText = match[captureIndex]
-      continue unless currentCaptureText.length
-
-      currentCaptureStartPosition = match.indices[captureIndex] - match.index
-      currentCaptureScopeName = @captures[captureIndex].name
-
-      if previousCaptureEndPosition < currentCaptureStartPosition
-        tokens.push
-          value: match[0][previousCaptureEndPosition...currentCaptureStartPosition]
-          scopes: scopes
+        captureTokens = @getTokensForCaptureTree(capture, scopes)
+        tokens.push(captureTokens...)
+        previousCaptureEndPosition = currentCaptureStartPosition + capture.text.length

+    if previousCaptureEndPosition < tree.text.length
      tokens.push
-        value: currentCaptureText
-        scopes: scopes.concat(currentCaptureScopeName)
-
-      previousCaptureEndPosition = currentCaptureStartPosition + currentCaptureText.length
-
-    if previousCaptureEndPosition < match[0].length
-      tokens.push
-        value: match[0][previousCaptureEndPosition...match[0].length]
+        value: tree.text[previousCaptureEndPosition...tree.text.length]
        scopes: scopes

    tokens