Add support for back-references in TextMate grammars

2024-12-28 09:01:33 +03:00 · 2012-09-07 15:27:13 -07:00 · 2012-09-07 15:27:13 -07:00 · 12e2fbdb14
commit 12e2fbdb14
parent b00830cbc8
2 changed files with 85 additions and 52 deletions
--- a/spec/app/text-mate-grammar-spec.coffee
+++ b/spec/app/text-mate-grammar-spec.coffee
@ -50,30 +50,6 @@ describe "TextMateGrammar", ->
        expect(tokens[5]).toEqual value: 'foo.bar.Baz', scopes: ['source.coffee', 'meta.class.instance.constructor', 'entity.name.type.instance.coffee']
        expect(tokens[6]).toEqual value: ' ', scopes: ['source.coffee']

-    describe "when the line matches a begin/end pattern", ->
-      it "returns tokens based on the beginCaptures, endCaptures and the child scope", ->
-        {tokens} = grammar.getLineTokens("'''single-quoted heredoc'''")
-
-        expect(tokens.length).toBe 3
-
-        expect(tokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
-        expect(tokens[1]).toEqual value: "single-quoted heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
-        expect(tokens[2]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
-
-    describe "when begin/end pattern spans multiple lines", ->
-      it "uses the currentRule returned by the first line to parse the second line", ->
-        {tokens: firstTokens, stack} = grammar.getLineTokens("'''single-quoted")
-        {tokens: secondTokens, stack} = grammar.getLineTokens("heredoc'''", stack)
-
-        expect(firstTokens.length).toBe 2
-        expect(secondTokens.length).toBe 2
-
-        expect(firstTokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
-        expect(firstTokens[1]).toEqual value: "single-quoted", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
-
-        expect(secondTokens[0]).toEqual value: "heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
-        expect(secondTokens[1]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
-
    describe "when the line matches a pattern with optional capture groups", ->
      it "only returns tokens for capture groups that matched", ->
        {tokens} = grammar.getLineTokens("class Quicksort")
@ -94,17 +70,6 @@ describe "TextMateGrammar", ->
        expect(tokens[4]).toEqual(value: ' ', scopes: ["source.coffee"])
        expect(tokens[5]).toEqual(value: '->', scopes: ["source.coffee", "storage.type.function.coffee"])

-    describe "when the line matches a begin/end pattern that contains sub-patterns", ->
-      it "returns tokens within the begin/end scope based on the sub-patterns", ->
-        {tokens} = grammar.getLineTokens('"""heredoc with character escape \\t"""')
-
-        expect(tokens.length).toBe 4
-
-        expect(tokens[0]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
-        expect(tokens[1]).toEqual value: "heredoc with character escape ", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee']
-        expect(tokens[2]).toEqual value: "\\t", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'constant.character.escape.coffee']
-        expect(tokens[3]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.end.coffee']
-
    describe "when the line matches a pattern that includes a rule", ->
      it "returns tokens based on the included rule", ->
        {tokens} = grammar.getLineTokens("7777777")
@ -152,9 +117,54 @@ describe "TextMateGrammar", ->
      it "creates tokens without adding a new scope", ->
        grammar = TextMateBundle.grammarsByFileType["rb"]
        {tokens} = grammar.getLineTokens('%w|oh \\look|')
-        expect(tokens[0]).toEqual value: '%w',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
-        expect(tokens[1]).toEqual value: '|',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
-        expect(tokens[2]).toEqual value: 'oh ',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
-        expect(tokens[3]).toEqual value: '\\l',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
-        expect(tokens[4]).toEqual value: 'ook|',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
+        expect(tokens.length).toBe 5
+        expect(tokens[0]).toEqual value: '%w|',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
+        expect(tokens[1]).toEqual value: 'oh ',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
+        expect(tokens[2]).toEqual value: '\\l',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
+        expect(tokens[3]).toEqual value: 'ook',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
+
+    describe "when the line matches a begin/end pattern", ->
+      it "returns tokens based on the beginCaptures, endCaptures and the child scope", ->
+        {tokens} = grammar.getLineTokens("'''single-quoted heredoc'''")
+
+        expect(tokens.length).toBe 3
+
+        expect(tokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
+        expect(tokens[1]).toEqual value: "single-quoted heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
+        expect(tokens[2]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
+
+      describe "when the pattern spans multiple lines", ->
+        it "uses the currentRule returned by the first line to parse the second line", ->
+          {tokens: firstTokens, stack} = grammar.getLineTokens("'''single-quoted")
+          {tokens: secondTokens, stack} = grammar.getLineTokens("heredoc'''", stack)
+
+          expect(firstTokens.length).toBe 2
+          expect(secondTokens.length).toBe 2
+
+          expect(firstTokens[0]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
+          expect(firstTokens[1]).toEqual value: "single-quoted", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
+
+          expect(secondTokens[0]).toEqual value: "heredoc", scopes: ['source.coffee', 'string.quoted.heredoc.coffee']
+          expect(secondTokens[1]).toEqual value: "'''", scopes: ['source.coffee', 'string.quoted.heredoc.coffee', 'punctuation.definition.string.end.coffee']
+
+      describe "when pattern contains sub-patterns", ->
+        it "returns tokens within the begin/end scope based on the sub-patterns", ->
+          {tokens} = grammar.getLineTokens('"""heredoc with character escape \\t"""')
+
+          expect(tokens.length).toBe 4
+
+          expect(tokens[0]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.begin.coffee']
+          expect(tokens[1]).toEqual value: "heredoc with character escape ", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee']
+          expect(tokens[2]).toEqual value: "\\t", scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'constant.character.escape.coffee']
+          expect(tokens[3]).toEqual value: '"""', scopes: ['source.coffee', 'string.quoted.double.heredoc.coffee', 'punctuation.definition.string.end.coffee']
+
+      describe "when the end pattern contains a back reference", ->
+        it "creates tokens without adding a new scope", ->
+          grammar = TextMateBundle.grammarsByFileType["rb"]
+          {tokens} = grammar.getLineTokens('%w|oh|,')
+          expect(tokens.length).toBe 4
+          expect(tokens[0]).toEqual value: '%w|',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.begin.ruby"]
+          expect(tokens[1]).toEqual value: 'oh',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby"]
+          expect(tokens[2]).toEqual value: '|',  scopes: ["source.ruby", "string.quoted.other.literal.lower.ruby", "punctuation.definition.string.end.ruby"]
+          expect(tokens[3]).toEqual value: ',',  scopes: ["source.ruby", "punctuation.separator.object.ruby"]

--- a/src/app/text-mate-grammar.coffee
+++ b/src/app/text-mate-grammar.coffee
@ -67,26 +67,24 @@ class Rule
  grammar: null
  scopeName: null
  patterns: null
-  endPattern: null
+  createEndPattern: null

-  constructor: (@grammar, {@scopeName, patterns, @endPattern}) ->
+  constructor: (@grammar, {@scopeName, patterns, @createEndPattern}) ->
    patterns ?= []
    @patterns = []
-    @patterns.push(@endPattern) if @endPattern
    @patterns.push((patterns.map (pattern) => new Pattern(grammar, pattern))...)

-  getAllPatterns: (included=[]) ->
+  getIncludedPatterns: (included=[]) ->
    return [] if _.include(included, this)
    included.push(this)
    allPatterns = []

-    allPatterns.push(@endPattern.getIncludedPatterns()...) if @endPattern
    for pattern in @patterns
      allPatterns.push(pattern.getIncludedPatterns(included)...)
    allPatterns

  getNextTokens: (stack, line, position) ->
-    patterns = @getAllPatterns()
+    patterns = @getIncludedPatterns()
    {index, captureIndices} = OnigRegExp.captureIndices(line, position, patterns.map (p) -> p.regex )

    return {} unless index?
@ -108,6 +106,13 @@ class Rule

    { match: nextMatch, pattern: matchedPattern }

+  addEndPattern: (backReferences) -> # builds an end pattern from backReferences and installs it on this rule
+    endPattern = @createEndPattern(backReferences)
+    @patterns.unshift(endPattern) # placed at the front of @patterns, which is the slot removeEndPattern's shift() empties
+
+  removeEndPattern: -> # drops the first entry of @patterns, the slot addEndPattern fills via unshift()
+    @patterns.shift()
+
 class Pattern
  grammar: null
  pushRule: null
@ -115,6 +120,7 @@ class Pattern
  scopeName: null
  regex: null
  captures: null
+  backReferences: null

  constructor: (@grammar, { name, contentName, @include, match, begin, end, captures, beginCaptures, endCaptures, patterns, @popRule}) ->
    @scopeName = name ? contentName # TODO: We need special treatment of contentName
@ -124,14 +130,18 @@ class Pattern
    else if begin
      @regex = new OnigRegExp(begin)
      @captures = beginCaptures ? captures
-      endPattern = new Pattern(@grammar, { match: end, captures: endCaptures ? captures, popRule: true})
-      @pushRule = new Rule(@grammar, { @scopeName, patterns, endPattern })
+      createEndPattern = (backReferences) -> # invoked through Rule#addEndPattern with the texts captured by one begin match
+        resolvedEnd = end.replace /(\\\d+)/g, (match) -> # kept in a local: reassigning `end` would bake this match's text into every later call
+          index = parseInt(match[1..], 10)
+          _.escapeRegExp(backReferences[index] ? "\\#{index}")
+        new Pattern(@grammar, { match: resolvedEnd, captures: endCaptures ? captures, popRule: true})
+      @pushRule = new Rule(@grammar, { @scopeName, patterns, createEndPattern })

  getIncludedPatterns: (included) ->
    if @include
      rule = @grammar.ruleForInclude(@include)
      # console.log "Could not find rule for include #{@include} in #{@grammar.name} grammar" unless rule
-      rule?.getAllPatterns(included) ? []
+      rule?.getIncludedPatterns(included) ? []
    else
      [this]

@ -147,7 +157,7 @@ class Pattern
    scopes.push(@scopeName) if @scopeName and not @popRule

    if @captures
-      tokens = @getTokensForCaptureIndices(line, captureIndices, scopes)
+      tokens = @getTokensForCaptureIndices(line, _.clone(captureIndices), scopes)
    else
      [start, end] = captureIndices[1..2]
      zeroLengthMatch = end == start
@ -157,12 +167,23 @@ class Pattern
        tokens = [{ value: line[start...end], scopes: scopes }]

    if @pushRule
+      @pushRule.addEndPattern(@backreferencesForCaptureIndices(line, captureIndices))
      stack.push(@pushRule)
    else if @popRule
-      stack.pop()
+      rule = stack.pop()
+      rule.removeEndPattern()

    tokens

+  backreferencesForCaptureIndices: (line, captureIndices) -> # returns an array holding the slice of `line` for each capture
+    backReferences = []
+    for i in [0...captureIndices.length] by 3 # steps through captureIndices three entries at a time; i + 1 and i + 2 are read as start and end
+      start = captureIndices[i + 1]
+      end = captureIndices[i + 2]
+      backReferences.push line[start...end]
+
+    backReferences
+
  getTokensForCaptureIndices: (line, captureIndices, scopes) ->
    [parentCaptureIndex, parentCaptureStart, parentCaptureEnd] = shiftCapture(captureIndices)

@ -174,7 +195,9 @@ class Pattern
    while captureIndices.length and captureIndices[1] < parentCaptureEnd
      [childCaptureIndex, childCaptureStart, childCaptureEnd] = captureIndices

-      if childCaptureEnd - childCaptureStart == 0 # An empty capture, so it can't contain any tokens
+      emptyCapture = childCaptureEnd - childCaptureStart == 0
+      captureHasNoScope = not @captures[childCaptureIndex]
+      if emptyCapture or captureHasNoScope
        shiftCapture(captureIndices)
        continue