Support contentRegExp field on grammars, to match more than one line

Signed-off-by: Nathan Sobo <nathan@github.com>
2024-09-20 07:28:08 +03:00 · 2017-12-05 16:26:24 -08:00 · 2017-12-05 16:26:24 -08:00 · f3715779e5
commit f3715779e5
parent a475baf4b5
3 changed files with 60 additions and 27 deletions
--- a/spec/grammar-registry-spec.js
+++ b/spec/grammar-registry-spec.js
@@ -1,5 +1,6 @@
 const {it, fit, ffit, fffit, beforeEach, afterEach} = require('./async-spec-helpers')

+const dedent = require('dedent')
 const path = require('path')
 const fs = require('fs-plus')
 const temp = require('temp').track()
@@ -273,6 +274,32 @@ describe('GrammarRegistry', () => {
      expect(atom.grammars.selectGrammar('/hu.git/config').name).toBe('Null Grammar')
    })

+    describe('when the grammar has a contentRegExp field', () => {
+      it('favors grammars whose contentRegExp matches a prefix of the file\'s content', () => {
+        atom.grammars.addGrammar({
+          id: 'javascript-1',
+          fileTypes: ['js']
+        })
+        atom.grammars.addGrammar({
+          id: 'flow-javascript',
+          contentRegExp: new RegExp('//.*@flow'),
+          fileTypes: ['js']
+        })
+        atom.grammars.addGrammar({
+          id: 'javascript-2',
+          fileTypes: ['js']
+        })
+
+        const selectedGrammar = atom.grammars.selectGrammar('test.js', dedent`
+          // Copyright EvilCorp
+          // @flow
+
+          module.exports = function () { return 1 + 1 }
+        `)
+        expect(selectedGrammar.id).toBe('flow-javascript')
+      })
+    })
+
    it("uses the filePath's shebang line if the grammar cannot be determined by the extension or basename", async () => {
      await atom.packages.activatePackage('language-javascript')
      await atom.packages.activatePackage('language-ruby')
--- a/src/grammar-registry.js
+++ b/src/grammar-registry.js
@@ -11,7 +11,6 @@ const fs = require('fs-plus')
 const {Point, Range} = require('text-buffer')

 const GRAMMAR_TYPE_BONUS = 1000
-const GRAMMAR_SELECTION_RANGE = Range(Point.ZERO, Point(10, 0)).freeze()
 const PATH_SPLIT_REGEX = new RegExp('[/.]')

 const LANGUAGE_ID_MAP = [
@@ -147,7 +146,7 @@ class GrammarRegistry {
  autoAssignLanguageMode (buffer) {
    const result = this.selectGrammarWithScore(
      buffer.getPath(),
-      buffer.getTextInRange(GRAMMAR_SELECTION_RANGE)
+      getGrammarSelectionContent(buffer)
    )
    this.languageOverridesByBufferId.delete(buffer.id)
    this.grammarScoresByBuffer.set(buffer, result.score)
@@ -245,26 +244,32 @@ class GrammarRegistry {
  }

  grammarMatchesContents (grammar, contents) {
-    if (contents == null || grammar.firstLineRegex == null) return false
+    if (contents == null) return false

-    let escaped = false
-    let numberOfNewlinesInRegex = 0
-    for (let character of grammar.firstLineRegex.source) {
-      switch (character) {
-        case '\\':
-          escaped = !escaped
-          break
-        case 'n':
-          if (escaped) { numberOfNewlinesInRegex++ }
-          escaped = false
-          break
-        default:
-          escaped = false
+    if (grammar.contentRegExp) { // TreeSitter grammars
+      return grammar.contentRegExp.test(contents)
+    } else if (grammar.firstLineRegex) { // FirstMate grammars
+      let escaped = false
+      let numberOfNewlinesInRegex = 0
+      for (let character of grammar.firstLineRegex.source) {
+        switch (character) {
+          case '\\':
+            escaped = !escaped
+            break
+          case 'n':
+            if (escaped) { numberOfNewlinesInRegex++ }
+            escaped = false
+            break
+          default:
+            escaped = false
+        }
      }
-    }

-    const lines = contents.split('\n')
-    return grammar.firstLineRegex.testSync(lines.slice(0, numberOfNewlinesInRegex + 1).join('\n'))
+      const lines = contents.split('\n')
+      return grammar.firstLineRegex.testSync(lines.slice(0, numberOfNewlinesInRegex + 1).join('\n'))
+    } else {
+      return false
+    }
  }

  forEachGrammar (callback) {
@@ -338,12 +343,7 @@ class GrammarRegistry {
           grammar.id === languageOverride)) {
        buffer.setLanguageMode(this.languageModeForGrammarAndBuffer(grammar, buffer))
      } else if (!languageOverride) {
-        const score = this.getGrammarScore(
-          grammar,
-          buffer.getPath(),
-          buffer.getTextInRange(GRAMMAR_SELECTION_RANGE)
-        )
-
+        const score = this.getGrammarScore(grammar, buffer.getPath(), getGrammarSelectionContent(buffer))
        const currentScore = this.grammarScoresByBuffer.get(buffer)
        if (currentScore == null || score > currentScore) {
          buffer.setLanguageMode(this.languageModeForGrammarAndBuffer(grammar, buffer))
@@ -500,3 +500,10 @@ class GrammarRegistry {
    }
  }
 }
+
+function getGrammarSelectionContent (buffer) {
+  return buffer.getTextInRange(Range(
+    Point(0, 0),
+    buffer.positionForCharacterIndex(1024)
+  ))
+}
--- a/src/tree-sitter-grammar.js
+++ b/src/tree-sitter-grammar.js
@@ -1,7 +1,6 @@
 const path = require('path')
 const SyntaxScopeMap = require('./syntax-scope-map')
 const Module = require('module')
-const {OnigRegExp} = require('oniguruma')

 module.exports =
 class TreeSitterGrammar {
@@ -9,6 +8,7 @@ class TreeSitterGrammar {
    this.registry = registry
    this.id = params.id
    this.name = params.name
+    if (params.contentRegExp) this.contentRegExp = new RegExp(params.contentRegExp)

    this.folds = params.folds || []

@@ -37,7 +37,6 @@ class TreeSitterGrammar {
    })

    this.languageModule = require(languageModulePath)
-    this.firstLineRegex = params.firstLineMatch && new OnigRegExp(params.firstLineMatch)
    this.scopesById = new Map()
    this.idsByScope = {}
    this.nextScopeId = 256 + 1