Add context support to ripgrep-directory-searcher

2024-09-19 23:17:16 +03:00 · 2019-05-16 17:47:18 +02:00 · 2019-05-16 17:47:18 +02:00 · 0647a0044f
commit 0647a0044f
parent fd82a58f1d
5 changed files with 188 additions and 27 deletions
--- a/script/package-lock.json
+++ b/script/package-lock.json
@ -901,7 +901,6 @@
      "version": "0.4.2",
      "resolved": "https://registry.npmjs.org/boom/-/boom-0.4.2.tgz",
      "integrity": "sha1-emNune1O/O+xnO9JR6PGffrukRs=",
-      "optional": true,
      "requires": {
        "hoek": "0.9.x"
      }
@ -3991,8 +3990,7 @@
    "hoek": {
      "version": "0.9.1",
      "resolved": "https://registry.npmjs.org/hoek/-/hoek-0.9.1.tgz",
-      "integrity": "sha1-PTIkYrrfB3Fup+uFuviAec3c5QU=",
-      "optional": true
+      "integrity": "sha1-PTIkYrrfB3Fup+uFuviAec3c5QU="
    },
    "home-or-tmp": {
      "version": "1.0.0",
--- a/spec/fixtures/dir/c
+++ b/spec/fixtures/dir/c
@ -0,0 +1,19 @@
+line 1
+line 2
+line 3
+line 4
+line 5
+result 1
+line 6
+line 7
+line 8
+line 9
+line 10
+result 2
+result 3
+line 11
+line 12
+result 4
+line 13
+line 14
+line 15
--- a/spec/workspace-spec.js
+++ b/spec/workspace-spec.js
@ -2407,20 +2407,20 @@ describe('Workspace', () => {
    })
  })

-  for (const ripgrep of [true, /* false */]) {
+  for (const ripgrep of [true, false]) {
    describe(`::scan(regex, options, callback) { ripgrep: ${ripgrep} }`, () => {
-      function scan(regex, options, iterator) {
-        return atom.workspace.scan(regex, {...options, ripgrep}, iterator)
+      function scan (regex, options, iterator) {
+        return atom.workspace.scan(regex, { ...options, ripgrep }, iterator)
      }

      describe('when called with a regex', () => {
-        fit('calls the callback with all regex results in all files in the project', async () => {
+        it('calls the callback with all regex results in all files in the project', async () => {
          const results = []
          await scan(
            /(a)+/,
            { leadingContextLineCount: 1, trailingContextLineCount: 1 },
            result => results.push(result)
-           )
+          )

          expect(results).toHaveLength(3)
          expect(results[0].filePath).toBe(
@ -2433,7 +2433,7 @@ describe('Workspace', () => {
            lineTextOffset: 0,
            range: [[0, 0], [0, 3]],
            leadingContextLines: [],
-            trailingContextLines: ripgrep ? [] : ['cc aa cc']
+            trailingContextLines: ['cc aa cc']
          })
        })

@ -2905,6 +2905,72 @@ describe('Workspace', () => {
          })
        })
      })
+
+      describe('leadingContextLineCount and trailingContextLineCount options', () => {
+        async function search ({ leadingContextLineCount, trailingContextLineCount }) {
+          const results = []
+          await scan(
+            /result/,
+            { leadingContextLineCount, trailingContextLineCount },
+            result => results.push(result)
+          )
+
+          return {
+            leadingContext: results[0].matches.map(result => result.leadingContextLines),
+            trailingContext: results[0].matches.map(result => result.trailingContextLines)
+          }
+        }
+
+        const expectedLeadingContext = [
+          ['line 1', 'line 2', 'line 3', 'line 4', 'line 5'],
+          ['line 6', 'line 7', 'line 8', 'line 9', 'line 10'],
+          ['line 7', 'line 8', 'line 9', 'line 10', 'result 2'],
+          ['line 10', 'result 2', 'result 3', 'line 11', 'line 12']
+        ];
+        const expectedTrailingContext = [
+          ['line 6', 'line 7', 'line 8', 'line 9', 'line 10'],
+          ['result 3', 'line 11', 'line 12', 'result 4', 'line 13'],
+          ['line 11', 'line 12', 'result 4', 'line 13', 'line 14'],
+          ['line 13', 'line 14', 'line 15']
+        ]
+
+        it('returns valid contexts no matter how many lines are requested', async () => {
+          expect(
+            await search({})
+          ).toEqual({
+            leadingContext: [[], [], [], []],
+            trailingContext: [[], [], [], []]
+          })
+
+          expect(
+            await search({ leadingContextLineCount: 1, trailingContextLineCount: 1 })
+          ).toEqual({
+            leadingContext: expectedLeadingContext.map(result => result.slice(-1)),
+            trailingContext: expectedTrailingContext.map(result => result.slice(0, 1))
+          })
+
+          expect(
+            await search({ leadingContextLineCount: 2, trailingContextLineCount: 2 })
+          ).toEqual({
+            leadingContext: expectedLeadingContext.map(result => result.slice(-2)),
+            trailingContext: expectedTrailingContext.map(result => result.slice(0, 2))
+          })
+
+          expect(
+            await search({ leadingContextLineCount: 5, trailingContextLineCount: 5 })
+          ).toEqual({
+            leadingContext: expectedLeadingContext.map(result => result.slice(-5)),
+            trailingContext: expectedTrailingContext.map(result => result.slice(0, 5))
+          })
+
+          expect(
+            await search({ leadingContextLineCount: 2, trailingContextLineCount: 3 })
+          ).toEqual({
+            leadingContext: expectedLeadingContext.map(result => result.slice(-2)),
+            trailingContext: expectedTrailingContext.map(result => result.slice(0, 3))
+          })
+        })
+      })
    }) // Cancels other ongoing searches
  }

--- a/src/ripgrep-directory-searcher.js
+++ b/src/ripgrep-directory-searcher.js
@ -1,7 +1,80 @@
-const { spawn } = require("child_process")
+const { spawn } = require('child_process')
+
+// `ripgrep` and `scandal` have a different way of handling the trailing and leading
+// context lines:
+//  * `scandal` returns all the context lines that are requested, even if they include
+//    previous or future results.
+//  * `ripgrep` is a bit smarter and only returns the context lines that do not correspond
+//    to any result (similar to how results are shown in the find and replace UI).
+//
+// For example, if we have the following file and we request two leading context lines:
+//
+//    line 1
+//    line 2
+//    result 1
+//    result 2
+//    line 3
+//    line 4
+//
+// `scandal` will return two results:
+//   * First result with `['line 1', 'line 2']` as leading context.
+//   * Second result with `['line 2', 'result 1']` as leading context.
+// `ripgrep`, on the other hand, emits a sequence of JSON messages that more closely resembles
+// the way the results are shown:
+//   [
+//     {type: 'begin', ...},
+//     {type: 'context', ...}, // context for line 1
+//     {type: 'context', ...}, // context for line 2
+//     {type: 'match', ...}, // result 1
+//     {type: 'match', ...}, // result 2
+//     {type: 'end', ...},
+//   ]
+//
+// In order to keep backwards compatibility, and avoid making changes to the find and replace logic,
+// for `ripgrep` we need to keep some state with the context lines (and matches) to be able to build
+// a data structure that has the same behaviour as the `scandal` one.
+//
+// We use the `pendingLeadingContext` array to generate the leading context. This array gets mutated
+// to always contain the leading `n` lines and is cloned every time a match is found. It's currently
+// implemented as a standard array but we can easily change it to use a linked list if we find that
+// the shift operations are slow.
+//
+// We use the `pendingTrailingContexts` Set to generate the trailing context. Since the trailing
+// context needs to be generated after receiving a match, we keep all trailing context arrays that
+// haven't been fulfilled in this Set, and mutate them adding new lines until they are fulfilled.
+
+function updateLeadingContext (message, pendingLeadingContext, options) {
+  if (message.type !== 'match' && message.type !== 'context') {
+    return
+  }
+
+  if (options.leadingContextLineCount) {
+    pendingLeadingContext.push(message.data.lines.text.trim())
+
+    if (pendingLeadingContext.length > options.leadingContextLineCount) {
+      pendingLeadingContext.shift()
+    }
+  }
+}
+
+function updateTrailingContexts (message, pendingTrailingContexts, options) {
+  if (message.type !== 'match' && message.type !== 'context') {
+    return
+  }
+
+  if (options.trailingContextLineCount) {
+    for (const trailingContextLines of pendingTrailingContexts) {
+      trailingContextLines.push(message.data.lines.text.trim())
+
+      if (trailingContextLines.length === options.trailingContextLineCount) {
+        pendingTrailingContexts.delete(trailingContextLines)
+      }
+    }
+  }
+}

 module.exports = class RipgrepDirectorySearcher {
-  constructor() {
+  constructor () {
    this.rgPath = require('vscode-ripgrep').rgPath
  }

@ -45,17 +118,15 @@ module.exports = class RipgrepDirectorySearcher {
  search (directories, regexp, options) {
    const paths = directories.map(d => d.getPath())

-    const args = ["--json", "--regexp", regexp.source]
+    const args = ['--json', '--regexp', regexp.source]
    if (options.leadingContextLineCount) {
-      args.push("--before-context", options.leadingContextLineCount)
+      args.push('--before-context', options.leadingContextLineCount)
    }
    if (options.trailingContextLineCount) {
-      args.push("--after-context", options.trailingContextLineCount)
+      args.push('--after-context', options.trailingContextLineCount)
    }
    args.push(...paths)

-    console.log(args)
-
    const child = spawn(this.rgPath, args, {
      stdio: ['pipe', 'pipe', 'inherit']
    })
@ -65,42 +136,51 @@ module.exports = class RipgrepDirectorySearcher {
    return new Promise(resolve => {
      let buffer = ''
      let pendingEvent
+      let pendingLeadingContext
+      let pendingTrailingContexts
+
      child.stdout.on('data', chunk => {
-        buffer += chunk;
+        buffer += chunk
        const lines = buffer.split('\n')
        buffer = lines.pop()
        for (const line of lines) {
          const message = JSON.parse(line)
-          console.log(message);
+
+          updateTrailingContexts(message, pendingTrailingContexts, options)

          if (message.type === 'begin') {
            pendingEvent = {
              filePath: message.data.path.text,
              matches: []
            }
+            pendingLeadingContext = []
+            pendingTrailingContexts = new Set()
          } else if (message.type === 'match') {
            const startRow = message.data.line_number - 1
+            const trailingContextLines = []
+            pendingTrailingContexts.add(trailingContextLines)
+
            for (const submatch of message.data.submatches) {
              pendingEvent.matches.push({
                matchText: submatch.match.text,
-                lineText: message.data.lines.text,
+                lineText: message.data.lines.text.trim(),
                lineTextOffset: 0,
                range: [[startRow, submatch.start], [startRow, submatch.end]],
-                leadingContextLines: [],
-                trailingContextLines: []
+                leadingContextLines: [...pendingLeadingContext],
+                trailingContextLines
              })
            }
          } else if (message.type === 'end') {
-            console.log('yielding', pendingEvent)
            didMatch(pendingEvent)
            pendingEvent = null
          } else if (message.type === 'summary') {
            resolve()
+            return
          }
+
+          updateLeadingContext(message, pendingLeadingContext, options)
        }
      })
-
    })
  }
-
 }
--- a/src/workspace.js
+++ b/src/workspace.js
@ -1903,9 +1903,7 @@ module.exports = class Workspace extends Model {
        leadingContextLineCount: options.leadingContextLineCount || 0,
        trailingContextLineCount: options.trailingContextLineCount || 0,
        didMatch: result => {
-          console.log("didMatch", result)
          if (!this.project.isPathModified(result.filePath)) {
-            console.log("!!!!!!!!!!!!!!!!!!!!")
            return iterator(result)
          }
        },
@ -1930,7 +1928,7 @@ module.exports = class Workspace extends Model {
        var matches = []
        buffer.scan(regex, match => matches.push(match))
        if (matches.length > 0) {
-          iterator({filePath, matches})
+          iterator({ filePath, matches })
        }
      }
    }