Process unicode results from ripgrep correctly

This commit is contained in:
Rafael Oleza 2019-05-22 21:32:19 +02:00
parent 411e2a95fb
commit 6748b84060
2 changed files with 53 additions and 1 deletions

View File

@ -2647,6 +2647,28 @@ describe('Workspace', () => {
})
}
it('returns results on lines with unicode strings', async () => {
  // Gather every result reported by the scan so it can be inspected below.
  const collected = []
  const onResult = result => collected.push(result)

  await scan(/line with unico/, {}, onResult)

  expect(collected.length).toBe(1)

  const [{ filePath, matches }] = collected

  expect(filePath).toBe(
    atom.project.getDirectories()[0].resolve('file-with-unicode')
  )
  expect(matches).toHaveLength(1)

  // The expected range is expressed in character columns of the line text,
  // even though the text before the match contains multi-byte characters.
  const [onlyMatch] = matches
  expect(onlyMatch).toEqual({
    matchText: 'line with unico',
    lineText: 'ДДДДДДДДДДДДДДДДДД line with unicode',
    lineTextOffset: 0,
    range: [[0, 19], [0, 34]],
    leadingContextLines: [],
    trailingContextLines: []
  })
})
describe('when the core.excludeVcsIgnoredPaths config is truthy', () => {
let projectPath
let ignoredPath

View File

@ -92,6 +92,35 @@ function getPositionFromColumn (lines, column) {
return [currentLine - 1, column - previousLength]
}
// Converts the `start` and `end` offsets of every submatch of a ripgrep match
// from byte offsets (which is how they come from ripgrep) to character
// positions inside `match.lines.text`, i.e. indices usable on the JavaScript
// string.
//
// The submatches are updated in place and nothing is returned.
function processUnicodeMatch (match) {
  const text = match.lines.text

  if (text.length === Buffer.byteLength(text)) {
    // fast codepath for lines that only contain characters of 1 byte length.
    return
  }

  const lineBuffer = Buffer.from(text)
  let currentLength = 0
  let previousPosition = 0

  function convertPosition (position) {
    // Decode only the bytes between the previously converted offset and this
    // one, and accumulate the resulting string length. Always reading from
    // the full line buffer with absolute byte offsets keeps the conversion
    // correct for the second and later submatches on the same line.
    currentLength += lineBuffer.toString('utf8', previousPosition, position).length
    previousPosition = position

    return currentLength
  }

  // Iterate over all the submatches to convert their start and end values
  // (which come as bytes from ripgrep) to character positions.
  // We can do this incrementally because submatches come ordered by position.
  for (const submatch of match.submatches) {
    submatch.start = convertPosition(submatch.start)
    submatch.end = convertPosition(submatch.end)
  }
}
// This function processes a ripgrep submatch to create the correct
// range. This is mostly needed for multi-line results, since the range
// will have different start and end rows and we need to calculate these
@ -247,7 +276,6 @@ module.exports = class RipgrepDirectorySearcher {
buffer = lines.pop()
for (const line of lines) {
const message = JSON.parse(line)
updateTrailingContexts(message, pendingTrailingContexts, options)
if (message.type === 'begin') {
@ -261,6 +289,8 @@ module.exports = class RipgrepDirectorySearcher {
const trailingContextLines = []
pendingTrailingContexts.add(trailingContextLines)
processUnicodeMatch(message.data)
for (const submatch of message.data.submatches) {
const { lineText, range } = processSubmatch(
submatch,