pulsar/spec/tokenized-buffer-iterator-spec.js

/** @babel */
import TokenizedBufferIterator from '../src/tokenized-buffer-iterator'
import {Point} from 'text-buffer'

describe('TokenizedBufferIterator', () => {
  describe('seek(position)', function () {
    it('seeks to the leftmost tag boundary greater than or equal to the given position and returns the containing tags', function () {
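      // In these stubs, negative odd ids (-1, -3, -5) open a scope, the
      // negative even id that follows (-2, -4, -6) closes it, and
      // non-negative tags (3, 4) are text lengths that advance the
      // iterator's column.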
      const tokenizedBuffer = {
        tokenizedLineForRow (row) {
          if (row === 0) {
            return {
              tags: [-1, -2, -3, -4, -5, 3, -3, -4, -6, -5, 4, -6, -3, -4],
              text: 'foo bar',
              openScopes: []
            }
          } else {
            return null
          }
        },

        grammar: {
          scopeForId (id) {
            return {
              '-1': 'foo', '-2': 'foo',
              '-3': 'bar', '-4': 'bar',
              '-5': 'baz', '-6': 'baz'
            }[id]
          }
        }
      }
      const iterator = new TokenizedBufferIterator(tokenizedBuffer)

      expect(iterator.seek(Point(0, 0))).toEqual([])
      expect(iterator.getPosition()).toEqual(Point(0, 0))
      expect(iterator.getCloseTags()).toEqual([])
      expect(iterator.getOpenTags()).toEqual(['foo'])

      iterator.moveToSuccessor()
      expect(iterator.getCloseTags()).toEqual(['foo'])
      expect(iterator.getOpenTags()).toEqual(['bar'])

      expect(iterator.seek(Point(0, 1))).toEqual(['baz'])
      expect(iterator.getPosition()).toEqual(Point(0, 3))
      expect(iterator.getCloseTags()).toEqual([])
      expect(iterator.getOpenTags()).toEqual(['bar'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 3))
      expect(iterator.getCloseTags()).toEqual(['bar', 'baz'])
      expect(iterator.getOpenTags()).toEqual(['baz'])

      expect(iterator.seek(Point(0, 3))).toEqual(['baz'])
      expect(iterator.getPosition()).toEqual(Point(0, 3))
      expect(iterator.getCloseTags()).toEqual([])
      expect(iterator.getOpenTags()).toEqual(['bar'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 3))
      expect(iterator.getCloseTags()).toEqual(['bar', 'baz'])
      expect(iterator.getOpenTags()).toEqual(['baz'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 7))
      expect(iterator.getCloseTags()).toEqual(['baz'])
      expect(iterator.getOpenTags()).toEqual(['bar'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 7))
      expect(iterator.getCloseTags()).toEqual(['bar'])
      expect(iterator.getOpenTags()).toEqual([])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(1, 0))
      expect(iterator.getCloseTags()).toEqual([])
      expect(iterator.getOpenTags()).toEqual([])

      expect(iterator.seek(Point(0, 5))).toEqual(['baz'])
      expect(iterator.getPosition()).toEqual(Point(0, 7))
      expect(iterator.getCloseTags()).toEqual(['baz'])
      expect(iterator.getOpenTags()).toEqual(['bar'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 7))
      expect(iterator.getCloseTags()).toEqual(['bar'])
      expect(iterator.getOpenTags()).toEqual([])
    })
  })

  // Background (from the 2016-04-27 commit "Report boundary when next line's
  // `openScopes` don't match containingTags", which introduced the specs
  // below):
  //
  // Sometimes, performing an edit on one row can affect another row's
  // tokenization: the classic example is opening a multi-line comment on a
  // line, thereby commenting out subsequent lines without changing the
  // buffer's contents at those locations. We call this technique "spill
  // detection". Since the number of affected lines can grow quite large,
  // Atom tokenizes synchronously only the lines where the edit occurred,
  // triggering background (i.e. `setInterval`) tokenization for all the
  // other lines that need to be refreshed because of a "spill".
  //
  // Predictably, this approach causes a temporary inconsistency in the
  // stored tokenized lines. Suppose we have two tokenized lines, with a tag
  // opened in the middle of the first one and closed on the second one. If
  // we perform an edit that deletes that tag, reading the second tokenized
  // line now yields a dangling close tag. This didn't matter much in the
  // `DisplayBuffer` version, because for each line we reopened all the tags
  // found in the stored `openScopes` property and closed all the tags
  // starting on that line right at its end. In the `DisplayLayer` world,
  // however, we don't read tags from each tokenized line; we let
  // `TokenizedBufferIterator` report tag boundaries and their respective
  // locations. Since this is an iterator-based approach, we were not reading
  // `openScopes` for each `TokenizedLine`, which made the dangling close tag
  // problem evident: close and open tags no longer matched, and exceptions
  // were thrown all over the place.
  //
  // Several approaches were considered to solve this:
  //
  // 1. Recompute all the lines affected by a spill synchronously when the
  //    buffer changes. For large files this can be pretty onerous, and we
  //    don't want to regress in terms of performance.
  // 2. Let `TokenizedBuffer.tokenizedLineForRow(bufferRow)` recompute
  //    potentially invalid lines lazily, starting from the first invalid
  //    line down to the requested buffer row. When an edit near the top of a
  //    long file causes a spill, Atom (or any other package, for that
  //    matter) could request a line far down in the file, causing this
  //    method to recompute lots and lots of lines.
  // 3. Let `DisplayLayer` deal with closing an un-opened tag. This is nice
  //    because we already keep track of containing tags there, but it also
  //    feels like the wrong place for this logic, as display layers
  //    shouldn't deal with grammar-related concerns.
  // 4. Keep track of containing tags in `TokenizedBufferIterator`, and
  //    report a boundary at the end of a line when the next line's
  //    `openScopes` property doesn't match the `containingTags` the iterator
  //    has been tracking.
  //
  // Option 4 was chosen as the most performant and the cleanest in terms of
  // code. A sketch of the idea follows.
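
  // A minimal sketch (with hypothetical names) of how option 4 can work:
  // compare the scopes the iterator believes are open at the end of a line
  // against the next line's stored `openScopes`, closing the excess at the
  // line's end and reopening the remainder at the next line's start. This is
  // an illustration only, never invoked by the specs, and not the actual
  // `TokenizedBufferIterator` implementation.
  function lineBoundaryAdjustment (containingTags, nextLineOpenScopes, scopeForId) {
    const expectedTags = nextLineOpenScopes.map((id) => scopeForId(id))

    // Find the longest common prefix of the two scope stacks.
    let commonLength = 0
    while (
      commonLength < containingTags.length &&
      commonLength < expectedTags.length &&
      containingTags[commonLength] === expectedTags[commonLength]
    ) {
      commonLength++
    }

    return {
      // Scopes to close at the current line's end, innermost first.
      closeTagsAtLineEnd: containingTags.slice(commonLength).reverse(),
      // Scopes to (re)open at the start of the next line.
      openTagsAtNextLineStart: expectedTags.slice(commonLength)
    }
  }
  // For instance, in the last spec below, row 0 ends with containing tags
  // ['qux'] while row 1's `openScopes` map to ['foo'], yielding
  // closeTagsAtLineEnd ['qux'] and openTagsAtNextLineStart ['foo'].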

  describe('moveToSuccessor()', function () {
    it('reports two boundaries at the same position when tags close, open, then close again without a non-negative integer separating them (regression)', () => {
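      // The tag stream opens 'foo', closes it, then immediately opens and
      // closes it again with no text-length tag in between; the iterator
      // must report these as separate boundaries at the same position
      // rather than merging the close/open/close run into one.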
      const tokenizedBuffer = {
        tokenizedLineForRow () {
          return {
            tags: [-1, -2, -1, -2],
            text: '',
            openScopes: []
          }
        },

        grammar: {
          scopeForId () {
            return 'foo'
          }
        }
      }

      const iterator = new TokenizedBufferIterator(tokenizedBuffer)
      iterator.seek(Point(0, 0))
      expect(iterator.getPosition()).toEqual(Point(0, 0))
      expect(iterator.getCloseTags()).toEqual([])
      expect(iterator.getOpenTags()).toEqual(['foo'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 0))
      expect(iterator.getCloseTags()).toEqual(['foo'])
      expect(iterator.getOpenTags()).toEqual(['foo'])

      iterator.moveToSuccessor()
      expect(iterator.getCloseTags()).toEqual(['foo'])
      expect(iterator.getOpenTags()).toEqual([])
    })
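
    // This exercises the spill-recovery behavior described in the comment
    // above the `moveToSuccessor()` specs: row 0 ends with 'qux' still open,
    // but row 1's `openScopes` say only 'foo' should be open, so the
    // iterator synthesizes a close boundary for 'qux' at the end of row 0
    // and reopens 'foo' at the start of row 1.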
it("reports a boundary at line end if the next line's open scopes don't match the containing tags for the current line", () => {
const tokenizedBuffer = {
tokenizedLineForRow (row) {
if (row === 0) {
return {
tags: [-1, 3, -2, -3],
text: 'bar',
openScopes: []
}
} else if (row === 1) {
return {
tags: [3],
text: 'baz',
openScopes: [-1]
}
} else if (row === 2) {
return {
tags: [-2],
text: '',
openScopes: [-1]
}
}
},
grammar: {
scopeForId (id) {
if (id === -2 || id === -1) {
return 'foo'
} else if (id === -3) {
return 'qux'
}
}
        }
      }

      const iterator = new TokenizedBufferIterator(tokenizedBuffer)
      iterator.seek(Point(0, 0))
      expect(iterator.getPosition()).toEqual(Point(0, 0))
      expect(iterator.getCloseTags()).toEqual([])
      expect(iterator.getOpenTags()).toEqual(['foo'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 3))
      expect(iterator.getCloseTags()).toEqual(['foo'])
      expect(iterator.getOpenTags()).toEqual(['qux'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(0, 3))
      expect(iterator.getCloseTags()).toEqual(['qux'])
      expect(iterator.getOpenTags()).toEqual([])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(1, 0))
      expect(iterator.getCloseTags()).toEqual([])
      expect(iterator.getOpenTags()).toEqual(['foo'])

      iterator.moveToSuccessor()
      expect(iterator.getPosition()).toEqual(Point(2, 0))
      expect(iterator.getCloseTags()).toEqual(['foo'])
      expect(iterator.getOpenTags()).toEqual([])
    })
  })
})