Implemented minimal support for the \N{name} escape in string literals.

This commit is contained in:
Eric Traut 2019-04-21 12:06:07 -07:00
parent e354ef2865
commit b9edc5ccdd
2 changed files with 86 additions and 4 deletions

View File

@ -892,11 +892,39 @@ export class Tokenizer {
localValue = scanHexEscape(2);
break;
case Char.N:
// TODO - need to handle name
localValue = '0';
this._cs.moveNext();
// Handles the "\N{name}" named-character escape. The stream's current
// character is the 'N' that follows the backslash (it is what gets echoed
// back in the invalid-escape path below).
case Char.N: {
let foundIllegalChar = false;
// Lookahead offset measured from the 'N'; offset 1 must be the '{'.
let charCount = 1;
if (this._cs.lookAhead(charCount) !== Char.OpenBrace) {
foundIllegalChar = true;
} else {
charCount++;
// Scan the name up to the closing brace. Any character rejected by
// _isAlphaNumericChar marks the whole escape as invalid.
// NOTE(review): termination at end of input relies on lookAhead
// returning a code that is neither '}' nor alphanumeric — confirm.
// NOTE(review): an empty name ("\N{}") passes this scan — confirm
// that is intended.
while (true) {
const lookaheadChar = this._cs.lookAhead(charCount);
if (lookaheadChar === Char.CloseBrace) {
break;
} else if (!this._isAlphaNumericChar(lookaheadChar)) {
foundIllegalChar = true;
break;
} else {
charCount++;
}
}
}
if (foundIllegalChar) {
// Record the bad escape and keep the backslash plus the 'N' as
// literal text; only the 'N' is consumed here, so whatever follows
// it is processed as ordinary string content.
addInvalidEscapeOffset();
localValue = '\\' + String.fromCharCode(this._cs.currentChar);
this._cs.moveNext();
} else {
// We don't have the Unicode name database handy, so
// assume that the name is valid and use a '-' as a
// replacement character.
localValue = '-';
// charCount is the offset of the closing brace, so one extra step
// presumably moves past the entire "N{name}" sequence.
this._cs.advance(1 + charCount);
}
break;
}
case Char.u:
localValue = scanHexEscape(4);
@ -970,6 +998,22 @@ export class Tokenizer {
return { value: unescapedValue, flags, invalidEscapeOffsets };
}
// Reports whether the given character code is an ASCII digit ('0'-'9'),
// a lowercase ASCII letter ('a'-'z') or an uppercase ASCII letter
// ('A'-'Z'). Every other code, including non-ASCII letters, yields false.
private _isAlphaNumericChar(charCode: number): boolean {
    if (charCode >= Char._0 && charCode <= Char._9) {
        return true;
    }

    if (charCode >= Char.a && charCode <= Char.z) {
        return true;
    }

    // The upper bound has to be 'Z'. Bounding the range by 'A' on both
    // sides accepted only the single letter 'A', so uppercase names such
    // as "\N{BULLET}" were wrongly reported as invalid escapes.
    if (charCode >= Char.A && charCode <= Char.Z) {
        return true;
    }

    return false;
}
// Reports whether the given character code is an octal digit,
// i.e. lies between '0' and '7' inclusive.
private _isOctalCharCode(charCode: number): boolean {
    const atLeastZero = Char._0 <= charCode;
    const atMostSeven = charCode <= Char._7;
    return atLeastZero && atMostSeven;
}

View File

@ -589,6 +589,44 @@ test('Strings: bad hex escapes', () => {
assert.equal(stringToken2.value, '\\U0000006m');
});
test('Strings: good name escapes', () => {
    // Two double-quoted literals, each containing a well-formed \N{...}
    // escape. The tokenizer is expected to accept both and to substitute
    // a single '-' for each escape in the unescaped value.
    const tokenizer = new Tokenizer();
    const output = tokenizer.tokenize('"\\N{caret}" "a\\N{A9}a"');
    assert.equal(output.tokens.count, 2 + _implicitTokenCount);

    // Expected source length and unescaped value, in token order.
    const expectations = [
        { length: 11, value: '-' },
        { length: 10, value: 'a-a' }
    ];

    expectations.forEach((expected, index) => {
        const token = output.tokens.getItemAt(index) as StringToken;
        assert.equal(token.type, TokenType.String);
        assert.equal(token.flags, StringTokenFlags.DoubleQuote);
        assert.equal(token.length, expected.length);
        assert.equal(token.value, expected.value);
    });
});
test('Strings: bad name escapes', () => {
    // Two double-quoted literals with malformed \N escapes: the first
    // has no closing brace, the second has a space inside the braces.
    // Both should be flagged as unrecognized escapes and keep their
    // original text in the unescaped value.
    const tokenizer = new Tokenizer();
    const output = tokenizer.tokenize('"\\N{caret" "\\N{ A9}"');
    assert.equal(output.tokens.count, 2 + _implicitTokenCount);

    const flaggedDoubleQuote = StringTokenFlags.DoubleQuote |
        StringTokenFlags.UnrecognizedEscape;

    // Expected source length and unescaped value, in token order.
    const expectations = [
        { length: 10, value: '\\N{caret' },
        { length: 9, value: '\\N{ A9}' }
    ];

    expectations.forEach((expected, index) => {
        const token = output.tokens.getItemAt(index) as StringToken;
        assert.equal(token.type, TokenType.String);
        assert.equal(token.flags, flaggedDoubleQuote);
        assert.equal(token.length, expected.length);
        assert.equal(token.value, expected.value);
    });
});
test('Comments', () => {
const t = new Tokenizer();
const results = t.tokenize(' #co"""mment1\n\t\n#comm\'ent2 ');