Implemented minimal support for the \N{name} escape in string literals.

2024-09-11 16:06:39 +03:00 · 2019-04-21 12:06:07 -07:00 · 2019-04-21 12:06:07 -07:00 · b9edc5ccdd
commit b9edc5ccdd
parent e354ef2865
2 changed files with 86 additions and 4 deletions
--- a/server/src/parser/tokenizer.ts
+++ b/server/src/parser/tokenizer.ts
@ -892,11 +892,39 @@ export class Tokenizer {
                                localValue = scanHexEscape(2);
                                break;

-                            case Char.N:
-                                // TODO - need to handle name
-                                localValue = '0';
-                                this._cs.moveNext();
+                            case Char.N: {
+                                let foundIllegalChar = false;
+                                let charCount = 1;
+                                if (this._cs.lookAhead(charCount) !== Char.OpenBrace) {
+                                    foundIllegalChar = true;
+                                } else {
+                                    charCount++;
+                                    while (true) {
+                                        const lookaheadChar = this._cs.lookAhead(charCount);
+                                        if (lookaheadChar === Char.CloseBrace) {
+                                            break;
+                                        } else if (!this._isAlphaNumericChar(lookaheadChar)) {
+                                            foundIllegalChar = true;
+                                            break;
+                                        } else {
+                                            charCount++;
+                                        }
+                                    }
+                                }
+
+                                if (foundIllegalChar) {
+                                    addInvalidEscapeOffset();
+                                    localValue = '\\' + String.fromCharCode(this._cs.currentChar);
+                                    this._cs.moveNext();
+                                } else {
+                                    // We don't have the Unicode name database handy, so
+                                    // assume that the name is valid and use a '-' as a
+                                    // replacement character.
+                                    localValue = '-';
+                                    this._cs.advance(1 + charCount);
+                                }
                                break;
+                            }

                            case Char.u:
                                localValue = scanHexEscape(4);
@ -970,6 +998,22 @@ export class Tokenizer {
        return { value: unescapedValue, flags, invalidEscapeOffsets };
    }

+    // Reports whether charCode lies in one of the ranges Char._0..Char._9,
+    // Char.a..Char.z or Char.A..Char.Z. The \N{...} escape scanner uses
+    // this to validate each character of the name between the braces.
+    private _isAlphaNumericChar(charCode: number): boolean {
+        if (charCode >= Char._0 && charCode <= Char._9) {
+            return true;
+        }
+
+        if (charCode >= Char.a && charCode <= Char.z) {
+            return true;
+        }
+
+        // The upper bound must be Char.Z. Using Char.A for both bounds
+        // would accept only the single letter A and reject every other
+        // upper-case letter.
+        if (charCode >= Char.A && charCode <= Char.Z) {
+            return true;
+        }
+
+        return false;
+    }
+
    private _isOctalCharCode(charCode: number): boolean {
        return charCode >= Char._0 && charCode <= Char._7;
    }
--- a/server/src/tests/tokenizer.test.ts
+++ b/server/src/tests/tokenizer.test.ts
@ -589,6 +589,44 @@ test('Strings: bad hex escapes', () => {
    assert.equal(stringToken2.value, '\\U0000006m');
 });

+test('Strings: good name escapes', () => { // well-formed \N{name} escapes: both braces present, alphanumeric name
+    const t = new Tokenizer();
+    const results = t.tokenize('"\\N{caret}" "a\\N{A9}a"'); // two double-quoted literals separated by a space
+    assert.equal(results.tokens.count, 2 + _implicitTokenCount);
+
+    const stringToken0 = results.tokens.getItemAt(0) as StringToken;
+    assert.equal(stringToken0.type, TokenType.String);
+    assert.equal(stringToken0.flags, StringTokenFlags.DoubleQuote); // no UnrecognizedEscape flag is expected here
+    assert.equal(stringToken0.length, 11); // equals the 11 source characters of the first literal, quotes included
+    assert.equal(stringToken0.value, '-'); // the tokenizer substitutes a hyphen for the named character
+
+    const stringToken1 = results.tokens.getItemAt(1) as StringToken;
+    assert.equal(stringToken1.type, TokenType.String);
+    assert.equal(stringToken1.flags, StringTokenFlags.DoubleQuote);
+    assert.equal(stringToken1.length, 10); // equals the 10 source characters of the second literal, quotes included
+    assert.equal(stringToken1.value, 'a-a'); // text around the escape is kept; only the escape becomes a hyphen
+});
+
+test('Strings: bad name escapes', () => { // malformed \N escapes must be flagged and left in the value verbatim
+    const t = new Tokenizer();
+    const results = t.tokenize('"\\N{caret" "\\N{ A9}"'); // first lacks the closing brace, second has a space in the name
+    assert.equal(results.tokens.count, 2 + _implicitTokenCount);
+
+    const stringToken0 = results.tokens.getItemAt(0) as StringToken;
+    assert.equal(stringToken0.type, TokenType.String);
+    assert.equal(stringToken0.flags, StringTokenFlags.DoubleQuote |
+        StringTokenFlags.UnrecognizedEscape); // the invalid escape is reported through this flag
+    assert.equal(stringToken0.length, 10); // equals the 10 source characters of the first literal, quotes included
+    assert.equal(stringToken0.value, '\\N{caret'); // backslash and N are kept as-is rather than replaced
+
+    const stringToken1 = results.tokens.getItemAt(1) as StringToken;
+    assert.equal(stringToken1.type, TokenType.String);
+    assert.equal(stringToken1.flags, StringTokenFlags.DoubleQuote |
+        StringTokenFlags.UnrecognizedEscape);
+    assert.equal(stringToken1.length, 9); // equals the 9 source characters of the second literal, quotes included
+    assert.equal(stringToken1.value, '\\N{ A9}'); // the whole escape text survives unchanged
+});
+
 test('Comments', () => {
    const t = new Tokenizer();
    const results = t.tokenize(' #co"""mment1\n\t\n#comm\'ent2 ');