Added detection and reporting of invalid escape sequences in string literals. Added "reportInvalidStringEscapeSequence" setting to control reporting.

This commit is contained in:
Eric Traut 2019-04-21 11:10:41 -07:00
parent 7e58f045f8
commit b3b7e76705
8 changed files with 97 additions and 36 deletions

View File

@ -145,6 +145,12 @@
"title": "Controls reporting of private variables and functions used outside of the owning class or module",
"default": "none"
},
"reportInvalidStringEscapeSequence": {
"$id": "#/properties/reportInvalidStringEscapeSequence",
"$ref": "#/definitions/diagnostic",
"title": "Controls reporting of invalid escape sequences used within string literals",
"default": "none"
},
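The new property points at the shared "diagnostic" definition via "$ref"; per the documentation below it accepts a boolean or a severity string. A rough TypeScript rendering of the accepted values (the alias name is invented for illustration):

```ts
// Hypothetical alias mirroring the values "#/definitions/diagnostic" accepts,
// as described in the configuration docs: a boolean or a severity string.
type DiagnosticSetting = boolean | 'none' | 'warning' | 'error';

// e.g. a pyrightconfig.json entry of { "reportInvalidStringEscapeSequence": "error" }
// parses to:
const setting: DiagnosticSetting = 'error';
```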
"pythonVersion": {
"$id": "#/properties/pythonVersion",
"type": "string",

View File

@ -48,13 +48,15 @@ The following settings control pyright's diagnostic output (warnings or errors).
**reportOptionalOperand** [boolean or string, optional]: Generate or suppress diagnostics for an attempt to use an Optional type as an operand to a binary or unary operator (like '+', '==', 'or', 'not'). The default value for this setting is 'none'.
**reportUntypedFunctionDecorator** [boolean or string, optional]: Generate or suppress diagnostics for function decorators that have no type annotations. These obscure the function type, defeating many type analysis features.
**reportUntypedFunctionDecorator** [boolean or string, optional]: Generate or suppress diagnostics for function decorators that have no type annotations. These obscure the function type, defeating many type analysis features. The default value for this setting is 'none'.
**reportUntypedClassDecorator** [boolean or string, optional]: Generate or suppress diagnostics for class decorators that have no type annotations. These obscure the class type, defeating many type analysis features.
**reportUntypedClassDecorator** [boolean or string, optional]: Generate or suppress diagnostics for class decorators that have no type annotations. These obscure the class type, defeating many type analysis features. The default value for this setting is 'none'.
**reportUntypedBaseClass** [boolean or string, optional]: Generate or suppress diagnostics for base classes whose type cannot be determined statically. These obscure the class type, defeating many type analysis features.
**reportUntypedBaseClass** [boolean or string, optional]: Generate or suppress diagnostics for base classes whose type cannot be determined statically. These obscure the class type, defeating many type analysis features. The default value for this setting is 'none'.
**reportPrivateUsage** [boolean or string, optional]: Generate or suppress diagnostics for uses of private variables or functions outside of the class or module that declares them. Private variables and functions, by convention, are named starting with a single underscore (“_”) character.
**reportPrivateUsage** [boolean or string, optional]: Generate or suppress diagnostics for uses of private variables or functions outside of the class or module that declares them. Private variables and functions, by convention, are named starting with a single underscore (“_”) character. The default value for this setting is 'none'.
**reportInvalidStringEscapeSequence** [boolean or string, optional]: Generate or suppress diagnostics for invalid escape sequences used within string literals. The Python specification indicates that such sequences will generate a syntax error in future versions. The default value for this setting is 'warning'.
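As a concrete illustration (a minimal sketch; the import paths are assumptions, though the tokenizer API matches the unit tests added later in this commit): the Python string "\d" uses an escape sequence the language does not define, and the tokenizer marks it so this rule can report it.

```ts
// Minimal sketch, assuming the Tokenizer/StringToken exports used by the tests
// in this commit; import paths are illustrative.
import { Tokenizer } from './parser/tokenizer';
import { StringToken, StringTokenFlags } from './parser/tokenizerTypes';

const results = new Tokenizer().tokenize('"\\d"');        // Python source: "\d"
const token = results.tokens.getItemAt(0) as StringToken;

// The undefined escape '\d' sets UnrecognizedEscape; the semantic analyzer then
// reports it at the severity configured by reportInvalidStringEscapeSequence.
console.log((token.flags & StringTokenFlags.UnrecognizedEscape) !== 0);   // true
console.log(token.invalidEscapeOffsets);                                  // [2] -> the 'd'
```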
## Execution Environment Options

View File

@ -31,6 +31,7 @@ import { AssignmentNode, AwaitExpressionNode, ClassNode, DelNode, ExceptNode,
StringNode, SuiteNode, TryNode, TupleExpressionNode,
TypeAnnotationExpressionNode, UnpackExpressionNode, WhileNode, WithNode,
YieldExpressionNode, YieldFromExpressionNode } from '../parser/parseNodes';
import { StringTokenFlags } from '../parser/tokenizerTypes';
import { ScopeUtils } from '../scopeUtils';
import { AnalyzerFileInfo } from './analyzerFileInfo';
import { AnalyzerNodeInfo } from './analyzerNodeInfo';
@ -43,8 +44,7 @@ import { ParseTreeWalker } from './parseTreeWalker';
import { Scope, ScopeType } from './scope';
import { Declaration, SymbolCategory } from './symbol';
import { AnyType, ClassType, ClassTypeFlags, FunctionParameter, FunctionType,
FunctionTypeFlags, ModuleType, Type, TypeCategory,
UnboundType, UnknownType } from './types';
FunctionTypeFlags, ModuleType, Type, UnboundType, UnknownType } from './types';
type ScopedNode = ModuleNode | ClassNode | FunctionNode | LambdaNode;
@ -541,6 +541,26 @@ export abstract class SemanticAnalyzer extends ParseTreeWalker {
}
visitString(node: StringNode): boolean {
for (let stringToken of node.tokens) {
if (stringToken.flags & StringTokenFlags.Unterminated) {
this._addError('String literal is unterminated', stringToken);
}
if (stringToken.flags & StringTokenFlags.NonAsciiInBytes) {
this._addError('Non-ASCII character not allowed in bytes string literal', stringToken);
}
if (stringToken.flags & StringTokenFlags.UnrecognizedEscape) {
if (stringToken.invalidEscapeOffsets) {
stringToken.invalidEscapeOffsets.forEach(offset => {
const textRange = new TextRange(stringToken.start + offset, 1);
this._addDiagnostic(this._fileInfo.configOptions.reportInvalidStringEscapeSequence,
'Unsupported escape sequence in string literal', textRange);
});
}
}
}
// Don't explore the parsed forward reference in
// a string node because this pass of the analyzer
// isn't capable of handling forward references.
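The offsets recorded by the tokenizer are relative to the start of the string token, so adding stringToken.start yields the absolute file offset for the one-character range passed to TextRange; a small worked example with assumed values:

```ts
// Illustrative values only: a token starting at file offset 120 whose
// invalidEscapeOffsets contain [2] produces a diagnostic range at offset 122.
const tokenStart = 120;                      // stringToken.start (assumed)
const escapeOffset = 2;                      // entry from invalidEscapeOffsets
const range = { start: tokenStart + escapeOffset, length: 1 };   // matches new TextRange(..., 1)
console.log(range);                          // { start: 122, length: 1 }
```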

View File

@ -119,6 +119,9 @@ export class ConfigOptions {
// the owning class or module?
reportPrivateUsage: DiagnosticLevel = 'none';
// Report usage of invalid escape sequences in string literals?
reportInvalidStringEscapeSequence: DiagnosticLevel = 'warning';
//---------------------------------------------------------------
// Parsing and Import Resolution Settings
@ -275,6 +278,10 @@ export class ConfigOptions {
this.reportPrivateUsage = this._convertDiagnosticLevel(
configObj.reportPrivateUsage, 'reportPrivateUsage', 'none');
// Read the "reportInvalidStringEscapeSequence" entry.
this.reportInvalidStringEscapeSequence = this._convertDiagnosticLevel(
configObj.reportInvalidStringEscapeSequence, 'reportInvalidStringEscapeSequence', 'warning');
// Read the "venvPath".
this.venvPath = undefined;
if (configObj.venvPath !== undefined) {
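_convertDiagnosticLevel itself is not shown in this diff; the sketch below is a hypothetical rendering of the normalization it presumably performs, collapsing boolean and string JSON values to a DiagnosticLevel and falling back to the supplied default:

```ts
// Hypothetical sketch only; the real implementation lives elsewhere in
// configOptions.ts and may differ in details.
type DiagnosticLevel = 'none' | 'warning' | 'error';

function convertDiagnosticLevel(value: any, fieldName: string,
        defaultValue: DiagnosticLevel): DiagnosticLevel {
    if (value === undefined) {
        return defaultValue;                 // entry omitted -> keep the default
    }
    if (typeof value === 'boolean') {
        return value ? 'error' : 'none';     // assumed boolean mapping
    }
    if (value === 'none' || value === 'warning' || value === 'error') {
        return value;                        // explicit severity string
    }
    // fieldName would be used to report a bad value in the real code.
    console.error(`Invalid value for "${fieldName}"; using "${defaultValue}".`);
    return defaultValue;
}

// e.g. reportInvalidStringEscapeSequence left unset resolves to 'warning' here.
```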

View File

@ -2207,7 +2207,7 @@ export class Parser {
const typeString = match[2];
const tokenOffset = curToken.end + match[1].length;
const stringToken = new StringToken(tokenOffset,
typeString.length, StringTokenFlags.None, typeString);
typeString.length, StringTokenFlags.None, typeString, undefined);
const stringNode = new StringNode([stringToken]);
let parser = new Parser();
@ -2231,20 +2231,7 @@ export class Parser {
let stringTokenList: StringToken[] = [];
while (this._peekTokenType() === TokenType.String) {
const stringToken = this._getNextToken() as StringToken;
if (stringToken.flags & StringTokenFlags.Unterminated) {
this._addError('String literal is unterminated', stringToken);
}
if (stringToken.flags & StringTokenFlags.NonAsciiInBytes) {
this._addError('Non-ASCII character not allowed in bytes string literal', stringToken);
}
if (stringToken.flags & StringTokenFlags.UnrecognizedEscape) {
this._addWarning('Unsupported escape sequence in string literal', stringToken);
}
stringTokenList.push(stringToken);
stringTokenList.push(this._getNextToken() as StringToken);
}
const stringNode = new StringNode(stringTokenList);
@ -2468,10 +2455,4 @@ export class Parser {
this._diagSink.addError(message,
convertOffsetsToRange(range.start, range.end, this._tokenizerOutput!.lines));
}
private _addWarning(message: string, range: TextRange) {
assert(range !== undefined);
this._diagSink.addWarning(message,
convertOffsetsToRange(range.start, range.end, this._tokenizerOutput!.lines));
}
}

View File

@ -109,6 +109,12 @@ export interface TokenizerOutput {
lines: TextRangeCollection<TextRange>;
}
interface StringScannerOutput {
value: string;
flags: StringTokenFlags;
invalidEscapeOffsets?: number[];
}
export class Tokenizer {
private _cs = new CharacterStream('');
private _tokens: Token[] = [];
@ -753,7 +759,6 @@ export class Tokenizer {
private _handleString(flags: StringTokenFlags, stringPrefixLength: number): void {
let start = this._cs.position - stringPrefixLength;
let value: string;
if (flags & StringTokenFlags.Triplicate) {
this._cs.advance(3);
@ -761,25 +766,27 @@ export class Tokenizer {
this._cs.moveNext();
}
[value, flags] = this._skipToEndOfStringLiteral(flags);
const stringLiteralInfo = this._skipToEndOfStringLiteral(flags, start);
let end = this._cs.position;
this._tokens.push(new StringToken(start, end - start, flags, value));
this._tokens.push(new StringToken(start, end - start, stringLiteralInfo.flags,
stringLiteralInfo.value, stringLiteralInfo.invalidEscapeOffsets));
}
private _skipToEndOfStringLiteral(flags: StringTokenFlags): [string, StringTokenFlags] {
private _skipToEndOfStringLiteral(flags: StringTokenFlags, startPosition: number): StringScannerOutput {
const quoteChar = (flags & StringTokenFlags.SingleQuote) ? Char.SingleQuote : Char.DoubleQuote;
const isTriplicate = (flags & StringTokenFlags.Triplicate) !== 0;
const isRaw = (flags & StringTokenFlags.Raw) !== 0;
const isBytes = (flags & StringTokenFlags.Bytes) !== 0;
let unescapedValue = '';
let invalidEscapeOffsets: number[] | undefined;
while (true) {
if (this._cs.isEndOfStream()) {
// Hit the end of file without a termination.
flags |= StringTokenFlags.Unterminated;
return [unescapedValue, flags];
return { value: unescapedValue, flags, invalidEscapeOffsets };
}
if (this._cs.currentChar === Char.Backslash) {
@ -886,6 +893,12 @@ export class Tokenizer {
default:
localValue = '\\' + String.fromCharCode(this._cs.currentChar);
flags |= StringTokenFlags.UnrecognizedEscape;
if (!invalidEscapeOffsets) {
invalidEscapeOffsets = [];
}
invalidEscapeOffsets.push(this._cs.position - startPosition);
this._cs.moveNext();
break;
}
@ -897,7 +910,7 @@ export class Tokenizer {
if (!isTriplicate) {
// Unterminated single-line string
flags |= StringTokenFlags.Unterminated;
return [unescapedValue, flags];
return { value: unescapedValue, flags, invalidEscapeOffsets };
}
// Skip over the escaped new line (either one or two characters).
@ -928,7 +941,7 @@ export class Tokenizer {
}
}
return [unescapedValue, flags];
return { value: unescapedValue, flags, invalidEscapeOffsets };
}
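Because _handleString passes start = _cs.position - stringPrefixLength, the recorded offsets are relative to the whole token (prefix and quotes included) and land on the character that follows the backslash; a worked example with assumed positions:

```ts
// Illustrative only: tokenizing the Python source  b"\d"
//
//   source:    b   "   \   d   "
//   position:  0   1   2   3   4
//
// startPosition (token start, prefix included) = 0
// _cs.position when the default case runs      = 3   (the 'd')
const recordedOffset = 3 - 0;    // pushed onto invalidEscapeOffsets
console.log(recordedOffset);     // 3
```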
private _skipFloatingPointCandidate(): boolean {

View File

@ -209,11 +209,15 @@ export class KeywordToken extends Token {
export class StringToken extends Token {
readonly flags: StringTokenFlags;
readonly value: string;
readonly invalidEscapeOffsets: number[] | undefined;
constructor(start: number, length: number, flags: StringTokenFlags, value: string,
invalidEscapeOffsets: number[] | undefined) {
constructor(start: number, length: number, flags: StringTokenFlags, value: string) {
super(TokenType.String, start, length);
this.flags = flags;
this.value = value;
this.invalidEscapeOffsets = invalidEscapeOffsets;
}
}
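A hypothetical hand-built token showing the widened constructor (in practice the Tokenizer produces these); the values mirror what it would record for the source "\d":

```ts
// Import path is illustrative.
import { StringToken, StringTokenFlags } from './tokenizerTypes';

// Hypothetical construction for the source "\d" (length 4, bad escape at offset 2).
const token = new StringToken(
    /* start */ 0,
    /* length */ 4,
    StringTokenFlags.DoubleQuote | StringTokenFlags.UnrecognizedEscape,
    '\\d',                // unescaped value keeps the unrecognized sequence verbatim
    [2]);                 // invalidEscapeOffsets, relative to the token start
```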

View File

@ -494,7 +494,7 @@ test('Strings: raw strings with escapes', () => {
assert.equal(stringToken1.value, '\\\r\n\\\n\\a');
});
test('Strings: escape at the end of double quoted string ', () => {
test('Strings: escape at the end of double quoted string', () => {
const t = new Tokenizer();
const results = t.tokenize('"quoted\\"\nx');
assert.equal(results.tokens.count, 3 + _implicitTokenCount);
@ -510,6 +510,34 @@ test('Strings: escape at the end of double quoted string ', () => {
assert.equal(results.tokens.getItemAt(2).type, TokenType.Identifier);
});
test('Strings: special escape characters', () => {
const t = new Tokenizer();
const results = t.tokenize('"\\r\\n\\a\\v\\t\\b\\f\\\\"');
assert.equal(results.tokens.count, 1 + _implicitTokenCount);
const stringToken = results.tokens.getItemAt(0) as StringToken;
assert.equal(stringToken.type, TokenType.String);
assert.equal(stringToken.flags, StringTokenFlags.DoubleQuote);
assert.equal(stringToken.length, 18);
assert.equal(stringToken.value, '\r\n\u0007\v\t\b\f\\');
});
test('Strings: invalid escape characters', () => {
const t = new Tokenizer();
const results = t.tokenize('"\\d  \\ "');
assert.equal(results.tokens.count, 1 + _implicitTokenCount);
const stringToken = results.tokens.getItemAt(0) as StringToken;
assert.equal(stringToken.type, TokenType.String);
assert.equal(stringToken.flags, StringTokenFlags.DoubleQuote |
StringTokenFlags.UnrecognizedEscape);
assert.equal(stringToken.length, 8);
assert.equal(stringToken.value, '\\d  \\ ');
assert.equal(stringToken.invalidEscapeOffsets!.length, 2);
assert.equal(stringToken.invalidEscapeOffsets![0], 2);
assert.equal(stringToken.invalidEscapeOffsets![1], 6);
});
test('Comments', () => {
const t = new Tokenizer();
const results = t.tokenize(' #co"""mment1\n\t\n#comm\'ent2 ');