Distinguish assignment/thunk by statement context (#11324)

Align `Assignment`/`Function` distinction in AST with compiler's implemented semantics:
- The ambiguous case `funcOrVar = expression` is now parsed as a `Function` when it occurs in a `Type` definition or at the top level of a module. That is, it is an `Assignment` in contexts where the RHS is evaluated immediately when the binding is evaluated, and a `Function` in contexts where the RHS is evaluated each time the bound name is evaluated.
- `Assignment` statements may now occur only in function bodies.

Correcting this distinction lays the groundwork for #11302.

Other changes:
- Fixed incorrect source code locations for negative literals and negated expressions.

# Important Notes
New APIs:
- The parser now exposes a `parse_block` entry point, which allows parsing input lines as if in the body of a function. The previous entry point has been renamed to `parse_module`.
This commit is contained in:
Kaz Wesley 2024-10-18 10:54:55 -07:00 committed by GitHub
parent ed12224267
commit 4d4a2990a0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
39 changed files with 643 additions and 441 deletions

View File

@ -31,7 +31,6 @@
"test-dev:e2e": "cross-env NODE_ENV=production playwright test --ui",
"preinstall": "corepack pnpm run generate-metadata",
"postinstall": "playwright install",
"build-rust-ffi": "wasm-pack build ./rust-ffi --release --target web && wasm-pack build ./rust-ffi --out-dir node-pkg --target nodejs",
"generate-metadata": "node scripts/generateIconMetadata.js"
},
"dependencies": {

View File

@ -170,7 +170,7 @@ export const { injectFn: useGraphStore, provideFn: provideGraphStore } = createC
if (!methodAst.value.ok || !moduleSource.text) return
const method = methodAst.value.value
const toRaw = new Map<SourceRangeKey, RawAst.Tree.Function>()
visitRecursive(Ast.parseEnso(moduleSource.text), (node) => {
visitRecursive(Ast.rawParseModule(moduleSource.text), (node) => {
if (node.type === RawAst.Tree.Type.Function) {
const start = node.whitespaceStartInCodeParsed + node.whitespaceLengthInCodeParsed
const end = start + node.childrenLengthInCodeParsed

View File

@ -19,7 +19,7 @@ import { findExpressions, testCase, tryFindExpressions } from './testCase'
test('Raw block abstracts to Ast.BodyBlock', () => {
const code = 'value = 2 + 2'
const rawBlock = Ast.parseEnso(code)
const rawBlock = Ast.rawParseModule(code)
const edit = MutableModule.Transient()
const abstracted = Ast.abstract(edit, rawBlock, code)
expect(abstracted.root).toBeInstanceOf(Ast.BodyBlock)
@ -376,7 +376,7 @@ const cases = [
]
test.each(cases)('parse/print round trip: %s', (code) => {
// Get an AST.
const root = Ast.parseBlock(code)
const { root } = Ast.parseModuleWithSpans(code)
// Print AST back to source.
const printed = Ast.print(root)
expect(printed.code).toEqual(code)
@ -389,7 +389,7 @@ test.each(cases)('parse/print round trip: %s', (code) => {
expect(Ast.repair(root).fixes).toBe(undefined)
// Re-parse.
const { root: root1, spans: spans1 } = Ast.parseBlockWithSpans(printed.code)
const { root: root1, spans: spans1 } = Ast.parseModuleWithSpans(printed.code)
Ast.setExternalIds(root1.module, spans1, idMap)
// Check that Identities match original AST.
const printed1 = Ast.print(root1)
@ -815,12 +815,12 @@ describe('Code edit', () => {
})
test('Shifting whitespace ownership', () => {
const beforeRoot = Ast.parseBlock('value = 1 +\n')
const beforeRoot = Ast.parseModuleWithSpans('value = 1 +\n').root
beforeRoot.module.replaceRoot(beforeRoot)
const before = findExpressions(beforeRoot, {
value: Ast.Ident,
'1': Ast.NumericLiteral,
'value = 1 +': Ast.Assignment,
'value = 1 +': Ast.Function,
})
const edit = beforeRoot.module.edit()
const newCode = 'value = 1 \n'
@ -831,7 +831,7 @@ describe('Code edit', () => {
const after = findExpressions(edit.root()!, {
value: Ast.Ident,
'1': Ast.NumericLiteral,
'value = 1': Ast.Assignment,
'value = 1': Ast.Function,
})
expect(after.value.id).toBe(before.value.id)
expect(after['1'].id).toBe(before['1'].id)
@ -839,7 +839,7 @@ describe('Code edit', () => {
})
test('merging', () => {
const block = Ast.parseBlock('a = 1\nb = 2')
const block = Ast.parseModuleWithSpans('a = 1\nb = 2').root
const module = block.module
module.replaceRoot(block)

View File

@ -2,8 +2,8 @@ import {
astContainingChar,
childrenAstNodes,
debugAst,
parseEnso,
parseEnsoLine,
rawParseLine,
rawParseModule,
readAstOrTokenSpan,
readAstSpan,
readTokenSpan,
@ -48,18 +48,18 @@ const parseCases = [
]
test.each(parseCases)("Parsing '%s'", (code) => {
expect(debugAst(parseEnso(code))).toMatchSnapshot()
expect(debugAst(rawParseModule(code))).toMatchSnapshot()
})
test.each(parseCases)("AST spans of '%s' are valid", (input) => {
const tree = parseEnso(input)
const tree = rawParseModule(input)
const endPos = validateSpans(tree)
expect(endPos).toStrictEqual(input.length)
})
test("Reading AST node's code", () => {
const code = 'Data.read File\n2 + 3'
const ast = parseEnso(code)
const ast = rawParseModule(code)
expect(readAstSpan(ast, code)).toStrictEqual(code)
assert(ast.type === Tree.Type.BodyBlock)
const statements = Array.from(ast.statements)
@ -123,7 +123,7 @@ test.each([
['(', [{ type: Tree.Type.Group, repr: '(' }]],
['(foo', [{ type: Tree.Type.Group, repr: '(foo' }]],
])("Reading children of '%s'", (code, expected) => {
const ast = parseEnsoLine(code)
const ast = rawParseLine(code)
const children = Array.from(childrenAstNodes(ast))
const childrenWithExpected = children.map((child, i) => {
return { child, expected: expected[i] }
@ -147,7 +147,7 @@ test.each([
],
],
])("Walking AST of '%s'", (code, expected) => {
const ast = parseEnsoLine(code)
const ast = rawParseLine(code)
const visited = Array.from(walkRecursive(ast))
const visitedRepr = visited.map((visited) => {
return {
@ -206,7 +206,7 @@ test.each([
],
],
])("Reading AST from code '%s' and position %i", (code, position, expected) => {
const ast = parseEnso(code)
const ast = rawParseModule(code)
const astAtPosition = astContainingChar(position, ast)
const resultWithExpected = astAtPosition.map((ast, i) => {
return { ast, expected: expected[i] }

View File

@ -2,8 +2,8 @@ import { assert } from '@/util/assert'
import {
RawAst,
astPrettyPrintType,
parseEnso,
parsedTreeOrTokenRange,
rawParseModule,
readAstOrTokenSpan,
readTokenSpan,
} from '@/util/ast/raw'
@ -114,7 +114,7 @@ export class AliasAnalyzer {
private readonly code: string,
ast?: RawAst.Tree,
) {
this.ast = ast ?? parseEnso(code)
this.ast = ast ?? rawParseModule(code)
this.rootScope = new Scope(parsedTreeOrTokenRange(this.ast))
this.scopes = new NonEmptyStack(this.rootScope)
}

View File

@ -1,8 +1,8 @@
import { assert, assertDefined } from '@/util/assert'
import {
childrenAstNodesOrTokens,
parseEnso,
parsedTreeOrTokenRange,
rawParseModule,
readAstOrTokenSpan,
visitGenerator,
visitRecursive,
@ -46,7 +46,7 @@ class AstExtended<T extends Tree | Token = Tree | Token, HasIdMap extends boolea
public static parse(code: string): AstExtended<Tree, false>
public static parse(code: string, idMap: IdMap): AstExtended<Tree, true>
public static parse(code: string, idMap?: IdMap): AstExtended<Tree, boolean> {
const ast = parseEnso(code)
const ast = rawParseModule(code)
if (idMap != null) {
visitRecursive(ast, (node) => {
const range = parsedTreeOrTokenRange(node)

View File

@ -1,13 +1,13 @@
import { assertDefined } from '@/util/assert'
import * as map from 'lib0/map'
import * as RawAst from 'ydoc-shared/ast/generated/ast'
import { parseEnso } from 'ydoc-shared/ast/parse'
import { rawParseModule } from 'ydoc-shared/ast/parse'
import { LazyObject, LazySequence } from 'ydoc-shared/ast/parserSupport'
import { tryGetSoleValue } from 'ydoc-shared/util/data/iterable'
import { isResult, mapOk } from 'ydoc-shared/util/data/result'
import type { SourceRange } from 'ydoc-shared/yjsModel'
export { parseEnso, RawAst }
export { RawAst, rawParseModule }
export type HasAstRange = SourceRange | RawAst.Tree | RawAst.Token
@ -16,8 +16,8 @@ export type HasAstRange = SourceRange | RawAst.Tree | RawAst.Token
*
* Is meant to be a helper for tests. If the code is multiline, an exception is raised.
*/
export function parseEnsoLine(code: string): RawAst.Tree {
const block = parseEnso(code)
export function rawParseLine(code: string): RawAst.Tree {
const block = rawParseModule(code)
const soleExpression = tryGetSoleValue(block.statements)?.expression
assertDefined(soleExpression)
return soleExpression

View File

@ -15,8 +15,14 @@ pub fn parse_doc_to_json(docs: &str) -> String {
}
#[wasm_bindgen]
pub fn parse(code: &str) -> Vec<u8> {
let ast = PARSER.with(|parser| parser.run(code));
pub fn parse_module(code: &str) -> Vec<u8> {
let ast = PARSER.with(|parser| parser.parse_module(code));
enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format")
}
/// Parses `code` through the parser's `parse_block` entry point and returns the resulting AST
/// serialized to the binary format.
///
/// # Panics
///
/// Panics with "Failed to serialize AST to binary format" if serialization returns an error.
#[wasm_bindgen]
pub fn parse_block(code: &str) -> Vec<u8> {
let ast = PARSER.with(|parser| parser.parse_block(code));
enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format")
}
@ -39,7 +45,7 @@ pub fn is_ident_or_operator(code: &str) -> u32 {
#[wasm_bindgen]
pub fn is_numeric_literal(code: &str) -> bool {
let parsed = PARSER.with(|parser| parser.run(code));
let parsed = PARSER.with(|parser| parser.parse_block(code));
let enso_parser::syntax::tree::Variant::BodyBlock(body) = parsed.variant else { return false };
let [stmt] = &body.statements[..] else { return false };
stmt.expression.as_ref().map_or(false, |expr| match &expr.variant {

View File

@ -11,6 +11,7 @@ export const {
is_ident_or_operator,
is_numeric_literal,
parse_doc_to_json,
parse_tree,
parse_block,
parse_module,
xxHash128,
} = globalThis

View File

@ -13,7 +13,8 @@ declare const YDOC_PORT: number | undefined
declare const YDOC_LS_DEBUG: boolean | undefined
// rust ffi shims
declare function parse_tree(code: string): Uint8Array
declare function parse_block(code: string): Uint8Array
declare function parse_module(code: string): Uint8Array
declare function parse_doc_to_json(docs: string): string
declare function is_ident_or_operator(code: string): number
declare function is_numeric_literal(code: string): boolean

View File

@ -569,7 +569,7 @@ class ModulePersistence extends ObservableV2<{ removed: () => void }> {
if (editedRoot instanceof Ast.BodyBlock) Ast.repair(editedRoot, edit)
syncModule.applyEdit(edit)
} else {
const { root, spans } = Ast.parseBlockWithSpans(code, syncModule)
const { root, spans } = Ast.parseModuleWithSpans(code, syncModule)
syncModule.syncRoot(root)
parsedSpans = spans
}

View File

@ -6,7 +6,13 @@
import { createXXHash128 } from 'hash-wasm'
import type { IDataType } from 'hash-wasm/dist/lib/util'
import { is_ident_or_operator, is_numeric_literal, parse, parse_doc_to_json } from 'rust-ffi'
import {
is_ident_or_operator,
is_numeric_literal,
parse_block,
parse_doc_to_json,
parse_module,
} from 'rust-ffi'
const xxHasher128 = await createXXHash128()
export function xxHash128(input: IDataType) {
@ -16,4 +22,4 @@ export function xxHash128(input: IDataType) {
}
/* eslint-disable-next-line camelcase */
export { is_ident_or_operator, is_numeric_literal, parse_doc_to_json, parse as parse_tree }
export { is_ident_or_operator, is_numeric_literal, parse_block, parse_doc_to_json, parse_module }

View File

@ -31,7 +31,7 @@ import {
type SourceRangeKey,
} from '../yjsModel'
import { graphParentPointers } from './debug'
import { parse_tree, xxHash128 } from './ffi'
import { parse_block, parse_module, xxHash128 } from './ffi'
import * as RawAst from './generated/ast'
import { MutableModule } from './mutableModule'
import type { LazyObject } from './parserSupport'
@ -62,9 +62,17 @@ import {
Wildcard,
} from './tree'
/** Return the raw parser output for the given code. */
export function parseEnso(code: string): RawAst.Tree.BodyBlock {
const blob = parse_tree(code)
/**
 * Return the raw parser output for the given code, parsed as a module.
 *
 * The code is passed to the `parse_module` FFI function, and the bytes it returns are handed to
 * `deserializeBlock` to obtain the `BodyBlock` root.
 */
export function rawParseModule(code: string): RawAst.Tree.BodyBlock {
return deserializeBlock(parse_module(code))
}
/**
 * Return the raw parser output for the given code, parsed as a body block.
 *
 * The code is passed to the `parse_block` FFI function, and the bytes it returns are handed to
 * `deserializeBlock` to obtain the `BodyBlock` root.
 */
export function rawParseBlock(code: string): RawAst.Tree.BodyBlock {
return deserializeBlock(parse_block(code))
}
function deserializeBlock(blob: Uint8Array): RawAst.Tree.BodyBlock {
const tree = RawAst.Tree.read(new DataView(blob.buffer), blob.byteLength - 4)
// The root of the parser output is always a body block.
assert(tree.type === RawAst.Tree.Type.BodyBlock)
@ -76,7 +84,7 @@ export function normalize(rootIn: Ast): Ast {
const printed = print(rootIn)
const idMap = spanMapToIdMap(printed.info)
const module = MutableModule.Transient()
const tree = parseEnso(printed.code)
const tree = rawParseModule(printed.code)
const { root: parsed, spans } = abstract(module, tree, printed.code)
module.replaceRoot(parsed)
setExternalIds(module, spans, idMap)
@ -596,15 +604,18 @@ export function printDocumented(
return code
}
/** Parse the input as a block. */
export function parseBlock(code: string, inModule?: MutableModule): Owned<MutableBodyBlock> {
return parseBlockWithSpans(code, inModule).root
/**
 * Parse the input as a body block, not the top level of a module.
 * @param code - The source text to parse.
 * @param module - The module passed to `abstract`; when omitted, a new `MutableModule.Transient()` is used.
 * @returns The `root` of the result produced by `abstract`.
 */
export function parseBlock(code: string, module?: MutableModule): Owned<MutableBodyBlock> {
// Obtain the raw tree using the body-block parser entry point (not the module one).
const tree = rawParseBlock(code)
return abstract(module ?? MutableModule.Transient(), tree, code).root
}
/** Parse the input. If it contains a single expression at the top level, return it; otherwise, return a block. */
/**
* Parse the input. If it contains a single expression at the top level, return it; otherwise, parse it as a body block.
*/
export function parse(code: string, module?: MutableModule): Owned {
const module_ = module ?? MutableModule.Transient()
const ast = parseBlock(code, module_)
const ast = parseBlock(code, module)
const soleStatement = tryGetSoleValue(ast.statements())
if (!soleStatement) return ast
const parent = parentId(soleStatement)
@ -613,21 +624,20 @@ export function parse(code: string, module?: MutableModule): Owned {
return asOwned(soleStatement)
}
/** Parse a block, and return it along with a mapping from source locations to parsed objects. */
export function parseBlockWithSpans(
/** Parse a module, and return it along with a mapping from source locations to parsed objects. */
export function parseModuleWithSpans(
code: string,
inModule?: MutableModule,
module?: MutableModule | undefined,
): { root: Owned<MutableBodyBlock>; spans: SpanMap } {
const tree = parseEnso(code)
const module = inModule ?? MutableModule.Transient()
return abstract(module, tree, code)
const tree = rawParseModule(code)
return abstract(module ?? MutableModule.Transient(), tree, code)
}
/** Parse the input, and apply the given `IdMap`. Return the parsed tree, the updated `IdMap`, the span map, and a
* mapping to the `RawAst` representation.
*/
export function parseExtended(code: string, idMap?: IdMap | undefined, inModule?: MutableModule) {
const rawRoot = parseEnso(code)
const rawRoot = rawParseModule(code)
const module = inModule ?? MutableModule.Transient()
const { root, spans, toRaw } = module.transact(() => {
const { root, spans, toRaw } = abstract(module, rawRoot, code)
@ -701,7 +711,7 @@ export function repair(
// Print the input to see what spans its nodes expect to have in the output.
const printed = print(root)
// Parse the printed output to see what spans actually correspond to nodes in the printed code.
const reparsed = parseBlockWithSpans(printed.code)
const reparsed = parseModuleWithSpans(printed.code)
// See if any span we expected to be a node isn't; if so, it likely merged with its parent due to wrong precedence.
const { lostInline, lostBlock } = checkSpans(
printed.info.nodes,
@ -727,7 +737,7 @@ export function repair(
// Verify that it's fixed.
const printed2 = print(fixes.getVersion(root))
const reparsed2 = parseBlockWithSpans(printed2.code)
const reparsed2 = parseModuleWithSpans(printed2.code)
const { lostInline: lostInline2, lostBlock: lostBlock2 } = checkSpans(
printed2.info.nodes,
reparsed2.spans.nodes,
@ -919,7 +929,7 @@ export function applyTextEditsToAst(
) {
const printed = print(ast)
const code = applyTextEdits(printed.code, textEdits)
const rawParsedBlock = parseEnso(code)
const rawParsedBlock = rawParseModule(code)
const rawParsed =
ast instanceof MutableBodyBlock ? rawParsedBlock : rawBlockToInline(rawParsedBlock)
const parsed = abstract(ast.module, rawParsed, code)

View File

@ -198,8 +198,8 @@ export abstract class Ast {
return this.module.get(this.parentId)
}
static parseBlock(source: string, inModule?: MutableModule) {
return parseBlock(source, inModule)
static parseBlock(source: string, module?: MutableModule) {
return parseBlock(source, module)
}
static parse(source: string, module?: MutableModule) {

View File

@ -696,7 +696,7 @@ class Compiler(
* @return A Tree representation of `source`
*/
def parseInline(source: CharSequence): Tree =
Parser.parse(source)
Parser.parseBlock(source)
/** Enhances the provided IR with import/export statements for the provided list
* of fully qualified names of modules. The statements are considered to be "synthetic" i.e. compiler-generated.

View File

@ -10,6 +10,7 @@ import java.nio.file.StandardOpenOption;
import java.util.function.Function;
import org.enso.compiler.core.EnsoParser;
import org.enso.compiler.core.IR;
import org.enso.compiler.core.ir.Expression;
import org.enso.compiler.core.ir.Module;
public abstract class CompilerTests {
@ -19,6 +20,12 @@ public abstract class CompilerTests {
return ir;
}
/**
 * Test helper: compiles {@code code} with {@link EnsoParser#compileBlock} and asserts that the
 * resulting IR is not {@code null}.
 *
 * @param code the source text to compile
 * @return the non-null block IR
 */
protected static Expression.Block parseBlock(CharSequence code) {
Expression.Block ir = EnsoParser.compileBlock(code);
assertNotNull("IR was generated", ir);
return ir;
}
public static void assertIR(String msg, IR old, IR now) throws IOException {
assertEqualsIR(msg, null, old, now);
}

View File

@ -3,10 +3,10 @@ package org.enso.compiler.test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.enso.compiler.core.IR;
import org.enso.compiler.core.ir.Empty;
import org.enso.compiler.core.ir.Expression;
import org.enso.compiler.core.ir.Location;
import org.enso.compiler.core.ir.Module;
import org.enso.compiler.core.ir.expression.errors.Syntax;
import org.enso.compiler.core.ir.module.scope.definition.Method;
import org.junit.Test;
@ -466,7 +466,7 @@ public class ErrorCompilerTest extends CompilerTests {
@Test
public void illegalPrivateVariableDeclaration() throws Exception {
var ir = parse("private var = 42");
var ir = parseBlock("private var = 42");
assertSingleSyntaxError(
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 16);
}
@ -645,8 +645,7 @@ public class ErrorCompilerTest extends CompilerTests {
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 29, 35);
}
private void assertSingleSyntaxError(
Module ir, Syntax.Reason type, String msg, int start, int end) {
private void assertSingleSyntaxError(IR ir, Syntax.Reason type, String msg, int start, int end) {
var errors = assertIR(ir, Syntax.class, 1);
assertEquals(type, errors.head().reason());
if (msg != null) {
@ -655,7 +654,7 @@ public class ErrorCompilerTest extends CompilerTests {
assertEquals(new Location(start, end), errors.head().location().get().location());
}
private List<Syntax> assertIR(Module ir, Class<Syntax> type, int count) {
private List<Syntax> assertIR(IR ir, Class<Syntax> type, int count) {
var errors = ir.preorder().filter(type::isInstance).map(type::cast);
assertEquals("Expecting errors: " + errors, count, errors.size());
return errors;

View File

@ -6,6 +6,7 @@ import org.enso.compiler.core.ir.Expression;
import org.enso.compiler.core.ir.Location;
import org.enso.compiler.core.ir.Module;
import org.enso.syntax2.Parser;
import scala.Option;
public final class EnsoParser {
private EnsoParser() {}
@ -15,7 +16,7 @@ public final class EnsoParser {
}
public static Module compile(CharSequence src, Map<Location, UUID> idMap) {
var tree = Parser.parse(src);
var tree = Parser.parseModule(src);
var treeToIr = TreeToIr.MODULE;
if (idMap != null) {
treeToIr = new TreeToIr(idMap);
@ -23,8 +24,13 @@ public final class EnsoParser {
return treeToIr.translate(tree);
}
public static scala.Option<Expression> compileInline(CharSequence src) {
var tree = Parser.parse(src);
/**
 * Parses {@code src} with {@code Parser.parseBlock} and translates the resulting tree to IR using
 * {@code TreeToIr.MODULE.translateBlock}.
 *
 * @param src the source text to parse
 * @return the block IR produced by the translation
 */
public static Expression.Block compileBlock(CharSequence src) {
var tree = Parser.parseBlock(src);
return TreeToIr.MODULE.translateBlock(tree);
}
public static Option<Expression> compileInline(CharSequence src) {
var tree = Parser.parseBlock(src);
return TreeToIr.MODULE.translateInline(tree);
}

View File

@ -75,6 +75,10 @@ final class TreeToIr {
return translateModule(ast);
}
/**
 * Translates a parser body block to an {@link Expression.Block} by delegating to
 * {@code translateBodyBlock} with {@code suspended} set to {@code false}.
 *
 * @param ast the body block to translate
 * @return the translated block
 */
Expression.Block translateBlock(Tree.BodyBlock ast) {
return translateBodyBlock(ast, false);
}
/**
* Translates an inline program expression represented in the parser {@link Tree} to the
* compiler's {@link IR} representation.
@ -86,77 +90,68 @@ final class TreeToIr {
* @return The {@link IR} representation of the given ast if it is valid, otherwise
* {@link Option#empty()}.
*/
Option<Expression> translateInline(Tree ast) {
return switch (ast) {
case Tree.BodyBlock b -> {
List<Expression> expressions = nil();
java.util.List<IdentifiedLocation> locations = new ArrayList<>();
for (Line statement : b.getStatements()) {
Tree exprTree = statement.getExpression();
Expression expr = switch (exprTree) {
case null -> null;
case Tree.Export x -> null;
case Tree.Import x -> null;
case Tree.Invalid x -> null;
case Tree.TypeSignature sig -> {
Expression methodReference;
try {
methodReference = translateMethodReference(sig.getVariable(), true);
} catch (SyntaxException ex) {
methodReference = ex.toError();
}
var signature = translateType(sig.getType());
var ascription =
new Type.Ascription(
methodReference,
signature,
Option.empty(),
getIdentifiedLocation(sig),
meta());
yield ascription;
}
case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno);
default -> translateExpression(exprTree);
};
if (expr != null) {
expressions = join(expr, expressions);
if (expr.location().isDefined()) {
locations.add(expr.location().get());
}
}
Option<Expression> translateInline(Tree.BodyBlock ast) {
List<Expression> expressions = nil();
java.util.List<IdentifiedLocation> locations = new ArrayList<>();
for (Line statement : ast.getStatements()) {
Tree exprTree = statement.getExpression();
Expression expr = switch (exprTree) {
case null -> null;
case Tree.Export x -> null;
case Tree.Import x -> null;
case Tree.Invalid x -> null;
case Tree.TypeSignature sig -> {
Expression methodReference;
try {
methodReference = translateMethodReference(sig.getVariable(), true);
} catch (SyntaxException ex) {
methodReference = ex.toError();
}
var signature = translateType(sig.getType());
yield new Type.Ascription(
methodReference,
signature,
Option.empty(),
getIdentifiedLocation(sig),
meta());
}
case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno);
default -> translateExpression(exprTree);
};
if (expr != null) {
expressions = join(expr, expressions);
if (expr.location().isDefined()) {
locations.add(expr.location().get());
}
yield switch (expressions.size()) {
case 0 -> Option.empty();
case 1 -> Option.apply(expressions.head());
default -> {
IdentifiedLocation combinedLocation;
if (locations.isEmpty()) {
combinedLocation = null;
} else {
combinedLocation =
new IdentifiedLocation(
new Location(
locations.get(1).start(),
locations.get(locations.size() - 1).end()
),
null
);
}
var returnValue = expressions.head();
@SuppressWarnings("unchecked")
var statements = ((List<Expression>) expressions.tail()).reverse();
yield Option.apply(new Expression.Block(
statements,
returnValue,
combinedLocation,
false,
meta()
));
}
};
}
}
return switch (expressions.size()) {
case 0 -> Option.empty();
case 1 -> Option.apply(expressions.head());
default -> {
throw new IllegalStateException();
IdentifiedLocation combinedLocation;
if (locations.isEmpty()) {
combinedLocation = null;
} else {
combinedLocation =
new IdentifiedLocation(
new Location(
locations.get(1).start(),
locations.get(locations.size() - 1).end()
),
null
);
}
var returnValue = expressions.head();
@SuppressWarnings("unchecked")
var statements = ((List<Expression>) expressions.tail()).reverse();
yield Option.apply(new Expression.Block(
statements,
returnValue,
combinedLocation,
false,
meta()
));
}
};
}
@ -334,24 +329,6 @@ final class TreeToIr {
yield translateModuleSymbol(doc.getExpression(), join(comment, appendTo));
}
case Tree.Assignment a -> {
var reference = translateMethodReference(a.getPattern(), false);
var body = translateExpression(a.getExpr());
if (body == null) {
throw new NullPointerException();
}
var aLoc = expandToContain(getIdentifiedLocation(a.getExpr()), body.identifiedLocation());
var binding = new Method.Binding(
reference,
nil(),
false,
body.setLocation(Option.apply(aLoc)),
expandToContain(getIdentifiedLocation(a), aLoc),
meta()
);
yield join(binding, appendTo);
}
case Tree.TypeSignature sig -> {
var methodReference = translateMethodReference(sig.getVariable(), true);
var signature = translateType(sig.getType());
@ -457,16 +434,6 @@ final class TreeToIr {
yield join(ir, appendTo);
}
// In some cases this is a `Function` in IR, but an `Assignment` in Tree.
// See: https://discord.com/channels/401396655599124480/1001476608957349917
case Tree.Assignment assignment -> {
var name = buildName(assignment.getPattern());
java.util.List<ArgumentDefinition> args = java.util.Collections.emptyList();
var ir = translateFunction(assignment, name, false, args, assignment.getExpr(), null,
false);
yield join(ir, appendTo);
}
case Tree.ForeignFunction fn when fn.getBody() instanceof Tree.TextLiteral body -> {
var name = buildName(fn.getName());
var args = translateArgumentsDefinition(fn.getArgs());
@ -595,15 +562,14 @@ final class TreeToIr {
String functionName = fn.getName().codeRepr();
var ascribedBody = addTypeAscription(functionName, body, returnSignature, loc);
var binding = new Method.Binding(
methodRef,
args,
isPrivate,
ascribedBody,
loc,
meta()
return new Method.Binding(
methodRef,
args,
isPrivate,
ascribedBody,
loc,
meta()
);
return binding;
}
private Expression translateFunction(
@ -617,30 +583,23 @@ final class TreeToIr {
} catch (SyntaxException ex) {
return ex.toError();
}
var loc = getIdentifiedLocation(fun);
var body = translateExpression(treeBody);
String functionName = name.name();
if (args.isEmpty()) {
if (body instanceof Expression.Block block) {
Expression body;
if (treeBody instanceof Tree.BodyBlock block) {
// suspended block has a name and no arguments
body = block.copy(
block.copy$default$1(),
block.copy$default$2(),
block.copy$default$3(),
true,
block.copy$default$5(),
block.copy$default$6(),
block.copy$default$7()
);
body = translateBodyBlock(block, true);
} else {
body = translateExpression(treeBody);
}
if (body == null) {
body = translateSyntaxError(fun, Syntax.UnexpectedExpression$.MODULE$);
}
var ascribedBody = addTypeAscription(functionName, body, returnType, loc);
return new Expression.Binding(name, ascribedBody, loc, meta());
} else {
var body = translateExpression(treeBody);
if (body == null) {
return translateSyntaxError(fun, Syntax.UnexpectedDeclarationInType$.MODULE$);
}
@ -1010,43 +969,7 @@ final class TreeToIr {
}
yield new Application.Prefix(fn, args.reverse(), false, getIdentifiedLocation(tree), meta());
}
case Tree.BodyBlock body -> {
var expressions = new java.util.ArrayList<Expression>();
Expression last = null;
for (var line : body.getStatements()) {
Tree expr = line.getExpression();
if (expr == null) {
continue;
}
if (last != null) {
expressions.add(last);
}
while (expr instanceof Tree.Documented doc) {
expr = doc.getExpression();
expressions.add(translateComment(doc, doc.getDocumentation()));
}
last = translateExpression(expr, false);
}
var locationWithANewLine = getIdentifiedLocation(body, 0, 0, null);
if (last == null) {
if (expressions.isEmpty()) {
last = new Empty(locationWithANewLine, meta());
} else {
last = expressions.get(expressions.size() - 1);
expressions.remove(expressions.size() - 1);
}
}
var list = CollectionConverters.asScala(expressions.iterator()).toList();
if (last != null
&& last.location().isDefined()
&& last.location().get().end() != locationWithANewLine.end()) {
int start = last.location().get().start();
int end = locationWithANewLine.end() - 1;
var id = new IdentifiedLocation(start, end, last.location().get().uuid());
last = last.setLocation(Option.apply(id));
}
yield new Expression.Block(list, last, locationWithANewLine, false, meta());
}
case Tree.BodyBlock body -> translateBodyBlock(body, false);
case Tree.Assignment assign -> {
var name = buildNameOrQualifiedName(assign.getPattern());
var expr = translateExpression(assign.getExpr(), false);
@ -1156,7 +1079,7 @@ final class TreeToIr {
case Literal.Number n -> n.copy(
n.copy$default$1(),
"-" + n.copy$default$2(),
n.copy$default$3(),
Option.apply(getIdentifiedLocation(un)),
n.copy$default$4(),
n.copy$default$5(),
n.copy$default$6()
@ -1164,7 +1087,7 @@ final class TreeToIr {
case Expression expr -> {
var negate = new Name.Literal("negate", true, null, Option.empty(), meta());
var arg = new CallArgument.Specified(Option.empty(), expr, expr.identifiedLocation(), meta());
yield new Application.Prefix(negate, join(arg, nil()), false, expr.identifiedLocation(), meta());
yield new Application.Prefix(negate, join(arg, nil()), false, getIdentifiedLocation(un), meta());
}
case null ->
translateSyntaxError(tree, new Syntax.UnsupportedSyntax("Strange unary -"));
@ -1215,6 +1138,50 @@ final class TreeToIr {
};
}
/**
 * Translates the statements of a parser body block into an {@link Expression.Block}.
 *
 * <p>Every translated statement except the final one is placed in the block's expression list;
 * the final one becomes the block's return value. Documentation attached to a statement is emitted
 * as a separate comment expression preceding that statement.
 *
 * @param body the parser body block to translate
 * @param suspended passed through unchanged to the {@link Expression.Block} constructor
 * @return the translated block
 */
private Expression.Block translateBodyBlock(Tree.BodyBlock body, boolean suspended) {
var expressions = new java.util.ArrayList<Expression>();
// The most recently translated statement; it is appended to `expressions` only once a later
// statement is encountered, so that the final statement can be used as the return value.
Expression last = null;
for (var line : body.getStatements()) {
Tree expr = line.getExpression();
// Lines that carry no expression contribute nothing to the block.
if (expr == null) {
continue;
}
if (last != null) {
expressions.add(last);
}
// Unwrap (possibly nested) documentation wrappers, emitting each comment before the
// expression it documents.
while (expr instanceof Tree.Documented doc) {
expr = doc.getExpression();
Expression commentIr;
try {
commentIr = translateComment(doc, doc.getDocumentation());
} catch (SyntaxException ex) {
// A comment that fails to translate is kept in the block as an error node.
commentIr = ex.toError();
}
expressions.add(commentIr);
}
last = translateExpression(expr, false);
}
// NOTE(review): judging by the name, this location spans the block including a trailing
// newline -- confirm against getIdentifiedLocation.
var locationWithANewLine = getIdentifiedLocation(body, 0, 0, null);
if (last == null) {
if (expressions.isEmpty()) {
// Nothing was translated at all: the block's return value is an `Empty` node.
last = new Empty(locationWithANewLine, meta());
} else {
// The final statement produced no expression: promote the last collected entry to be the
// return value instead.
last = expressions.get(expressions.size() - 1);
expressions.remove(expressions.size() - 1);
}
}
var list = CollectionConverters.asScala(expressions.iterator()).toList();
// When the return value's location does not end where the block's location ends, replace it
// with a location that keeps the same start and UUID but ends one position before the block's
// end (presumably to exclude the trailing newline -- TODO confirm).
if (last != null
&& last.location().isDefined()
&& last.location().get().end() != locationWithANewLine.end()) {
int start = last.location().get().start();
int end = locationWithANewLine.end() - 1;
var id = new IdentifiedLocation(start, end, last.location().get().uuid());
last = last.setLocation(Option.apply(id));
}
return new Expression.Block(list, last, locationWithANewLine, suspended, meta());
}
private void attachTranslatedWarnings(IR ir, Tree tree) {
for (var warning : tree.getWarnings()) {
var message = Parser.getWarningMessage(warning);
@ -1624,15 +1591,16 @@ final class TreeToIr {
new Pattern.Literal((Literal) translateNumber(num), getIdentifiedLocation(num), meta());
case Tree.UnaryOprApp num when num.getOpr().codeRepr().equals("-") -> {
var n = (Literal.Number) translateExpression(num.getRhs());
var loc = getIdentifiedLocation(num);
var t = n.copy(
n.copy$default$1(),
"-" + n.copy$default$2(),
n.copy$default$3(),
Option.apply(loc),
n.copy$default$4(),
n.copy$default$5(),
n.copy$default$6()
);
yield new Pattern.Literal(t, getIdentifiedLocation(num), meta());
yield new Pattern.Literal(t, loc, meta());
}
case Tree.TypeAnnotated anno -> {
var type = buildNameOrQualifiedName(maybeManyParensed(anno.getType()));

View File

@ -13,7 +13,8 @@ public final class ParserPolyfill implements ProxyExecutable, Polyfill {
private static final Logger log = LoggerFactory.getLogger(ParserPolyfill.class);
private static final String PARSE_TREE = "parse-tree";
private static final String PARSE_BLOCK = "parse-block";
private static final String PARSE_MODULE = "parse-module";
private static final String XX_HASH_128 = "xx-hash-128";
private static final String IS_IDENT_OR_OPERATOR = "is-ident-or-operator";
@ -36,10 +37,16 @@ public final class ParserPolyfill implements ProxyExecutable, Polyfill {
log.debug(Arguments.toString(arguments));
return switch (command) {
case PARSE_TREE -> {
case PARSE_BLOCK -> {
var input = arguments[1].asString();
yield Parser.parseInputLazy(input);
yield Parser.parseBlockLazy(input);
}
case PARSE_MODULE -> {
var input = arguments[1].asString();
yield Parser.parseModuleLazy(input);
}
case XX_HASH_128 -> {

View File

@ -1,7 +1,12 @@
(function (jvm) {
globalThis.parse_tree = function(code) {
const byteBuffer = jvm('parse-tree', code);
globalThis.parse_module = function(code) {
const byteBuffer = jvm('parse-module', code);
return new Uint8Array(new ArrayBuffer(byteBuffer));
};
// Parses `code` by sending the 'parse-block' command to the JVM bridge, and returns the
// serialized result as a Uint8Array.
// NOTE(review): relies on `new ArrayBuffer(byteBuffer)` wrapping the buffer returned by the JVM
// (presumably host interop) — confirm against the runtime in use.
globalThis.parse_block = function(code) {
const byteBuffer = jvm('parse-block', code);
return new Uint8Array(new ArrayBuffer(byteBuffer));
};

View File

@ -40,9 +40,21 @@ public class ParserPolyfillTest extends ExecutorSetup {
}
@Test
public void parseTree() throws Exception {
public void parseModule() throws Exception {
var code = """
const arr = parse_tree(`main = 1 + 2`)
const arr = parse_module(`main = 1 + 2`)
arr.buffer
""";
var result = CompletableFuture.supplyAsync(() -> context.eval("js", code), executor).get();
Assert.assertTrue(result.as(ByteSequence.class).length() > 0);
}
@Test
public void parseBlock() throws Exception {
var code = """
const arr = parse_block(`value = 1 + 2`)
arr.buffer
""";

View File

@ -12,7 +12,7 @@ fn main() {
fuzz!(|code: &[u8]| {
if let Ok(code) = std::str::from_utf8(code) {
let parser = enso_parser::Parser::new();
let ast = parser.run(code);
let ast = parser.parse_module(code);
assert_eq!(ast.code(), code);
}
});

View File

@ -24,7 +24,7 @@ fn main() {
.map(|path| {
let code = read_source(path).unwrap();
let start = std::time::Instant::now();
std::hint::black_box(parser.run(&code));
std::hint::black_box(parser.parse_module(&code));
start.elapsed()
})
.sum();
@ -85,7 +85,7 @@ fn bench_std_lib(b: &mut test::Bencher) {
b.bytes = sources.iter().map(|s| s.len() as u64).sum();
b.iter(|| {
for source in &sources {
test::black_box(parser.run(source));
test::black_box(parser.parse_module(source));
}
});
}

View File

@ -13,7 +13,7 @@ fn main() {
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_;
}
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
let data =
enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format");
std::io::stdout().write_all(&data).unwrap();

View File

@ -132,7 +132,7 @@ fn check_file(
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_;
}
let ast = parser.run(code);
let ast = parser.parse_module(code);
let mut messages = if smoke_test { vec![] } else { collect_messages(&ast, &file.path) };
if ast.code() != code {
messages.push(format!(

View File

@ -13,6 +13,6 @@ fn main() {
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_;
}
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
serde_json::to_writer(std::io::stdout(), &ast).unwrap();
}

View File

@ -26,7 +26,7 @@ fn check_file(path: &str, mut code: &str) {
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_;
}
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations)

View File

@ -40,6 +40,12 @@ macro_rules! test {
}
}
/// Checks that `$code`, passed to the `test_block` function, produces the given statements.
///
/// The statement tokens are wrapped with `block!` to build the expected value.
macro_rules! test_block {
( $code:expr, $($statements:tt)* ) => {
test_block($code, block![$( $statements )*])
}
}
// ================================
@ -379,9 +385,14 @@ fn type_def_nested() {
#[test]
fn assignment_simple() {
test!("foo = x", (Assignment (Ident foo) (Ident x)));
test!("foo=x", (Assignment (Ident foo) (Ident x)));
test!("foo= x", (Assignment (Ident foo) (Ident x)));
// At the top level of a module, this defines a function with no arguments.
test!("foo = x", (Function (Ident foo) #() () (Ident x)));
// In a body block, this is a variable binding.
test_block!("main =\n foo = x",
(Function (Ident main) #() () (BodyBlock #(
(Assignment (Ident foo) (Ident x))))));
test_block!("foo=x", (Assignment (Ident foo) (Ident x)));
test_block!("foo= x", (Assignment (Ident foo) (Ident x)));
expect_invalid_node("foo =x");
}
@ -618,14 +629,12 @@ fn code_block_body() {
#[test]
fn code_block_operator() {
let code = ["value = nums", " * each random", " + constant"];
let expect = block![
test_block!(code.join("\n"),
(Assignment (Ident value)
(OperatorBlockApplication (Ident nums)
#(((Ok "*") (App (Ident each) (Ident random)))
((Ok "+") (Ident constant)))
#()))
];
test(code.join("\n"), expect);
#())));
}
#[test]
@ -641,37 +650,17 @@ fn dot_operator_blocks() {
#[test]
fn code_block_argument_list() {
#[rustfmt::skip]
let code = [
"foo",
" bar",
];
test!(code.join("\n"), (ArgumentBlockApplication (Ident foo) #((Ident bar))));
test!("foo\n bar", (ArgumentBlockApplication (Ident foo) #((Ident bar))));
#[rustfmt::skip]
let code = [
"value = foo",
" bar",
];
let expect = block![
(Assignment (Ident value) (ArgumentBlockApplication (Ident foo) #((Ident bar))))
];
test(code.join("\n"), expect);
test_block!("value = foo\n bar",
(Assignment (Ident value) (ArgumentBlockApplication (Ident foo) #((Ident bar)))));
#[rustfmt::skip]
let code = [
"value = foo",
" +x",
" bar",
];
#[rustfmt::skip]
let expect = block![
let code = ["value = foo", " +x", " bar"];
test_block!(code.join("\n"),
(Assignment (Ident value)
(ArgumentBlockApplication (Ident foo) #(
(OprSectionBoundary 1 (OprApp () (Ok "+") (Ident x)))
(Ident bar))))
];
test(code.join("\n"), expect);
(Ident bar)))));
}
#[test]
@ -804,25 +793,21 @@ fn accessor_operator() {
#[test]
fn operator_sections() {
#[rustfmt::skip]
test(".map (+2 * 3) *7", block![
test!(".map (+2 * 3) *7",
(OprSectionBoundary 1
(App (App (OprApp () (Ok ".") (Ident map))
(Group
(OprSectionBoundary 1 (OprApp (OprApp () (Ok "+") (Number () "2" ()))
(Ok "*") (Number () "3" ())))))
(OprSectionBoundary 1 (OprApp () (Ok "*") (Number () "7" ())))))]);
#[rustfmt::skip]
test(".sum 1", block![
(OprSectionBoundary 1 (App (OprApp () (Ok ".") (Ident sum)) (Number () "1" ())))]);
#[rustfmt::skip]
test("+1 + x", block![
(OprSectionBoundary 1 (OprApp () (Ok "*") (Number () "7" ()))))));
test!(".sum 1",
(OprSectionBoundary 1 (App (OprApp () (Ok ".") (Ident sum)) (Number () "1" ()))));
test!("+1 + x",
(OprSectionBoundary 1 (OprApp (OprApp () (Ok "+") (Number () "1" ()))
(Ok "+") (Ident x)))]);
#[rustfmt::skip]
test("increment = 1 +", block![
(Ok "+") (Ident x))));
test_block!("increment = 1 +",
(Assignment (Ident increment)
(OprSectionBoundary 1 (OprApp (Number () "1" ()) (Ok "+") ())))]);
(OprSectionBoundary 1 (OprApp (Number () "1" ()) (Ok "+") ()))));
test!("1+ << 2*",
(OprSectionBoundary 1
(OprApp (OprApp (Number () "1" ()) (Ok "+") ())
@ -895,18 +880,18 @@ fn unary_operator_at_end_of_expression() {
#[test]
fn unspaced_operator_sequence() {
// Add a negated value.
test!("x = y+-z",
test_block!("x = y+-z",
(Assignment (Ident x) (OprApp (Ident y) (Ok "+") (UnaryOprApp "-" (Ident z)))));
// Create an operator section that adds a negated value to its input.
test!("x = +-z",
test_block!("x = +-z",
(Assignment (Ident x) (OprSectionBoundary 1
(OprApp () (Ok "+") (UnaryOprApp "-" (Ident z))))));
// The `-` can only be lexed as a unary operator, and unary operators cannot form sections.
expect_invalid_node("x = y+-");
expect_invalid_node("main =\n x = y+-");
// Assign a negative number to x.
test!("x=-1", (Assignment (Ident x) (UnaryOprApp "-" (Number () "1" ()))));
test_block!("x=-1", (Assignment (Ident x) (UnaryOprApp "-" (Number () "1" ()))));
// Assign a negated value to x.
test!("x=-y", (Assignment (Ident x) (UnaryOprApp "-" (Ident y))));
test_block!("x=-y", (Assignment (Ident x) (UnaryOprApp "-" (Ident y))));
}
#[test]
@ -935,7 +920,7 @@ fn minus_unary() {
test!("-x", (UnaryOprApp "-" (Ident x)));
test!("(-x)", (Group (UnaryOprApp "-" (Ident x))));
test!("-(x * x)", (UnaryOprApp "-" (Group (OprApp (Ident x) (Ok "*") (Ident x)))));
test!("x=-x", (Assignment (Ident x) (UnaryOprApp "-" (Ident x))));
test_block!("x=-x", (Assignment (Ident x) (UnaryOprApp "-" (Ident x))));
test!("-x+x", (OprApp (UnaryOprApp "-" (Ident x)) (Ok "+") (Ident x)));
test!("-x*x", (OprApp (UnaryOprApp "-" (Ident x)) (Ok "*") (Ident x)));
}
@ -961,9 +946,9 @@ fn method_app_in_minus_unary() {
#[test]
fn autoscope_operator() {
test!("x : ..True", (TypeSignature (Ident x) ":" (AutoscopedIdentifier ".." True)));
test!("x = ..True", (Assignment (Ident x) (AutoscopedIdentifier ".." True)));
test!("x = f ..True",
test_block!("x : ..True", (TypeSignature (Ident x) ":" (AutoscopedIdentifier ".." True)));
test_block!("x = ..True", (Assignment (Ident x) (AutoscopedIdentifier ".." True)));
test_block!("x = f ..True",
(Assignment (Ident x) (App (Ident f) (AutoscopedIdentifier ".." True))));
expect_invalid_node("x = ..not_a_constructor");
expect_invalid_node("x = case a of ..True -> True");
@ -1106,9 +1091,9 @@ fn type_signatures() {
#[test]
fn type_annotations() {
test!("val = x : Int",
test_block!("val = x : Int",
(Assignment (Ident val) (TypeAnnotated (Ident x) ":" (Ident Int))));
test!("val = foo (x : Int)",
test_block!("val = foo (x : Int)",
(Assignment (Ident val)
(App (Ident foo)
(Group (TypeAnnotated (Ident x) ":" (Ident Int))))));
@ -1131,10 +1116,10 @@ fn type_annotations() {
#[test]
fn inline_text_literals() {
test!(r#""I'm an inline raw text!""#, (TextLiteral #((Section "I'm an inline raw text!"))));
test!(r#"zero_length = """#, (Assignment (Ident zero_length) (TextLiteral #())));
test_block!(r#"zero_length = """#, (Assignment (Ident zero_length) (TextLiteral #())));
test!(r#""type""#, (TextLiteral #((Section "type"))));
test!(r#"unclosed = ""#, (Assignment (Ident unclosed) (TextLiteral #())));
test!(r#"unclosed = "a"#, (Assignment (Ident unclosed) (TextLiteral #((Section "a")))));
test_block!(r#"unclosed = ""#, (Assignment (Ident unclosed) (TextLiteral #())));
test_block!(r#"unclosed = "a"#, (Assignment (Ident unclosed) (TextLiteral #((Section "a")))));
test!(r#"'Other quote type'"#, (TextLiteral #((Section "Other quote type"))));
test!(r#""Non-escape: \n""#, (TextLiteral #((Section "Non-escape: \\n"))));
test!(r#""Non-escape: \""#, (TextLiteral #((Section "Non-escape: \\"))));
@ -1152,7 +1137,7 @@ fn inline_text_literals() {
#[test]
fn multiline_text_literals() {
test("'''", block![(TextLiteral #())]);
test!("'''", (TextLiteral #()));
let code = r#""""
part of the string
3-spaces indented line, part of the Text Block
@ -1161,8 +1146,7 @@ fn multiline_text_literals() {
`also` part of the string
x"#;
#[rustfmt::skip]
let expected = block![
test!(code,
(TextLiteral
#((Section "part of the string") (Newline)
(Section " 3-spaces indented line, part of the Text Block") (Newline)
@ -1170,40 +1154,25 @@ x"#;
(Newline)
(Section "`also` part of the string")))
()
(Ident x)
];
test(code, expected);
let code = r#""""
(Ident x));
test!(r#""""
multiline string that doesn't end in a newline
x"#;
#[rustfmt::skip]
let expected = block![
x"#,
(TextLiteral #((Section "multiline string that doesn't end in a newline")))
(Ident x)
];
test(code, expected);
let code = "x = \"\"\"\n Indented multiline\nx";
#[rustfmt::skip]
let expected = block![
(Ident x));
test_block!("x = \"\"\"\n Indented multiline\nx",
(Assignment (Ident x) (TextLiteral #((Section "Indented multiline"))))
(Ident x)
];
test(code, expected);
let code = "'''\n \\nEscape at start\n";
test!(code, (TextLiteral #((Escape 0x0A) (Section "Escape at start"))) ());
let code = "x =\n x = '''\n x\nx";
#[rustfmt::skip]
let expected = block![
(Ident x));
test!("'''\n \\nEscape at start\n",
(TextLiteral #((Escape 0x0A) (Section "Escape at start"))) ());
test!("x =\n x = '''\n x\nx",
(Function (Ident x) #() ()
(BodyBlock #((Assignment (Ident x) (TextLiteral #((Section "x")))))))
(Ident x)
];
test(code, expected);
test!("foo = bar '''\n baz",
(Ident x));
test_block!("foo = bar '''\n baz",
(Assignment (Ident foo) (App (Ident bar) (TextLiteral #((Section "baz"))))));
test!("'''\n \\t'", (TextLiteral #((Escape 0x09) (Section "'"))));
test!("'''\n x\n \\t'",
(TextLiteral #((Section "x") (Newline) (Escape 0x09) (Section "'"))));
test!("'''\n x\n \\t'", (TextLiteral #((Section "x") (Newline) (Escape 0x09) (Section "'"))));
}
#[test]
@ -1287,7 +1256,7 @@ fn old_lambdas() {
test!("f x->\n y",
(App (Ident f) (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))));
test!("x->y-> z", (OprApp (Ident x) (Ok "->") (OprApp (Ident y) (Ok "->") (Ident z))));
test!("foo = x -> (y = bar x) -> x + y",
test_block!("foo = x -> (y = bar x) -> x + y",
(Assignment (Ident foo)
(OprApp (Ident x) (Ok "->")
(OprApp (Group (OprApp (Ident y) (Ok "=") (App (Ident bar) (Ident x)))) (Ok "->")
@ -1299,10 +1268,10 @@ fn old_lambdas() {
#[test]
fn pattern_irrefutable() {
test!("Point x_val = my_point",
test_block!("Point x_val = my_point",
(Assignment (App (Ident Point) (Ident x_val)) (Ident my_point)));
test!("Vector _ = x", (Assignment (App (Ident Vector) (Wildcard -1)) (Ident x)));
test!("X.y = z", (Function (OprApp (Ident X) (Ok ".") (Ident y)) #() () (Ident z)));
test_block!("Vector _ = x", (Assignment (App (Ident Vector) (Wildcard -1)) (Ident x)));
test_block!("X.y = z", (Function (OprApp (Ident X) (Ok ".") (Ident y)) #() () (Ident z)));
}
#[test]
@ -1411,10 +1380,10 @@ fn suspended_default_arguments_in_pattern() {
#[test]
fn suspended_default_arguments_in_expression() {
test!("c = self.value ...",
test_block!("c = self.value ...",
(Assignment (Ident c)
(App (OprApp (Ident self) (Ok ".") (Ident value)) (SuspendedDefaultArguments))));
test!("c = self.value...",
test_block!("c = self.value...",
(Assignment (Ident c)
(App (OprApp (Ident self) (Ok ".") (Ident value)) (SuspendedDefaultArguments))));
}
@ -1426,7 +1395,7 @@ fn private_keyword() {
test!("private", (Private()));
expect_invalid_node("private func");
// Private binding is not supported.
expect_invalid_node("private var = 42");
expect_invalid_node("main =\n private var = 42");
expect_invalid_node("private ConstructorOutsideType");
expect_invalid_node("type My_Type\n private");
expect_invalid_node("private type My_Type\n Ctor");
@ -1502,7 +1471,7 @@ mod numbers {
#[test]
fn with_decimal() {
test!("pi = 3.14", (Assignment (Ident pi) (Number () "3" ("." "14"))));
test_block!("pi = 3.14", (Assignment (Ident pi) (Number () "3" ("." "14"))));
}
#[test]
@ -1644,11 +1613,11 @@ fn skip() {
#[test]
fn statement_in_expression_context() {
test!("x = y = z", (Assignment (Ident x) (Invalid)));
test_block!("x = y = z", (Assignment (Ident x) (Invalid)));
test!("(y = z)", (Group(Invalid)));
test!("(y = z) x", (App (Group (Invalid)) (Ident x)));
test!("(f x = x)", (Group(Invalid)));
test!("y = f x = x", (Assignment (Ident y) (Invalid)));
test_block!("(f x = x)", (Group(Invalid)));
test_block!("y = f x = x", (Assignment (Ident y) (Invalid)));
}
@ -1858,13 +1827,31 @@ fn test<T: AsRef<str>>(code: T, expect: lexpr::Value) {
expect_tree_representing_code(code, &ast);
}
/// Parses `code` with the block entry point (`parse_block`) and asserts that the S-expression
/// rendering of the resulting AST equals `expect`.
///
/// On mismatch, the assertion message includes the debug representation of the parsed AST. After
/// the comparison, `expect_tree_representing_code` is also run on the AST.
fn test_block<T: AsRef<str>>(code: T, expect: lexpr::Value) {
let code = code.as_ref();
let ast = parse_block(code);
let ast_s_expr = to_s_expr(&ast, code);
// Compare the string forms; the AST itself is attached to the failure message for debugging.
assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast);
expect_tree_representing_code(code, &ast);
}
fn parse(code: &str) -> enso_parser::syntax::tree::Tree {
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
validate_parse(code, &ast);
ast
}
/// Parses `code` using `Parser::parse_block`, runs `validate_parse` on the result, and returns
/// the AST.
fn parse_block(code: &str) -> enso_parser::syntax::tree::Tree {
let ast = enso_parser::Parser::new().parse_block(code);
validate_parse(code, &ast);
ast
}
fn validate_parse(code: &str, ast: &enso_parser::syntax::Tree) {
let expected_span = 0..(code.encode_utf16().count() as u32);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations).unwrap();
enso_parser_debug::validate_spans(ast, expected_span, &mut locations).unwrap();
locations.check(code);
ast
}
@ -1895,7 +1882,7 @@ impl Errors {
/// Checks that an input contains an `Invalid` node somewhere.
fn expect_invalid_node(code: &str) {
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
expect_tree_representing_code(code, &ast);
let errors = Errors::collect(&ast, code);
assert!(errors.invalid_node, "{}", to_s_expr(&ast, code));
@ -1903,7 +1890,7 @@ fn expect_invalid_node(code: &str) {
/// Checks that an input contains a multiple-operator error somewhere.
fn expect_multiple_operator_error(code: &str) {
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
expect_tree_representing_code(code, &ast);
let errors = Errors::collect(&ast, code);
assert!(errors.multiple_operator || errors.invalid_node, "{}", to_s_expr(&ast, code));
@ -1912,7 +1899,7 @@ fn expect_multiple_operator_error(code: &str) {
/// Check that the input can be parsed, and doesn't yield any `Invalid` nodes.
fn expect_valid(code: &str) {
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
expect_tree_representing_code(code, &ast);
let errors = Errors::collect(&ast, code);
assert!(!errors.invalid_node);

View File

@ -49,7 +49,7 @@ fn extract_docs(_filename: &str, mut code: &str) -> Vec<String> {
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_;
}
let ast = enso_parser::Parser::new().run(code);
let ast = enso_parser::Parser::new().parse_module(code);
let docs = RefCell::new(vec![]);
ast.visit_trees(|tree| match &tree.variant {
enso_parser::syntax::tree::Variant::Documented(doc) => {

View File

@ -23,12 +23,20 @@ public final class Parser {
return getWorker().isIdentOrOperator(input);
}
public static ByteBuffer parseInputLazy(CharSequence input) {
return getWorker().parseInputLazy(input);
/**
 * Parses the input as a module and returns the serialized tree without deserializing it.
 *
 * @param input the source code to parse
 * @return the buffer produced by the worker's {@code parseLazy} in module mode
 */
public static ByteBuffer parseModuleLazy(CharSequence input) {
return getWorker().parseLazy(input, false);
}
public static Tree parse(CharSequence input) {
return getWorker().parse(input);
/**
 * Parses the input as a block and returns the serialized tree without deserializing it.
 *
 * @param input the source code to parse
 * @return the buffer produced by the worker's {@code parseLazy} in block mode
 */
public static ByteBuffer parseBlockLazy(CharSequence input) {
return getWorker().parseLazy(input, true);
}
/**
 * Parses the input as a module and deserializes the result.
 *
 * @param input the source code to parse
 * @return the root {@code BodyBlock} of the parsed tree
 */
public static Tree.BodyBlock parseModule(CharSequence input) {
return getWorker().parse(input, false);
}
/**
 * Parses the input as a block and deserializes the result.
 *
 * @param input the source code to parse
 * @return the root {@code BodyBlock} of the parsed tree
 */
public static Tree.BodyBlock parseBlock(CharSequence input) {
return getWorker().parse(input, true);
}
public static UUID getUuid(long metadata, long nodeOffset, long nodeLength) {
@ -191,31 +199,47 @@ public final class Parser {
return Parser.isIdentOrOperator(inputBuf);
}
ByteBuffer parseInputLazy(CharSequence input) {
byte[] inputBytes = input.toString().getBytes(StandardCharsets.UTF_8);
ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
inputBuf.put(inputBytes);
return withState(state -> parseTreeLazy(state, inputBuf));
}
Tree parse(CharSequence input) {
ByteBuffer parseLazy(CharSequence input, boolean isInternalBlock) {
byte[] inputBytes = input.toString().getBytes(StandardCharsets.UTF_8);
ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
inputBuf.put(inputBytes);
return withState(
state -> {
var serializedTree = parseTree(state, inputBuf);
ByteBuffer serializedTree;
if (isInternalBlock) {
serializedTree = parseBlockLazy(state, inputBuf);
} else {
serializedTree = parseModuleLazy(state, inputBuf);
}
return serializedTree;
});
}
Tree.BodyBlock parse(CharSequence input, boolean isInternalBlock) {
byte[] inputBytes = input.toString().getBytes(StandardCharsets.UTF_8);
ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
inputBuf.put(inputBytes);
return withState(
state -> {
ByteBuffer serializedTree;
if (isInternalBlock) {
serializedTree = parseBlock(state, inputBuf);
} else {
serializedTree = parseModule(state, inputBuf);
}
var base = getLastInputBase(state);
var metadata = getMetadata(state);
serializedTree.order(ByteOrder.LITTLE_ENDIAN);
var message = new Message(serializedTree, input, base, metadata);
Tree parsed;
try {
return Tree.deserialize(message);
parsed = Tree.deserialize(message);
} catch (BufferUnderflowException | IllegalArgumentException e) {
LoggerFactory.getLogger(this.getClass())
.error("Unrecoverable parser failure for: {}", input, e);
throw e;
}
return (Tree.BodyBlock) parsed;
});
}
}
@ -226,9 +250,13 @@ public final class Parser {
private static native void freeState(long state);
private static native ByteBuffer parseTree(long state, ByteBuffer input);
private static native ByteBuffer parseModule(long state, ByteBuffer input);
private static native ByteBuffer parseTreeLazy(long state, ByteBuffer input);
private static native ByteBuffer parseBlock(long state, ByteBuffer input);
private static native ByteBuffer parseModuleLazy(long state, ByteBuffer input);
private static native ByteBuffer parseBlockLazy(long state, ByteBuffer input);
private static native long isIdentOrOperator(ByteBuffer input);

View File

@ -28,9 +28,9 @@ fn main() {
println!("import java.nio.ByteOrder;");
println!();
println!("class GeneratedFormatTests {{");
// Force the parser to load its shared library. `parse` handles this because usually it is the
// entry point to the class, but we're doing low-level operations directly.
println!(" private static final Object INIT = {package}.Parser.parse(\"\");");
// Force the parser to load its shared library. `parseModule` handles this because usually it is
// the entry point to the class, but we're doing low-level operations directly.
println!(" private static final Object INIT = {package}.Parser.parseModule(\"\");");
println!(" private static java.util.Vector<byte[]> accept;");
println!(" private static java.util.Vector<byte[]> reject;");
for (i, case) in cases.accept.iter().enumerate() {

View File

@ -11,6 +11,7 @@
use enso_prelude::*;
use enso_parser::macros::resolver::RootContext;
use jni::objects::JByteBuffer;
use jni::objects::JClass;
use jni::sys::jobject;
@ -26,8 +27,7 @@ use jni::JNIEnv;
static DIRECT_ALLOCATED: &str = "Internal Error: ByteBuffer must be direct-allocated.";
static FAILED_SERIALIZE_AST: &str = "Failed to serialize AST to binary format.";
/// Parse the input. Returns a serialized representation of the parse tree. The caller is
/// responsible for freeing the memory associated with the returned buffer.
/// Parse the input as a module. Returns a serialized representation of the parse tree.
///
/// # Safety
///
@ -37,11 +37,41 @@ static FAILED_SERIALIZE_AST: &str = "Failed to serialize AST to binary format.";
/// a call to `freeState`.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_syntax2_Parser_parseTree(
pub extern "system" fn Java_org_enso_syntax2_Parser_parseModule(
env: JNIEnv,
class: JClass,
state: u64,
input: JByteBuffer,
) -> jobject {
parse(env, class, state, input, RootContext::Module)
}
/// Parse the input as a block. Returns a serialized representation of the parse tree.
///
/// This is a thin JNI entry point that delegates to `parse` with [`RootContext::Block`].
///
/// # Safety
///
/// The state MUST be a value returned by `allocState` that has not been passed to `freeState`.
/// The input buffer contents MUST be valid UTF-8.
/// The contents of the returned buffer MUST NOT be accessed after another parse call on the same
/// state, or a call to `freeState`.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_syntax2_Parser_parseBlock(
env: JNIEnv,
class: JClass,
state: u64,
input: JByteBuffer,
) -> jobject {
parse(env, class, state, input, RootContext::Block)
}
#[allow(unsafe_code)]
fn parse(
mut env: JNIEnv,
_class: JClass,
state: u64,
input: JByteBuffer,
root_context: RootContext,
) -> jobject {
let state = unsafe { &mut *(state as usize as *mut State) };
let input = unsafe { decode_utf8_buffer(&env, &input) };
@ -55,34 +85,34 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_parseTree(
code = code_;
}
state.base = str::as_ptr(code) as usize as u64;
let tree = enso_parser::Parser::new().run(code);
state.output = match enso_parser::serialization::serialize_tree(&tree) {
Ok(tree) => tree,
let parser = enso_parser::Parser::new();
let tree = match root_context {
RootContext::Module => parser.parse_module(code),
RootContext::Block => parser.parse_block(code),
};
state.output = enso_parser::serialization::serialize_tree(&tree).unwrap_or_else(|_| {
// `Tree` does not contain any types with fallible `serialize` implementations, so this
// cannot fail.
Err(_) => {
debug_assert!(false);
default()
}
};
debug_assert!(false);
default()
});
state.metadata = meta;
let result =
unsafe { env.new_direct_byte_buffer(state.output.as_mut_ptr(), state.output.len()) };
result.unwrap().into_raw()
}
/// Parse the input. Returns a serialize format compatible with a lazy deserialization strategy. The
/// caller is responsible for freeing the memory associated with the returned buffer.
/// Parse a module. Returns a serialized format compatible with a lazy deserialization strategy.
///
/// # Safety
///
/// The state MUST be a value returned by `allocState` that has not been passed to `freeState`.
/// The input buffer contents MUST be valid UTF-8.
/// The contents of the returned buffer MUST not be accessed after another call to `parseInput`, or
/// The contents of the returned buffer MUST NOT be accessed after another parse call on the same
/// state, or a call to `freeState`.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_syntax2_Parser_parseTreeLazy(
pub extern "system" fn Java_org_enso_syntax2_Parser_parseModuleLazy(
mut env: JNIEnv,
_class: JClass,
state: u64,
@ -91,7 +121,34 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_parseTreeLazy(
let state = unsafe { &mut *(state as usize as *mut State) };
let input = unsafe { decode_utf8_buffer(&env, &input) };
let tree = enso_parser::Parser::new().run(input);
let tree = enso_parser::Parser::new().parse_module(input);
state.output = enso_parser::format::serialize(&tree).expect(FAILED_SERIALIZE_AST);
let result =
unsafe { env.new_direct_byte_buffer(state.output.as_mut_ptr(), state.output.len()) };
result.unwrap().into_raw()
}
/// Parse a block. Returns a serialized format compatible with a lazy deserialization strategy.
///
/// # Safety
///
/// The state MUST be a value returned by `allocState` that has not been passed to `freeState`.
/// The input buffer contents MUST be valid UTF-8.
/// The contents of the returned buffer MUST NOT be accessed after another parse call on the same
/// state, or a call to `freeState`.
#[allow(unsafe_code)]
#[no_mangle]
pub extern "system" fn Java_org_enso_syntax2_Parser_parseBlockLazy(
mut env: JNIEnv,
_class: JClass,
state: u64,
input: JByteBuffer,
) -> jobject {
let state = unsafe { &mut *(state as usize as *mut State) };
let input = unsafe { decode_utf8_buffer(&env, &input) };
let tree = enso_parser::Parser::new().parse_block(input);
state.output = enso_parser::format::serialize(&tree).expect(FAILED_SERIALIZE_AST);
let result =

View File

@ -89,6 +89,7 @@
use crate::prelude::*;
use crate::lexer::Lexer;
use crate::macros::resolver::RootContext;
use crate::source::Code;
use crate::syntax::token;
use crate::syntax::tree::SyntaxError;
@ -163,9 +164,18 @@ impl Parser {
Self { macros }
}
/// Main entry point.
pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
let resolver = macros::resolver::Resolver::new(&self.macros);
/// Main entry point. Interprets the input as a module, and returns the resulting [`BodyBlock`].
pub fn parse_module<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
self.run(code, RootContext::Module)
}
/// Parses the input as a block.
///
/// The input is processed with [`RootContext::Block`], i.e. its lines are interpreted as
/// statements inside a body block rather than as module-level statements.
pub fn parse_block<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
self.run(code, RootContext::Block)
}
fn run<'s>(&self, code: &'s str, root_context: RootContext) -> syntax::Tree<'s> {
let resolver = macros::resolver::Resolver::new(&self.macros, root_context);
let ParseResult { value, internal_error } = Lexer::new(code, resolver).finish();
if let Some(error) = internal_error {
return value.with_error(format!("Internal error: {error}"));
@ -294,7 +304,7 @@ mod benches {
let str = "type Option a b c\n".repeat(reps);
let parser = Parser::new();
bencher.iter(move || {
parser.run(&str);
parser.parse_module(&str);
});
}
@ -336,7 +346,7 @@ mod benches {
let parser = Parser::new();
bencher.bytes = str.len() as u64;
bencher.iter(move || {
parser.run(&str);
parser.parse_module(&str);
});
}
@ -371,7 +381,7 @@ mod benches {
let parser = Parser::new();
bencher.bytes = str.len() as u64;
bencher.iter(move || {
parser.run(&str);
parser.parse_module(&str);
});
}
}

View File

@ -129,35 +129,37 @@ impl<'a> SegmentMap<'a> {
/// to learn more about the macro resolution steps.
#[derive(Debug)]
struct ResolverState<'s> {
blocks: Vec<Block>,
blocks: Vec<Block>,
/// The lines of all currently-open blocks. This is partitioned by `blocks`.
lines: Vec<syntax::item::Line<'s>>,
groups: Vec<OpenGroup<'s>>,
lines: Vec<syntax::item::Line<'s>>,
groups: Vec<OpenGroup<'s>>,
/// All currently-open macros. These are partitioned into scopes by `blocks`.
macros: Vec<PartiallyMatchedMacro<'s>>,
macros: Vec<PartiallyMatchedMacro<'s>>,
/// Segments of all currently-open macros. These are partitioned by `macros`.
segments: Vec<MatchedSegment<'s>>,
segments: Vec<MatchedSegment<'s>>,
/// Items of all segments of all currently-open macros. These are partitioned by `segments`.
items: Vec<Item<'s>>,
context: Context,
precedence: syntax::operator::Precedence<'s>,
items: Vec<Item<'s>>,
context: Context,
root_context: RootContext,
precedence: syntax::operator::Precedence<'s>,
}
// === Public API ===
impl<'s> ResolverState<'s> {
/// Create a new resolver, in statement context.
fn new_statement() -> Self {
/// Create a new resolver.
fn new(root_context: RootContext, context: Context) -> Self {
Self {
context: Context::Statement,
context,
root_context,
precedence: syntax::operator::Precedence::new(),
blocks: default(),
lines: vec![initial_line()],
groups: default(),
macros: default(),
segments: default(),
items: default(),
blocks: default(),
lines: vec![initial_line()],
groups: default(),
macros: default(),
segments: default(),
items: default(),
}
}
}
@ -174,7 +176,11 @@ impl<'s> Finish for ResolverState<'s> {
fn finish(&mut self) -> Self::Result {
self.finish_current_line();
let tree = syntax::tree::block::parse_module(self.lines.drain(..), &mut self.precedence);
let lines = self.lines.drain(..);
let tree = match self.root_context {
RootContext::Module => syntax::tree::block::parse_module(lines, &mut self.precedence),
RootContext::Block => syntax::tree::block::parse_block(lines, &mut self.precedence),
};
debug_assert!(self.blocks.is_empty());
debug_assert!(self.lines.is_empty());
debug_assert!(self.groups.is_empty());
@ -187,6 +193,15 @@ impl<'s> Finish for ResolverState<'s> {
}
}
/// Specifies how statements of the input should be interpreted.
///
/// The context determines which block parser is applied to the top-level lines of the input. For
/// example, the parser tests expect `foo = x` to yield a `Function` in [`RootContext::Module`] and
/// an `Assignment` in [`RootContext::Block`].
#[derive(Debug, Copy, Clone)]
pub enum RootContext {
/// Interpret the input as a sequence of module-level statements.
Module,
/// Interpret the input as a sequence of statements inside a body block.
Block,
}
/// Resolves macros.
#[derive(Debug)]
pub struct Resolver<'s, 'macros> {
@ -196,8 +211,8 @@ pub struct Resolver<'s, 'macros> {
impl<'s, 'macros> Resolver<'s, 'macros> {
/// Creates a macro resolver to use with the given macro map.
pub fn new(root_macro_map: &'macros MacroMap) -> Self {
Self { resolver: ResolverState::new_statement(), root_macro_map }
pub fn new(root_macro_map: &'macros MacroMap, root_context: RootContext) -> Self {
Self { resolver: ResolverState::new(root_context, Context::Statement), root_macro_map }
}
}

View File

@ -42,7 +42,12 @@ impl<'s> BodyBlockParser<'s> {
) -> Tree<'s> {
let lines = lines.into_iter().map(|item::Line { newline, mut items }| block::Line {
newline,
expression: self.statement_parser.parse_body_block_statement(&mut items, 0, precedence),
expression: self.statement_parser.parse_statement(
&mut items,
0,
precedence,
EvaluationContext::Eager,
),
});
Tree::body_block(block::compound_lines(lines).collect())
}
@ -67,18 +72,20 @@ struct StatementParser<'s> {
}
impl<'s> StatementParser<'s> {
fn parse_body_block_statement(
fn parse_statement(
&mut self,
items: &mut Vec<Item<'s>>,
start: usize,
precedence: &mut Precedence<'s>,
evaluation_context: EvaluationContext,
) -> Option<Tree<'s>> {
let private_keywords = scan_private_keywords(&*items);
let mut statement = parse_body_block_statement(
let mut statement = parse_statement(
items,
start + private_keywords,
precedence,
&mut self.args_buffer,
evaluation_context,
);
for _ in 0..private_keywords {
let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
@ -101,24 +108,34 @@ impl<'s> StatementParser<'s> {
precedence: &mut Precedence<'s>,
) -> Option<Tree<'s>> {
let private_keywords = scan_private_keywords(&*items);
let mut statement = parse_body_block_statement(
let mut statement = parse_statement(
items,
start + private_keywords,
precedence,
&mut self.args_buffer,
EvaluationContext::Lazy,
);
let mut error = None;
if let Some(statement) = statement.as_ref() {
error = match &statement.variant {
tree::Variant::Assignment(_) =>
SyntaxError::StmtUnexpectedAssignmentInModuleBody.into(),
_ => None,
};
}
for _ in 0..private_keywords {
let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
let token::Variant::Private(variant) = keyword.variant else { unreachable!() };
let keyword = keyword.with_variant(variant);
let error = match statement.as_ref().map(|tree| &tree.variant) {
Some(tree::Variant::Invalid(_) | tree::Variant::Function(_)) | None => None,
_ => SyntaxError::StmtUnexpectedPrivateUsage.into(),
};
let private_stmt = Tree::private(keyword, statement.take());
statement = maybe_with_error(private_stmt, error).into();
if error.is_none() {
error = match statement.as_ref().map(|tree| &tree.variant) {
Some(tree::Variant::Invalid(_) | tree::Variant::Function(_)) | None => None,
_ => SyntaxError::StmtUnexpectedPrivateUsage.into(),
};
}
statement = Tree::private(keyword, statement.take()).into();
}
statement
statement.map(|statement| maybe_with_error(statement, error))
}
}
@ -131,11 +148,12 @@ fn scan_private_keywords<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>
.count()
}
fn parse_body_block_statement<'s>(
fn parse_statement<'s>(
items: &mut Vec<Item<'s>>,
start: usize,
precedence: &mut Precedence<'s>,
args_buffer: &mut Vec<ArgumentDefinition<'s>>,
evaluation_context: EvaluationContext,
) -> Option<Tree<'s>> {
use token::Variant;
if let Some(type_def) = try_parse_type_def(items, start, precedence, args_buffer) {
@ -152,7 +170,15 @@ fn parse_body_block_statement<'s>(
};
let statement = match top_level_operator {
Some((i, Token { variant: Variant::AssignmentOperator(_), .. })) =>
parse_assignment_like_statement(items, start, i, precedence, args_buffer).into(),
parse_assignment_like_statement(
items,
start,
i,
precedence,
args_buffer,
evaluation_context,
)
.into(),
Some((i, Token { variant: Variant::TypeAnnotationOperator(_), .. })) => {
let type_ = precedence.resolve_non_section_offset(i + 1, items);
let Some(Item::Token(operator)) = items.pop() else { unreachable!() };
@ -179,12 +205,21 @@ fn parse_body_block_statement<'s>(
statement
}
/// Selects how an ambiguous `name = expression` statement is classified, based on the kind of
/// block the statement occurs in.
#[derive(Debug, Copy, Clone)]
enum EvaluationContext {
/// A context in which variable assignments are allowed.
///
/// Used for the lines of a body block; a bare `name = expression` is parsed as an assignment.
Eager,
/// A context in which variable assignments must not occur.
///
/// Used for module-level and type-body statements; any statement that has both a name on the
/// left and an expression on the right is parsed as a function, and a qualified name ending in
/// a type-like identifier is also accepted as a function name.
Lazy,
}
fn parse_assignment_like_statement<'s>(
items: &mut Vec<Item<'s>>,
start: usize,
operator: usize,
precedence: &mut Precedence<'s>,
args_buffer: &mut Vec<ArgumentDefinition<'s>>,
evaluation_context: EvaluationContext,
) -> Tree<'s> {
if operator == start {
return precedence
@ -199,7 +234,13 @@ fn parse_assignment_like_statement<'s>(
let token::Variant::AssignmentOperator(variant) = operator.variant else { unreachable!() };
let operator = operator.with_variant(variant);
let qn_len = scan_qn(&items[start..]);
let qn_len = match (evaluation_context, scan_qn(&items[start..])) {
(_, Some(Qn::Binding { len }))
// In a context where assignments are not allowed, even a name whose last identifier is
// capitalized can be a function definition (rather than an assignment pattern).
| (EvaluationContext::Lazy, Some(Qn::Type { len })) => len.into(),
_ => None,
};
let mut operator = Some(operator);
if let Some(function) = try_parse_foreign_function(
@ -214,22 +255,29 @@ fn parse_assignment_like_statement<'s>(
}
let operator = operator.unwrap();
match (expression, qn_len) {
(Some(e), Some(qn_len)) if matches!(e.variant, tree::Variant::BodyBlock(_)) => {
enum Type<'s> {
Assignment { expression: Tree<'s> },
Function { expression: Option<Tree<'s>>, qn_len: usize },
InvalidNoExpressionNoQn,
}
match match (expression, qn_len) {
(Some(e), Some(qn_len))
if matches!(evaluation_context, EvaluationContext::Lazy)
|| matches!(e.variant, tree::Variant::BodyBlock(_)) =>
Type::Function { expression: Some(e), qn_len },
(Some(expression), None) => Type::Assignment { expression },
(Some(expression), Some(1)) if items.len() == start + 1 => Type::Assignment { expression },
(expression, Some(qn_len)) => Type::Function { expression, qn_len },
(None, None) => Type::InvalidNoExpressionNoQn,
} {
Type::Assignment { expression } =>
parse_assignment(start, items, operator, expression, precedence),
Type::Function { expression, qn_len } => {
let (qn, args, return_) =
parse_function_decl(items, start, qn_len, precedence, args_buffer);
Tree::function(qn, args, return_, operator, Some(e))
Tree::function(qn, args, return_, operator, expression)
}
(Some(expression), None) =>
parse_assignment(start, items, operator, expression, precedence),
(Some(expression), Some(1)) if items.len() == start + 1 =>
parse_assignment(start, items, operator, expression, precedence),
(e, Some(qn_len)) => {
let (qn, args, return_) =
parse_function_decl(items, start, qn_len, precedence, args_buffer);
Tree::function(qn, args, return_, operator, e)
}
(None, None) => Tree::opr_app(
Type::InvalidNoExpressionNoQn => Tree::opr_app(
precedence.resolve_non_section_offset(start, items),
Ok(operator.with_variant(token::variant::Operator())),
None,
@ -360,10 +408,19 @@ fn next_spaced(items: &[Item]) -> Option<usize> {
None
}
/// The kind of qualified name (segments separated by `.`) found by `scan_qn`, together with its
/// length.
///
/// In both variants `len` is the length of the name, counted in items from the start of the
/// scanned sequence.
#[derive(Debug)]
enum Qn {
/// A qualified-name whose last segment is capitalized; usually a type or module.
Type { len: usize },
/// A qualified-name whose last segment is lowercase; usually a variable or function.
///
/// `scan_qn` also produces this variant when the last segment is an operator token.
Binding { len: usize },
}
/// Returns the kind and length of the qualified name that `items` begins with, if any.
fn scan_qn<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> Option<usize> {
fn scan_qn<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> Option<Qn> {
#[derive(Copy, Clone)]
enum State {
ExpectingDot,
ExpectingDot { len: usize },
ExpectingIdent,
}
use token::Variant::*;
@ -374,17 +431,21 @@ fn scan_qn<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> Option<
match item.as_ref() {
Token(token) if i != 0 && token.is_spaced() => break,
Token(token) => match (state, &token.variant) {
(ExpectingDot, DotOperator(_)) => state = ExpectingIdent,
(ExpectingIdent, Ident(ident)) if ident.is_type => state = ExpectingDot,
(ExpectingDot { .. }, DotOperator(_)) => state = ExpectingIdent,
(ExpectingIdent, Ident(ident)) if ident.is_type =>
state = ExpectingDot { len: i + 1 },
(
ExpectingIdent,
Ident(_) | Operator(_) | NegationOperator(_) | UnaryOperator(_),
) => return Some(i + 1),
) => return Some(Qn::Binding { len: i + 1 }),
_ => break,
},
Group(_) | Tree(_) => break,
Block(_) => unreachable!(),
}
}
None
match state {
ExpectingDot { len } => Some(Qn::Type { len }),
_ => None,
}
}

View File

@ -5,8 +5,9 @@ use crate::syntax::maybe_with_error;
use crate::syntax::operator::Precedence;
use crate::syntax::statement::function_def::parse_constructor_definition;
use crate::syntax::statement::function_def::parse_type_args;
use crate::syntax::statement::parse_body_block_statement;
use crate::syntax::statement::parse_statement;
use crate::syntax::statement::scan_private_keywords;
use crate::syntax::statement::EvaluationContext;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::tree::block;
@ -97,9 +98,14 @@ fn parse_type_body_statement<'s>(
)),
None => None,
_ => {
let tree =
parse_body_block_statement(&mut items, private_keywords, precedence, args_buffer)
.unwrap();
let tree = parse_statement(
&mut items,
private_keywords,
precedence,
args_buffer,
EvaluationContext::Lazy,
)
.unwrap();
let error = match &tree.variant {
tree::Variant::Function(_)
| tree::Variant::ForeignFunction(_)

View File

@ -817,6 +817,7 @@ pub enum SyntaxError {
ForeignFnExpectedStringBody,
StmtInvalidAssignmentOrMethod,
StmtLhsInvalidOperatorSpacing,
StmtUnexpectedAssignmentInModuleBody,
StmtUnexpectedPrivateUsage,
TypeBodyUnexpectedPrivateUsage,
TypeDefExpectedTypeName,
@ -851,6 +852,8 @@ impl From<SyntaxError> for Cow<'static, str> {
StmtInvalidAssignmentOrMethod => "Invalid assignment or method definition",
StmtLhsInvalidOperatorSpacing =>
"Each operator on the left side of an assignment operator must be applied to two operands, with the same spacing on each side",
StmtUnexpectedAssignmentInModuleBody =>
"Unexpected variable assignment in module statement",
StmtUnexpectedPrivateUsage =>
"In a body block, the `private` keyword can only be applied to a function definition",
TypeBodyUnexpectedPrivateUsage =>

View File

@ -56,6 +56,14 @@ pub fn parse_module<'s>(
BodyBlockParser::default().parse_module(lines, precedence)
}
/// Parse a body block.
///
/// Interprets `lines` as the statements of a function body rather than of a module, by
/// delegating to a default-constructed `BodyBlockParser`. `precedence` is forwarded unchanged to
/// that parser, and the tree it produces is returned.
pub fn parse_block<'s>(
lines: impl IntoIterator<Item = item::Line<'s>>,
precedence: &mut operator::Precedence<'s>,
) -> Tree<'s> {
BodyBlockParser::default().parse_body_block(lines, precedence)
}
// === Multi-line expression construction ===