Distinguish assignment/thunk by statement context (#11324)

Align the `Assignment`/`Function` distinction in the AST with the compiler's implemented semantics:
- The ambiguous case `funcOrVar = expression` is now parsed as a `Function` when it occurs in a `Type` definition or at the top level of a module. That is, it is an `Assignment` in contexts where the RHS is evaluated immediately when the binding is evaluated, and a `Function` in contexts where the RHS is evaluated each time the bound name is evaluated (see the sketch after this list).
- `Assignment` statements may now occur only in function bodies.
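
The difference is observable directly from the parser's two entry points. The sketch below is illustrative rather than code from this change: the entry-point names (`parse_module`, `parse_block`) and the `Variant`/`statements`/`expression` access pattern follow the `is_numeric_literal` shim updated in this diff, while the exact `Function`/`Assignment` variant payloads are assumed.

```rust
use enso_parser::syntax::tree::Variant;
use enso_parser::Parser;

/// Report the kind of the first statement in a parsed body block.
fn first_statement_kind(tree: enso_parser::syntax::Tree) -> &'static str {
    let Variant::BodyBlock(block) = tree.variant else { return "not a body block" };
    match block.statements.first().and_then(|line| line.expression.as_ref()) {
        Some(expr) => match &expr.variant {
            Variant::Function(_) => "Function",
            Variant::Assignment(_) => "Assignment",
            _ => "other",
        },
        None => "empty",
    }
}

fn main() {
    let parser = Parser::new();
    // Top level of a module: `foo = x` defines a thunk, so the RHS is
    // evaluated each time `foo` is evaluated.
    assert_eq!(first_statement_kind(parser.parse_module("foo = x")), "Function");
    // Body of a function: the same line is a variable binding, so the RHS is
    // evaluated immediately.
    assert_eq!(first_statement_kind(parser.parse_block("foo = x")), "Assignment");
}
```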

Correcting this distinction lays the groundwork for #11302.

Other changes:
- Fixed incorrect source code locations for negative literals and negated expressions.

# Important Notes
New APIs:
- The parser now exposes a `parse_block` entry point, which parses input lines as if they were in the body of a function; a short usage sketch follows below. The previous entry point has been renamed to `parse_module`.
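
As a usage sketch (assuming only the round-trip property that the fuzz harness in this diff already checks), the new entry point can be called directly on multi-line input:

```rust
fn main() {
    // Parse input lines as if they formed the body of a function, then verify
    // that the parsed tree reproduces the source text.
    let parser = enso_parser::Parser::new();
    let code = "x = 1\ny = x + 1";
    let body = parser.parse_block(code);
    assert_eq!(body.code(), code);
}
```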
Kaz Wesley 2024-10-18 10:54:55 -07:00 committed by GitHub
parent ed12224267
commit 4d4a2990a0
39 changed files with 643 additions and 441 deletions

View File

@ -31,7 +31,6 @@
"test-dev:e2e": "cross-env NODE_ENV=production playwright test --ui", "test-dev:e2e": "cross-env NODE_ENV=production playwright test --ui",
"preinstall": "corepack pnpm run generate-metadata", "preinstall": "corepack pnpm run generate-metadata",
"postinstall": "playwright install", "postinstall": "playwright install",
"build-rust-ffi": "wasm-pack build ./rust-ffi --release --target web && wasm-pack build ./rust-ffi --out-dir node-pkg --target nodejs",
"generate-metadata": "node scripts/generateIconMetadata.js" "generate-metadata": "node scripts/generateIconMetadata.js"
}, },
"dependencies": { "dependencies": {

View File

@ -170,7 +170,7 @@ export const { injectFn: useGraphStore, provideFn: provideGraphStore } = createC
if (!methodAst.value.ok || !moduleSource.text) return if (!methodAst.value.ok || !moduleSource.text) return
const method = methodAst.value.value const method = methodAst.value.value
const toRaw = new Map<SourceRangeKey, RawAst.Tree.Function>() const toRaw = new Map<SourceRangeKey, RawAst.Tree.Function>()
visitRecursive(Ast.parseEnso(moduleSource.text), (node) => { visitRecursive(Ast.rawParseModule(moduleSource.text), (node) => {
if (node.type === RawAst.Tree.Type.Function) { if (node.type === RawAst.Tree.Type.Function) {
const start = node.whitespaceStartInCodeParsed + node.whitespaceLengthInCodeParsed const start = node.whitespaceStartInCodeParsed + node.whitespaceLengthInCodeParsed
const end = start + node.childrenLengthInCodeParsed const end = start + node.childrenLengthInCodeParsed

View File

@ -19,7 +19,7 @@ import { findExpressions, testCase, tryFindExpressions } from './testCase'
test('Raw block abstracts to Ast.BodyBlock', () => { test('Raw block abstracts to Ast.BodyBlock', () => {
const code = 'value = 2 + 2' const code = 'value = 2 + 2'
const rawBlock = Ast.parseEnso(code) const rawBlock = Ast.rawParseModule(code)
const edit = MutableModule.Transient() const edit = MutableModule.Transient()
const abstracted = Ast.abstract(edit, rawBlock, code) const abstracted = Ast.abstract(edit, rawBlock, code)
expect(abstracted.root).toBeInstanceOf(Ast.BodyBlock) expect(abstracted.root).toBeInstanceOf(Ast.BodyBlock)
@ -376,7 +376,7 @@ const cases = [
] ]
test.each(cases)('parse/print round trip: %s', (code) => { test.each(cases)('parse/print round trip: %s', (code) => {
// Get an AST. // Get an AST.
const root = Ast.parseBlock(code) const { root } = Ast.parseModuleWithSpans(code)
// Print AST back to source. // Print AST back to source.
const printed = Ast.print(root) const printed = Ast.print(root)
expect(printed.code).toEqual(code) expect(printed.code).toEqual(code)
@ -389,7 +389,7 @@ test.each(cases)('parse/print round trip: %s', (code) => {
expect(Ast.repair(root).fixes).toBe(undefined) expect(Ast.repair(root).fixes).toBe(undefined)
// Re-parse. // Re-parse.
const { root: root1, spans: spans1 } = Ast.parseBlockWithSpans(printed.code) const { root: root1, spans: spans1 } = Ast.parseModuleWithSpans(printed.code)
Ast.setExternalIds(root1.module, spans1, idMap) Ast.setExternalIds(root1.module, spans1, idMap)
// Check that Identities match original AST. // Check that Identities match original AST.
const printed1 = Ast.print(root1) const printed1 = Ast.print(root1)
@ -815,12 +815,12 @@ describe('Code edit', () => {
}) })
test('Shifting whitespace ownership', () => { test('Shifting whitespace ownership', () => {
const beforeRoot = Ast.parseBlock('value = 1 +\n') const beforeRoot = Ast.parseModuleWithSpans('value = 1 +\n').root
beforeRoot.module.replaceRoot(beforeRoot) beforeRoot.module.replaceRoot(beforeRoot)
const before = findExpressions(beforeRoot, { const before = findExpressions(beforeRoot, {
value: Ast.Ident, value: Ast.Ident,
'1': Ast.NumericLiteral, '1': Ast.NumericLiteral,
'value = 1 +': Ast.Assignment, 'value = 1 +': Ast.Function,
}) })
const edit = beforeRoot.module.edit() const edit = beforeRoot.module.edit()
const newCode = 'value = 1 \n' const newCode = 'value = 1 \n'
@ -831,7 +831,7 @@ describe('Code edit', () => {
const after = findExpressions(edit.root()!, { const after = findExpressions(edit.root()!, {
value: Ast.Ident, value: Ast.Ident,
'1': Ast.NumericLiteral, '1': Ast.NumericLiteral,
'value = 1': Ast.Assignment, 'value = 1': Ast.Function,
}) })
expect(after.value.id).toBe(before.value.id) expect(after.value.id).toBe(before.value.id)
expect(after['1'].id).toBe(before['1'].id) expect(after['1'].id).toBe(before['1'].id)
@ -839,7 +839,7 @@ describe('Code edit', () => {
}) })
test('merging', () => { test('merging', () => {
const block = Ast.parseBlock('a = 1\nb = 2') const block = Ast.parseModuleWithSpans('a = 1\nb = 2').root
const module = block.module const module = block.module
module.replaceRoot(block) module.replaceRoot(block)

View File

@ -2,8 +2,8 @@ import {
astContainingChar, astContainingChar,
childrenAstNodes, childrenAstNodes,
debugAst, debugAst,
parseEnso, rawParseLine,
parseEnsoLine, rawParseModule,
readAstOrTokenSpan, readAstOrTokenSpan,
readAstSpan, readAstSpan,
readTokenSpan, readTokenSpan,
@ -48,18 +48,18 @@ const parseCases = [
] ]
test.each(parseCases)("Parsing '%s'", (code) => { test.each(parseCases)("Parsing '%s'", (code) => {
expect(debugAst(parseEnso(code))).toMatchSnapshot() expect(debugAst(rawParseModule(code))).toMatchSnapshot()
}) })
test.each(parseCases)("AST spans of '%s' are valid", (input) => { test.each(parseCases)("AST spans of '%s' are valid", (input) => {
const tree = parseEnso(input) const tree = rawParseModule(input)
const endPos = validateSpans(tree) const endPos = validateSpans(tree)
expect(endPos).toStrictEqual(input.length) expect(endPos).toStrictEqual(input.length)
}) })
test("Reading AST node's code", () => { test("Reading AST node's code", () => {
const code = 'Data.read File\n2 + 3' const code = 'Data.read File\n2 + 3'
const ast = parseEnso(code) const ast = rawParseModule(code)
expect(readAstSpan(ast, code)).toStrictEqual(code) expect(readAstSpan(ast, code)).toStrictEqual(code)
assert(ast.type === Tree.Type.BodyBlock) assert(ast.type === Tree.Type.BodyBlock)
const statements = Array.from(ast.statements) const statements = Array.from(ast.statements)
@ -123,7 +123,7 @@ test.each([
['(', [{ type: Tree.Type.Group, repr: '(' }]], ['(', [{ type: Tree.Type.Group, repr: '(' }]],
['(foo', [{ type: Tree.Type.Group, repr: '(foo' }]], ['(foo', [{ type: Tree.Type.Group, repr: '(foo' }]],
])("Reading children of '%s'", (code, expected) => { ])("Reading children of '%s'", (code, expected) => {
const ast = parseEnsoLine(code) const ast = rawParseLine(code)
const children = Array.from(childrenAstNodes(ast)) const children = Array.from(childrenAstNodes(ast))
const childrenWithExpected = children.map((child, i) => { const childrenWithExpected = children.map((child, i) => {
return { child, expected: expected[i] } return { child, expected: expected[i] }
@ -147,7 +147,7 @@ test.each([
], ],
], ],
])("Walking AST of '%s'", (code, expected) => { ])("Walking AST of '%s'", (code, expected) => {
const ast = parseEnsoLine(code) const ast = rawParseLine(code)
const visited = Array.from(walkRecursive(ast)) const visited = Array.from(walkRecursive(ast))
const visitedRepr = visited.map((visited) => { const visitedRepr = visited.map((visited) => {
return { return {
@ -206,7 +206,7 @@ test.each([
], ],
], ],
])("Reading AST from code '%s' and position %i", (code, position, expected) => { ])("Reading AST from code '%s' and position %i", (code, position, expected) => {
const ast = parseEnso(code) const ast = rawParseModule(code)
const astAtPosition = astContainingChar(position, ast) const astAtPosition = astContainingChar(position, ast)
const resultWithExpected = astAtPosition.map((ast, i) => { const resultWithExpected = astAtPosition.map((ast, i) => {
return { ast, expected: expected[i] } return { ast, expected: expected[i] }

View File

@ -2,8 +2,8 @@ import { assert } from '@/util/assert'
import { import {
RawAst, RawAst,
astPrettyPrintType, astPrettyPrintType,
parseEnso,
parsedTreeOrTokenRange, parsedTreeOrTokenRange,
rawParseModule,
readAstOrTokenSpan, readAstOrTokenSpan,
readTokenSpan, readTokenSpan,
} from '@/util/ast/raw' } from '@/util/ast/raw'
@ -114,7 +114,7 @@ export class AliasAnalyzer {
private readonly code: string, private readonly code: string,
ast?: RawAst.Tree, ast?: RawAst.Tree,
) { ) {
this.ast = ast ?? parseEnso(code) this.ast = ast ?? rawParseModule(code)
this.rootScope = new Scope(parsedTreeOrTokenRange(this.ast)) this.rootScope = new Scope(parsedTreeOrTokenRange(this.ast))
this.scopes = new NonEmptyStack(this.rootScope) this.scopes = new NonEmptyStack(this.rootScope)
} }

View File

@ -1,8 +1,8 @@
import { assert, assertDefined } from '@/util/assert' import { assert, assertDefined } from '@/util/assert'
import { import {
childrenAstNodesOrTokens, childrenAstNodesOrTokens,
parseEnso,
parsedTreeOrTokenRange, parsedTreeOrTokenRange,
rawParseModule,
readAstOrTokenSpan, readAstOrTokenSpan,
visitGenerator, visitGenerator,
visitRecursive, visitRecursive,
@ -46,7 +46,7 @@ class AstExtended<T extends Tree | Token = Tree | Token, HasIdMap extends boolea
public static parse(code: string): AstExtended<Tree, false> public static parse(code: string): AstExtended<Tree, false>
public static parse(code: string, idMap: IdMap): AstExtended<Tree, true> public static parse(code: string, idMap: IdMap): AstExtended<Tree, true>
public static parse(code: string, idMap?: IdMap): AstExtended<Tree, boolean> { public static parse(code: string, idMap?: IdMap): AstExtended<Tree, boolean> {
const ast = parseEnso(code) const ast = rawParseModule(code)
if (idMap != null) { if (idMap != null) {
visitRecursive(ast, (node) => { visitRecursive(ast, (node) => {
const range = parsedTreeOrTokenRange(node) const range = parsedTreeOrTokenRange(node)

View File

@ -1,13 +1,13 @@
import { assertDefined } from '@/util/assert' import { assertDefined } from '@/util/assert'
import * as map from 'lib0/map' import * as map from 'lib0/map'
import * as RawAst from 'ydoc-shared/ast/generated/ast' import * as RawAst from 'ydoc-shared/ast/generated/ast'
import { parseEnso } from 'ydoc-shared/ast/parse' import { rawParseModule } from 'ydoc-shared/ast/parse'
import { LazyObject, LazySequence } from 'ydoc-shared/ast/parserSupport' import { LazyObject, LazySequence } from 'ydoc-shared/ast/parserSupport'
import { tryGetSoleValue } from 'ydoc-shared/util/data/iterable' import { tryGetSoleValue } from 'ydoc-shared/util/data/iterable'
import { isResult, mapOk } from 'ydoc-shared/util/data/result' import { isResult, mapOk } from 'ydoc-shared/util/data/result'
import type { SourceRange } from 'ydoc-shared/yjsModel' import type { SourceRange } from 'ydoc-shared/yjsModel'
export { parseEnso, RawAst } export { RawAst, rawParseModule }
export type HasAstRange = SourceRange | RawAst.Tree | RawAst.Token export type HasAstRange = SourceRange | RawAst.Tree | RawAst.Token
@ -16,8 +16,8 @@ export type HasAstRange = SourceRange | RawAst.Tree | RawAst.Token
* *
* Is meant to be a helper for tests. If the code is multiline, an exception is raised. * Is meant to be a helper for tests. If the code is multiline, an exception is raised.
*/ */
export function parseEnsoLine(code: string): RawAst.Tree { export function rawParseLine(code: string): RawAst.Tree {
const block = parseEnso(code) const block = rawParseModule(code)
const soleExpression = tryGetSoleValue(block.statements)?.expression const soleExpression = tryGetSoleValue(block.statements)?.expression
assertDefined(soleExpression) assertDefined(soleExpression)
return soleExpression return soleExpression

View File

@ -15,8 +15,14 @@ pub fn parse_doc_to_json(docs: &str) -> String {
} }
#[wasm_bindgen] #[wasm_bindgen]
pub fn parse(code: &str) -> Vec<u8> { pub fn parse_module(code: &str) -> Vec<u8> {
let ast = PARSER.with(|parser| parser.run(code)); let ast = PARSER.with(|parser| parser.parse_module(code));
enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format")
}
#[wasm_bindgen]
pub fn parse_block(code: &str) -> Vec<u8> {
let ast = PARSER.with(|parser| parser.parse_block(code));
enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format") enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format")
} }
@ -39,7 +45,7 @@ pub fn is_ident_or_operator(code: &str) -> u32 {
#[wasm_bindgen] #[wasm_bindgen]
pub fn is_numeric_literal(code: &str) -> bool { pub fn is_numeric_literal(code: &str) -> bool {
let parsed = PARSER.with(|parser| parser.run(code)); let parsed = PARSER.with(|parser| parser.parse_block(code));
let enso_parser::syntax::tree::Variant::BodyBlock(body) = parsed.variant else { return false }; let enso_parser::syntax::tree::Variant::BodyBlock(body) = parsed.variant else { return false };
let [stmt] = &body.statements[..] else { return false }; let [stmt] = &body.statements[..] else { return false };
stmt.expression.as_ref().map_or(false, |expr| match &expr.variant { stmt.expression.as_ref().map_or(false, |expr| match &expr.variant {

View File

@ -11,6 +11,7 @@ export const {
is_ident_or_operator, is_ident_or_operator,
is_numeric_literal, is_numeric_literal,
parse_doc_to_json, parse_doc_to_json,
parse_tree, parse_block,
parse_module,
xxHash128, xxHash128,
} = globalThis } = globalThis

View File

@ -13,7 +13,8 @@ declare const YDOC_PORT: number | undefined
declare const YDOC_LS_DEBUG: boolean | undefined declare const YDOC_LS_DEBUG: boolean | undefined
// rust ffi shims // rust ffi shims
declare function parse_tree(code: string): Uint8Array declare function parse_block(code: string): Uint8Array
declare function parse_module(code: string): Uint8Array
declare function parse_doc_to_json(docs: string): string declare function parse_doc_to_json(docs: string): string
declare function is_ident_or_operator(code: string): number declare function is_ident_or_operator(code: string): number
declare function is_numeric_literal(code: string): boolean declare function is_numeric_literal(code: string): boolean

View File

@ -569,7 +569,7 @@ class ModulePersistence extends ObservableV2<{ removed: () => void }> {
if (editedRoot instanceof Ast.BodyBlock) Ast.repair(editedRoot, edit) if (editedRoot instanceof Ast.BodyBlock) Ast.repair(editedRoot, edit)
syncModule.applyEdit(edit) syncModule.applyEdit(edit)
} else { } else {
const { root, spans } = Ast.parseBlockWithSpans(code, syncModule) const { root, spans } = Ast.parseModuleWithSpans(code, syncModule)
syncModule.syncRoot(root) syncModule.syncRoot(root)
parsedSpans = spans parsedSpans = spans
} }

View File

@ -6,7 +6,13 @@
import { createXXHash128 } from 'hash-wasm' import { createXXHash128 } from 'hash-wasm'
import type { IDataType } from 'hash-wasm/dist/lib/util' import type { IDataType } from 'hash-wasm/dist/lib/util'
import { is_ident_or_operator, is_numeric_literal, parse, parse_doc_to_json } from 'rust-ffi' import {
is_ident_or_operator,
is_numeric_literal,
parse_block,
parse_doc_to_json,
parse_module,
} from 'rust-ffi'
const xxHasher128 = await createXXHash128() const xxHasher128 = await createXXHash128()
export function xxHash128(input: IDataType) { export function xxHash128(input: IDataType) {
@ -16,4 +22,4 @@ export function xxHash128(input: IDataType) {
} }
/* eslint-disable-next-line camelcase */ /* eslint-disable-next-line camelcase */
export { is_ident_or_operator, is_numeric_literal, parse_doc_to_json, parse as parse_tree } export { is_ident_or_operator, is_numeric_literal, parse_block, parse_doc_to_json, parse_module }

View File

@ -31,7 +31,7 @@ import {
type SourceRangeKey, type SourceRangeKey,
} from '../yjsModel' } from '../yjsModel'
import { graphParentPointers } from './debug' import { graphParentPointers } from './debug'
import { parse_tree, xxHash128 } from './ffi' import { parse_block, parse_module, xxHash128 } from './ffi'
import * as RawAst from './generated/ast' import * as RawAst from './generated/ast'
import { MutableModule } from './mutableModule' import { MutableModule } from './mutableModule'
import type { LazyObject } from './parserSupport' import type { LazyObject } from './parserSupport'
@ -62,9 +62,17 @@ import {
Wildcard, Wildcard,
} from './tree' } from './tree'
/** Return the raw parser output for the given code. */ /** Return the raw parser output for the given code, parsed as a module. */
export function parseEnso(code: string): RawAst.Tree.BodyBlock { export function rawParseModule(code: string): RawAst.Tree.BodyBlock {
const blob = parse_tree(code) return deserializeBlock(parse_module(code))
}
/** Return the raw parser output for the given code, parsed as a body block. */
export function rawParseBlock(code: string): RawAst.Tree.BodyBlock {
return deserializeBlock(parse_block(code))
}
function deserializeBlock(blob: Uint8Array): RawAst.Tree.BodyBlock {
const tree = RawAst.Tree.read(new DataView(blob.buffer), blob.byteLength - 4) const tree = RawAst.Tree.read(new DataView(blob.buffer), blob.byteLength - 4)
// The root of the parser output is always a body block. // The root of the parser output is always a body block.
assert(tree.type === RawAst.Tree.Type.BodyBlock) assert(tree.type === RawAst.Tree.Type.BodyBlock)
@ -76,7 +84,7 @@ export function normalize(rootIn: Ast): Ast {
const printed = print(rootIn) const printed = print(rootIn)
const idMap = spanMapToIdMap(printed.info) const idMap = spanMapToIdMap(printed.info)
const module = MutableModule.Transient() const module = MutableModule.Transient()
const tree = parseEnso(printed.code) const tree = rawParseModule(printed.code)
const { root: parsed, spans } = abstract(module, tree, printed.code) const { root: parsed, spans } = abstract(module, tree, printed.code)
module.replaceRoot(parsed) module.replaceRoot(parsed)
setExternalIds(module, spans, idMap) setExternalIds(module, spans, idMap)
@ -596,15 +604,18 @@ export function printDocumented(
return code return code
} }
/** Parse the input as a block. */ /** Parse the input as a body block, not the top level of a module. */
export function parseBlock(code: string, inModule?: MutableModule): Owned<MutableBodyBlock> { export function parseBlock(code: string, module?: MutableModule): Owned<MutableBodyBlock> {
return parseBlockWithSpans(code, inModule).root const tree = rawParseBlock(code)
return abstract(module ?? MutableModule.Transient(), tree, code).root
} }
/** Parse the input. If it contains a single expression at the top level, return it; otherwise, return a block. */ /**
* Parse the input. If it contains a single expression at the top level, return it; otherwise, parse it as a body block.
*/
export function parse(code: string, module?: MutableModule): Owned { export function parse(code: string, module?: MutableModule): Owned {
const module_ = module ?? MutableModule.Transient() const module_ = module ?? MutableModule.Transient()
const ast = parseBlock(code, module_) const ast = parseBlock(code, module)
const soleStatement = tryGetSoleValue(ast.statements()) const soleStatement = tryGetSoleValue(ast.statements())
if (!soleStatement) return ast if (!soleStatement) return ast
const parent = parentId(soleStatement) const parent = parentId(soleStatement)
@ -613,21 +624,20 @@ export function parse(code: string, module?: MutableModule): Owned {
return asOwned(soleStatement) return asOwned(soleStatement)
} }
/** Parse a block, and return it along with a mapping from source locations to parsed objects. */ /** Parse a module, and return it along with a mapping from source locations to parsed objects. */
export function parseBlockWithSpans( export function parseModuleWithSpans(
code: string, code: string,
inModule?: MutableModule, module?: MutableModule | undefined,
): { root: Owned<MutableBodyBlock>; spans: SpanMap } { ): { root: Owned<MutableBodyBlock>; spans: SpanMap } {
const tree = parseEnso(code) const tree = rawParseModule(code)
const module = inModule ?? MutableModule.Transient() return abstract(module ?? MutableModule.Transient(), tree, code)
return abstract(module, tree, code)
} }
/** Parse the input, and apply the given `IdMap`. Return the parsed tree, the updated `IdMap`, the span map, and a /** Parse the input, and apply the given `IdMap`. Return the parsed tree, the updated `IdMap`, the span map, and a
* mapping to the `RawAst` representation. * mapping to the `RawAst` representation.
*/ */
export function parseExtended(code: string, idMap?: IdMap | undefined, inModule?: MutableModule) { export function parseExtended(code: string, idMap?: IdMap | undefined, inModule?: MutableModule) {
const rawRoot = parseEnso(code) const rawRoot = rawParseModule(code)
const module = inModule ?? MutableModule.Transient() const module = inModule ?? MutableModule.Transient()
const { root, spans, toRaw } = module.transact(() => { const { root, spans, toRaw } = module.transact(() => {
const { root, spans, toRaw } = abstract(module, rawRoot, code) const { root, spans, toRaw } = abstract(module, rawRoot, code)
@ -701,7 +711,7 @@ export function repair(
// Print the input to see what spans its nodes expect to have in the output. // Print the input to see what spans its nodes expect to have in the output.
const printed = print(root) const printed = print(root)
// Parse the printed output to see what spans actually correspond to nodes in the printed code. // Parse the printed output to see what spans actually correspond to nodes in the printed code.
const reparsed = parseBlockWithSpans(printed.code) const reparsed = parseModuleWithSpans(printed.code)
// See if any span we expected to be a node isn't; if so, it likely merged with its parent due to wrong precedence. // See if any span we expected to be a node isn't; if so, it likely merged with its parent due to wrong precedence.
const { lostInline, lostBlock } = checkSpans( const { lostInline, lostBlock } = checkSpans(
printed.info.nodes, printed.info.nodes,
@ -727,7 +737,7 @@ export function repair(
// Verify that it's fixed. // Verify that it's fixed.
const printed2 = print(fixes.getVersion(root)) const printed2 = print(fixes.getVersion(root))
const reparsed2 = parseBlockWithSpans(printed2.code) const reparsed2 = parseModuleWithSpans(printed2.code)
const { lostInline: lostInline2, lostBlock: lostBlock2 } = checkSpans( const { lostInline: lostInline2, lostBlock: lostBlock2 } = checkSpans(
printed2.info.nodes, printed2.info.nodes,
reparsed2.spans.nodes, reparsed2.spans.nodes,
@ -919,7 +929,7 @@ export function applyTextEditsToAst(
) { ) {
const printed = print(ast) const printed = print(ast)
const code = applyTextEdits(printed.code, textEdits) const code = applyTextEdits(printed.code, textEdits)
const rawParsedBlock = parseEnso(code) const rawParsedBlock = rawParseModule(code)
const rawParsed = const rawParsed =
ast instanceof MutableBodyBlock ? rawParsedBlock : rawBlockToInline(rawParsedBlock) ast instanceof MutableBodyBlock ? rawParsedBlock : rawBlockToInline(rawParsedBlock)
const parsed = abstract(ast.module, rawParsed, code) const parsed = abstract(ast.module, rawParsed, code)

View File

@ -198,8 +198,8 @@ export abstract class Ast {
return this.module.get(this.parentId) return this.module.get(this.parentId)
} }
static parseBlock(source: string, inModule?: MutableModule) { static parseBlock(source: string, module?: MutableModule) {
return parseBlock(source, inModule) return parseBlock(source, module)
} }
static parse(source: string, module?: MutableModule) { static parse(source: string, module?: MutableModule) {

View File

@ -696,7 +696,7 @@ class Compiler(
* @return A Tree representation of `source` * @return A Tree representation of `source`
*/ */
def parseInline(source: CharSequence): Tree = def parseInline(source: CharSequence): Tree =
Parser.parse(source) Parser.parseBlock(source)
/** Enhances the provided IR with import/export statements for the provided list /** Enhances the provided IR with import/export statements for the provided list
* of fully qualified names of modules. The statements are considered to be "synthetic" i.e. compiler-generated. * of fully qualified names of modules. The statements are considered to be "synthetic" i.e. compiler-generated.

View File

@ -10,6 +10,7 @@ import java.nio.file.StandardOpenOption;
import java.util.function.Function; import java.util.function.Function;
import org.enso.compiler.core.EnsoParser; import org.enso.compiler.core.EnsoParser;
import org.enso.compiler.core.IR; import org.enso.compiler.core.IR;
import org.enso.compiler.core.ir.Expression;
import org.enso.compiler.core.ir.Module; import org.enso.compiler.core.ir.Module;
public abstract class CompilerTests { public abstract class CompilerTests {
@ -19,6 +20,12 @@ public abstract class CompilerTests {
return ir; return ir;
} }
protected static Expression.Block parseBlock(CharSequence code) {
Expression.Block ir = EnsoParser.compileBlock(code);
assertNotNull("IR was generated", ir);
return ir;
}
public static void assertIR(String msg, IR old, IR now) throws IOException { public static void assertIR(String msg, IR old, IR now) throws IOException {
assertEqualsIR(msg, null, old, now); assertEqualsIR(msg, null, old, now);
} }

View File

@ -3,10 +3,10 @@ package org.enso.compiler.test;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import org.enso.compiler.core.IR;
import org.enso.compiler.core.ir.Empty; import org.enso.compiler.core.ir.Empty;
import org.enso.compiler.core.ir.Expression; import org.enso.compiler.core.ir.Expression;
import org.enso.compiler.core.ir.Location; import org.enso.compiler.core.ir.Location;
import org.enso.compiler.core.ir.Module;
import org.enso.compiler.core.ir.expression.errors.Syntax; import org.enso.compiler.core.ir.expression.errors.Syntax;
import org.enso.compiler.core.ir.module.scope.definition.Method; import org.enso.compiler.core.ir.module.scope.definition.Method;
import org.junit.Test; import org.junit.Test;
@ -466,7 +466,7 @@ public class ErrorCompilerTest extends CompilerTests {
@Test @Test
public void illegalPrivateVariableDeclaration() throws Exception { public void illegalPrivateVariableDeclaration() throws Exception {
var ir = parse("private var = 42"); var ir = parseBlock("private var = 42");
assertSingleSyntaxError( assertSingleSyntaxError(
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 16); ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 0, 16);
} }
@ -645,8 +645,7 @@ public class ErrorCompilerTest extends CompilerTests {
ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 29, 35); ir, Syntax.UnexpectedExpression$.MODULE$, "Unexpected expression", 29, 35);
} }
private void assertSingleSyntaxError( private void assertSingleSyntaxError(IR ir, Syntax.Reason type, String msg, int start, int end) {
Module ir, Syntax.Reason type, String msg, int start, int end) {
var errors = assertIR(ir, Syntax.class, 1); var errors = assertIR(ir, Syntax.class, 1);
assertEquals(type, errors.head().reason()); assertEquals(type, errors.head().reason());
if (msg != null) { if (msg != null) {
@ -655,7 +654,7 @@ public class ErrorCompilerTest extends CompilerTests {
assertEquals(new Location(start, end), errors.head().location().get().location()); assertEquals(new Location(start, end), errors.head().location().get().location());
} }
private List<Syntax> assertIR(Module ir, Class<Syntax> type, int count) { private List<Syntax> assertIR(IR ir, Class<Syntax> type, int count) {
var errors = ir.preorder().filter(type::isInstance).map(type::cast); var errors = ir.preorder().filter(type::isInstance).map(type::cast);
assertEquals("Expecting errors: " + errors, count, errors.size()); assertEquals("Expecting errors: " + errors, count, errors.size());
return errors; return errors;

View File

@ -6,6 +6,7 @@ import org.enso.compiler.core.ir.Expression;
import org.enso.compiler.core.ir.Location; import org.enso.compiler.core.ir.Location;
import org.enso.compiler.core.ir.Module; import org.enso.compiler.core.ir.Module;
import org.enso.syntax2.Parser; import org.enso.syntax2.Parser;
import scala.Option;
public final class EnsoParser { public final class EnsoParser {
private EnsoParser() {} private EnsoParser() {}
@ -15,7 +16,7 @@ public final class EnsoParser {
} }
public static Module compile(CharSequence src, Map<Location, UUID> idMap) { public static Module compile(CharSequence src, Map<Location, UUID> idMap) {
var tree = Parser.parse(src); var tree = Parser.parseModule(src);
var treeToIr = TreeToIr.MODULE; var treeToIr = TreeToIr.MODULE;
if (idMap != null) { if (idMap != null) {
treeToIr = new TreeToIr(idMap); treeToIr = new TreeToIr(idMap);
@ -23,8 +24,13 @@ public final class EnsoParser {
return treeToIr.translate(tree); return treeToIr.translate(tree);
} }
public static scala.Option<Expression> compileInline(CharSequence src) { public static Expression.Block compileBlock(CharSequence src) {
var tree = Parser.parse(src); var tree = Parser.parseBlock(src);
return TreeToIr.MODULE.translateBlock(tree);
}
public static Option<Expression> compileInline(CharSequence src) {
var tree = Parser.parseBlock(src);
return TreeToIr.MODULE.translateInline(tree); return TreeToIr.MODULE.translateInline(tree);
} }

View File

@ -75,6 +75,10 @@ final class TreeToIr {
return translateModule(ast); return translateModule(ast);
} }
Expression.Block translateBlock(Tree.BodyBlock ast) {
return translateBodyBlock(ast, false);
}
/** /**
* Translates an inline program expression represented in the parser {@link Tree} to the * Translates an inline program expression represented in the parser {@link Tree} to the
* compiler's {@link IR} representation. * compiler's {@link IR} representation.
@ -86,77 +90,68 @@ final class TreeToIr {
* @return The {@link IR} representation of the given ast if it is valid, otherwise * @return The {@link IR} representation of the given ast if it is valid, otherwise
* {@link Option#empty()}. * {@link Option#empty()}.
*/ */
Option<Expression> translateInline(Tree ast) { Option<Expression> translateInline(Tree.BodyBlock ast) {
return switch (ast) { List<Expression> expressions = nil();
case Tree.BodyBlock b -> { java.util.List<IdentifiedLocation> locations = new ArrayList<>();
List<Expression> expressions = nil(); for (Line statement : ast.getStatements()) {
java.util.List<IdentifiedLocation> locations = new ArrayList<>(); Tree exprTree = statement.getExpression();
for (Line statement : b.getStatements()) { Expression expr = switch (exprTree) {
Tree exprTree = statement.getExpression(); case null -> null;
Expression expr = switch (exprTree) { case Tree.Export x -> null;
case null -> null; case Tree.Import x -> null;
case Tree.Export x -> null; case Tree.Invalid x -> null;
case Tree.Import x -> null; case Tree.TypeSignature sig -> {
case Tree.Invalid x -> null; Expression methodReference;
case Tree.TypeSignature sig -> { try {
Expression methodReference; methodReference = translateMethodReference(sig.getVariable(), true);
try { } catch (SyntaxException ex) {
methodReference = translateMethodReference(sig.getVariable(), true); methodReference = ex.toError();
} catch (SyntaxException ex) { }
methodReference = ex.toError(); var signature = translateType(sig.getType());
} yield new Type.Ascription(
var signature = translateType(sig.getType()); methodReference,
var ascription = signature,
new Type.Ascription( Option.empty(),
methodReference, getIdentifiedLocation(sig),
signature, meta());
Option.empty(), }
getIdentifiedLocation(sig), case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno);
meta()); default -> translateExpression(exprTree);
yield ascription; };
} if (expr != null) {
case Tree.TypeAnnotated anno -> translateTypeAnnotated(anno); expressions = join(expr, expressions);
default -> translateExpression(exprTree); if (expr.location().isDefined()) {
}; locations.add(expr.location().get());
if (expr != null) {
expressions = join(expr, expressions);
if (expr.location().isDefined()) {
locations.add(expr.location().get());
}
}
} }
yield switch (expressions.size()) {
case 0 -> Option.empty();
case 1 -> Option.apply(expressions.head());
default -> {
IdentifiedLocation combinedLocation;
if (locations.isEmpty()) {
combinedLocation = null;
} else {
combinedLocation =
new IdentifiedLocation(
new Location(
locations.get(1).start(),
locations.get(locations.size() - 1).end()
),
null
);
}
var returnValue = expressions.head();
@SuppressWarnings("unchecked")
var statements = ((List<Expression>) expressions.tail()).reverse();
yield Option.apply(new Expression.Block(
statements,
returnValue,
combinedLocation,
false,
meta()
));
}
};
} }
}
return switch (expressions.size()) {
case 0 -> Option.empty();
case 1 -> Option.apply(expressions.head());
default -> { default -> {
throw new IllegalStateException(); IdentifiedLocation combinedLocation;
if (locations.isEmpty()) {
combinedLocation = null;
} else {
combinedLocation =
new IdentifiedLocation(
new Location(
locations.get(1).start(),
locations.get(locations.size() - 1).end()
),
null
);
}
var returnValue = expressions.head();
@SuppressWarnings("unchecked")
var statements = ((List<Expression>) expressions.tail()).reverse();
yield Option.apply(new Expression.Block(
statements,
returnValue,
combinedLocation,
false,
meta()
));
} }
}; };
} }
@ -334,24 +329,6 @@ final class TreeToIr {
yield translateModuleSymbol(doc.getExpression(), join(comment, appendTo)); yield translateModuleSymbol(doc.getExpression(), join(comment, appendTo));
} }
case Tree.Assignment a -> {
var reference = translateMethodReference(a.getPattern(), false);
var body = translateExpression(a.getExpr());
if (body == null) {
throw new NullPointerException();
}
var aLoc = expandToContain(getIdentifiedLocation(a.getExpr()), body.identifiedLocation());
var binding = new Method.Binding(
reference,
nil(),
false,
body.setLocation(Option.apply(aLoc)),
expandToContain(getIdentifiedLocation(a), aLoc),
meta()
);
yield join(binding, appendTo);
}
case Tree.TypeSignature sig -> { case Tree.TypeSignature sig -> {
var methodReference = translateMethodReference(sig.getVariable(), true); var methodReference = translateMethodReference(sig.getVariable(), true);
var signature = translateType(sig.getType()); var signature = translateType(sig.getType());
@ -457,16 +434,6 @@ final class TreeToIr {
yield join(ir, appendTo); yield join(ir, appendTo);
} }
// In some cases this is a `Function` in IR, but an `Assignment` in Tree.
// See: https://discord.com/channels/401396655599124480/1001476608957349917
case Tree.Assignment assignment -> {
var name = buildName(assignment.getPattern());
java.util.List<ArgumentDefinition> args = java.util.Collections.emptyList();
var ir = translateFunction(assignment, name, false, args, assignment.getExpr(), null,
false);
yield join(ir, appendTo);
}
case Tree.ForeignFunction fn when fn.getBody() instanceof Tree.TextLiteral body -> { case Tree.ForeignFunction fn when fn.getBody() instanceof Tree.TextLiteral body -> {
var name = buildName(fn.getName()); var name = buildName(fn.getName());
var args = translateArgumentsDefinition(fn.getArgs()); var args = translateArgumentsDefinition(fn.getArgs());
@ -595,15 +562,14 @@ final class TreeToIr {
String functionName = fn.getName().codeRepr(); String functionName = fn.getName().codeRepr();
var ascribedBody = addTypeAscription(functionName, body, returnSignature, loc); var ascribedBody = addTypeAscription(functionName, body, returnSignature, loc);
var binding = new Method.Binding( return new Method.Binding(
methodRef, methodRef,
args, args,
isPrivate, isPrivate,
ascribedBody, ascribedBody,
loc, loc,
meta() meta()
); );
return binding;
} }
private Expression translateFunction( private Expression translateFunction(
@ -617,30 +583,23 @@ final class TreeToIr {
} catch (SyntaxException ex) { } catch (SyntaxException ex) {
return ex.toError(); return ex.toError();
} }
var loc = getIdentifiedLocation(fun); var loc = getIdentifiedLocation(fun);
var body = translateExpression(treeBody);
String functionName = name.name(); String functionName = name.name();
if (args.isEmpty()) { if (args.isEmpty()) {
if (body instanceof Expression.Block block) { Expression body;
if (treeBody instanceof Tree.BodyBlock block) {
// suspended block has a name and no arguments // suspended block has a name and no arguments
body = block.copy( body = translateBodyBlock(block, true);
block.copy$default$1(), } else {
block.copy$default$2(), body = translateExpression(treeBody);
block.copy$default$3(),
true,
block.copy$default$5(),
block.copy$default$6(),
block.copy$default$7()
);
} }
if (body == null) { if (body == null) {
body = translateSyntaxError(fun, Syntax.UnexpectedExpression$.MODULE$); body = translateSyntaxError(fun, Syntax.UnexpectedExpression$.MODULE$);
} }
var ascribedBody = addTypeAscription(functionName, body, returnType, loc); var ascribedBody = addTypeAscription(functionName, body, returnType, loc);
return new Expression.Binding(name, ascribedBody, loc, meta()); return new Expression.Binding(name, ascribedBody, loc, meta());
} else { } else {
var body = translateExpression(treeBody);
if (body == null) { if (body == null) {
return translateSyntaxError(fun, Syntax.UnexpectedDeclarationInType$.MODULE$); return translateSyntaxError(fun, Syntax.UnexpectedDeclarationInType$.MODULE$);
} }
@ -1010,43 +969,7 @@ final class TreeToIr {
} }
yield new Application.Prefix(fn, args.reverse(), false, getIdentifiedLocation(tree), meta()); yield new Application.Prefix(fn, args.reverse(), false, getIdentifiedLocation(tree), meta());
} }
case Tree.BodyBlock body -> { case Tree.BodyBlock body -> translateBodyBlock(body, false);
var expressions = new java.util.ArrayList<Expression>();
Expression last = null;
for (var line : body.getStatements()) {
Tree expr = line.getExpression();
if (expr == null) {
continue;
}
if (last != null) {
expressions.add(last);
}
while (expr instanceof Tree.Documented doc) {
expr = doc.getExpression();
expressions.add(translateComment(doc, doc.getDocumentation()));
}
last = translateExpression(expr, false);
}
var locationWithANewLine = getIdentifiedLocation(body, 0, 0, null);
if (last == null) {
if (expressions.isEmpty()) {
last = new Empty(locationWithANewLine, meta());
} else {
last = expressions.get(expressions.size() - 1);
expressions.remove(expressions.size() - 1);
}
}
var list = CollectionConverters.asScala(expressions.iterator()).toList();
if (last != null
&& last.location().isDefined()
&& last.location().get().end() != locationWithANewLine.end()) {
int start = last.location().get().start();
int end = locationWithANewLine.end() - 1;
var id = new IdentifiedLocation(start, end, last.location().get().uuid());
last = last.setLocation(Option.apply(id));
}
yield new Expression.Block(list, last, locationWithANewLine, false, meta());
}
case Tree.Assignment assign -> { case Tree.Assignment assign -> {
var name = buildNameOrQualifiedName(assign.getPattern()); var name = buildNameOrQualifiedName(assign.getPattern());
var expr = translateExpression(assign.getExpr(), false); var expr = translateExpression(assign.getExpr(), false);
@ -1156,7 +1079,7 @@ final class TreeToIr {
case Literal.Number n -> n.copy( case Literal.Number n -> n.copy(
n.copy$default$1(), n.copy$default$1(),
"-" + n.copy$default$2(), "-" + n.copy$default$2(),
n.copy$default$3(), Option.apply(getIdentifiedLocation(un)),
n.copy$default$4(), n.copy$default$4(),
n.copy$default$5(), n.copy$default$5(),
n.copy$default$6() n.copy$default$6()
@ -1164,7 +1087,7 @@ final class TreeToIr {
case Expression expr -> { case Expression expr -> {
var negate = new Name.Literal("negate", true, null, Option.empty(), meta()); var negate = new Name.Literal("negate", true, null, Option.empty(), meta());
var arg = new CallArgument.Specified(Option.empty(), expr, expr.identifiedLocation(), meta()); var arg = new CallArgument.Specified(Option.empty(), expr, expr.identifiedLocation(), meta());
yield new Application.Prefix(negate, join(arg, nil()), false, expr.identifiedLocation(), meta()); yield new Application.Prefix(negate, join(arg, nil()), false, getIdentifiedLocation(un), meta());
} }
case null -> case null ->
translateSyntaxError(tree, new Syntax.UnsupportedSyntax("Strange unary -")); translateSyntaxError(tree, new Syntax.UnsupportedSyntax("Strange unary -"));
@ -1215,6 +1138,50 @@ final class TreeToIr {
}; };
} }
private Expression.Block translateBodyBlock(Tree.BodyBlock body, boolean suspended) {
var expressions = new java.util.ArrayList<Expression>();
Expression last = null;
for (var line : body.getStatements()) {
Tree expr = line.getExpression();
if (expr == null) {
continue;
}
if (last != null) {
expressions.add(last);
}
while (expr instanceof Tree.Documented doc) {
expr = doc.getExpression();
Expression commentIr;
try {
commentIr = translateComment(doc, doc.getDocumentation());
} catch (SyntaxException ex) {
commentIr = ex.toError();
}
expressions.add(commentIr);
}
last = translateExpression(expr, false);
}
var locationWithANewLine = getIdentifiedLocation(body, 0, 0, null);
if (last == null) {
if (expressions.isEmpty()) {
last = new Empty(locationWithANewLine, meta());
} else {
last = expressions.get(expressions.size() - 1);
expressions.remove(expressions.size() - 1);
}
}
var list = CollectionConverters.asScala(expressions.iterator()).toList();
if (last != null
&& last.location().isDefined()
&& last.location().get().end() != locationWithANewLine.end()) {
int start = last.location().get().start();
int end = locationWithANewLine.end() - 1;
var id = new IdentifiedLocation(start, end, last.location().get().uuid());
last = last.setLocation(Option.apply(id));
}
return new Expression.Block(list, last, locationWithANewLine, suspended, meta());
}
private void attachTranslatedWarnings(IR ir, Tree tree) { private void attachTranslatedWarnings(IR ir, Tree tree) {
for (var warning : tree.getWarnings()) { for (var warning : tree.getWarnings()) {
var message = Parser.getWarningMessage(warning); var message = Parser.getWarningMessage(warning);
@ -1624,15 +1591,16 @@ final class TreeToIr {
new Pattern.Literal((Literal) translateNumber(num), getIdentifiedLocation(num), meta()); new Pattern.Literal((Literal) translateNumber(num), getIdentifiedLocation(num), meta());
case Tree.UnaryOprApp num when num.getOpr().codeRepr().equals("-") -> { case Tree.UnaryOprApp num when num.getOpr().codeRepr().equals("-") -> {
var n = (Literal.Number) translateExpression(num.getRhs()); var n = (Literal.Number) translateExpression(num.getRhs());
var loc = getIdentifiedLocation(num);
var t = n.copy( var t = n.copy(
n.copy$default$1(), n.copy$default$1(),
"-" + n.copy$default$2(), "-" + n.copy$default$2(),
n.copy$default$3(), Option.apply(loc),
n.copy$default$4(), n.copy$default$4(),
n.copy$default$5(), n.copy$default$5(),
n.copy$default$6() n.copy$default$6()
); );
yield new Pattern.Literal(t, getIdentifiedLocation(num), meta()); yield new Pattern.Literal(t, loc, meta());
} }
case Tree.TypeAnnotated anno -> { case Tree.TypeAnnotated anno -> {
var type = buildNameOrQualifiedName(maybeManyParensed(anno.getType())); var type = buildNameOrQualifiedName(maybeManyParensed(anno.getType()));

View File

@ -13,7 +13,8 @@ public final class ParserPolyfill implements ProxyExecutable, Polyfill {
private static final Logger log = LoggerFactory.getLogger(ParserPolyfill.class); private static final Logger log = LoggerFactory.getLogger(ParserPolyfill.class);
private static final String PARSE_TREE = "parse-tree"; private static final String PARSE_BLOCK = "parse-block";
private static final String PARSE_MODULE = "parse-module";
private static final String XX_HASH_128 = "xx-hash-128"; private static final String XX_HASH_128 = "xx-hash-128";
private static final String IS_IDENT_OR_OPERATOR = "is-ident-or-operator"; private static final String IS_IDENT_OR_OPERATOR = "is-ident-or-operator";
@ -36,10 +37,16 @@ public final class ParserPolyfill implements ProxyExecutable, Polyfill {
log.debug(Arguments.toString(arguments)); log.debug(Arguments.toString(arguments));
return switch (command) { return switch (command) {
case PARSE_TREE -> { case PARSE_BLOCK -> {
var input = arguments[1].asString(); var input = arguments[1].asString();
yield Parser.parseInputLazy(input); yield Parser.parseBlockLazy(input);
}
case PARSE_MODULE -> {
var input = arguments[1].asString();
yield Parser.parseModuleLazy(input);
} }
case XX_HASH_128 -> { case XX_HASH_128 -> {

View File

@ -1,7 +1,12 @@
(function (jvm) { (function (jvm) {
globalThis.parse_tree = function(code) { globalThis.parse_module = function(code) {
const byteBuffer = jvm('parse-tree', code); const byteBuffer = jvm('parse-module', code);
return new Uint8Array(new ArrayBuffer(byteBuffer));
};
globalThis.parse_block = function(code) {
const byteBuffer = jvm('parse-block', code);
return new Uint8Array(new ArrayBuffer(byteBuffer)); return new Uint8Array(new ArrayBuffer(byteBuffer));
}; };

View File

@ -40,9 +40,21 @@ public class ParserPolyfillTest extends ExecutorSetup {
} }
@Test @Test
public void parseTree() throws Exception { public void parseModule() throws Exception {
var code = """ var code = """
const arr = parse_tree(`main = 1 + 2`) const arr = parse_module(`main = 1 + 2`)
arr.buffer
""";
var result = CompletableFuture.supplyAsync(() -> context.eval("js", code), executor).get();
Assert.assertTrue(result.as(ByteSequence.class).length() > 0);
}
@Test
public void parseBlock() throws Exception {
var code = """
const arr = parse_block(`value = 1 + 2`)
arr.buffer arr.buffer
"""; """;

View File

@ -12,7 +12,7 @@ fn main() {
fuzz!(|code: &[u8]| { fuzz!(|code: &[u8]| {
if let Ok(code) = std::str::from_utf8(code) { if let Ok(code) = std::str::from_utf8(code) {
let parser = enso_parser::Parser::new(); let parser = enso_parser::Parser::new();
let ast = parser.run(code); let ast = parser.parse_module(code);
assert_eq!(ast.code(), code); assert_eq!(ast.code(), code);
} }
}); });

View File

@ -24,7 +24,7 @@ fn main() {
.map(|path| { .map(|path| {
let code = read_source(path).unwrap(); let code = read_source(path).unwrap();
let start = std::time::Instant::now(); let start = std::time::Instant::now();
std::hint::black_box(parser.run(&code)); std::hint::black_box(parser.parse_module(&code));
start.elapsed() start.elapsed()
}) })
.sum(); .sum();
@ -85,7 +85,7 @@ fn bench_std_lib(b: &mut test::Bencher) {
b.bytes = sources.iter().map(|s| s.len() as u64).sum(); b.bytes = sources.iter().map(|s| s.len() as u64).sum();
b.iter(|| { b.iter(|| {
for source in &sources { for source in &sources {
test::black_box(parser.run(source)); test::black_box(parser.parse_module(source));
} }
}); });
} }

View File

@ -13,7 +13,7 @@ fn main() {
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) { if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_; code = code_;
} }
let ast = enso_parser::Parser::new().run(code); let ast = enso_parser::Parser::new().parse_module(code);
let data = let data =
enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format"); enso_parser::format::serialize(&ast).expect("Failed to serialize AST to binary format");
std::io::stdout().write_all(&data).unwrap(); std::io::stdout().write_all(&data).unwrap();

View File

@ -132,7 +132,7 @@ fn check_file(
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) { if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_; code = code_;
} }
let ast = parser.run(code); let ast = parser.parse_module(code);
let mut messages = if smoke_test { vec![] } else { collect_messages(&ast, &file.path) }; let mut messages = if smoke_test { vec![] } else { collect_messages(&ast, &file.path) };
if ast.code() != code { if ast.code() != code {
messages.push(format!( messages.push(format!(

View File

@ -13,6 +13,6 @@ fn main() {
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) { if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_; code = code_;
} }
let ast = enso_parser::Parser::new().run(code); let ast = enso_parser::Parser::new().parse_module(code);
serde_json::to_writer(std::io::stdout(), &ast).unwrap(); serde_json::to_writer(std::io::stdout(), &ast).unwrap();
} }

View File

@ -26,7 +26,7 @@ fn check_file(path: &str, mut code: &str) {
if let Some((_meta, code_)) = enso_parser::metadata::parse(code) { if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
code = code_; code = code_;
} }
let ast = enso_parser::Parser::new().run(code); let ast = enso_parser::Parser::new().parse_module(code);
let expected_span = 0..(code.encode_utf16().count() as u32); let expected_span = 0..(code.encode_utf16().count() as u32);
let mut locations = enso_parser::source::code::debug::LocationCheck::new(); let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations) enso_parser_debug::validate_spans(&ast, expected_span, &mut locations)

View File

@ -40,6 +40,12 @@ macro_rules! test {
} }
} }
macro_rules! test_block {
( $code:expr, $($statements:tt)* ) => {
test_block($code, block![$( $statements )*])
}
}
// ================================ // ================================
@ -379,9 +385,14 @@ fn type_def_nested() {
#[test] #[test]
fn assignment_simple() { fn assignment_simple() {
test!("foo = x", (Assignment (Ident foo) (Ident x))); // At the top level of a module, this defines a function with no arguments.
test!("foo=x", (Assignment (Ident foo) (Ident x))); test!("foo = x", (Function (Ident foo) #() () (Ident x)));
test!("foo= x", (Assignment (Ident foo) (Ident x))); // In a body block, this is a variable binding.
test_block!("main =\n foo = x",
(Function (Ident main) #() () (BodyBlock #(
(Assignment (Ident foo) (Ident x))))));
test_block!("foo=x", (Assignment (Ident foo) (Ident x)));
test_block!("foo= x", (Assignment (Ident foo) (Ident x)));
expect_invalid_node("foo =x"); expect_invalid_node("foo =x");
} }
@ -618,14 +629,12 @@ fn code_block_body() {
#[test] #[test]
fn code_block_operator() { fn code_block_operator() {
let code = ["value = nums", " * each random", " + constant"]; let code = ["value = nums", " * each random", " + constant"];
let expect = block![ test_block!(code.join("\n"),
(Assignment (Ident value) (Assignment (Ident value)
(OperatorBlockApplication (Ident nums) (OperatorBlockApplication (Ident nums)
#(((Ok "*") (App (Ident each) (Ident random))) #(((Ok "*") (App (Ident each) (Ident random)))
((Ok "+") (Ident constant))) ((Ok "+") (Ident constant)))
#())) #())));
];
test(code.join("\n"), expect);
} }
#[test] #[test]
@ -641,37 +650,17 @@ fn dot_operator_blocks() {
#[test] #[test]
fn code_block_argument_list() { fn code_block_argument_list() {
#[rustfmt::skip] test!("foo\n bar", (ArgumentBlockApplication (Ident foo) #((Ident bar))));
let code = [
"foo",
" bar",
];
test!(code.join("\n"), (ArgumentBlockApplication (Ident foo) #((Ident bar))));
#[rustfmt::skip] test_block!("value = foo\n bar",
let code = [ (Assignment (Ident value) (ArgumentBlockApplication (Ident foo) #((Ident bar)))));
"value = foo",
" bar",
];
let expect = block![
(Assignment (Ident value) (ArgumentBlockApplication (Ident foo) #((Ident bar))))
];
test(code.join("\n"), expect);
#[rustfmt::skip] let code = ["value = foo", " +x", " bar"];
let code = [ test_block!(code.join("\n"),
"value = foo",
" +x",
" bar",
];
#[rustfmt::skip]
let expect = block![
(Assignment (Ident value) (Assignment (Ident value)
(ArgumentBlockApplication (Ident foo) #( (ArgumentBlockApplication (Ident foo) #(
(OprSectionBoundary 1 (OprApp () (Ok "+") (Ident x))) (OprSectionBoundary 1 (OprApp () (Ok "+") (Ident x)))
(Ident bar)))) (Ident bar)))));
];
test(code.join("\n"), expect);
} }
#[test] #[test]
@ -804,25 +793,21 @@ fn accessor_operator() {
#[test] #[test]
fn operator_sections() { fn operator_sections() {
#[rustfmt::skip] test!(".map (+2 * 3) *7",
test(".map (+2 * 3) *7", block![
(OprSectionBoundary 1 (OprSectionBoundary 1
(App (App (OprApp () (Ok ".") (Ident map)) (App (App (OprApp () (Ok ".") (Ident map))
(Group (Group
(OprSectionBoundary 1 (OprApp (OprApp () (Ok "+") (Number () "2" ())) (OprSectionBoundary 1 (OprApp (OprApp () (Ok "+") (Number () "2" ()))
(Ok "*") (Number () "3" ()))))) (Ok "*") (Number () "3" ())))))
-        (OprSectionBoundary 1 (OprApp () (Ok "*") (Number () "7" ())))))]);
-    #[rustfmt::skip]
-    test(".sum 1", block![
-        (OprSectionBoundary 1 (App (OprApp () (Ok ".") (Ident sum)) (Number () "1" ())))]);
-    #[rustfmt::skip]
-    test("+1 + x", block![
+        (OprSectionBoundary 1 (OprApp () (Ok "*") (Number () "7" ()))))));
+    test!(".sum 1",
+        (OprSectionBoundary 1 (App (OprApp () (Ok ".") (Ident sum)) (Number () "1" ()))));
+    test!("+1 + x",
        (OprSectionBoundary 1 (OprApp (OprApp () (Ok "+") (Number () "1" ()))
-            (Ok "+") (Ident x)))]);
-    #[rustfmt::skip]
-    test("increment = 1 +", block![
+            (Ok "+") (Ident x))));
+    test_block!("increment = 1 +",
        (Assignment (Ident increment)
-            (OprSectionBoundary 1 (OprApp (Number () "1" ()) (Ok "+") ())))]);
+            (OprSectionBoundary 1 (OprApp (Number () "1" ()) (Ok "+") ()))));
    test!("1+ << 2*",
        (OprSectionBoundary 1
            (OprApp (OprApp (Number () "1" ()) (Ok "+") ())
@@ -895,18 +880,18 @@ fn unary_operator_at_end_of_expression() {
#[test]
fn unspaced_operator_sequence() {
    // Add a negated value.
-    test!("x = y+-z",
+    test_block!("x = y+-z",
        (Assignment (Ident x) (OprApp (Ident y) (Ok "+") (UnaryOprApp "-" (Ident z)))));
    // Create an operator section that adds a negated value to its input.
-    test!("x = +-z",
+    test_block!("x = +-z",
        (Assignment (Ident x) (OprSectionBoundary 1
            (OprApp () (Ok "+") (UnaryOprApp "-" (Ident z))))));
    // The `-` can only be lexed as a unary operator, and unary operators cannot form sections.
-    expect_invalid_node("x = y+-");
+    expect_invalid_node("main =\n x = y+-");
    // Assign a negative number to x.
-    test!("x=-1", (Assignment (Ident x) (UnaryOprApp "-" (Number () "1" ()))));
+    test_block!("x=-1", (Assignment (Ident x) (UnaryOprApp "-" (Number () "1" ()))));
    // Assign a negated value to x.
-    test!("x=-y", (Assignment (Ident x) (UnaryOprApp "-" (Ident y))));
+    test_block!("x=-y", (Assignment (Ident x) (UnaryOprApp "-" (Ident y))));
}
#[test]
@@ -935,7 +920,7 @@ fn minus_unary() {
    test!("-x", (UnaryOprApp "-" (Ident x)));
    test!("(-x)", (Group (UnaryOprApp "-" (Ident x))));
    test!("-(x * x)", (UnaryOprApp "-" (Group (OprApp (Ident x) (Ok "*") (Ident x)))));
-    test!("x=-x", (Assignment (Ident x) (UnaryOprApp "-" (Ident x))));
+    test_block!("x=-x", (Assignment (Ident x) (UnaryOprApp "-" (Ident x))));
    test!("-x+x", (OprApp (UnaryOprApp "-" (Ident x)) (Ok "+") (Ident x)));
    test!("-x*x", (OprApp (UnaryOprApp "-" (Ident x)) (Ok "*") (Ident x)));
}
@@ -961,9 +946,9 @@ fn method_app_in_minus_unary() {
#[test]
fn autoscope_operator() {
-    test!("x : ..True", (TypeSignature (Ident x) ":" (AutoscopedIdentifier ".." True)));
-    test!("x = ..True", (Assignment (Ident x) (AutoscopedIdentifier ".." True)));
-    test!("x = f ..True",
+    test_block!("x : ..True", (TypeSignature (Ident x) ":" (AutoscopedIdentifier ".." True)));
+    test_block!("x = ..True", (Assignment (Ident x) (AutoscopedIdentifier ".." True)));
+    test_block!("x = f ..True",
        (Assignment (Ident x) (App (Ident f) (AutoscopedIdentifier ".." True))));
    expect_invalid_node("x = ..not_a_constructor");
    expect_invalid_node("x = case a of ..True -> True");
@@ -1106,9 +1091,9 @@ fn type_signatures() {
#[test]
fn type_annotations() {
-    test!("val = x : Int",
+    test_block!("val = x : Int",
        (Assignment (Ident val) (TypeAnnotated (Ident x) ":" (Ident Int))));
-    test!("val = foo (x : Int)",
+    test_block!("val = foo (x : Int)",
        (Assignment (Ident val)
            (App (Ident foo)
                (Group (TypeAnnotated (Ident x) ":" (Ident Int))))));
@@ -1131,10 +1116,10 @@ fn type_annotations() {
#[test]
fn inline_text_literals() {
    test!(r#""I'm an inline raw text!""#, (TextLiteral #((Section "I'm an inline raw text!"))));
-    test!(r#"zero_length = """#, (Assignment (Ident zero_length) (TextLiteral #())));
+    test_block!(r#"zero_length = """#, (Assignment (Ident zero_length) (TextLiteral #())));
    test!(r#""type""#, (TextLiteral #((Section "type"))));
-    test!(r#"unclosed = ""#, (Assignment (Ident unclosed) (TextLiteral #())));
-    test!(r#"unclosed = "a"#, (Assignment (Ident unclosed) (TextLiteral #((Section "a")))));
+    test_block!(r#"unclosed = ""#, (Assignment (Ident unclosed) (TextLiteral #())));
+    test_block!(r#"unclosed = "a"#, (Assignment (Ident unclosed) (TextLiteral #((Section "a")))));
    test!(r#"'Other quote type'"#, (TextLiteral #((Section "Other quote type"))));
    test!(r#""Non-escape: \n""#, (TextLiteral #((Section "Non-escape: \\n"))));
    test!(r#""Non-escape: \""#, (TextLiteral #((Section "Non-escape: \\"))));
@@ -1152,7 +1137,7 @@ fn inline_text_literals() {
#[test]
fn multiline_text_literals() {
-    test("'''", block![(TextLiteral #())]);
+    test!("'''", (TextLiteral #()));
    let code = r#""""
part of the string
3-spaces indented line, part of the Text Block
@@ -1161,8 +1146,7 @@
`also` part of the string
x"#;
-    #[rustfmt::skip]
-    let expected = block![
+    test!(code,
        (TextLiteral
            #((Section "part of the string") (Newline)
              (Section " 3-spaces indented line, part of the Text Block") (Newline)
@@ -1170,40 +1154,25 @@ x"#;
            (Newline)
            (Section "`also` part of the string")))
        ()
-        (Ident x)
-    ];
-    test(code, expected);
-    let code = r#""""
+        (Ident x));
+    test!(r#""""
multiline string that doesn't end in a newline
-x"#;
-    #[rustfmt::skip]
-    let expected = block![
+x"#,
        (TextLiteral #((Section "multiline string that doesn't end in a newline")))
-        (Ident x)
-    ];
-    test(code, expected);
-    let code = "x = \"\"\"\n Indented multiline\nx";
-    #[rustfmt::skip]
-    let expected = block![
+        (Ident x));
+    test_block!("x = \"\"\"\n Indented multiline\nx",
        (Assignment (Ident x) (TextLiteral #((Section "Indented multiline"))))
-        (Ident x)
-    ];
-    test(code, expected);
-    let code = "'''\n \\nEscape at start\n";
-    test!(code, (TextLiteral #((Escape 0x0A) (Section "Escape at start"))) ());
-    let code = "x =\n x = '''\n x\nx";
-    #[rustfmt::skip]
-    let expected = block![
+        (Ident x));
+    test!("'''\n \\nEscape at start\n",
+        (TextLiteral #((Escape 0x0A) (Section "Escape at start"))) ());
+    test!("x =\n x = '''\n x\nx",
        (Function (Ident x) #() ()
         (BodyBlock #((Assignment (Ident x) (TextLiteral #((Section "x")))))))
-        (Ident x)
-    ];
-    test(code, expected);
-    test!("foo = bar '''\n baz",
+        (Ident x));
+    test_block!("foo = bar '''\n baz",
        (Assignment (Ident foo) (App (Ident bar) (TextLiteral #((Section "baz"))))));
    test!("'''\n \\t'", (TextLiteral #((Escape 0x09) (Section "'"))));
-    test!("'''\n x\n \\t'",
-        (TextLiteral #((Section "x") (Newline) (Escape 0x09) (Section "'"))));
+    test!("'''\n x\n \\t'", (TextLiteral #((Section "x") (Newline) (Escape 0x09) (Section "'"))));
}
#[test]
@@ -1287,7 +1256,7 @@ fn old_lambdas() {
    test!("f x->\n y",
        (App (Ident f) (OprApp (Ident x) (Ok "->") (BodyBlock #((Ident y))))));
    test!("x->y-> z", (OprApp (Ident x) (Ok "->") (OprApp (Ident y) (Ok "->") (Ident z))));
-    test!("foo = x -> (y = bar x) -> x + y",
+    test_block!("foo = x -> (y = bar x) -> x + y",
        (Assignment (Ident foo)
            (OprApp (Ident x) (Ok "->")
                (OprApp (Group (OprApp (Ident y) (Ok "=") (App (Ident bar) (Ident x)))) (Ok "->")
@@ -1299,10 +1268,10 @@ fn old_lambdas() {
#[test]
fn pattern_irrefutable() {
-    test!("Point x_val = my_point",
+    test_block!("Point x_val = my_point",
        (Assignment (App (Ident Point) (Ident x_val)) (Ident my_point)));
-    test!("Vector _ = x", (Assignment (App (Ident Vector) (Wildcard -1)) (Ident x)));
-    test!("X.y = z", (Function (OprApp (Ident X) (Ok ".") (Ident y)) #() () (Ident z)));
+    test_block!("Vector _ = x", (Assignment (App (Ident Vector) (Wildcard -1)) (Ident x)));
+    test_block!("X.y = z", (Function (OprApp (Ident X) (Ok ".") (Ident y)) #() () (Ident z)));
}
#[test]
@@ -1411,10 +1380,10 @@ fn suspended_default_arguments_in_pattern() {
#[test]
fn suspended_default_arguments_in_expression() {
-    test!("c = self.value ...",
+    test_block!("c = self.value ...",
        (Assignment (Ident c)
            (App (OprApp (Ident self) (Ok ".") (Ident value)) (SuspendedDefaultArguments))));
-    test!("c = self.value...",
+    test_block!("c = self.value...",
        (Assignment (Ident c)
            (App (OprApp (Ident self) (Ok ".") (Ident value)) (SuspendedDefaultArguments))));
}
@@ -1426,7 +1395,7 @@ fn private_keyword() {
    test!("private", (Private()));
    expect_invalid_node("private func");
    // Private binding is not supported.
-    expect_invalid_node("private var = 42");
+    expect_invalid_node("main =\n private var = 42");
    expect_invalid_node("private ConstructorOutsideType");
    expect_invalid_node("type My_Type\n private");
    expect_invalid_node("private type My_Type\n Ctor");
@@ -1502,7 +1471,7 @@ mod numbers {
    #[test]
    fn with_decimal() {
-        test!("pi = 3.14", (Assignment (Ident pi) (Number () "3" ("." "14"))));
+        test_block!("pi = 3.14", (Assignment (Ident pi) (Number () "3" ("." "14"))));
    }
    #[test]
@@ -1644,11 +1613,11 @@ fn skip() {
#[test]
fn statement_in_expression_context() {
-    test!("x = y = z", (Assignment (Ident x) (Invalid)));
+    test_block!("x = y = z", (Assignment (Ident x) (Invalid)));
    test!("(y = z)", (Group(Invalid)));
    test!("(y = z) x", (App (Group (Invalid)) (Ident x)));
-    test!("(f x = x)", (Group(Invalid)));
-    test!("y = f x = x", (Assignment (Ident y) (Invalid)));
+    test_block!("(f x = x)", (Group(Invalid)));
+    test_block!("y = f x = x", (Assignment (Ident y) (Invalid)));
}
@@ -1858,13 +1827,31 @@ fn test<T: AsRef<str>>(code: T, expect: lexpr::Value) {
    expect_tree_representing_code(code, &ast);
}
+fn test_block<T: AsRef<str>>(code: T, expect: lexpr::Value) {
+    let code = code.as_ref();
+    let ast = parse_block(code);
+    let ast_s_expr = to_s_expr(&ast, code);
+    assert_eq!(ast_s_expr.to_string(), expect.to_string(), "{:?}", &ast);
+    expect_tree_representing_code(code, &ast);
+}
fn parse(code: &str) -> enso_parser::syntax::tree::Tree {
-    let ast = enso_parser::Parser::new().run(code);
+    let ast = enso_parser::Parser::new().parse_module(code);
+    validate_parse(code, &ast);
+    ast
+}
+fn parse_block(code: &str) -> enso_parser::syntax::tree::Tree {
+    let ast = enso_parser::Parser::new().parse_block(code);
+    validate_parse(code, &ast);
+    ast
+}
+fn validate_parse(code: &str, ast: &enso_parser::syntax::Tree) {
    let expected_span = 0..(code.encode_utf16().count() as u32);
    let mut locations = enso_parser::source::code::debug::LocationCheck::new();
-    enso_parser_debug::validate_spans(&ast, expected_span, &mut locations).unwrap();
+    enso_parser_debug::validate_spans(ast, expected_span, &mut locations).unwrap();
    locations.check(code);
-    ast
}
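Illustrative example (not part of the commit): the `test!`/`test_block!` pair above makes the new context distinction directly testable. Only the `Assignment` expectation below appears verbatim in the diff; the `Function` s-expression is an assumption based on the forms used elsewhere in these tests.

#[test]
fn assignment_vs_thunk_by_context() {
    // At the top level of a module, `name = expression` is a zero-argument function (a thunk):
    // the RHS is re-evaluated each time `pi` is evaluated.
    test!("pi = 3.14", (Function (Ident pi) #() () (Number () "3" ("." "14"))));
    // In a function body, the same line is a variable assignment, evaluated eagerly.
    test_block!("pi = 3.14", (Assignment (Ident pi) (Number () "3" ("." "14"))));
}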
@@ -1895,7 +1882,7 @@ impl Errors {
/// Checks that an input contains an `Invalid` node somewhere.
fn expect_invalid_node(code: &str) {
-    let ast = enso_parser::Parser::new().run(code);
+    let ast = enso_parser::Parser::new().parse_module(code);
    expect_tree_representing_code(code, &ast);
    let errors = Errors::collect(&ast, code);
    assert!(errors.invalid_node, "{}", to_s_expr(&ast, code));
@@ -1903,7 +1890,7 @@ fn expect_invalid_node(code: &str) {
/// Checks that an input contains a multiple-operator error somewhere.
fn expect_multiple_operator_error(code: &str) {
-    let ast = enso_parser::Parser::new().run(code);
+    let ast = enso_parser::Parser::new().parse_module(code);
    expect_tree_representing_code(code, &ast);
    let errors = Errors::collect(&ast, code);
    assert!(errors.multiple_operator || errors.invalid_node, "{}", to_s_expr(&ast, code));
@@ -1912,7 +1899,7 @@ fn expect_multiple_operator_error(code: &str) {
/// Check that the input can be parsed, and doesn't yield any `Invalid` nodes.
fn expect_valid(code: &str) {
-    let ast = enso_parser::Parser::new().run(code);
+    let ast = enso_parser::Parser::new().parse_module(code);
    expect_tree_representing_code(code, &ast);
    let errors = Errors::collect(&ast, code);
    assert!(!errors.invalid_node);


@@ -49,7 +49,7 @@ fn extract_docs(_filename: &str, mut code: &str) -> Vec<String> {
    if let Some((_meta, code_)) = enso_parser::metadata::parse(code) {
        code = code_;
    }
-    let ast = enso_parser::Parser::new().run(code);
+    let ast = enso_parser::Parser::new().parse_module(code);
    let docs = RefCell::new(vec![]);
    ast.visit_trees(|tree| match &tree.variant {
        enso_parser::syntax::tree::Variant::Documented(doc) => {


@@ -23,12 +23,20 @@ public final class Parser {
    return getWorker().isIdentOrOperator(input);
  }
-  public static ByteBuffer parseInputLazy(CharSequence input) {
-    return getWorker().parseInputLazy(input);
+  public static ByteBuffer parseModuleLazy(CharSequence input) {
+    return getWorker().parseLazy(input, false);
  }
-  public static Tree parse(CharSequence input) {
-    return getWorker().parse(input);
+  public static ByteBuffer parseBlockLazy(CharSequence input) {
+    return getWorker().parseLazy(input, true);
+  }
+  public static Tree.BodyBlock parseModule(CharSequence input) {
+    return getWorker().parse(input, false);
+  }
+  public static Tree.BodyBlock parseBlock(CharSequence input) {
+    return getWorker().parse(input, true);
  }
  public static UUID getUuid(long metadata, long nodeOffset, long nodeLength) {
@@ -191,31 +199,47 @@ public final class Parser {
      return Parser.isIdentOrOperator(inputBuf);
    }
-    ByteBuffer parseInputLazy(CharSequence input) {
-      byte[] inputBytes = input.toString().getBytes(StandardCharsets.UTF_8);
-      ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
-      inputBuf.put(inputBytes);
-      return withState(state -> parseTreeLazy(state, inputBuf));
-    }
-    Tree parse(CharSequence input) {
+    ByteBuffer parseLazy(CharSequence input, boolean isInternalBlock) {
      byte[] inputBytes = input.toString().getBytes(StandardCharsets.UTF_8);
      ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
      inputBuf.put(inputBytes);
      return withState(
          state -> {
-            var serializedTree = parseTree(state, inputBuf);
+            ByteBuffer serializedTree;
+            if (isInternalBlock) {
+              serializedTree = parseBlockLazy(state, inputBuf);
+            } else {
+              serializedTree = parseModuleLazy(state, inputBuf);
+            }
+            return serializedTree;
+          });
+    }
+    Tree.BodyBlock parse(CharSequence input, boolean isInternalBlock) {
+      byte[] inputBytes = input.toString().getBytes(StandardCharsets.UTF_8);
+      ByteBuffer inputBuf = ByteBuffer.allocateDirect(inputBytes.length);
+      inputBuf.put(inputBytes);
+      return withState(
+          state -> {
+            ByteBuffer serializedTree;
+            if (isInternalBlock) {
+              serializedTree = parseBlock(state, inputBuf);
+            } else {
+              serializedTree = parseModule(state, inputBuf);
+            }
            var base = getLastInputBase(state);
            var metadata = getMetadata(state);
            serializedTree.order(ByteOrder.LITTLE_ENDIAN);
            var message = new Message(serializedTree, input, base, metadata);
+            Tree parsed;
            try {
-              return Tree.deserialize(message);
+              parsed = Tree.deserialize(message);
            } catch (BufferUnderflowException | IllegalArgumentException e) {
              LoggerFactory.getLogger(this.getClass())
                  .error("Unrecoverable parser failure for: {}", input, e);
              throw e;
            }
+            return (Tree.BodyBlock) parsed;
          });
    }
  }
@@ -226,9 +250,13 @@ public final class Parser {
  private static native void freeState(long state);
-  private static native ByteBuffer parseTree(long state, ByteBuffer input);
+  private static native ByteBuffer parseModule(long state, ByteBuffer input);
-  private static native ByteBuffer parseTreeLazy(long state, ByteBuffer input);
+  private static native ByteBuffer parseBlock(long state, ByteBuffer input);
+  private static native ByteBuffer parseModuleLazy(long state, ByteBuffer input);
+  private static native ByteBuffer parseBlockLazy(long state, ByteBuffer input);
  private static native long isIdentOrOperator(ByteBuffer input);


@@ -28,9 +28,9 @@ fn main() {
    println!("import java.nio.ByteOrder;");
    println!();
    println!("class GeneratedFormatTests {{");
-    // Force the parser to load its shared library. `parse` handles this because usually it is the
-    // entry point to the class, but we're doing low-level operations directly.
-    println!("  private static final Object INIT = {package}.Parser.parse(\"\");");
+    // Force the parser to load its shared library. `parseModule` handles this because usually it is
+    // the entry point to the class, but we're doing low-level operations directly.
+    println!("  private static final Object INIT = {package}.Parser.parseModule(\"\");");
    println!("  private static java.util.Vector<byte[]> accept;");
    println!("  private static java.util.Vector<byte[]> reject;");
    for (i, case) in cases.accept.iter().enumerate() {


@@ -11,6 +11,7 @@
use enso_prelude::*;
+use enso_parser::macros::resolver::RootContext;
use jni::objects::JByteBuffer;
use jni::objects::JClass;
use jni::sys::jobject;
@@ -26,8 +27,7 @@ use jni::JNIEnv;
static DIRECT_ALLOCATED: &str = "Internal Error: ByteBuffer must be direct-allocated.";
static FAILED_SERIALIZE_AST: &str = "Failed to serialize AST to binary format.";
-/// Parse the input. Returns a serialized representation of the parse tree. The caller is
-/// responsible for freeing the memory associated with the returned buffer.
+/// Parse the input as a module. Returns a serialized representation of the parse tree.
///
/// # Safety
///
@@ -37,11 +37,41 @@ static FAILED_SERIALIZE_AST: &str = "Failed to serialize AST to binary format.";
/// a call to `freeState`.
#[allow(unsafe_code)]
#[no_mangle]
-pub extern "system" fn Java_org_enso_syntax2_Parser_parseTree(
+pub extern "system" fn Java_org_enso_syntax2_Parser_parseModule(
+    env: JNIEnv,
+    class: JClass,
+    state: u64,
+    input: JByteBuffer,
+) -> jobject {
+    parse(env, class, state, input, RootContext::Module)
+}
+/// Parse the input as a block. Returns a serialized representation of the parse tree.
+///
+/// # Safety
+///
+/// The state MUST be a value returned by `allocState` that has not been passed to `freeState`.
+/// The input buffer contents MUST be valid UTF-8.
+/// The contents of the returned buffer MUST not be accessed after another call to `parseInput`, or
+/// a call to `freeState`.
+#[allow(unsafe_code)]
+#[no_mangle]
+pub extern "system" fn Java_org_enso_syntax2_Parser_parseBlock(
+    env: JNIEnv,
+    class: JClass,
+    state: u64,
+    input: JByteBuffer,
+) -> jobject {
+    parse(env, class, state, input, RootContext::Block)
+}
+#[allow(unsafe_code)]
+fn parse(
    mut env: JNIEnv,
    _class: JClass,
    state: u64,
    input: JByteBuffer,
+    root_context: RootContext,
) -> jobject {
    let state = unsafe { &mut *(state as usize as *mut State) };
    let input = unsafe { decode_utf8_buffer(&env, &input) };
@@ -55,34 +85,34 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_parseTree(
        code = code_;
    }
    state.base = str::as_ptr(code) as usize as u64;
-    let tree = enso_parser::Parser::new().run(code);
-    state.output = match enso_parser::serialization::serialize_tree(&tree) {
-        Ok(tree) => tree,
+    let parser = enso_parser::Parser::new();
+    let tree = match root_context {
+        RootContext::Module => parser.parse_module(code),
+        RootContext::Block => parser.parse_block(code),
+    };
+    state.output = enso_parser::serialization::serialize_tree(&tree).unwrap_or_else(|_| {
        // `Tree` does not contain any types with fallible `serialize` implementations, so this
        // cannot fail.
-        Err(_) => {
-            debug_assert!(false);
-            default()
-        }
-    };
+        debug_assert!(false);
+        default()
+    });
    state.metadata = meta;
    let result =
        unsafe { env.new_direct_byte_buffer(state.output.as_mut_ptr(), state.output.len()) };
    result.unwrap().into_raw()
}
-/// Parse the input. Returns a serialize format compatible with a lazy deserialization strategy. The
-/// caller is responsible for freeing the memory associated with the returned buffer.
+/// Parse a module. Returns a serialize format compatible with a lazy deserialization strategy.
///
/// # Safety
///
/// The state MUST be a value returned by `allocState` that has not been passed to `freeState`.
/// The input buffer contents MUST be valid UTF-8.
-/// The contents of the returned buffer MUST not be accessed after another call to `parseInput`, or
+/// The contents of the returned buffer MUST NOT be accessed after another call to `parseInput`, or
/// a call to `freeState`.
#[allow(unsafe_code)]
#[no_mangle]
-pub extern "system" fn Java_org_enso_syntax2_Parser_parseTreeLazy(
+pub extern "system" fn Java_org_enso_syntax2_Parser_parseModuleLazy(
    mut env: JNIEnv,
    _class: JClass,
    state: u64,
@@ -91,7 +121,34 @@ pub extern "system" fn Java_org_enso_syntax2_Parser_parseTreeLazy(
    let state = unsafe { &mut *(state as usize as *mut State) };
    let input = unsafe { decode_utf8_buffer(&env, &input) };
-    let tree = enso_parser::Parser::new().run(input);
+    let tree = enso_parser::Parser::new().parse_module(input);
+    state.output = enso_parser::format::serialize(&tree).expect(FAILED_SERIALIZE_AST);
+    let result =
+        unsafe { env.new_direct_byte_buffer(state.output.as_mut_ptr(), state.output.len()) };
+    result.unwrap().into_raw()
+}
+/// Parse a block. Returns a serialize format compatible with a lazy deserialization strategy.
+///
+/// # Safety
+///
+/// The state MUST be a value returned by `allocState` that has not been passed to `freeState`.
+/// The input buffer contents MUST be valid UTF-8.
+/// The contents of the returned buffer MUST NOT be accessed after another call to `parseInput`, or
+/// a call to `freeState`.
+#[allow(unsafe_code)]
+#[no_mangle]
+pub extern "system" fn Java_org_enso_syntax2_Parser_parseBlockLazy(
+    mut env: JNIEnv,
+    _class: JClass,
+    state: u64,
+    input: JByteBuffer,
+) -> jobject {
+    let state = unsafe { &mut *(state as usize as *mut State) };
+    let input = unsafe { decode_utf8_buffer(&env, &input) };
+    let tree = enso_parser::Parser::new().parse_block(input);
    state.output = enso_parser::format::serialize(&tree).expect(FAILED_SERIALIZE_AST);
    let result =


@@ -89,6 +89,7 @@
use crate::prelude::*;
use crate::lexer::Lexer;
+use crate::macros::resolver::RootContext;
use crate::source::Code;
use crate::syntax::token;
use crate::syntax::tree::SyntaxError;
@@ -163,9 +164,18 @@ impl Parser {
        Self { macros }
    }
-    /// Main entry point.
-    pub fn run<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
-        let resolver = macros::resolver::Resolver::new(&self.macros);
+    /// Main entry point. Interprets the input as a module, and returns the resulting [`BodyBlock`].
+    pub fn parse_module<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
+        self.run(code, RootContext::Module)
+    }
+    /// Parses the input as a block.
+    pub fn parse_block<'s>(&self, code: &'s str) -> syntax::Tree<'s> {
+        self.run(code, RootContext::Block)
+    }
+    fn run<'s>(&self, code: &'s str, root_context: RootContext) -> syntax::Tree<'s> {
+        let resolver = macros::resolver::Resolver::new(&self.macros, root_context);
        let ParseResult { value, internal_error } = Lexer::new(code, resolver).finish();
        if let Some(error) = internal_error {
            return value.with_error(format!("Internal error: {error}"));
@@ -294,7 +304,7 @@ mod benches {
        let str = "type Option a b c\n".repeat(reps);
        let parser = Parser::new();
        bencher.iter(move || {
-            parser.run(&str);
+            parser.parse_module(&str);
        });
    }
@@ -336,7 +346,7 @@ mod benches {
        let parser = Parser::new();
        bencher.bytes = str.len() as u64;
        bencher.iter(move || {
-            parser.run(&str);
+            parser.parse_module(&str);
        });
    }
@@ -371,7 +381,7 @@ mod benches {
        let parser = Parser::new();
        bencher.bytes = str.len() as u64;
        bencher.iter(move || {
-            parser.run(&str);
+            parser.parse_module(&str);
        });
    }
}
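A minimal usage sketch of the renamed entry points (the calls are taken from the API above; the snippet itself and its input strings are not part of the commit):

fn parse_both_ways() {
    let parser = enso_parser::Parser::new();
    // Whole source file: module rules apply, so `x = 1` at the top level defines a thunk.
    let _module_tree = parser.parse_module("x = 1\nmain = x");
    // Input treated as the body of a function: `x = 1` is an eager variable assignment.
    let _block_tree = parser.parse_block("x = 1\nx + 1");
}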


@@ -129,35 +129,37 @@ impl<'a> SegmentMap<'a> {
/// to learn more about the macro resolution steps.
#[derive(Debug)]
struct ResolverState<'s> {
    blocks: Vec<Block>,
    /// The lines of all currently-open blocks. This is partitioned by `blocks`.
    lines: Vec<syntax::item::Line<'s>>,
    groups: Vec<OpenGroup<'s>>,
    /// All currently-open macros. These are partitioned into scopes by `blocks`.
    macros: Vec<PartiallyMatchedMacro<'s>>,
    /// Segments of all currently-open macros. These are partitioned by `macros`.
    segments: Vec<MatchedSegment<'s>>,
    /// Items of all segments of all currently-open macros. These are partitioned by `segments`.
    items: Vec<Item<'s>>,
    context: Context,
-    precedence: syntax::operator::Precedence<'s>,
+    root_context: RootContext,
+    precedence: syntax::operator::Precedence<'s>,
}
// === Public API ===
impl<'s> ResolverState<'s> {
-    /// Create a new resolver, in statement context.
-    fn new_statement() -> Self {
+    /// Create a new resolver.
+    fn new(root_context: RootContext, context: Context) -> Self {
        Self {
-            context: Context::Statement,
+            context,
+            root_context,
            precedence: syntax::operator::Precedence::new(),
            blocks: default(),
            lines: vec![initial_line()],
            groups: default(),
            macros: default(),
            segments: default(),
            items: default(),
        }
    }
}
@@ -174,7 +176,11 @@ impl<'s> Finish for ResolverState<'s> {
    fn finish(&mut self) -> Self::Result {
        self.finish_current_line();
-        let tree = syntax::tree::block::parse_module(self.lines.drain(..), &mut self.precedence);
+        let lines = self.lines.drain(..);
+        let tree = match self.root_context {
+            RootContext::Module => syntax::tree::block::parse_module(lines, &mut self.precedence),
+            RootContext::Block => syntax::tree::block::parse_block(lines, &mut self.precedence),
+        };
        debug_assert!(self.blocks.is_empty());
        debug_assert!(self.lines.is_empty());
        debug_assert!(self.groups.is_empty());
@@ -187,6 +193,15 @@ impl<'s> Finish for ResolverState<'s> {
    }
}
+/// Specifies how statements of the input should be interpreted.
+#[derive(Debug, Copy, Clone)]
+pub enum RootContext {
+    /// Interpret the input as a sequence of module-level statements.
+    Module,
+    /// Interpret the input as a sequence of statements inside a body block.
+    Block,
+}
/// Resolves macros.
#[derive(Debug)]
pub struct Resolver<'s, 'macros> {
@@ -196,8 +211,8 @@ pub struct Resolver<'s, 'macros> {
impl<'s, 'macros> Resolver<'s, 'macros> {
    /// Creates a macro resolver to use with the given macro map.
-    pub fn new(root_macro_map: &'macros MacroMap) -> Self {
-        Self { resolver: ResolverState::new_statement(), root_macro_map }
+    pub fn new(root_macro_map: &'macros MacroMap, root_context: RootContext) -> Self {
+        Self { resolver: ResolverState::new(root_context, Context::Statement), root_macro_map }
    }
}


@@ -42,7 +42,12 @@ impl<'s> BodyBlockParser<'s> {
    ) -> Tree<'s> {
        let lines = lines.into_iter().map(|item::Line { newline, mut items }| block::Line {
            newline,
-            expression: self.statement_parser.parse_body_block_statement(&mut items, 0, precedence),
+            expression: self.statement_parser.parse_statement(
+                &mut items,
+                0,
+                precedence,
+                EvaluationContext::Eager,
+            ),
        });
        Tree::body_block(block::compound_lines(lines).collect())
    }
@@ -67,18 +72,20 @@ struct StatementParser<'s> {
}
impl<'s> StatementParser<'s> {
-    fn parse_body_block_statement(
+    fn parse_statement(
        &mut self,
        items: &mut Vec<Item<'s>>,
        start: usize,
        precedence: &mut Precedence<'s>,
+        evaluation_context: EvaluationContext,
    ) -> Option<Tree<'s>> {
        let private_keywords = scan_private_keywords(&*items);
-        let mut statement = parse_body_block_statement(
+        let mut statement = parse_statement(
            items,
            start + private_keywords,
            precedence,
            &mut self.args_buffer,
+            evaluation_context,
        );
        for _ in 0..private_keywords {
            let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
@@ -101,24 +108,34 @@ impl<'s> StatementParser<'s> {
        precedence: &mut Precedence<'s>,
    ) -> Option<Tree<'s>> {
        let private_keywords = scan_private_keywords(&*items);
-        let mut statement = parse_body_block_statement(
+        let mut statement = parse_statement(
            items,
            start + private_keywords,
            precedence,
            &mut self.args_buffer,
+            EvaluationContext::Lazy,
        );
+        let mut error = None;
+        if let Some(statement) = statement.as_ref() {
+            error = match &statement.variant {
+                tree::Variant::Assignment(_) =>
+                    SyntaxError::StmtUnexpectedAssignmentInModuleBody.into(),
+                _ => None,
+            };
+        }
        for _ in 0..private_keywords {
            let Item::Token(keyword) = items.pop().unwrap() else { unreachable!() };
            let token::Variant::Private(variant) = keyword.variant else { unreachable!() };
            let keyword = keyword.with_variant(variant);
-            let error = match statement.as_ref().map(|tree| &tree.variant) {
-                Some(tree::Variant::Invalid(_) | tree::Variant::Function(_)) | None => None,
-                _ => SyntaxError::StmtUnexpectedPrivateUsage.into(),
-            };
-            let private_stmt = Tree::private(keyword, statement.take());
-            statement = maybe_with_error(private_stmt, error).into();
+            if error.is_none() {
+                error = match statement.as_ref().map(|tree| &tree.variant) {
+                    Some(tree::Variant::Invalid(_) | tree::Variant::Function(_)) | None => None,
+                    _ => SyntaxError::StmtUnexpectedPrivateUsage.into(),
+                };
+            }
+            statement = Tree::private(keyword, statement.take()).into();
        }
-        statement
+        statement.map(|statement| maybe_with_error(statement, error))
    }
}
@@ -131,11 +148,12 @@ fn scan_private_keywords<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>
        .count()
}
-fn parse_body_block_statement<'s>(
+fn parse_statement<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
+    evaluation_context: EvaluationContext,
) -> Option<Tree<'s>> {
    use token::Variant;
    if let Some(type_def) = try_parse_type_def(items, start, precedence, args_buffer) {
@@ -152,7 +170,15 @@ fn parse_body_block_statement<'s>(
    };
    let statement = match top_level_operator {
        Some((i, Token { variant: Variant::AssignmentOperator(_), .. })) =>
-            parse_assignment_like_statement(items, start, i, precedence, args_buffer).into(),
+            parse_assignment_like_statement(
+                items,
+                start,
+                i,
+                precedence,
+                args_buffer,
+                evaluation_context,
+            )
+            .into(),
        Some((i, Token { variant: Variant::TypeAnnotationOperator(_), .. })) => {
            let type_ = precedence.resolve_non_section_offset(i + 1, items);
            let Some(Item::Token(operator)) = items.pop() else { unreachable!() };
@@ -179,12 +205,21 @@ fn parse_body_block_statement<'s>(
    statement
}
+#[derive(Debug, Copy, Clone)]
+enum EvaluationContext {
+    /// A context in which variable assignments are allowed.
+    Eager,
+    /// A context in which variable assignments must not occur.
+    Lazy,
+}
fn parse_assignment_like_statement<'s>(
    items: &mut Vec<Item<'s>>,
    start: usize,
    operator: usize,
    precedence: &mut Precedence<'s>,
    args_buffer: &mut Vec<ArgumentDefinition<'s>>,
+    evaluation_context: EvaluationContext,
) -> Tree<'s> {
    if operator == start {
        return precedence
@@ -199,7 +234,13 @@ fn parse_assignment_like_statement<'s>(
    let token::Variant::AssignmentOperator(variant) = operator.variant else { unreachable!() };
    let operator = operator.with_variant(variant);
-    let qn_len = scan_qn(&items[start..]);
+    let qn_len = match (evaluation_context, scan_qn(&items[start..])) {
+        (_, Some(Qn::Binding { len }))
+        // In a context where assignments are not allowed, even a name whose last identifier is
+        // capitalized can be a function definition (rather than an assignment pattern).
+        | (EvaluationContext::Lazy, Some(Qn::Type { len })) => len.into(),
+        _ => None,
+    };
    let mut operator = Some(operator);
    if let Some(function) = try_parse_foreign_function(
@@ -214,22 +255,29 @@ fn parse_assignment_like_statement<'s>(
    }
    let operator = operator.unwrap();
-    match (expression, qn_len) {
-        (Some(e), Some(qn_len)) if matches!(e.variant, tree::Variant::BodyBlock(_)) => {
+    enum Type<'s> {
+        Assignment { expression: Tree<'s> },
+        Function { expression: Option<Tree<'s>>, qn_len: usize },
+        InvalidNoExpressionNoQn,
+    }
+    match match (expression, qn_len) {
+        (Some(e), Some(qn_len))
+            if matches!(evaluation_context, EvaluationContext::Lazy)
+                || matches!(e.variant, tree::Variant::BodyBlock(_)) =>
+            Type::Function { expression: Some(e), qn_len },
+        (Some(expression), None) => Type::Assignment { expression },
+        (Some(expression), Some(1)) if items.len() == start + 1 => Type::Assignment { expression },
+        (expression, Some(qn_len)) => Type::Function { expression, qn_len },
+        (None, None) => Type::InvalidNoExpressionNoQn,
+    } {
+        Type::Assignment { expression } =>
+            parse_assignment(start, items, operator, expression, precedence),
+        Type::Function { expression, qn_len } => {
            let (qn, args, return_) =
                parse_function_decl(items, start, qn_len, precedence, args_buffer);
-            Tree::function(qn, args, return_, operator, Some(e))
+            Tree::function(qn, args, return_, operator, expression)
        }
-        (Some(expression), None) =>
-            parse_assignment(start, items, operator, expression, precedence),
-        (Some(expression), Some(1)) if items.len() == start + 1 =>
-            parse_assignment(start, items, operator, expression, precedence),
-        (e, Some(qn_len)) => {
-            let (qn, args, return_) =
-                parse_function_decl(items, start, qn_len, precedence, args_buffer);
-            Tree::function(qn, args, return_, operator, e)
-        }
-        (None, None) => Tree::opr_app(
+        Type::InvalidNoExpressionNoQn => Tree::opr_app(
            precedence.resolve_non_section_offset(start, items),
            Ok(operator.with_variant(token::variant::Operator())),
            None,
@@ -360,10 +408,19 @@ fn next_spaced(items: &[Item]) -> Option<usize> {
    None
}
+#[derive(Debug)]
+enum Qn {
+    /// A qualified-name whose last segment is capitalized; usually a type or module.
+    Type { len: usize },
+    /// A qualified-name whose last segment is lowercase; usually a variable or function.
+    Binding { len: usize },
+}
/// Returns length of the QN.
-fn scan_qn<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> Option<usize> {
+fn scan_qn<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> Option<Qn> {
+    #[derive(Copy, Clone)]
    enum State {
-        ExpectingDot,
+        ExpectingDot { len: usize },
        ExpectingIdent,
    }
    use token::Variant::*;
@@ -374,17 +431,21 @@ fn scan_qn<'s>(items: impl IntoIterator<Item = impl AsRef<Item<'s>>>) -> Option<
        match item.as_ref() {
            Token(token) if i != 0 && token.is_spaced() => break,
            Token(token) => match (state, &token.variant) {
-                (ExpectingDot, DotOperator(_)) => state = ExpectingIdent,
-                (ExpectingIdent, Ident(ident)) if ident.is_type => state = ExpectingDot,
+                (ExpectingDot { .. }, DotOperator(_)) => state = ExpectingIdent,
+                (ExpectingIdent, Ident(ident)) if ident.is_type =>
+                    state = ExpectingDot { len: i + 1 },
                (
                    ExpectingIdent,
                    Ident(_) | Operator(_) | NegationOperator(_) | UnaryOperator(_),
-                ) => return Some(i + 1),
+                ) => return Some(Qn::Binding { len: i + 1 }),
                _ => break,
            },
            Group(_) | Tree(_) => break,
            Block(_) => unreachable!(),
        }
    }
-    None
+    match state {
+        ExpectingDot { len } => Some(Qn::Type { len }),
+        _ => None,
+    }
}
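For orientation, a simplified restatement of the classification rule above. The `classify` helper and its parameters are hypothetical (they do not exist in the codebase); the enum names mirror the diff, but fields are dropped and the missing-RHS and foreign-function paths are ignored.

#[derive(Copy, Clone)]
enum EvaluationContext { Eager, Lazy }
#[derive(Copy, Clone)]
enum Qn { Type, Binding }

/// How is `lhs = rhs` interpreted?
fn classify(
    context: EvaluationContext,
    scanned_qn: Option<Qn>,
    lhs_is_single_name: bool,
    rhs_is_body_block: bool,
) -> &'static str {
    // A capitalized name (`Qn::Type`) only counts as a definition target where
    // assignments cannot occur (module top level, type bodies).
    let qn = match (context, scanned_qn) {
        (_, Some(Qn::Binding)) => Some(Qn::Binding),
        (EvaluationContext::Lazy, Some(Qn::Type)) => Some(Qn::Type),
        _ => None,
    };
    match (context, qn) {
        // A lazily evaluated context, or an explicit body block on the right: a function/thunk.
        (EvaluationContext::Lazy, Some(_)) => "Function",
        (_, Some(_)) if rhs_is_body_block => "Function",
        // No usable qualified name on the left: an irrefutable-pattern assignment.
        (_, None) => "Assignment",
        // A bare lowercase name inside a function body: a variable assignment.
        (EvaluationContext::Eager, Some(Qn::Binding)) if lhs_is_single_name => "Assignment",
        // Anything longer (e.g. `X.y = z`, `f x = x`): a function or method definition.
        (EvaluationContext::Eager, Some(_)) => "Function",
    }
}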


@@ -5,8 +5,9 @@ use crate::syntax::maybe_with_error;
use crate::syntax::operator::Precedence;
use crate::syntax::statement::function_def::parse_constructor_definition;
use crate::syntax::statement::function_def::parse_type_args;
-use crate::syntax::statement::parse_body_block_statement;
+use crate::syntax::statement::parse_statement;
use crate::syntax::statement::scan_private_keywords;
+use crate::syntax::statement::EvaluationContext;
use crate::syntax::token;
use crate::syntax::tree;
use crate::syntax::tree::block;
@@ -97,9 +98,14 @@ fn parse_type_body_statement<'s>(
        )),
        None => None,
        _ => {
-            let tree =
-                parse_body_block_statement(&mut items, private_keywords, precedence, args_buffer)
-                    .unwrap();
+            let tree = parse_statement(
+                &mut items,
+                private_keywords,
+                precedence,
+                args_buffer,
+                EvaluationContext::Lazy,
+            )
+            .unwrap();
            let error = match &tree.variant {
                tree::Variant::Function(_)
                | tree::Variant::ForeignFunction(_)


@@ -817,6 +817,7 @@ pub enum SyntaxError {
    ForeignFnExpectedStringBody,
    StmtInvalidAssignmentOrMethod,
    StmtLhsInvalidOperatorSpacing,
+    StmtUnexpectedAssignmentInModuleBody,
    StmtUnexpectedPrivateUsage,
    TypeBodyUnexpectedPrivateUsage,
    TypeDefExpectedTypeName,
@@ -851,6 +852,8 @@ impl From<SyntaxError> for Cow<'static, str> {
            StmtInvalidAssignmentOrMethod => "Invalid assignment or method definition",
            StmtLhsInvalidOperatorSpacing =>
                "Each operator on the left side of an assignment operator must be applied to two operands, with the same spacing on each side",
+            StmtUnexpectedAssignmentInModuleBody =>
+                "Unexpected variable assignment in module statement",
            StmtUnexpectedPrivateUsage =>
                "In a body block, the `private` keyword can only be applied to a function definition",
            TypeBodyUnexpectedPrivateUsage =>


@@ -56,6 +56,14 @@ pub fn parse_module<'s>(
    BodyBlockParser::default().parse_module(lines, precedence)
}
+/// Parse a body block.
+pub fn parse_block<'s>(
+    lines: impl IntoIterator<Item = item::Line<'s>>,
+    precedence: &mut operator::Precedence<'s>,
+) -> Tree<'s> {
+    BodyBlockParser::default().parse_body_block(lines, precedence)
+}
// === Multi-line expression construction ===