import * as map from 'lib0/map'
import type { AstId, Module, NodeChild, Owned, OwnedRefs, TextElement, TextToken } from '.'
import {
  Token,
  asOwned,
  isTokenId,
  newExternalId,
  parentId,
  rewriteRefs,
  subtreeRoots,
  syncFields,
  syncNodeMetadata,
} from '.'
import { assert, assertDefined, assertEqual } from '../util/assert'
import { tryGetSoleValue, zip } from '../util/data/iterable'
import type { SourceRangeEdit, SpanTree } from '../util/data/text'
import {
  applyTextEdits,
  applyTextEditsToSpans,
  enclosingSpans,
  textChangeToEdits,
  trimEnd,
} from '../util/data/text'
import {
  IdMap,
  isUuid,
  rangeLength,
  sourceRangeFromKey,
  sourceRangeKey,
  type SourceRange,
  type SourceRangeKey,
} from '../yjsModel'
import { graphParentPointers } from './debug'
import { parse_tree, xxHash128 } from './ffi'
import * as RawAst from './generated/ast'
import { MutableModule } from './mutableModule'
import type { LazyObject } from './parserSupport'
import {
  App,
  Assignment,
  Ast,
  BodyBlock,
  Documented,
  Function,
  Generic,
  Group,
  Ident,
  Import,
  Invalid,
  MutableAssignment,
  MutableAst,
  MutableBodyBlock,
  MutableIdent,
  NegationApp,
  NumericLiteral,
  OprApp,
  PropertyAccess,
  TextLiteral,
  UnaryOprApp,
  Vector,
  Wildcard,
} from './tree'

/**
 * Return the raw parser output for the given code.
 *
 * The code is passed to the `parse_tree` FFI function, and the resulting binary blob is decoded with
 * `RawAst.Tree.read`.
 * @param code - Source text to parse.
 * @returns The root of the raw parse tree.
 * @throws If the root produced by the parser is not a body block (checked with `assert`).
 */
export function parseEnso(code: string): RawAst.Tree.BodyBlock {
  const blob = parse_tree(code)
  // NOTE(review): the read address is `byteLength - 4`; presumably the root object's location is stored in the
  // final 4 bytes of the blob. Confirm against the serialization format.
  const tree = RawAst.Tree.read(new DataView(blob.buffer), blob.byteLength - 4)
  // The root of the parser output is always a body block.
  assert(tree.type === RawAst.Tree.Type.BodyBlock)
  return tree
}

/** Print the AST and re-parse it, copying `externalId`s (but not other metadata) from the original.
*/
export function normalize(rootIn: Ast): Ast {
  const printed = print(rootIn)
  // Capture the IDs of the input, keyed by the spans its nodes occupy in the printed code.
  const idMap = spanMapToIdMap(printed.info)
  const module = MutableModule.Transient()
  const tree = parseEnso(printed.code)
  const { root: parsed, spans } = abstract(module, tree, printed.code)
  module.replaceRoot(parsed)
  // Re-apply the captured IDs to whichever reparsed nodes occupy the same spans.
  setExternalIds(module, spans, idMap)
  return parsed
}

/**
 * Produce `Ast` types from `RawAst` parser output.
 * @param module - Where to allocate the new nodes.
 * @param tree - Raw parser output to convert.
 * @param code - Source code that `tree` was parsed from; used to resolve the offsets stored in `tree`.
 * @param substitutor - Optional function that can supply an existing subtree for a span, instead of abstracting it.
 * @returns The new root, a map from source spans to the created nodes and tokens, and a map from each created
 * node's ID to the raw tree it was produced from.
 */
export function abstract(
  module: MutableModule,
  tree: RawAst.Tree.BodyBlock,
  code: string,
  substitutor?: (key: NodeKey) => Owned | undefined,
): { root: Owned<MutableBodyBlock>; spans: SpanMap; toRaw: Map<AstId, RawAst.Tree> }
export function abstract(
  module: MutableModule,
  tree: RawAst.Tree,
  code: string,
  substitutor?: (key: NodeKey) => Owned | undefined,
): { root: Owned; spans: SpanMap; toRaw: Map<AstId, RawAst.Tree> }
export function abstract(
  module: MutableModule,
  tree: RawAst.Tree,
  code: string,
  substitutor?: (key: NodeKey) => Owned | undefined,
): { root: Owned; spans: SpanMap; toRaw: Map<AstId, RawAst.Tree> } {
  const abstractor = new Abstractor(module, code, substitutor)
  const root = abstractor.abstractTree(tree).node
  const spans = { tokens: abstractor.tokens, nodes: abstractor.nodes }
  // The cast serves the body-block overload; the general overload only promises `Owned`.
  return { root: root as Owned<MutableBodyBlock>, spans, toRaw: abstractor.toRaw }
}

/** Produces `Ast` types from `RawAst` parser output. */
class Abstractor {
  private readonly module: MutableModule
  private readonly code: string
  private readonly substitutor: ((key: NodeKey) => Owned | undefined) | undefined
  /** Nodes created so far, keyed by the source span of their code (excluding leading whitespace). */
  readonly nodes: NodeSpanMap
  /** Tokens created so far, keyed by the source span of their code. */
  readonly tokens: TokenSpanMap
  /** For each created node, the raw tree it was produced from. */
  readonly toRaw: Map<AstId, RawAst.Tree>

  /**
   * @param module - Where to allocate the new nodes.
   * @param code - Source code that will be used to resolve references in any passed `RawAst` objects.
   * @param substitutor - A function that can inject subtrees for some spans, instead of the abstractor producing them.
   * This can be used for incremental abstraction.
   */
  constructor(
    module: MutableModule,
    code: string,
    substitutor?: (key: NodeKey) => Owned | undefined,
  ) {
    this.module = module
    this.code = code
    this.substitutor = substitutor
    this.nodes = new Map()
    this.tokens = new Map()
    this.toRaw = new Map()
  }

  /**
   * Convert a raw tree (and, recursively, its children) to an `Ast` node.
   *
   * Unless the node came from the `substitutor` or is a transparent wrapper (see the `OprSectionBoundary` /
   * `TemplateFunction` case), it is recorded in `nodes` and `toRaw`.
   * @returns The node, together with the whitespace that preceded it in the source.
   */
  abstractTree(tree: RawAst.Tree): { whitespace: string | undefined; node: Owned } {
    const whitespaceStart = tree.whitespaceStartInCodeParsed
    const whitespaceEnd = whitespaceStart + tree.whitespaceLengthInCodeParsed
    const whitespace = this.code.substring(whitespaceStart, whitespaceEnd)
    const codeStart = whitespaceEnd
    const codeEnd = codeStart + tree.childrenLengthInCodeParsed
    const spanKey = nodeKey(codeStart, codeEnd - codeStart)
    // Give the substitutor a chance to provide the subtree for this span.
    const substitute = this.substitutor?.(spanKey)
    if (substitute) return { node: substitute, whitespace }
    let node: Owned
    switch (tree.type) {
      case RawAst.Tree.Type.BodyBlock: {
        const lines = Array.from(tree.statements, (line) => {
          const newline = this.abstractToken(line.newline)
          const expression = line.expression ? this.abstractTree(line.expression) : undefined
          return { newline, expression }
        })
        node = BodyBlock.concrete(this.module, lines)
        break
      }
      case RawAst.Tree.Type.Function: {
        const name = this.abstractTree(tree.name)
        const argumentDefinitions = Array.from(tree.args, (arg) => this.abstractChildren(arg))
        const equals = this.abstractToken(tree.equals)
        const body = tree.body !== undefined ? this.abstractTree(tree.body) : undefined
        node = Function.concrete(this.module, name, argumentDefinitions, equals, body)
        break
      }
      case RawAst.Tree.Type.Ident: {
        const token = this.abstractToken(tree.token)
        node = Ident.concrete(this.module, token)
        break
      }
      case RawAst.Tree.Type.Assignment: {
        const pattern = this.abstractTree(tree.pattern)
        const equals = this.abstractToken(tree.equals)
        const value = this.abstractTree(tree.expr)
        node = Assignment.concrete(this.module, pattern, equals, value)
        break
      }
      case RawAst.Tree.Type.App: {
        const func = this.abstractTree(tree.func)
        const arg = this.abstractTree(tree.arg)
        node = App.concrete(this.module, func, undefined, undefined, arg)
        break
      }
      case RawAst.Tree.Type.NamedApp: {
        const func = this.abstractTree(tree.func)
        const open = tree.open ? this.abstractToken(tree.open) : undefined
        const name = this.abstractToken(tree.name)
        const equals = this.abstractToken(tree.equals)
        const arg = this.abstractTree(tree.arg)
        const close = tree.close ? this.abstractToken(tree.close) : undefined
        // Parentheses are only recorded when both delimiters are present.
        const parens = open && close ? { open, close } : undefined
        const nameSpecification = { name, equals }
        node = App.concrete(this.module, func, parens, nameSpecification, arg)
        break
      }
      case RawAst.Tree.Type.UnaryOprApp: {
        const opr = this.abstractToken(tree.opr)
        const arg = tree.rhs ? this.abstractTree(tree.rhs) : undefined
        // A `-` operator applied to an operand gets its own node type.
        if (arg && opr.node.code() === '-') {
          node = NegationApp.concrete(this.module, opr, arg)
        } else {
          node = UnaryOprApp.concrete(this.module, opr, arg)
        }
        break
      }
      case RawAst.Tree.Type.OprApp: {
        const lhs = tree.lhs ? this.abstractTree(tree.lhs) : undefined
        // When the operator position is an error, it carries a list of operators instead of a single one.
        const opr =
          tree.opr.ok ?
            [this.abstractToken(tree.opr.value)]
          : Array.from(tree.opr.error.payload.operators, this.abstractToken.bind(this))
        const rhs = tree.rhs ? this.abstractTree(tree.rhs) : undefined
        const soleOpr = tryGetSoleValue(opr)
        if (soleOpr?.node.code() === '.' && rhs?.node instanceof MutableIdent) {
          // Propagate type.
          const rhs_ = { ...rhs, node: rhs.node }
          node = PropertyAccess.concrete(this.module, lhs, soleOpr, rhs_)
        } else {
          node = OprApp.concrete(this.module, lhs, opr, rhs)
        }
        break
      }
      case RawAst.Tree.Type.Number: {
        const tokens = []
        if (tree.base) tokens.push(this.abstractToken(tree.base))
        if (tree.integer) tokens.push(this.abstractToken(tree.integer))
        if (tree.fractionalDigits) {
          tokens.push(this.abstractToken(tree.fractionalDigits.dot))
          tokens.push(this.abstractToken(tree.fractionalDigits.digits))
        }
        node = NumericLiteral.concrete(this.module, tokens)
        break
      }
      case RawAst.Tree.Type.Wildcard: {
        const token = this.abstractToken(tree.token)
        node = Wildcard.concrete(this.module, token)
        break
      }
      // These expression types are (or will be) used for backend analysis.
      // The frontend can ignore them, avoiding some problems with expressions sharing spans
      // (which makes it impossible to give them unique IDs in the current IdMap format).
      case RawAst.Tree.Type.OprSectionBoundary:
      case RawAst.Tree.Type.TemplateFunction:
        return { whitespace, node: this.abstractTree(tree.ast).node }
      case RawAst.Tree.Type.Invalid: {
        const expression = this.abstractTree(tree.ast)
        node = Invalid.concrete(this.module, expression)
        break
      }
      case RawAst.Tree.Type.Group: {
        const open = tree.open ? this.abstractToken(tree.open) : undefined
        const expression = tree.body ? this.abstractTree(tree.body) : undefined
        const close = tree.close ? this.abstractToken(tree.close) : undefined
        node = Group.concrete(this.module, open, expression, close)
        break
      }
      case RawAst.Tree.Type.TextLiteral: {
        const open = tree.open ? this.abstractToken(tree.open) : undefined
        const newline = tree.newline ? this.abstractToken(tree.newline) : undefined
        const elements = Array.from(tree.elements, (raw) => this.abstractTextElement(raw))
        const close = tree.close ? this.abstractToken(tree.close) : undefined
        node = TextLiteral.concrete(this.module, open, newline, elements, close)
        break
      }
      case RawAst.Tree.Type.Documented: {
        const open = this.abstractToken(tree.documentation.open)
        const elements = Array.from(tree.documentation.elements, (raw) =>
          this.abstractTextToken(raw),
        )
        const newlines = Array.from(tree.documentation.newlines, this.abstractToken.bind(this))
        const expression = tree.expression ? this.abstractTree(tree.expression) : undefined
        node = Documented.concrete(this.module, open, elements, newlines, expression)
        break
      }
      case RawAst.Tree.Type.Import: {
        const recurseBody = (tree: RawAst.Tree) => {
          const body = this.abstractTree(tree)
          // An `Invalid` node with no code is treated as an absent body.
          if (body.node instanceof Invalid && body.node.code() === '') return undefined
          return body
        }
        const recurseSegment = (segment: RawAst.MultiSegmentAppSegment) => ({
          header: this.abstractToken(segment.header),
          body: segment.body ? recurseBody(segment.body) : undefined,
        })
        const polyglot = tree.polyglot ? recurseSegment(tree.polyglot) : undefined
        const from = tree.from ? recurseSegment(tree.from) : undefined
        const import_ = recurseSegment(tree.import)
        const all = tree.all ? this.abstractToken(tree.all) : undefined
        const as = tree.as ? recurseSegment(tree.as) : undefined
        const hiding = tree.hiding ? recurseSegment(tree.hiding) : undefined
        node = Import.concrete(this.module, polyglot, from, import_, all, as, hiding)
        break
      }
      case RawAst.Tree.Type.Array: {
        const left = this.abstractToken(tree.left)
        const elements = []
        if (tree.first) elements.push({ value: this.abstractTree(tree.first) })
        for (const rawElement of tree.rest) {
          elements.push({
            delimiter: this.abstractToken(rawElement.operator),
            value: rawElement.body && this.abstractTree(rawElement.body),
          })
        }
        const right = this.abstractToken(tree.right)
        node = Vector.concrete(this.module, left, elements, right)
        break
      }
      default: {
        // Any other tree type is represented generically, as a flat list of its children.
        node = Generic.concrete(this.module, this.abstractChildren(tree))
      }
    }
    this.toRaw.set(node.id, tree)
    // Nodes sharing a span are kept outermost-first: children are registered before their parents, and each
    // registration is prepended.
    map.setIfUndefined(this.nodes, spanKey, (): Ast[] => []).unshift(node)
    return { node, whitespace }
  }

  /** Convert a raw token to a `Token`, record it in `tokens`, and return it with its preceding whitespace. */
  private abstractToken(token: RawAst.Token): { whitespace: string; node: Token } {
    const whitespaceStart = token.whitespaceStartInCodeBuffer
    const whitespaceEnd = whitespaceStart + token.whitespaceLengthInCodeBuffer
    const whitespace = this.code.substring(whitespaceStart, whitespaceEnd)
    const codeStart = token.startInCodeBuffer
    const codeEnd = codeStart + token.lengthInCodeBuffer
    const tokenCode = this.code.substring(codeStart, codeEnd)
    const key = tokenKey(codeStart, codeEnd - codeStart)
    const node = Token.new(tokenCode, token.type)
    this.tokens.set(key, node)
    return { whitespace, node }
  }

  /**
   * Collect the trees and tokens reachable from `tree` in visiting order, descending through any intermediate
   * objects that are neither trees nor tokens.
   */
  private abstractChildren(tree: LazyObject): (NodeChild<Owned> | NodeChild<Token>)[] {
    const children: (NodeChild<Owned> | NodeChild<Token>)[] = []
    const visitor = (child: LazyObject) => {
      if (RawAst.Tree.isInstance(child)) {
        children.push(this.abstractTree(child))
      } else if (RawAst.Token.isInstance(child)) {
        children.push(this.abstractToken(child))
      } else {
        child.visitChildren(visitor)
      }
    }
    tree.visitChildren(visitor)
    return children
  }

  /** Convert an element of a text literal; unlike {@link abstractTextToken}, this supports splices. */
  private abstractTextElement(raw: RawAst.TextElement): TextElement<OwnedRefs> {
    switch (raw.type) {
      case RawAst.TextElement.Type.Newline:
      case RawAst.TextElement.Type.Escape:
      case RawAst.TextElement.Type.Section:
        return this.abstractTextToken(raw)
      case RawAst.TextElement.Type.Splice:
        return {
          type: 'splice',
          open: this.abstractToken(raw.open),
          expression: raw.expression && this.abstractTree(raw.expression),
          close: this.abstractToken(raw.close),
        }
    }
  }

  /**
   * Convert a non-splice text element to a token element.
   * @throws If `raw` is a splice.
   */
  private abstractTextToken(raw: RawAst.TextElement): TextToken<OwnedRefs> {
    switch (raw.type) {
      case RawAst.TextElement.Type.Newline:
        return { type: 'token', token: this.abstractToken(raw.newline) }
      case RawAst.TextElement.Type.Escape: {
        // A value of 0xFFFFFFFF (-1 as a u32) yields no interpretation for the escape.
        const negativeOneU32 = 4294967295
        return {
          type: 'token',
          token: this.abstractToken(raw.token),
          interpreted:
            raw.token.value !== negativeOneU32 ? String.fromCodePoint(raw.token.value) : undefined,
        }
      }
      case RawAst.TextElement.Type.Section:
        return { type: 'token', token: this.abstractToken(raw.text) }
      case RawAst.TextElement.Type.Splice:
        throw new Error('Unreachable: Splice in non-interpolated text field')
    }
  }
}

declare const nodeKeyBrand: unique symbol
/** A source-range key for an `Ast`. */
export type NodeKey = SourceRangeKey & { [nodeKeyBrand]: never }
declare const tokenKeyBrand: unique symbol
/** A source-range key for a `Token`. */
export type TokenKey = SourceRangeKey & { [tokenKeyBrand]: never }
/** Create a source-range key for an `Ast`. */
export function nodeKey(start: number, length: number): NodeKey {
  return sourceRangeKey([start, start + length]) as NodeKey
}
/** Create a source-range key for a `Token`. */
export function tokenKey(start: number, length: number): TokenKey {
  return sourceRangeKey([start, start + length]) as TokenKey
}

/** Maps from source ranges to `Ast`s. */
export type NodeSpanMap = Map<NodeKey, Ast[]>
/** Maps from source ranges to `Token`s. */
export type TokenSpanMap = Map<TokenKey, Token>

/** Maps from source ranges to `Ast`s and `Token`s. */
export interface SpanMap {
  nodes: NodeSpanMap
  tokens: TokenSpanMap
}

/** Code with an associated mapping to `Ast` types. */
interface PrintedSource {
  info: SpanMap
  code: string
}

/** Generate an `IdMap` from a `SpanMap`.
*/
export function spanMapToIdMap(spans: SpanMap): IdMap {
  const idMap = new IdMap()
  for (const [key, token] of spans.tokens.entries()) {
    assert(isUuid(token.id))
    idMap.insertKnownId(sourceRangeFromKey(key), token.id)
  }
  for (const [key, asts] of spans.nodes.entries()) {
    for (const ast of asts) {
      assert(isUuid(ast.externalId))
      idMap.insertKnownId(sourceRangeFromKey(key), ast.externalId)
    }
  }
  return idMap
}

/** Given a `SpanMap`, return a function that can look up source ranges by AST ID. */
export function spanMapToSpanGetter(spans: SpanMap): (id: AstId) => SourceRange | undefined {
  const reverseMap = new Map<AstId, SourceRange>()
  for (const [key, asts] of spans.nodes) {
    for (const ast of asts) {
      reverseMap.set(ast.id, sourceRangeFromKey(key))
    }
  }
  return (id) => reverseMap.get(id)
}

/** Return stringification with associated ID map. This is only exported for testing. */
export function print(ast: Ast): PrintedSource {
  const info: SpanMap = {
    nodes: new Map(),
    tokens: new Map(),
  }
  const code = ast.printSubtree(info, 0, undefined)
  return { info, code }
}

/**
 * @internal Used by `Ast.printSubtree`. Note that some AST types have overrides.
 * @param ast - Node to print.
 * @param info - Span map to which the spans of the printed nodes and tokens are added.
 * @param offset - Position in the overall output at which this node's code begins.
 * @param parentIndent - Indentation of the enclosing block, passed through to child nodes.
 * @param verbatim - Passed through to `concreteChildren` and to the children's `printSubtree`.
 * @returns The code of `ast`, without any leading whitespace of `ast` itself.
 */
export function printAst(
  ast: Ast,
  info: SpanMap,
  offset: number,
  parentIndent: string | undefined,
  verbatim?: boolean,
): string {
  let code = ''
  for (const child of ast.concreteChildren(verbatim)) {
    // Skip references to nodes that are not present in the module.
    if (!isTokenId(child.node) && ast.module.get(child.node) === undefined) continue
    if (child.whitespace != null) {
      code += child.whitespace
    } else if (code.length !== 0) {
      // No whitespace specified: separate from the preceding child with a single space.
      code += ' '
    }
    if (isTokenId(child.node)) {
      const tokenStart = offset + code.length
      const token = ast.module.getToken(child.node)
      const span = tokenKey(tokenStart, token.code().length)
      info.tokens.set(span, token)
      code += token.code()
    } else {
      const childNode = ast.module.get(child.node)
      code += childNode.printSubtree(info, offset + code.length, parentIndent, verbatim)
      // Extra structural validation.
      assertEqual(childNode.id, child.node)
      if (parentId(childNode) !== ast.id) {
        console.error(
          `Inconsistent parent pointer (expected ${ast.id})`,
          childNode,
          graphParentPointers(ast.module.root()!),
        )
      }
      assertEqual(parentId(childNode), ast.id)
    }
  }
  const span = nodeKey(offset, code.length)
  // Children have already been registered; prepending keeps nodes that share a span outermost-first.
  map.setIfUndefined(info.nodes, span, (): Ast[] => []).unshift(ast)
  return code
}

/**
 * @internal Use `Ast.code()` to stringify.
 *
 * Prints a block line by line. Every expression is indented more deeply than `parentIndent`: a line's own
 * whitespace is used if it is longer than `parentIndent`, otherwise the block's indent (established by the first
 * line that has an expression) is substituted.
 */
export function printBlock(
  block: BodyBlock,
  info: SpanMap,
  offset: number,
  parentIndent: string | undefined,
  verbatim?: boolean,
): string {
  let blockIndent: string | undefined
  let code = ''
  for (const line of block.fields.get('lines')) {
    code += line.newline.whitespace ?? ''
    const newlineCode = block.module.getToken(line.newline.node).code()
    // Only print a newline if this isn't the first line in the output, or it's a comment.
    if (offset || code || newlineCode.startsWith('#')) {
      // If this isn't the first line in the output, but there is a concrete newline token:
      // if it's a zero-length newline, ignore it and print a normal newline.
      code += newlineCode || '\n'
    }
    if (line.expression) {
      if (blockIndent === undefined) {
        if ((line.expression.whitespace?.length ?? 0) > (parentIndent?.length ?? 0)) {
          blockIndent = line.expression.whitespace!
        } else if (parentIndent !== undefined) {
          blockIndent = parentIndent + '    '
        } else {
          blockIndent = ''
        }
      }
      const validIndent = (line.expression.whitespace?.length ?? 0) > (parentIndent?.length ?? 0)
      code += validIndent ? line.expression.whitespace : blockIndent
      const lineNode = block.module.get(line.expression.node)
      assertEqual(lineNode.id, line.expression.node)
      assertEqual(parentId(lineNode), block.id)
      code += lineNode.printSubtree(info, offset + code.length, blockIndent, verbatim)
    }
  }
  const span = nodeKey(offset, code.length)
  map.setIfUndefined(info.nodes, span, (): Ast[] => []).unshift(block)
  return code
}

/** @internal Use `Ast.code()` to stringify.
*/
export function printDocumented(
  documented: Documented,
  info: SpanMap,
  offset: number,
  parentIndent: string | undefined,
  verbatim?: boolean,
): string {
  const open = documented.fields.get('open')
  const topIndent = parentIndent ?? open.whitespace ?? ''
  let code = ''
  code += open.node.code_
  // Continuation lines of the documentation must be indented more deeply than the opening line.
  const minWhitespaceLength = topIndent.length + 1
  let preferredWhitespace = topIndent + '  '
  documented.fields.get('elements').forEach(({ token }, i) => {
    if (i === 0) {
      const whitespace = token.whitespace ?? ' '
      code += whitespace
      code += token.node.code_
      // Later lines are aligned with the start of the first line's text.
      preferredWhitespace += whitespace
    } else if (token.node.tokenType_ === RawAst.Token.Type.TextSection) {
      // Keep a text section's own whitespace if it is sufficient (or if printing verbatim); otherwise re-indent.
      if (token.whitespace && (verbatim || token.whitespace.length >= minWhitespaceLength))
        code += token.whitespace
      else code += preferredWhitespace
      code += token.node.code_
    } else {
      code += token.whitespace ?? ''
      code += token.node.code_
    }
  })
  code += documented.fields
    .get('newlines')
    .map(({ whitespace, node }) => (whitespace ?? '') + node.code_)
    .join('')
  if (documented.expression) {
    code += documented.fields.get('expression')?.whitespace ?? topIndent
    code += documented.expression.printSubtree(info, offset + code.length, topIndent, verbatim)
  }
  const span = nodeKey(offset, code.length)
  map.setIfUndefined(info.nodes, span, (): Ast[] => []).unshift(documented)
  return code
}

/** Parse the input as a block. */
export function parseBlock(code: string, inModule?: MutableModule): Owned<MutableBodyBlock> {
  return parseBlockWithSpans(code, inModule).root
}

/**
 * Parse the input. If it contains a single expression at the top level, return it; otherwise, return a block.
 * @param code - Source text to parse.
 * @param module - Module in which to allocate the nodes; a transient module is created if omitted.
 */
export function parse(code: string, module?: MutableModule): Owned {
  const module_ = module ?? MutableModule.Transient()
  const ast = parseBlock(code, module_)
  const soleStatement = tryGetSoleValue(ast.statements())
  if (!soleStatement) return ast
  // Detach the statement from the block wrapper, and discard the wrapper.
  const parent = parentId(soleStatement)
  if (parent) module_.delete(parent)
  soleStatement.fields.set('parent', undefined)
  return asOwned(soleStatement)
}

/** Parse a block, and return it along with a mapping from source locations to parsed objects. */
export function parseBlockWithSpans(
  code: string,
  inModule?: MutableModule,
): { root: Owned<MutableBodyBlock>; spans: SpanMap } {
  const tree = parseEnso(code)
  const module = inModule ?? MutableModule.Transient()
  return abstract(module, tree, code)
}

/**
 * Parse the input, and apply the given `IdMap`. Return the parsed tree, the updated `IdMap`, the span map, and a
 * mapping to the `RawAst` representation.
 *
 * The new tree is installed as the root of the module (`inModule`, or a new transient module).
 */
export function parseExtended(code: string, idMap?: IdMap | undefined, inModule?: MutableModule) {
  const rawRoot = parseEnso(code)
  const module = inModule ?? MutableModule.Transient()
  const { root, spans, toRaw } = module.transact(() => {
    const { root, spans, toRaw } = abstract(module, rawRoot, code)
    root.module.replaceRoot(root)
    if (idMap) setExternalIds(root.module, spans, idMap)
    return { root, spans, toRaw }
  })
  const getSpan = spanMapToSpanGetter(spans)
  const idMapOut = spanMapToIdMap(spans)
  return { root, idMap: idMapOut, getSpan, toRaw }
}

/** Return the number of `Ast`s in the tree, including the provided root. */
export function astCount(ast: Ast): number {
  let count = 0
  ast.visitRecursiveAst((_subtree) => {
    count += 1
  })
  return count
}

/** Apply an `IdMap` to a module, using the given `SpanMap`.
 *  @returns The number of IDs that were assigned from the map.
*/
export function setExternalIds(edit: MutableModule, spans: SpanMap, ids: IdMap): number {
  let astsMatched = 0
  for (const [key, externalId] of ids.entries()) {
    const asts = spans.nodes.get(key as NodeKey)
    if (asts) {
      for (const ast of asts) {
        astsMatched += 1
        const editAst = edit.getVersion(ast)
        // Avoid redundant writes when the ID is already correct.
        if (editAst.externalId !== externalId) editAst.setExternalId(externalId)
      }
    }
  }
  return astsMatched
}

/**
 * Try to find all the spans in `expected` in `encountered`. If any are missing, use the provided `code` to determine
 * whether the lost spans are single-line or multi-line.
 * @returns The outermost node of each lost span, partitioned by whether the span's code contains a line break.
 */
function checkSpans(expected: NodeSpanMap, encountered: NodeSpanMap, code: string) {
  const lost = new Array<readonly [NodeKey, Ast]>()
  for (const [key, asts] of expected) {
    const outermostPrinted = asts[0]
    if (!outermostPrinted) continue
    // Nodes sharing a span are expected to form a parent-child chain, outermost first.
    for (let i = 1; i < asts.length; ++i) assertEqual(asts[i]?.parentId, asts[i - 1]?.id)
    const encounteredAsts = encountered.get(key)
    if (encounteredAsts === undefined) lost.push([key, outermostPrinted])
  }
  const lostInline = new Array<Ast>()
  const lostBlock = new Array<Ast>()
  for (const [key, ast] of lost) {
    const [start, end] = sourceRangeFromKey(key)
    ;(code.substring(start, end).match(/[\r\n]/) ? lostBlock : lostInline).push(ast)
  }
  return { lostInline, lostBlock }
}

/**
 * If the input tree's concrete syntax has precedence errors (i.e. its expected code would not parse back to the same
 * structure), try to fix it. If possible, it will be repaired by inserting parentheses; if that doesn't fix it, the
 * affected subtree will be re-synced to faithfully represent the source code the incorrect tree prints to.
 * @param root - Tree to check.
 * @param module - Edit in which to make any fixes; if omitted, a new edit of `root`'s module is created when needed.
 * @returns The code of the (possibly repaired) tree, and the edit containing the fixes, or `undefined` if no fixes
 * were needed.
 */
export function repair(
  root: BodyBlock,
  module?: MutableModule,
): { code: string; fixes: MutableModule | undefined } {
  // Print the input to see what spans its nodes expect to have in the output.
  const printed = print(root)
  // Parse the printed output to see what spans actually correspond to nodes in the printed code.
  const reparsed = parseBlockWithSpans(printed.code)
  // See if any span we expected to be a node isn't; if so, it likely merged with its parent due to wrong precedence.
  const { lostInline, lostBlock } = checkSpans(
    printed.info.nodes,
    reparsed.spans.nodes,
    printed.code,
  )
  if (lostInline.length === 0) {
    if (lostBlock.length !== 0) {
      console.warn(`repair: Bad block elements, but all inline elements OK?`)
      const fixes = module ?? root.module.edit()
      resync(lostBlock, printed.info.nodes, reparsed.spans.nodes, fixes)
      return { code: printed.code, fixes }
    }
    return { code: printed.code, fixes: undefined }
  }

  // Wrap any "lost" nodes in parentheses.
  const fixes = module ?? root.module.edit()
  for (const ast of lostInline) {
    if (ast instanceof Group) continue
    fixes.getVersion(ast).update((ast) => Group.new(fixes, ast))
  }

  // Verify that it's fixed.
  const printed2 = print(fixes.getVersion(root))
  const reparsed2 = parseBlockWithSpans(printed2.code)
  const { lostInline: lostInline2, lostBlock: lostBlock2 } = checkSpans(
    printed2.info.nodes,
    reparsed2.spans.nodes,
    printed2.code,
  )
  // Parenthesizing was not enough: fall back to replacing the affected subtrees with what the code parses to.
  if (lostInline2.length !== 0 || lostBlock2.length !== 0)
    resync([...lostInline2, ...lostBlock2], printed2.info.nodes, reparsed2.spans.nodes, fixes)

  return { code: printed2.code, fixes }
}

/**
 * Replace subtrees in the module to ensure that the module contents are consistent with the module's code.
 *
 * @param badAsts - ASTs that, if printed, would not parse to exactly their current content.
 * @param badSpans - Span map produced by printing the `badAsts` nodes and all their parents.
 * @param goodSpans - Span map produced by parsing the code from the module of `badAsts`.
 * @param edit - Module to apply the fixes to; must contain all ASTs in `badAsts`.
*/
function resync(
  badAsts: Iterable<Ast>,
  badSpans: NodeSpanMap,
  goodSpans: NodeSpanMap,
  edit: MutableModule,
) {
  // Find the parents of the topmost bad nodes; these are the nodes that will be re-synced.
  const parentsOfBadSubtrees = new Set<AstId>()
  const badAstIds = new Set(Array.from(badAsts, (ast) => ast.id))
  for (const id of subtreeRoots(edit, badAstIds)) {
    const parent = edit.get(id)?.parentId
    if (parent) parentsOfBadSubtrees.add(parent)
  }

  // Look up the span that each of those parents was printed to.
  const spanOfBadParent = new Array<readonly [AstId, NodeKey]>()
  for (const [span, asts] of badSpans) {
    for (const ast of asts) {
      if (parentsOfBadSubtrees.has(ast.id)) spanOfBadParent.push([ast.id, span])
    }
  }
  // All ASTs in the module of badAsts should have entries in badSpans.
  assertEqual(spanOfBadParent.length, parentsOfBadSubtrees.size)

  for (const [id, span] of spanOfBadParent) {
    const parent = edit.get(id)
    const goodAst = goodSpans.get(span)?.[0]
    // The parent of the root of a bad subtree must be a good AST.
    assertDefined(goodAst)
    parent.syncToCode(goodAst.code())
  }

  console.warn(
    `repair: Replaced ${parentsOfBadSubtrees.size} subtrees with their reparsed equivalents.`,
    parentsOfBadSubtrees,
  )
}

/**
 * @internal Recursion helper for {@link syntaxHash}.
 * @param ast - Root of the subtree to hash.
 * @param hashesOut - Receives an entry for `ast` and for every node below it, grouped by hash.
 * @returns The hash of `ast`.
 */
function hashSubtreeSyntax(ast: Ast, hashesOut: Map<SyntaxHash, Ast[]>): SyntaxHash {
  let content = ''
  // `typeName` is a method (see its use in `calculateCorrespondence`); it must be called so that the node's type,
  // rather than the stringified function, contributes to the hash.
  content += ast.typeName() + ':'
  for (const child of ast.concreteChildren()) {
    // Unspecified whitespace is hashed as a marker character that cannot occur in actual whitespace.
    content += child.whitespace ?? '?'
    if (isTokenId(child.node)) {
      content += 'Token:' + hashString(ast.module.getToken(child.node).code())
    } else {
      content += hashSubtreeSyntax(ast.module.get(child.node), hashesOut)
    }
  }
  const astHash = hashString(content)
  map.setIfUndefined(hashesOut, astHash, (): Ast[] => []).unshift(ast)
  return astHash
}

declare const brandHash: unique symbol
/** See {@link syntaxHash}. */
type SyntaxHash = string & { [brandHash]: never }
/** Applies the syntax-data hashing function to the input, and brands the result as a `SyntaxHash`. */
function hashString(input: string): SyntaxHash {
  return xxHash128(input) as SyntaxHash
}

/** Calculates `SyntaxHash`es for the given node and all its children.
*
 * Each `SyntaxHash` summarizes the syntactic content of an AST. If two ASTs have the same code and were parsed the
 * same way (i.e. one was not parsed in a context that resulted in a different interpretation), they will have the same
 * hash. Note that the hash is invariant to metadata, including `externalId` assignments.
 */
function syntaxHash(root: Ast) {
  const hashes = new Map<SyntaxHash, Ast[]>()
  const rootHash = hashSubtreeSyntax(root, hashes)
  return { root: rootHash, hashes }
}

/** If the input is a block containing a single expression, return the expression; otherwise return the input. */
function rawBlockToInline(tree: RawAst.Tree.Tree) {
  if (tree.type !== RawAst.Tree.Type.BodyBlock) return tree
  return tryGetSoleValue(tree.statements)?.expression ?? tree
}

/**
 * Update `ast` to match the given source code, while modifying it as little as possible.
 * @param ast - Tree to update.
 * @param code - The code that `ast` should have after the update.
 * @param metadataSource - Module from which node metadata is read; defaults to the module of `ast`.
 */
export function syncToCode(ast: MutableAst, code: string, metadataSource?: Module) {
  const codeBefore = ast.code()
  const textEdits = textChangeToEdits(codeBefore, code)
  applyTextEditsToAst(ast, textEdits, metadataSource ?? ast.module)
}

/**
 * Find nodes in the input `ast` that should be treated as equivalents of nodes in `parsedRoot`.
 * @param ast - Root of the tree before the edit.
 * @param astSpans - Spans of the nodes of `ast` in the code before the edit.
 * @param parsedRoot - Root of the tree obtained by parsing the code after the edit.
 * @param parsedSpans - Spans of the nodes of `parsedRoot` in the code after the edit.
 * @param textEdits - The edits that transform the code before into the code after.
 * @param codeAfter - The code after the edit.
 * @returns A map from the ID of each matched old node to its equivalent new node.
 */
function calculateCorrespondence(
  ast: Ast,
  astSpans: NodeSpanMap,
  parsedRoot: Ast,
  parsedSpans: NodeSpanMap,
  textEdits: SourceRangeEdit[],
  codeAfter: string,
): Map<AstId, Ast> {
  const newSpans = new Map<AstId, SourceRange>()
  for (const [key, asts] of parsedSpans) {
    for (const ast of asts) newSpans.set(ast.id, sourceRangeFromKey(key))
  }

  // Retained-code matching: For each new tree, check for some old tree of the same type such that the new tree is the
  // smallest node to contain all characters of the old tree's code that were not deleted in the edit.
  //
  // If the new node's span exactly matches the retained code, add the match to `toSync`. If the new node's span
  // contains additional code, add the match to `candidates`.
  const toSync = new Map<AstId, Ast>()
  const candidates = new Map<AstId, Ast>()
  const allSpansBefore = Array.from(astSpans.keys(), sourceRangeFromKey)
  const spansBeforeAndAfter = applyTextEditsToSpans(textEdits, allSpansBefore).map(
    ([before, after]) => [before, trimEnd(after, codeAfter)] satisfies [any, any],
  )
  const partAfterToAstBefore = new Map<SourceRangeKey, Ast>()
  for (const [spanBefore, partAfter] of spansBeforeAndAfter) {
    const astBefore = astSpans.get(sourceRangeKey(spanBefore) as NodeKey)?.[0]!
    partAfterToAstBefore.set(sourceRangeKey(partAfter), astBefore)
  }
  const matchingPartsAfter = spansBeforeAndAfter.map(([_before, after]) => after)
  const parsedSpanTree = new AstWithSpans(parsedRoot, (id) => newSpans.get(id)!)
  const astsMatchingPartsAfter = enclosingSpans(parsedSpanTree, matchingPartsAfter)
  for (const [astAfter, partsAfter] of astsMatchingPartsAfter) {
    for (const partAfter of partsAfter) {
      const astBefore = partAfterToAstBefore.get(sourceRangeKey(partAfter))!
      if (astBefore.typeName() === astAfter.typeName()) {
        ;(rangeLength(newSpans.get(astAfter.id)!) === rangeLength(partAfter) ?
          toSync
        : candidates
        ).set(astBefore.id, astAfter)
        // Only the first same-typed part is matched to each new node.
        break
      }
    }
  }

  // Index the matched nodes.
  const oldIdsMatched = new Set<AstId>()
  const newIdsMatched = new Set<AstId>()
  for (const [oldId, newAst] of toSync) {
    oldIdsMatched.add(oldId)
    newIdsMatched.add(newAst.id)
  }

  // Movement matching: For each new tree that hasn't been matched, match it with any identical unmatched old tree.
  const newHashes = syntaxHash(parsedRoot).hashes
  const oldHashes = syntaxHash(ast).hashes
  for (const [hash, newAsts] of newHashes) {
    const unmatchedNewAsts = newAsts.filter((ast) => !newIdsMatched.has(ast.id))
    const unmatchedOldAsts = oldHashes.get(hash)?.filter((ast) => !oldIdsMatched.has(ast.id)) ?? []
    for (const [unmatchedNew, unmatchedOld] of zip(unmatchedNewAsts, unmatchedOldAsts)) {
      toSync.set(unmatchedOld.id, unmatchedNew)
      // Update the matched-IDs indices.
      oldIdsMatched.add(unmatchedOld.id)
      newIdsMatched.add(unmatchedNew.id)
    }
  }

  // Apply any non-optimal span matches from `candidates`, if the nodes involved were not matched during
  // movement-matching.
  for (const [beforeId, after] of candidates) {
    if (oldIdsMatched.has(beforeId) || newIdsMatched.has(after.id)) continue
    toSync.set(beforeId, after)
  }

  return toSync
}

/**
 * Update `ast` according to changes to its corresponding source code.
 * @param ast - Tree to update.
 * @param textEdits - Edits to apply to the code that `ast` currently prints to.
 * @param metadataSource - Module from which node metadata is read when syncing.
 */
export function applyTextEditsToAst(
  ast: MutableAst,
  textEdits: SourceRangeEdit[],
  metadataSource: Module,
) {
  const printed = print(ast)
  const code = applyTextEdits(printed.code, textEdits)
  const rawParsedBlock = parseEnso(code)
  // The parser always produces a block; unwrap it unless the node being updated is itself a block.
  const rawParsed =
    ast instanceof MutableBodyBlock ? rawParsedBlock : rawBlockToInline(rawParsedBlock)
  const parsed = abstract(ast.module, rawParsed, code)
  const toSync = calculateCorrespondence(
    ast,
    printed.info.nodes,
    parsed.root,
    parsed.spans.nodes,
    textEdits,
    code,
  )
  syncTree(ast, parsed.root, toSync, ast.module, metadataSource)
}

/**
 * Replace `target` with `newContent`, reusing nodes according to the correspondence in `toSync`.
 * @param target - Node to replace; it must have a parent.
 * @param newContent - The content that `target` should have afterwards.
 * @param toSync - Map from the IDs of existing nodes to the new nodes whose fields they should take on.
 * @param edit - Module in which to make the changes.
 * @param metadataSource - Module from which the metadata of assignment expressions is read.
 * @returns The node that now occupies the position of `target`: `target` itself if it was matched with
 * `newContent`, otherwise `newContent`.
 */
function syncTree(
  target: Ast,
  newContent: Owned,
  toSync: Map<AstId, Ast>,
  edit: MutableModule,
  metadataSource: Module,
) {
  const newIdToEquivalent = new Map<AstId, AstId>()
  for (const [beforeId, after] of toSync) newIdToEquivalent.set(after.id, beforeId)
  // Creates a reference-rewriting function for the children of the given parent: a reference to a new node that has
  // an existing equivalent is redirected to the equivalent. In both cases the child's parent pointer is updated.
  const childReplacerFor = (parentId: AstId) => (id: AstId) => {
    const original = newIdToEquivalent.get(id)
    if (original) {
      const replacement = edit.get(original)
      if (replacement.parentId !== parentId) replacement.fields.set('parent', parentId)
      return original
    } else {
      const child = edit.get(id)
      if (child.parentId !== parentId) child.fields.set('parent', parentId)
    }
  }
  const parentId = target.fields.get('parent')
  assertDefined(parentId)
  const parent = edit.get(parentId)
  const targetSyncEquivalent = toSync.get(target.id)
  const syncRoot = targetSyncEquivalent?.id === newContent.id ? targetSyncEquivalent : undefined
  if (!syncRoot) {
    // The root could not be reused: substitute the new root, moving the old root's metadata onto it. The old root
    // gets a fresh external ID so that the two do not share one.
    parent.replaceChild(target.id, newContent)
    newContent.fields.set('metadata', target.fields.get('metadata').clone())
    target.fields.get('metadata').set('externalId', newExternalId())
  }
  const newRoot = syncRoot ? target : newContent
  newRoot.visitRecursiveAst((ast) => {
    const syncFieldsFrom = toSync.get(ast.id)
    const editAst = edit.getVersion(ast)
    if (syncFieldsFrom) {
      // Remember the assignment's expression before syncing the fields, so its identity can be carried over to
      // whatever expression the assignment has afterwards.
      const originalAssignmentExpression =
        ast instanceof Assignment ?
          metadataSource.get(ast.fields.get('expression').node)
        : undefined
      syncFields(edit.getVersion(ast), syncFieldsFrom, childReplacerFor(ast.id))
      if (editAst instanceof MutableAssignment && originalAssignmentExpression) {
        if (editAst.expression.externalId !== originalAssignmentExpression.externalId)
          editAst.expression.setExternalId(originalAssignmentExpression.externalId)
        syncNodeMetadata(
          editAst.expression.mutableNodeMetadata(),
          originalAssignmentExpression.nodeMetadata,
        )
      }
    } else {
      rewriteRefs(editAst, childReplacerFor(ast.id))
    }
    return true
  })
  return newRoot
}

/** Provides a `SpanTree` view of an `Ast`, given span information. */
class AstWithSpans implements SpanTree<Ast> {
  private readonly ast: Ast
  private readonly getSpan: (astId: AstId) => SourceRange

  /**
   * @param ast - The node this view represents.
   * @param getSpan - Returns the span of the node with the given ID; used for this node and all its descendants.
   */
  constructor(ast: Ast, getSpan: (astId: AstId) => SourceRange) {
    this.ast = ast
    this.getSpan = getSpan
  }

  /** The node this view represents. */
  id(): Ast {
    return this.ast
  }

  /** The span of this node, according to `getSpan`. */
  span(): SourceRange {
    return this.getSpan(this.ast.id)
  }

  /** Views of the child nodes; children that are not `Ast`s (i.e. tokens) are skipped. */
  *children(): IterableIterator<SpanTree<Ast>> {
    for (const child of this.ast.children()) {
      if (child instanceof Ast) yield new AstWithSpans(child, this.getSpan)
    }
  }
}