Add workaround for huge idmap diffs (#9207)

Fixes probably #9198 See [this comment](https://github.com/enso-org/enso/issues/9198#issuecomment-1968484269) for justification. TLDR: the diff algorithm is too slow for our huge idmap.

The proper fix would be to reduce idmap size. Expect tasks for that soon.
This commit is contained in:
Adam Obuchowicz 2024-02-29 15:42:59 +01:00 committed by GitHub
parent c2842df8d8
commit 874e9efe74
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 170 additions and 16 deletions

10
.vscode/launch.json vendored
View File

@ -19,6 +19,14 @@
"type": "node",
"debugServer": 4711,
"request": "attach"
},
{
"type": "node-terminal",
"name": "Run Script: dev",
"request": "launch",
"command": "npm run dev",
// "env": {"NODE_OPTIONS": "--inspect"},
"cwd": "${workspaceFolder}/app/gui2"
}
]
}
}

View File

@ -44,8 +44,8 @@ export class ComputedValueRegistry {
processUpdates(updates: ExpressionUpdate[]) {
for (const update of updates) {
const info = this.db.get(update.expressionId)
const newInfo = combineInfo(info, update)
this.db.set(update.expressionId, newInfo)
if (info) updateInfo(info, update)
else this.db.set(update.expressionId, combineInfo(undefined, update))
}
}
@ -58,6 +58,14 @@ export class ComputedValueRegistry {
}
}
function updateInfo(info: ExpressionInfo, update: ExpressionUpdate) {
const newInfo = combineInfo(info, update)
if (newInfo.typename !== info.typename) info.typename = newInfo.typename
if (newInfo.methodCall !== info.methodCall) info.methodCall = newInfo.methodCall
if (newInfo.payload !== info.payload) info.payload = newInfo.payload
if (newInfo.profilingInfo !== info.profilingInfo) info.profilingInfo = update.profilingInfo
}
function combineInfo(info: ExpressionInfo | undefined, update: ExpressionUpdate): ExpressionInfo {
const isPending = update.payload.type === 'Pending'
return {

View File

@ -608,18 +608,20 @@ export const useProjectStore = defineStore('project', () => {
}
const dataflowErrors = new ReactiveMapping(computedValueRegistry.db, (id, info) => {
if (info.payload.type !== 'DataflowError') return
const data = useVisualizationData(
ref({
expressionId: id,
visualizationModule: 'Standard.Visualization.Preprocessor',
expression: {
module: 'Standard.Visualization.Preprocessor',
definedOnType: 'Standard.Visualization.Preprocessor',
name: 'error_preprocessor',
},
}),
const config = computed(() =>
info.payload.type === 'DataflowError'
? {
expressionId: id,
visualizationModule: 'Standard.Visualization.Preprocessor',
expression: {
module: 'Standard.Visualization.Preprocessor',
definedOnType: 'Standard.Visualization.Preprocessor',
name: 'error_preprocessor',
},
}
: null,
)
const data = useVisualizationData(config)
return computed<{ kind: 'Dataflow'; message: string } | undefined>(() => {
const visResult = data.value
if (!visResult) return

View File

@ -0,0 +1,60 @@
import diff from 'fast-diff'
import { uuidv4 } from 'lib0/random.js'
import { bench, describe } from 'vitest'
import { stupidFastDiff } from '../edits'
describe('Diff algorithm benchmarks', () => {
let oldString = ''
let newString = ''
function initializeStrings(length: number) {
// simulating metadata list:
oldString = ''
let i = 0
while (oldString.length < length / 2) {
oldString += `[{"index":{"value":${i}},"size":{"value":4}},"${uuidv4()}"]`
i += 1
}
i = 1
newString = ''
while (newString.length < length / 2) {
newString += `[{"index":{"value":${i}},"size":{"value":5}},"${uuidv4()}"]`
}
}
function diffBenchmark(length: number) {
bench(
`Diffing ${length}`,
() => {
diff(oldString, newString)
},
{ setup: () => initializeStrings(length), warmupIterations: 1, iterations: 1 },
)
}
function stupidFastDiffBenchmark(length: number) {
bench(
`Fast Diffing ${length}`,
() => {
stupidFastDiff(oldString, newString)
},
{ setup: () => initializeStrings(length), warmupIterations: 1, iterations: 1 },
)
}
diffBenchmark(10000)
diffBenchmark(20000)
diffBenchmark(30000)
diffBenchmark(40000)
diffBenchmark(50000)
// These are too slow for every-day benchmark run
// diffBenchmark(60000)
// diffBenchmark(70000)
// diffBenchmark(80000)
// diffBenchmark(90000)
// diffBenchmark(100000)
// diffBenchmark(250000)
stupidFastDiffBenchmark(250000)
stupidFastDiffBenchmark(2500000)
})

View File

@ -12,6 +12,29 @@ import { assert } from '../shared/util/assert'
import { IdMap, ModuleDoc, type VisualizationMetadata } from '../shared/yjsModel'
import * as fileFormat from './fileFormat'
/**
* The simulated metadata of this size takes c.a. 1 second on my machine. It should be quite
* bearable, even on slower machines.
*
* Full benchmark results (from edits.bench.ts):
* name hz min max mean p75 p99 p995 p999 rme samples
* · Diffing 10000 8.7370 108.66 132.93 114.46 111.73 132.93 132.93 132.93 ±11.28% 5
* · Diffing 15000 4.0483 239.82 257.99 247.02 257.99 257.99 257.99 257.99 ±9.71% 3
* · Diffing 20000 2.1577 462.40 464.52 463.46 464.52 464.52 464.52 464.52 ±2.90% 2
* · Diffing 25000 1.3744 727.61 727.61 727.61 727.61 727.61 727.61 727.61 ±0.00% 1
* · Diffing 30000 0.9850 1,015.25 1,015.25 1,015.25 1,015.25 1,015.25 1,015.25 1,015.25 ±0.00% 1
* · Diffing 35000 0.6934 1,442.27 1,442.27 1,442.27 1,442.27 1,442.27 1,442.27 1,442.27 ±0.00% 1
* · Diffing 40000 0.5141 1,945.24 1,945.24 1,945.24 1,945.24 1,945.24 1,945.24 1,945.24 ±0.00% 1
* · Diffing 50000 0.3315 3,016.59 3,016.59 3,016.59 3,016.59 3,016.59 3,016.59 3,016.59 ±0.00% 1
* · Diffing 60000 0.2270 4,405.46 4,405.46 4,405.46 4,405.46 4,405.46 4,405.46 4,405.46 ±0.00% 1
* · Diffing 70000 0.1602 6,240.52 6,240.52 6,240.52 6,240.52 6,240.52 6,240.52 6,240.52 ±0.00% 1
* · Diffing 80000 0.1233 8,110.54 8,110.54 8,110.54 8,110.54 8,110.54 8,110.54 8,110.54 ±0.00% 1
* · Diffing 90000 0.0954 10,481.47 10,481.47 10,481.47 10,481.47 10,481.47 10,481.47 10,481.47 ±0.00% 1
* · Diffing 100000 0.0788 12,683.46 12,683.46 12,683.46 12,683.46 12,683.46 12,683.46 12,683.46 ±0.00% 1
* · Diffing 250000 0.0107 93,253.97 93,253.97 93,253.97 93,253.97 93,253.97 93,253.97 93,253.97 ±0.00% 1
*/
const MAX_SIZE_FOR_NORMAL_DIFF = 30000
interface AppliedUpdates {
newCode: string | undefined
newIdMap: IdMap | undefined
@ -116,12 +139,45 @@ export function translateVisualizationFromFile(
}
}
/**
* A simplified diff algorithm.
*
* The `fast-diff` package uses Myers' https://neil.fraser.name/writing/diff/myers.pdf with some
* optimizations to generate minimal diff. Unfortunately, event this algorithm is still to slow
* for our metadata. Therefore we need to use faster algorithm which will not produce theoretically
* minimal diff.
*
* This is quick implementation making diff which just replaces entire string except common prefix
* and suffix.
*/
export function stupidFastDiff(oldString: string, newString: string): diff.Diff[] {
const minLength = Math.min(oldString.length, newString.length)
let commonPrefixLen, commonSuffixLen
for (commonPrefixLen = 0; commonPrefixLen < minLength; ++commonPrefixLen)
if (oldString[commonPrefixLen] !== newString[commonPrefixLen]) break
if (oldString.length === newString.length && oldString.length === commonPrefixLen)
return [[0, oldString]]
for (commonSuffixLen = 0; commonSuffixLen < minLength - commonPrefixLen; ++commonSuffixLen)
if (oldString.at(-1 - commonSuffixLen) !== newString.at(-1 - commonSuffixLen)) break
const commonPrefix = oldString.substring(0, commonPrefixLen)
const removed = oldString.substring(commonPrefixLen, oldString.length - commonSuffixLen)
const added = newString.substring(commonPrefixLen, newString.length - commonSuffixLen)
const commonSuffix = oldString.substring(oldString.length - commonSuffixLen, oldString.length)
return (commonPrefix ? ([[0, commonPrefix]] as diff.Diff[]) : [])
.concat(removed ? [[-1, removed]] : [])
.concat(added ? [[1, added]] : [])
.concat(commonSuffix ? [[0, commonSuffix]] : [])
}
export function applyDiffAsTextEdits(
lineOffset: number,
oldString: string,
newString: string,
): TextEdit[] {
const changes = diff(oldString, newString)
const changes =
oldString.length + newString.length > MAX_SIZE_FOR_NORMAL_DIFF
? stupidFastDiff(oldString, newString)
: diff(oldString, newString)
let newIndex = 0
let lineNum = lineOffset
let lineStartIdx = 0
@ -171,7 +227,8 @@ export function prettyPrintDiff(from: string, to: string): string {
const colRed = '\x1b[31m'
const colGreen = '\x1b[32m'
const diffs = diff(from, to)
const diffs =
from.length + to.length > MAX_SIZE_FOR_NORMAL_DIFF ? stupidFastDiff(from, to) : diff(from, to)
if (diffs.length === 1 && diffs[0]![0] === 0) return 'No changes'
let content = ''
for (let i = 0; i < diffs.length; i++) {
@ -201,3 +258,22 @@ export function prettyPrintDiff(from: string, to: string): string {
content += colReset
return content
}
if (import.meta.vitest) {
const { test, expect, bench, describe } = import.meta.vitest
test.each`
oldStr | newStr | expected
${''} | ${'foo'} | ${[[1, 'foo']]}
${'foo'} | ${''} | ${[[-1, 'foo']]}
${'foo'} | ${'foo'} | ${[[0, 'foo']]}
${'foo'} | ${'bar'} | ${[[-1, 'foo'], [1, 'bar']]}
${'ababx'} | ${'acacx'} | ${[[0, 'a'], [-1, 'bab'], [1, 'cac'], [0, 'x']]}
${'ax'} | ${'acacx'} | ${[[0, 'a'], [1, 'cac'], [0, 'x']]}
${'ababx'} | ${'ax'} | ${[[0, 'a'], [-1, 'bab'], [0, 'x']]}
${'ababx'} | ${'abacax'} | ${[[0, 'aba'], [-1, 'b'], [1, 'ca'], [0, 'x']]}
${'axxxa'} | ${'a'} | ${[[0, 'a'], [-1, 'xxxa']]}
`('Stupid diff of $oldStr and $newStr', ({ oldStr, newStr, expected }) => {
expect(stupidFastDiff(oldStr, newStr)).toEqual(expected)
})
}