Add compression to the metadata code snapshot (#11470)

close #11420

Changelog:
- update: add zlib compression to the `snapshot` metadata field
- add: implement nodejs `zlib` for polyglot ydoc-server
- add: implement nodejs `Buffer` for polyglot ydoc-server
This commit is contained in:
Dmitry Bushev 2024-11-05 14:57:43 +03:00 committed by GitHub
parent c5734a8fc8
commit 47943a2e62
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 437 additions and 19 deletions

View File

@ -1,9 +1,16 @@
import { globalExternals } from '@fal-works/esbuild-plugin-global-externals'
import esbuild from 'esbuild'
import fs from 'fs/promises'
import path from 'path'
import url from 'url'
const watchMode = process.argv[2] === 'watch'
const globals = {
'node:zlib': {
varName: 'zlib',
type: 'cjs',
},
}
const ctx = await esbuild.context({
outfile: 'dist/main.cjs',
@ -14,7 +21,7 @@ const ctx = await esbuild.context({
define: {
self: 'globalThis',
},
plugins: [usePolyglotFfi()],
plugins: [usePolyglotFfi(), globalExternals(globals)],
conditions: watchMode ? ['source'] : [],
external: ['node:url'], // Not actually used, tree-shaken out
format: 'cjs',

View File

@ -15,12 +15,13 @@
"lint": "eslint . --max-warnings=0"
},
"dependencies": {
"ydoc-shared": "workspace:*",
"ydoc-server": "workspace:*"
"ydoc-server": "workspace:*",
"ydoc-shared": "workspace:*"
},
"devDependencies": {
"esbuild-plugin-wasm": "^1.1.0",
"@fal-works/esbuild-plugin-global-externals": "^2.1.2",
"esbuild": "^0.23.0",
"esbuild-plugin-wasm": "^1.1.0",
"typescript": "^5.5.3"
}
}

View File

@ -25,7 +25,6 @@
"debug": "^4.3.6",
"fast-diff": "^1.3.0",
"isomorphic-ws": "^5.0.0",
"js-base64": "^3.7.7",
"lib0": "^0.2.85",
"y-protocols": "^1.0.5",
"ydoc-shared": "workspace:*",

View File

@ -38,8 +38,10 @@ export type IdeMetadata = z.infer<typeof ideMetadata>
export const ideMetadata = z
.object({
node: z.record(z.string().uuid(), nodeMetadata),
snapshot: z.string().optional(),
widget: z.optional(z.record(z.string().uuid(), z.record(z.string(), z.unknown()))),
// The ydoc diff algorithm places the snapshot at the end of the metadata.
// Making it the last field prevents unnecessary edits.
snapshot: z.string().optional(),
})
.passthrough()
.default(() => defaultMetadata().ide)

View File

@ -1,9 +1,9 @@
import createDebug from 'debug'
import { Base64 } from 'js-base64'
import * as json from 'lib0/json'
import * as map from 'lib0/map'
import { ObservableV2 } from 'lib0/observable'
import * as random from 'lib0/random'
import * as zlib from 'node:zlib'
import * as Ast from 'ydoc-shared/ast'
import { astCount } from 'ydoc-shared/ast'
import { EnsoFileParts, combineFileParts, splitFileContents } from 'ydoc-shared/ensoFile'
@ -486,12 +486,22 @@ class ModulePersistence extends ObservableV2<{ removed: () => void }> {
}
}
private static encodeCodeSnapshot(code: string): string {
return Base64.encode(code)
private static encodeCodeSnapshot(code: string): string | undefined {
try {
return zlib.deflateSync(Buffer.from(code, 'utf8')).toString('base64')
} catch (e) {
console.warn('Failed to encode code snapshot.', e)
return
}
}
private static decodeCodeSnapshot(snapshot: string): string {
return Base64.decode(snapshot)
private static decodeCodeSnapshot(snapshot: string): string | undefined {
try {
return zlib.inflateSync(Buffer.from(snapshot, 'base64')).toString('utf8')
} catch (e) {
console.warn('Failed to decode code snapshot.', e)
return
}
}
private sendLsUpdate(
@ -505,6 +515,7 @@ class ModulePersistence extends ObservableV2<{ removed: () => void }> {
const newSnapshot = newCode && {
snapshot: ModulePersistence.encodeCodeSnapshot(newCode),
}
if (newMetadata) newMetadata.snapshot = this.syncedMeta.ide.snapshot
const newMetadataJson =
newMetadata &&
json.stringify({
@ -568,6 +579,7 @@ class ModulePersistence extends ObservableV2<{ removed: () => void }> {
this.syncedContent = newContent
this.syncedVersion = newVersion
if (newMetadata) this.syncedMeta.ide = newMetadata
if (newSnapshot) this.syncedMeta.ide.snapshot = newSnapshot.snapshot
if (newCode) this.syncedCode = newCode
if (newIdMapToPersistJson) this.syncedIdMap = newIdMapToPersistJson
if (newMetadataJson) this.syncedMetaJson = newMetadataJson

View File

@ -34,6 +34,9 @@ public final class WebEnvironment {
var abortController = new AbortController();
abortController.initialize(ctx);
var zlib = new Zlib();
zlib.initialize(ctx);
var webSocketPolyfill = new WebSocket(executor);
webSocketPolyfill.initialize(ctx);
}

View File

@ -0,0 +1,135 @@
package org.enso.ydoc.polyfill.web;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterOutputStream;
import org.enso.ydoc.Polyfill;
import org.enso.ydoc.polyfill.Arguments;
import org.graalvm.polyglot.Context;
import org.graalvm.polyglot.Source;
import org.graalvm.polyglot.Value;
import org.graalvm.polyglot.io.ByteSequence;
import org.graalvm.polyglot.proxy.ProxyExecutable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Implements the parts of the Node.js <a href="https://nodejs.org/api/zlib.html">Zlib</a> and <a
 * href="https://nodejs.org/api/buffer.html">Buffer</a> interfaces that are delegated to the JVM.
 *
 * <p>{@link #initialize(Context)} evaluates the {@code zlib.js} resource and executes the
 * resulting value with this object as its only argument. That script calls back into {@link
 * #execute(Value...)} with a command name as the first argument followed by the command operands.
 */
final class Zlib implements Polyfill, ProxyExecutable {

  private static final Logger log = LoggerFactory.getLogger(Zlib.class);

  // Command names understood by `execute`; they must match the strings used in `zlib.js`.
  private static final String BUFFER_FROM = "buffer-from";
  private static final String BUFFER_TO_STRING = "buffer-to-string";
  private static final String ZLIB_DEFLATE_SYNC = "zlib-deflate-sync";
  private static final String ZLIB_INFLATE_SYNC = "zlib-inflate-sync";

  // Encodings that need dedicated handling; anything else is resolved as a JVM charset name.
  private static final String ENCODING_BASE64 = "base64";
  private static final String ENCODING_BASE64_URL = "base64url";

  private static final String ZLIB_JS = "zlib.js";

  @Override
  public void initialize(Context ctx) {
    final var jsSource = Source.newBuilder("js", getClass().getResource(ZLIB_JS)).buildLiteral();

    ctx.eval(jsSource).execute(this);
  }

  /**
   * Dispatches a command issued by the JavaScript side of the polyfill.
   *
   * @param arguments the command name followed by its operands: {@code (text, encoding)} for
   *     {@code buffer-from}, {@code (bytes, encoding)} for {@code buffer-to-string} and {@code
   *     (bytes)} for the two zlib commands
   * @return a {@link ByteBuffer} for {@code buffer-from} and the zlib commands, a {@link String}
   *     for {@code buffer-to-string}
   * @throws IllegalStateException if the command name is not recognized
   * @throws RuntimeException if the encoding is unknown or the (de)compression fails
   */
  @Override
  public Object execute(Value... arguments) {
    final var command = arguments[0].asString();

    // Rendering the arguments is wasted work unless debug logging is actually enabled.
    if (log.isDebugEnabled()) {
      log.debug(Arguments.toString(arguments));
    }

    return switch (command) {
      case BUFFER_FROM -> {
        final var text = arguments[1].asString();
        final var encoding = arguments[2].asString();

        yield bufferFrom(text, encoding);
      }
      case BUFFER_TO_STRING -> {
        final var byteSequence = arguments[1].as(ByteSequence.class);
        final var encoding = arguments[2].asString();

        yield bufferToString(byteSequence.toByteArray(), encoding);
      }
      case ZLIB_DEFLATE_SYNC -> {
        final var byteSequence = arguments[1].as(ByteSequence.class);

        yield deflate(byteSequence.toByteArray());
      }
      case ZLIB_INFLATE_SYNC -> {
        final var byteSequence = arguments[1].as(ByteSequence.class);

        yield inflate(byteSequence.toByteArray());
      }
      default -> throw new IllegalStateException(command);
    };
  }

  /** Converts {@code text} to bytes; a {@code null} encoding is treated as UTF-8. */
  private static ByteBuffer bufferFrom(String text, String encoding) {
    return switch (encoding) {
      case ENCODING_BASE64 -> Base64.getDecoder().decode(StandardCharsets.UTF_8.encode(text));
      case ENCODING_BASE64_URL -> Base64.getUrlDecoder().decode(StandardCharsets.UTF_8.encode(text));
      case null -> StandardCharsets.UTF_8.encode(text);
      default -> charsetFor(encoding).encode(text);
    };
  }

  /** Converts {@code bytes} to a string; a {@code null} encoding is treated as UTF-8. */
  private static String bufferToString(byte[] bytes, String encoding) {
    return switch (encoding) {
      case ENCODING_BASE64 -> Base64.getEncoder().encodeToString(bytes);
      // Node.js omits the '=' padding when encoding to 'base64url', so the default (padding)
      // URL encoder would produce output that differs from the interface being implemented.
      case ENCODING_BASE64_URL -> Base64.getUrlEncoder().withoutPadding().encodeToString(bytes);
      case null -> StandardCharsets.UTF_8.decode(ByteBuffer.wrap(bytes)).toString();
      default -> charsetFor(encoding).decode(ByteBuffer.wrap(bytes)).toString();
    };
  }

  /** Resolves a charset by name, reporting unknown or malformed names as a runtime error. */
  private static Charset charsetFor(String encoding) {
    try {
      return Charset.forName(encoding);
    } catch (IllegalArgumentException e) {
      throw new RuntimeException("Unknown encoding: " + encoding, e);
    }
  }

  /** Compresses {@code input} with the default {@link DeflaterOutputStream} settings. */
  private static ByteBuffer deflate(byte[] input) {
    final var output = new ByteArrayOutputStream();

    // Closing the stream finishes the compression and flushes the remaining bytes to `output`.
    try (final var deflater = new DeflaterOutputStream(output)) {
      deflater.write(input);
    } catch (IOException e) {
      throw new RuntimeException("Failed to deflate.", e);
    }

    return ByteBuffer.wrap(output.toByteArray());
  }

  /** Decompresses {@code input} with the default {@link InflaterOutputStream} settings. */
  private static ByteBuffer inflate(byte[] input) {
    final var output = new ByteArrayOutputStream();

    try (final var inflater = new InflaterOutputStream(output)) {
      inflater.write(input);
    } catch (IOException e) {
      throw new RuntimeException("Failed to inflate.", e);
    }

    return ByteBuffer.wrap(output.toByteArray());
  }
}

View File

@ -0,0 +1,40 @@
/**
 * Installs `Buffer` and `zlib` globals whose operations are forwarded to the host.
 *
 * `jvm` is a callable provided by the host. It is invoked with a command name as the
 * first argument followed by the operands of that command, and returns the result.
 */
(function (jvm) {
  /** Wrapper around a host-provided byte buffer. */
  class Buffer {
    #buffer;

    constructor(buffer) {
      this.#buffer = buffer;
    }

    /** Creates a buffer from `txt` interpreted using the given `encoding`. */
    static from(txt, encoding) {
      const hostBytes = jvm('buffer-from', txt, encoding);
      return new Buffer(hostBytes);
    }

    /** The wrapped host buffer. */
    get buffer() {
      return this.#buffer;
    }

    /** Renders the wrapped bytes as a string using the given `encoding`. */
    toString(encoding) {
      const hostBytes = this.#buffer;
      return jvm('buffer-to-string', hostBytes, encoding);
    }
  }

  /** Synchronous compression functions operating on the `Buffer` wrapper above. */
  class Zlib {
    deflateSync(buffer) {
      return new Buffer(jvm('zlib-deflate-sync', buffer.buffer));
    }

    inflateSync(buffer) {
      return new Buffer(jvm('zlib-inflate-sync', buffer.buffer));
    }
  }

  globalThis.Buffer = Buffer;
  globalThis.zlib = new Zlib();
})

View File

@ -0,0 +1,219 @@
package org.enso.ydoc.polyfill.web;
import java.util.concurrent.CompletableFuture;
import org.enso.ydoc.polyfill.ExecutorSetup;
import org.graalvm.polyglot.Context;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests the {@code Buffer} and {@code zlib} globals installed by {@link Zlib}.
 *
 * <p>Every test binds its inputs as JavaScript globals, evaluates a snippet on the executor
 * inherited from {@link ExecutorSetup} and compares the string value of the result.
 */
public class ZlibTest extends ExecutorSetup {

  private static final String TEXT = "Hello World!";
  // TEXT encoded as Base64.
  private static final String TEXT_BASE64 = "SGVsbG8gV29ybGQh";
  // TEXT deflated and then encoded as Base64.
  private static final String TEXT_DEFLATED = "eJzzSM3JyVcIzy/KSVEEABxJBD4=";

  private Context context;

  public ZlibTest() {}

  @Before
  public void setup() throws Exception {
    super.setup();

    final var polyfill = new Zlib();
    final var builder = WebEnvironment.createContext();

    // The context is built and initialized on the executor thread.
    final var pendingContext =
        CompletableFuture.supplyAsync(
            () -> {
              var ctx = builder.build();
              polyfill.initialize(ctx);
              return ctx;
            },
            executor);

    context = pendingContext.get();
  }

  @After
  public void tearDown() throws InterruptedException {
    super.tearDown();
    context.close();
  }

  /** Exposes {@code value} to the JavaScript code under the global {@code name}. */
  private void bind(String name, String value) {
    context.getBindings("js").putMember(name, value);
  }

  /** Evaluates {@code code} on the executor and returns the result converted to a string. */
  private String evalAsString(String code) throws Exception {
    var evaluation = CompletableFuture.supplyAsync(() -> context.eval("js", code), executor);
    return evaluation.get().asString();
  }

  @Test
  public void bufferFrom() throws Exception {
    bind("TEXT", TEXT);

    Assert.assertEquals(TEXT, evalAsString("Buffer.from(TEXT).toString()"));
  }

  @Test
  public void bufferFromUtf8() throws Exception {
    bind("TEXT", TEXT);

    Assert.assertEquals(TEXT, evalAsString("Buffer.from(TEXT, 'utf8').toString()"));
  }

  @Test
  public void bufferFromBase64() throws Exception {
    bind("TEXT_BASE64", TEXT_BASE64);

    Assert.assertEquals(TEXT, evalAsString("Buffer.from(TEXT_BASE64, 'base64').toString()"));
  }

  @Test
  public void bufferFromInvalid() throws Exception {
    bind("TEXT", TEXT);

    var message =
        evalAsString(
            """
            result = ''
            try {
            Buffer.from(TEXT, 'invalid').toString()
            } catch (e) {
            result = e.message
            }
            result
            """);

    Assert.assertEquals("Unknown encoding: invalid", message);
  }

  @Test
  public void bufferToUtf8() throws Exception {
    bind("TEXT", TEXT);

    Assert.assertEquals(TEXT, evalAsString("Buffer.from(TEXT).toString('utf8')"));
  }

  @Test
  public void bufferToBase64() throws Exception {
    bind("TEXT", TEXT);

    Assert.assertEquals(TEXT_BASE64, evalAsString("Buffer.from(TEXT).toString('base64')"));
  }

  @Test
  public void bufferToInvalid() throws Exception {
    bind("TEXT", TEXT);

    var message =
        evalAsString(
            """
            result = ''
            try {
            Buffer.from(TEXT).toString('invalid')
            } catch (e) {
            result = e.message
            }
            result
            """);

    Assert.assertEquals("Unknown encoding: invalid", message);
  }

  @Test
  public void bufferToFromBase64() throws Exception {
    bind("TEXT", TEXT);

    var roundTripped =
        evalAsString(
            """
            let textBase64 = Buffer.from(TEXT).toString('base64')
            Buffer.from(textBase64, 'base64').toString()
            """);

    Assert.assertEquals(TEXT, roundTripped);
  }

  @Test
  public void zlibDeflateSync() throws Exception {
    bind("TEXT", TEXT);

    var deflated =
        evalAsString(
            """
            let buffer = Buffer.from(TEXT)
            zlib.deflateSync(buffer).toString('base64')
            """);

    Assert.assertEquals(TEXT_DEFLATED, deflated);
  }

  @Test
  public void zlibInflateSync() throws Exception {
    bind("TEXT_DEFLATED", TEXT_DEFLATED);

    var inflated =
        evalAsString(
            """
            let buffer = Buffer.from(TEXT_DEFLATED, 'base64')
            zlib.inflateSync(buffer).toString()
            """);

    Assert.assertEquals(TEXT, inflated);
  }

  @Test
  public void zlibDeflateInflate() throws Exception {
    bind("TEXT", TEXT);

    var roundTripped =
        evalAsString(
            """
            let buffer = Buffer.from(TEXT)
            zlib.inflateSync(zlib.deflateSync(buffer)).toString()
            """);

    Assert.assertEquals(TEXT, roundTripped);
  }

  @Test
  public void zlibInflateCorrupted() throws Exception {
    bind("TEXT", TEXT);

    var message =
        evalAsString(
            """
            let buffer = Buffer.from('corrupted')
            let result = ''
            try {
            zlib.inflateSync(buffer).toString()
            } catch (e) {
            result = e.message
            }
            result
            """);

    Assert.assertEquals("Failed to inflate.", message);
  }
}

View File

@ -683,9 +683,6 @@ importers:
isomorphic-ws:
specifier: ^5.0.0
version: 5.0.0(ws@8.18.0)
js-base64:
specifier: ^3.7.7
version: 3.7.7
lib0:
specifier: ^0.2.85
version: 0.2.94
@ -749,6 +746,9 @@ importers:
specifier: workspace:*
version: link:../ydoc-shared
devDependencies:
'@fal-works/esbuild-plugin-global-externals':
specifier: ^2.1.2
version: 2.1.2
esbuild:
specifier: ^0.23.0
version: 0.23.0
@ -1840,6 +1840,9 @@ packages:
resolution: {integrity: sha512-HFZ4Mp26nbWk9d/BpvP0YNL6W4UoZF0VFcTw/aPPA8RpOxeFQgK+ClABGgAUXs9Y/RGX/l1vOmrqz1MQt9MNuw==}
engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0}
'@fal-works/esbuild-plugin-global-externals@2.1.2':
resolution: {integrity: sha512-cEee/Z+I12mZcFJshKcCqC8tuX5hG3s+d+9nZ3LabqKF1vKdF41B92pJVCBggjAGORAeOzyyDDKrZwIkLffeOQ==}
'@fast-check/vitest@0.0.8':
resolution: {integrity: sha512-cFrcu7nwH+rk1qm1J4YrM1k4MIwvIHG7MrQUMGizqPe58XsvvpZz0X9Xkx1e+xaNg9s1YRVTd241WSR0dK/SpQ==}
peerDependencies:
@ -5403,9 +5406,6 @@ packages:
jpeg-js@0.2.0:
resolution: {integrity: sha512-Ni9PffhJtYtdD7VwxH6V2MnievekGfUefosGCHadog0/jAevRu6HPjYeMHbUemn0IPE8d4wGa8UsOGsX+iKy2g==}
js-base64@3.7.7:
resolution: {integrity: sha512-7rCnleh0z2CkXhH67J8K1Ytz0b2Y+yxTPL+/KOJoa20hfnVQ/3/T6W/KflYI4bRHRagNeXeU2bkNGI3v1oS/lw==}
js-beautify@1.15.1:
resolution: {integrity: sha512-ESjNzSlt/sWE8sciZH8kBF8BPlwXPwhR6pWKAw8bw4Bwj+iZcnKW6ONWUutJ7eObuBZQpiIb8S7OYspWrKt7rA==}
engines: {node: '>=14'}
@ -8888,6 +8888,8 @@ snapshots:
dependencies:
levn: 0.4.1
'@fal-works/esbuild-plugin-global-externals@2.1.2': {}
'@fast-check/vitest@0.0.8(vitest@1.6.0(@types/node@20.11.21)(jsdom@24.1.0)(lightningcss@1.25.1))':
dependencies:
fast-check: 3.19.0
@ -13582,8 +13584,6 @@ snapshots:
jpeg-js@0.2.0: {}
js-base64@3.7.7: {}
js-beautify@1.15.1:
dependencies:
config-chain: 1.1.13