Fix for blobs backup (#6751)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
Andrey Sobolev 2024-09-27 14:24:29 +07:00 committed by GitHub
parent b359d080ac
commit 28dbc1bae5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 79 additions and 19 deletions

View File

@ -196,7 +196,13 @@
{#if isAdmin && ws.lastVisit != null && ws.lastVisit !== 0} {#if isAdmin && ws.lastVisit != null && ws.lastVisit !== 0}
<div class="text-sm"> <div class="text-sm">
{#if ws.backupInfo != null} {#if ws.backupInfo != null}
{ws.backupInfo.backupSize}Mb - {@const sz = ws.backupInfo.dataSize + ws.backupInfo.blobsSize}
{@const szGb = Math.round((sz * 100) / 1024) / 100}
{#if szGb > 0}
{Math.round((sz * 100) / 1024) / 100}Gb -
{:else}
{Math.round(sz)}Mb -
{/if}
{/if} {/if}
({lastUsageDays} days) ({lastUsageDays} days)
</div> </div>

View File

@ -43,7 +43,7 @@ import { BlobClient, createClient } from '@hcengineering/server-client'
import { fullTextPushStagePrefix, type StorageAdapter } from '@hcengineering/server-core' import { fullTextPushStagePrefix, type StorageAdapter } from '@hcengineering/server-core'
import { generateToken } from '@hcengineering/server-token' import { generateToken } from '@hcengineering/server-token'
import { connect } from '@hcengineering/server-tool' import { connect } from '@hcengineering/server-tool'
import { createWriteStream, existsSync, mkdirSync, statSync } from 'node:fs' import { createWriteStream, existsSync, mkdirSync } from 'node:fs'
import { dirname } from 'node:path' import { dirname } from 'node:path'
import { PassThrough } from 'node:stream' import { PassThrough } from 'node:stream'
import { createGzip } from 'node:zlib' import { createGzip } from 'node:zlib'
@ -132,6 +132,7 @@ async function loadDigest (
date?: number date?: number
): Promise<Map<Ref<Doc>, string>> { ): Promise<Map<Ref<Doc>, string>> {
ctx = ctx.newChild('load digest', { domain, count: snapshots.length }) ctx = ctx.newChild('load digest', { domain, count: snapshots.length })
ctx.info('load-digest', { domain, count: snapshots.length })
const result = new Map<Ref<Doc>, string>() const result = new Map<Ref<Doc>, string>()
for (const s of snapshots) { for (const s of snapshots) {
const d = s.domains[domain] const d = s.domains[domain]
@ -492,9 +493,9 @@ async function cleanDomain (ctx: MeasureContext, connection: CoreClient & Backup
} }
} }
function doTrimHash (s: string | undefined): string { function doTrimHash (s: string | undefined): string | undefined {
if (s == null) { if (s == null) {
return '' return undefined
} }
if (s.startsWith('"') && s.endsWith('"')) { if (s.startsWith('"') && s.endsWith('"')) {
return s.slice(1, s.length - 1) return s.slice(1, s.length - 1)
@ -716,6 +717,24 @@ export async function backup (
time: Date.now() - st, time: Date.now() - st,
workspace: workspaceId.name workspace: workspaceId.name
}) })
const oldHash = new Map<Ref<Doc>, string>()
/**
 * Drops a document id from the pending retrieve list and from every chunk
 * already queued in `needRetrieveChunks`.
 *
 * At most one occurrence (the first) is removed from each list. Every removal
 * also decrements the surrounding `processed` and `changed` counters, which
 * are declared outside this function.
 *
 * @param needRetrieve - list of ids currently being collected; it is checked first
 * @param id - document id to remove
 */
function removeFromNeedRetrieve (needRetrieve: Ref<Doc>[], id: string): void {
  const docId = id as Ref<Doc>
  // Same order as before: the current list first, then each queued chunk.
  for (const list of [needRetrieve, ...needRetrieveChunks]) {
    const at = list.indexOf(docId)
    if (at !== -1) {
      list.splice(at, 1)
      processed--
      changed--
    }
  }
}
while (true) { while (true) {
try { try {
const currentChunk = await ctx.with('loadChunk', {}, () => connection.loadChunk(domain, idx, options.recheck)) const currentChunk = await ctx.with('loadChunk', {}, () => connection.loadChunk(domain, idx, options.recheck))
@ -741,17 +760,31 @@ export async function backup (
}) })
st = Date.now() st = Date.now()
} }
const _hash = doTrimHash(hash) const _hash = doTrimHash(hash) as string
const kHash = doTrimHash(digest.get(id as Ref<Doc>)) const kHash = doTrimHash(digest.get(id as Ref<Doc>) ?? oldHash.get(id as Ref<Doc>))
if (kHash !== undefined) { if (kHash !== undefined) {
digest.delete(id as Ref<Doc>) if (digest.delete(id as Ref<Doc>)) {
oldHash.set(id as Ref<Doc>, kHash)
}
if (kHash !== _hash) { if (kHash !== _hash) {
if (changes.updated.has(id as Ref<Doc>)) {
removeFromNeedRetrieve(needRetrieve, id as Ref<Doc>)
}
changes.updated.set(id as Ref<Doc>, _hash) changes.updated.set(id as Ref<Doc>, _hash)
needRetrieve.push(id as Ref<Doc>) needRetrieve.push(id as Ref<Doc>)
currentNeedRetrieveSize += size currentNeedRetrieveSize += size
changed++ changed++
} else if (changes.updated.has(id as Ref<Doc>)) {
// Same hash as the stored one: drop the earlier pending update.
changes.updated.delete(id as Ref<Doc>)
removeFromNeedRetrieve(needRetrieve, id as Ref<Doc>)
processed -= 1
} }
} else { } else {
if (domain === DOMAIN_BLOB && changes.added.has(id as Ref<Doc>)) {
// Remove the earlier needRetrieve entry in case of duplicates.
removeFromNeedRetrieve(needRetrieve, id)
}
changes.added.set(id as Ref<Doc>, _hash) changes.added.set(id as Ref<Doc>, _hash)
needRetrieve.push(id as Ref<Doc>) needRetrieve.push(id as Ref<Doc>)
changed++ changed++
@ -759,7 +792,9 @@ export async function backup (
} }
if (currentNeedRetrieveSize > retrieveChunkSize) { if (currentNeedRetrieveSize > retrieveChunkSize) {
if (needRetrieve.length > 0) {
needRetrieveChunks.push(needRetrieve) needRetrieveChunks.push(needRetrieve)
}
currentNeedRetrieveSize = 0 currentNeedRetrieveSize = 0
needRetrieve = [] needRetrieve = []
} }
@ -841,12 +876,17 @@ export async function backup (
const totalChunks = needRetrieveChunks.flatMap((it) => it.length).reduce((p, c) => p + c, 0) const totalChunks = needRetrieveChunks.flatMap((it) => it.length).reduce((p, c) => p + c, 0)
let processed = 0 let processed = 0
let blobs = 0
while (needRetrieveChunks.length > 0) { while (needRetrieveChunks.length > 0) {
if (canceled()) { if (canceled()) {
return return
} }
const needRetrieve = needRetrieveChunks.shift() as Ref<Doc>[] const needRetrieve = needRetrieveChunks.shift() as Ref<Doc>[]
if (needRetrieve.length === 0) {
continue
}
ctx.info('Retrieve chunk', { ctx.info('Retrieve chunk', {
needRetrieve: needRetrieveChunks.reduce((v, docs) => v + docs.length, 0), needRetrieve: needRetrieveChunks.reduce((v, docs) => v + docs.length, 0),
toLoad: needRetrieve.length, toLoad: needRetrieve.length,
@ -855,6 +895,10 @@ export async function backup (
let docs: Doc[] = [] let docs: Doc[] = []
try { try {
docs = await ctx.with('load-docs', {}, async (ctx) => await connection.loadDocs(domain, needRetrieve)) docs = await ctx.with('load-docs', {}, async (ctx) => await connection.loadDocs(domain, needRetrieve))
if (docs.length !== needRetrieve.length) {
const nr = new Set(docs.map((it) => it._id))
ctx.error('failed to retrieve all documents', { missing: needRetrieve.filter((it) => !nr.has(it)) })
}
ops++ ops++
} catch (err: any) { } catch (err: any) {
ctx.error('error loading docs', { domain, err, workspace: workspaceId.name }) ctx.error('error loading docs', { domain, err, workspace: workspaceId.name })
@ -998,7 +1042,8 @@ export async function backup (
ctx.error('error packing file', { err }) ctx.error('error packing file', { err })
} }
}) })
if (blob.size > 1024 * 1024) { blobs++
if (blob.size > 1024 * 1024 || blobs >= 10) {
ctx.info('download blob', { ctx.info('download blob', {
_id: blob._id, _id: blob._id,
contentType: blob.contentType, contentType: blob.contentType,
@ -1006,6 +1051,9 @@ export async function backup (
provider: blob.provider, provider: blob.provider,
pending: docs.length pending: docs.length
}) })
if (blobs >= 10) {
blobs = 0
}
} }
printDownloaded('', blob.size) printDownloaded('', blob.size)
@ -1179,15 +1227,16 @@ export async function backupDownload (storage: BackupStorage, storeIn: string):
const backupInfo: BackupInfo = JSON.parse(gunzipSync(await storage.loadFile(infoFile)).toString()) const backupInfo: BackupInfo = JSON.parse(gunzipSync(await storage.loadFile(infoFile)).toString())
console.log('workspace:', backupInfo.workspace ?? '', backupInfo.version) console.log('workspace:', backupInfo.workspace ?? '', backupInfo.version)
const addFileSize = async (file: string | undefined | null): Promise<void> => {
if (file != null && (await storage.exists(file))) { const addFileSize = async (file: string | undefined | null, force: boolean = false): Promise<void> => {
const fileSize = await storage.stat(file) if (file != null) {
const target = join(storeIn, file) const target = join(storeIn, file)
const dir = dirname(target) const dir = dirname(target)
if (!existsSync(dir)) { if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true }) mkdirSync(dir, { recursive: true })
} }
if (!existsSync(target) || fileSize !== statSync(target).size) { if (!existsSync(target) || force) {
const fileSize = await storage.stat(file)
console.log('downloading', file, fileSize) console.log('downloading', file, fileSize)
const readStream = await storage.load(file) const readStream = await storage.load(file)
const outp = createWriteStream(target) const outp = createWriteStream(target)
@ -1200,8 +1249,10 @@ export async function backupDownload (storage: BackupStorage, storeIn: string):
resolve() resolve()
}) })
}) })
}
size += fileSize size += fileSize
} else {
console.log('file-same', file)
}
} }
} }
@ -1217,7 +1268,7 @@ export async function backupDownload (storage: BackupStorage, storeIn: string):
} }
} }
} }
await addFileSize(infoFile) await addFileSize(infoFile, true)
console.log('Backup size', size / (1024 * 1024), 'Mb') console.log('Backup size', size / (1024 * 1024), 'Mb')
} }
@ -1693,7 +1744,7 @@ export async function compactBackup (
const oldSnapshots = [...backupInfo.snapshots] const oldSnapshots = [...backupInfo.snapshots]
backupInfo.snapshots = [snapshot] backupInfo.snapshots = [snapshot]
let backupIndex = `${backupInfo.snapshotsIndex ?? oldSnapshots.length}` let backupIndex = `${(backupInfo.snapshotsIndex ?? oldSnapshots.length) + 1}`
while (backupIndex.length < 6) { while (backupIndex.length < 6) {
backupIndex = '0' + backupIndex backupIndex = '0' + backupIndex
} }

View File

@ -95,9 +95,12 @@ export class AggregatorStorageAdapter implements StorageAdapter, StorageAdapterE
for (const d of docs) { for (const d of docs) {
const blobInfo = existingBlobs.get(d._id) const blobInfo = existingBlobs.get(d._id)
if ( if (
blobInfo === undefined || blobInfo === undefined || // Blob info undefined
this.doTrimHash(blobInfo.etag) !== this.doTrimHash(d.etag) || // Providers are the same and the etag or size is different.
blobInfo.size !== d.size (d.provider === blobInfo.provider &&
(this.doTrimHash(blobInfo.etag) !== this.doTrimHash(d.etag) || blobInfo.size !== d.size)) ||
// We have replacement in default
(d.provider === this.defaultAdapter && blobInfo?.provider !== d.provider)
) { ) {
const stat = await this.adapters.get(d.provider)?.stat(ctx, workspaceId, d._id) const stat = await this.adapters.get(d.provider)?.stat(ctx, workspaceId, d._id)
if (stat !== undefined) { if (stat !== undefined) {