Mirror of https://github.com/hcengineering/platform.git (synced 2025-01-08 21:27:45 +03:00)
Fix for blobs backup (#6751)
Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
parent b359d080ac
commit 28dbc1bae5
@@ -196,7 +196,13 @@
   {#if isAdmin && ws.lastVisit != null && ws.lastVisit !== 0}
     <div class="text-sm">
       {#if ws.backupInfo != null}
-        {ws.backupInfo.backupSize}Mb -
+        {@const sz = ws.backupInfo.dataSize + ws.backupInfo.blobsSize}
+        {@const szGb = Math.round((sz * 100) / 1024) / 100}
+        {#if szGb > 0}
+          {Math.round((sz * 100) / 1024) / 100}Gb -
+        {:else}
+          {Math.round(sz)}Mb -
+        {/if}
       {/if}
       ({lastUsageDays} days)
     </div>
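The admin workspace list now derives the displayed backup size from dataSize + blobsSize and switches between Gb and Mb. A minimal TypeScript sketch of the same formatting, assuming both fields are reported in megabytes (the Mb fallback suggests this, but the diff does not state the unit); formatBackupSize is a hypothetical name:

// Sketch only; assumes dataSize/blobsSize are megabytes.
function formatBackupSize (dataSize: number, blobsSize: number): string {
  const sz = dataSize + blobsSize
  const szGb = Math.round((sz * 100) / 1024) / 100 // gigabytes with two decimals
  return szGb > 0 ? `${szGb}Gb` : `${Math.round(sz)}Mb`
}

// formatBackupSize(1500, 600) === '2.05Gb'; formatBackupSize(3, 1) === '4Mb'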
@@ -43,7 +43,7 @@ import { BlobClient, createClient } from '@hcengineering/server-client'
 import { fullTextPushStagePrefix, type StorageAdapter } from '@hcengineering/server-core'
 import { generateToken } from '@hcengineering/server-token'
 import { connect } from '@hcengineering/server-tool'
-import { createWriteStream, existsSync, mkdirSync, statSync } from 'node:fs'
+import { createWriteStream, existsSync, mkdirSync } from 'node:fs'
 import { dirname } from 'node:path'
 import { PassThrough } from 'node:stream'
 import { createGzip } from 'node:zlib'
@@ -132,6 +132,7 @@ async function loadDigest (
   date?: number
 ): Promise<Map<Ref<Doc>, string>> {
   ctx = ctx.newChild('load digest', { domain, count: snapshots.length })
+  ctx.info('load-digest', { domain, count: snapshots.length })
   const result = new Map<Ref<Doc>, string>()
   for (const s of snapshots) {
     const d = s.domains[domain]
@@ -492,9 +493,9 @@ async function cleanDomain (ctx: MeasureContext, connection: CoreClient & Backup
   }
 }
 
-function doTrimHash (s: string | undefined): string {
+function doTrimHash (s: string | undefined): string | undefined {
   if (s == null) {
-    return ''
+    return undefined
   }
   if (s.startsWith('"') && s.endsWith('"')) {
     return s.slice(1, s.length - 1)
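doTrimHash now reports a missing hash as undefined instead of ''. With the old signature a document with no digest entry still produced kHash === '', which passed the later `kHash !== undefined` check and was treated as a known document; returning undefined lets the caller route it to the "added" branch instead. A small sketch of the difference, with oldTrim/newTrim as hypothetical stand-in names:

// Old behaviour: a missing hash collapses to '' and still looks "known".
const oldTrim = (s?: string): string => {
  if (s == null) return ''
  return s.startsWith('"') && s.endsWith('"') ? s.slice(1, -1) : s
}

// New behaviour: a missing hash stays undefined, so `kHash !== undefined` is a real presence check.
const newTrim = (s?: string): string | undefined => {
  if (s == null) return undefined
  return s.startsWith('"') && s.endsWith('"') ? s.slice(1, -1) : s
}

oldTrim(undefined) !== undefined // true — missing hash slipped into the "known" branch
newTrim(undefined) === undefined // true — missing hash is handled as "not present"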
@@ -716,6 +717,24 @@ export async function backup (
           time: Date.now() - st,
           workspace: workspaceId.name
         })
+        const oldHash = new Map<Ref<Doc>, string>()
+
+        function removeFromNeedRetrieve (needRetrieve: Ref<Doc>[], id: string): void {
+          const pos = needRetrieve.indexOf(id as Ref<Doc>)
+          if (pos !== -1) {
+            needRetrieve.splice(pos, 1)
+            processed--
+            changed--
+          }
+          for (const ch of needRetrieveChunks) {
+            const pos = ch.indexOf(id as Ref<Doc>)
+            if (pos !== -1) {
+              ch.splice(pos, 1)
+              processed--
+              changed--
+            }
+          }
+        }
         while (true) {
           try {
             const currentChunk = await ctx.with('loadChunk', {}, () => connection.loadChunk(domain, idx, options.recheck))
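The new removeFromNeedRetrieve helper un-queues a document id from the current batch and from every already-collected chunk, rolling the processed/changed counters back, so a later occurrence of the same id in the chunk stream does not leave a stale retrieval queued. A standalone sketch of the queue part (removeFromQueues is a hypothetical generic version without the counter bookkeeping the real closure does):

// Remove the first occurrence of id from each queue; returns how many queues were touched.
function removeFromQueues<T> (queues: T[][], id: T): number {
  let removed = 0
  for (const q of queues) {
    const pos = q.indexOf(id)
    if (pos !== -1) {
      q.splice(pos, 1)
      removed++
    }
  }
  return removed
}

// removeFromQueues([['a', 'b'], ['b', 'c']], 'b') === 2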
@@ -741,17 +760,31 @@ export async function backup (
               })
               st = Date.now()
             }
-            const _hash = doTrimHash(hash)
-            const kHash = doTrimHash(digest.get(id as Ref<Doc>))
+            const _hash = doTrimHash(hash) as string
+            const kHash = doTrimHash(digest.get(id as Ref<Doc>) ?? oldHash.get(id as Ref<Doc>))
             if (kHash !== undefined) {
-              digest.delete(id as Ref<Doc>)
+              if (digest.delete(id as Ref<Doc>)) {
+                oldHash.set(id as Ref<Doc>, kHash)
+              }
               if (kHash !== _hash) {
+                if (changes.updated.has(id as Ref<Doc>)) {
+                  removeFromNeedRetrieve(needRetrieve, id as Ref<Doc>)
+                }
                 changes.updated.set(id as Ref<Doc>, _hash)
                 needRetrieve.push(id as Ref<Doc>)
                 currentNeedRetrieveSize += size
                 changed++
+              } else if (changes.updated.has(id as Ref<Doc>)) {
+                // We have same
+                changes.updated.delete(id as Ref<Doc>)
+                removeFromNeedRetrieve(needRetrieve, id as Ref<Doc>)
+                processed -= 1
               }
             } else {
+              if (domain === DOMAIN_BLOB && changes.added.has(id as Ref<Doc>)) {
+                // We need to clean old need retrieve in case of duplicates.
+                removeFromNeedRetrieve(needRetrieve, id)
+              }
               changes.added.set(id as Ref<Doc>, _hash)
               needRetrieve.push(id as Ref<Doc>)
               changed++
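oldHash remembers the digest value of ids that have already been consumed from digest, so that a duplicate occurrence of the same id later in the chunk stream (the DOMAIN_BLOB branch suggests duplicates are expected for blobs) is still compared against a known hash instead of being re-added and re-downloaded. A runnable trace of that bookkeeping with hypothetical ids and hashes:

const digest = new Map<string, string>([['blob1', 'etag-a']])
const oldHash = new Map<string, string>()
const added: string[] = []

for (const { id, hash } of [{ id: 'blob1', hash: 'etag-a' }, { id: 'blob1', hash: 'etag-a' }]) {
  const kHash = digest.get(id) ?? oldHash.get(id)
  if (kHash !== undefined) {
    if (digest.delete(id)) oldHash.set(id, kHash) // keep the hash for later duplicates
    // hash === kHash on both passes, so nothing is queued for retrieval
  } else {
    added.push(id) // without oldHash the second pass would land here and re-download the blob
  }
}
// added.length === 0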
@@ -759,7 +792,9 @@ export async function backup (
             }
 
             if (currentNeedRetrieveSize > retrieveChunkSize) {
+              if (needRetrieve.length > 0) {
                 needRetrieveChunks.push(needRetrieve)
+              }
               currentNeedRetrieveSize = 0
               needRetrieve = []
             }
@@ -841,12 +876,17 @@ export async function backup (
 
       const totalChunks = needRetrieveChunks.flatMap((it) => it.length).reduce((p, c) => p + c, 0)
       let processed = 0
+      let blobs = 0
+
       while (needRetrieveChunks.length > 0) {
         if (canceled()) {
           return
         }
         const needRetrieve = needRetrieveChunks.shift() as Ref<Doc>[]
 
+        if (needRetrieve.length === 0) {
+          continue
+        }
         ctx.info('Retrieve chunk', {
           needRetrieve: needRetrieveChunks.reduce((v, docs) => v + docs.length, 0),
           toLoad: needRetrieve.length,
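Together with the `needRetrieve.length > 0` guard in the previous hunk, this keeps empty batches out of needRetrieveChunks and skips any that slip through, so the retrieve loop never calls loadDocs with an empty id list. A trivial illustration with hypothetical queue contents:

const needRetrieveChunks: string[][] = [[], ['d1'], []]
const batches = needRetrieveChunks.filter((ch) => ch.length > 0) // only ['d1'] reaches loadDocs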
@@ -855,6 +895,10 @@ export async function backup (
         let docs: Doc[] = []
         try {
           docs = await ctx.with('load-docs', {}, async (ctx) => await connection.loadDocs(domain, needRetrieve))
+          if (docs.length !== needRetrieve.length) {
+            const nr = new Set(docs.map((it) => it._id))
+            ctx.error('failed to retrieve all documents', { missing: needRetrieve.filter((it) => !nr.has(it)) })
+          }
           ops++
         } catch (err: any) {
           ctx.error('error loading docs', { domain, err, workspace: workspaceId.name })
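loadDocs can evidently return fewer documents than were requested; the new check logs exactly which ids came back missing. A minimal sketch of that set difference (findMissing is a hypothetical helper; it only assumes documents carry an _id, as in the surrounding code):

function findMissing (requested: string[], received: Array<{ _id: string }>): string[] {
  const got = new Set(received.map((it) => it._id))
  return requested.filter((id) => !got.has(id))
}

// findMissing(['d1', 'd2', 'd3'], [{ _id: 'd1' }, { _id: 'd3' }]) -> ['d2']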
@@ -998,7 +1042,8 @@ export async function backup (
                   ctx.error('error packing file', { err })
                 }
               })
-              if (blob.size > 1024 * 1024) {
+              blobs++
+              if (blob.size > 1024 * 1024 || blobs >= 10) {
                 ctx.info('download blob', {
                   _id: blob._id,
                   contentType: blob.contentType,
@@ -1006,6 +1051,9 @@ export async function backup (
                   provider: blob.provider,
                   pending: docs.length
                 })
+                if (blobs >= 10) {
+                  blobs = 0
+                }
               }
 
               printDownloaded('', blob.size)
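Together with the previous hunk, the blobs counter turns the 'download blob' message into sampled logging: every blob over 1 Mb is logged, otherwise roughly every tenth blob, after which the counter resets. A condensed sketch of the rule (shouldLogBlob is a hypothetical name; the assumption is that the reset is meant to restart the sampling window):

let blobs = 0

function shouldLogBlob (sizeBytes: number): boolean {
  blobs++
  const log = sizeBytes > 1024 * 1024 || blobs >= 10
  if (blobs >= 10) {
    blobs = 0 // restart the sampling window
  }
  return log
}

// shouldLogBlob(2 * 1024 * 1024) === true; small blobs only log on every 10th call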
@@ -1179,15 +1227,16 @@ export async function backupDownload (storage: BackupStorage, storeIn: string):
 
   const backupInfo: BackupInfo = JSON.parse(gunzipSync(await storage.loadFile(infoFile)).toString())
   console.log('workspace:', backupInfo.workspace ?? '', backupInfo.version)
-  const addFileSize = async (file: string | undefined | null): Promise<void> => {
-    if (file != null && (await storage.exists(file))) {
-      const fileSize = await storage.stat(file)
+  const addFileSize = async (file: string | undefined | null, force: boolean = false): Promise<void> => {
+    if (file != null) {
       const target = join(storeIn, file)
       const dir = dirname(target)
       if (!existsSync(dir)) {
         mkdirSync(dir, { recursive: true })
       }
-      if (!existsSync(target) || fileSize !== statSync(target).size) {
+      if (!existsSync(target) || force) {
+        const fileSize = await storage.stat(file)
         console.log('downloading', file, fileSize)
         const readStream = await storage.load(file)
         const outp = createWriteStream(target)
@@ -1200,8 +1249,10 @@ export async function backupDownload (storage: BackupStorage, storeIn: string):
             resolve()
           })
         })
-      }
         size += fileSize
+      } else {
+        console.log('file-same', file)
+      }
     }
   }
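backupDownload's addFileSize now takes a force flag, drops the storage.exists and size comparison, and only calls storage.stat when the file will actually be fetched; an already-present target is otherwise reported as 'file-same'. A self-contained sketch of that flow, where downloadIfNeeded and the SimpleStorage interface are hypothetical stand-ins for the real addFileSize closure and BackupStorage:

import { createWriteStream, existsSync, mkdirSync } from 'node:fs'
import { dirname, join } from 'node:path'
import { pipeline } from 'node:stream/promises'
import type { Readable } from 'node:stream'

// Hypothetical minimal interface; the real BackupStorage has more methods.
interface SimpleStorage {
  stat: (file: string) => Promise<number>
  load: (file: string) => Promise<Readable>
}

async function downloadIfNeeded (storage: SimpleStorage, storeIn: string, file: string, force = false): Promise<number> {
  const target = join(storeIn, file)
  mkdirSync(dirname(target), { recursive: true })
  if (!existsSync(target) || force) {
    const fileSize = await storage.stat(file) // stat only when a download will actually happen
    console.log('downloading', file, fileSize)
    await pipeline(await storage.load(file), createWriteStream(target))
    return fileSize
  }
  console.log('file-same', file)
  return 0
}

The next hunk passes force = true for the index file, so the backup index is always re-downloaded.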
@@ -1217,7 +1268,7 @@ export async function backupDownload (storage: BackupStorage, storeIn: string):
       }
     }
   }
-  await addFileSize(infoFile)
+  await addFileSize(infoFile, true)
 
   console.log('Backup size', size / (1024 * 1024), 'Mb')
 }
@@ -1693,7 +1744,7 @@ export async function compactBackup (
     const oldSnapshots = [...backupInfo.snapshots]
 
     backupInfo.snapshots = [snapshot]
-    let backupIndex = `${backupInfo.snapshotsIndex ?? oldSnapshots.length}`
+    let backupIndex = `${(backupInfo.snapshotsIndex ?? oldSnapshots.length) + 1}`
     while (backupIndex.length < 6) {
       backupIndex = '0' + backupIndex
     }
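compactBackup now advances the snapshot index by one before zero-padding, so the compacted snapshot gets a fresh six-digit index instead of reusing the last one. A worked example with a hypothetical current index:

const snapshotsIndex = 12
let backupIndex = `${snapshotsIndex + 1}` // '13' rather than '12'
while (backupIndex.length < 6) {
  backupIndex = '0' + backupIndex
}
// backupIndex === '000013'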
@@ -95,9 +95,12 @@ export class AggregatorStorageAdapter implements StorageAdapter, StorageAdapterE
     for (const d of docs) {
       const blobInfo = existingBlobs.get(d._id)
       if (
-        blobInfo === undefined ||
-        this.doTrimHash(blobInfo.etag) !== this.doTrimHash(d.etag) ||
-        blobInfo.size !== d.size
+        blobInfo === undefined || // Blob info undefined
+        // Provider are same and etag or size are diffrent.
+        (d.provider === blobInfo.provider &&
+          (this.doTrimHash(blobInfo.etag) !== this.doTrimHash(d.etag) || blobInfo.size !== d.size)) ||
+        // We have replacement in default
+        (d.provider === this.defaultAdapter && blobInfo?.provider !== d.provider)
       ) {
         const stat = await this.adapters.get(d.provider)?.stat(ctx, workspaceId, d._id)
         if (stat !== undefined) {