Mirror of https://github.com/hcengineering/platform.git (synced 2024-12-19 08:51:37 +03:00)
UBERF-8950 Expose blob list method in datalake (#7484)
Signed-off-by: Alexander Onnikov <Alexander.Onnikov@xored.com>
parent 2ea25fa4ab
commit d81137729e
@@ -25,9 +25,16 @@ export interface ObjectMetadata {
   lastModified: number
   name: string
   type: string
+  etag: string
   size?: number
 }
 
+/** @public */
+export interface ListObjectOutput {
+  cursor: string | undefined
+  blobs: Omit<ObjectMetadata, 'lastModified'>[]
+}
+
 /** @public */
 export interface StatObjectOutput {
   lastModified: number
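For reference, a response shaped by these interfaces looks as follows; the values are illustrative only, and the cursor is simply the name of the last blob returned (see the listing implementation further down):

const sample: ListObjectOutput = {
  cursor: 'blob-0042',
  blobs: [
    { name: 'blob-0041', etag: 'a1b2c3', size: 1024, type: 'image/png' },
    { name: 'blob-0042', etag: 'd4e5f6', size: 2048, type: 'application/pdf' }
  ]
}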
@@ -36,6 +43,13 @@ export interface StatObjectOutput {
   size?: number
 }
 
+/** @public */
+export interface UploadObjectParams {
+  lastModified: number
+  type: string
+  size?: number
+}
+
 interface BlobUploadError {
   key: string
   error: string
@@ -68,6 +82,23 @@ export class DatalakeClient {
     return concatLink(this.endpoint, path)
   }
 
+  async listObjects (
+    ctx: MeasureContext,
+    workspace: WorkspaceId,
+    cursor: string | undefined
+  ): Promise<ListObjectOutput> {
+    const limit = 100
+    const path = `/blob/${workspace.name}`
+    const url = new URL(concatLink(this.endpoint, path))
+    url.searchParams.append('limit', String(limit))
+    if (cursor !== undefined) {
+      url.searchParams.append('cursor', cursor)
+    }
+
+    const response = await fetchSafe(ctx, url)
+    return (await response.json()) as ListObjectOutput
+  }
+
   async getObject (ctx: MeasureContext, workspace: WorkspaceId, objectName: string): Promise<Readable> {
     const url = this.getObjectUrl(ctx, workspace, objectName)
 
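The new listObjects method fetches one page of blob descriptors per call and hands back an opaque cursor; feeding the cursor into the next call continues the listing until it comes back undefined. A minimal caller-side sketch, assuming a constructed client plus ctx and workspace values already in scope:

async function listAllBlobs (
  client: DatalakeClient,
  ctx: MeasureContext,
  workspace: WorkspaceId
): Promise<Array<Omit<ObjectMetadata, 'lastModified'>>> {
  const all: Array<Omit<ObjectMetadata, 'lastModified'>> = []
  let cursor: string | undefined
  do {
    // each call returns up to 100 blobs plus the cursor for the next page
    const page = await client.listObjects(ctx, workspace, cursor)
    all.push(...page.blobs)
    cursor = page.cursor
  } while (cursor !== undefined)
  return all
}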
@@ -166,9 +197,9 @@ export class DatalakeClient {
     workspace: WorkspaceId,
     objectName: string,
     stream: Readable | Buffer | string,
-    metadata: ObjectMetadata,
-    size?: number
-  ): Promise<void> {
+    params: UploadObjectParams
+  ): Promise<ObjectMetadata> {
+    let size = params.size
     if (size === undefined) {
       if (Buffer.isBuffer(stream)) {
         size = stream.length
@@ -182,12 +213,12 @@ export class DatalakeClient {
 
     try {
       if (size === undefined || size < 64 * 1024 * 1024) {
-        await ctx.with('direct-upload', {}, (ctx) =>
-          this.uploadWithFormData(ctx, workspace, objectName, stream, metadata)
+        return await ctx.with('direct-upload', {}, (ctx) =>
+          this.uploadWithFormData(ctx, workspace, objectName, stream, { ...params, size })
         )
       } else {
-        await ctx.with('signed-url-upload', {}, (ctx) =>
-          this.uploadWithSignedURL(ctx, workspace, objectName, stream, metadata)
+        return await ctx.with('signed-url-upload', {}, (ctx) =>
+          this.uploadWithSignedURL(ctx, workspace, objectName, stream, { ...params, size })
         )
       }
     } catch (err) {
@@ -201,18 +232,18 @@ export class DatalakeClient {
     workspace: WorkspaceId,
     objectName: string,
     stream: Readable | Buffer | string,
-    metadata: ObjectMetadata
-  ): Promise<void> {
+    params: UploadObjectParams
+  ): Promise<ObjectMetadata> {
     const path = `/upload/form-data/${workspace.name}`
     const url = concatLink(this.endpoint, path)
 
     const form = new FormData()
     const options: FormData.AppendOptions = {
       filename: objectName,
-      contentType: metadata.type,
-      knownLength: metadata.size,
+      contentType: params.type,
+      knownLength: params.size,
       header: {
-        'Last-Modified': metadata.lastModified
+        'Last-Modified': params.lastModified
       }
     }
     form.append('file', stream, options)
@@ -229,6 +260,8 @@ export class DatalakeClient {
     if ('error' in uploadResult) {
       throw new DatalakeError('Upload failed: ' + uploadResult.error)
     }
+
+    return uploadResult.metadata
   }
 
   async uploadMultipart (
@@ -236,11 +269,11 @@ export class DatalakeClient {
     workspace: WorkspaceId,
     objectName: string,
     stream: Readable | Buffer | string,
-    metadata: ObjectMetadata
-  ): Promise<void> {
+    params: UploadObjectParams
+  ): Promise<ObjectMetadata> {
     const chunkSize = 10 * 1024 * 1024
 
-    const multipart = await this.multipartUploadStart(ctx, workspace, objectName, metadata)
+    const multipart = await this.multipartUploadStart(ctx, workspace, objectName, params)
 
     try {
       const parts: MultipartUploadPart[] = []
@@ -252,7 +285,7 @@ export class DatalakeClient {
         partNumber++
       }
 
-      await this.multipartUploadComplete(ctx, workspace, objectName, multipart, parts)
+      return await this.multipartUploadComplete(ctx, workspace, objectName, multipart, parts)
     } catch (err: any) {
       await this.multipartUploadAbort(ctx, workspace, objectName, multipart)
       throw err
@@ -264,8 +297,8 @@ export class DatalakeClient {
     workspace: WorkspaceId,
     objectName: string,
     stream: Readable | Buffer | string,
-    metadata: ObjectMetadata
-  ): Promise<void> {
+    params: UploadObjectParams
+  ): Promise<ObjectMetadata> {
     const url = await this.signObjectSign(ctx, workspace, objectName)
 
     try {
@@ -273,8 +306,8 @@ export class DatalakeClient {
         body: stream,
         method: 'PUT',
         headers: {
-          'Content-Type': metadata.type,
-          'Content-Length': metadata.size?.toString() ?? '0'
+          'Content-Type': params.type,
+          'Content-Length': params.size?.toString() ?? '0'
           // 'x-amz-meta-last-modified': metadata.lastModified.toString()
         }
       })
@@ -284,7 +317,7 @@ export class DatalakeClient {
       throw new DatalakeError('Failed to upload via signed URL')
     }
 
-    await this.signObjectComplete(ctx, workspace, objectName)
+    return await this.signObjectComplete(ctx, workspace, objectName)
   }
 
   async uploadFromS3 (
@@ -322,10 +355,15 @@ export class DatalakeClient {
     }
   }
 
-  private async signObjectComplete (ctx: MeasureContext, workspace: WorkspaceId, objectName: string): Promise<void> {
+  private async signObjectComplete (
+    ctx: MeasureContext,
+    workspace: WorkspaceId,
+    objectName: string
+  ): Promise<ObjectMetadata> {
     try {
       const url = this.getSignObjectUrl(workspace, objectName)
-      await fetchSafe(ctx, url, { method: 'PUT' })
+      const res = await fetchSafe(ctx, url, { method: 'PUT' })
+      return (await res.json()) as ObjectMetadata
     } catch (err: any) {
       ctx.error('failed to complete signed url upload', { workspace, objectName, err })
       throw new DatalakeError('Failed to complete signed URL upload')
@@ -353,16 +391,16 @@ export class DatalakeClient {
     ctx: MeasureContext,
     workspace: WorkspaceId,
     objectName: string,
-    metadata: ObjectMetadata
+    params: UploadObjectParams
   ): Promise<MultipartUpload> {
     const path = `/upload/multipart/${workspace.name}/${encodeURIComponent(objectName)}`
     const url = concatLink(this.endpoint, path)
 
     try {
       const headers = {
-        'Content-Type': metadata.type,
-        'Content-Length': metadata.size?.toString() ?? '0',
-        'Last-Modified': new Date(metadata.lastModified).toUTCString()
+        'Content-Type': params.type,
+        'Content-Length': params.size?.toString() ?? '0',
+        'Last-Modified': new Date(params.lastModified).toUTCString()
       }
       const response = await fetchSafe(ctx, url, { method: 'POST', headers })
       return (await response.json()) as MultipartUpload
@@ -401,14 +439,15 @@ export class DatalakeClient {
     objectName: string,
     multipart: MultipartUpload,
     parts: MultipartUploadPart[]
-  ): Promise<void> {
+  ): Promise<ObjectMetadata> {
     const path = `/upload/multipart/${workspace.name}/${encodeURIComponent(objectName)}/complete`
     const url = new URL(concatLink(this.endpoint, path))
     url.searchParams.append('key', multipart.key)
     url.searchParams.append('uploadId', multipart.uploadId)
 
     try {
-      await fetchSafe(ctx, url, { method: 'POST', body: JSON.stringify({ parts }) })
+      const res = await fetchSafe(ctx, url, { method: 'POST', body: JSON.stringify({ parts }) })
+      return (await res.json()) as ObjectMetadata
     } catch (err: any) {
       ctx.error('failed to complete multipart upload', { workspace, objectName, err })
       throw new DatalakeError('Failed to complete multipart upload')
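Since every upload path now resolves to the ObjectMetadata reported by the datalake, callers can read the etag straight from putObject. A hedged usage sketch, assuming client, ctx and workspace are already available:

const params: UploadObjectParams = {
  lastModified: Date.now(),
  type: 'text/plain'
  // size is optional; putObject derives it for Buffer payloads
}
const metadata = await client.putObject(ctx, workspace, 'notes.txt', Buffer.from('hello'), params)
console.log('uploaded blob etag:', metadata.etag)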
@@ -18,13 +18,14 @@ import core, { type Blob, type MeasureContext, type Ref, type WorkspaceId, withC
 import {
   type BlobStorageIterator,
   type BucketInfo,
+  type ListBlobResult,
   type StorageAdapter,
   type StorageConfig,
   type StorageConfiguration,
   type UploadedObjectInfo
 } from '@hcengineering/server-core'
 import { type Readable } from 'stream'
-import { type ObjectMetadata, DatalakeClient } from './client'
+import { type UploadObjectParams, DatalakeClient } from './client'
 
 export { DatalakeClient }
 
@@ -88,8 +89,36 @@ export class DatalakeService implements StorageAdapter {
 
   @withContext('listStream')
   async listStream (ctx: MeasureContext, workspaceId: WorkspaceId): Promise<BlobStorageIterator> {
+    let hasMore = true
+    const buffer: ListBlobResult[] = []
+    let cursor: string | undefined
+
     return {
-      next: async () => [],
+      next: async () => {
+        try {
+          while (hasMore && buffer.length < 50) {
+            const res = await this.client.listObjects(ctx, workspaceId, cursor)
+            hasMore = res.cursor !== undefined
+            cursor = res.cursor
+
+            for (const blob of res.blobs) {
+              buffer.push({
+                _id: blob.name as Ref<Blob>,
+                _class: core.class.Blob,
+                etag: blob.etag,
+                size: blob.size ?? 0,
+                provider: this.opt.name,
+                space: core.space.Configuration,
+                modifiedBy: core.account.ConfigUser,
+                modifiedOn: 0
+              })
+            }
+          }
+        } catch (err: any) {
+          ctx.error('Failed to get list', { error: err, workspaceId: workspaceId.name })
+        }
+        return buffer.splice(0, 50)
+      },
       close: async () => {}
     }
   }
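listStream now pages through the datalake in batches of 50 and maps each entry onto a Blob document. A sketch of how a consumer might drain the iterator, assuming an adapter instance plus ctx and workspaceId; only next and close come from the interface shown above:

const iterator = await adapter.listStream(ctx, workspaceId)
try {
  while (true) {
    const batch = await iterator.next()
    if (batch.length === 0) break // an empty batch signals the end of the listing
    for (const blob of batch) {
      console.log(blob._id, blob.etag, blob.size)
    }
  }
} finally {
  await iterator.close()
}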
@@ -131,19 +160,18 @@ export class DatalakeService implements StorageAdapter {
     contentType: string,
     size?: number
   ): Promise<UploadedObjectInfo> {
-    const metadata: ObjectMetadata = {
+    const params: UploadObjectParams = {
       lastModified: Date.now(),
-      name: objectName,
       type: contentType,
       size
     }
 
-    await ctx.with('put', {}, (ctx) =>
-      withRetry(ctx, 5, () => this.client.putObject(ctx, workspaceId, objectName, stream, metadata, size))
+    const { etag } = await ctx.with('put', {}, (ctx) =>
+      withRetry(ctx, 5, () => this.client.putObject(ctx, workspaceId, objectName, stream, params))
     )
 
     return {
-      etag: '',
+      etag,
       versionId: ''
     }
   }
@@ -23,11 +23,12 @@ import { type BlobRequest, type WorkspaceRequest, type UUID } from './types'
 import { copyVideo, deleteVideo } from './video'
 import { type MetricsContext, LoggedCache } from './metrics'
 
-interface BlobMetadata {
+export interface BlobMetadata {
   lastModified: number
   type: string
   size: number
   name: string
+  etag: string
 }
 
 export function getBlobURL (request: Request, workspace: string, name: string): string {
@@ -35,6 +36,28 @@ export function getBlobURL (request: Request, workspace: string, name: string):
   return new URL(path, request.url).toString()
 }
 
+export async function handleBlobList (
+  request: WorkspaceRequest,
+  env: Env,
+  ctx: ExecutionContext,
+  metrics: MetricsContext
+): Promise<Response> {
+  const { workspace } = request
+  const cursor = extractStrParam(request.query.cursor)
+  const limit = extractIntParam(request.query.limit)
+
+  const response = await withPostgres(env, ctx, metrics, (db) => {
+    return db.listBlobs(workspace, cursor, limit)
+  })
+
+  const blobs = response.blobs.map((blob) => {
+    const { name, size, type, hash } = blob
+    return { name, size, type, etag: hash }
+  })
+
+  return json({ blobs, cursor: response.cursor })
+}
+
 export async function handleBlobGet (
   request: BlobRequest,
   env: Env,
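Together with the route registered further below, this handler exposes the listing over HTTP as GET /blob/:workspace with optional cursor and limit query parameters, answering with { blobs, cursor } as JSON. A hedged fetch sketch; the base URL and workspace name are placeholders and any authentication the deployment requires is omitted:

const base = 'https://datalake.example.com' // placeholder endpoint
let cursor: string | undefined
do {
  const url = new URL(`${base}/blob/my-workspace`)
  url.searchParams.set('limit', '100')
  if (cursor !== undefined) url.searchParams.set('cursor', cursor)

  const res = await fetch(url)
  const page = (await res.json()) as {
    blobs: Array<{ name: string, etag: string, size: number, type: string }>
    cursor?: string
  }
  for (const blob of page.blobs) console.log(blob.name, blob.etag)
  cursor = page.cursor
} while (cursor !== undefined)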
@@ -68,13 +91,17 @@ export async function handleBlobGet (
     return error(404)
   }
 
-  const headers = r2MetadataHeaders(object)
+  const headers = r2MetadataHeaders(blob.hash, object)
   if (range !== undefined && object?.range !== undefined) {
     headers.set('Content-Range', rangeHeader(object.range, object.size))
   }
 
   const length = object?.range !== undefined && 'length' in object.range ? object?.range?.length : undefined
   const status = length !== undefined && length < object.size ? 206 : 200
+  if (length !== undefined && length < object.size) {
+    // for partial content use etag returned by R2
+    headers.set('ETag', object.httpEtag)
+  }
 
   const response = new Response(object?.body, { headers, status })
 
@@ -110,7 +137,7 @@ export async function handleBlobHead (
     return error(404)
   }
 
-  const headers = r2MetadataHeaders(head)
+  const headers = r2MetadataHeaders(blob.hash, head)
   return new Response(null, { headers, status: 200 })
 }
 
@@ -204,22 +231,33 @@ export async function saveBlob (
   const httpMetadata = { contentType: type, cacheControl, lastModified }
   const filename = getUniqueFilename()
 
+  const blob = await db.getBlob({ workspace, name })
+
   if (size <= hashLimit) {
     const [hashStream, uploadStream] = stream.tee()
 
     const hash = await getSha256(hashStream)
+
+    // Check if we have the same blob already
+    if (blob?.hash === hash && blob?.type === type) {
+      return { type, size, lastModified, name, etag: hash }
+    }
+
     const data = await db.getData({ hash, location })
 
     if (data !== null) {
       // Lucky boy, nothing to upload, use existing blob
       await db.createBlob({ workspace, name, hash, location })
+
+      return { type, size, lastModified, name, etag: data.hash }
     } else {
       await bucket.put(filename, uploadStream, { httpMetadata })
+
       await db.createData({ hash, location, filename, type, size })
       await db.createBlob({ workspace, name, hash, location })
-    }
 
-    return { type, size, lastModified, name }
+      return { type, size, lastModified, name, etag: hash }
+    }
   } else {
     // For large files we cannot calculate checksum beforehead
     // upload file with unique filename and then obtain checksum
@@ -229,13 +267,15 @@
       // We found an existing blob with the same hash
       // we can safely remove the existing blob from storage
       await Promise.all([bucket.delete(filename), db.createBlob({ workspace, name, hash, location })])
+
+      return { type, size, lastModified, name, etag: hash }
     } else {
       // Otherwise register a new hash and blob
       await db.createData({ hash, location, filename, type, size })
       await db.createBlob({ workspace, name, hash, location })
-    }
 
-    return { type, size, lastModified, name }
+      return { type, size, lastModified, name, etag: hash }
+    }
   }
 }
 
@@ -246,7 +286,7 @@ export async function handleBlobUploaded (
   workspace: string,
   name: string,
   filename: UUID
-): Promise<void> {
+): Promise<BlobMetadata> {
   const { location, bucket } = selectStorage(env, workspace)
 
   const object = await bucket.head(filename)
@@ -255,19 +295,20 @@
   }
 
   const hash = object.checksums.md5 !== undefined ? digestToUUID(object.checksums.md5) : (crypto.randomUUID() as UUID)
+  const size = object.size
+  const type = object.httpMetadata?.contentType ?? 'application/octet-stream'
 
   await withPostgres(env, ctx, metrics, async (db) => {
     const data = await db.getData({ hash, location })
     if (data !== null) {
       await Promise.all([bucket.delete(filename), db.createBlob({ workspace, name, hash, location })])
     } else {
-      const size = object.size
-      const type = object.httpMetadata?.contentType ?? 'application/octet-stream'
-
       await db.createData({ hash, location, filename, type, size })
       await db.createBlob({ workspace, name, hash, location })
     }
   })
+
+  return { type, size, name, etag: hash, lastModified: object.uploaded.getTime() }
 }
 
 async function uploadLargeFile (
@@ -309,7 +350,7 @@ function rangeHeader (range: R2Range, size: number): string {
   return `bytes ${start}-${end - 1}/${size}`
 }
 
-function r2MetadataHeaders (head: R2Object): Headers {
+function r2MetadataHeaders (hash: string, head: R2Object): Headers {
   return head.httpMetadata !== undefined
     ? new Headers({
         'Accept-Ranges': 'bytes',
@@ -318,7 +359,7 @@ function r2MetadataHeaders (head: R2Object): Headers {
         'Content-Security-Policy': "default-src 'none';",
         'Cache-Control': head.httpMetadata.cacheControl ?? cacheControl,
         'Last-Modified': head.uploaded.toUTCString(),
-        ETag: head.httpEtag
+        ETag: hash
       })
     : new Headers({
         'Accept-Ranges': 'bytes',
@@ -326,6 +367,31 @@ function r2MetadataHeaders (head: R2Object): Headers {
         'Content-Security-Policy': "default-src 'none';",
         'Cache-Control': cacheControl,
         'Last-Modified': head.uploaded.toUTCString(),
-        ETag: head.httpEtag
+        ETag: hash
       })
 }
+
+function extractStrParam (value: string | string[] | undefined): string | undefined {
+  if (Array.isArray(value)) {
+    return value[0]
+  }
+
+  return value
+}
+
+function extractIntParam (value: string | string[] | undefined): number | undefined {
+  if (value === undefined) {
+    return undefined
+  }
+
+  if (Array.isArray(value)) {
+    value = value[0]
+  }
+
+  const intValue = Number.parseInt(value)
+  if (Number.isInteger(intValue)) {
+    return intValue
+  }
+
+  return undefined
+}
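The two helpers normalize itty-router query values, which may arrive as a string, an array of strings, or be absent. Illustrative calls, assuming a parsed query object with these hypothetical parameters:

// assuming request.query = { cursor: ['abc', 'def'], limit: '50' }
extractStrParam(request.query.cursor) // 'abc' (the first value wins)
extractIntParam(request.query.limit)  // 50
extractIntParam(request.query.page)   // undefined (parameter not present)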
@@ -39,8 +39,11 @@ export interface BlobRecord extends BlobId {
   deleted: boolean
 }
 
-export interface BlobRecordWithFilename extends BlobRecord {
-  filename: string
+export type BlobWithDataRecord = BlobRecord & BlobDataRecord
+
+export interface ListBlobResult {
+  cursor: string | undefined
+  blobs: BlobWithDataRecord[]
 }
 
 export async function withPostgres<T> (
@@ -72,7 +75,8 @@ export async function withPostgres<T> (
 export interface BlobDB {
   getData: (dataId: BlobDataId) => Promise<BlobDataRecord | null>
   createData: (data: BlobDataRecord) => Promise<void>
-  getBlob: (blobId: BlobId) => Promise<BlobRecordWithFilename | null>
+  listBlobs: (workspace: string, cursor?: string, limit?: number) => Promise<ListBlobResult>
+  getBlob: (blobId: BlobId) => Promise<BlobWithDataRecord | null>
   createBlob: (blob: Omit<BlobRecord, 'filename' | 'deleted'>) => Promise<void>
   deleteBlob: (blob: BlobId) => Promise<void>
 }
@@ -99,12 +103,12 @@ export class PostgresDB implements BlobDB {
     `
   }
 
-  async getBlob (blobId: BlobId): Promise<BlobRecordWithFilename | null> {
+  async getBlob (blobId: BlobId): Promise<BlobWithDataRecord | null> {
     const { workspace, name } = blobId
 
     try {
-      const rows = await this.sql<BlobRecordWithFilename[]>`
-        SELECT b.workspace, b.name, b.hash, b.location, b.deleted, d.filename
+      const rows = await this.sql<BlobWithDataRecord[]>`
+        SELECT b.workspace, b.name, b.hash, b.location, b.deleted, d.filename, d.size, d.type
         FROM blob.blob AS b
         JOIN blob.data AS d ON b.hash = d.hash AND b.location = d.location
         WHERE b.workspace = ${workspace} AND b.name = ${name}
@@ -120,6 +124,25 @@ export class PostgresDB implements BlobDB {
     return null
   }
 
+  async listBlobs (workspace: string, cursor?: string, limit?: number): Promise<ListBlobResult> {
+    cursor = cursor ?? ''
+    limit = Math.min(limit ?? 100, 1000)
+
+    const rows = await this.sql<BlobWithDataRecord[]>`
+      SELECT b.workspace, b.name, b.hash, b.location, b.deleted, d.filename, d.size, d.type
+      FROM blob.blob AS b
+      JOIN blob.data AS d ON b.hash = d.hash AND b.location = d.location
+      WHERE b.workspace = ${workspace} AND b.name > ${cursor} AND b.deleted = false
+      ORDER BY b.workspace, b.name
+      LIMIT ${limit}
+    `
+
+    return {
+      cursor: rows.length > 0 ? rows[rows.length - 1].name : undefined,
+      blobs: rows
+    }
+  }
+
   async createBlob (blob: Omit<BlobRecord, 'filename' | 'deleted'>): Promise<void> {
     const { workspace, name, hash, location } = blob
 
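The query implements keyset pagination: rows are ordered by (workspace, name), filtered with name > cursor, and the name of the last returned row becomes the next cursor, so deep pages never pay for an OFFSET scan. A sketch of walking a workspace with this API, assuming a BlobDB instance obtained via withPostgres:

async function forEachBlob (
  db: BlobDB,
  workspace: string,
  visit: (blob: BlobWithDataRecord) => void
): Promise<void> {
  let cursor: string | undefined
  do {
    const page = await db.listBlobs(workspace, cursor, 500)
    page.blobs.forEach(visit)
    cursor = page.cursor // undefined once a page comes back empty
  } while (cursor !== undefined)
}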
@@ -154,10 +177,14 @@ export class LoggedDB implements BlobDB {
     await this.ctx.with('db.createData', () => this.db.createData(data))
   }
 
-  async getBlob (blobId: BlobId): Promise<BlobRecordWithFilename | null> {
+  async getBlob (blobId: BlobId): Promise<BlobWithDataRecord | null> {
     return await this.ctx.with('db.getBlob', () => this.db.getBlob(blobId))
   }
 
+  async listBlobs (workspace: string, cursor?: string, limit?: number): Promise<ListBlobResult> {
+    return await this.ctx.with('db.listBlobs', () => this.db.listBlobs(workspace, cursor, limit))
+  }
+
   async createBlob (blob: Omit<BlobRecord, 'filename' | 'deleted'>): Promise<void> {
     await this.ctx.with('db.createBlob', () => this.db.createBlob(blob))
   }
@@ -27,8 +27,6 @@ export const toHex = (buffer: Uint8Array): string => {
     .join('')
 }
 
-export const etag = (id: string): string => `"${id}"`
-
 export function formatHexAsUUID (hexString: string): UUID {
   if (hexString.length !== 32) {
     throw new Error('Hex string must be exactly 32 characters long.')
@@ -16,7 +16,7 @@
 import { WorkerEntrypoint } from 'cloudflare:workers'
 import { type IRequest, type IRequestStrict, type RequestHandler, Router, error, html } from 'itty-router'
 
-import { handleBlobDelete, handleBlobGet, handleBlobHead, handleUploadFormData } from './blob'
+import { handleBlobDelete, handleBlobGet, handleBlobHead, handleBlobList, handleUploadFormData } from './blob'
 import { cors } from './cors'
 import { LoggedKVNamespace, LoggedR2Bucket, MetricsContext } from './metrics'
 import { handleImageGet } from './image'
@@ -59,6 +59,7 @@ const withBlob: RequestHandler<BlobRequest> = (request: BlobRequest) => {
 }
 
 router
+  .get('/blob/:workspace', withWorkspace, handleBlobList)
   .get('/blob/:workspace/:name', withBlob, handleBlobGet)
   .get('/blob/:workspace/:name/:filename', withBlob, handleBlobGet)
   .head('/blob/:workspace/:name', withBlob, handleBlobHead)
@@ -14,6 +14,7 @@
 //
 
 import { error, json } from 'itty-router'
+import { type BlobMetadata } from './blob'
 import { withPostgres } from './db'
 import { cacheControl } from './const'
 import { toUUID } from './encodings'
@@ -119,7 +120,15 @@ export async function handleMultipartUploadComplete (
     }
   })
 
-  return new Response(null, { status: 204 })
+  const metadata: BlobMetadata = {
+    type,
+    size,
+    name,
+    etag: hash,
+    lastModified: object.uploaded.getTime()
+  }
+
+  return json(metadata)
 }
 
 export async function handleMultipartUploadAbort (
|
|||||||
//
|
//
|
||||||
|
|
||||||
import { AwsClient } from 'aws4fetch'
|
import { AwsClient } from 'aws4fetch'
|
||||||
import { error } from 'itty-router'
|
import { error, json } from 'itty-router'
|
||||||
|
|
||||||
import { handleBlobUploaded } from './blob'
|
import { type BlobMetadata, handleBlobUploaded } from './blob'
|
||||||
import { type MetricsContext } from './metrics'
|
import { type MetricsContext } from './metrics'
|
||||||
import { type Storage, selectStorage } from './storage'
|
import { type Storage, selectStorage } from './storage'
|
||||||
import { type BlobRequest, type UUID } from './types'
|
import { type BlobRequest, type UUID } from './types'
|
||||||
@ -108,8 +108,9 @@ export async function handleSignComplete (
|
|||||||
return error(400)
|
return error(400)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let metadata: BlobMetadata
|
||||||
try {
|
try {
|
||||||
await handleBlobUploaded(env, ctx, metrics, workspace, name, uuid)
|
metadata = await handleBlobUploaded(env, ctx, metrics, workspace, name, uuid)
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const message = err instanceof Error ? err.message : String(err)
|
const message = err instanceof Error ? err.message : String(err)
|
||||||
console.error({ error: message, workspace, name, uuid })
|
console.error({ error: message, workspace, name, uuid })
|
||||||
@ -118,7 +119,7 @@ export async function handleSignComplete (
|
|||||||
|
|
||||||
await env.datalake_blobs.delete(key)
|
await env.datalake_blobs.delete(key)
|
||||||
|
|
||||||
return new Response(null, { status: 201 })
|
return json(metadata)
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function handleSignAbort (request: BlobRequest, env: Env, ctx: ExecutionContext): Promise<Response> {
|
export async function handleSignAbort (request: BlobRequest, env: Env, ctx: ExecutionContext): Promise<Response> {
|
||||||
|
Loading…
Reference in New Issue
Block a user