mirror of https://github.com/toeverything/AFFiNE.git (synced 2024-12-02 14:33:54 +03:00)

fix(infra): better search result (#7774)

parent: 2f0e39b702
commit: d968cfe425
@@ -244,9 +244,12 @@ export class FullTextInvertedIndex implements InvertedIndex {
     // normalize score
     const maxScore = submatched.reduce((acc, s) => Math.max(acc, s.score), 0);
-    const minScore = submatched.reduce((acc, s) => Math.min(acc, s.score), 1);
+    const minScore = submatched.reduce((acc, s) => Math.min(acc, s.score), 0);
     for (const { nid, score, position } of submatched) {
-      const normalizedScore = (score - minScore) / (maxScore - minScore);
+      const normalizedScore =
+        maxScore === minScore
+          ? score
+          : (score - minScore) / (maxScore - minScore);
       const match = matched.get(nid) || {
         score: [] as number[],
         positions: new Map(),
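For readers skimming the hunk: the added guard exists because plain min-max normalization divides by `maxScore - minScore`, which is zero when every submatch carries the same score, turning every normalized score into `NaN`. A self-contained sketch of the same idea (the `normalize` helper name is illustrative, not AFFiNE's API):

```typescript
// Illustrative min-max normalization with the degenerate-case guard.
function normalize(scores: number[]): number[] {
  const maxScore = scores.reduce((acc, s) => Math.max(acc, s), 0);
  const minScore = scores.reduce((acc, s) => Math.min(acc, s), 0);
  return scores.map(score =>
    maxScore === minScore
      ? score // denominator would be 0, keep the raw score
      : (score - minScore) / (maxScore - minScore)
  );
}

console.log(normalize([1, 3, 5])); // [0.2, 0.6, 1]
console.log(normalize([0, 0]));    // [0, 0] rather than [NaN, NaN]
```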
@@ -72,6 +72,7 @@
     "jotai-devtools": "^0.10.0",
+    "jotai-effect": "^1.0.0",
     "jotai-scope": "^0.7.0",
     "lib0": "^0.2.95",
     "lit": "^3.1.3",
     "lodash-es": "^4.17.21",
     "lottie-react": "^2.4.0",
@@ -23,11 +23,9 @@ export function isEmptyUpdate(binary: Uint8Array)
 const logger = new DebugLogger('crawler');
 
 interface IndexerJobPayload {
-  docId: string;
   storageDocId: string;
 }
 
 // TODO(@eyhn): simplify this, it's too complex
 export class DocsIndexer extends Entity {
   private readonly jobQueue: JobQueue<IndexerJobPayload> =
     new IndexedDBJobQueue<IndexerJobPayload>(
@@ -72,13 +70,11 @@ export class DocsIndexer extends Entity {
       return;
     }
     if (event.clientId === this.workspaceEngine.doc.clientId) {
-      const docId = normalizeDocId(event.docId);
-
       this.jobQueue
         .enqueue([
           {
-            batchKey: docId,
-            payload: { docId, storageDocId: event.docId },
+            batchKey: event.docId,
+            payload: { storageDocId: event.docId },
          },
        ])
        .catch(err => {
@@ -93,18 +89,17 @@ export class DocsIndexer extends Entity {
       return;
     }
 
-    // jobs should have the same docId, so we just pick the first one
-    const docId = jobs[0].payload.docId;
+    // jobs should have the same storage docId, so we just pick the first one
+    const storageDocId = jobs[0].payload.storageDocId;
 
     const worker = await this.ensureWorker(signal);
 
     const startTime = performance.now();
-    logger.debug('Start crawling job for docId:', docId);
+    logger.debug('Start crawling job for storageDocId:', storageDocId);
 
     let workerOutput;
 
-    if (docId === this.workspaceId) {
+    if (storageDocId === this.workspaceId) {
       const rootDocBuffer =
         await this.workspaceEngine.doc.storage.loadDocFromLocal(
           this.workspaceId
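The `jobs[0].payload.storageDocId` read relies on a batching contract: every job is enqueued with `batchKey: storageDocId`, so a batch handed to the crawler should only ever contain payloads for a single storage doc. A rough sketch of that contract using simplified, hypothetical types (the real `JobQueue`/`IndexedDBJobQueue` interfaces are richer than this):

```typescript
// Simplified stand-in types to illustrate the batching contract
// (hypothetical; not the actual IndexedDBJobQueue API).
interface Job<P> {
  batchKey: string;
  payload: P;
}

interface IndexerJobPayload {
  storageDocId: string;
}

// Group queued jobs by batchKey so one batch only ever covers one storage doc.
function groupByBatchKey<P>(jobs: Job<P>[]): Map<string, Job<P>[]> {
  const batches = new Map<string, Job<P>[]>();
  for (const job of jobs) {
    const batch = batches.get(job.batchKey) ?? [];
    batch.push(job);
    batches.set(job.batchKey, batch);
  }
  return batches;
}

// Within a batch every payload shares the same storageDocId, which is
// why the crawler can safely read jobs[0].payload.storageDocId.
for (const [, batch] of groupByBatchKey<IndexerJobPayload>([
  { batchKey: 'doc-a', payload: { storageDocId: 'doc-a' } },
  { batchKey: 'doc-a', payload: { storageDocId: 'doc-a' } },
  { batchKey: 'doc-b', payload: { storageDocId: 'doc-b' } },
])) {
  console.log(batch[0].payload.storageDocId);
}
```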
@@ -151,7 +146,7 @@ export class DocsIndexer extends Entity {
       workerOutput = await worker.run({
         type: 'doc',
         docBuffer,
-        docId,
+        storageDocId,
         rootDocBuffer,
       });
     }
@@ -190,13 +185,13 @@ export class DocsIndexer extends Entity {
       }
       await docIndexWriter.commit();
       const blockIndexWriter = await this.blockIndex.write();
-      for (const { blocks } of workerOutput.addedDoc) {
+      for (const { id, blocks } of workerOutput.addedDoc) {
         // delete old blocks
         const oldBlocks = await blockIndexWriter.search(
           {
             type: 'match',
             field: 'docId',
-            match: docId,
+            match: id,
           },
           {
             pagination: {
@@ -217,16 +212,20 @@ export class DocsIndexer extends Entity {
 
     if (workerOutput.reindexDoc) {
       await this.jobQueue.enqueue(
-        workerOutput.reindexDoc.map(({ docId, storageDocId }) => ({
-          batchKey: docId,
-          payload: { docId, storageDocId },
+        workerOutput.reindexDoc.map(({ storageDocId }) => ({
+          batchKey: storageDocId,
+          payload: { storageDocId },
         }))
       );
     }
 
     const duration = performance.now() - startTime;
     logger.debug(
-      'Finish crawling job for docId:' + docId + ' in ' + duration + 'ms '
+      'Finish crawling job for storageDocId:' +
+        storageDocId +
+        ' in ' +
+        duration +
+        'ms '
     );
   }
 
@@ -236,7 +235,7 @@ export class DocsIndexer extends Entity {
       .enqueue([
         {
           batchKey: this.workspaceId,
-          payload: { docId: this.workspaceId, storageDocId: this.workspaceId },
+          payload: { storageDocId: this.workspaceId },
        },
      ])
      .catch(err => {
@@ -255,47 +254,3 @@ export class DocsIndexer extends Entity {
     this.runner.stop();
   }
 }
-
-function normalizeDocId(raw: string) {
-  enum DocVariant {
-    Workspace = 'workspace',
-    Page = 'page',
-    Space = 'space',
-    Settings = 'settings',
-    Unknown = 'unknown',
-  }
-
-  try {
-    if (!raw.length) {
-      throw new Error('Invalid Empty Doc ID');
-    }
-
-    let parts = raw.split(':');
-
-    if (parts.length > 3) {
-      // special adapt case `wsId:space:page:pageId`
-      if (parts[1] === DocVariant.Space && parts[2] === DocVariant.Page) {
-        parts = [parts[0], DocVariant.Space, parts[3]];
-      } else {
-        throw new Error(`Invalid format of Doc ID: ${raw}`);
-      }
-    } else if (parts.length === 2) {
-      // `${variant}:${guid}`
-      throw new Error('not supported');
-    } else if (parts.length === 1) {
-      // ${ws} or ${pageId}
-      parts = ['', DocVariant.Unknown, parts[0]];
-    }
-
-    const docId = parts.at(2);
-
-    if (!docId) {
-      throw new Error('ID is required');
-    }
-
-    return docId;
-  } catch (err) {
-    logger.error('Error on normalize docId ' + raw, err);
-    return raw;
-  }
-}
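For reference, the behaviour of the deleted `normalizeDocId` helper, reconstructed from the removed lines above; after this commit nothing normalizes ids on the indexer side and the raw storage doc id is used directly:

```typescript
// Inputs and outputs of the removed helper, inferred from the deleted code:
// normalizeDocId('ws1:space:page:page1'); // => 'page1' (special `wsId:space:page:pageId` form)
// normalizeDocId('ws1:space:page1');      // => 'page1' (plain three-part id, last segment wins)
// normalizeDocId('page1');                // => 'page1' (bare id passed through)
// normalizeDocId('space:page1');          // => 'space:page1' (two-part ids were rejected, logged, returned as-is)
```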
@@ -1,6 +1,8 @@
 import type { AffineTextAttributes } from '@blocksuite/blocks';
 import type { DeltaInsert } from '@blocksuite/inline';
 import { Document } from '@toeverything/infra';
+import { toHexString } from 'lib0/buffer.js';
+import { digest as lib0Digest } from 'lib0/hash/sha256';
 import { difference } from 'lodash-es';
 import {
   applyUpdate,
@@ -18,24 +20,66 @@ import type {
   WorkerOutput,
 } from './types';
 
-function crawlingDocData({
+let cachedRootDoc: { doc: YDoc; hash: string } | null = null;
+
+async function digest(data: Uint8Array) {
+  if (
+    globalThis.crypto &&
+    globalThis.crypto.subtle &&
+    typeof globalThis.crypto.subtle.digest === 'function'
+  ) {
+    return new Uint8Array(
+      await globalThis.crypto.subtle.digest('SHA-256', data)
+    );
+  }
+  return lib0Digest(data);
+}
+
+async function crawlingDocData({
   docBuffer,
-  docId,
+  storageDocId,
   rootDocBuffer,
-}: WorkerInput & { type: 'doc' }): WorkerOutput {
-  const yRootDoc = new YDoc();
-  applyUpdate(yRootDoc, rootDocBuffer);
+}: WorkerInput & { type: 'doc' }): Promise<WorkerOutput> {
+  if (isEmptyUpdate(rootDocBuffer)) {
+    console.warn('[worker]: Empty root doc buffer');
+    return {};
+  }
+
+  const rootDocBufferHash = toHexString(await digest(rootDocBuffer));
+
+  let yRootDoc;
+  if (cachedRootDoc && cachedRootDoc.hash === rootDocBufferHash) {
+    yRootDoc = cachedRootDoc.doc;
+  } else {
+    yRootDoc = new YDoc();
+    applyUpdate(yRootDoc, rootDocBuffer);
+    cachedRootDoc = { doc: yRootDoc, hash: rootDocBufferHash };
+  }
+
+  let docId = null;
+  for (const [id, subdoc] of yRootDoc.getMap('spaces')) {
+    if (subdoc instanceof YDoc && storageDocId === subdoc.guid) {
+      docId = id;
+      break;
+    }
+  }
+
+  if (docId === null) {
+    return {};
+  }
 
   const ydoc = new YDoc();
 
-  applyUpdate(ydoc, docBuffer);
+  if (!isEmptyUpdate(docBuffer)) {
+    applyUpdate(ydoc, docBuffer);
+  }
 
   let docExists: boolean | null = null;
 
   (
     yRootDoc.getMap('meta').get('pages') as YArray<YMap<any>> | undefined
   )?.forEach(page => {
-    if (page.get('id') === docId) {
+    if (page.get('id') === storageDocId) {
       docExists = !(page.get('trash') ?? false);
     }
   });
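A condensed restatement of the caching idea this hunk introduces: hash the incoming root-doc buffer (Web Crypto where available, falling back to lib0's pure-JS SHA-256) and reuse the previously built `YDoc` when the hash is unchanged, so repeat crawl jobs skip a full `applyUpdate`. The `loadRootDoc` wrapper below is a hypothetical name for illustration; the imports mirror the ones added above.

```typescript
import { toHexString } from 'lib0/buffer.js';
import { digest as lib0Digest } from 'lib0/hash/sha256';
import { applyUpdate, Doc as YDoc } from 'yjs';

// SHA-256 with a fallback for environments without Web Crypto,
// following the same approach as the worker code above.
async function digest(data: Uint8Array): Promise<Uint8Array> {
  if (globalThis.crypto?.subtle?.digest) {
    return new Uint8Array(await globalThis.crypto.subtle.digest('SHA-256', data));
  }
  return lib0Digest(data);
}

let cachedRootDoc: { doc: YDoc; hash: string } | null = null;

// Illustrative helper (hypothetical name): reuse the previously built YDoc
// while the root-doc buffer is byte-for-byte identical.
async function loadRootDoc(rootDocBuffer: Uint8Array): Promise<YDoc> {
  const hash = toHexString(await digest(rootDocBuffer));
  if (cachedRootDoc?.hash === hash) {
    return cachedRootDoc.doc; // skip the expensive applyUpdate on repeat jobs
  }
  const doc = new YDoc();
  applyUpdate(doc, rootDocBuffer);
  cachedRootDoc = { doc, hash };
  return doc;
}
```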
@@ -283,7 +327,7 @@ function crawlingRootDocData({
   };
 }
 
-globalThis.onmessage = (event: MessageEvent<WorkerIngoingMessage>) => {
+globalThis.onmessage = async (event: MessageEvent<WorkerIngoingMessage>) => {
   const message = event.data;
   if (message.type === 'init') {
     postMessage({ type: 'init', msgId: message.msgId });
@@ -296,7 +340,7 @@ globalThis.onmessage = (event: MessageEvent<WorkerIngoingMessage>) => {
     if (input.type === 'rootDoc') {
       data = crawlingRootDocData(input);
     } else {
-      data = crawlingDocData(input);
+      data = await crawlingDocData(input);
     }
 
     postMessage({ type: 'done', msgId: message.msgId, output: data });
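As a rough picture of why the handler can simply become `async`: the caller correlates replies by `msgId`, so the worker is free to post its `done` message whenever the awaited crawl finishes, regardless of ordering. The snippet below is an assumed, simplified caller-side pattern, not the project's actual `WorkerIngoingMessage`/`WorkerOutgoingMessage` protocol or its `ensureWorker` implementation.

```typescript
// Hypothetical, simplified correlation of worker replies by msgId.
type OutgoingMessage =
  | { type: 'init'; msgId: number }
  | { type: 'done'; msgId: number; output: unknown };

function callWorker(
  worker: Worker,
  message: { msgId: number } & Record<string, unknown>
): Promise<unknown> {
  return new Promise(resolve => {
    const onMessage = (event: MessageEvent<OutgoingMessage>) => {
      // Only settle when the reply carries the msgId we sent.
      if (event.data.type === 'done' && event.data.msgId === message.msgId) {
        worker.removeEventListener('message', onMessage);
        resolve(event.data.output);
      }
    };
    worker.addEventListener('message', onMessage);
    worker.postMessage(message);
  });
}
```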
@@ -311,3 +355,10 @@ globalThis.onmessage = (event: MessageEvent<WorkerIngoingMessage>) => {
 };
 
 declare function postMessage(message: WorkerOutgoingMessage): void;
+
+function isEmptyUpdate(binary: Uint8Array) {
+  return (
+    binary.byteLength === 0 ||
+    (binary.byteLength === 2 && binary[0] === 0 && binary[1] === 0)
+  );
+}
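A quick note on the two-byte case: encoding an untouched Yjs document as an update yields the two bytes `[0, 0]` (zero struct lists, an empty delete set), so `isEmptyUpdate` treats that alongside a zero-length buffer as "nothing to crawl". A minimal verification, assuming the same helper as above:

```typescript
import { Doc as YDoc, encodeStateAsUpdate } from 'yjs';

function isEmptyUpdate(binary: Uint8Array) {
  return (
    binary.byteLength === 0 ||
    (binary.byteLength === 2 && binary[0] === 0 && binary[1] === 0)
  );
}

const emptyUpdate = encodeStateAsUpdate(new YDoc());
console.log(emptyUpdate);                // Uint8Array [0, 0]
console.log(isEmptyUpdate(emptyUpdate)); // true
```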
@@ -34,7 +34,7 @@ export type WorkerInput =
     }
   | {
       type: 'doc';
-      docId: string;
+      storageDocId: string;
       rootDocBuffer: Uint8Array;
      docBuffer: Uint8Array;
    };