Mirror of https://github.com/toeverything/AFFiNE.git (synced 2024-12-03 06:03:21 +03:00)
fix(infra): better search result (#7774)

commit d968cfe425
parent 2f0e39b702
@@ -244,9 +244,12 @@ export class FullTextInvertedIndex implements InvertedIndex {
 
       // normalize score
       const maxScore = submatched.reduce((acc, s) => Math.max(acc, s.score), 0);
-      const minScore = submatched.reduce((acc, s) => Math.min(acc, s.score), 1);
+      const minScore = submatched.reduce((acc, s) => Math.min(acc, s.score), 0);
       for (const { nid, score, position } of submatched) {
-        const normalizedScore = (score - minScore) / (maxScore - minScore);
+        const normalizedScore =
+          maxScore === minScore
+            ? score
+            : (score - minScore) / (maxScore - minScore);
         const match = matched.get(nid) || {
           score: [] as number[],
           positions: new Map(),
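Why the guard matters: the min-reduce previously seeded with 1, so a result set where every score was equal (or a single match scoring at or below 1) produced minScore === maxScore, and (score - minScore) / (maxScore - minScore) evaluated to 0/0 = NaN, breaking the ranking. A minimal standalone sketch of the fixed behavior (hypothetical helper, not the AFFiNE source):

// Illustrative only; mirrors the normalization logic in the hunk above.
function normalizeScores(scores: number[]): number[] {
  const maxScore = scores.reduce((acc, s) => Math.max(acc, s), 0);
  const minScore = scores.reduce((acc, s) => Math.min(acc, s), 0);
  return scores.map(score =>
    maxScore === minScore
      ? score // degenerate range: avoid 0/0 = NaN
      : (score - minScore) / (maxScore - minScore)
  );
}

// normalizeScores([0.8])      -> [1]    (the old seed of 1 gave min === max -> NaN)
// normalizeScores([0.2, 0.8]) -> [0.25, 1]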
@@ -72,6 +72,7 @@
     "jotai-devtools": "^0.10.0",
     "jotai-effect": "^1.0.0",
     "jotai-scope": "^0.7.0",
+    "lib0": "^0.2.95",
    "lit": "^3.1.3",
    "lodash-es": "^4.17.21",
    "lottie-react": "^2.4.0",
@@ -23,11 +23,9 @@ export function isEmptyUpdate(binary: Uint8Array) {
 const logger = new DebugLogger('crawler');
 
 interface IndexerJobPayload {
-  docId: string;
   storageDocId: string;
 }
 
-// TODO(@eyhn): simplify this, it's too complex
 export class DocsIndexer extends Entity {
   private readonly jobQueue: JobQueue<IndexerJobPayload> =
     new IndexedDBJobQueue<IndexerJobPayload>(
@@ -72,13 +70,11 @@ export class DocsIndexer extends Entity {
       return;
     }
     if (event.clientId === this.workspaceEngine.doc.clientId) {
-      const docId = normalizeDocId(event.docId);
-
       this.jobQueue
         .enqueue([
           {
-            batchKey: docId,
-            payload: { docId, storageDocId: event.docId },
+            batchKey: event.docId,
+            payload: { storageDocId: event.docId },
           },
         ])
        .catch(err => {
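The payload no longer carries a pre-normalized docId; jobs are keyed and batched purely by the storage-layer doc id, and the logical doc id is resolved later inside the worker. A sketch of the queue interaction, assuming the JobQueue and event shapes visible in the diff:

// Sketch only: the event and queue types here are assumptions for illustration.
interface IndexerJobPayload {
  storageDocId: string;
}

function onDocUpdate(
  queue: {
    enqueue(jobs: { batchKey: string; payload: IndexerJobPayload }[]): Promise<void>;
  },
  event: { docId: string; clientId: string },
  localClientId: string
) {
  if (event.clientId !== localClientId) return; // only index local edits
  // batchKey === storage doc id, so repeated updates to the same doc
  // collapse into a single crawl job.
  queue
    .enqueue([{ batchKey: event.docId, payload: { storageDocId: event.docId } }])
    .catch(err => console.error('failed to enqueue indexer job', err));
}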
@@ -93,18 +89,17 @@ export class DocsIndexer extends Entity {
       return;
     }
 
-    // jobs should have the same docId, so we just pick the first one
-    const docId = jobs[0].payload.docId;
+    // jobs should have the same storage docId, so we just pick the first one
     const storageDocId = jobs[0].payload.storageDocId;
 
     const worker = await this.ensureWorker(signal);
 
     const startTime = performance.now();
-    logger.debug('Start crawling job for docId:', docId);
+    logger.debug('Start crawling job for storageDocId:', storageDocId);
 
     let workerOutput;
 
-    if (docId === this.workspaceId) {
+    if (storageDocId === this.workspaceId) {
       const rootDocBuffer =
         await this.workspaceEngine.doc.storage.loadDocFromLocal(
           this.workspaceId
@@ -151,7 +146,7 @@ export class DocsIndexer extends Entity {
       workerOutput = await worker.run({
         type: 'doc',
         docBuffer,
-        docId,
+        storageDocId,
         rootDocBuffer,
       });
     }
@@ -190,13 +185,13 @@ export class DocsIndexer extends Entity {
       }
       await docIndexWriter.commit();
       const blockIndexWriter = await this.blockIndex.write();
-      for (const { blocks } of workerOutput.addedDoc) {
+      for (const { id, blocks } of workerOutput.addedDoc) {
         // delete old blocks
         const oldBlocks = await blockIndexWriter.search(
           {
             type: 'match',
             field: 'docId',
-            match: docId,
+            match: id,
           },
           {
             pagination: {
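Each re-crawled doc purges its previously indexed blocks (matched on the docId field) before fresh ones are written, so the block index never accumulates stale entries. A sketch of the delete-then-insert pattern, with assumed writer and result shapes (only the match-query form comes from the diff):

// All interfaces below are illustrative assumptions, not the infra API.
interface BlockIndexWriter {
  search(
    query: { type: 'match'; field: string; match: string },
    options: { pagination: { limit: number } }
  ): Promise<{ nodes: { id: string }[] }>;
  delete(id: string): void;
  insert(block: unknown): void;
  commit(): Promise<void>;
}

async function reindexDocBlocks(
  writer: BlockIndexWriter,
  docId: string,
  blocks: unknown[]
) {
  // drop the blocks previously indexed for this doc
  const old = await writer.search(
    { type: 'match', field: 'docId', match: docId },
    { pagination: { limit: Number.MAX_SAFE_INTEGER } }
  );
  for (const node of old.nodes) {
    writer.delete(node.id);
  }
  // write the freshly crawled blocks, then persist
  for (const block of blocks) {
    writer.insert(block);
  }
  await writer.commit();
}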
@@ -217,16 +212,20 @@ export class DocsIndexer extends Entity {
 
     if (workerOutput.reindexDoc) {
       await this.jobQueue.enqueue(
-        workerOutput.reindexDoc.map(({ docId, storageDocId }) => ({
-          batchKey: docId,
-          payload: { docId, storageDocId },
+        workerOutput.reindexDoc.map(({ storageDocId }) => ({
+          batchKey: storageDocId,
+          payload: { storageDocId },
         }))
       );
     }
 
     const duration = performance.now() - startTime;
     logger.debug(
-      'Finish crawling job for docId:' + docId + ' in ' + duration + 'ms '
+      'Finish crawling job for storageDocId:' +
+        storageDocId +
+        ' in ' +
+        duration +
+        'ms '
     );
   }
 
@@ -236,7 +235,7 @@ export class DocsIndexer extends Entity {
       .enqueue([
         {
           batchKey: this.workspaceId,
-          payload: { docId: this.workspaceId, storageDocId: this.workspaceId },
+          payload: { storageDocId: this.workspaceId },
         },
       ])
       .catch(err => {
@@ -255,47 +254,3 @@ export class DocsIndexer extends Entity {
     this.runner.stop();
   }
 }
-
-function normalizeDocId(raw: string) {
-  enum DocVariant {
-    Workspace = 'workspace',
-    Page = 'page',
-    Space = 'space',
-    Settings = 'settings',
-    Unknown = 'unknown',
-  }
-
-  try {
-    if (!raw.length) {
-      throw new Error('Invalid Empty Doc ID');
-    }
-
-    let parts = raw.split(':');
-
-    if (parts.length > 3) {
-      // special adapt case `wsId:space:page:pageId`
-      if (parts[1] === DocVariant.Space && parts[2] === DocVariant.Page) {
-        parts = [parts[0], DocVariant.Space, parts[3]];
-      } else {
-        throw new Error(`Invalid format of Doc ID: ${raw}`);
-      }
-    } else if (parts.length === 2) {
-      // `${variant}:${guid}`
-      throw new Error('not supported');
-    } else if (parts.length === 1) {
-      // ${ws} or ${pageId}
-      parts = ['', DocVariant.Unknown, parts[0]];
-    }
-
-    const docId = parts.at(2);
-
-    if (!docId) {
-      throw new Error('ID is required');
-    }
-
-    return docId;
-  } catch (err) {
-    logger.error('Error on normalize docId ' + raw, err);
-    return raw;
-  }
-}
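normalizeDocId and its string-parsing heuristics are removed outright: instead of guessing the logical id from formats like `wsId:space:page:pageId`, the worker now resolves it by matching the storage doc's guid against the subdocs in the root doc's 'spaces' map (see the worker changes below). A sketch of that lookup, assuming yjs as the Y.Doc provider:

import { Doc as YDoc } from 'yjs';

// Walk the root doc's 'spaces' map and return the key whose subdoc guid
// matches the storage doc id, or null if the doc is unknown.
function resolveDocId(yRootDoc: YDoc, storageDocId: string): string | null {
  for (const [id, subdoc] of yRootDoc.getMap('spaces')) {
    if (subdoc instanceof YDoc && subdoc.guid === storageDocId) {
      return id;
    }
  }
  return null;
}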
@@ -1,6 +1,8 @@
 import type { AffineTextAttributes } from '@blocksuite/blocks';
 import type { DeltaInsert } from '@blocksuite/inline';
 import { Document } from '@toeverything/infra';
+import { toHexString } from 'lib0/buffer.js';
+import { digest as lib0Digest } from 'lib0/hash/sha256';
 import { difference } from 'lodash-es';
 import {
   applyUpdate,
@@ -18,24 +20,66 @@ import type {
   WorkerOutput,
 } from './types';
 
-function crawlingDocData({
+let cachedRootDoc: { doc: YDoc; hash: string } | null = null;
+
+async function digest(data: Uint8Array) {
+  if (
+    globalThis.crypto &&
+    globalThis.crypto.subtle &&
+    typeof globalThis.crypto.subtle.digest === 'function'
+  ) {
+    return new Uint8Array(
+      await globalThis.crypto.subtle.digest('SHA-256', data)
+    );
+  }
+  return lib0Digest(data);
+}
+
+async function crawlingDocData({
   docBuffer,
-  docId,
+  storageDocId,
   rootDocBuffer,
-}: WorkerInput & { type: 'doc' }): WorkerOutput {
-  const yRootDoc = new YDoc();
-  applyUpdate(yRootDoc, rootDocBuffer);
+}: WorkerInput & { type: 'doc' }): Promise<WorkerOutput> {
+  if (isEmptyUpdate(rootDocBuffer)) {
+    console.warn('[worker]: Empty root doc buffer');
+    return {};
+  }
+
+  const rootDocBufferHash = toHexString(await digest(rootDocBuffer));
+
+  let yRootDoc;
+  if (cachedRootDoc && cachedRootDoc.hash === rootDocBufferHash) {
+    yRootDoc = cachedRootDoc.doc;
+  } else {
+    yRootDoc = new YDoc();
+    applyUpdate(yRootDoc, rootDocBuffer);
+    cachedRootDoc = { doc: yRootDoc, hash: rootDocBufferHash };
+  }
+
+  let docId = null;
+  for (const [id, subdoc] of yRootDoc.getMap('spaces')) {
+    if (subdoc instanceof YDoc && storageDocId === subdoc.guid) {
+      docId = id;
+      break;
+    }
+  }
+
+  if (docId === null) {
+    return {};
+  }
 
   const ydoc = new YDoc();
 
-  applyUpdate(ydoc, docBuffer);
+  if (!isEmptyUpdate(docBuffer)) {
+    applyUpdate(ydoc, docBuffer);
+  }
 
   let docExists: boolean | null = null;
 
   (
     yRootDoc.getMap('meta').get('pages') as YArray<YMap<any>> | undefined
   )?.forEach(page => {
-    if (page.get('id') === docId) {
+    if (page.get('id') === storageDocId) {
       docExists = !(page.get('trash') ?? false);
     }
   });
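Two worker-side changes carry most of the weight here: the root doc update is hashed (WebCrypto SHA-256 where available, lib0's pure-JS digest otherwise) so the decoded YDoc can be reused across crawl jobs, and empty update buffers are skipped rather than applied. A compact sketch of the hash-keyed memoization, assuming yjs as the Y.Doc provider:

import { toHexString } from 'lib0/buffer.js';
import { digest as lib0Digest } from 'lib0/hash/sha256';
import { applyUpdate, Doc as YDoc } from 'yjs';

let cachedRootDoc: { doc: YDoc; hash: string } | null = null;

async function sha256(data: Uint8Array): Promise<Uint8Array> {
  // Prefer WebCrypto; fall back to lib0 where crypto.subtle is unavailable.
  if (globalThis.crypto?.subtle) {
    return new Uint8Array(await globalThis.crypto.subtle.digest('SHA-256', data));
  }
  return lib0Digest(data);
}

async function getRootDoc(rootDocBuffer: Uint8Array): Promise<YDoc> {
  const hash = toHexString(await sha256(rootDocBuffer));
  if (cachedRootDoc?.hash === hash) {
    return cachedRootDoc.doc; // unchanged workspace: skip the costly re-decode
  }
  const doc = new YDoc();
  applyUpdate(doc, rootDocBuffer);
  cachedRootDoc = { doc, hash };
  return doc;
}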
@@ -283,7 +327,7 @@ function crawlingRootDocData({
   };
 }
 
-globalThis.onmessage = (event: MessageEvent<WorkerIngoingMessage>) => {
+globalThis.onmessage = async (event: MessageEvent<WorkerIngoingMessage>) => {
   const message = event.data;
   if (message.type === 'init') {
     postMessage({ type: 'init', msgId: message.msgId });
@@ -296,7 +340,7 @@ globalThis.onmessage = (event: MessageEvent<WorkerIngoingMessage>) => {
     if (input.type === 'rootDoc') {
       data = crawlingRootDocData(input);
     } else {
-      data = crawlingDocData(input);
+      data = await crawlingDocData(input);
     }
 
     postMessage({ type: 'done', msgId: message.msgId, output: data });
@@ -311,3 +355,10 @@ globalThis.onmessage = (event: MessageEvent<WorkerIngoingMessage>) => {
 };
 
 declare function postMessage(message: WorkerOutgoingMessage): void;
+
+function isEmptyUpdate(binary: Uint8Array) {
+  return (
+    binary.byteLength === 0 ||
+    (binary.byteLength === 2 && binary[0] === 0 && binary[1] === 0)
+  );
+}
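The two-byte check reflects how Yjs encodes a no-op update: a zero-length client-structs section followed by an empty delete set, i.e. exactly [0, 0]. A quick way to confirm the sentinel, assuming yjs:

import { Doc as YDoc, encodeStateAsUpdate } from 'yjs';

// A freshly created doc with no content encodes to the two-byte update [0, 0],
// which is precisely what isEmptyUpdate treats as "nothing to index".
const empty = encodeStateAsUpdate(new YDoc());
console.log(empty); // Uint8Array(2) [ 0, 0 ]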
@@ -34,7 +34,7 @@ export type WorkerInput =
     }
   | {
       type: 'doc';
-      docId: string;
+      storageDocId: string;
      rootDocBuffer: Uint8Array;
      docBuffer: Uint8Array;
    };
@@ -452,6 +452,7 @@ __metadata:
     jotai-devtools: "npm:^0.10.0"
     jotai-effect: "npm:^1.0.0"
     jotai-scope: "npm:^0.7.0"
+    lib0: "npm:^0.2.95"
    lit: "npm:^3.1.3"
    lodash-es: "npm:^4.17.21"
    lottie-react: "npm:^2.4.0"