From f89df5921a80d73af7a6df7e279d96b2b1abe92b Mon Sep 17 00:00:00 2001
From: Andrey Sobolev
Date: Wed, 16 Oct 2024 21:14:42 +0700
Subject: [PATCH] UBERF-8499: Optimize indexer operation (#6959)

Signed-off-by: Andrey Sobolev
---
 models/contact/src/index.ts                |   3 +-
 models/controlled-documents/src/index.ts   |   9 +-
 models/core/src/core.ts                    |   6 +-
 models/core/src/index.ts                   |   3 +-
 models/core/src/migration.ts               |   6 +-
 models/gmail/src/index.ts                  |   3 +-
 models/recruit/src/index.ts                |   9 +-
 models/telegram/src/index.ts               |   3 +-
 packages/core/src/classes.ts               |   3 +-
 packages/core/src/component.ts             |   4 +-
 packages/core/src/utils.ts                 |  39 +++++---
 pods/server/package.json                   |   2 +-
 server/elastic/src/adapter.ts              |   2 +-
 server/indexer/src/fulltext.ts             |   4 +-
 server/indexer/src/indexer/field.ts        |  13 +--
 server/indexer/src/indexer/fulltextPush.ts | 105 ++++++++++++++-------
 server/indexer/src/indexer/indexer.ts      |  36 ++++---
 server/indexer/src/indexer/summary.ts      |  49 +++++++---
 server/indexer/src/indexer/types.ts        |  12 +++
 server/indexer/src/indexer/utils.ts        |  63 ++++++-------
 20 files changed, 241 insertions(+), 133 deletions(-)

diff --git a/models/contact/src/index.ts b/models/contact/src/index.ts
index 1109e5549e..e7772d1147 100644
--- a/models/contact/src/index.ts
+++ b/models/contact/src/index.ts
@@ -955,7 +955,8 @@ export function createModel (builder: Builder): void {
   )

   // Allow to use fuzzy search for mixins
-  builder.mixin(contact.class.Contact, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: contact.class.Contact,
     fullTextSummary: true
   })

diff --git a/models/controlled-documents/src/index.ts b/models/controlled-documents/src/index.ts
index 64145c2ebf..528dfa2fcd 100644
--- a/models/controlled-documents/src/index.ts
+++ b/models/controlled-documents/src/index.ts
@@ -549,7 +549,8 @@ export function createModel (builder: Builder): void {
     func: documents.function.GetAllDocumentStates
   })

-  builder.mixin(documents.class.Document, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: documents.class.Document,
     fullTextSummary: true,
     childProcessingAllowed: true
   })
@@ -886,11 +887,13 @@ export function defineNotifications (builder: Builder): void {
 }

 export function defineSearch (builder: Builder): void {
-  builder.mixin(documents.class.Document, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: documents.class.Document,
     parentPropagate: true
   })

-  builder.mixin(documents.class.DocumentMeta, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: documents.class.DocumentMeta,
     fullTextSummary: true,
     childProcessingAllowed: true,
     propagate: []
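Note: the model changes above turn FullTextSearchContext from a mixin on Class into a standalone model document keyed by toClass. A minimal sketch of how a consumer can build the lookup map once from the model (buildContexts is an illustrative name; the migration and indexer below inline the same expression):

    import core, { type Class, type Doc, type FullTextSearchContext, type ModelDb, type Ref } from '@hcengineering/core'

    // Collect all FullTextSearchContext documents once and key them by target class,
    // so later lookups are a Map.get instead of a mixin read on every Class document.
    export function buildContexts (model: ModelDb): Map<Ref<Class<Doc>>, FullTextSearchContext> {
      return new Map(model.findAllSync(core.class.FullTextSearchContext, {}).map((it) => [it.toClass, it]))
    }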
diff --git a/models/core/src/core.ts b/models/core/src/core.ts
index b7e6651995..3942868da8 100644
--- a/models/core/src/core.ts
+++ b/models/core/src/core.ts
@@ -361,8 +361,10 @@ export class TDocIndexState extends TDoc implements DocIndexState {
   generationId?: string
 }

-@MMixin(core.mixin.FullTextSearchContext, core.class.Class)
-export class TFullTextSearchContext extends TClass implements FullTextSearchContext {}
+@Model(core.class.FullTextSearchContext, core.class.Doc, DOMAIN_MODEL)
+export class TFullTextSearchContext extends TDoc implements FullTextSearchContext {
+  toClass!: Ref<Class<Doc<Space>>>
+}

 @MMixin(core.mixin.ConfigurationElement, core.class.Class)
 export class TConfigurationElement extends TClass implements ConfigurationElement {
diff --git a/models/core/src/index.ts b/models/core/src/index.ts
index b80246b6cf..0ef71c7ae9 100644
--- a/models/core/src/index.ts
+++ b/models/core/src/index.ts
@@ -309,7 +309,8 @@ export function createModel (builder: Builder): void {
     ]
   })

-  builder.mixin(core.class.Space, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: core.class.Space,
     childProcessingAllowed: false
   })

diff --git a/models/core/src/migration.ts b/models/core/src/migration.ts
index 844d1e96f0..5645eec853 100644
--- a/models/core/src/migration.ts
+++ b/models/core/src/migration.ts
@@ -262,7 +262,11 @@ export const coreOperation: MigrateOperation = {
     async migrate (client: MigrationClient): Promise<void> {
       // We need to delete all documents in doc index state for missing classes
       const allClasses = client.hierarchy.getDescendants(core.class.Doc)
-      const allIndexed = allClasses.filter((it) => isClassIndexable(client.hierarchy, it))
+      const contexts = new Map(
+        client.model.findAllSync(core.class.FullTextSearchContext, {}).map((it) => [it.toClass, it])
+      )
+
+      const allIndexed = allClasses.filter((it) => isClassIndexable(client.hierarchy, it, contexts))

       // Next remove all non indexed classes and missing classes as well.
       await client.update(
diff --git a/models/gmail/src/index.ts b/models/gmail/src/index.ts
index 6e02c0d38c..c7853dcdb4 100644
--- a/models/gmail/src/index.ts
+++ b/models/gmail/src/index.ts
@@ -214,7 +214,8 @@ export function createModel (builder: Builder): void {
     gmail.action.WriteEmail
   )

-  builder.mixin(gmail.class.Message, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: gmail.class.Message,
     parentPropagate: false
   })

diff --git a/models/recruit/src/index.ts b/models/recruit/src/index.ts
index b1f588e624..5a337a6351 100644
--- a/models/recruit/src/index.ts
+++ b/models/recruit/src/index.ts
@@ -1438,13 +1438,15 @@ export function createModel (builder: Builder): void {
   )

   // Allow to use fuzzy search for mixins
-  builder.mixin(recruit.class.Vacancy, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: recruit.class.Vacancy,
     fullTextSummary: true,
     childProcessingAllowed: true,
     propagate: []
   })

-  builder.mixin(recruit.mixin.Candidate, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: recruit.mixin.Candidate,
     fullTextSummary: true,
     propagate: [recruit.class.Applicant],
     childProcessingAllowed: true,
@@ -1457,7 +1459,8 @@
   })

   // Allow to use fuzzy search for mixins
-  builder.mixin(recruit.class.Applicant, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: recruit.class.Applicant,
     fullTextSummary: true,
     forceIndex: true,
     childProcessingAllowed: true,
diff --git a/models/telegram/src/index.ts b/models/telegram/src/index.ts
index c950f28c55..80e8a139c4 100644
--- a/models/telegram/src/index.ts
+++ b/models/telegram/src/index.ts
@@ -179,7 +179,8 @@ export function createModel (builder: Builder): void {
     telegram.ids.TelegramMessageSharedActivityViewlet
   )

-  builder.mixin(telegram.class.Message, core.class.Class, core.mixin.FullTextSearchContext, {
+  builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
+    toClass: telegram.class.Message,
     parentPropagate: false,
     childProcessingAllowed: true
   })
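Note: every model package above now registers its search settings the same way. A hypothetical registration for a custom class under the new API (myClass stands in for a real class reference; the option values are examples only):

    import core, { type Class, type Doc, type Ref } from '@hcengineering/core'
    import { type Builder } from '@hcengineering/model'

    // Illustrative helper: declare full text search behaviour for a class as a
    // model document instead of a mixin on the class itself.
    export function defineFullTextContext (builder: Builder, myClass: Ref<Class<Doc>>): void {
      builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
        toClass: myClass,
        fullTextSummary: true,
        childProcessingAllowed: true,
        propagate: []
      })
    }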
diff --git a/packages/core/src/classes.ts b/packages/core/src/classes.ts
index 0ce2d198f6..064205df7c 100644
--- a/packages/core/src/classes.ts
+++ b/packages/core/src/classes.ts
@@ -578,7 +578,8 @@ export interface BlobLookup extends Blob {
  *
  * If defined for class, this class will be enabled for embedding search like openai.
  */
-export interface FullTextSearchContext extends Class<Doc> {
+export interface FullTextSearchContext extends Doc {
+  toClass: Ref<Class<Doc>>
   fullTextSummary?: boolean
   forceIndex?: boolean

diff --git a/packages/core/src/component.ts b/packages/core/src/component.ts
index 08343ec7f3..5349a3a8c2 100644
--- a/packages/core/src/component.ts
+++ b/packages/core/src/component.ts
@@ -142,10 +142,10 @@ export default plugin(coreId, {
     StatusCategory: '' as Ref<Class<StatusCategory>>,
     MigrationState: '' as Ref<Class<MigrationState>>,
-    BenchmarkDoc: '' as Ref<Class<BenchmarkDoc>>
+    BenchmarkDoc: '' as Ref<Class<BenchmarkDoc>>,
+    FullTextSearchContext: '' as Ref<Class<FullTextSearchContext>>
   },
   mixin: {
-    FullTextSearchContext: '' as Ref<Mixin<FullTextSearchContext>>,
     ConfigurationElement: '' as Ref<Mixin<ConfigurationElement>>,
     IndexConfiguration: '' as Ref<Mixin<IndexConfiguration<Doc>>>,
     SpacesTypeData: '' as Ref<Mixin<Space>>
diff --git a/packages/core/src/utils.ts b/packages/core/src/utils.ts
index f27ed14624..4987f6d617 100644
--- a/packages/core/src/utils.ts
+++ b/packages/core/src/utils.ts
@@ -46,9 +46,9 @@ import core from './component'
 import { Hierarchy } from './hierarchy'
 import { TxOperations } from './operations'
 import { isPredicate } from './predicate'
+import { Branding, BrandingMap } from './server'
 import { DocumentQuery, FindResult } from './storage'
 import { DOMAIN_TX } from './tx'
-import { Branding, BrandingMap } from './server'

 function toHex (value: number, chars: number): string {
   const result = value.toString(16)
@@ -686,39 +686,48 @@ export function getFullTextIndexableAttributes (
   return result
 }

+const ctxKey = 'indexer_ftc'
 /**
  * @public
  */
 export function getFullTextContext (
   hierarchy: Hierarchy,
-  objectClass: Ref<Class<Doc>>
+  objectClass: Ref<Class<Doc>>,
+  contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
 ): Omit<FullTextSearchContext, keyof Class<Doc>> {
-  let objClass = hierarchy.getClass(objectClass)
-
-  while (true) {
-    if (hierarchy.hasMixin(objClass, core.mixin.FullTextSearchContext)) {
-      const ctx = hierarchy.as<Class<Doc>, FullTextSearchContext>(objClass, core.mixin.FullTextSearchContext)
+  let ctx: Omit<FullTextSearchContext, keyof Class<Doc>> | undefined = hierarchy.getClassifierProp(objectClass, ctxKey)
+  if (ctx !== undefined) {
+    return ctx
+  }
+  if (typeof ctx !== 'string') {
+    const anc = hierarchy.getAncestors(objectClass)
+    for (const oc of anc) {
+      const ctx = contexts.get(oc)
       if (ctx !== undefined) {
+        hierarchy.setClassifierProp(objectClass, ctxKey, ctx)
         return ctx
       }
     }
-    if (objClass.extends === undefined) {
-      break
-    }
-    objClass = hierarchy.getClass(objClass.extends)
   }
-  return {
+  ctx = {
+    toClass: objectClass,
     fullTextSummary: false,
     forceIndex: false,
     propagate: [],
     childProcessingAllowed: true
   }
+  hierarchy.setClassifierProp(objectClass, ctxKey, ctx)
+  return ctx
 }

 /**
  * @public
  */
-export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boolean {
+export function isClassIndexable (
+  hierarchy: Hierarchy,
+  c: Ref<Class<Doc>>,
+  contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
+): boolean {
   const indexed = hierarchy.getClassifierProp(c, 'class_indexed')
   if (indexed !== undefined) {
     return indexed as boolean
   }
@@ -756,13 +765,13 @@ export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boo

   let result = true

-  if (attrs.length === 0 && !(getFullTextContext(hierarchy, c)?.forceIndex ?? false)) {
+  if (attrs.length === 0 && !(getFullTextContext(hierarchy, c, contexts)?.forceIndex ?? false)) {
     result = false
     // We need check if document has collections with indexable fields.
     const attrs = hierarchy.getAllAttributes(c).values()
     for (const attr of attrs) {
       if (attr.type._class === core.class.Collection) {
-        if (isClassIndexable(hierarchy, (attr.type as Collection<AttachedDoc>).of)) {
+        if (isClassIndexable(hierarchy, (attr.type as Collection<AttachedDoc>).of, contexts)) {
           result = true
           break
         }
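Note: getFullTextContext above now resolves the context by walking hierarchy.getAncestors once and memoizing the result on the classifier under the 'indexer_ftc' key. A simplified sketch of that lookup (resolveContext is an illustrative name; the real function also synthesizes and caches a default context when nothing is registered):

    import { type Class, type Doc, type FullTextSearchContext, type Hierarchy, type Ref } from '@hcengineering/core'

    const ctxKey = 'indexer_ftc'

    // Walk the ancestors once, then cache the answer on the classifier so repeated lookups are O(1).
    export function resolveContext (
      hierarchy: Hierarchy,
      objectClass: Ref<Class<Doc>>,
      contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
    ): FullTextSearchContext | undefined {
      const cached = hierarchy.getClassifierProp(objectClass, ctxKey)
      if (cached !== undefined) {
        return cached as FullTextSearchContext
      }
      for (const ancestor of hierarchy.getAncestors(objectClass)) {
        const ctx = contexts.get(ancestor)
        if (ctx !== undefined) {
          hierarchy.setClassifierProp(objectClass, ctxKey, ctx)
          return ctx
        }
      }
      return undefined
    }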
diff --git a/pods/server/package.json b/pods/server/package.json
index f4a303072a..fe0c10e910 100644
--- a/pods/server/package.json
+++ b/pods/server/package.json
@@ -8,7 +8,7 @@
     "template": "@hcengineering/node-package",
     "license": "EPL-2.0",
     "scripts": {
-    "start": "rush bundle --to @hcengineering/pod-server && cross-env NODE_ENV=production ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret OPERATION_PROFILING=false MODEL_JSON=../../models/all/bundle/model.json node bundle/bundle.js",
+    "start": "rush bundle --to @hcengineering/pod-server && cross-env NODE_ENV=production ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 DB_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret OPERATION_PROFILING=false MODEL_JSON=../../models/all/bundle/model.json node --inspect bundle/bundle.js",
     "start-u": "rush bundle --to @hcengineering/pod-server && ./bundle/ && cross-env NODE_ENV=production SERVER_PROVIDER=uweb ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret MODEL_JSON=../../models/all/bundle/model.json node bundle/bundle.js",
     "start-flame": "rush bundle --to @hcengineering/pod-server && cross-env NODE_ENV=production ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret MODEL_JSON=../../models/all/bundle/model.json clinic flame --dest ./out -- node --nolazy -r ts-node/register --enable-source-maps src/__start.ts",
     "build": "compile",
diff --git a/server/elastic/src/adapter.ts b/server/elastic/src/adapter.ts
index 2ae65b372c..7b782abf14 100644
--- a/server/elastic/src/adapter.ts
+++ b/server/elastic/src/adapter.ts
@@ -522,7 +522,7 @@ class ElasticAdapter implements FullTextAdapter {
   async updateMany (docs: IndexedDoc[]): Promise<TxResult[]> {
     const parts = Array.from(docs)
     while (parts.length > 0) {
-      const part = parts.splice(0, 1000)
+      const part = parts.splice(0, 500)

       const operations = part.flatMap((doc) => {
         const wsDoc = { workspaceId: this.workspaceString, ...doc }
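Note: the Elasticsearch adapter above halves its bulk batch size from 1000 to 500 documents per request. The splice-based batching pattern it relies on, as a small generic sketch (inBatches is an illustrative name):

    // Drain a list in fixed-size chunks so each bulk request stays bounded.
    export async function inBatches<T> (docs: T[], size: number, send: (part: T[]) => Promise<void>): Promise<void> {
      const parts = Array.from(docs)
      while (parts.length > 0) {
        const part = parts.splice(0, size)
        await send(part)
      }
    }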
diff --git a/server/indexer/src/fulltext.ts b/server/indexer/src/fulltext.ts
index 1eaa7df8fc..b31805a42c 100644
--- a/server/indexer/src/fulltext.ts
+++ b/server/indexer/src/fulltext.ts
@@ -45,9 +45,9 @@ import core, {
   toFindResult
 } from '@hcengineering/core'
 import type { FullTextAdapter, IndexedDoc, SessionFindAll, StorageAdapter, WithFind } from '@hcengineering/server-core'
-import { getScoringConfig, mapSearchResultDoc } from './mapper'
 import { type FullTextIndexPipeline } from './indexer'
 import { createStateDoc } from './indexer/utils'
+import { getScoringConfig, mapSearchResultDoc } from './mapper'

 /**
  * @public
@@ -90,7 +90,7 @@ export class FullTextIndex implements WithFind {
       if (TxProcessor.isExtendsCUD(tx._class)) {
         const cud = tx as TxCUD<Doc>
-        if (!isClassIndexable(this.hierarchy, cud.objectClass)) {
+        if (!isClassIndexable(this.hierarchy, cud.objectClass, this.indexer.contexts)) {
           // No need, since no indixable fields or attachments.
           continue
         }
diff --git a/server/indexer/src/indexer/field.ts b/server/indexer/src/indexer/field.ts
index 0e4ce1373b..c2e4914cfc 100644
--- a/server/indexer/src/indexer/field.ts
+++ b/server/indexer/src/indexer/field.ts
@@ -50,6 +50,7 @@ export class IndexedFieldStage implements FullTextPipelineStage {
   updateFields: DocUpdateHandler[] = []

   enabled = true
+
   constructor (private readonly dbStorageFindAll: SessionFindAll) {}

   async initialize (ctx: MeasureContext, storage: DbAdapter, pipeline: FullTextPipeline): Promise<void> {}
@@ -150,7 +151,7 @@ export class IndexedFieldStage implements FullTextPipelineStage {
           }

           if (docState.attachedTo != null && changes > 0) {
-            const ctx = getFullTextContext(pipeline.hierarchy, objClass)
+            const ctx = getFullTextContext(pipeline.hierarchy, objClass, pipeline.contexts)
             if (ctx.parentPropagate ?? true) {
               // We need to clear field stage from parent, so it will be re indexed.
               await pipeline.update(docState.attachedTo as Ref<DocIndexState>, false, {})
@@ -173,17 +174,13 @@ export class IndexedFieldStage implements FullTextPipelineStage {
              {
                attachedTo: ids.length === 1 ? ids[0] : { $in: ids }
              },
-              { limit: ids.length }
+              { limit: ids.length, skipSpace: true, skipClass: true }
            )
          )
        }
        const childs = allChildDocs.filter((it) => it.attachedTo === docState._id)
-        for (const u of childs) {
-          if (propagate.some((it) => pipeline.hierarchy.isDerived(u.objectClass, it))) {
-            pipeline.add(u)
-            await pipeline.update(u._id, false, {})
-          }
-        }
+        // Mark childs to be indexed on next step
+        await pipeline.queue(metrics, new Map(childs.map((it) => [it._id, { updated: true, removed: false }])))
       }

       await pipeline.update(docState._id, true, docUpdate)
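Note: IndexedFieldStage above no longer re-updates every attached child inline; it hands the children to the pipeline queue added in types.ts and lets the next indexing pass pick them up. A sketch of that hand-off, assuming a pipeline that exposes the queue method from this patch (markChildren is an illustrative name):

    import { type DocIndexState, type MeasureContext, type Ref } from '@hcengineering/core'

    type QueueFn = (
      ctx: MeasureContext,
      updates: Map<Ref<DocIndexState>, { create?: DocIndexState, updated: boolean, removed: boolean }>
    ) => Promise<void>

    // Mark attached documents as updated so the pipeline reindexes them on its next pass.
    export async function markChildren (queue: QueueFn, ctx: MeasureContext, childs: DocIndexState[]): Promise<void> {
      await queue(ctx, new Map(childs.map((it) => [it._id, { updated: true, removed: false }])))
    }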
diff --git a/server/indexer/src/indexer/fulltextPush.ts b/server/indexer/src/indexer/fulltextPush.ts
index 26bf4f9c18..410eec6ed3 100644
--- a/server/indexer/src/indexer/fulltextPush.ts
+++ b/server/indexer/src/indexer/fulltextPush.ts
@@ -30,12 +30,13 @@ import core, {
   type MeasureContext,
   RateLimiter,
   type Ref,
+  SortingOrder,
   toIdMap,
   type WorkspaceId
 } from '@hcengineering/core'
 import { type DbAdapter, type FullTextAdapter, type IndexedDoc, type SessionFindAll } from '@hcengineering/server-core'
-import { updateDocWithPresenter } from '../mapper'
 import { jsonToText, markupToJSON } from '@hcengineering/text'
+import { updateDocWithPresenter } from '../mapper'
 import {
   contentStageId,
   type DocUpdateHandler,
@@ -118,7 +119,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
       const childIds = toIndexPart
         .filter((it) => {
-          const fctx = getFullTextContext(pipeline.hierarchy, it.objectClass)
+          const fctx = getFullTextContext(pipeline.hierarchy, it.objectClass, pipeline.contexts)
           return fctx.childProcessingAllowed ?? true
         })
         .map((it) => it._id)
@@ -127,9 +128,17 @@
         'find-child',
         {},
         async (ctx) =>
-          await this.dbStorageFindAll(ctx, core.class.DocIndexState, {
-            attachedTo: childIds.length === 1 ? childIds[0] : { $in: childIds }
-          })
+          await this.dbStorageFindAll(
+            ctx,
+            core.class.DocIndexState,
+            {
+              attachedTo: childIds.length === 1 ? childIds[0] : { $in: childIds }
+            },
+            {
+              skipClass: true,
+              skipSpace: true
+            }
+          )
       )

       // spaces
@@ -138,14 +147,22 @@
           'find-spaces',
           {},
           async (ctx) =>
-            await this.dbStorageFindAll(ctx, core.class.DocIndexState, {
-              _id: {
-                $in: toIndexPart.map(
-                  (doc) =>
-                    (doc.attributes[docKey('space', { _class: doc.objectClass })] ?? doc.space) as Ref<DocIndexState>
-                )
+            await this.dbStorageFindAll(
+              ctx,
+              core.class.DocIndexState,
+              {
+                _id: {
+                  $in: toIndexPart.map(
+                    (doc) =>
+                      (doc.attributes[docKey('space', { _class: doc.objectClass })] ?? doc.space) as Ref<DocIndexState>
+                  )
+                }
+              },
+              {
+                skipClass: true,
+                skipSpace: true
               }
-            })
+            )
         )
       )
@@ -163,7 +180,7 @@
           const childDocs = allChildDocs.filter((it) => it.attachedTo === doc._id)
           if (childDocs.length > 0) {
             for (const c of childDocs) {
-              const fctx = getFullTextContext(pipeline.hierarchy, c.objectClass)
+              const fctx = getFullTextContext(pipeline.hierarchy, c.objectClass, pipeline.contexts)
               if (fctx.parentPropagate ?? true) {
                 ctx.withSync('updateDoc2Elastic', {}, (ctx) => {
                   updateDoc2Elastic(
@@ -195,7 +212,11 @@
                   { _id: doc.attachedTo as Ref<DocIndexState> },
-                  { limit: 1 }
+                  {
+                    limit: 1,
+                    skipClass: true,
+                    skipSpace: true
+                  }
                 )
               ).shift()
             ))
@@ -217,23 +238,43 @@
             const collectClasses = collectPropagateClasses(pipeline, parentDoc.objectClass)
             if (collectClasses.length > 0) {
-              const collections = await this.dbStorageFindAll(ctx, core.class.DocIndexState, {
-                attachedTo: parentDoc._id,
-                objectClass: { $in: collectClasses }
-              })
-              for (const c of collections) {
-                ctx.withSync('updateDoc2Elastic', {}, (ctx) => {
-                  updateDoc2Elastic(
-                    this.allAttrs,
-                    ctx,
-                    c.attributes,
-                    elasticDoc,
-                    c._id,
-                    undefined,
-                    pipeline.hierarchy,
-                    true
-                  )
-                })
+              let last: number = 0
+              while (true) {
+                const collections = await this.dbStorageFindAll(
+                  ctx,
+                  core.class.DocIndexState,
+                  {
+                    attachedTo: parentDoc._id,
+                    objectClass: { $in: collectClasses },
+                    modifiedOn: { $gt: last }
+                  },
+                  {
+                    sort: {
+                      modifiedOn: SortingOrder.Ascending
+                    },
+                    skipClass: true,
+                    skipSpace: true,
+                    limit: 500
+                  }
+                )
+                if (collections.length === 0) {
+                  break
+                }
+                last = collections[collections.length - 1].modifiedOn
+                for (const c of collections) {
+                  ctx.withSync('updateDoc2Elastic', {}, (ctx) => {
+                    updateDoc2Elastic(
+                      this.allAttrs,
+                      ctx,
+                      c.attributes,
+                      elasticDoc,
+                      c._id,
+                      undefined,
+                      pipeline.hierarchy,
+                      true
+                    )
+                  })
+                }
               }
             }
           }
@@ -262,7 +303,7 @@
       }

       // Perform bulk update to elastic
-      void pushQueue.add(async () => {
+      await pushQueue.exec(async () => {
         try {
           try {
             await ctx.with('push-elastic', {}, async () => {
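Note: FullTextPushStage above replaces the single unbounded query over a parent's collection documents with cursor-style pages of 500, advancing a modifiedOn watermark; summary.ts below reuses the same pattern with pages of 250. A sketch of the loop, assuming a SessionFindAll-compatible find function (forEachCollection is an illustrative name):

    import core, { SortingOrder, type DocIndexState, type MeasureContext, type Ref } from '@hcengineering/core'
    import { type SessionFindAll } from '@hcengineering/server-core'

    // Page through attached DocIndexState documents using modifiedOn as a cursor,
    // so large collections never have to be materialized in a single query.
    export async function forEachCollection (
      ctx: MeasureContext,
      findAll: SessionFindAll,
      parent: Ref<DocIndexState>,
      op: (doc: DocIndexState) => void
    ): Promise<void> {
      let last = 0
      while (true) {
        const page = await findAll(
          ctx,
          core.class.DocIndexState,
          { attachedTo: parent, modifiedOn: { $gt: last } },
          { limit: 500, sort: { modifiedOn: SortingOrder.Ascending }, skipClass: true, skipSpace: true }
        )
        if (page.length === 0) {
          break
        }
        last = page[page.length - 1].modifiedOn
        for (const doc of page) {
          op(doc)
        }
      }
    }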
diff --git a/server/indexer/src/indexer/indexer.ts b/server/indexer/src/indexer/indexer.ts
index c27d7e1baf..9c82cc32e5 100644
--- a/server/indexer/src/indexer/indexer.ts
+++ b/server/indexer/src/indexer/indexer.ts
@@ -21,6 +21,7 @@ import core, {
   type DocIndexState,
   type DocumentQuery,
   type DocumentUpdate,
+  type FullTextSearchContext,
   type Hierarchy,
   type MeasureContext,
   type ModelDb,
@@ -79,6 +80,10 @@ export class FullTextIndexPipeline implements FullTextPipeline {

   uploadOps: DocIndexState[] = []

+  contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
+  propogage = new Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>()
+  propogageClasses = new Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>()
+
   constructor (
     private readonly storage: DbAdapter,
     private readonly stages: FullTextPipelineStage[],
@@ -90,6 +95,7 @@
   ) {
     this.readyStages = stages.map((it) => it.stageId)
     this.readyStages.sort()
+    this.contexts = new Map(model.findAllSync(core.class.FullTextSearchContext, {}).map((it) => [it.toClass, it]))
   }

   async cancel (): Promise<void> {
@@ -386,8 +392,6 @@ export class FullTextIndexPipeline implements FullTextPipeline {
     )

     // Also update doc index state queries.
-    _classes.push(core.class.DocIndexState)
-
     _classes.forEach((it) => this.broadcastClasses.add(it))

     if (this.triggerCounts > 0) {
@@ -410,10 +414,16 @@
         }
       }, 5000)

+      let notified = false
       await new Promise((resolve) => {
         this.triggerIndexing = () => {
           this.triggerCounts++
-          resolve(null)
+          if (!notified) {
+            notified = true
+            setTimeout(() => {
+              resolve(null)
+            }, 500) // Start indexing only after cooldown
+          }
         }
       })
     }
@@ -435,14 +445,18 @@
       })

     let result: DocIndexState[] | undefined = await ctx.with('get-indexable', {}, async () => {
-      const q: DocumentQuery<DocIndexState> = {
-        needIndex: true
-      }
-      return await this.storage.findAll(ctx, core.class.DocIndexState, q, {
-        limit: globalIndexer.processingSize,
-        skipClass: true,
-        skipSpace: true
-      })
+      return await this.storage.findAll(
+        ctx,
+        core.class.DocIndexState,
+        {
+          needIndex: true
+        },
+        {
+          limit: globalIndexer.processingSize,
+          skipClass: true,
+          skipSpace: true
+        }
+      )
     })
     if (result === undefined) {
       // No more results
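Note: the indexing trigger above now coalesces bursts of transactions: the first trigger arms a 500 ms timer and only then resolves the wake-up promise, so many rapid commits cause one delayed indexing pass instead of one pass each. A standalone sketch of that debounce (makeIndexingTrigger is an illustrative name; the real code re-creates the promise on every loop iteration):

    // Coalesce rapid trigger calls into a single wake-up after a cooldown period.
    export function makeIndexingTrigger (wakeUp: () => void, cooldown: number = 500): () => void {
      let armed = false
      return () => {
        if (!armed) {
          armed = true
          setTimeout(() => {
            armed = false
            wakeUp()
          }, cooldown)
        }
      }
    }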
diff --git a/server/indexer/src/indexer/summary.ts b/server/indexer/src/indexer/summary.ts
index 6a6212317d..56096ab8b9 100644
--- a/server/indexer/src/indexer/summary.ts
+++ b/server/indexer/src/indexer/summary.ts
@@ -114,7 +114,7 @@ export class FullSummaryStage implements FullTextPipelineStage {
       const childDocs = allChildDocs.filter((it) => it.attachedTo === doc._id)
       if (childDocs.length > 0) {
         for (const c of childDocs) {
-          const ctx = getFullTextContext(pipeline.hierarchy, c.objectClass)
+          const ctx = getFullTextContext(pipeline.hierarchy, c.objectClass, pipeline.contexts)
           if (ctx.parentPropagate ?? true) {
             if (embeddingText.length > this.summaryLimit) {
               break
@@ -137,22 +137,43 @@
           metrics,
           core.class.DocIndexState,
           { _id: doc.attachedTo as Ref<DocIndexState> },
-          { limit: 1 }
+          {
+            limit: 1,
+            skipSpace: true,
+            skipClass: true
+          }
         )

         if (parentDoc !== undefined) {
           const ctx = collectPropagateClasses(pipeline, parentDoc.objectClass)
           if (ctx.length > 0) {
-            const collections = await this.dbStorageFindAll(metrics, core.class.DocIndexState, {
-              attachedTo: parentDoc._id,
-              objectClass: ctx.length === 1 ? ctx[0] : { $in: ctx }
-            })
-            for (const c of collections) {
-              embeddingText +=
-                '\n' +
-                (await extractIndexedValues(c, pipeline.hierarchy, {
-                  matchExtra: this.matchExtra,
-                  fieldFilter: this.fieldFilter
-                }))
+            let last = 0
+            while (true) {
+              const collections = await this.dbStorageFindAll(
+                metrics,
+                core.class.DocIndexState,
+                {
+                  attachedTo: parentDoc._id,
+                  objectClass: ctx.length === 1 ? ctx[0] : { $in: ctx },
+                  modifiedOn: { $gt: last }
+                },
+                {
+                  limit: 250,
+                  skipClass: true,
+                  skipSpace: true
+                }
+              )
+              if (collections.length === 0) {
+                break
+              }
+              last = collections[collections.length - 1].modifiedOn
+              for (const c of collections) {
+                embeddingText +=
+                  '\n' +
+                  (await extractIndexedValues(c, pipeline.hierarchy, {
+                    matchExtra: this.matchExtra,
+                    fieldFilter: this.fieldFilter
+                  }))
+              }
             }
           }

@@ -188,7 +209,7 @@
  * @public
  */
 export function isIndexingRequired (pipeline: FullTextPipeline, doc: DocIndexState): boolean {
-  return getFullTextContext(pipeline.hierarchy, doc.objectClass).fullTextSummary ?? false
+  return getFullTextContext(pipeline.hierarchy, doc.objectClass, pipeline.contexts).fullTextSummary ?? false
 }

 /**
diff --git a/server/indexer/src/indexer/types.ts b/server/indexer/src/indexer/types.ts
index e55b2dc4f1..91bec382a4 100644
--- a/server/indexer/src/indexer/types.ts
+++ b/server/indexer/src/indexer/types.ts
@@ -19,6 +19,7 @@ import {
   type DocIndexState,
   type DocumentQuery,
   type DocumentUpdate,
+  type FullTextSearchContext,
   type Hierarchy,
   type MeasureContext,
   type ModelDb,
@@ -32,6 +33,12 @@ import type { DbAdapter, IndexedDoc } from '@hcengineering/server-core'
 export interface FullTextPipeline {
   hierarchy: Hierarchy
   model: ModelDb
+
+  contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
+
+  propogage: Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>
+  propogageClasses: Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>
+
   update: (
     docId: Ref<DocIndexState>,
     mark: boolean,
     update: DocumentUpdate<DocIndexState>,
@@ -49,6 +56,11 @@
     from?: number
   ) => Promise<{ docs: IndexedDoc[], pass: boolean }>

+  queue: (
+    ctx: MeasureContext,
+    updates: Map<Ref<DocIndexState>, { create?: DocIndexState, updated: boolean, removed: boolean }>
+  ) => Promise<void>
+
   cancelling: boolean
 }
diff --git a/server/indexer/src/indexer/utils.ts b/server/indexer/src/indexer/utils.ts
index 8ca1d1ab38..2f1e57bf25 100644
--- a/server/indexer/src/indexer/utils.ts
+++ b/server/indexer/src/indexer/utils.ts
@@ -82,58 +82,55 @@ export function traverseFullTextContexts (
   pipeline: FullTextPipeline,
   objectClass: Ref<Class<Doc>>,
   op: (ftc: Omit<FullTextSearchContext, keyof Class<Doc>>) => void
-): Ref<Class<Doc>>[] {
-  const desc = new Set(pipeline.hierarchy.getDescendants(objectClass))
-  const propagate = new Set<Ref<Class<Doc>>>()
-
-  const ftContext = getFullTextContext(pipeline.hierarchy, objectClass)
+): void {
+  const cl = pipeline.hierarchy.getBaseClass(objectClass)
+  const ftContext = getFullTextContext(pipeline.hierarchy, cl, pipeline.contexts)
   if (ftContext !== undefined) {
     op(ftContext)
   }
-
-  // Add all parent mixins as well
-  for (const a of pipeline.hierarchy.getAncestors(objectClass)) {
-    const ftContext = getFullTextContext(pipeline.hierarchy, a)
-    if (ftContext !== undefined) {
-      op(ftContext)
-    }
-    const dsca = pipeline.hierarchy.getDescendants(a)
-    for (const dd of dsca) {
-      if (pipeline.hierarchy.isMixin(dd)) {
-        desc.add(dd)
-      }
+  const dsca = pipeline.hierarchy.getDescendants(cl)
+  for (const dd of dsca) {
+    const mContext = getFullTextContext(pipeline.hierarchy, dd, pipeline.contexts)
+    if (mContext !== undefined) {
+      op(mContext)
     }
   }
-
-  for (const d of desc) {
-    if (pipeline.hierarchy.isMixin(d)) {
-      const mContext = getFullTextContext(pipeline.hierarchy, d)
-      if (mContext !== undefined) {
-        op(mContext)
-      }
-    }
-  }
-  return Array.from(propagate.values())
 }

 /**
  * @public
  */
 export function collectPropagate (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] {
-  const propagate = new Set<Ref<Class<Doc>>>()
-  traverseFullTextContexts(pipeline, objectClass, (fts) => fts?.propagate?.forEach((it) => propagate.add(it)))
+  let propagate = pipeline.propogage.get(objectClass)
+  if (propagate !== undefined) {
+    return propagate
+  }
+  const set = new Set<Ref<Class<Doc>>>()
+  traverseFullTextContexts(pipeline, objectClass, (fts) => {
+    fts?.propagate?.forEach((it) => {
+      set.add(it)
+    })
+  })

-  return Array.from(propagate.values())
+  propagate = Array.from(set.values())
+  pipeline.propogage.set(objectClass, propagate)
+  return propagate
 }

 /**
  * @public
  */
 export function collectPropagateClasses (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] {
-  const propagate = new Set<Ref<Class<Doc>>>()
-  traverseFullTextContexts(pipeline, objectClass, (fts) => fts?.propagateClasses?.forEach((it) => propagate.add(it)))
+  let propagate = pipeline.propogageClasses.get(objectClass)
+  if (propagate !== undefined) {
+    return propagate
+  }
+  const set = new Set<Ref<Class<Doc>>>()
+  traverseFullTextContexts(pipeline, objectClass, (fts) => fts?.propagateClasses?.forEach((it) => set.add(it)))

-  return Array.from(propagate.values())
+  propagate = Array.from(set.values())
+  pipeline.propogageClasses.set(objectClass, propagate)
+  return propagate
 }

 const CUSTOM_ATTR_KEY = 'customAttributes'
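Note: collectPropagate and collectPropagateClasses above now memoize their results in the pipeline's propogage / propogageClasses maps, so the hierarchy traversal runs once per class instead of once per document. The caching shape they follow, as a small generic sketch (cached and computePropagate are illustrative names):

    // Compute a value once per key and reuse it from the cache afterwards.
    export function cached<K, V> (cache: Map<K, V>, key: K, compute: () => V): V {
      let value = cache.get(key)
      if (value === undefined) {
        value = compute()
        cache.set(key, value)
      }
      return value
    }

    // Usage shape, with a pipeline that carries the propogage map declared in indexer.ts:
    // const propagate = cached(pipeline.propogage, objectClass, () => computePropagate(pipeline, objectClass))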