UBERF-8499: Optimize indexer operation (#6959)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
Andrey Sobolev 2024-10-16 21:14:42 +07:00 committed by GitHub
parent 51679bf82c
commit f89df5921a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 241 additions and 133 deletions

View File

@ -955,7 +955,8 @@ export function createModel (builder: Builder): void {
) )
// Allow to use fuzzy search for mixins // Allow to use fuzzy search for mixins
builder.mixin(contact.class.Contact, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: contact.class.Contact,
fullTextSummary: true fullTextSummary: true
}) })

View File

@ -549,7 +549,8 @@ export function createModel (builder: Builder): void {
func: documents.function.GetAllDocumentStates func: documents.function.GetAllDocumentStates
}) })
builder.mixin(documents.class.Document, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: documents.class.Document,
fullTextSummary: true, fullTextSummary: true,
childProcessingAllowed: true childProcessingAllowed: true
}) })
@ -886,11 +887,13 @@ export function defineNotifications (builder: Builder): void {
} }
export function defineSearch (builder: Builder): void { export function defineSearch (builder: Builder): void {
builder.mixin(documents.class.Document, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: documents.class.Document,
parentPropagate: true parentPropagate: true
}) })
builder.mixin(documents.class.DocumentMeta, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: documents.class.DocumentMeta,
fullTextSummary: true, fullTextSummary: true,
childProcessingAllowed: true, childProcessingAllowed: true,
propagate: [] propagate: []

View File

@ -361,8 +361,10 @@ export class TDocIndexState extends TDoc implements DocIndexState {
generationId?: string generationId?: string
} }
@MMixin(core.mixin.FullTextSearchContext, core.class.Class) @Model(core.class.FullTextSearchContext, core.class.Doc, DOMAIN_MODEL)
export class TFullTextSearchContext extends TClass implements FullTextSearchContext {} export class TFullTextSearchContext extends TDoc implements FullTextSearchContext {
toClass!: Ref<Class<Doc<Space>>>
}
@MMixin(core.mixin.ConfigurationElement, core.class.Class) @MMixin(core.mixin.ConfigurationElement, core.class.Class)
export class TConfigurationElement extends TClass implements ConfigurationElement { export class TConfigurationElement extends TClass implements ConfigurationElement {

View File

@ -309,7 +309,8 @@ export function createModel (builder: Builder): void {
] ]
}) })
builder.mixin(core.class.Space, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: core.class.Space,
childProcessingAllowed: false childProcessingAllowed: false
}) })

View File

@ -262,7 +262,11 @@ export const coreOperation: MigrateOperation = {
async migrate (client: MigrationClient): Promise<void> { async migrate (client: MigrationClient): Promise<void> {
// We need to delete all documents in doc index state for missing classes // We need to delete all documents in doc index state for missing classes
const allClasses = client.hierarchy.getDescendants(core.class.Doc) const allClasses = client.hierarchy.getDescendants(core.class.Doc)
const allIndexed = allClasses.filter((it) => isClassIndexable(client.hierarchy, it)) const contexts = new Map(
client.model.findAllSync(core.class.FullTextSearchContext, {}).map((it) => [it.toClass, it])
)
const allIndexed = allClasses.filter((it) => isClassIndexable(client.hierarchy, it, contexts))
// Next remove all non indexed classes and missing classes as well. // Next remove all non indexed classes and missing classes as well.
await client.update( await client.update(

View File

@ -214,7 +214,8 @@ export function createModel (builder: Builder): void {
gmail.action.WriteEmail gmail.action.WriteEmail
) )
builder.mixin(gmail.class.Message, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: gmail.class.Message,
parentPropagate: false parentPropagate: false
}) })

View File

@ -1438,13 +1438,15 @@ export function createModel (builder: Builder): void {
) )
// Allow to use fuzzy search for mixins // Allow to use fuzzy search for mixins
builder.mixin(recruit.class.Vacancy, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: recruit.class.Vacancy,
fullTextSummary: true, fullTextSummary: true,
childProcessingAllowed: true, childProcessingAllowed: true,
propagate: [] propagate: []
}) })
builder.mixin(recruit.mixin.Candidate, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: recruit.mixin.Candidate,
fullTextSummary: true, fullTextSummary: true,
propagate: [recruit.class.Applicant], propagate: [recruit.class.Applicant],
childProcessingAllowed: true, childProcessingAllowed: true,
@ -1457,7 +1459,8 @@ export function createModel (builder: Builder): void {
}) })
// Allow to use fuzzy search for mixins // Allow to use fuzzy search for mixins
builder.mixin(recruit.class.Applicant, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: recruit.class.Applicant,
fullTextSummary: true, fullTextSummary: true,
forceIndex: true, forceIndex: true,
childProcessingAllowed: true, childProcessingAllowed: true,

View File

@ -179,7 +179,8 @@ export function createModel (builder: Builder): void {
telegram.ids.TelegramMessageSharedActivityViewlet telegram.ids.TelegramMessageSharedActivityViewlet
) )
builder.mixin(telegram.class.Message, core.class.Class, core.mixin.FullTextSearchContext, { builder.createDoc(core.class.FullTextSearchContext, core.space.Model, {
toClass: telegram.class.Message,
parentPropagate: false, parentPropagate: false,
childProcessingAllowed: true childProcessingAllowed: true
}) })

View File

@ -578,7 +578,8 @@ export interface BlobLookup extends Blob {
* *
* If defined for class, this class will be enabled for embedding search like openai. * If defined for class, this class will be enabled for embedding search like openai.
*/ */
export interface FullTextSearchContext extends Class<Doc> { export interface FullTextSearchContext extends Doc {
toClass: Ref<Class<Doc>>
fullTextSummary?: boolean fullTextSummary?: boolean
forceIndex?: boolean forceIndex?: boolean

View File

@ -142,10 +142,10 @@ export default plugin(coreId, {
StatusCategory: '' as Ref<Class<StatusCategory>>, StatusCategory: '' as Ref<Class<StatusCategory>>,
MigrationState: '' as Ref<Class<MigrationState>>, MigrationState: '' as Ref<Class<MigrationState>>,
BenchmarkDoc: '' as Ref<Class<BenchmarkDoc>> BenchmarkDoc: '' as Ref<Class<BenchmarkDoc>>,
FullTextSearchContext: '' as Ref<Mixin<FullTextSearchContext>>
}, },
mixin: { mixin: {
FullTextSearchContext: '' as Ref<Mixin<FullTextSearchContext>>,
ConfigurationElement: '' as Ref<Mixin<ConfigurationElement>>, ConfigurationElement: '' as Ref<Mixin<ConfigurationElement>>,
IndexConfiguration: '' as Ref<Mixin<IndexingConfiguration<Doc>>>, IndexConfiguration: '' as Ref<Mixin<IndexingConfiguration<Doc>>>,
SpacesTypeData: '' as Ref<Mixin<Space>> SpacesTypeData: '' as Ref<Mixin<Space>>

View File

@ -46,9 +46,9 @@ import core from './component'
import { Hierarchy } from './hierarchy' import { Hierarchy } from './hierarchy'
import { TxOperations } from './operations' import { TxOperations } from './operations'
import { isPredicate } from './predicate' import { isPredicate } from './predicate'
import { Branding, BrandingMap } from './server'
import { DocumentQuery, FindResult } from './storage' import { DocumentQuery, FindResult } from './storage'
import { DOMAIN_TX } from './tx' import { DOMAIN_TX } from './tx'
import { Branding, BrandingMap } from './server'
function toHex (value: number, chars: number): string { function toHex (value: number, chars: number): string {
const result = value.toString(16) const result = value.toString(16)
@ -686,39 +686,48 @@ export function getFullTextIndexableAttributes (
return result return result
} }
const ctxKey = 'indexer_ftc'
/** /**
* @public * @public
*/ */
export function getFullTextContext ( export function getFullTextContext (
hierarchy: Hierarchy, hierarchy: Hierarchy,
objectClass: Ref<Class<Doc>> objectClass: Ref<Class<Doc>>,
contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
): Omit<FullTextSearchContext, keyof Class<Doc>> { ): Omit<FullTextSearchContext, keyof Class<Doc>> {
let objClass = hierarchy.getClass(objectClass) let ctx: Omit<FullTextSearchContext, keyof Class<Doc>> | undefined = hierarchy.getClassifierProp(objectClass, ctxKey)
while (true) {
if (hierarchy.hasMixin(objClass, core.mixin.FullTextSearchContext)) {
const ctx = hierarchy.as<Class<Doc>, FullTextSearchContext>(objClass, core.mixin.FullTextSearchContext)
if (ctx !== undefined) { if (ctx !== undefined) {
return ctx return ctx
} }
if (typeof ctx !== 'string') {
const anc = hierarchy.getAncestors(objectClass)
for (const oc of anc) {
const ctx = contexts.get(oc)
if (ctx !== undefined) {
hierarchy.setClassifierProp(objectClass, ctxKey, ctx)
return ctx
} }
if (objClass.extends === undefined) {
break
} }
objClass = hierarchy.getClass(objClass.extends)
} }
return { ctx = {
toClass: objectClass,
fullTextSummary: false, fullTextSummary: false,
forceIndex: false, forceIndex: false,
propagate: [], propagate: [],
childProcessingAllowed: true childProcessingAllowed: true
} }
hierarchy.setClassifierProp(objectClass, ctxKey, ctx)
return ctx
} }
/** /**
* @public * @public
*/ */
export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boolean { export function isClassIndexable (
hierarchy: Hierarchy,
c: Ref<Class<Doc>>,
contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
): boolean {
const indexed = hierarchy.getClassifierProp(c, 'class_indexed') const indexed = hierarchy.getClassifierProp(c, 'class_indexed')
if (indexed !== undefined) { if (indexed !== undefined) {
return indexed as boolean return indexed as boolean
@ -756,13 +765,13 @@ export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boo
let result = true let result = true
if (attrs.length === 0 && !(getFullTextContext(hierarchy, c)?.forceIndex ?? false)) { if (attrs.length === 0 && !(getFullTextContext(hierarchy, c, contexts)?.forceIndex ?? false)) {
result = false result = false
// We need check if document has collections with indexable fields. // We need check if document has collections with indexable fields.
const attrs = hierarchy.getAllAttributes(c).values() const attrs = hierarchy.getAllAttributes(c).values()
for (const attr of attrs) { for (const attr of attrs) {
if (attr.type._class === core.class.Collection) { if (attr.type._class === core.class.Collection) {
if (isClassIndexable(hierarchy, (attr.type as Collection<AttachedDoc>).of)) { if (isClassIndexable(hierarchy, (attr.type as Collection<AttachedDoc>).of, contexts)) {
result = true result = true
break break
} }

View File

@ -8,7 +8,7 @@
"template": "@hcengineering/node-package", "template": "@hcengineering/node-package",
"license": "EPL-2.0", "license": "EPL-2.0",
"scripts": { "scripts": {
"start": "rush bundle --to @hcengineering/pod-server && cross-env NODE_ENV=production ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret OPERATION_PROFILING=false MODEL_JSON=../../models/all/bundle/model.json node bundle/bundle.js", "start": "rush bundle --to @hcengineering/pod-server && cross-env NODE_ENV=production ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 DB_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret OPERATION_PROFILING=false MODEL_JSON=../../models/all/bundle/model.json node --inspect bundle/bundle.js",
"start-u": "rush bundle --to @hcengineering/pod-server && ./bundle/ && cross-env NODE_ENV=production SERVER_PROVIDER=uweb ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret MODEL_JSON=../../models/all/bundle/model.json node bundle/bundle.js", "start-u": "rush bundle --to @hcengineering/pod-server && ./bundle/ && cross-env NODE_ENV=production SERVER_PROVIDER=uweb ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret MODEL_JSON=../../models/all/bundle/model.json node bundle/bundle.js",
"start-flame": "rush bundle --to @hcengineering/pod-server && cross-env NODE_ENV=production ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret MODEL_JSON=../../models/all/bundle/model.json clinic flame --dest ./out -- node --nolazy -r ts-node/register --enable-source-maps src/__start.ts", "start-flame": "rush bundle --to @hcengineering/pod-server && cross-env NODE_ENV=production ELASTIC_INDEX_NAME=local_storage_index MODEL_VERSION=$(node ../../common/scripts/show_version.js) ACCOUNTS_URL=http://localhost:3000 REKONI_URL=http://localhost:4004 MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 UPLOAD_URL=/upload MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret MODEL_JSON=../../models/all/bundle/model.json clinic flame --dest ./out -- node --nolazy -r ts-node/register --enable-source-maps src/__start.ts",
"build": "compile", "build": "compile",

View File

@ -522,7 +522,7 @@ class ElasticAdapter implements FullTextAdapter {
async updateMany (docs: IndexedDoc[]): Promise<TxResult[]> { async updateMany (docs: IndexedDoc[]): Promise<TxResult[]> {
const parts = Array.from(docs) const parts = Array.from(docs)
while (parts.length > 0) { while (parts.length > 0) {
const part = parts.splice(0, 1000) const part = parts.splice(0, 500)
const operations = part.flatMap((doc) => { const operations = part.flatMap((doc) => {
const wsDoc = { workspaceId: this.workspaceString, ...doc } const wsDoc = { workspaceId: this.workspaceString, ...doc }

View File

@ -45,9 +45,9 @@ import core, {
toFindResult toFindResult
} from '@hcengineering/core' } from '@hcengineering/core'
import type { FullTextAdapter, IndexedDoc, SessionFindAll, StorageAdapter, WithFind } from '@hcengineering/server-core' import type { FullTextAdapter, IndexedDoc, SessionFindAll, StorageAdapter, WithFind } from '@hcengineering/server-core'
import { getScoringConfig, mapSearchResultDoc } from './mapper'
import { type FullTextIndexPipeline } from './indexer' import { type FullTextIndexPipeline } from './indexer'
import { createStateDoc } from './indexer/utils' import { createStateDoc } from './indexer/utils'
import { getScoringConfig, mapSearchResultDoc } from './mapper'
/** /**
* @public * @public
@ -90,7 +90,7 @@ export class FullTextIndex implements WithFind {
if (TxProcessor.isExtendsCUD(tx._class)) { if (TxProcessor.isExtendsCUD(tx._class)) {
const cud = tx as TxCUD<Doc> const cud = tx as TxCUD<Doc>
if (!isClassIndexable(this.hierarchy, cud.objectClass)) { if (!isClassIndexable(this.hierarchy, cud.objectClass, this.indexer.contexts)) {
// No need, since no indixable fields or attachments. // No need, since no indixable fields or attachments.
continue continue
} }

View File

@ -50,6 +50,7 @@ export class IndexedFieldStage implements FullTextPipelineStage {
updateFields: DocUpdateHandler[] = [] updateFields: DocUpdateHandler[] = []
enabled = true enabled = true
constructor (private readonly dbStorageFindAll: SessionFindAll) {} constructor (private readonly dbStorageFindAll: SessionFindAll) {}
async initialize (ctx: MeasureContext, storage: DbAdapter, pipeline: FullTextPipeline): Promise<void> {} async initialize (ctx: MeasureContext, storage: DbAdapter, pipeline: FullTextPipeline): Promise<void> {}
@ -150,7 +151,7 @@ export class IndexedFieldStage implements FullTextPipelineStage {
} }
if (docState.attachedTo != null && changes > 0) { if (docState.attachedTo != null && changes > 0) {
const ctx = getFullTextContext(pipeline.hierarchy, objClass) const ctx = getFullTextContext(pipeline.hierarchy, objClass, pipeline.contexts)
if (ctx.parentPropagate ?? true) { if (ctx.parentPropagate ?? true) {
// We need to clear field stage from parent, so it will be re indexed. // We need to clear field stage from parent, so it will be re indexed.
await pipeline.update(docState.attachedTo as Ref<DocIndexState>, false, {}) await pipeline.update(docState.attachedTo as Ref<DocIndexState>, false, {})
@ -173,17 +174,13 @@ export class IndexedFieldStage implements FullTextPipelineStage {
{ {
attachedTo: ids.length === 1 ? ids[0] : { $in: ids } attachedTo: ids.length === 1 ? ids[0] : { $in: ids }
}, },
{ limit: ids.length } { limit: ids.length, skipSpace: true, skipClass: true }
) )
) )
} }
const childs = allChildDocs.filter((it) => it.attachedTo === docState._id) const childs = allChildDocs.filter((it) => it.attachedTo === docState._id)
for (const u of childs) { // Marck childs to be indexed on next step
if (propagate.some((it) => pipeline.hierarchy.isDerived(u.objectClass, it))) { await pipeline.queue(metrics, new Map(childs.map((it) => [it._id, { updated: true, removed: false }])))
pipeline.add(u)
await pipeline.update(u._id, false, {})
}
}
} }
await pipeline.update(docState._id, true, docUpdate) await pipeline.update(docState._id, true, docUpdate)

View File

@ -30,12 +30,13 @@ import core, {
type MeasureContext, type MeasureContext,
RateLimiter, RateLimiter,
type Ref, type Ref,
SortingOrder,
toIdMap, toIdMap,
type WorkspaceId type WorkspaceId
} from '@hcengineering/core' } from '@hcengineering/core'
import { type DbAdapter, type FullTextAdapter, type IndexedDoc, type SessionFindAll } from '@hcengineering/server-core' import { type DbAdapter, type FullTextAdapter, type IndexedDoc, type SessionFindAll } from '@hcengineering/server-core'
import { updateDocWithPresenter } from '../mapper'
import { jsonToText, markupToJSON } from '@hcengineering/text' import { jsonToText, markupToJSON } from '@hcengineering/text'
import { updateDocWithPresenter } from '../mapper'
import { import {
contentStageId, contentStageId,
type DocUpdateHandler, type DocUpdateHandler,
@ -118,7 +119,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
const childIds = toIndexPart const childIds = toIndexPart
.filter((it) => { .filter((it) => {
const fctx = getFullTextContext(pipeline.hierarchy, it.objectClass) const fctx = getFullTextContext(pipeline.hierarchy, it.objectClass, pipeline.contexts)
return fctx.childProcessingAllowed ?? true return fctx.childProcessingAllowed ?? true
}) })
.map((it) => it._id) .map((it) => it._id)
@ -127,9 +128,17 @@ export class FullTextPushStage implements FullTextPipelineStage {
'find-child', 'find-child',
{}, {},
async (ctx) => async (ctx) =>
await this.dbStorageFindAll(ctx, core.class.DocIndexState, { await this.dbStorageFindAll(
ctx,
core.class.DocIndexState,
{
attachedTo: childIds.length === 1 ? childIds[0] : { $in: childIds } attachedTo: childIds.length === 1 ? childIds[0] : { $in: childIds }
}) },
{
skipClass: true,
skipSpace: true
}
)
) )
// spaces // spaces
@ -138,14 +147,22 @@ export class FullTextPushStage implements FullTextPipelineStage {
'find-spaces', 'find-spaces',
{}, {},
async (ctx) => async (ctx) =>
await this.dbStorageFindAll(ctx, core.class.DocIndexState, { await this.dbStorageFindAll(
ctx,
core.class.DocIndexState,
{
_id: { _id: {
$in: toIndexPart.map( $in: toIndexPart.map(
(doc) => (doc) =>
(doc.attributes[docKey('space', { _class: doc.objectClass })] ?? doc.space) as Ref<DocIndexState> (doc.attributes[docKey('space', { _class: doc.objectClass })] ?? doc.space) as Ref<DocIndexState>
) )
} }
}) },
{
skipClass: true,
skipSpace: true
}
)
) )
) )
@ -163,7 +180,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
const childDocs = allChildDocs.filter((it) => it.attachedTo === doc._id) const childDocs = allChildDocs.filter((it) => it.attachedTo === doc._id)
if (childDocs.length > 0) { if (childDocs.length > 0) {
for (const c of childDocs) { for (const c of childDocs) {
const fctx = getFullTextContext(pipeline.hierarchy, c.objectClass) const fctx = getFullTextContext(pipeline.hierarchy, c.objectClass, pipeline.contexts)
if (fctx.parentPropagate ?? true) { if (fctx.parentPropagate ?? true) {
ctx.withSync('updateDoc2Elastic', {}, (ctx) => { ctx.withSync('updateDoc2Elastic', {}, (ctx) => {
updateDoc2Elastic( updateDoc2Elastic(
@ -195,7 +212,11 @@ export class FullTextPushStage implements FullTextPipelineStage {
{ {
_id: doc.attachedTo as Ref<DocIndexState> _id: doc.attachedTo as Ref<DocIndexState>
}, },
{ limit: 1 } {
limit: 1,
skipClass: true,
skipSpace: true
}
) )
).shift() ).shift()
)) ))
@ -217,10 +238,29 @@ export class FullTextPushStage implements FullTextPipelineStage {
const collectClasses = collectPropagateClasses(pipeline, parentDoc.objectClass) const collectClasses = collectPropagateClasses(pipeline, parentDoc.objectClass)
if (collectClasses.length > 0) { if (collectClasses.length > 0) {
const collections = await this.dbStorageFindAll<DocIndexState>(ctx, core.class.DocIndexState, { let last: number = 0
while (true) {
const collections = await this.dbStorageFindAll<DocIndexState>(
ctx,
core.class.DocIndexState,
{
attachedTo: parentDoc._id, attachedTo: parentDoc._id,
objectClass: { $in: collectClasses } objectClass: { $in: collectClasses },
}) modifiedOn: { $gt: last }
},
{
sort: {
modifiedOn: SortingOrder.Ascending
},
skipClass: true,
skipSpace: true,
limit: 500
}
)
if (collections.length === 0) {
break
}
last = collections[collections.length - 1].modifiedOn
for (const c of collections) { for (const c of collections) {
ctx.withSync('updateDoc2Elastic', {}, (ctx) => { ctx.withSync('updateDoc2Elastic', {}, (ctx) => {
updateDoc2Elastic( updateDoc2Elastic(
@ -239,6 +279,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
} }
} }
} }
}
const spaceDoc = spaceDocs.get( const spaceDoc = spaceDocs.get(
(doc.attributes[docKey('space', { _class: doc.objectClass })] ?? doc.space) as Ref<DocIndexState> (doc.attributes[docKey('space', { _class: doc.objectClass })] ?? doc.space) as Ref<DocIndexState>
) )
@ -262,7 +303,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
} }
// Perform bulk update to elastic // Perform bulk update to elastic
void pushQueue.add(async () => { await pushQueue.exec(async () => {
try { try {
try { try {
await ctx.with('push-elastic', {}, async () => { await ctx.with('push-elastic', {}, async () => {

View File

@ -21,6 +21,7 @@ import core, {
type DocIndexState, type DocIndexState,
type DocumentQuery, type DocumentQuery,
type DocumentUpdate, type DocumentUpdate,
type FullTextSearchContext,
type Hierarchy, type Hierarchy,
type MeasureContext, type MeasureContext,
type ModelDb, type ModelDb,
@ -79,6 +80,10 @@ export class FullTextIndexPipeline implements FullTextPipeline {
uploadOps: DocIndexState[] = [] uploadOps: DocIndexState[] = []
contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
propogage = new Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>()
propogageClasses = new Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>()
constructor ( constructor (
private readonly storage: DbAdapter, private readonly storage: DbAdapter,
private readonly stages: FullTextPipelineStage[], private readonly stages: FullTextPipelineStage[],
@ -90,6 +95,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
) { ) {
this.readyStages = stages.map((it) => it.stageId) this.readyStages = stages.map((it) => it.stageId)
this.readyStages.sort() this.readyStages.sort()
this.contexts = new Map(model.findAllSync(core.class.FullTextSearchContext, {}).map((it) => [it.toClass, it]))
} }
async cancel (): Promise<void> { async cancel (): Promise<void> {
@ -386,8 +392,6 @@ export class FullTextIndexPipeline implements FullTextPipeline {
) )
// Also update doc index state queries. // Also update doc index state queries.
_classes.push(core.class.DocIndexState)
_classes.forEach((it) => this.broadcastClasses.add(it)) _classes.forEach((it) => this.broadcastClasses.add(it))
if (this.triggerCounts > 0) { if (this.triggerCounts > 0) {
@ -410,10 +414,16 @@ export class FullTextIndexPipeline implements FullTextPipeline {
} }
}, 5000) }, 5000)
let notified = false
await new Promise((resolve) => { await new Promise((resolve) => {
this.triggerIndexing = () => { this.triggerIndexing = () => {
this.triggerCounts++ this.triggerCounts++
if (!notified) {
notified = true
setTimeout(() => {
resolve(null) resolve(null)
}, 500) // Start indexing only after cooldown
}
} }
}) })
} }
@ -435,14 +445,18 @@ export class FullTextIndexPipeline implements FullTextPipeline {
}) })
let result: DocIndexState[] | undefined = await ctx.with('get-indexable', {}, async () => { let result: DocIndexState[] | undefined = await ctx.with('get-indexable', {}, async () => {
const q: DocumentQuery<DocIndexState> = { return await this.storage.findAll(
ctx,
core.class.DocIndexState,
{
needIndex: true needIndex: true
} },
return await this.storage.findAll(ctx, core.class.DocIndexState, q, { {
limit: globalIndexer.processingSize, limit: globalIndexer.processingSize,
skipClass: true, skipClass: true,
skipSpace: true skipSpace: true
}) }
)
}) })
if (result === undefined) { if (result === undefined) {
// No more results // No more results

View File

@ -114,7 +114,7 @@ export class FullSummaryStage implements FullTextPipelineStage {
const childDocs = allChildDocs.filter((it) => it.attachedTo === doc._id) const childDocs = allChildDocs.filter((it) => it.attachedTo === doc._id)
if (childDocs.length > 0) { if (childDocs.length > 0) {
for (const c of childDocs) { for (const c of childDocs) {
const ctx = getFullTextContext(pipeline.hierarchy, c.objectClass) const ctx = getFullTextContext(pipeline.hierarchy, c.objectClass, pipeline.contexts)
if (ctx.parentPropagate ?? true) { if (ctx.parentPropagate ?? true) {
if (embeddingText.length > this.summaryLimit) { if (embeddingText.length > this.summaryLimit) {
break break
@ -137,15 +137,35 @@ export class FullSummaryStage implements FullTextPipelineStage {
metrics, metrics,
core.class.DocIndexState, core.class.DocIndexState,
{ _id: doc.attachedTo as Ref<DocIndexState> }, { _id: doc.attachedTo as Ref<DocIndexState> },
{ limit: 1 } {
limit: 1,
skipSpace: true,
skipClass: true
}
) )
if (parentDoc !== undefined) { if (parentDoc !== undefined) {
const ctx = collectPropagateClasses(pipeline, parentDoc.objectClass) const ctx = collectPropagateClasses(pipeline, parentDoc.objectClass)
if (ctx.length > 0) { if (ctx.length > 0) {
const collections = await this.dbStorageFindAll(metrics, core.class.DocIndexState, { let last = 0
while (true) {
const collections = await this.dbStorageFindAll(
metrics,
core.class.DocIndexState,
{
attachedTo: parentDoc._id, attachedTo: parentDoc._id,
objectClass: ctx.length === 1 ? ctx[0] : { $in: ctx } objectClass: ctx.length === 1 ? ctx[0] : { $in: ctx },
}) modifiedOn: { $gt: last }
},
{
limit: 250,
skipClass: true,
skipSpace: true
}
)
if (collections.length === 0) {
break
}
last = collections[collections.length - 1].modifiedOn
for (const c of collections) { for (const c of collections) {
embeddingText += embeddingText +=
'\n' + '\n' +
@ -155,6 +175,7 @@ export class FullSummaryStage implements FullTextPipelineStage {
})) }))
} }
} }
}
if (embeddingText.length > this.summaryLimit) { if (embeddingText.length > this.summaryLimit) {
break break
@ -188,7 +209,7 @@ export class FullSummaryStage implements FullTextPipelineStage {
* @public * @public
*/ */
export function isIndexingRequired (pipeline: FullTextPipeline, doc: DocIndexState): boolean { export function isIndexingRequired (pipeline: FullTextPipeline, doc: DocIndexState): boolean {
return getFullTextContext(pipeline.hierarchy, doc.objectClass).fullTextSummary ?? false return getFullTextContext(pipeline.hierarchy, doc.objectClass, pipeline.contexts).fullTextSummary ?? false
} }
/** /**

View File

@ -19,6 +19,7 @@ import {
type DocIndexState, type DocIndexState,
type DocumentQuery, type DocumentQuery,
type DocumentUpdate, type DocumentUpdate,
type FullTextSearchContext,
type Hierarchy, type Hierarchy,
type MeasureContext, type MeasureContext,
type ModelDb, type ModelDb,
@ -32,6 +33,12 @@ import type { DbAdapter, IndexedDoc } from '@hcengineering/server-core'
export interface FullTextPipeline { export interface FullTextPipeline {
hierarchy: Hierarchy hierarchy: Hierarchy
model: ModelDb model: ModelDb
contexts: Map<Ref<Class<Doc>>, FullTextSearchContext>
propogage: Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>
propogageClasses: Map<Ref<Class<Doc>>, Ref<Class<Doc>>[]>
update: ( update: (
docId: Ref<DocIndexState>, docId: Ref<DocIndexState>,
mark: boolean, mark: boolean,
@ -49,6 +56,11 @@ export interface FullTextPipeline {
from?: number from?: number
) => Promise<{ docs: IndexedDoc[], pass: boolean }> ) => Promise<{ docs: IndexedDoc[], pass: boolean }>
queue: (
ctx: MeasureContext,
updates: Map<Ref<DocIndexState>, { create?: DocIndexState, updated: boolean, removed: boolean }>
) => Promise<void>
cancelling: boolean cancelling: boolean
} }

View File

@ -82,58 +82,55 @@ export function traverseFullTextContexts (
pipeline: FullTextPipeline, pipeline: FullTextPipeline,
objectClass: Ref<Class<Doc>>, objectClass: Ref<Class<Doc>>,
op: (ftc: Omit<FullTextSearchContext, keyof Class<Doc>>) => void op: (ftc: Omit<FullTextSearchContext, keyof Class<Doc>>) => void
): Ref<Class<Doc>>[] { ): void {
const desc = new Set(pipeline.hierarchy.getDescendants(objectClass)) const cl = pipeline.hierarchy.getBaseClass(objectClass)
const propagate = new Set<Ref<Class<Doc>>>() const ftContext = getFullTextContext(pipeline.hierarchy, cl, pipeline.contexts)
const ftContext = getFullTextContext(pipeline.hierarchy, objectClass)
if (ftContext !== undefined) { if (ftContext !== undefined) {
op(ftContext) op(ftContext)
} }
const dsca = pipeline.hierarchy.getDescendants(cl)
// Add all parent mixins as well
for (const a of pipeline.hierarchy.getAncestors(objectClass)) {
const ftContext = getFullTextContext(pipeline.hierarchy, a)
if (ftContext !== undefined) {
op(ftContext)
}
const dsca = pipeline.hierarchy.getDescendants(a)
for (const dd of dsca) { for (const dd of dsca) {
if (pipeline.hierarchy.isMixin(dd)) { const mContext = getFullTextContext(pipeline.hierarchy, dd, pipeline.contexts)
desc.add(dd)
}
}
}
for (const d of desc) {
if (pipeline.hierarchy.isMixin(d)) {
const mContext = getFullTextContext(pipeline.hierarchy, d)
if (mContext !== undefined) { if (mContext !== undefined) {
op(mContext) op(mContext)
} }
} }
} }
return Array.from(propagate.values())
}
/** /**
* @public * @public
*/ */
export function collectPropagate (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] { export function collectPropagate (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] {
const propagate = new Set<Ref<Class<Doc>>>() let propagate = pipeline.propogage.get(objectClass)
traverseFullTextContexts(pipeline, objectClass, (fts) => fts?.propagate?.forEach((it) => propagate.add(it))) if (propagate !== undefined) {
return propagate
}
const set = new Set<Ref<Class<Doc>>>()
traverseFullTextContexts(pipeline, objectClass, (fts) => {
fts?.propagate?.forEach((it) => {
set.add(it)
})
})
return Array.from(propagate.values()) propagate = Array.from(set.values())
pipeline.propogage.set(objectClass, propagate)
return propagate
} }
/** /**
* @public * @public
*/ */
export function collectPropagateClasses (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] { export function collectPropagateClasses (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] {
const propagate = new Set<Ref<Class<Doc>>>() let propagate = pipeline.propogageClasses.get(objectClass)
traverseFullTextContexts(pipeline, objectClass, (fts) => fts?.propagateClasses?.forEach((it) => propagate.add(it))) if (propagate !== undefined) {
return propagate
}
const set = new Set<Ref<Class<Doc>>>()
traverseFullTextContexts(pipeline, objectClass, (fts) => fts?.propagateClasses?.forEach((it) => set.add(it)))
return Array.from(propagate.values()) propagate = Array.from(set.values())
pipeline.propogageClasses.set(objectClass, propagate)
return propagate
} }
const CUSTOM_ATTR_KEY = 'customAttributes' const CUSTOM_ATTR_KEY = 'customAttributes'