From bccf97eeb74b31240dbf67a72da10bcdd2c1d3ca Mon Sep 17 00:00:00 2001 From: Andrey Sobolev Date: Sat, 13 Apr 2024 14:07:37 +0700 Subject: [PATCH] UBERF-6540: Fix isIndexable and clean wrong indexed documents (#5347) --- models/activity/src/index.ts | 73 ++++++---- models/core/src/migration.ts | 20 ++- models/guest/src/index.ts | 17 ++- models/notification/src/index.ts | 2 +- packages/core/src/utils.ts | 134 ++++++++++++++++++- server-plugins/collaboration/src/fulltext.ts | 6 +- server/core/src/fulltext.ts | 5 +- server/core/src/indexer/content.ts | 9 +- server/core/src/indexer/field.ts | 11 +- server/core/src/indexer/fulltextPush.ts | 5 +- server/core/src/indexer/indexer.ts | 12 +- server/core/src/indexer/summary.ts | 11 +- server/core/src/indexer/utils.ts | 122 +---------------- 13 files changed, 246 insertions(+), 181 deletions(-) diff --git a/models/activity/src/index.ts b/models/activity/src/index.ts index d36afd8d95..3692f3ae1b 100644 --- a/models/activity/src/index.ts +++ b/models/activity/src/index.ts @@ -15,68 +15,69 @@ import { type ActivityAttributeUpdatesPresenter, - type ActivityInfoMessage, type ActivityDoc, type ActivityExtension, type ActivityExtensionKind, + type ActivityInfoMessage, type ActivityMessage, + type ActivityMessageControl, type ActivityMessageExtension, type ActivityMessageExtensionKind, + type ActivityMessagePreview, type ActivityMessagesFilter, + type ActivityReference, type DocAttributeUpdates, type DocUpdateAction, type DocUpdateMessage, type DocUpdateMessageViewlet, type DocUpdateMessageViewletAttributesConfig, - type Reaction, - type TxViewlet, - type ActivityMessageControl, - type SavedMessage, type IgnoreActivity, - type ActivityReference, - type ActivityMessagePreview + type Reaction, + type SavedMessage, + type TxViewlet } from '@hcengineering/activity' +import contact, { type Person } from '@hcengineering/contact' import core, { DOMAIN_MODEL, + IndexKind, + type Account, type Class, type Doc, type DocumentQuery, - type Ref, - type Tx, - IndexKind, - type TxCUD, type Domain, - type Account, - type Timestamp + type IndexingConfiguration, + type Ref, + type Timestamp, + type Tx, + type TxCUD } from '@hcengineering/core' import { - Model, - type Builder, - Prop, - Index, - TypeRef, - TypeString, - Mixin, + ArrOf, Collection, + Index, + Mixin, + Model, + Prop, TypeBoolean, TypeIntlString, - ArrOf, + TypeMarkup, + TypeRef, + TypeString, TypeTimestamp, UX, - TypeMarkup + type Builder } from '@hcengineering/model' import { TAttachedDoc, TClass, TDoc } from '@hcengineering/model-core' +import preference, { TPreference } from '@hcengineering/model-preference' +import view from '@hcengineering/model-view' +import notification from '@hcengineering/notification' import type { Asset, IntlString, Resource } from '@hcengineering/platform' import { type AnyComponent } from '@hcengineering/ui/src/types' -import contact, { type Person } from '@hcengineering/contact' -import preference, { TPreference } from '@hcengineering/model-preference' -import notification from '@hcengineering/notification' -import view from '@hcengineering/model-view' import activity from './plugin' -export { activityOperation } from './migration' export { activityId } from '@hcengineering/activity' +export { activityOperation } from './migration' export const DOMAIN_ACTIVITY = 'activity' as Domain @@ -369,6 +370,24 @@ export function createModel (builder: Builder): void { labelPresenter: activity.component.ActivityMessageNotificationLabel }) + builder.mixin, IndexingConfiguration>( + activity.class.DocUpdateMessage, + core.class.Class, + core.mixin.IndexConfiguration, + { + searchDisabled: true + } + ) + + builder.mixin, IndexingConfiguration>( + activity.class.Reaction, + core.class.Class, + core.mixin.IndexConfiguration, + { + searchDisabled: true + } + ) + builder.createDoc( notification.class.NotificationType, core.space.Model, diff --git a/models/core/src/migration.ts b/models/core/src/migration.ts index 452b70132b..c13d8a2d0e 100644 --- a/models/core/src/migration.ts +++ b/models/core/src/migration.ts @@ -13,7 +13,7 @@ // limitations under the License. // -import core, { coreId, DOMAIN_DOC_INDEX_STATE, TxOperations } from '@hcengineering/core' +import core, { coreId, DOMAIN_DOC_INDEX_STATE, isClassIndexable, TxOperations } from '@hcengineering/core' import { tryUpgrade, type MigrateOperation, @@ -24,8 +24,22 @@ import { export const coreOperation: MigrateOperation = { async migrate (client: MigrationClient): Promise { // We need to delete all documents in doc index state for missing classes - const allDocs = client.hierarchy.getDescendants(core.class.Doc) - await client.deleteMany(DOMAIN_DOC_INDEX_STATE, { objectClass: { $nin: allDocs } }) + const allClasses = client.hierarchy.getDescendants(core.class.Doc) + const allIndexed = allClasses.filter((it) => isClassIndexable(client.hierarchy, it)) + const indexed = new Set(allIndexed) + const skipped = allClasses.filter((it) => !indexed.has(it)) + + // Next remove all non indexed classes and missing classes as well. + const updated = await client.update( + DOMAIN_DOC_INDEX_STATE, + { objectClass: { $nin: allIndexed } }, + { + $set: { + removed: true + } + } + ) + console.log('clearing non indexed documents', skipped, updated.updated, updated.matched) }, async upgrade (client: MigrationUpgradeClient): Promise { await tryUpgrade(client, coreId, [ diff --git a/models/guest/src/index.ts b/models/guest/src/index.ts index 15b4b5e134..28d5938807 100644 --- a/models/guest/src/index.ts +++ b/models/guest/src/index.ts @@ -1,4 +1,11 @@ -import { AccountRole, type Doc, type Domain, type Ref } from '@hcengineering/core' +import { + AccountRole, + type Class, + type IndexingConfiguration, + type Doc, + type Domain, + type Ref +} from '@hcengineering/core' import { type PublicLink, type Restrictions, guestAccountEmail } from '@hcengineering/guest' import { type Builder, Model } from '@hcengineering/model' import core, { TDoc } from '@hcengineering/model-core' @@ -39,6 +46,14 @@ export function createModel (builder: Builder): void { { createdOn: -1 } ] }) + builder.mixin, IndexingConfiguration>( + guest.class.PublicLink, + core.class.Class, + core.mixin.IndexConfiguration, + { + searchDisabled: true + } + ) } export { guestId } from '@hcengineering/guest' diff --git a/models/notification/src/index.ts b/models/notification/src/index.ts index fd7b8a95d1..b1fdadd194 100644 --- a/models/notification/src/index.ts +++ b/models/notification/src/index.ts @@ -60,8 +60,8 @@ import { type CommonInboxNotification, type CommonNotificationType, type DocNotifyContext, - type DocUpdateTx, type DocUpdates, + type DocUpdateTx, type InboxNotification, type MentionInboxNotification, type NotificationContextPresenter, diff --git a/packages/core/src/utils.ts b/packages/core/src/utils.ts index a2f2fcb425..26ff3948cc 100644 --- a/packages/core/src/utils.ts +++ b/packages/core/src/utils.ts @@ -17,10 +17,19 @@ import { deepEqual } from 'fast-equals' import { Account, AnyAttribute, + AttachedDoc, Class, + ClassifierKind, + Collection, Doc, DocData, DocIndexState, + DOMAIN_BLOB, + DOMAIN_DOC_INDEX_STATE, + DOMAIN_FULLTEXT_BLOB, + DOMAIN_MODEL, + DOMAIN_TRANSIENT, + FullTextSearchContext, IndexKind, Obj, Permission, @@ -31,9 +40,10 @@ import { } from './classes' import core from './component' import { Hierarchy } from './hierarchy' +import { TxOperations } from './operations' import { isPredicate } from './predicate' import { DocumentQuery, FindResult } from './storage' -import { TxOperations } from './operations' +import { DOMAIN_TX } from './tx' function toHex (value: number, chars: number): string { const result = value.toString(16) @@ -582,3 +592,125 @@ export async function checkPermission ( return myPermissions.has(_id) } + +/** + * @public + */ +export function getFullTextIndexableAttributes ( + hierarchy: Hierarchy, + clazz: Ref>, + skipDocs: boolean = false +): AnyAttribute[] { + const allAttributes = hierarchy.getAllAttributes(clazz) + const result: AnyAttribute[] = [] + for (const [, attr] of allAttributes) { + if (skipDocs && (attr.attributeOf === core.class.Doc || attr.attributeOf === core.class.AttachedDoc)) { + continue + } + if (isFullTextAttribute(attr) || isIndexedAttribute(attr)) { + result.push(attr) + } + } + + hierarchy + .getDescendants(clazz) + .filter((m) => hierarchy.getClass(m).kind === ClassifierKind.MIXIN) + .forEach((m) => { + for (const [, v] of hierarchy.getAllAttributes(m, clazz)) { + if (skipDocs && (v.attributeOf === core.class.Doc || v.attributeOf === core.class.AttachedDoc)) { + continue + } + if (isFullTextAttribute(v) || isIndexedAttribute(v)) { + result.push(v) + } + } + }) + return result +} + +/** + * @public + */ +export function getFullTextContext ( + hierarchy: Hierarchy, + objectClass: Ref> +): Omit> { + let objClass = hierarchy.getClass(objectClass) + + while (true) { + if (hierarchy.hasMixin(objClass, core.mixin.FullTextSearchContext)) { + const ctx = hierarchy.as, FullTextSearchContext>(objClass, core.mixin.FullTextSearchContext) + if (ctx !== undefined) { + return ctx + } + } + if (objClass.extends === undefined) { + break + } + objClass = hierarchy.getClass(objClass.extends) + } + return { + fullTextSummary: false, + forceIndex: false, + propagate: [], + childProcessingAllowed: true + } +} + +/** + * @public + */ +export function isClassIndexable (hierarchy: Hierarchy, c: Ref>): boolean { + const indexed = hierarchy.getClassifierProp(c, 'class_indexed') + if (indexed !== undefined) { + return indexed as boolean + } + const domain = hierarchy.findDomain(c) + if (domain === undefined) { + hierarchy.setClassifierProp(c, 'class_indexed', false) + return false + } + + if ( + domain === DOMAIN_DOC_INDEX_STATE || + domain === DOMAIN_TX || + domain === DOMAIN_MODEL || + domain === DOMAIN_BLOB || + domain === DOMAIN_FULLTEXT_BLOB || + domain === DOMAIN_TRANSIENT + ) { + hierarchy.setClassifierProp(c, 'class_indexed', false) + return false + } + + const indexMixin = hierarchy.classHierarchyMixin(c, core.mixin.IndexConfiguration) + if (indexMixin?.searchDisabled !== undefined && indexMixin?.searchDisabled) { + hierarchy.setClassifierProp(c, 'class_indexed', false) + return false + } + + const attrs = getFullTextIndexableAttributes(hierarchy, c, true) + for (const d of hierarchy.getDescendants(c)) { + if (hierarchy.isMixin(d)) { + attrs.push(...getFullTextIndexableAttributes(hierarchy, d, true)) + } + } + + let result = true + + if (attrs.length === 0 && !(getFullTextContext(hierarchy, c)?.forceIndex ?? false)) { + result = false + // We need check if document has collections with indexable fields. + const attrs = hierarchy.getAllAttributes(c).values() + for (const attr of attrs) { + if (attr.type._class === core.class.Collection) { + if (isClassIndexable(hierarchy, (attr.type as Collection).of)) { + result = true + break + } + } + } + } + hierarchy.setClassifierProp(c, 'class_indexed', result) + return result +} diff --git a/server-plugins/collaboration/src/fulltext.ts b/server-plugins/collaboration/src/fulltext.ts index e7f6cb2bc9..a49a812aab 100644 --- a/server-plugins/collaboration/src/fulltext.ts +++ b/server-plugins/collaboration/src/fulltext.ts @@ -24,7 +24,8 @@ import core, { MeasureContext, Ref, WorkspaceId, - collaborativeDocParse + collaborativeDocParse, + getFullTextIndexableAttributes } from '@hcengineering/core' import { ContentTextAdapter, @@ -37,8 +38,7 @@ import { contentStageId, docKey, docUpdKey, - fieldStateId, - getFullTextIndexableAttributes + fieldStateId } from '@hcengineering/server-core' /** diff --git a/server/core/src/fulltext.ts b/server/core/src/fulltext.ts index bf441dc50c..6a26dd923a 100644 --- a/server/core/src/fulltext.ts +++ b/server/core/src/fulltext.ts @@ -39,10 +39,11 @@ import core, { docKey, isFullTextAttribute, isIndexedAttribute, - toFindResult + toFindResult, + isClassIndexable } from '@hcengineering/core' import { type FullTextIndexPipeline } from './indexer' -import { createStateDoc, isClassIndexable } from './indexer/utils' +import { createStateDoc } from './indexer/utils' import { getScoringConfig, mapSearchResultDoc } from './mapper' import { type StorageAdapter } from './storage' import type { FullTextAdapter, IndexedDoc, WithFind } from './types' diff --git a/server/core/src/indexer/content.ts b/server/core/src/indexer/content.ts index 625fa7b21b..dd6183b73b 100644 --- a/server/core/src/indexer/content.ts +++ b/server/core/src/indexer/content.ts @@ -14,6 +14,7 @@ // import core, { + getFullTextIndexableAttributes, type Blob, type Class, type Doc, @@ -28,13 +29,13 @@ import { type DbAdapter } from '../adapter' import { type StorageAdapter } from '../storage' import { type ContentTextAdapter, type IndexedDoc } from '../types' import { + contentStageId, + fieldStateId, type DocUpdateHandler, type FullTextPipeline, - type FullTextPipelineStage, - contentStageId, - fieldStateId + type FullTextPipelineStage } from './types' -import { docKey, docUpdKey, getFullTextIndexableAttributes } from './utils' +import { docKey, docUpdKey } from './utils' /** * @public diff --git a/server/core/src/indexer/field.ts b/server/core/src/indexer/field.ts index 7b5886dcb8..89bc893a1a 100644 --- a/server/core/src/indexer/field.ts +++ b/server/core/src/indexer/field.ts @@ -23,7 +23,9 @@ import core, { type IndexStageState, type MeasureContext, type Ref, - type ServerStorage + type ServerStorage, + getFullTextIndexableAttributes, + getFullTextContext } from '@hcengineering/core' import { deepEqual } from 'fast-equals' import { type DbAdapter } from '../adapter' @@ -41,8 +43,6 @@ import { docUpdKey, getContent, getCustomAttrKeys, - getFullTextContext, - getFullTextIndexableAttributes, isFullTextAttribute, loadIndexStageStage } from './utils' @@ -250,6 +250,7 @@ export class IndexedFieldStage implements FullTextPipelineStage { } } + // Remove should be safe to missing class async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise { for (const doc of docs) { if (doc.attachedTo !== undefined) { @@ -260,8 +261,8 @@ export class IndexedFieldStage implements FullTextPipelineStage { const { _class, attr, extra, docId } = extractDocKey(k) if (_class !== undefined && docId === undefined) { - const keyAttr = pipeline.hierarchy.getAttribute(_class, attr) - if (isFullTextAttribute(keyAttr)) { + const keyAttr = pipeline.hierarchy.findAttribute(_class, attr) + if (keyAttr !== undefined && isFullTextAttribute(keyAttr)) { ;(parentDocUpdate as any)[docUpdKey(attr, { _class, docId: doc._id, extra })] = null } } diff --git a/server/core/src/indexer/fulltextPush.ts b/server/core/src/indexer/fulltextPush.ts index 00b3b36e11..1870179316 100644 --- a/server/core/src/indexer/fulltextPush.ts +++ b/server/core/src/indexer/fulltextPush.ts @@ -26,7 +26,8 @@ import core, { type MeasureContext, type Ref, type ServerStorage, - type WorkspaceId + type WorkspaceId, + getFullTextContext } from '@hcengineering/core' import { jsonToText, markupToJSON } from '@hcengineering/text' import { type DbAdapter } from '../adapter' @@ -41,7 +42,7 @@ import { type FullTextPipelineStage, fullTextPushStageId } from './types' -import { collectPropagate, collectPropagateClasses, docKey, getFullTextContext, isCustomAttr } from './utils' +import { collectPropagate, collectPropagateClasses, docKey, isCustomAttr } from './utils' /** * @public diff --git a/server/core/src/indexer/indexer.ts b/server/core/src/indexer/indexer.ts index e27b2c4b32..047d784313 100644 --- a/server/core/src/indexer/indexer.ts +++ b/server/core/src/indexer/indexer.ts @@ -29,6 +29,7 @@ import core, { type WorkspaceId, _getOperator, docKey, + groupByArray, setObjectValue, toFindResult } from '@hcengineering/core' @@ -539,6 +540,7 @@ export class FullTextIndexPipeline implements FullTextPipeline { } private async processRemove (): Promise { + let total = 0 while (true) { const result = await this.storage.findAll( this.metrics, @@ -547,9 +549,7 @@ export class FullTextIndexPipeline implements FullTextPipeline { removed: true }, { - sort: { - modifiedOn: 1 - }, + limit: 1000, projection: { _id: 1, stages: 1, @@ -584,6 +584,12 @@ export class FullTextIndexPipeline implements FullTextPipeline { await this.flush(true) if (toRemoveIds.length > 0) { await this.storage.clean(this.metrics, DOMAIN_DOC_INDEX_STATE, toRemoveIds) + total += toRemoveIds.length + await this.metrics.info('indexer', { + _classes: Array.from(groupByArray(toIndex, (it) => it.objectClass).keys()), + total, + count: toRemoveIds.length + }) } else { break } diff --git a/server/core/src/indexer/summary.ts b/server/core/src/indexer/summary.ts index d4d16605f0..053173ca0d 100644 --- a/server/core/src/indexer/summary.ts +++ b/server/core/src/indexer/summary.ts @@ -26,7 +26,8 @@ import core, { isFullTextAttribute, type MeasureContext, type Ref, - type ServerStorage + type ServerStorage, + getFullTextContext } from '@hcengineering/core' import { translate } from '@hcengineering/platform' import { jsonToText, markupToJSON } from '@hcengineering/text' @@ -39,13 +40,7 @@ import { type FullTextPipeline, type FullTextPipelineStage } from './types' -import { - collectPropagate, - collectPropagateClasses, - getFullTextContext, - isCustomAttr, - loadIndexStageStage -} from './utils' +import { collectPropagate, collectPropagateClasses, isCustomAttr, loadIndexStageStage } from './utils' /** * @public diff --git a/server/core/src/indexer/utils.ts b/server/core/src/indexer/utils.ts index f25fd5ed0f..fee342200c 100644 --- a/server/core/src/indexer/utils.ts +++ b/server/core/src/indexer/utils.ts @@ -15,25 +15,15 @@ import core, { type AnyAttribute, - type AttachedDoc, type Class, - ClassifierKind, - type Collection, type Data, type Doc, type DocIndexState, - DOMAIN_BLOB, - DOMAIN_DOC_INDEX_STATE, - DOMAIN_FULLTEXT_BLOB, - DOMAIN_MODEL, - DOMAIN_TRANSIENT, - DOMAIN_TX, type FullTextSearchContext, generateId, + getFullTextContext, type Hierarchy, type IndexStageState, - isFullTextAttribute, - isIndexedAttribute, type MeasureContext, type Obj, type Ref, @@ -44,30 +34,6 @@ import { deepEqual } from 'fast-equals' import { type DbAdapter } from '../adapter' import plugin from '../plugin' import { type FullTextPipeline } from './types' -/** - * @public - */ -export function getFullTextIndexableAttributes (hierarchy: Hierarchy, clazz: Ref>): AnyAttribute[] { - const allAttributes = hierarchy.getAllAttributes(clazz) - const result: AnyAttribute[] = [] - for (const [, attr] of allAttributes) { - if (isFullTextAttribute(attr) || isIndexedAttribute(attr)) { - result.push(attr) - } - } - - hierarchy - .getDescendants(clazz) - .filter((m) => hierarchy.getClass(m).kind === ClassifierKind.MIXIN) - .forEach((m) => { - for (const [, v] of hierarchy.getAllAttributes(m, clazz)) { - if (isFullTextAttribute(v) || isIndexedAttribute(v)) { - result.push(v) - } - } - }) - return result -} export { docKey, docUpdKey, extractDocKey, isFullTextAttribute } from '@hcengineering/core' export type { IndexKeyOptions } from '@hcengineering/core' @@ -96,63 +62,6 @@ export function getContent ( return attrs } -/** - * @public - */ -export function isClassIndexable (hierarchy: Hierarchy, c: Ref>): boolean { - const indexed = hierarchy.getClassifierProp(c, 'class_indexed') - if (indexed !== undefined) { - return indexed as boolean - } - const domain = hierarchy.findDomain(c) - if (domain === undefined) { - hierarchy.setClassifierProp(c, 'class_indexed', false) - return false - } - - if ( - domain === DOMAIN_DOC_INDEX_STATE || - domain === DOMAIN_TX || - domain === DOMAIN_MODEL || - domain === DOMAIN_BLOB || - domain === DOMAIN_FULLTEXT_BLOB || - domain === DOMAIN_TRANSIENT - ) { - hierarchy.setClassifierProp(c, 'class_indexed', false) - return false - } - - const indexMixin = hierarchy.classHierarchyMixin(c, core.mixin.IndexConfiguration) - if (indexMixin?.searchDisabled !== undefined && indexMixin?.searchDisabled) { - hierarchy.setClassifierProp(c, 'class_indexed', false) - return false - } - - const attrs = getFullTextIndexableAttributes(hierarchy, c) - for (const d of hierarchy.getDescendants(c)) { - if (hierarchy.isMixin(d)) { - attrs.push(...getFullTextIndexableAttributes(hierarchy, d)) - } - } - - let result = true - - if (attrs.length === 0 && !(getFullTextContext(hierarchy, c)?.forceIndex ?? false)) { - result = false - // We need check if document has collections with indexable fields. - const attrs = hierarchy.getAllAttributes(c).values() - for (const attr of attrs) { - if (attr.type._class === core.class.Collection) { - if (isClassIndexable(hierarchy, (attr.type as Collection).of)) { - result = true - break - } - } - } - } - hierarchy.setClassifierProp(c, 'class_indexed', result) - return result -} /** * @public */ @@ -225,35 +134,6 @@ export async function loadIndexStageStage ( return [result, state] } -/** - * @public - */ -export function getFullTextContext ( - hierarchy: Hierarchy, - objectClass: Ref> -): Omit> { - let objClass = hierarchy.getClass(objectClass) - - while (true) { - if (hierarchy.hasMixin(objClass, core.mixin.FullTextSearchContext)) { - const ctx = hierarchy.as, FullTextSearchContext>(objClass, core.mixin.FullTextSearchContext) - if (ctx !== undefined) { - return ctx - } - } - if (objClass.extends === undefined) { - break - } - objClass = hierarchy.getClass(objClass.extends) - } - return { - fullTextSummary: false, - forceIndex: false, - propagate: [], - childProcessingAllowed: true - } -} - /** * @public */