UBERF-6540: Fix isIndexable and clean wrong indexed documents (#5347)

This commit is contained in:
Andrey Sobolev 2024-04-13 14:07:37 +07:00 committed by GitHub
parent 9e3c9a928e
commit bccf97eeb7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 246 additions and 181 deletions

View File

@ -15,68 +15,69 @@
import {
type ActivityAttributeUpdatesPresenter,
type ActivityInfoMessage,
type ActivityDoc,
type ActivityExtension,
type ActivityExtensionKind,
type ActivityInfoMessage,
type ActivityMessage,
type ActivityMessageControl,
type ActivityMessageExtension,
type ActivityMessageExtensionKind,
type ActivityMessagePreview,
type ActivityMessagesFilter,
type ActivityReference,
type DocAttributeUpdates,
type DocUpdateAction,
type DocUpdateMessage,
type DocUpdateMessageViewlet,
type DocUpdateMessageViewletAttributesConfig,
type Reaction,
type TxViewlet,
type ActivityMessageControl,
type SavedMessage,
type IgnoreActivity,
type ActivityReference,
type ActivityMessagePreview
type Reaction,
type SavedMessage,
type TxViewlet
} from '@hcengineering/activity'
import contact, { type Person } from '@hcengineering/contact'
import core, {
DOMAIN_MODEL,
IndexKind,
type Account,
type Class,
type Doc,
type DocumentQuery,
type Ref,
type Tx,
IndexKind,
type TxCUD,
type Domain,
type Account,
type Timestamp
type IndexingConfiguration,
type Ref,
type Timestamp,
type Tx,
type TxCUD
} from '@hcengineering/core'
import {
Model,
type Builder,
Prop,
Index,
TypeRef,
TypeString,
Mixin,
ArrOf,
Collection,
Index,
Mixin,
Model,
Prop,
TypeBoolean,
TypeIntlString,
ArrOf,
TypeMarkup,
TypeRef,
TypeString,
TypeTimestamp,
UX,
TypeMarkup
type Builder
} from '@hcengineering/model'
import { TAttachedDoc, TClass, TDoc } from '@hcengineering/model-core'
import preference, { TPreference } from '@hcengineering/model-preference'
import view from '@hcengineering/model-view'
import notification from '@hcengineering/notification'
import type { Asset, IntlString, Resource } from '@hcengineering/platform'
import { type AnyComponent } from '@hcengineering/ui/src/types'
import contact, { type Person } from '@hcengineering/contact'
import preference, { TPreference } from '@hcengineering/model-preference'
import notification from '@hcengineering/notification'
import view from '@hcengineering/model-view'
import activity from './plugin'
export { activityOperation } from './migration'
export { activityId } from '@hcengineering/activity'
export { activityOperation } from './migration'
export const DOMAIN_ACTIVITY = 'activity' as Domain
@ -369,6 +370,24 @@ export function createModel (builder: Builder): void {
labelPresenter: activity.component.ActivityMessageNotificationLabel
})
builder.mixin<Class<DocUpdateMessage>, IndexingConfiguration<DocUpdateMessage>>(
activity.class.DocUpdateMessage,
core.class.Class,
core.mixin.IndexConfiguration,
{
searchDisabled: true
}
)
// Disable search indexing for Reaction documents via the IndexConfiguration mixin.
// The type arguments name Reaction (not DocUpdateMessage) so that the mixin data
// is type-checked against the class that is actually being configured.
builder.mixin<Class<Reaction>, IndexingConfiguration<Reaction>>(
  activity.class.Reaction,
  core.class.Class,
  core.mixin.IndexConfiguration,
  {
    searchDisabled: true
  }
)
builder.createDoc(
notification.class.NotificationType,
core.space.Model,

View File

@ -13,7 +13,7 @@
// limitations under the License.
//
import core, { coreId, DOMAIN_DOC_INDEX_STATE, TxOperations } from '@hcengineering/core'
import core, { coreId, DOMAIN_DOC_INDEX_STATE, isClassIndexable, TxOperations } from '@hcengineering/core'
import {
tryUpgrade,
type MigrateOperation,
@ -24,8 +24,22 @@ import {
export const coreOperation: MigrateOperation = {
async migrate (client: MigrationClient): Promise<void> {
// We need to delete all documents in doc index state for missing classes
const allDocs = client.hierarchy.getDescendants(core.class.Doc)
await client.deleteMany(DOMAIN_DOC_INDEX_STATE, { objectClass: { $nin: allDocs } })
const allClasses = client.hierarchy.getDescendants(core.class.Doc)
const allIndexed = allClasses.filter((it) => isClassIndexable(client.hierarchy, it))
const indexed = new Set(allIndexed)
const skipped = allClasses.filter((it) => !indexed.has(it))
// Next remove all non indexed classes and missing classes as well.
const updated = await client.update(
DOMAIN_DOC_INDEX_STATE,
{ objectClass: { $nin: allIndexed } },
{
$set: {
removed: true
}
}
)
console.log('clearing non indexed documents', skipped, updated.updated, updated.matched)
},
async upgrade (client: MigrationUpgradeClient): Promise<void> {
await tryUpgrade(client, coreId, [

View File

@ -1,4 +1,11 @@
import { AccountRole, type Doc, type Domain, type Ref } from '@hcengineering/core'
import {
AccountRole,
type Class,
type IndexingConfiguration,
type Doc,
type Domain,
type Ref
} from '@hcengineering/core'
import { type PublicLink, type Restrictions, guestAccountEmail } from '@hcengineering/guest'
import { type Builder, Model } from '@hcengineering/model'
import core, { TDoc } from '@hcengineering/model-core'
@ -39,6 +46,14 @@ export function createModel (builder: Builder): void {
{ createdOn: -1 }
]
})
builder.mixin<Class<PublicLink>, IndexingConfiguration<PublicLink>>(
guest.class.PublicLink,
core.class.Class,
core.mixin.IndexConfiguration,
{
searchDisabled: true
}
)
}
export { guestId } from '@hcengineering/guest'

View File

@ -60,8 +60,8 @@ import {
type CommonInboxNotification,
type CommonNotificationType,
type DocNotifyContext,
type DocUpdateTx,
type DocUpdates,
type DocUpdateTx,
type InboxNotification,
type MentionInboxNotification,
type NotificationContextPresenter,

View File

@ -17,10 +17,19 @@ import { deepEqual } from 'fast-equals'
import {
Account,
AnyAttribute,
AttachedDoc,
Class,
ClassifierKind,
Collection,
Doc,
DocData,
DocIndexState,
DOMAIN_BLOB,
DOMAIN_DOC_INDEX_STATE,
DOMAIN_FULLTEXT_BLOB,
DOMAIN_MODEL,
DOMAIN_TRANSIENT,
FullTextSearchContext,
IndexKind,
Obj,
Permission,
@ -31,9 +40,10 @@ import {
} from './classes'
import core from './component'
import { Hierarchy } from './hierarchy'
import { TxOperations } from './operations'
import { isPredicate } from './predicate'
import { DocumentQuery, FindResult } from './storage'
import { TxOperations } from './operations'
import { DOMAIN_TX } from './tx'
function toHex (value: number, chars: number): string {
const result = value.toString(16)
@ -582,3 +592,125 @@ export async function checkPermission (
return myPermissions.has(_id)
}
/**
 * Collects the attributes of a class that are full-text or indexed attributes.
 *
 * The attributes of `clazz` itself are examined first, followed by the attributes
 * that every mixin descendant of `clazz` contributes (looked up relative to `clazz`).
 *
 * @param hierarchy - hierarchy used to resolve attributes and descendants
 * @param clazz - class whose attributes are inspected
 * @param skipDocs - when true, attributes whose `attributeOf` is `core.class.Doc`
 * or `core.class.AttachedDoc` are ignored
 * @returns matching attributes, in discovery order (duplicates are not removed)
 * @public
 */
export function getFullTextIndexableAttributes (
  hierarchy: Hierarchy,
  clazz: Ref<Class<Obj>>,
  skipDocs: boolean = false
): AnyAttribute[] {
  // Single acceptance rule shared by the class pass and the mixin pass.
  const accept = (attribute: AnyAttribute): boolean => {
    if (skipDocs) {
      const owner = attribute.attributeOf
      if (owner === core.class.Doc || owner === core.class.AttachedDoc) {
        return false
      }
    }
    return isFullTextAttribute(attribute) || isIndexedAttribute(attribute)
  }

  const collected: AnyAttribute[] = []

  for (const attribute of hierarchy.getAllAttributes(clazz).values()) {
    if (accept(attribute)) {
      collected.push(attribute)
    }
  }

  const mixins = hierarchy
    .getDescendants(clazz)
    .filter((candidate) => hierarchy.getClass(candidate).kind === ClassifierKind.MIXIN)
  for (const mixin of mixins) {
    for (const attribute of hierarchy.getAllAttributes(mixin, clazz).values()) {
      if (accept(attribute)) {
        collected.push(attribute)
      }
    }
  }

  return collected
}
/**
 * Resolves the full-text search context that applies to a class.
 *
 * Starting at `objectClass`, the `extends` chain is walked upwards and the first
 * `core.mixin.FullTextSearchContext` mixin found is returned. When no class in the
 * chain carries the mixin, a default context is returned.
 *
 * @param hierarchy - hierarchy used to resolve classes and mixins
 * @param objectClass - class to start the lookup from
 * @returns the nearest context, or the defaults
 * (`fullTextSummary: false`, `forceIndex: false`, `propagate: []`, `childProcessingAllowed: true`)
 * @public
 */
export function getFullTextContext (
  hierarchy: Hierarchy,
  objectClass: Ref<Class<Doc>>
): Omit<FullTextSearchContext, keyof Class<Doc>> {
  let current = hierarchy.getClass(objectClass)
  for (;;) {
    const context = hierarchy.hasMixin(current, core.mixin.FullTextSearchContext)
      ? hierarchy.as<Class<Doc>, FullTextSearchContext>(current, core.mixin.FullTextSearchContext)
      : undefined
    if (context !== undefined) {
      return context
    }

    // Reached the root of the inheritance chain without finding a context.
    const parent = current.extends
    if (parent === undefined) {
      break
    }
    current = hierarchy.getClass(parent)
  }

  return {
    fullTextSummary: false,
    forceIndex: false,
    propagate: [],
    childProcessingAllowed: true
  }
}
/**
 * Decides whether documents of class `c` take part in full-text indexing.
 *
 * A class is NOT indexable when it has no domain, lives in one of the service
 * domains (doc index state, tx, model, blob, fulltext blob, transient), or has
 * `searchDisabled` set through `core.mixin.IndexConfiguration`.
 * Otherwise it is indexable when it (or one of its mixin descendants) has
 * full-text/indexed attributes, when its full-text context has `forceIndex`, or
 * when one of its `Collection` attributes points to an indexable class.
 *
 * The verdict is cached on the classifier under the `class_indexed` property.
 *
 * Collections may reference each other cyclically (including a class holding a
 * collection of itself). Classes whose collection scan is still running are
 * therefore tracked and never re-entered, which keeps the recursion bounded.
 *
 * @param hierarchy - hierarchy used for class lookups and for caching the verdict
 * @param c - class to check
 * @returns true when the class should be indexed
 * @public
 */
export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boolean {
  // Classes whose collection scan is currently on the call stack.
  const inProgress = new Set<Ref<Class<Doc>>>()

  // `settled` is false when a negative verdict relied on skipping a class that is
  // still in progress; such a verdict is provisional and must not be cached.
  const resolve = (target: Ref<Class<Doc>>): { indexable: boolean, settled: boolean } => {
    const cached = hierarchy.getClassifierProp(target, 'class_indexed')
    if (cached !== undefined) {
      return { indexable: cached as boolean, settled: true }
    }

    const settle = (value: boolean): { indexable: boolean, settled: boolean } => {
      hierarchy.setClassifierProp(target, 'class_indexed', value)
      return { indexable: value, settled: true }
    }

    const domain = hierarchy.findDomain(target)
    if (
      domain === undefined ||
      domain === DOMAIN_DOC_INDEX_STATE ||
      domain === DOMAIN_TX ||
      domain === DOMAIN_MODEL ||
      domain === DOMAIN_BLOB ||
      domain === DOMAIN_FULLTEXT_BLOB ||
      domain === DOMAIN_TRANSIENT
    ) {
      return settle(false)
    }

    const indexMixin = hierarchy.classHierarchyMixin(target, core.mixin.IndexConfiguration)
    if (indexMixin?.searchDisabled === true) {
      return settle(false)
    }

    const attrs = getFullTextIndexableAttributes(hierarchy, target, true)
    for (const d of hierarchy.getDescendants(target)) {
      if (hierarchy.isMixin(d)) {
        attrs.push(...getFullTextIndexableAttributes(hierarchy, d, true))
      }
    }
    if (attrs.length > 0 || (getFullTextContext(hierarchy, target)?.forceIndex ?? false)) {
      return settle(true)
    }

    // No own indexable fields: check whether any collection holds indexable documents.
    inProgress.add(target)
    let indexable = false
    let settled = true
    for (const attr of hierarchy.getAllAttributes(target).values()) {
      if (attr.type._class !== core.class.Collection) {
        continue
      }
      const itemClass = (attr.type as Collection<AttachedDoc>).of
      if (inProgress.has(itemClass)) {
        // Cycle: that class is already being resolved further up the stack.
        settled = false
        continue
      }
      const nested = resolve(itemClass)
      if (nested.indexable) {
        indexable = true
        break
      }
      if (!nested.settled) {
        settled = false
      }
    }
    inProgress.delete(target)

    // A positive verdict is always final. A negative one is final when it did not
    // depend on a cut cycle, or when this is the outermost class (everything
    // reachable has been explored by then).
    if (indexable || settled || inProgress.size === 0) {
      return settle(indexable)
    }
    return { indexable, settled: false }
  }

  return resolve(c).indexable
}

View File

@ -24,7 +24,8 @@ import core, {
MeasureContext,
Ref,
WorkspaceId,
collaborativeDocParse
collaborativeDocParse,
getFullTextIndexableAttributes
} from '@hcengineering/core'
import {
ContentTextAdapter,
@ -37,8 +38,7 @@ import {
contentStageId,
docKey,
docUpdKey,
fieldStateId,
getFullTextIndexableAttributes
fieldStateId
} from '@hcengineering/server-core'
/**

View File

@ -39,10 +39,11 @@ import core, {
docKey,
isFullTextAttribute,
isIndexedAttribute,
toFindResult
toFindResult,
isClassIndexable
} from '@hcengineering/core'
import { type FullTextIndexPipeline } from './indexer'
import { createStateDoc, isClassIndexable } from './indexer/utils'
import { createStateDoc } from './indexer/utils'
import { getScoringConfig, mapSearchResultDoc } from './mapper'
import { type StorageAdapter } from './storage'
import type { FullTextAdapter, IndexedDoc, WithFind } from './types'

View File

@ -14,6 +14,7 @@
//
import core, {
getFullTextIndexableAttributes,
type Blob,
type Class,
type Doc,
@ -28,13 +29,13 @@ import { type DbAdapter } from '../adapter'
import { type StorageAdapter } from '../storage'
import { type ContentTextAdapter, type IndexedDoc } from '../types'
import {
contentStageId,
fieldStateId,
type DocUpdateHandler,
type FullTextPipeline,
type FullTextPipelineStage,
contentStageId,
fieldStateId
type FullTextPipelineStage
} from './types'
import { docKey, docUpdKey, getFullTextIndexableAttributes } from './utils'
import { docKey, docUpdKey } from './utils'
/**
* @public

View File

@ -23,7 +23,9 @@ import core, {
type IndexStageState,
type MeasureContext,
type Ref,
type ServerStorage
type ServerStorage,
getFullTextIndexableAttributes,
getFullTextContext
} from '@hcengineering/core'
import { deepEqual } from 'fast-equals'
import { type DbAdapter } from '../adapter'
@ -41,8 +43,6 @@ import {
docUpdKey,
getContent,
getCustomAttrKeys,
getFullTextContext,
getFullTextIndexableAttributes,
isFullTextAttribute,
loadIndexStageStage
} from './utils'
@ -250,6 +250,7 @@ export class IndexedFieldStage implements FullTextPipelineStage {
}
}
// remove() must tolerate classes that are missing from the hierarchy
async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
for (const doc of docs) {
if (doc.attachedTo !== undefined) {
@ -260,8 +261,8 @@ export class IndexedFieldStage implements FullTextPipelineStage {
const { _class, attr, extra, docId } = extractDocKey(k)
if (_class !== undefined && docId === undefined) {
const keyAttr = pipeline.hierarchy.getAttribute(_class, attr)
if (isFullTextAttribute(keyAttr)) {
const keyAttr = pipeline.hierarchy.findAttribute(_class, attr)
if (keyAttr !== undefined && isFullTextAttribute(keyAttr)) {
;(parentDocUpdate as any)[docUpdKey(attr, { _class, docId: doc._id, extra })] = null
}
}

View File

@ -26,7 +26,8 @@ import core, {
type MeasureContext,
type Ref,
type ServerStorage,
type WorkspaceId
type WorkspaceId,
getFullTextContext
} from '@hcengineering/core'
import { jsonToText, markupToJSON } from '@hcengineering/text'
import { type DbAdapter } from '../adapter'
@ -41,7 +42,7 @@ import {
type FullTextPipelineStage,
fullTextPushStageId
} from './types'
import { collectPropagate, collectPropagateClasses, docKey, getFullTextContext, isCustomAttr } from './utils'
import { collectPropagate, collectPropagateClasses, docKey, isCustomAttr } from './utils'
/**
* @public

View File

@ -29,6 +29,7 @@ import core, {
type WorkspaceId,
_getOperator,
docKey,
groupByArray,
setObjectValue,
toFindResult
} from '@hcengineering/core'
@ -539,6 +540,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
}
private async processRemove (): Promise<void> {
let total = 0
while (true) {
const result = await this.storage.findAll(
this.metrics,
@ -547,9 +549,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
removed: true
},
{
sort: {
modifiedOn: 1
},
limit: 1000,
projection: {
_id: 1,
stages: 1,
@ -584,6 +584,12 @@ export class FullTextIndexPipeline implements FullTextPipeline {
await this.flush(true)
if (toRemoveIds.length > 0) {
await this.storage.clean(this.metrics, DOMAIN_DOC_INDEX_STATE, toRemoveIds)
total += toRemoveIds.length
await this.metrics.info('indexer', {
_classes: Array.from(groupByArray(toIndex, (it) => it.objectClass).keys()),
total,
count: toRemoveIds.length
})
} else {
break
}

View File

@ -26,7 +26,8 @@ import core, {
isFullTextAttribute,
type MeasureContext,
type Ref,
type ServerStorage
type ServerStorage,
getFullTextContext
} from '@hcengineering/core'
import { translate } from '@hcengineering/platform'
import { jsonToText, markupToJSON } from '@hcengineering/text'
@ -39,13 +40,7 @@ import {
type FullTextPipeline,
type FullTextPipelineStage
} from './types'
import {
collectPropagate,
collectPropagateClasses,
getFullTextContext,
isCustomAttr,
loadIndexStageStage
} from './utils'
import { collectPropagate, collectPropagateClasses, isCustomAttr, loadIndexStageStage } from './utils'
/**
* @public

View File

@ -15,25 +15,15 @@
import core, {
type AnyAttribute,
type AttachedDoc,
type Class,
ClassifierKind,
type Collection,
type Data,
type Doc,
type DocIndexState,
DOMAIN_BLOB,
DOMAIN_DOC_INDEX_STATE,
DOMAIN_FULLTEXT_BLOB,
DOMAIN_MODEL,
DOMAIN_TRANSIENT,
DOMAIN_TX,
type FullTextSearchContext,
generateId,
getFullTextContext,
type Hierarchy,
type IndexStageState,
isFullTextAttribute,
isIndexedAttribute,
type MeasureContext,
type Obj,
type Ref,
@ -44,30 +34,6 @@ import { deepEqual } from 'fast-equals'
import { type DbAdapter } from '../adapter'
import plugin from '../plugin'
import { type FullTextPipeline } from './types'
/**
 * Collects the attributes of a class that are full-text or indexed attributes.
 *
 * The attributes of `clazz` itself are examined first, followed by the attributes
 * that every mixin descendant of `clazz` contributes (looked up relative to `clazz`).
 *
 * @param hierarchy - hierarchy used to resolve attributes and descendants
 * @param clazz - class whose attributes are inspected
 * @returns matching attributes, in discovery order (duplicates are not removed)
 * @public
 */
export function getFullTextIndexableAttributes (hierarchy: Hierarchy, clazz: Ref<Class<Obj>>): AnyAttribute[] {
  // Single acceptance rule shared by the class pass and the mixin pass.
  const accept = (attribute: AnyAttribute): boolean => isFullTextAttribute(attribute) || isIndexedAttribute(attribute)

  const collected: AnyAttribute[] = []

  for (const attribute of hierarchy.getAllAttributes(clazz).values()) {
    if (accept(attribute)) {
      collected.push(attribute)
    }
  }

  const mixins = hierarchy
    .getDescendants(clazz)
    .filter((candidate) => hierarchy.getClass(candidate).kind === ClassifierKind.MIXIN)
  for (const mixin of mixins) {
    for (const attribute of hierarchy.getAllAttributes(mixin, clazz).values()) {
      if (accept(attribute)) {
        collected.push(attribute)
      }
    }
  }

  return collected
}
export { docKey, docUpdKey, extractDocKey, isFullTextAttribute } from '@hcengineering/core'
export type { IndexKeyOptions } from '@hcengineering/core'
@ -96,63 +62,6 @@ export function getContent (
return attrs
}
/**
 * Decides whether documents of class `c` take part in full-text indexing.
 *
 * A class is NOT indexable when it has no domain, lives in one of the service
 * domains (doc index state, tx, model, blob, fulltext blob, transient), or has
 * `searchDisabled` set through `core.mixin.IndexConfiguration`.
 * Otherwise it is indexable when it (or one of its mixin descendants) has
 * full-text/indexed attributes, when its full-text context has `forceIndex`, or
 * when one of its `Collection` attributes points to an indexable class.
 *
 * The verdict is cached on the classifier under the `class_indexed` property.
 *
 * Collections may reference each other cyclically (including a class holding a
 * collection of itself). Classes whose collection scan is still running are
 * therefore tracked and never re-entered, which keeps the recursion bounded.
 *
 * @param hierarchy - hierarchy used for class lookups and for caching the verdict
 * @param c - class to check
 * @returns true when the class should be indexed
 * @public
 */
export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boolean {
  // Classes whose collection scan is currently on the call stack.
  const inProgress = new Set<Ref<Class<Doc>>>()

  // `settled` is false when a negative verdict relied on skipping a class that is
  // still in progress; such a verdict is provisional and must not be cached.
  const resolve = (target: Ref<Class<Doc>>): { indexable: boolean, settled: boolean } => {
    const cached = hierarchy.getClassifierProp(target, 'class_indexed')
    if (cached !== undefined) {
      return { indexable: cached as boolean, settled: true }
    }

    const settle = (value: boolean): { indexable: boolean, settled: boolean } => {
      hierarchy.setClassifierProp(target, 'class_indexed', value)
      return { indexable: value, settled: true }
    }

    const domain = hierarchy.findDomain(target)
    if (
      domain === undefined ||
      domain === DOMAIN_DOC_INDEX_STATE ||
      domain === DOMAIN_TX ||
      domain === DOMAIN_MODEL ||
      domain === DOMAIN_BLOB ||
      domain === DOMAIN_FULLTEXT_BLOB ||
      domain === DOMAIN_TRANSIENT
    ) {
      return settle(false)
    }

    const indexMixin = hierarchy.classHierarchyMixin(target, core.mixin.IndexConfiguration)
    if (indexMixin?.searchDisabled === true) {
      return settle(false)
    }

    const attrs = getFullTextIndexableAttributes(hierarchy, target)
    for (const d of hierarchy.getDescendants(target)) {
      if (hierarchy.isMixin(d)) {
        attrs.push(...getFullTextIndexableAttributes(hierarchy, d))
      }
    }
    if (attrs.length > 0 || (getFullTextContext(hierarchy, target)?.forceIndex ?? false)) {
      return settle(true)
    }

    // No own indexable fields: check whether any collection holds indexable documents.
    inProgress.add(target)
    let indexable = false
    let settled = true
    for (const attr of hierarchy.getAllAttributes(target).values()) {
      if (attr.type._class !== core.class.Collection) {
        continue
      }
      const itemClass = (attr.type as Collection<AttachedDoc>).of
      if (inProgress.has(itemClass)) {
        // Cycle: that class is already being resolved further up the stack.
        settled = false
        continue
      }
      const nested = resolve(itemClass)
      if (nested.indexable) {
        indexable = true
        break
      }
      if (!nested.settled) {
        settled = false
      }
    }
    inProgress.delete(target)

    // A positive verdict is always final. A negative one is final when it did not
    // depend on a cut cycle, or when this is the outermost class (everything
    // reachable has been explored by then).
    if (indexable || settled || inProgress.size === 0) {
      return settle(indexable)
    }
    return { indexable, settled: false }
  }

  return resolve(c).indexable
}
/**
* @public
*/
@ -225,35 +134,6 @@ export async function loadIndexStageStage (
return [result, state]
}
/**
 * Resolves the full-text search context that applies to a class.
 *
 * Starting at `objectClass`, the `extends` chain is walked upwards and the first
 * `core.mixin.FullTextSearchContext` mixin found is returned. When no class in the
 * chain carries the mixin, a default context is returned.
 *
 * @param hierarchy - hierarchy used to resolve classes and mixins
 * @param objectClass - class to start the lookup from
 * @returns the nearest context, or the defaults
 * (`fullTextSummary: false`, `forceIndex: false`, `propagate: []`, `childProcessingAllowed: true`)
 * @public
 */
export function getFullTextContext (
  hierarchy: Hierarchy,
  objectClass: Ref<Class<Doc>>
): Omit<FullTextSearchContext, keyof Class<Doc>> {
  let current = hierarchy.getClass(objectClass)
  for (;;) {
    const context = hierarchy.hasMixin(current, core.mixin.FullTextSearchContext)
      ? hierarchy.as<Class<Doc>, FullTextSearchContext>(current, core.mixin.FullTextSearchContext)
      : undefined
    if (context !== undefined) {
      return context
    }

    // Reached the root of the inheritance chain without finding a context.
    const parent = current.extends
    if (parent === undefined) {
      break
    }
    current = hierarchy.getClass(parent)
  }

  return {
    fullTextSummary: false,
    forceIndex: false,
    propagate: [],
    childProcessingAllowed: true
  }
}
/**
* @public
*/