UBER-137: Fix application search (#3309)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
Andrey Sobolev 2023-06-01 14:08:57 +07:00 committed by GitHub
parent eec4f67335
commit 03a8cf3414
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 163 additions and 45 deletions

View File

@ -1 +1 @@
{ "major": 0, "minor": 6, "patch": 97 }
{ "major": 0, "minor": 6, "patch": 99 }

View File

@ -1360,7 +1360,13 @@ export function createModel (builder: Builder): void {
builder.mixin(recruit.mixin.Candidate, core.class.Class, core.mixin.FullTextSearchContext, {
fullTextSummary: true,
propagate: [recruit.class.Applicant]
propagate: [recruit.class.Applicant],
propagateClasses: [
tags.class.TagReference,
chunter.class.Comment,
attachment.class.Attachment,
contact.class.Channel
]
})
// Allow to use fuzzy search for mixins

View File

@ -419,6 +419,8 @@ export interface FullTextSearchContext extends Class<Doc> {
// If defined, will propagate changes to children with the defined set of classes
propagate?: Ref<Class<Doc>>[]
// If defined, will propagate all documents from children based on class
propagateClasses?: Ref<Class<Doc>>[]
// Do we need to propagate child value to parent one. Default(true)
parentPropagate?: boolean

View File

@ -61,6 +61,7 @@
// Forwards `result` to the optional `onClose` callback, clears the overlay
// flag and then invokes `close()`.
function _close (result: any): void {
  const listener = onClose
  if (listener !== undefined) {
    listener(result)
  }
  overlay = false
  close()
}

View File

@ -78,6 +78,7 @@ export function addNotification (
component: AnyComponent | AnySvelteComponent,
params?: { [key: string]: any }
): void {
const closeTimeout = parseInt(localStorage.getItem('#platform.notification.timeout') ?? '10000')
const notification: Notification = {
id: generateId(),
title,
@ -85,11 +86,13 @@ export function addNotification (
severity: NotificationSeverity.Success,
position: NotificationPosition.BottomRight,
component,
closeTimeout: parseInt(localStorage.getItem('#platform.notification.timeout') ?? '10000'),
closeTimeout,
params
}
if (closeTimeout !== 0) {
notificationsStore.addNotification(notification)
}
}
/**

View File

@ -192,7 +192,7 @@ async function generateLocation (loc: Location, shortLink: string): Promise<Reso
return undefined
}
const classLabel = tokens[0]
const lastId = tokens[1] as Ref<Doc>
const lastId = tokens.slice(1).join('-') as Ref<Doc>
const client = getClient()
const hierarchy = client.getHierarchy()
const classes = [chunter.class.Message, chunter.class.ThreadMessage, chunter.class.Comment]

View File

@ -46,7 +46,7 @@ async function generateIdLocation (loc: Location, shortLink: string): Promise<Re
const hierarchy = client.getHierarchy()
const classLabel = tokens[0]
const _id = tokens[1]
const _id = tokens.slice(1).join('-')
const classes = [recruit.mixin.VacancyList, recruit.mixin.Candidate]
let _class: Ref<Class<Doc>> | undefined
for (const clazz of classes) {

View File

@ -5,10 +5,10 @@
"author": "Anticrm Platform Contributors",
"license": "EPL-2.0",
"scripts": {
"start": "cross-env MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=false SERVER_SECRET=secret REKONI_URL=http://localhost:4004 FRONT_URL=http://localhost:8080 node --nolazy -r ts-node/register src/__start.ts",
"start": "cross-env MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=false SERVER_SECRET=secret REKONI_URL=http://localhost:4004 FRONT_URL=http://localhost:8080 MODEL_VERSION=$(node ../../models/all/lib/__showversion.js) node --nolazy -r ts-node/register src/__start.ts",
"build": "heft build",
"lint:fix": "eslint --fix src",
"bundle": "esbuild src/__start.ts --bundle --sourcemap=inline --minify --platform=node --external:bufferutil > bundle.js",
"bundle": "esbuild src/__start.ts --bundle --sourcemap=inline --minify --platform=node --external:bufferutil --define:process.env.MODEL_VERSION=$(node ../../models/all/lib/__showversion.js) > bundle.js",
"bundle:u": "esbuild src/__start.ts --bundle --sourcemap=inline --minify --platform=node > bundle.js && mkdir -p ./dist && cp -r ./node_modules/uWebSockets.js/*.node ./dist",
"docker:build": "docker build -t hardcoreeng/transactor .",
"docker:staging": "../../common/scripts/docker_tag.sh hardcoreeng/transactor staging",

View File

@ -62,17 +62,28 @@ export class IndexedFieldStage implements FullTextPipelineStage {
constructor (private readonly dbStorage: ServerStorage) {}
async initialize (storage: Storage, pipeline: FullTextPipeline): Promise<void> {
const indexable = (
await pipeline.model.findAll(core.class.Class, { [core.mixin.FullTextSearchContext + '.propagate']: true })
).map((it) => it._id)
const indexablePropogate = (
await pipeline.model.findAll(core.class.Class, {
[core.mixin.FullTextSearchContext]: { $exists: true }
})
)
.map((it) => pipeline.hierarchy.as(it, core.mixin.FullTextSearchContext))
.filter((it) => it.propagate != null || it.parentPropagate)
.map((it) =>
JSON.stringify({
id: it._id,
propogate: it.propagate,
parentPropgate: it.parentPropagate
})
)
const forceIndexing = (
await pipeline.model.findAll(core.class.Class, { [core.mixin.FullTextSearchContext + '.forceIndex']: true })
).map((it) => it._id)
indexable.sort()
indexablePropogate.sort()
;[this.stageValue, this.indexState] = await loadIndexStageStage(storage, this.indexState, this.stageId, 'config', {
classes: indexable,
classes: indexablePropogate,
forceIndex: forceIndexing
})
}
@ -143,9 +154,11 @@ export class IndexedFieldStage implements FullTextPipelineStage {
// Full re-index in case stage value is changed
if (!deepEqual(docState.attributes[dKey], v.value)) {
changes++
if (typeof v.value !== 'object') {
;(docUpdate as any)[dUKey] = v.value
}
}
}
if (docState.attachedTo != null && changes > 0) {
const ctx = getFullTextContext(pipeline.hierarchy, objClass)
if (ctx.parentPropagate ?? true) {

View File

@ -36,7 +36,7 @@ import {
FullTextPipelineStage,
fullTextPushStageId
} from './types'
import { collectPropagate, docKey, getFullTextContext } from './utils'
import { collectPropagate, collectPropagateClasses, docKey, getFullTextContext } from './utils'
/**
* @public
@ -49,7 +49,7 @@ export class FullTextPushStage implements FullTextPipelineStage {
updateFields: DocUpdateHandler[] = []
limit = 100
limit = 10
dimmVectors: Record<string, number[]> = {}
@ -144,6 +144,20 @@ export class FullTextPushStage implements FullTextPipelineStage {
)
if (parentDoc !== undefined) {
updateDoc2Elastic(parentDoc.attributes, elasticDoc, parentDoc._id)
const ctx = collectPropagateClasses(pipeline, parentDoc.objectClass)
if (ctx.length > 0) {
for (const p of ctx) {
const collections = await this.dbStorage.findAll(
metrics.newChild('propagate', {}),
core.class.DocIndexState,
{ attachedTo: parentDoc._id, objectClass: p }
)
for (const c of collections) {
updateDoc2Elastic(c.attributes, elasticDoc, c._id)
}
}
}
}
}
}
@ -215,7 +229,9 @@ function updateDoc2Elastic (attributes: Record<string, any>, doc: IndexedDoc, do
docId = docIdOverride ?? docId
if (docId === undefined) {
if (typeof vv !== 'object') {
doc[k] = vv
}
continue
}
const docIdAttr = '|' + docKey(attr, { _class, extra: extra.filter((it) => it !== 'base64') })
@ -223,8 +239,10 @@ function updateDoc2Elastic (attributes: Record<string, any>, doc: IndexedDoc, do
// Since we replace array of values, we could ignore null
doc[docIdAttr] = [...(doc[docIdAttr] ?? [])]
if (vv !== '') {
if (typeof vv !== 'object') {
doc[docIdAttr].push(vv)
}
}
}
}
}

View File

@ -29,11 +29,12 @@ import core, {
TxFactory,
WorkspaceId,
_getOperator,
setObjectValue
setObjectValue,
versionToString
} from '@hcengineering/core'
import { DbAdapter } from '../adapter'
import type { IndexedDoc } from '../types'
import { RateLimitter } from '../limitter'
import type { IndexedDoc } from '../types'
import { FullTextPipeline, FullTextPipelineStage } from './types'
import { createStateDoc, isClassIndexable } from './utils'
@ -226,9 +227,11 @@ export class FullTextIndexPipeline implements FullTextPipeline {
// Filter unsupported stages
udoc.stages = update.stages
if (Object.keys(update).length > 0) {
this.currentStages[stageId] = (this.currentStages[stageId] ?? 0) + 1
this.stageChanged++
}
}
const current = this.pending.get(docId)
if (current === undefined) {
@ -530,6 +533,17 @@ export class FullTextIndexPipeline implements FullTextPipeline {
}
async checkIndexConsistency (dbStorage: ServerStorage): Promise<void> {
if (process.env.MODEL_VERSION !== undefined) {
const modelVersion = await (await this.model.findAll(core.class.Version, {})).shift()
if (modelVersion !== undefined) {
const modelVersionString = versionToString(modelVersion)
if (modelVersionString !== process.env.MODEL_VERSION) {
console.error('Indexer: Model version mismatch', modelVersionString, process.env.MODEL_VERSION)
return
}
}
}
this.hierarchy.domains()
const allClasses = this.hierarchy.getDescendants(core.class.Doc)
for (const c of allClasses) {
@ -542,6 +556,8 @@ export class FullTextIndexPipeline implements FullTextPipeline {
continue
}
console.log(this.workspace.name, 'checking index', c)
// All saved state documents
const states = (
await this.storage.findAll(core.class.DocIndexState, { objectClass: c }, { projection: { _id: 1 } })

View File

@ -33,12 +33,12 @@ import { translate } from '@hcengineering/platform'
import { convert } from 'html-to-text'
import { IndexedDoc } from '../types'
import { contentStageId, DocUpdateHandler, fieldStateId, FullTextPipeline, FullTextPipelineStage } from './types'
import { collectPropagate, getFullTextContext, loadIndexStageStage } from './utils'
import { collectPropagate, collectPropagateClasses, getFullTextContext, loadIndexStageStage } from './utils'
/**
* @public
*/
export const summaryStageId = 'sum-v4'
export const summaryStageId = 'sum-v5'
/**
* @public
@ -53,7 +53,7 @@ export class FullSummaryStage implements FullTextPipelineStage {
updateFields: DocUpdateHandler[] = []
// If specified, index only fields with content speciffied.
// If specified, index only fields with content specified.
matchExtra: string[] = [] // 'content', 'base64'] // '#en'
fieldFilter: ((attr: AnyAttribute, value: string) => boolean)[] = []
@ -69,8 +69,11 @@ export class FullSummaryStage implements FullTextPipelineStage {
async initialize (storage: Storage, pipeline: FullTextPipeline): Promise<void> {
const indexable = (
await pipeline.model.findAll(core.class.Class, { [core.mixin.FullTextSearchContext + '.fullTextSummary']: true })
).map((it) => it._id)
await pipeline.model.findAll(core.class.Class, { [core.mixin.FullTextSearchContext]: { $exists: true } })
)
.map((it) => pipeline.hierarchy.as(it, core.mixin.FullTextSearchContext))
.filter((it) => it.fullTextSummary)
.map((it) => it._id + (it.propagateClasses ?? []).join('|'))
indexable.sort()
;[this.stageValue, this.indexState] = await loadIndexStageStage(storage, this.indexState, this.stageId, 'config', {
classes: indexable,
@ -130,10 +133,12 @@ export class FullSummaryStage implements FullTextPipelineStage {
if (embeddingText.length > this.summaryLimit) {
break
}
embeddingText += await extractIndexedValues(c, pipeline.hierarchy, {
embeddingText +=
'\n' +
(await extractIndexedValues(c, pipeline.hierarchy, {
matchExtra: this.matchExtra,
fieldFilter: this.fieldFilter
})
}))
}
}
}
@ -148,13 +153,34 @@ export class FullSummaryStage implements FullTextPipelineStage {
{ _id: doc.attachedTo as Ref<DocIndexState> }
)
if (parentDoc !== undefined) {
const ctx = collectPropagateClasses(pipeline, parentDoc.objectClass)
if (ctx.length > 0) {
for (const p of ctx) {
const collections = await this.dbStorage.findAll(
metrics.newChild('propagate', {}),
core.class.DocIndexState,
{ attachedTo: parentDoc._id, objectClass: p }
)
for (const c of collections) {
embeddingText +=
'\n' +
(await extractIndexedValues(c, pipeline.hierarchy, {
matchExtra: this.matchExtra,
fieldFilter: this.fieldFilter
}))
}
}
}
if (embeddingText.length > this.summaryLimit) {
break
}
embeddingText += await extractIndexedValues(parentDoc, pipeline.hierarchy, {
embeddingText +=
'\n' +
(await extractIndexedValues(parentDoc, pipeline.hierarchy, {
matchExtra: this.matchExtra,
fieldFilter: this.fieldFilter
})
}))
}
}
}

View File

@ -102,9 +102,9 @@ export const contentStageId = 'cnt-v2b'
/**
* @public
*/
export const fieldStateId = 'fld-v4'
export const fieldStateId = 'fld-v5'
/**
* @public
*/
export const fullTextPushStageId = 'fts-v2'
export const fullTextPushStageId = 'fts-v4'

View File

@ -242,18 +242,25 @@ export function getFullTextContext (
/**
* @public
*/
export function collectPropagate (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] {
export function traverseFullTextContexts (
pipeline: FullTextPipeline,
objectClass: Ref<Class<Doc>>,
op: (ftc: Omit<FullTextSearchContext, keyof Class<Doc>>) => void
): Ref<Class<Doc>>[] {
const desc = new Set(pipeline.hierarchy.getDescendants(objectClass))
const propagate = new Set<Ref<Class<Doc>>>()
const ftContext = getFullTextContext(pipeline.hierarchy, objectClass)
ftContext?.propagate?.forEach((it) => propagate.add(it))
if (ftContext !== undefined) {
op(ftContext)
}
// Add all parent mixins as well
for (const a of pipeline.hierarchy.getAncestors(objectClass)) {
const ftContext = getFullTextContext(pipeline.hierarchy, a)
ftContext?.propagate?.forEach((it) => propagate.add(it))
if (ftContext !== undefined) {
op(ftContext)
}
const dsca = pipeline.hierarchy.getDescendants(a)
for (const dd of dsca) {
if (pipeline.hierarchy.isMixin(dd)) {
@ -265,8 +272,30 @@ export function collectPropagate (pipeline: FullTextPipeline, objectClass: Ref<C
for (const d of desc) {
if (pipeline.hierarchy.isMixin(d)) {
const mContext = getFullTextContext(pipeline.hierarchy, d)
mContext?.propagate?.forEach((it) => propagate.add(it))
if (mContext !== undefined) {
op(mContext)
}
}
}
return Array.from(propagate.values())
}
/**
 * Gathers the `propagate` class references of every full text search context
 * that `traverseFullTextContexts` reports for `objectClass`.
 *
 * Duplicates are dropped; the result keeps first-seen order.
 *
 * @public
 */
export function collectPropagate (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] {
  const collected = new Set<Ref<Class<Doc>>>()
  traverseFullTextContexts(pipeline, objectClass, (context) => {
    const targets = context?.propagate
    // Contexts without a `propagate` list contribute nothing.
    if (targets == null) {
      return
    }
    targets.forEach((target) => collected.add(target))
  })
  return Array.from(collected)
}
/**
 * Gathers the `propagateClasses` class references of every full text search
 * context that `traverseFullTextContexts` reports for `objectClass`.
 *
 * Duplicates are dropped; the result keeps first-seen order.
 *
 * @public
 */
export function collectPropagateClasses (pipeline: FullTextPipeline, objectClass: Ref<Class<Doc>>): Ref<Class<Doc>>[] {
  const seen = new Set<Ref<Class<Doc>>>()
  traverseFullTextContexts(pipeline, objectClass, (context) => {
    const classes = context?.propagateClasses
    // Only contexts that declare `propagateClasses` add entries.
    if (classes != null) {
      classes.forEach((cls) => seen.add(cls))
    }
  })
  return Array.from(seen)
}

View File

@ -293,8 +293,12 @@ class ElasticAdapter implements FullTextAdapter {
const response = await this.client.bulk({ refresh: true, body: operations })
if ((response as any).body.errors === true) {
const errors = response.body.items.filter((it: any) => it.index.error !== undefined)
const errorIds = new Set(errors.map((it: any) => it.index._id))
const erroDocs = docs.filter((it) => errorIds.has(it.id))
// Collect only errors
throw new Error(`Failed to process bulk request: ${JSON.stringify((response as any).body)}`)
const errs = Array.from(errors.map((it: any) => it.index.error.reason as string)).join('\n')
console.error(`Failed to process bulk request: ${errs} ${JSON.stringify(erroDocs)}`)
}
}
return []

View File

@ -18,7 +18,7 @@
},
{
"name": "#platform.notification.timeout",
"value": "1"
"value": "0"
},
{
"name": "#platform.notification.logging",

View File

@ -147,7 +147,7 @@ test('report-time-from-issue-card', async ({ page }) => {
await page.waitForSelector('text="View issue"')
await page.click('text="View issue"')
} finally {
await page.evaluate(() => localStorage.setItem('#platform.notification.timeout', '1'))
await page.evaluate(() => localStorage.setItem('#platform.notification.timeout', '0'))
}
await page.click('#ReportedTimeEditor')