From 76e71712e69a8c3c0aa581de556c74ad5757e76c Mon Sep 17 00:00:00 2001 From: Andrey Sobolev Date: Thu, 20 Apr 2023 17:11:22 +0700 Subject: [PATCH] TSK-1237: Improve full text indexer (#3025) Signed-off-by: Andrey Sobolev --- .gitignore | 1 + .vscode/launch.json | 3 +- dev/generator/src/issues.ts | 2 +- dev/generator/src/recruit.ts | 2 +- dev/tool/src/clean.ts | 2 +- models/core/src/core.ts | 20 ++- models/core/src/index.ts | 29 +++- models/gmail/src/index.ts | 4 + models/recruit/src/index.ts | 6 +- models/telegram/src/index.ts | 4 + packages/core/src/classes.ts | 5 +- packages/core/src/client.ts | 19 ++- packages/core/src/component.ts | 2 + packages/core/src/measurements/metrics.ts | 77 +++++++-- packages/core/src/storage.ts | 3 + packages/core/src/utils.ts | 6 +- packages/model/src/dsl.ts | 7 + .../profiles/default/tsconfig.json | 3 +- .../platform-rig/profiles/ui/tsconfig.json | 1 + packages/platform/src/event.ts | 5 +- .../presentation/src/components/Card.svelte | 2 +- .../components/IndexedDocumentContent.svelte | 11 +- .../components/IndexedDocumentPreview.svelte | 2 +- .../src/components/PDFViewer.svelte | 3 - packages/query/src/index.ts | 24 ++- .../ui/src/components/internal/Root.svelte | 26 ++- .../src/components/internal/icons/WiFi.svelte | 7 +- packages/ui/src/plugin.ts | 5 +- packages/ui/src/utils.ts | 6 + .../presenters/ChecklistsPresenter.svelte | 21 ++- .../board-resources/src/utils/BoardUtils.ts | 2 +- .../EditChannelDescriptionAttachments.svelte | 3 +- .../src/components/Thread.svelte | 3 +- plugins/client-resources/src/connection.ts | 15 ++ plugins/client/src/index.ts | 3 + .../typeEditors/EnumTypeEditor.svelte | 4 +- .../SetParentIssueActionPopup.svelte | 3 - .../src/components/Table.svelte | 2 +- .../src/components/ServerStatistics.svelte | 53 ++++++ .../src/components/WorkbenchApp.svelte | 2 +- plugins/workbench-resources/src/connect.ts | 40 ++++- pods/collaborator/src/metrics.ts | 4 +- pods/server/src/server.ts | 8 +- server-plugins/chunter-resources/src/index.ts | 2 +- server-plugins/openai/src/openai.ts | 15 +- server/account/tsconfig.json | 2 +- server/core/src/adapter.ts | 6 + server/core/src/fulltext.ts | 5 +- server/core/src/indexer/field.ts | 110 +++++------- server/core/src/indexer/fulltextPush.ts | 122 +++++++++----- server/core/src/indexer/indexer.ts | 157 ++++++++++++------ server/core/src/indexer/summary.ts | 113 ++++++++++--- server/core/src/indexer/types.ts | 9 +- server/core/src/indexer/utils.ts | 35 +++- server/core/src/storage.ts | 7 +- server/elastic/src/adapter.ts | 4 + server/elastic/src/backup.ts | 3 + server/front/src/index.ts | 83 ++++----- server/mongo/src/storage.ts | 55 +++--- server/server/src/metrics.ts | 4 +- server/server/src/minio.ts | 3 + server/translate/src/retranslate.ts | 4 +- server/ws/src/server.ts | 63 +++++-- 63 files changed, 879 insertions(+), 373 deletions(-) create mode 100644 plugins/workbench-resources/src/components/ServerStatistics.svelte diff --git a/.gitignore b/.gitignore index 5a7f8e2dc4..3b283ba8e2 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,4 @@ tsdoc-metadata.json pods/front/dist *.cpuprofile *.pyc +metrics.txt diff --git a/.vscode/launch.json b/.vscode/launch.json index fbc22b56fa..1f0ba715d9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -37,7 +37,8 @@ "ELASTIC_URL": "http://localhost:9200", "MONGO_URL": "mongodb://localhost:27017", "APM_SERVER_URL2": "http://localhost:8200", - "METRICS_CONSOLE": "true", // Show metrics in console evert 30 seconds., + "METRICS_CONSOLE": "false", + "METRICS_FILE": "${workspaceRoot}/metrics.txt", // Show metrics in console evert 30 seconds., "MINIO_ENDPOINT": "localhost", "MINIO_ACCESS_KEY": "minioadmin", "MINIO_SECRET_KEY": "minioadmin", diff --git a/dev/generator/src/issues.ts b/dev/generator/src/issues.ts index 7b3689587c..860d9ef219 100644 --- a/dev/generator/src/issues.ts +++ b/dev/generator/src/issues.ts @@ -66,7 +66,7 @@ export async function generateIssues ( await connection.close() ctx.end() - console.info(metricsToString(ctx.metrics, 'Client')) + console.info(metricsToString(ctx.metrics, 'Client', 70)) } async function genIssue (client: TxOperations, statuses: Ref[]): Promise { diff --git a/dev/generator/src/recruit.ts b/dev/generator/src/recruit.ts index 2051901a61..f6f98beca9 100644 --- a/dev/generator/src/recruit.ts +++ b/dev/generator/src/recruit.ts @@ -73,7 +73,7 @@ export async function generateContacts ( await connection.close() ctx.end() - console.info(metricsToString(ctx.metrics, 'Client')) + console.info(metricsToString(ctx.metrics, 'Client', 70)) } async function genVacansyApplicants ( diff --git a/dev/tool/src/clean.ts b/dev/tool/src/clean.ts index d15f3d7987..b28c1e3e80 100644 --- a/dev/tool/src/clean.ts +++ b/dev/tool/src/clean.ts @@ -152,7 +152,7 @@ export async function cleanRemovedTransactions (workspaceId: WorkspaceId, transa ) count += toRemove.length - console.log('processed', count, removedDocs.total) + console.log('processed', count) } console.log('total docs with remove', count) diff --git a/models/core/src/core.ts b/models/core/src/core.ts index 3e3e048050..60a21d583f 100644 --- a/models/core/src/core.ts +++ b/models/core/src/core.ts @@ -59,12 +59,13 @@ import { Prop, TypeBoolean, TypeIntlString, + TypeRecord, TypeRef, TypeString, TypeTimestamp, UX } from '@hcengineering/model' -import type { IntlString } from '@hcengineering/platform' +import { getEmbeddedLabel, IntlString } from '@hcengineering/platform' import core from './component' // C O R E @@ -256,14 +257,17 @@ export class TFulltextData extends TDoc implements FullTextData { @Model(core.class.DocIndexState, core.class.Doc, DOMAIN_DOC_INDEX_STATE) export class TDocIndexState extends TDoc implements DocIndexState { - objectClass!: Ref> + @Prop(TypeRef(core.class.Class), core.string.Class) + @Index(IndexKind.Indexed) + @Hidden() + objectClass!: Ref> @Prop(TypeRef(core.class.Doc), core.string.AttachedTo) @Index(IndexKind.Indexed) @Hidden() attachedTo?: Ref - @Prop(TypeRef(core.class.Doc), core.string.AttachedToClass) + @Prop(TypeRef(core.class.Class), core.string.AttachedToClass) @Index(IndexKind.Indexed) @Hidden() attachedToClass?: Ref> @@ -271,10 +275,16 @@ export class TDocIndexState extends TDoc implements DocIndexState { // Indexable attributes of document. attributes!: Record - removed!: boolean + @Prop(TypeBoolean(), getEmbeddedLabel('Removed')) + @Index(IndexKind.Indexed) + @Hidden() + removed!: boolean // States for different stages - stages!: Record + @Prop(TypeRecord(), getEmbeddedLabel('Stages')) + @Index(IndexKind.Indexed) + @Hidden() + stages!: Record } @Model(core.class.IndexStageState, core.class.Doc, DOMAIN_DOC_INDEX_STATE) diff --git a/models/core/src/index.ts b/models/core/src/index.ts index f98436830c..74e91439d4 100644 --- a/models/core/src/index.ts +++ b/models/core/src/index.ts @@ -20,7 +20,8 @@ import { AttachedDoc, IndexingConfiguration, Class, - systemAccountEmail + systemAccountEmail, + DocIndexState } from '@hcengineering/core' import { Builder } from '@hcengineering/model' import core from './component' @@ -157,4 +158,30 @@ export function createModel (builder: Builder): void { ] } ) + + builder.mixin, IndexingConfiguration>>( + core.class.DocIndexState, + core.class.Class, + core.mixin.IndexConfiguration, + { + indexes: [ + { + _class: 1, + stages: 1, + _id: 1, + modifiedOn: 1 + }, + { + _class: 1, + _id: 1, + modifiedOn: 1 + }, + { + _class: 1, + _id: 1, + objectClass: 1 + } + ] + } + ) } diff --git a/models/gmail/src/index.ts b/models/gmail/src/index.ts index ea0a8e4c32..c7a1a90735 100644 --- a/models/gmail/src/index.ts +++ b/models/gmail/src/index.ts @@ -208,6 +208,10 @@ export function createModel (builder: Builder): void { }, gmail.action.WriteEmail ) + + builder.mixin(gmail.class.Message, core.class.Class, core.mixin.FullTextSearchContext, { + parentPropagate: false + }) } export { gmailOperation } from './migration' diff --git a/models/recruit/src/index.ts b/models/recruit/src/index.ts index 20394405c4..47acf34634 100644 --- a/models/recruit/src/index.ts +++ b/models/recruit/src/index.ts @@ -1117,19 +1117,19 @@ export function createModel (builder: Builder): void { // Allow to use fuzzy search for mixins builder.mixin(recruit.class.Vacancy, core.class.Class, core.mixin.FullTextSearchContext, { fullTextSummary: true, - propogate: [] + propagate: [] }) builder.mixin(recruit.mixin.Candidate, core.class.Class, core.mixin.FullTextSearchContext, { fullTextSummary: true, - propogate: [recruit.class.Applicant] + propagate: [recruit.class.Applicant] }) // Allow to use fuzzy search for mixins builder.mixin(recruit.class.Applicant, core.class.Class, core.mixin.FullTextSearchContext, { fullTextSummary: true, forceIndex: true, - propogate: [] + propagate: [] }) createAction(builder, { diff --git a/models/telegram/src/index.ts b/models/telegram/src/index.ts index f1a9c67cf0..3190e74b2b 100644 --- a/models/telegram/src/index.ts +++ b/models/telegram/src/index.ts @@ -170,6 +170,10 @@ export function createModel (builder: Builder): void { }, telegram.ids.TxSharedCreate ) + + builder.mixin(telegram.class.Message, core.class.Class, core.mixin.FullTextSearchContext, { + parentPropagate: false + }) } export { telegramOperation } from './migration' diff --git a/packages/core/src/classes.ts b/packages/core/src/classes.ts index c6160eb71f..b72652d6a1 100644 --- a/packages/core/src/classes.ts +++ b/packages/core/src/classes.ts @@ -405,7 +405,10 @@ export interface FullTextSearchContext extends Class { forceIndex?: boolean // If defined, will propagate changes to child's with defined set of classes - propogate?: Ref>[] + propagate?: Ref>[] + + // Do we need to propagate child value to parent one. Default(true) + parentPropagate?: boolean } /** diff --git a/packages/core/src/client.ts b/packages/core/src/client.ts index 024901c95b..7bce7a57f4 100644 --- a/packages/core/src/client.ts +++ b/packages/core/src/client.ts @@ -25,6 +25,7 @@ import { Tx, TxCUD, TxCollectionCUD, TxCreateDoc, TxProcessor, TxUpdateDoc } fro import { toFindResult } from './utils' const transactionThreshold = 500 +const modelTransactionThreshold = 50 /** * @public @@ -194,7 +195,11 @@ export async function createClient ( const oldOnConnect: ((apply: boolean) => void) | undefined = conn.onConnect conn.onConnect = async () => { // Find all new transactions and apply - await loadModel(conn, loadedTxIds, allowedPlugins, configs, hierarchy, model) + if (!(await loadModel(conn, loadedTxIds, allowedPlugins, configs, hierarchy, model, true))) { + // We need full refresh + await oldOnConnect?.(false) + return + } // We need to look for last {transactionThreshold} transactions and if it is more since lastTx one we receive, we need to perform full refresh. const atxes = await conn.findAll( @@ -216,7 +221,7 @@ export async function createClient ( } } - if (atxes.total < transactionThreshold && !needFullRefresh) { + if (atxes.length < transactionThreshold && !needFullRefresh) { console.log('applying input transactions', atxes.length) for (const tx of atxes) { txHandler(tx) @@ -236,8 +241,9 @@ async function loadModel ( allowedPlugins: Plugin[] | undefined, configs: Map, PluginConfiguration>, hierarchy: Hierarchy, - model: ModelDb -): Promise { + model: ModelDb, + reload = false +): Promise { const t = Date.now() const atxes = await conn.findAll( @@ -246,6 +252,10 @@ async function loadModel ( { sort: { modifiedOn: SortingOrder.Ascending, _id: SortingOrder.Ascending } } ) + if (reload && atxes.length > modelTransactionThreshold) { + return true + } + let systemTx: Tx[] = [] const userTx: Tx[] = [] console.log('find' + (processedTx.size === 0 ? 'full model' : 'model diff'), atxes.length, Date.now() - t) @@ -289,6 +299,7 @@ async function loadModel ( console.error('failed to apply model transaction, skipping', JSON.stringify(tx), err) } } + return false } function fillConfiguration (systemTx: Tx[], configs: Map, PluginConfiguration>): void { diff --git a/packages/core/src/component.ts b/packages/core/src/component.ts index d9fd444239..645912278d 100644 --- a/packages/core/src/component.ts +++ b/packages/core/src/component.ts @@ -97,6 +97,7 @@ export default plugin(coreId, { TypeHyperlink: '' as Ref>>, TypeNumber: '' as Ref>>, TypeMarkup: '' as Ref>>, + TypeRecord: '' as Ref>>>, TypeBoolean: '' as Ref>>, TypeTimestamp: '' as Ref>>, TypeDate: '' as Ref>>, @@ -151,6 +152,7 @@ export default plugin(coreId, { AttachedTo: '' as IntlString, AttachedToClass: '' as IntlString, String: '' as IntlString, + Record: '' as IntlString, Markup: '' as IntlString, Number: '' as IntlString, Boolean: '' as IntlString, diff --git a/packages/core/src/measurements/metrics.ts b/packages/core/src/measurements/metrics.ts index 4c6fd579e9..5f65f45b4a 100644 --- a/packages/core/src/measurements/metrics.ts +++ b/packages/core/src/measurements/metrics.ts @@ -68,7 +68,10 @@ export function childMetrics (root: Metrics, path: string[]): Metrics { return oop } -function aggregate (m: Metrics): Metrics { +/** + * @public + */ +export function metricsAggregate (m: Metrics): Metrics { const ms = aggregateMetrics(m.measurements) // Use child overage, if there is no top level value specified. @@ -105,34 +108,38 @@ function aggregate (m: Metrics): Metrics { function aggregateMetrics (m: Record): Record { const result: Record = {} for (const [k, v] of Object.entries(m).sort((a, b) => b[1].time - a[1].time)) { - result[k] = aggregate(v) + result[k] = metricsAggregate(v) } return result } -function toLen (val: string, sep: string, len = 50): string { +function toLen (val: string, sep: string, len: number): string { while (val.length < len) { val += sep } return val } -function printMetricsChildren (params: Record, offset: number): string { +function printMetricsChildren (params: Record, offset: number, length: number): string { let r = '' if (Object.keys(params).length > 0) { r += '\n' + toLen('', ' ', offset) r += Object.entries(params) - .map(([k, vv]) => toString(k, vv, offset)) + .map(([k, vv]) => toString(k, vv, offset, length)) .join('\n' + toLen('', ' ', offset)) } return r } -function printMetricsParams (params: Record>, offset: number): string { +function printMetricsParams ( + params: Record>, + offset: number, + length: number +): string { let r = '' const joinP = (key: string, data: Record): string[] => { return Object.entries(data).map(([k, vv]) => - `${toLen('', ' ', offset)}${toLen(key + '=' + k, '-', 70 - offset)}: avg ${ + `${toLen('', ' ', offset)}${toLen(key + '=' + k, '-', length - offset)}: avg ${ vv.time / (vv.operations > 0 ? vv.operations : 1) } total: ${vv.time} ops: ${vv.operations}`.trim() ) @@ -145,18 +152,62 @@ function printMetricsParams (params: Record> return r } -function toString (name: string, m: Metrics, offset: number): string { - let r = `${toLen('', ' ', offset)}${toLen(name, '-', 70 - offset)}: avg ${ +function toString (name: string, m: Metrics, offset: number, length: number): string { + let r = `${toLen('', ' ', offset)}${toLen(name, '-', length - offset)}: avg ${ m.time / (m.operations > 0 ? m.operations : 1) } total: ${m.time} ops: ${m.operations}`.trim() - r += printMetricsParams(m.params, offset + 4) - r += printMetricsChildren(m.measurements, offset + 4) + r += printMetricsParams(m.params, offset + 4, length) + r += printMetricsChildren(m.measurements, offset + 4, length) return r } /** * @public */ -export function metricsToString (metrics: Metrics, name = 'System'): string { - return toString(name, aggregate(metrics), 0) +export function metricsToString (metrics: Metrics, name = 'System', length: number): string { + return toString(name, metricsAggregate(metrics), 0, length) +} + +function printMetricsParamsRows ( + params: Record>, + offset: number +): (string | number)[][] { + const r: (string | number)[][] = [] + function joinP (key: string, data: Record): (string | number)[][] { + return Object.entries(data).map(([k, vv]) => [ + offset, + `${key}=${k}`, + vv.time / (vv.operations > 0 ? vv.operations : 1), + vv.time, + vv.operations + ]) + } + for (const [k, v] of Object.entries(params)) { + r.push(...joinP(k, v)) + } + return r +} + +function printMetricsChildrenRows (params: Record, offset: number): (string | number)[][] { + const r: (string | number)[][] = [] + if (Object.keys(params).length > 0) { + Object.entries(params).forEach(([k, vv]) => r.push(...toStringRows(k, vv, offset))) + } + return r +} + +function toStringRows (name: string, m: Metrics, offset: number): (number | string)[][] { + const r: (number | string)[][] = [ + [offset, name, m.time / (m.operations > 0 ? m.operations : 1), m.time, m.operations] + ] + r.push(...printMetricsParamsRows(m.params, offset + 1)) + r.push(...printMetricsChildrenRows(m.measurements, offset + 1)) + return r +} + +/** + * @public + */ +export function metricsToRows (metrics: Metrics, name = 'System'): (number | string)[][] { + return toStringRows(name, metricsAggregate(metrics), 0) } diff --git a/packages/core/src/storage.ts b/packages/core/src/storage.ts index e20351a6a1..3773d124c8 100644 --- a/packages/core/src/storage.ts +++ b/packages/core/src/storage.ts @@ -121,6 +121,9 @@ export type FindOptions = { sort?: SortingQuery lookup?: Lookup projection?: Projection + + // If specified total will be returned + total?: boolean } /** diff --git a/packages/core/src/utils.ts b/packages/core/src/utils.ts index 2ef4a6ae28..6835925182 100644 --- a/packages/core/src/utils.ts +++ b/packages/core/src/utils.ts @@ -172,7 +172,11 @@ export function extractDocKey (key: string): { * @public */ export function isFullTextAttribute (attr: AnyAttribute): boolean { - return attr.index === IndexKind.FullText || attr.type._class === core.class.TypeAttachment + return ( + attr.index === IndexKind.FullText || + attr.type._class === core.class.TypeAttachment || + attr.type._class === core.class.EnumOf + ) } /** diff --git a/packages/model/src/dsl.ts b/packages/model/src/dsl.ts index 164da47184..aed8be6a16 100644 --- a/packages/model/src/dsl.ts +++ b/packages/model/src/dsl.ts @@ -383,6 +383,13 @@ export function TypeMarkup (): Type { return { _class: core.class.TypeMarkup, label: core.string.Markup } } +/** + * @public + */ +export function TypeRecord (): Type { + return { _class: core.class.TypeRecord, label: core.string.Record } +} + /** * @public */ diff --git a/packages/platform-rig/profiles/default/tsconfig.json b/packages/platform-rig/profiles/default/tsconfig.json index 0074c36d4f..7eba506e7b 100644 --- a/packages/platform-rig/profiles/default/tsconfig.json +++ b/packages/platform-rig/profiles/default/tsconfig.json @@ -8,6 +8,7 @@ "resolveJsonModule": true, "types": ["heft-jest"], "skipLibCheck": true, - "incremental": true + "incremental": true, + "declarationMap": true } } \ No newline at end of file diff --git a/packages/platform-rig/profiles/ui/tsconfig.json b/packages/platform-rig/profiles/ui/tsconfig.json index 199554d16c..7e059d1483 100644 --- a/packages/platform-rig/profiles/ui/tsconfig.json +++ b/packages/platform-rig/profiles/ui/tsconfig.json @@ -9,6 +9,7 @@ "skipLibCheck": true, "incremental": true, "esModuleInterop": true, + "declarationMap": true, "lib": [ "esnext", "dom" diff --git a/packages/platform/src/event.ts b/packages/platform/src/event.ts index 015e899034..1a3b69d58c 100644 --- a/packages/platform/src/event.ts +++ b/packages/platform/src/event.ts @@ -54,7 +54,10 @@ export function removeEventListener (event: string, listener: EventListener): vo } } -async function broadcastEvent (event: string, data: any): Promise { +/** + * @public + */ +export async function broadcastEvent (event: string, data: any): Promise { const listeners = eventListeners.get(event) if (listeners !== undefined) { const promises = listeners.map(async (listener) => await listener(event, data)) diff --git a/packages/presentation/src/components/Card.svelte b/packages/presentation/src/components/Card.svelte index a99b46291d..c965985910 100644 --- a/packages/presentation/src/components/Card.svelte +++ b/packages/presentation/src/components/Card.svelte @@ -73,7 +73,7 @@ /> - +
diff --git a/packages/presentation/src/components/IndexedDocumentContent.svelte b/packages/presentation/src/components/IndexedDocumentContent.svelte index f8229837fa..904ead2d79 100644 --- a/packages/presentation/src/components/IndexedDocumentContent.svelte +++ b/packages/presentation/src/components/IndexedDocumentContent.svelte @@ -11,6 +11,9 @@ const client = getClient() function getContent (extra: string[], value: string): string[] { + if (value == null || value === '') { + return [] + } const result = extra.includes('base64') ? decodeURIComponent(escape(atob(value))) : value return `${result}`.split('\n') @@ -43,8 +46,12 @@ {#if summary} {#if search.length > 0} Result: - {#each summary.split('\n').filter((line) => line.toLowerCase().includes(search.toLowerCase())) as line} - {line} + {#each summary.split('\n').filter((line, idx, arr) => { + return line.toLowerCase().includes(search.toLowerCase()) || arr[idx - 1] + ?.toLowerCase() + .includes(search.toLowerCase()) + }) as line} + {line} {/each}
{/if} diff --git a/packages/presentation/src/components/IndexedDocumentPreview.svelte b/packages/presentation/src/components/IndexedDocumentPreview.svelte index 1349b56200..aed7e0c5ab 100644 --- a/packages/presentation/src/components/IndexedDocumentPreview.svelte +++ b/packages/presentation/src/components/IndexedDocumentPreview.svelte @@ -53,7 +53,7 @@ display: grid; overflow: auto; min-width: 50rem; - max-width: 200rem; + max-width: 80rem; } .indexed-background { background-color: white; diff --git a/packages/presentation/src/components/PDFViewer.svelte b/packages/presentation/src/components/PDFViewer.svelte index 039b97ca45..89b10648fd 100644 --- a/packages/presentation/src/components/PDFViewer.svelte +++ b/packages/presentation/src/components/PDFViewer.svelte @@ -19,7 +19,6 @@ import presentation from '..' import { getFileUrl } from '../utils' import Download from './icons/Download.svelte' - import IndexedDocumentPreview from './IndexedDocumentPreview.svelte' export let file: string export let name: string @@ -79,8 +78,6 @@
- {:else if contentType && contentType.startsWith('application/msword')} - {:else}