Configurable OpenAI (#2529)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
Andrey Sobolev 2023-01-24 20:42:47 +07:00 committed by GitHub
parent 3daa9a3136
commit ba8ab45dc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
111 changed files with 2784 additions and 883 deletions

1
.gitignore vendored
View File

@ -79,3 +79,4 @@ ingest-attachment-*.zip
tsdoc-metadata.json
pods/front/dist
*.cpuprofile
*.pyc

2
.nvmrc
View File

@ -1 +1 @@
16.14.2
18.13.0

94
.vscode/launch.json vendored
View File

@ -4,13 +4,23 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Embeddings",
"type": "python",
"request": "launch",
"program": "./pods/embeddings/server.py",
"args": ["--model", "sentence-transformers/all-MiniLM-L6-v2", "--device", "cpu"],
"console": "integratedTerminal",
"justMyCode": true,
"env": {
"PYTORCH_ENABLE_MPS_FALLBACK":"1"
}
},
{
"name": "Debug server",
"type": "node",
"request": "launch",
"args": [
"src/__start.ts"
],
"args": ["src/__start.ts"],
"env": {
"ELASTIC_URL": "http://localhost:9200",
"MONGO_URL": "mongodb://localhost:27017",
@ -21,16 +31,11 @@
"MINIO_SECRET_KEY": "minioadmin",
"SERVER_SECRET": "secret",
"REKONI_URL": "http://localhost:4004",
// "OPENAI_TOKEN": "",
// "RETRANSLATE_URL": "http://127.0.0.1:4500",
//"RETRANSLATE_URL": "https://208.167.249.201",
// "RETRANSLATE_TOKEN": ""
},
"runtimeArgs": [
"--nolazy",
"-r",
"ts-node/register"
],
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"showAsyncStacks": true,
"sourceMaps": true,
"cwd": "${workspaceRoot}/pods/server",
@ -40,20 +45,14 @@
"name": "Debug Account",
"type": "node",
"request": "launch",
"args": [
"src/__start.ts"
],
"args": ["src/__start.ts"],
"env": {
"MONGO_URL": "mongodb://localhost:27018",
"SERVER_SECRET": "secret",
"TRANSACTOR_URL": "ws:/localhost:3333",
"ACCOUNT_PORT": "3000"
},
"runtimeArgs": [
"--nolazy",
"-r",
"ts-node/register"
],
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"cwd": "${workspaceRoot}/pods/account",
"protocol": "inspector"
@ -64,13 +63,7 @@
"name": "Debug Jest tests",
"program": "${fileDirname}/../../node_modules/@rushstack/heft/lib/start.js",
"cwd": "${fileDirname}/../../",
"args": [
"--debug",
"test",
"--clean",
"--test-path-pattern",
"${file}"
],
"args": ["--debug", "test", "--clean", "--test-path-pattern", "${file}"],
"console": "integratedTerminal",
"sourceMaps": true,
"protocol": "inspector"
@ -79,23 +72,14 @@
"name": "Debug generator",
"type": "node",
"request": "launch",
"args": [
"src/index.ts",
"gen-recruit",
"ws1",
"20"
],
"args": ["src/index.ts", "gen-recruit", "ws1", "20"],
"env": {
"TRANSACTOR_URL": "ws:/localhost:3333",
"MINIO_ACCESS_KEY": "minioadmin",
"MINIO_SECRET_KEY": "minioadmin",
"MINIO_ENDPOINT": "localhost"
},
"runtimeArgs": [
"--nolazy",
"-r",
"ts-node/register"
],
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"cwd": "${workspaceRoot}/dev/generator",
"protocol": "inspector"
@ -104,12 +88,7 @@
"name": "Debug tool import-lead-csv",
"type": "node",
"request": "launch",
"args": [
"src/index.ts",
"import-lead-csv",
"ws1",
"../../../suho/COMPANIES_Agency_of_AlexeyS.csv"
],
"args": ["src/index.ts", "import-lead-csv", "ws1", "../../../suho/COMPANIES_Agency_of_AlexeyS.csv"],
"env": {
"MINIO_ACCESS_KEY": "minioadmin",
"MINIO_SECRET_KEY": "minioadmin",
@ -119,11 +98,7 @@
"TELEGRAM_DATABASE": "telegram-service",
"ELASTIC_URL": "http://localhost:9200"
},
"runtimeArgs": [
"--nolazy",
"-r",
"ts-node/register"
],
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"cwd": "${workspaceRoot}/dev/tool"
},
@ -131,10 +106,7 @@
"name": "Debug tool upgrade",
"type": "node",
"request": "launch",
"args": [
"src/index.ts",
"upgrade"
],
"args": ["src/index.ts", "upgrade"],
"env": {
"SERVER_SECRET": "secret",
"MINIO_ACCESS_KEY": "minioadmin",
@ -146,11 +118,7 @@
"ELASTIC_URL": "http://localhost:9200",
"REKONI_URL": "http://localhost:4004"
},
"runtimeArgs": [
"--nolazy",
"-r",
"ts-node/register"
],
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"cwd": "${workspaceRoot}/dev/tool"
},
@ -158,23 +126,17 @@
"name": "Debug Collaborator",
"type": "node",
"request": "launch",
"args": [
"src/__start.ts"
],
"args": ["src/__start.ts"],
"env": {
"SECRET": "secret",
"MINIO_ACCESS_KEY": "minioadmin",
"MINIO_SECRET_KEY": "minioadmin",
"MINIO_ENDPOINT": "localhost",
"MINIO_ENDPOINT": "localhost"
},
"runtimeArgs": [
"--nolazy",
"-r",
"ts-node/register"
],
"runtimeArgs": ["--nolazy", "-r", "ts-node/register"],
"sourceMaps": true,
"cwd": "${workspaceRoot}/pods/collaborator",
"protocol": "inspector"
},
}
]
}
}

View File

@ -101,6 +101,7 @@ specifiers:
'@rush-temp/model-server-inventory': file:./projects/model-server-inventory.tgz
'@rush-temp/model-server-lead': file:./projects/model-server-lead.tgz
'@rush-temp/model-server-notification': file:./projects/model-server-notification.tgz
'@rush-temp/model-server-openai': file:./projects/model-server-openai.tgz
'@rush-temp/model-server-recruit': file:./projects/model-server-recruit.tgz
'@rush-temp/model-server-request': file:./projects/model-server-request.tgz
'@rush-temp/model-server-setting': file:./projects/model-server-setting.tgz
@ -108,6 +109,7 @@ specifiers:
'@rush-temp/model-server-task': file:./projects/model-server-task.tgz
'@rush-temp/model-server-telegram': file:./projects/model-server-telegram.tgz
'@rush-temp/model-server-tracker': file:./projects/model-server-tracker.tgz
'@rush-temp/model-server-translate': file:./projects/model-server-translate.tgz
'@rush-temp/model-setting': file:./projects/model-setting.tgz
'@rush-temp/model-tags': file:./projects/model-tags.tgz
'@rush-temp/model-task': file:./projects/model-task.tgz
@ -243,6 +245,7 @@ specifiers:
'@types/express-fileupload': ^1.1.7
'@types/faker': ~5.5.9
'@types/heft-jest': ^1.0.3
'@types/html-to-text': ^8.1.1
'@types/jest': ~28.1.0
'@types/koa': ^2.13.4
'@types/koa-bodyparser': ^4.3.3
@ -296,6 +299,7 @@ specifiers:
file-loader: ^6.2.0
filesize: ^8.0.3
got: ^11.8.3
html-to-text: ^9.0.3
html-webpack-plugin: ^5.5.0
intl-messageformat: ^9.7.1
jpeg-js: ~0.4.3
@ -461,6 +465,7 @@ dependencies:
'@rush-temp/model-server-inventory': file:projects/model-server-inventory.tgz_typescript@4.8.4
'@rush-temp/model-server-lead': file:projects/model-server-lead.tgz_typescript@4.8.4
'@rush-temp/model-server-notification': file:projects/model-server-notification.tgz_typescript@4.8.4
'@rush-temp/model-server-openai': file:projects/model-server-openai.tgz_typescript@4.8.4
'@rush-temp/model-server-recruit': file:projects/model-server-recruit.tgz_typescript@4.8.4
'@rush-temp/model-server-request': file:projects/model-server-request.tgz_typescript@4.8.4
'@rush-temp/model-server-setting': file:projects/model-server-setting.tgz_typescript@4.8.4
@ -468,6 +473,7 @@ dependencies:
'@rush-temp/model-server-task': file:projects/model-server-task.tgz_typescript@4.8.4
'@rush-temp/model-server-telegram': file:projects/model-server-telegram.tgz_typescript@4.8.4
'@rush-temp/model-server-tracker': file:projects/model-server-tracker.tgz_typescript@4.8.4
'@rush-temp/model-server-translate': file:projects/model-server-translate.tgz_typescript@4.8.4
'@rush-temp/model-setting': file:projects/model-setting.tgz_typescript@4.8.4
'@rush-temp/model-tags': file:projects/model-tags.tgz_typescript@4.8.4
'@rush-temp/model-task': file:projects/model-task.tgz_typescript@4.8.4
@ -603,6 +609,7 @@ dependencies:
'@types/express-fileupload': 1.4.1
'@types/faker': 5.5.9
'@types/heft-jest': 1.0.3
'@types/html-to-text': 8.1.1
'@types/jest': 28.1.8
'@types/koa': 2.13.5
'@types/koa-bodyparser': 4.3.10
@ -656,6 +663,7 @@ dependencies:
file-loader: 6.2.0_webpack@5.75.0
filesize: 8.0.7
got: 11.8.5
html-to-text: 9.0.3
html-webpack-plugin: 5.5.0_webpack@5.75.0
intl-messageformat: 9.13.0
jpeg-js: 0.4.4
@ -2655,6 +2663,13 @@ packages:
string-argv: 0.3.1
dev: false
/@selderee/plugin-htmlparser2/0.10.0:
resolution: {integrity: sha512-gW69MEamZ4wk1OsOq1nG1jcyhXIQcnrsX5JwixVw/9xaiav8TCyjESAruu1Rz9yyInhgBXxkNwMeygKnN2uxNA==}
dependencies:
domhandler: 5.0.3
selderee: 0.10.0
dev: false
/@sinclair/typebox/0.24.51:
resolution: {integrity: sha512-1P1OROm/rdubP5aFDSZQILU0vrLCJ4fvHt6EoqHEM+2D/G5MK3bIaymUKLit8Js9gbns5UyJnkP/TZROLw4tUA==}
dev: false
@ -3270,6 +3285,10 @@ packages:
resolution: {integrity: sha512-oh/6byDPnL1zeNXFrDXFLyZjkr1MsBG667IM792caf1L2UPOOMf65NFzjUH/ltyfwjAGfs1rsX1eftK0jC/KIg==}
dev: false
/@types/html-to-text/8.1.1:
resolution: {integrity: sha512-QFcqfc7TiVbvIX8Fc2kWUxakruI1Ay6uitaGCYHzI5M0WHQROV5D2XeSaVrK0FmvssivXum4yERVnJsiuH61Ww==}
dev: false
/@types/http-assert/1.5.3:
resolution: {integrity: sha512-FyAOrDuQmBi8/or3ns4rwPno7/9tJTijVW6aQQjK02+kOQ8zmoNg2XJtAuQhvQcy1ASJq38wirX5//9J1EqoUA==}
dev: false
@ -5040,6 +5059,16 @@ packages:
nth-check: 2.1.1
dev: false
/css-select/5.1.0:
resolution: {integrity: sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==}
dependencies:
boolbase: 1.0.0
css-what: 6.1.0
domhandler: 5.0.3
domutils: 3.0.1
nth-check: 2.1.1
dev: false
/css-tree/1.1.3:
resolution: {integrity: sha512-tRpdppF7TRazZrjJ6v3stzv93qxRcSsFmW6cX0Zm2NVKpxE1WV1HblnghVv9TreireHkqI/VDEsfolRF1p6y7Q==}
engines: {node: '>=8.0.0'}
@ -5343,6 +5372,14 @@ packages:
entities: 2.2.0
dev: false
/dom-serializer/2.0.0:
resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==}
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
entities: 4.4.0
dev: false
/domelementtype/2.3.0:
resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==}
dev: false
@ -5361,6 +5398,13 @@ packages:
domelementtype: 2.3.0
dev: false
/domhandler/5.0.3:
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
engines: {node: '>= 4'}
dependencies:
domelementtype: 2.3.0
dev: false
/domutils/2.8.0:
resolution: {integrity: sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==}
dependencies:
@ -5369,6 +5413,14 @@ packages:
domhandler: 4.3.1
dev: false
/domutils/3.0.1:
resolution: {integrity: sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q==}
dependencies:
dom-serializer: 2.0.0
domelementtype: 2.3.0
domhandler: 5.0.3
dev: false
/dot-case/3.0.4:
resolution: {integrity: sha512-Kv5nKlh6yRrdrGvxeJ2e5y2eRUpkUosIW4A2AS38zwSz27zu7ufDwQPi5Jhs3XAlGNetl3bmnGhQsMtkKJnj3w==}
dependencies:
@ -5534,6 +5586,11 @@ packages:
resolution: {integrity: sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==}
dev: false
/entities/4.4.0:
resolution: {integrity: sha512-oYp7156SP8LkeGD0GF85ad1X9Ai79WtRsZ2gxJqtBuzH+98YUV6jkHEKlZkMbcrjJjIVJNIDP/3WL9wQkoPbWA==}
engines: {node: '>=0.12'}
dev: false
/envinfo/7.8.1:
resolution: {integrity: sha512-/o+BXHmB7ocbHEAs6F2EnG0ogybVVUdkRunTT2glZU9XAaGmhqskrvKwqXuDfNjEO0LZKWdejEEpnq8aM0tOaw==}
engines: {node: '>=4'}
@ -6664,6 +6721,17 @@ packages:
terser: 5.15.1
dev: false
/html-to-text/9.0.3:
resolution: {integrity: sha512-hxDF1kVCF2uw4VUJ3vr2doc91pXf2D5ngKcNviSitNkhP9OMOaJkDrFIFL6RMvko7NisWTEiqGpQ9LAxcVok1w==}
engines: {node: '>=14'}
dependencies:
'@selderee/plugin-htmlparser2': 0.10.0
deepmerge: 4.2.2
dom-serializer: 2.0.0
htmlparser2: 8.0.1
selderee: 0.10.0
dev: false
/html-webpack-plugin/5.5.0_webpack@5.75.0:
resolution: {integrity: sha512-sy88PC2cRTVxvETRgUHFrL4No3UxvcH8G1NepGhqaTT+GXN2kTamqasot0inS5hXeg1cMbFDt27zzo9p35lZVw==}
engines: {node: '>=10.13.0'}
@ -6687,6 +6755,15 @@ packages:
entities: 2.2.0
dev: false
/htmlparser2/8.0.1:
resolution: {integrity: sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA==}
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
domutils: 3.0.1
entities: 4.4.0
dev: false
/http-assert/1.5.0:
resolution: {integrity: sha512-uPpH7OKX4H25hBmU6G1jWNaqJGpTXxey+YOUizJUAgu0AjLUeC8D73hTrhvDS5D+GJN1DN1+hhc/eF/wpxtp0w==}
engines: {node: '>= 0.8'}
@ -7970,6 +8047,10 @@ packages:
- supports-color
dev: false
/leac/0.6.0:
resolution: {integrity: sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg==}
dev: false
/level-codec/9.0.2:
resolution: {integrity: sha512-UyIwNb1lJBChJnGfjmO0OR+ezh2iVu1Kas3nvBS/BzGnx79dv6g7unpKIDNPMhfdTEGoc7mC8uAu51XEtX+FHQ==}
engines: {node: '>=6'}
@ -8560,6 +8641,13 @@ packages:
dev: false
optional: true
/node-html-parser/6.1.4:
resolution: {integrity: sha512-3muP9Uy/Pz7bQa9TNYVQzWJhNZMqyCx7xJle8kz2/y1UgzAUyXXShc1IcPaJy6u07CE3K5rQcRwlvHzmlySRjg==}
dependencies:
css-select: 5.1.0
he: 1.2.0
dev: false
/node-int64/0.4.0:
resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==}
dev: false
@ -8882,6 +8970,13 @@ packages:
resolution: {integrity: sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==}
dev: false
/parseley/0.11.0:
resolution: {integrity: sha512-VfcwXlBWgTF+unPcr7yu3HSSA6QUdDaDnrHcytVfj5Z8azAyKBDrYnSIfeSxlrEayndNcLmrXzg+Vxbo6DWRXQ==}
dependencies:
leac: 0.6.0
peberminta: 0.8.0
dev: false
/parseurl/1.3.3:
resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
engines: {node: '>= 0.8'}
@ -8946,6 +9041,10 @@ packages:
png-js: 1.0.0
dev: false
/peberminta/0.8.0:
resolution: {integrity: sha512-YYEs+eauIjDH5nUEGi18EohWE0nV2QbGTqmxQcqgZ/0g+laPCQmuIqq7EBLVi9uim9zMgfJv0QBZEnQ3uHw/Tw==}
dev: false
/picocolors/1.0.0:
resolution: {integrity: sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==}
dev: false
@ -9744,6 +9843,12 @@ packages:
resolution: {integrity: sha512-ZQruFgZnIWH+WyO9t5rWt4ZEGqCKPwhiw+YbzTwpmT9elgLrLcfuyUiSnwwjUiVy9r4VM3urtbNF1xmEh9IL2w==}
dev: false
/selderee/0.10.0:
resolution: {integrity: sha512-DEL/RW/f4qLw/NrVg97xKaEBC8IpzIG2fvxnzCp3Z4yk4jQ3MXom+Imav9wApjxX2dfS3eW7x0DXafJr85i39A==}
dependencies:
parseley: 0.11.0
dev: false
/select-hose/2.0.0:
resolution: {integrity: sha512-mEugaLK+YfkijB4fx0e6kImuJdCIt2LxCRcbEYPqRGCs4F2ogyfZU5IAZRdjCP8JPq2AtdNoC/Dux63d9Kiryg==}
dev: false
@ -12979,7 +13084,7 @@ packages:
dev: false
file:projects/model-all.tgz_typescript@4.8.4:
resolution: {integrity: sha512-YJ24m8JisesWHIeu07W27itLMJx/1zvCWzMJJ9rG4uj84b5HWzTvKN7FH+0NVqCHAbWAYbI9YA4kulsXibMx+Q==, tarball: file:projects/model-all.tgz}
resolution: {integrity: sha512-AejnJ5pKXrdrHEdXX2AoOtbSX2mx0ceRbISCPyuvr1O9w8VKnmp43TGLOZEKVY3D+dmnjKj1+2+A+bAxMCUsQQ==, tarball: file:projects/model-all.tgz}
id: file:projects/model-all.tgz
name: '@rush-temp/model-all'
version: 0.0.0
@ -13385,7 +13490,7 @@ packages:
dev: false
file:projects/model-request.tgz_typescript@4.8.4:
resolution: {integrity: sha512-00s0oDh2VK4P0RC2clxG3ib9WL0uxHregVDglu62MFlx6bJYuoDfxXkohCXxQz22pMHTPw+8l+3pxh7l4+iiyA==, tarball: file:projects/model-request.tgz}
resolution: {integrity: sha512-qXH8qzGsB6WvZGrI31n6ZkpovnLiHXCK7eRaNiUADKFDiqJlGKvPf3EjQERG6ozuLcmsye69jOvmHFwavUdaoA==, tarball: file:projects/model-request.tgz}
id: file:projects/model-request.tgz
name: '@rush-temp/model-request'
version: 0.0.0
@ -13634,6 +13739,27 @@ packages:
- typescript
dev: false
file:projects/model-server-openai.tgz_typescript@4.8.4:
resolution: {integrity: sha512-p427LbMH/TgKUaWcXhn+2agHvOHvRxR3NYMorowkpSqe4VzS3A/IoKF/wlxZBxPSfqhke4wPa1vqRYehWvYBpg==, tarball: file:projects/model-server-openai.tgz}
id: file:projects/model-server-openai.tgz
name: '@rush-temp/model-server-openai'
version: 0.0.0
dependencies:
'@rushstack/heft': 0.47.11
'@types/heft-jest': 1.0.3
'@typescript-eslint/eslint-plugin': 5.42.1_d506b9be61cb4ac2646ecbc6e0680464
'@typescript-eslint/parser': 5.42.1_eslint@8.27.0+typescript@4.8.4
eslint: 8.27.0
eslint-config-standard-with-typescript: 23.0.0_c9fe9619f50f4e82337a86c3af25e566
eslint-plugin-import: 2.26.0_eslint@8.27.0
eslint-plugin-n: 15.5.1_eslint@8.27.0
eslint-plugin-promise: 6.1.1_eslint@8.27.0
prettier: 2.7.1
transitivePeerDependencies:
- supports-color
- typescript
dev: false
file:projects/model-server-recruit.tgz_typescript@4.8.4:
resolution: {integrity: sha512-udSatCWhK7BkKIbEZFhCd1Hm4pzwwDw9poiOR8MXv6lNmHKz3elXPo2rkjaak03PDBSe7S0lqDwcja+UUaknHA==, tarball: file:projects/model-server-recruit.tgz}
id: file:projects/model-server-recruit.tgz
@ -13781,6 +13907,27 @@ packages:
- typescript
dev: false
file:projects/model-server-translate.tgz_typescript@4.8.4:
resolution: {integrity: sha512-gD/Mrri41y0Ss7/f/wK5XuRNz+sjn3ZiaOoHM9cput5763rm0EmC5R5j5sL5fVqj+Uo5ECYO6gH6y0ewJZbbYg==, tarball: file:projects/model-server-translate.tgz}
id: file:projects/model-server-translate.tgz
name: '@rush-temp/model-server-translate'
version: 0.0.0
dependencies:
'@rushstack/heft': 0.47.11
'@types/heft-jest': 1.0.3
'@typescript-eslint/eslint-plugin': 5.42.1_d506b9be61cb4ac2646ecbc6e0680464
'@typescript-eslint/parser': 5.42.1_eslint@8.27.0+typescript@4.8.4
eslint: 8.27.0
eslint-config-standard-with-typescript: 23.0.0_c9fe9619f50f4e82337a86c3af25e566
eslint-plugin-import: 2.26.0_eslint@8.27.0
eslint-plugin-n: 15.5.1_eslint@8.27.0
eslint-plugin-promise: 6.1.1_eslint@8.27.0
prettier: 2.7.1
transitivePeerDependencies:
- supports-color
- typescript
dev: false
file:projects/model-setting.tgz_typescript@4.8.4:
resolution: {integrity: sha512-VNYzzzsdvUfp71pgoPt52l+/VZ8/OV5JQJIDLOuT/2KkQEepZRaR+DZ2HXCg5v8Jvu/t/a9tQTPDN4juziuVew==, tarball: file:projects/model-setting.tgz}
id: file:projects/model-setting.tgz
@ -13929,7 +14076,7 @@ packages:
dev: false
file:projects/model-view.tgz_typescript@4.8.4:
resolution: {integrity: sha512-fnQyZEHhMGWtr7Jn2J70q9Hn/5J+7H48QB1i/kQgq9emhR43qz1R7tNlfyDx0IKi4l4pSratkCc/k17CL5nUZA==, tarball: file:projects/model-view.tgz}
resolution: {integrity: sha512-xSdAkhRAW0NreiqzR/EeLXsGTLhzGMIjTyZ7wi07R/ee3gl7kAib1GMQIEcTqVAyFiDlDkmL9h+yTap51ltCvA==, tarball: file:projects/model-view.tgz}
id: file:projects/model-view.tgz
name: '@rush-temp/model-view'
version: 0.0.0
@ -14110,12 +14257,13 @@ packages:
dev: false
file:projects/openai.tgz:
resolution: {integrity: sha512-kj0S76WwqQt/Ntn+c7kK4ELOb5bYEkbfWBFlWyQe379574DHwDg69Qyk58XNrUveq7yMqyekWX9kJWO1sEac3w==, tarball: file:projects/openai.tgz}
resolution: {integrity: sha512-nplFlMK8VBiE2EBm11HhSpX74sZapWZ30sEWOHZfAv8AZqCINY0FOHP0Jwc+AVFtMzClx8NTCo6pgaNTLs0ixA==, tarball: file:projects/openai.tgz}
name: '@rush-temp/openai'
version: 0.0.0
dependencies:
'@rushstack/heft': 0.47.11
'@types/heft-jest': 1.0.3
'@types/html-to-text': 8.1.1
'@types/node': 16.11.68
'@typescript-eslint/eslint-plugin': 5.42.1_d506b9be61cb4ac2646ecbc6e0680464
'@typescript-eslint/parser': 5.42.1_eslint@8.27.0+typescript@4.8.4
@ -14127,6 +14275,8 @@ packages:
eslint-plugin-promise: 6.1.1_eslint@8.27.0
fast-equals: 2.0.4
got: 11.8.5
html-to-text: 9.0.3
node-html-parser: 6.1.4
prettier: 2.7.1
typescript: 4.8.4
transitivePeerDependencies:
@ -14362,7 +14512,7 @@ packages:
dev: false
file:projects/pod-server.tgz:
resolution: {integrity: sha512-Qxljr6dIG19YobZ8ejPW4lvpnweN0geci7Ra+Aa9vxPXxAlcGrQOi+0nW2+IeWDY1Sp3vX+eK8gD5OCqG6CpVg==, tarball: file:projects/pod-server.tgz}
resolution: {integrity: sha512-4IjhfCueH7UYdgcWfpeiciOQADpXCKj1C41rP6ZgHaIWzUErhMl2X/T5C2SQfp0nnbvpPWqWeqSPYYK2GGGkdw==, tarball: file:projects/pod-server.tgz}
name: '@rush-temp/pod-server'
version: 0.0.0
dependencies:
@ -14660,7 +14810,7 @@ packages:
dev: false
file:projects/request-resources.tgz_a1d864769aaf53d09b76fe134ab55e60:
resolution: {integrity: sha512-XxsNDKjd8LE9KTaTIx59hSkVt3hBmfrm/tVYjiwGccjU8Rlr6DrC8OIYEhiaJ9S1GeTr81ZuucMtdy8na+xcCA==, tarball: file:projects/request-resources.tgz}
resolution: {integrity: sha512-I5i+xjZnEMgt+sj1B4xRlsWV2vDHqB9RW1fJML/0DPQS8bs2JkNrJ4QhXtE8dl/ZZc8gCmjs187lwDGq4AjKlQ==, tarball: file:projects/request-resources.tgz}
id: file:projects/request-resources.tgz
name: '@rush-temp/request-resources'
version: 0.0.0
@ -14696,7 +14846,7 @@ packages:
dev: false
file:projects/request.tgz:
resolution: {integrity: sha512-Wk75CaPLTFw+DS2xYG9Ma6RnXh/hJTxsaENt4pCohrTtD2pLai6iEykmWumisNZszH/EJlOZLhXwXr46QfLB4A==, tarball: file:projects/request.tgz}
resolution: {integrity: sha512-y69rGft03euHUUNVPSEds55Mj4C7qIwSB59/K2zs4619mk4TaLFNx6plutOJtmMPzRPJR0IOtZQ+/k4EN4gSoQ==, tarball: file:projects/request.tgz}
name: '@rush-temp/request'
version: 0.0.0
dependencies:
@ -14905,12 +15055,13 @@ packages:
dev: false
file:projects/server-core.tgz:
resolution: {integrity: sha512-dOWEA7jD+yUtVWidEKaEbMsGXbB1+vXW25QYdWdChOKOIxL9vjX1QpgloF3K2f2d5IjjCPkEkRuAOZVDQqs9MA==, tarball: file:projects/server-core.tgz}
resolution: {integrity: sha512-6RQpWN9YCSt0vS35XBHo6BmT9mwEvYaRFX7MWi/uhZ2v0JvCw5OGoO1/mhW4wjkK30j56DtoRSG+kY3WZ5QzNA==, tarball: file:projects/server-core.tgz}
name: '@rush-temp/server-core'
version: 0.0.0
dependencies:
'@rushstack/heft': 0.47.11
'@types/heft-jest': 1.0.3
'@types/html-to-text': 8.1.1
'@types/minio': 7.0.14
'@types/node': 16.11.68
'@typescript-eslint/eslint-plugin': 5.42.1_d506b9be61cb4ac2646ecbc6e0680464
@ -14921,6 +15072,7 @@ packages:
eslint-plugin-n: 15.5.1_eslint@8.27.0
eslint-plugin-promise: 6.1.1_eslint@8.27.0
fast-equals: 2.0.4
html-to-text: 9.0.3
minio: 7.0.32
prettier: 2.7.1
typescript: 4.8.4
@ -16066,7 +16218,7 @@ packages:
dev: false
file:projects/tool.tgz:
resolution: {integrity: sha512-FOiaLAqv3oU/lf10gys5x53y4lM9AMhApwOR0NLoSrfDnupW/AojIe38D78Zm4UEqqdEtiHH9/iqY7XvddVNgg==, tarball: file:projects/tool.tgz}
resolution: {integrity: sha512-VVNc6+f2BssTik3I3y+lBrBVkMz6sHygnom2/PvO/ZwEVKeWDN2rwpzZxj6n+RlPeCCvvcII9sXpPOAoJAnxnA==, tarball: file:projects/tool.tgz}
name: '@rush-temp/tool'
version: 0.0.0
dependencies:

View File

@ -115,7 +115,8 @@ export async function connect (handler: (tx: Tx) => void): Promise<ClientConnect
fulltextAdapter: {
factory: createNullFullTextAdapter,
url: '',
metrics: new MeasureMetricsContext('', {})
metrics: new MeasureMetricsContext('', {}),
stages: () => []
},
contentAdapter: {
url: '',

View File

@ -19,6 +19,7 @@ services:
- 9001:9001
volumes:
- files:/data
restart: unless-stopped
elastic:
image: 'elasticsearch:7.14.2'
command: |
@ -39,6 +40,7 @@ services:
interval: 20s
retries: 10
test: curl -s http://localhost:9200/_cluster/health | grep -vq '"status":"red"'
restart: unless-stopped
account:
image: hardcoreeng/account
links:
@ -56,6 +58,7 @@ services:
- MINIO_ENDPOINT=minio
- MINIO_ACCESS_KEY=minioadmin
- MINIO_SECRET_KEY=minioadmin
restart: unless-stopped
collaborator:
image: hardcoreeng/collaborator
links:
@ -68,6 +71,7 @@ services:
- MINIO_ENDPOINT=minio
- MINIO_ACCESS_KEY=minioadmin
- MINIO_SECRET_KEY=minioadmin
restart: unless-stopped
front:
image: hardcoreeng/front
links:
@ -89,6 +93,7 @@ services:
- MINIO_ENDPOINT=minio
- MINIO_ACCESS_KEY=minioadmin
- MINIO_SECRET_KEY=minioadmin
restart: unless-stopped
# tracker-front:
# image: hardcoreeng/tracker-front
# links:
@ -130,6 +135,7 @@ services:
- MINIO_SECRET_KEY=minioadmin
- REKONI_URL=http://rekoni:4004
# - APM_SERVER_URL=http://apm-server:8200
restart: unless-stopped
rekoni:
image: hardcoreeng/rekoni-service
restart: on-failure
@ -138,43 +144,22 @@ services:
deploy:
resources:
limits:
memory: 1024M
# apm-server:
# image: docker.elastic.co/apm/apm-server:7.14.2
# depends_on:
# - "elastic"
# - "kibana"
# cap_add: ["CHOWN", "DAC_OVERRIDE", "SETGID", "SETUID"]
# cap_drop: ["ALL"]
# ports:
# - 8200:8200
# command: |
# apm-server -e
# -E apm-server.rum.enabled=true
# -E setup.kibana.host=kibana:5601
# -E setup.template.settings.index.number_of_replicas=0
# -E apm-server.kibana.enabled=true
# -E apm-server.kibana.host=kibana:5601
# -E output.elasticsearch.hosts=["elastic:9200"]
# healthcheck:
# interval: 10s
# retries: 12
# test: curl --write-out 'HTTP %{http_code}' --fail --silent --output /dev/null http://localhost:8200/
# kibana:
# image: docker.elastic.co/kibana/kibana:7.14.2
# depends_on:
# - "elastic"
memory: 500M
restart: unless-stopped
# etcd:
# container_name: milvus-etcd
# image: quay.io/coreos/etcd:v3.5.0
# environment:
# ELASTICSEARCH_URL: http://elastic:9200
# ELASTICSEARCH_HOSTS: http://elastic:9200
# ports:
# - 5601:5601
# healthcheck:
# interval: 10s
# retries: 20
# test: curl --write-out 'HTTP %{http_code}' --fail --silent --output /dev/null http://localhost:5601/api/status
# - ETCD_AUTO_COMPACTION_MODE=revision
# - ETCD_AUTO_COMPACTION_RETENTION=1000
# - ETCD_QUOTA_BACKEND_BYTES=4294967296
# - ETCD_SNAPSHOT_COUNT=50000
# volumes:
# - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
# command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
# restart: unless-stopped
volumes:
db:
files:
elastic:
etcd:

View File

@ -0,0 +1,51 @@
version: '3.5'
services:
etcd:
container_name: milvus-etcd
image: quay.io/coreos/etcd:v3.5.0
environment:
- ETCD_AUTO_COMPACTION_MODE=revision
- ETCD_AUTO_COMPACTION_RETENTION=1000
- ETCD_QUOTA_BACKEND_BYTES=4294967296
- ETCD_SNAPSHOT_COUNT=50000
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
minio:
container_name: milvus-minio
image: minio/minio:RELEASE.2022-03-17T06-34-49Z
environment:
MINIO_ACCESS_KEY: minioadmin
MINIO_SECRET_KEY: minioadmin
ports:
- "9001:9001"
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
command: minio server /minio_data --console-address ":9001"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3
standalone:
container_name: milvus-standalone
image: milvusdb/milvus:v2.2.2
command: ["milvus", "run", "standalone"]
environment:
ETCD_ENDPOINTS: etcd:2379
MINIO_ADDRESS: minio:9000
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
ports:
- "19530:19530"
- "9091:9091"
depends_on:
- "etcd"
- "minio"
networks:
default:
name: milvus

View File

@ -63,7 +63,8 @@ export async function start (port: number, host?: string): Promise<void> {
fulltextAdapter: {
factory: createNullFullTextAdapter,
url: '',
metrics: new MeasureMetricsContext('', {})
metrics: new MeasureMetricsContext('', {}),
stages: () => []
},
contentAdapter: {
url: '',

View File

@ -114,6 +114,7 @@
"email-addresses": "^5.0.0",
"libphonenumber-js": "^1.9.46",
"@hcengineering/setting": "^0.6.2",
"@hcengineering/minio": "^0.6.0"
"@hcengineering/minio": "^0.6.0",
"@hcengineering/openai": "^0.6.0"
}
}

View File

@ -48,7 +48,9 @@ import { diffWorkspace, dumpWorkspace, restoreWorkspace } from './workspace'
import { Data, getWorkspaceId, Tx, Version } from '@hcengineering/core'
import { MinioService } from '@hcengineering/minio'
import { MigrateOperation } from '@hcengineering/model'
import { openAIConfigDefaults } from '@hcengineering/openai'
import { rebuildElastic } from './elastic'
import { openAIConfig } from './openai'
/**
* @public
@ -136,6 +138,30 @@ export function devTool (
})
})
program
.command('openai <workspace>')
.description('assign workspace')
.requiredOption('-t, --token <token>', 'OpenAI token')
.option('-h, --host <host>', 'OpenAI API Host', openAIConfigDefaults.endpoint)
.option('--enable <value>', 'Enable or disable', true)
.option('--embeddings <embeddings>', 'Enable or disable embeddings generation', true)
.option('--tokenLimit <tokenLimit>', 'Acceptable token limit', `${openAIConfigDefaults.tokenLimit}`)
.action(
async (
workspace: string,
cmd: { token: string, host: string, enable: string, tokenLimit: string, embeddings: string }
) => {
console.log(`enabling OpenAI for workspace ${workspace}...`)
await openAIConfig(transactorUrl, workspace, productId, {
token: cmd.token,
endpoint: cmd.host,
enabled: cmd.enable === 'true',
tokenLimit: parseInt(cmd.tokenLimit),
embeddings: cmd.embeddings === 'true'
})
}
)
program
.command('show-user <email>')
.description('show user')

34
dev/tool/src/openai.ts Normal file
View File

@ -0,0 +1,34 @@
import core, { getWorkspaceId, TxOperations } from '@hcengineering/core'
import openai, { openAIConfigDefaults } from '@hcengineering/openai'
import { connect } from '@hcengineering/server-tool'
/**
 * Create or update the OpenAI configuration document of a workspace.
 *
 * Connects to the transactor as the system account, upserts the single
 * `OpenAIConfiguration` document (defaults merged with `opt`, `opt` winning),
 * and always closes the connection. Errors are logged (with stack) and
 * swallowed — the call is best-effort and never throws.
 *
 * @param transactorUrl - transactor endpoint to connect to
 * @param workspace - workspace name to configure
 * @param productId - product identifier used to resolve the workspace id
 * @param opt - OpenAI settings overriding `openAIConfigDefaults`
 */
export async function openAIConfig (
  transactorUrl: string,
  workspace: string,
  productId: string,
  opt: { endpoint: string, token: string, enabled: boolean, tokenLimit: number, embeddings: boolean }
): Promise<void> {
  const connection = await connect(transactorUrl, getWorkspaceId(workspace, productId), '#configurator@hc.engineering')
  try {
    const client = new TxOperations(connection, core.account.System)
    // Merge once so both branches write identical data.
    const data = { ...openAIConfigDefaults, ...opt }
    // Upsert: there is at most one OpenAIConfiguration document per workspace.
    const existing = await client.findOne(openai.class.OpenAIConfiguration, {})
    if (existing !== undefined) {
      await client.update(existing, data)
    } else {
      await client.createDoc(openai.class.OpenAIConfiguration, core.space.Configuration, data)
    }
  } catch (err: any) {
    // Best-effort: report the failure with a stack trace, but do not rethrow.
    console.trace(err)
  } finally {
    await connection.close()
  }
}

View File

@ -77,6 +77,8 @@
"@hcengineering/model-document": "^0.6.0",
"@hcengineering/model-bitrix": "^0.6.0",
"@hcengineering/model-request": "^0.6.0",
"@hcengineering/model-server-request": "^0.6.0"
"@hcengineering/model-server-request": "^0.6.0",
"@hcengineering/model-server-openai": "^0.6.0",
"@hcengineering/model-server-translate": "^0.6.0"
}
}

View File

@ -63,6 +63,9 @@ import { createModel as bitrixModel } from '@hcengineering/model-bitrix'
import { createModel as requestModel } from '@hcengineering/model-request'
import { createModel as serverRequestModel } from '@hcengineering/model-server-request'
import { createModel as serverTranslate } from '@hcengineering/model-server-translate'
import { createModel as serverOpenAI } from '@hcengineering/model-server-openai'
export const version: Data<Version> = jsonVersion as Data<Version>
const builder = new Builder()
@ -113,7 +116,9 @@ const builders: [(b: Builder) => void, string][] = [
[serverHrModel, 'server-hr'],
[serverNotificationModel, 'server-notification'],
[serverRequestModel, 'server-request'],
[automationModel, 'automation']
[automationModel, 'automation'],
[serverTranslate, 'translate'],
[serverOpenAI, 'openai']
]
for (const [b, id] of builders) {

View File

@ -58,6 +58,7 @@ export class TAttachment extends TAttachedDoc implements Attachment {
lastModified!: number
@Prop(TypeString(), attachment.string.Description)
@Index(IndexKind.FullText)
description!: string
@Prop(TypeBoolean(), attachment.string.Pinned)

View File

@ -524,8 +524,8 @@ export function createModel (builder: Builder): void {
)
// Allow to use fuzzy search for mixins
builder.mixin(contact.class.Contact, core.class.Class, core.mixin.AISearchContext, {
index: true
builder.mixin(contact.class.Contact, core.class.Class, core.mixin.FullTextSearchContext, {
fullTextSummary: true
})
}

View File

@ -15,7 +15,6 @@
import {
Account,
AISearchContext,
AnyAttribute,
ArrOf,
AttachedDoc,
@ -23,16 +22,20 @@ import {
Class,
ClassifierKind,
Collection,
Configuration,
ConfigurationElement,
Doc,
DocIndexState,
Domain,
DOMAIN_BLOB,
DOMAIN_CONFIGURATION,
DOMAIN_DOC_INDEX_STATE,
DOMAIN_FULLTEXT_BLOB,
DOMAIN_MODEL,
Enum,
EnumOf,
FullTextData,
FullTextSearchContext,
IndexKind,
Interface,
Mixin,
@ -48,14 +51,15 @@ import {
import {
Hidden,
Index,
Mixin as MMixin,
Model,
Prop,
TypeBoolean,
TypeIntlString,
TypeRef,
TypeString,
TypeTimestamp,
UX,
Mixin as MMixin
UX
} from '@hcengineering/model'
import type { IntlString } from '@hcengineering/platform'
import core from './component'
@ -250,7 +254,22 @@ export class TDocIndexState extends TDoc implements DocIndexState {
stages!: Record<string, boolean>
}
@MMixin(core.mixin.AISearchContext, core.class.Class)
export class TAISearchContext extends TClass implements AISearchContext {
index!: boolean
@MMixin(core.mixin.FullTextSearchContext, core.class.Class)
export class TFullTextSearchContext extends TClass implements FullTextSearchContext {
fullTextSummary!: boolean
}
@MMixin(core.mixin.ConfigurationElement, core.class.Class)
export class TConfigurationElement extends TClass implements ConfigurationElement {
@Prop(TypeIntlString(), core.string.Private)
title!: IntlString
@Prop(TypeIntlString(), core.string.Private)
group!: IntlString
}
@Model(core.class.Configuration, core.class.Doc, DOMAIN_CONFIGURATION)
export class TConfiguration extends TDoc implements Configuration {
@Prop(TypeBoolean(), core.string.Private)
enabled!: boolean
}

View File

@ -16,13 +16,15 @@
import { Builder } from '@hcengineering/model'
import core from './component'
import {
TAISearchContext,
TFullTextSearchContext,
TArrOf,
TAttachedDoc,
TAttribute,
TBlobData,
TClass,
TCollection,
TConfiguration,
TConfigurationElement,
TDoc,
TDocIndexState,
TEnum,
@ -97,6 +99,8 @@ export function createModel (builder: Builder): void {
TFulltextData,
TTypeRelatedDocument,
TDocIndexState,
TAISearchContext
TFullTextSearchContext,
TConfiguration,
TConfigurationElement
)
}

View File

@ -66,6 +66,7 @@ export class TCollaboratorDocument extends TAttachment implements CollaboratorDo
@UX(document.string.Version)
export class TDocumentVersion extends TAttachedDoc implements DocumentVersion {
@Prop(TypeNumber(), document.string.Version)
@Index(IndexKind.FullText)
@ReadOnly()
version!: number

View File

@ -0,0 +1,7 @@
// ESLint config: inherit the shared model-rig profile and point the
// TypeScript parser at this package's tsconfig so type-aware rules resolve.
module.exports = {
  extends: ['./node_modules/@hcengineering/model-rig/profiles/default/config/eslint.config.json'],
  parserOptions: {
    tsconfigRootDir: __dirname,
    project: './tsconfig.json'
  }
}

View File

@ -0,0 +1,17 @@
{
"name": "@hcengineering/model-server-core",
"entries": [
{
"version": "0.6.0",
"tag": "@hcengineering/model-server-core_v0.6.0",
"date": "Sun, 08 Aug 2021 10:14:57 GMT",
"comments": {
"dependency": [
{
"comment": "Updating dependency \"@hcengineering/platform\" from `~0.6.3` to `~0.6.4`"
}
]
}
}
]
}

View File

@ -0,0 +1,9 @@
# Change Log - @hcengineering/model-server-core
This log was last generated on Sun, 08 Aug 2021 10:14:57 GMT and should not be manually modified.
## 0.6.0
Sun, 08 Aug 2021 10:14:57 GMT
_Initial release_

View File

@ -0,0 +1,18 @@
// The "rig.json" file directs tools to look for their config files in an external package.
// Documentation for this system: https://www.npmjs.com/package/@rushstack/rig-package
{
"$schema": "https://developer.microsoft.com/json-schemas/rig-package/rig.schema.json",
/**
* (Required) The name of the rig package to inherit from.
* It should be an NPM package name with the "-rig" suffix.
*/
"rigPackageName": "@hcengineering/model-rig"
/**
* (Optional) Selects a config profile from the rig package. The name must consist of
* lowercase alphanumeric words separated by hyphens, for example "sample-profile".
 * If omitted, then the "default" profile will be used.
*/
// "rigProfile": "your-profile-name"
}

View File

@ -0,0 +1,35 @@
{
"name": "@hcengineering/model-server-openai",
"version": "0.6.0",
"main": "lib/index.js",
"author": "Anticrm Platform Contributors",
"license": "EPL-2.0",
"scripts": {
"build": "heft build",
"build:watch": "tsc",
"lint:fix": "eslint --fix src",
"lint": "eslint src",
"format": "prettier --write src && eslint --fix src"
},
"devDependencies": {
"@hcengineering/model-rig": "^0.6.0",
"@typescript-eslint/eslint-plugin": "^5.41.0",
"eslint-plugin-import": "^2.26.0",
"eslint-plugin-promise": "^6.1.1",
"eslint-plugin-n": "^15.4.0",
"eslint": "^8.26.0",
"@types/heft-jest": "^1.0.3",
"@typescript-eslint/parser": "^5.41.0",
"eslint-config-standard-with-typescript": "^23.0.0",
"prettier": "^2.7.1",
"@rushstack/heft": "^0.47.9"
},
"dependencies": {
"@hcengineering/core": "^0.6.20",
"@hcengineering/model": "^0.6.0",
"@hcengineering/platform": "^0.6.8",
"@hcengineering/model-core": "^0.6.0",
"@hcengineering/openai": "^0.6.0",
"@hcengineering/server-core": "^0.6.1"
}
}

View File

@ -0,0 +1,47 @@
//
// Copyright © 2020, 2021 Anticrm Platform Contributors.
// Copyright © 2021 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { Builder, Model, Prop, TypeBoolean, TypeNumber, TypeString, UX } from '@hcengineering/model'
import { TConfiguration } from '@hcengineering/model-core'
import { getEmbeddedLabel } from '@hcengineering/platform'
import core, { DOMAIN_CONFIGURATION } from '@hcengineering/core'
import openai, { OpenAIConfiguration } from '@hcengineering/openai/src/plugin'
import serverCore from '@hcengineering/server-core'
// Workspace-level OpenAI integration settings, persisted in the configuration domain.
@Model(openai.class.OpenAIConfiguration, core.class.Configuration, DOMAIN_CONFIGURATION)
@UX(getEmbeddedLabel('OpenAI'))
export class TOpenAIConfiguration extends TConfiguration implements OpenAIConfiguration {
  // OpenAI API token used for authentication.
  @Prop(TypeString(), getEmbeddedLabel('Token'))
    token!: string

  // Base URL of the OpenAI-compatible endpoint.
  @Prop(TypeString(), getEmbeddedLabel('Endpoint'))
    endpoint!: string

  // Maximum number of tokens allowed per request.
  @Prop(TypeNumber(), getEmbeddedLabel('Token Limit'))
    tokenLimit!: number

  // Whether embeddings-based search is enabled for this workspace.
  @Prop(TypeBoolean(), getEmbeddedLabel('Use embeddings'))
    embeddings!: boolean
}
/**
 * Register the OpenAI configuration model and the server trigger that
 * reacts to GPT request documents.
 */
export function createModel (builder: Builder): void {
  builder.createModel(TOpenAIConfiguration)
  // Server-side trigger invoked on GPT requests.
  builder.createDoc(serverCore.class.Trigger, core.space.Model, {
    trigger: openai.trigger.OnGPTRequest
  })
}

View File

@ -0,0 +1,8 @@
{
"extends": "./node_modules/@hcengineering/model-rig/profiles/default/tsconfig.json",
"compilerOptions": {
"rootDir": "./src",
"outDir": "./lib",
}
}

View File

@ -0,0 +1,7 @@
// ESLint config: inherit the shared model-rig profile and point the
// TypeScript parser at this package's tsconfig so type-aware rules resolve.
module.exports = {
  extends: ['./node_modules/@hcengineering/model-rig/profiles/default/config/eslint.config.json'],
  parserOptions: {
    tsconfigRootDir: __dirname,
    project: './tsconfig.json'
  }
}

View File

@ -0,0 +1,4 @@
*
!/lib/**
!CHANGELOG.md
/lib/**/__tests__/

View File

@ -0,0 +1,17 @@
{
"name": "@hcengineering/model-server-core",
"entries": [
{
"version": "0.6.0",
"tag": "@hcengineering/model-server-core_v0.6.0",
"date": "Sun, 08 Aug 2021 10:14:57 GMT",
"comments": {
"dependency": [
{
"comment": "Updating dependency \"@hcengineering/platform\" from `~0.6.3` to `~0.6.4`"
}
]
}
}
]
}

View File

@ -0,0 +1,9 @@
# Change Log - @hcengineering/model-server-core
This log was last generated on Sun, 08 Aug 2021 10:14:57 GMT and should not be manually modified.
## 0.6.0
Sun, 08 Aug 2021 10:14:57 GMT
_Initial release_

View File

@ -0,0 +1,18 @@
// The "rig.json" file directs tools to look for their config files in an external package.
// Documentation for this system: https://www.npmjs.com/package/@rushstack/rig-package
{
"$schema": "https://developer.microsoft.com/json-schemas/rig-package/rig.schema.json",
/**
* (Required) The name of the rig package to inherit from.
* It should be an NPM package name with the "-rig" suffix.
*/
"rigPackageName": "@hcengineering/model-rig"
/**
* (Optional) Selects a config profile from the rig package. The name must consist of
* lowercase alphanumeric words separated by hyphens, for example "sample-profile".
 * If omitted, then the "default" profile will be used.
*/
// "rigProfile": "your-profile-name"
}

View File

@ -0,0 +1,34 @@
{
"name": "@hcengineering/model-server-translate",
"version": "0.6.0",
"main": "lib/index.js",
"author": "Anticrm Platform Contributors",
"license": "EPL-2.0",
"scripts": {
"build": "heft build",
"build:watch": "tsc",
"lint:fix": "eslint --fix src",
"lint": "eslint src",
"format": "prettier --write src && eslint --fix src"
},
"devDependencies": {
"@hcengineering/model-rig": "^0.6.0",
"@typescript-eslint/eslint-plugin": "^5.41.0",
"eslint-plugin-import": "^2.26.0",
"eslint-plugin-promise": "^6.1.1",
"eslint-plugin-n": "^15.4.0",
"eslint": "^8.26.0",
"@types/heft-jest": "^1.0.3",
"@typescript-eslint/parser": "^5.41.0",
"eslint-config-standard-with-typescript": "^23.0.0",
"prettier": "^2.7.1",
"@rushstack/heft": "^0.47.9"
},
"dependencies": {
"@hcengineering/core": "^0.6.20",
"@hcengineering/model": "^0.6.0",
"@hcengineering/platform": "^0.6.8",
"@hcengineering/model-core": "^0.6.0",
"@hcengineering/translate": "^0.6.0"
}
}

View File

@ -0,0 +1,35 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { Builder, Model, Prop, TypeString, UX } from '@hcengineering/model'
import { TConfiguration } from '@hcengineering/model-core'
import { getEmbeddedLabel } from '@hcengineering/platform'
import core, { DOMAIN_CONFIGURATION } from '@hcengineering/core'
import translate, { TranslateConfiguration } from '@hcengineering/translate/src/plugin'
// Workspace-level retranslation service settings, persisted in the configuration domain.
@Model(translate.class.TranslateConfiguration, core.class.Configuration, DOMAIN_CONFIGURATION)
@UX(getEmbeddedLabel('Retranslation'))
export class TTranslateConfiguration extends TConfiguration implements TranslateConfiguration {
  // API token used to authenticate against the retranslation service.
  @Prop(TypeString(), getEmbeddedLabel('Token'))
    token!: string

  // Base URL of the retranslation service.
  // Fix: the label was 'Token' (copy-paste from the field above) — it is the endpoint.
  @Prop(TypeString(), getEmbeddedLabel('Endpoint'))
    endpoint!: string
}
/**
 * Register the retranslation configuration model.
 */
export function createModel (builder: Builder): void {
  builder.createModel(TTranslateConfiguration)
}

View File

@ -0,0 +1,8 @@
{
"extends": "./node_modules/@hcengineering/model-rig/profiles/default/tsconfig.json",
"compilerOptions": {
"rootDir": "./src",
"outDir": "./lib",
}
}

View File

@ -120,6 +120,7 @@ export class TTask extends TAttachedDoc implements Task {
doneState!: Ref<DoneState> | null
@Prop(TypeString(), task.string.TaskNumber)
@Index(IndexKind.FullText)
number!: number
// @Prop(TypeRef(contact.class.Employee), task.string.TaskAssignee)

View File

@ -33,6 +33,7 @@
"@hcengineering/view-resources": "^0.6.0",
"@hcengineering/model-core": "^0.6.0",
"@hcengineering/preference": "^0.6.2",
"@hcengineering/model-preference": "^0.6.0"
"@hcengineering/model-preference": "^0.6.0",
"@hcengineering/model-presentation": "^0.6.0"
}
}

View File

@ -18,7 +18,9 @@ import { DOMAIN_MODEL } from '@hcengineering/core'
import { Builder, Mixin, Model } from '@hcengineering/model'
import core, { TClass, TDoc } from '@hcengineering/model-core'
import preference, { TPreference } from '@hcengineering/model-preference'
import presenation from '@hcengineering/model-presentation'
import type { Asset, IntlString, Resource, Status } from '@hcengineering/platform'
import { DOMAIN_PREFERENCE } from '@hcengineering/preference'
import type { AnyComponent, Location } from '@hcengineering/ui'
import type {
Action,
@ -33,6 +35,7 @@ import type {
CollectionEditor,
CollectionPresenter,
Filter,
FilteredView,
FilterMode,
IgnoreActions,
InlineAttributEditor,
@ -57,11 +60,9 @@ import type {
Viewlet,
ViewletDescriptor,
ViewletPreference,
ViewOptionsModel,
FilteredView
ViewOptionsModel
} from '@hcengineering/view'
import view from './plugin'
import { DOMAIN_PREFERENCE } from '@hcengineering/preference'
export { viewOperation } from './migration'
export { ViewAction, Viewlet }
@ -700,6 +701,23 @@ export function createModel (builder: Builder): void {
)
classPresenter(builder, core.class.EnumOf, view.component.StringPresenter, view.component.EnumEditor)
createAction(builder, {
action: view.actionImpl.ShowPopup,
actionProps: {
component: view.component.IndexedDocumentPreview,
fillProps: {
_id: 'objectId'
}
},
label: presenation.string.DocumentPreview,
keyBinding: [''],
input: 'focus',
icon: view.icon.Open,
category: view.category.GeneralNavigation,
target: core.class.Doc,
context: { mode: ['context', 'browser', 'editor'] }
})
}
export default view

View File

@ -63,7 +63,8 @@ export default mergeIds(viewId, view, {
HTMLEditor: '' as AnyComponent,
MarkupEditor: '' as AnyComponent,
MarkupEditorPopup: '' as AnyComponent,
ListView: '' as AnyComponent
ListView: '' as AnyComponent,
IndexedDocumentPreview: '' as AnyComponent
},
string: {
Table: '' as IntlString,

View File

@ -245,6 +245,11 @@ export interface TypeHyperlink extends Type<Hyperlink> {}
*/
export const DOMAIN_MODEL = 'model' as Domain
/**
* @public
*/
export const DOMAIN_CONFIGURATION = '_configuration' as Domain
/**
* @public
*/
@ -351,6 +356,10 @@ export interface DocIndexState extends Doc {
// Indexable attributes, including child ones.
attributes: Record<string, any>
// Full Summary
fullSummary?: Markup | null
shortSummary?: Markup | null
}
/**
@ -358,8 +367,29 @@ export interface DocIndexState extends Doc {
*
* If defined for class, this class will be enabled for embedding search like openai.
*/
export interface AISearchContext extends Class<Doc> {
index: boolean
export interface FullTextSearchContext extends Class<Doc> {
fullTextSummary: boolean
}
/**
* @public
*/
export interface ConfigurationElement extends Class<Doc> {
// Title will be presented to owner.
title: IntlString
// Group used for grouping.
group: IntlString
}
/**
* @public
*
 * Defines a configuration value for the workspace.
*
 * Configuration is accessible only to workspace owners and internal services.
*/
export interface Configuration extends Doc {
enabled: boolean
}
/**

View File

@ -25,7 +25,7 @@ import type {
Collection,
Doc,
DocIndexState,
AISearchContext,
FullTextSearchContext,
Enum,
EnumOf,
FullTextData,
@ -39,7 +39,9 @@ import type {
Space,
Timestamp,
Type,
UserStatus
UserStatus,
Configuration,
ConfigurationElement
} from './classes'
import type {
Tx,
@ -99,16 +101,20 @@ export default plugin(coreId, {
BlobData: '' as Ref<Class<BlobData>>,
FulltextData: '' as Ref<Class<FullTextData>>,
TypeRelatedDocument: '' as Ref<Class<Type<RelatedDocument>>>,
DocIndexState: '' as Ref<Class<DocIndexState>>
DocIndexState: '' as Ref<Class<DocIndexState>>,
Configuration: '' as Ref<Class<Configuration>>
},
mixin: {
AISearchContext: '' as Ref<Mixin<AISearchContext>>
FullTextSearchContext: '' as Ref<Mixin<FullTextSearchContext>>,
ConfigurationElement: '' as Ref<Mixin<ConfigurationElement>>
},
space: {
Tx: '' as Ref<Space>,
DerivedTx: '' as Ref<Space>,
Model: '' as Ref<Space>,
Space: '' as Ref<Space>
Space: '' as Ref<Space>,
Configuration: '' as Ref<Space>
},
account: {
System: '' as Ref<Account>

View File

@ -34,6 +34,7 @@
"CategoryProjectLead": "Project lead",
"CategoryProjectMembers": "Project members",
"CategoryOther": "Other",
"InltPropsValue": "{value}"
"InltPropsValue": "{value}",
"DocumentPreview": "Preview"
}
}

View File

@ -34,6 +34,7 @@
"CategoryProjectLead": "Руководитель проекта",
"CategoryProjectMembers": "Участники проекта",
"CategoryOther": "Прочие",
"InltPropsValue": "{value}"
"InltPropsValue": "{value}",
"DocumentPreview": "Предпросмотр"
}
}

View File

@ -1,6 +1,7 @@
<script lang="ts">
import core, { AnyAttribute, Doc, DocIndexState, extractDocKey, isFullTextAttribute, Ref } from '@hcengineering/core'
import { EditBox, Label } from '@hcengineering/ui'
import { EditBox, Label, Panel } from '@hcengineering/ui'
import Icon from '@hcengineering/ui/src/components/Icon.svelte'
import { createQuery, getClient } from '../utils'
@ -25,6 +26,8 @@
}
let search = ''
$: summary = (indexDoc?.attributes as any).summary
$: attributes =
indexDoc !== undefined
? Object.entries(indexDoc.attributes).reduce<[AnyAttribute, string[][]][]>((a, b) => {
@ -47,46 +50,54 @@
: []
</script>
<EditBox bind:value={search} kind="search-style" />
<div class="indexed-background">
<div class="indexed-doc flex-row text-base">
{#if indexDoc}
{#each attributes as attr}
{@const clOf = client.getHierarchy().getClass(attr[0].attributeOf)}
<div class="flex-row-center">
{#if clOf.icon}
<div class="mr-1">
<Icon size={'medium'} icon={clOf.icon} />
</div>
{/if}
<Label label={clOf.label} />.<Label label={attr[0].label} />
</div>
<div class="p-1 flex-row flex-wrap">
{#each attr[1] as doc}
<div class="p-1" class:flex-col={doc.length > 1}>
{#each doc as line}
{@const hl = search.length > 0 && line.toLowerCase().includes(search.toLowerCase())}
<span class:text-md={!hl} class:highlight={hl}>{line}</span>
{/each}
</div>
{/each}
</div>
{/each}
{/if}
<Panel on:changeContent on:close>
<EditBox bind:value={search} kind="search-style" />
<div class="indexed-background">
<div class="indexed-doc text-base max-h-125">
{#if summary}
Summary:
{#each summary.split('\n') as line}
{@const hl = search.length > 0 && line.toLowerCase().includes(search.toLowerCase())}
<span class:text-md={!hl} class:highlight={hl}>{line}</span>
{/each}
{:else if indexDoc}
{#each attributes as attr}
{@const clOf = client.getHierarchy().getClass(attr[0].attributeOf)}
<div class="flex-row-center">
{#if clOf.icon}
<div class="mr-1">
<Icon size={'medium'} icon={clOf.icon} />
</div>
{/if}
<Label label={clOf.label} />.<Label label={attr[0].label} />
</div>
<div class="p-1 flex-row flex-wrap">
{#each attr[1] as doc}
<div class="p-1" class:flex-col={doc.length > 1}>
{#each doc as line}
{@const hl = search.length > 0 && line.toLowerCase().includes(search.toLowerCase())}
<span class:text-md={!hl} class:highlight={hl}>{line}</span>
{/each}
</div>
{/each}
</div>
{/each}
{/if}
</div>
</div>
</div>
</Panel>
<style lang="scss">
.indexed-doc {
padding: 2.5rem;
display: grid;
overflow: auto;
min-width: 50rem;
max-width: 200rem;
}
.indexed-background {
overflow: auto;
height: 80rem !important;
min-width: 120rem;
background-color: white;
color: black;
height: 100%;
user-select: text;
.highlight {
color: red;

View File

@ -20,13 +20,13 @@
import ui, {
Button,
createFocusManager,
deviceOptionsStore,
EditBox,
FocusHandler,
IconSearch,
Label,
ListView,
resizeObserver,
deviceOptionsStore
resizeObserver
} from '@hcengineering/ui'
import { createEventDispatcher } from 'svelte'
import presentation from '../plugin'

View File

@ -15,14 +15,14 @@
<script lang="ts">
import { createEventDispatcher } from 'svelte'
import { DropdownLabelsIntl, AnySvelteComponent, showPopup, Label } from '@hcengineering/ui'
import { AvatarType, buildGravatarId, checkHasGravatar, getAvatarColorForId } from '@hcengineering/contact'
import { Asset } from '@hcengineering/platform'
import { AnySvelteComponent, DropdownLabelsIntl, Label, showPopup } from '@hcengineering/ui'
import presentation from '..'
import { getAvatarTypeDropdownItems, getFileUrl } from '../utils'
import Card from './Card.svelte'
import AvatarComponent from './Avatar.svelte'
import Card from './Card.svelte'
import EditAvatarPopup from './EditAvatarPopup.svelte'
export let avatar: string | undefined

View File

@ -63,7 +63,8 @@ export default plugin(presentationId, {
CategoryProjectLead: '' as IntlString,
CategoryProjectMembers: '' as IntlString,
CategoryOther: '' as IntlString,
InltPropsValue: '' as IntlString
InltPropsValue: '' as IntlString,
DocumentPreview: '' as IntlString
},
metadata: {
RequiredVersion: '' as Metadata<string>,

View File

@ -172,6 +172,7 @@
background-color: transparent;
transform-origin: center;
opacity: 0;
max-width: 95%;
&.preShow {
transform: scale(0.9);

View File

@ -46,7 +46,7 @@
<div class="avatar">
<Avatar size={'medium'} />
</div>
<div class="flex-grow flex-col">
<div class="flex-grow flex-col select-text">
<div class="header">
<div class="fs-title">
{#await getUser(client, value.modifiedBy) then user}

View File

@ -54,7 +54,7 @@
let refInput: AttachmentRefInput
</script>
<div class:editing>
<div class:editing class="content-accent-color">
{#if edit}
<AttachmentRefInput
bind:this={refInput}

View File

@ -27,7 +27,7 @@
"ContactInfo": "Contact Info",
"Contact": "Contact",
"Location": "Location",
"Channel": "Channel",
"Channel": "Contact record",
"ChannelProvider": "Channel provider",
"Person": "Person",
"Organization": "Organization",

View File

@ -27,7 +27,7 @@
"ContactInfo": "Контактная информация",
"Contact": "Контакт",
"Location": "Местоположение",
"Channel": "Канал",
"Channel": "Контактная информация",
"ChannelProvider": "Провайдер канала",
"Person": "Персона",
"Organization": "Организация",

View File

@ -1,7 +1,7 @@
{
"string": {
"TagElementLabel": "Tag",
"TitleLabel": "Title",
"TitleLabel": "Tag",
"DescriptionLabel": "Description",
"ColorLabel": "Color",
"CategoryLabel": "Category",

View File

@ -1,7 +1,7 @@
{
"string": {
"TagElementLabel": "Тег",
"TitleLabel": "Заголовок",
"TitleLabel": "Тег",
"DescriptionLabel": "Описание",
"ColorLabel": "Цвет",
"CategoryLabel": "Категория",

View File

@ -87,7 +87,7 @@
acc[val] = sortOrder
return acc
}, {})
: { [sortKey]: sortOrder }
: { ...(options?.sort ?? {}), [sortKey]: sortOrder }
const update = q.query(
_class,
query,

View File

@ -74,6 +74,8 @@ import {
nestedDontMatchResult
} from './filter'
import { IndexedDocumentPreview } from '@hcengineering/presentation'
function PositionElementAlignment (e?: Event): PopupAlignment | undefined {
return getEventPopupPositionElement(e)
}
@ -168,7 +170,8 @@ export default async (): Promise<Resources> => ({
ValueSelector,
HTMLEditor,
ListView,
GrowPresenter
GrowPresenter,
IndexedDocumentPreview
},
popup: {
PositionElementAlignment

View File

@ -13,8 +13,8 @@
// limitations under the License.
-->
<script lang="ts">
import { Class, Doc, DocumentQuery, Ref, WithLookup, Space } from '@hcengineering/core'
import { Asset, IntlString } from '@hcengineering/platform'
import { Class, Doc, DocumentQuery, Ref, Space, WithLookup } from '@hcengineering/core'
import { Asset, getEmbeddedLabel, IntlString } from '@hcengineering/platform'
import { createQuery, getClient } from '@hcengineering/presentation'
import {
AnyComponent,
@ -36,6 +36,7 @@
setActiveViewletId,
ViewletSettingButton
} from '@hcengineering/view-resources'
import SourcePresenter from './search/SourcePresenter.svelte'
export let _class: Ref<Class<Doc>>
export let space: Ref<Space> | undefined = undefined
@ -108,7 +109,7 @@
<div class="ac-header__wrap-title mr-3">
<span class="ac-header__icon"><Icon {icon} size={'small'} /></span>
<span class="ac-header__title"><Label {label} /></span>
<div class="ml-4"><FilterButton {_class} {space} /></div>
<div class="ml-4"><FilterButton {_class} /></div>
</div>
<SearchEdit bind:value={search} />
@ -138,7 +139,20 @@
_class,
space,
options: viewlet.options,
config: preference?.config ?? viewlet.config,
config: [
...(search !== ''
? [
{
key: '',
presenter: SourcePresenter,
label: getEmbeddedLabel('#'),
sortingKey: '#score',
props: { search }
}
]
: []),
...(preference?.config ?? viewlet.config)
],
viewlet,
viewOptions,
createItemDialog: createComponent,

View File

@ -15,12 +15,15 @@
<script lang="ts">
import { Doc, WithLookup } from '@hcengineering/core'
import { IndexedDocumentPreview } from '@hcengineering/presentation'
import { tooltip } from '@hcengineering/ui'
import { showPopup } from '@hcengineering/ui'
export let value: WithLookup<Doc>
export let search: string
</script>
<span use:tooltip={{ component: IndexedDocumentPreview, props: { objectId: value._id, search } }}
>{value.$source?.$score}</span
<!-- svelte-ignore a11y-click-events-have-key-events -->
<span
on:click={() => {
showPopup(IndexedDocumentPreview, { objectId: value._id, search })
}}>{value.$source?.$score}</span
>

View File

@ -1,4 +1,4 @@
FROM node:16-alpine
FROM node:18-alpine
WORKDIR /usr/src/app

View File

@ -1,5 +1,5 @@
FROM node:16-alpine
FROM node:18-alpine
WORKDIR /usr/src/app

View File

@ -1,5 +1,5 @@
FROM node:16-alpine
FROM node:18-alpine
WORKDIR /usr/src/app

View File

70
pods/embeddings/model.py Normal file
View File

@ -0,0 +1,70 @@
from transformers import AutoTokenizer, AutoModel
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Model
import torch
import time
# from ratelimiter import RateLimiter
class EmbeddingService(object):
    """Produces sentence embeddings, text completions and cosine-similarity
    scores using a HuggingFace transformer model."""

    # Cosine similarity over the embedding dimension; shared by all instances.
    cos = torch.nn.CosineSimilarity(dim=0)

    def __init__(self, modelId, device='cpu'):
        # Load tokenizer + model from the huggingface model repository.
        # 'mps' on macOS also seems supported, but it does not work with
        # GPT2-based models for predictions — hence the 'cpu' default.
        self.device = torch.device(device)
        self.tokenizer = AutoTokenizer.from_pretrained(modelId, use_fast=True)
        self.model = AutoModel.from_pretrained(modelId).to(self.device)
        print('using', torch.get_num_threads())

    def mean_pooling(self, model_output, attention_mask):
        """Mean-pool token embeddings, taking the attention mask into account."""
        token_embeddings = model_output[0]  # first element holds all token embeddings
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return sum_embeddings / sum_mask

    def _max_positions(self):
        """Best-effort lookup of the model's positional-embedding capacity.

        GPT2-style models expose it as `wpe.num_embeddings`; BERT-style models
        as `embeddings.position_embeddings.num_embeddings`. Returns None when
        neither attribute chain exists.
        """
        try:
            return self.model.wpe.num_embeddings
        except AttributeError:
            pass
        try:
            return self.model.embeddings.position_embeddings.num_embeddings
        except AttributeError:
            return None

    def embeddings(self, sentences):
        """Return [embedding_tensor, token_count] for `sentences`.

        Raises when the tokenized input exceeds the model's context length.
        (Fix: the original wrapped the `raise` inside the same bare
        `try/except: pass` that guarded the attribute lookup, so the limit
        error itself was always silently swallowed.)
        """
        st = time.time()
        encoded_input = self.tokenizer(sentences, truncation=True, return_tensors='pt').to(self.device)
        print('token', len(sentences), len(encoded_input), len(encoded_input[0]))
        mi = self._max_positions()
        if mi is not None and len(encoded_input[0]) > mi:
            raise Exception("This model's maximum context length is " + str(mi) + " tokens, however you requested " + str(len(encoded_input[0])) + " tokens")
        # Compute token embeddings without tracking gradients.
        with torch.no_grad():
            model_output = self.model(**encoded_input)
        # Perform pooling — in this case, mean pooling.
        sentence_embeddings = self.mean_pooling(model_output, encoded_input['attention_mask'])
        ed = time.time()
        print('token', len(sentences), len(encoded_input), len(encoded_input[0]), ' time', (ed - st))
        return [sentence_embeddings[0], len(encoded_input[0])]

    def completion(self, text, do_sample=True, temperature=1.3, max_length=2048, **kwargs):
        """Generate a completion for `text`; returns the decoded first sequence.

        NOTE(review): a plain AutoModel generally lacks `generate`; presumably a
        generation-capable model id is supplied here — confirm with callers.
        """
        input_ids = self.tokenizer.encode(text, return_tensors="pt").to(self.device)
        out = self.model.generate(input_ids, do_sample=do_sample, temperature=temperature, max_length=max_length, **kwargs)
        return list(map(self.tokenizer.decode, out))[0]

    def compare(self, e1, e2):
        """Cosine similarity between two embedding vectors."""
        return self.cos(e1, e2)

View File

@ -0,0 +1,9 @@
# Overview
Enable Embeddings demo server for workspace.
```bash
cd ./dev/tool
rushx run-local openai <workspace> --host http://localhost:4070 --token 'token' --enable true --embeddings true
```

View File

@ -0,0 +1,2 @@
transformers
torch

154
pods/embeddings/server.py Normal file
View File

@ -0,0 +1,154 @@
from functools import partial
from http.server import BaseHTTPRequestHandler, HTTPServer
import json
import model as embeddings
import argparse
import traceback
def toArray(emb):
    # Convert a tensor (or any iterable of 0-d tensors) into a plain
    # list of Python numbers for JSON serialization.
    values = []
    for element in emb:
        values.append(element.item())
    return values
class EmbeddingsServer(BaseHTTPRequestHandler):
    """HTTP endpoint exposing the embedding service.

    POST routes: /embeddings, /completion, /compare. Any other path (or a
    handler failure caught in do_POST) yields a 200 with an error payload.
    """

    # Shared EmbeddingService instance, bound via functools.partial at startup.
    embService: embeddings.EmbeddingService

    def __init__(self, embService, *args, **kwargs):
        # Assign before super().__init__ — the base class dispatches the
        # request (calling do_POST) from inside its constructor.
        self.embService = embService
        super().__init__(*args, **kwargs)

    def do_POST(self):
        # Route by path; handlers write their own responses and we return.
        try:
            if self.path == '/embeddings':
                self.sendEmbeddings()
                return
            if self.path == '/completion':
                self.sendCompletion()
                return
            if self.path == '/compare':
                self.sendCompare()
                return
        except BaseException as e:
            # Best-effort: log and fall through to the generic error payload.
            print('Failed to process', e)
            pass
        # Unknown path (or handler failure): error payload with status 200.
        self.send_response(200)
        self.send_header("Content-type", "text/json")
        self.end_headers()
        obj = {
            "result": False,
            "error": "Unknown service"
        }
        self.wfile.write(bytes(json.dumps(obj), "utf-8"))

    def sendEmbeddings(self):
        # Body: {"model": ..., "input": ...} -> embedding vector + token usage.
        data = self.rfile.read(int(self.headers['Content-Length']))
        jsbody = json.loads(data)
        model = jsbody["model"]
        try:
            embeddings = self.embService.embeddings(jsbody["input"])
            emb = toArray(embeddings[0])
            obj = {
                "data": [
                    {
                        "embedding": emb,
                        "size": len(emb)
                    }
                ],
                "model": model,
                "usage": {
                    "prompt_tokens": embeddings[1],
                    "total_tokens": 1
                }
            }
            self.send_response(200)
            self.send_header("Content-type", "text/json")
            self.end_headers()
            self.wfile.write(bytes(json.dumps(obj), "utf-8"))
        except BaseException as e:
            # e.g. the input exceeded the model's context length → 400.
            # self.send_response(400, str(e))
            self.send_error(400, str(e))
            self.end_headers()
            print('error', e)
            traceback.print_exc()
            pass

    def sendCompletion(self):
        # Body: {"model": ..., "input": ..., "max_length": ..., "temperature": ...}.
        # NOTE(review): headers are sent before the body is parsed, so a failure
        # below still yields a 200 with an empty body — confirm this is acceptable.
        self.send_response(200)
        self.send_header("Content-type", "text/json")
        self.end_headers()
        data = self.rfile.read(int(self.headers['Content-Length']))
        jsbody = json.loads(data)
        completion = self.embService.completion(jsbody["input"], max_length=jsbody["max_length"], temperature=jsbody["temperature"] )
        model = jsbody["model"]
        obj = {
            "data": [
                {
                    "completion": completion
                }
            ],
            "model": model
        }
        self.wfile.write(bytes(json.dumps(obj), "utf-8"))

    def sendCompare(self):
        # Body: {"input": ..., "compare": ..., "model": ...} -> cosine
        # similarity of the two embeddings plus both raw vectors.
        self.send_response(200)
        self.send_header("Content-type", "text/json")
        self.end_headers()
        data = self.rfile.read(int(self.headers['Content-Length']))
        jsbody = json.loads(data)
        emb1 = self.embService.embeddings(jsbody["input"])
        emb2 = self.embService.embeddings(jsbody["compare"])
        model = jsbody["model"]
        e1 = toArray(emb1[0])
        e2 = toArray(emb2[0])
        obj = {
            "similarity": self.embService.compare(emb1[0], emb2[0]).item(),
            "input": e1,
            "input_len": len(e1),
            "compare": e2,
            "compare_len": len(e2),
            "model": model
        }
        self.wfile.write(bytes(json.dumps(obj), "utf-8"))
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog = 'Embedding\'s service')
# 1024, sentence-transformers/all-roberta-large-v1
# 386, sentence-transformers/all-MiniLM-L6-v2
parser.add_argument('--model', default="sentence-transformers/all-MiniLM-L6-v2")
parser.add_argument('--host', default="0.0.0.0")
parser.add_argument('--device', default='cpu')
parser.add_argument('--port', default=4070) # option that takes a value
args = parser.parse_args()
hostName = args.host
serverPort = args.port
device = args.device
model = args.model
print('loading model:', model, ' on device:', device)
emb = embeddings.EmbeddingService(model, device)
webServer = HTTPServer((hostName, serverPort), partial(EmbeddingsServer, emb), bind_and_activate=False)
webServer.allow_reuse_address = True
webServer.daemon_threads = True
webServer.server_bind()
webServer.server_activate()
print("Embedding started http://%s:%s" % (hostName, serverPort))
try:
webServer.serve_forever()
except KeyboardInterrupt:
pass
webServer.server_close()
print("Server stopped.")

View File

@ -1,4 +1,4 @@
FROM node:16-alpine
FROM node:18-alpine
RUN apk add dumb-init
ENV NODE_ENV production

View File

@ -1,4 +1,4 @@
FROM node:16
FROM node:18
WORKDIR /usr/src/app

View File

@ -5,7 +5,7 @@
"author": "Anticrm Platform Contributors",
"license": "EPL-2.0",
"scripts": {
"start": "cross-env MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 FRONT_URL=http://localhost:8087 MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret ts-node src/__start.ts",
"start": "cross-env MONGO_URL=mongodb://localhost:27017 ELASTIC_URL=http://localhost:9200 MINIO_ENDPOINT=localhost MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin METRICS_CONSOLE=true SERVER_SECRET=secret REKONI_URL=http://localhost:4004 ts-node src/__start.ts",
"build": "heft build",
"lint:fix": "eslint --fix src",
"bundle": "esbuild src/__start.ts --bundle --sourcemap=inline --minify --platform=node > bundle.js",
@ -81,6 +81,60 @@
"@hcengineering/middleware": "^0.6.0",
"@hcengineering/minio": "^0.6.0",
"@hcengineering/openai": "^0.6.0",
"@hcengineering/translate": "^0.6.0"
"@hcengineering/translate": "^0.6.0",
"@hcengineering/login-assets": "^0.6.0",
"@hcengineering/view-assets": "^0.6.0",
"@hcengineering/task-assets": "^0.6.0",
"@hcengineering/chunter-assets": "^0.6.0",
"@hcengineering/recruit-assets": "^0.6.0",
"@hcengineering/setting-assets": "^0.6.0",
"@hcengineering/contact-assets": "^0.6.0",
"@hcengineering/activity-assets": "^0.6.0",
"@hcengineering/automation-assets": "^0.6.0",
"@hcengineering/telegram-assets": "^0.6.0",
"@hcengineering/workbench-assets": "^0.6.0",
"@hcengineering/attachment-assets": "^0.6.0",
"@hcengineering/lead-assets": "^0.6.0",
"@hcengineering/gmail-assets": "^0.6.0",
"@hcengineering/inventory-assets": "^0.6.0",
"@hcengineering/templates-assets": "^0.6.0",
"@hcengineering/notification-assets": "^0.6.0",
"@hcengineering/preference-assets": "^0.6.0",
"@hcengineering/tags-assets": "^0.6.0",
"@hcengineering/calendar-assets": "^0.6.0",
"@hcengineering/tracker-assets": "^0.6.0",
"@hcengineering/board-assets": "^0.6.0",
"@hcengineering/hr-assets": "^0.6.0",
"@hcengineering/document-assets": "^0.6.0",
"@hcengineering/bitrix-assets": "^0.6.0",
"@hcengineering/request-assets": "^0.6.0",
"@hcengineering/login": "^0.6.1",
"@hcengineering/view": "^0.6.2",
"@hcengineering/task": "^0.6.1",
"@hcengineering/chunter": "^0.6.2",
"@hcengineering/recruit": "^0.6.4",
"@hcengineering/setting": "^0.6.2",
"@hcengineering/contact": "^0.6.9",
"@hcengineering/activity": "^0.6.0",
"@hcengineering/automation": "^0.6.0",
"@hcengineering/telegram": "^0.6.2",
"@hcengineering/workbench": "^0.6.2",
"@hcengineering/attachment": "^0.6.1",
"@hcengineering/lead": "^0.6.0",
"@hcengineering/gmail": "^0.6.0",
"@hcengineering/inventory": "^0.6.0",
"@hcengineering/templates": "^0.6.0",
"@hcengineering/notification": "^0.6.5",
"@hcengineering/preference": "^0.6.2",
"@hcengineering/tags": "^0.6.3",
"@hcengineering/calendar": "^0.6.2",
"@hcengineering/tracker": "^0.6.1",
"@hcengineering/board": "^0.6.0",
"@hcengineering/hr": "^0.6.0",
"@hcengineering/document": "^0.6.0",
"@hcengineering/bitrix": "^0.6.0",
"@hcengineering/request": "^0.6.0"
}
}

View File

@ -51,14 +51,6 @@ if (minioSecretKey === undefined) {
process.exit(1)
}
const openAIToken = process.env.OPENAI_TOKEN
if (openAIToken === undefined || openAIToken === '') {
console.info('OpenAI: Not enabled, please specify OPENAI_TOKEN.')
// process.exit(1)
} else {
console.info('OpenAI: Enabled.')
}
const minioConf = {
endPoint: minioEndpoint,
accessKey: minioAccessKey,
@ -77,33 +69,11 @@ if (rekoniUrl === undefined) {
process.exit(1)
}
const retranslateUrl = process.env.RETRANSLATE_URL
if (rekoniUrl === undefined) {
console.log('Please provide RETRANSLATE_URL url for translations')
}
const retranslateToken = process.env.RETRANSLATE_TOKEN
if (retranslateToken === undefined) {
console.log('Please provide retranslateToken url for translations token')
}
setMetadata(serverToken.metadata.Secret, serverSecret)
// eslint-disable-next-line @typescript-eslint/no-floating-promises
console.log(`starting server on ${serverPort}`)
const shutdown = start(
url,
elasticUrl,
minioConf,
{
rekoniUrl,
openAIToken,
retranslateUrl,
retranslateToken
},
serverPort,
''
)
const shutdown = start(url, elasticUrl, minioConf, rekoniUrl, serverPort, '')
const close = (): void => {
console.trace('Exiting from server')

View File

@ -20,14 +20,15 @@ import {
DOMAIN_TRANSIENT,
DOMAIN_TX,
MeasureContext,
ServerStorage,
WorkspaceId
} from '@hcengineering/core'
import { createElasticAdapter, createElasticBackupDataAdapter } from '@hcengineering/elastic'
import { ModifiedMiddleware, PrivateMiddleware } from '@hcengineering/middleware'
import { ConfigurationMiddleware, ModifiedMiddleware, PrivateMiddleware } from '@hcengineering/middleware'
import { MinioService } from '@hcengineering/minio'
import { createMongoAdapter, createMongoTxAdapter } from '@hcengineering/mongo'
import { OpenAIEmbeddingsStage } from '@hcengineering/openai'
import { addLocation } from '@hcengineering/platform'
import { OpenAIEmbeddingsStage, openAIId, openAIPluginImpl } from '@hcengineering/openai'
import { addLocation, addStringsLoader } from '@hcengineering/platform'
import {
BackupClientSession,
createMinioDataAdapter,
@ -41,10 +42,17 @@ import { serverCalendarId } from '@hcengineering/server-calendar'
import { serverChunterId } from '@hcengineering/server-chunter'
import { serverContactId } from '@hcengineering/server-contact'
import {
ContentRetrievalStage,
ContentTextAdapter,
createInMemoryAdapter,
createPipeline,
DbConfiguration,
FullTextPipelineStageFactory,
FullSummaryStage,
FullTextAdapter,
FullTextPipelineStage,
FullTextPushStage,
globalIndexer,
IndexedFieldStage,
MiddlewareCreator,
Pipeline
} from '@hcengineering/server-core'
@ -62,7 +70,72 @@ import { serverTelegramId } from '@hcengineering/server-telegram'
import { Token } from '@hcengineering/server-token'
import { serverTrackerId } from '@hcengineering/server-tracker'
import { BroadcastCall, ClientSession, start as startJsonRpc } from '@hcengineering/server-ws'
import { LibRetranslateStage } from '@hcengineering/translate'
import { activityId } from '@hcengineering/activity'
import { attachmentId } from '@hcengineering/attachment'
import { automationId } from '@hcengineering/automation'
import { bitrixId } from '@hcengineering/bitrix'
import { boardId } from '@hcengineering/board'
import { calendarId } from '@hcengineering/calendar'
import { chunterId } from '@hcengineering/chunter'
import { contactId } from '@hcengineering/contact'
import { documentId } from '@hcengineering/document'
import { gmailId } from '@hcengineering/gmail'
import { hrId } from '@hcengineering/hr'
import { inventoryId } from '@hcengineering/inventory'
import { leadId } from '@hcengineering/lead'
import { loginId } from '@hcengineering/login'
import { notificationId } from '@hcengineering/notification'
import { preferenceId } from '@hcengineering/preference'
import { recruitId } from '@hcengineering/recruit'
import { requestId } from '@hcengineering/request'
import { settingId } from '@hcengineering/setting'
import { tagsId } from '@hcengineering/tags'
import { taskId } from '@hcengineering/task'
import { telegramId } from '@hcengineering/telegram'
import { templatesId } from '@hcengineering/templates'
import { trackerId } from '@hcengineering/tracker'
import { viewId } from '@hcengineering/view'
import { workbenchId } from '@hcengineering/workbench'
// Register per-plugin i18n string loaders so server-side code (indexing,
// triggers, notifications) can resolve translated strings without a UI build.
addStringsLoader(loginId, async (lang: string) => await import(`@hcengineering/login-assets/lang/${lang}.json`))
addStringsLoader(taskId, async (lang: string) => await import(`@hcengineering/task-assets/lang/${lang}.json`))
addStringsLoader(viewId, async (lang: string) => await import(`@hcengineering/view-assets/lang/${lang}.json`))
addStringsLoader(chunterId, async (lang: string) => await import(`@hcengineering/chunter-assets/lang/${lang}.json`))
addStringsLoader(
attachmentId,
async (lang: string) => await import(`@hcengineering/attachment-assets/lang/${lang}.json`)
)
addStringsLoader(contactId, async (lang: string) => await import(`@hcengineering/contact-assets/lang/${lang}.json`))
addStringsLoader(recruitId, async (lang: string) => await import(`@hcengineering/recruit-assets/lang/${lang}.json`))
addStringsLoader(activityId, async (lang: string) => await import(`@hcengineering/activity-assets/lang/${lang}.json`))
addStringsLoader(
automationId,
async (lang: string) => await import(`@hcengineering/automation-assets/lang/${lang}.json`)
)
addStringsLoader(settingId, async (lang: string) => await import(`@hcengineering/setting-assets/lang/${lang}.json`))
addStringsLoader(telegramId, async (lang: string) => await import(`@hcengineering/telegram-assets/lang/${lang}.json`))
addStringsLoader(leadId, async (lang: string) => await import(`@hcengineering/lead-assets/lang/${lang}.json`))
addStringsLoader(gmailId, async (lang: string) => await import(`@hcengineering/gmail-assets/lang/${lang}.json`))
addStringsLoader(workbenchId, async (lang: string) => await import(`@hcengineering/workbench-assets/lang/${lang}.json`))
addStringsLoader(inventoryId, async (lang: string) => await import(`@hcengineering/inventory-assets/lang/${lang}.json`))
addStringsLoader(templatesId, async (lang: string) => await import(`@hcengineering/templates-assets/lang/${lang}.json`))
addStringsLoader(
notificationId,
async (lang: string) => await import(`@hcengineering/notification-assets/lang/${lang}.json`)
)
addStringsLoader(tagsId, async (lang: string) => await import(`@hcengineering/tags-assets/lang/${lang}.json`))
addStringsLoader(calendarId, async (lang: string) => await import(`@hcengineering/calendar-assets/lang/${lang}.json`))
addStringsLoader(trackerId, async (lang: string) => await import(`@hcengineering/tracker-assets/lang/${lang}.json`))
addStringsLoader(boardId, async (lang: string) => await import(`@hcengineering/board-assets/lang/${lang}.json`))
addStringsLoader(
preferenceId,
async (lang: string) => await import(`@hcengineering/preference-assets/lang/${lang}.json`)
)
addStringsLoader(hrId, async (lang: string) => await import(`@hcengineering/hr-assets/lang/${lang}.json`))
addStringsLoader(documentId, async (lang: string) => await import(`@hcengineering/document-assets/lang/${lang}.json`))
addStringsLoader(bitrixId, async (lang: string) => await import(`@hcengineering/bitrix-assets/lang/${lang}.json`))
addStringsLoader(requestId, async (lang: string) => await import(`@hcengineering/request-assets/lang/${lang}.json`))
/**
* @public
@ -71,12 +144,7 @@ export function start (
dbUrl: string,
fullTextUrl: string,
minioConf: MinioConfig,
services: {
rekoniUrl: string
openAIToken?: string
retranslateUrl?: string
retranslateToken?: string
},
rekoniUrl: string,
port: number,
productId: string,
host?: string
@ -97,45 +165,62 @@ export function start (
addLocation(serverTelegramId, () => import('@hcengineering/server-telegram-resources'))
addLocation(serverRequestId, () => import('@hcengineering/server-request-resources'))
addLocation(serverHrId, () => import('@hcengineering/server-hr-resources'))
addLocation(openAIId, () => Promise.resolve({ default: openAIPluginImpl }))
const middlewares: MiddlewareCreator[] = [ModifiedMiddleware.create, PrivateMiddleware.create]
const middlewares: MiddlewareCreator[] = [
ModifiedMiddleware.create,
PrivateMiddleware.create,
ConfigurationMiddleware.create
]
const fullText = getMetricsContext().newChild('fulltext', {})
function createIndexStages (fullText: MeasureContext, workspace: WorkspaceId): FullTextPipelineStageFactory[] {
const stages: FullTextPipelineStageFactory[] = []
function createIndexStages (
fullText: MeasureContext,
workspace: WorkspaceId,
adapter: FullTextAdapter,
storage: ServerStorage,
storageAdapter: MinioService,
contentAdapter: ContentTextAdapter
): FullTextPipelineStage[] {
// Allow 2 workspaces to be indexed in parallel
globalIndexer.allowParallel = 2
globalIndexer.processingSize = 1000
if (services.retranslateUrl !== undefined && services.retranslateUrl !== '') {
// Add translation stage
stages.push((adapter, stages) => {
const stage = new LibRetranslateStage(
fullText.newChild('retranslate', {}),
services.retranslateUrl as string,
services.retranslateToken ?? '',
workspace
)
for (const st of stages) {
// Clear retranslation on content change.
st.updateFields.push((doc, upd, el) => stage.update(doc, upd, el))
}
return stage
})
}
if (services.openAIToken !== undefined) {
const token = services.openAIToken
stages.push((adapter, stages) => {
const stage = new OpenAIEmbeddingsStage(adapter, fullText.newChild('embeddings', {}), token, workspace)
for (const st of stages) {
// Clear embeddings in case of any changes.
st.updateFields.push((doc, upd, el) => stage.update(doc, upd, el))
}
// We depend on all available stages.
stage.require = stages.map((it) => it.stageId)
const stages: FullTextPipelineStage[] = []
// Add regular stage to for indexable fields change tracking.
stages.push(new IndexedFieldStage(storage, fullText.newChild('fields', {})))
// Obtain text content from storage(like minio) and use content adapter to convert files to text content.
stages.push(new ContentRetrievalStage(storageAdapter, workspace, fullText.newChild('content', {}), contentAdapter))
// // Add any => english language translation
// const retranslateStage = new LibRetranslateStage(fullText.newChild('retranslate', {}), workspace)
// retranslateStage.clearExcept = stages.map(it => it.stageId)
// for (const st of stages) {
// // Clear retranslation on content change.
// st.updateFields.push((doc, upd) => retranslateStage.update(doc, upd))
// }
// stages.push(retranslateStage)
// Summary stage
const summaryStage = new FullSummaryStage()
stages.push(summaryStage)
// Push all content to elastic search
const pushStage = new FullTextPushStage(adapter, workspace, fullText.newChild('push', {}))
stages.push(pushStage)
// OpenAI prepare stage
const openAIStage = new OpenAIEmbeddingsStage(adapter, fullText.newChild('embeddings', {}), workspace)
// We depend on all available stages.
openAIStage.require = stages.map((it) => it.stageId)
openAIStage.updateSummary(summaryStage)
stages.push(openAIStage)
// Do not clear anything
stage.clearExcept = [...stages.map((it) => it.stageId), stage.stageId]
return stage
})
}
return stages
}
@ -181,11 +266,12 @@ export function start (
factory: createElasticAdapter,
url: fullTextUrl,
metrics: fullText,
stages: createIndexStages(fullText, workspace)
stages: (adapter, storage, storageAdapter, contentAdapter) =>
createIndexStages(fullText, workspace, adapter, storage, storageAdapter, contentAdapter)
},
contentAdapter: {
factory: createRekoniAdapter,
url: services.rekoniUrl,
url: rekoniUrl,
metrics: getMetricsContext().newChild('content', {})
},
storageFactory: () =>

View File

@ -5,6 +5,7 @@
"rootDir": "./src",
"outDir": "./lib",
"esModuleInterop": true,
"resolveJsonModule": true,
"types": ["node"]
}
}

View File

@ -1471,13 +1471,23 @@
},
{
"packageName": "@hcengineering/openai",
"projectFolder": "server/openai",
"projectFolder": "server-plugins/openai",
"shouldPublish": true
},
{
"packageName": "@hcengineering/translate",
"projectFolder": "server/translate",
"shouldPublish": true
}
},
{
"packageName": "@hcengineering/model-server-translate",
"projectFolder": "models/server-translate",
"shouldPublish": true
},
{
"packageName": "@hcengineering/model-server-openai",
"projectFolder": "models/server-openai",
"shouldPublish": true
},
]
}

View File

@ -0,0 +1,4 @@
*
!/lib/**
!CHANGELOG.md
/lib/**/__tests__/

View File

@ -24,14 +24,17 @@
"eslint-config-standard-with-typescript": "^23.0.0",
"prettier": "^2.7.1",
"@rushstack/heft": "^0.47.9",
"typescript": "^4.3.5"
"typescript": "^4.3.5",
"@types/html-to-text": "^8.1.1"
},
"dependencies": {
"@hcengineering/core": "^0.6.20",
"@hcengineering/platform": "^0.6.8",
"@hcengineering/server-core": "^0.6.1",
"@hcengineering/server": "^0.6.4",
"@hcengineering/chunter": "^0.6.2",
"got": "^11.8.3",
"fast-equals": "^2.0.3"
"fast-equals": "^2.0.3",
"html-to-text": "^9.0.3"
}
}

View File

@ -184,6 +184,30 @@ export function encode (text: string): number[] {
return bpeTokens
}
/**
 * Split `text` into pieces whose BPE token count does not exceed `limit`.
 *
 * Tokens are accumulated word-match by word-match (using the shared `pat`
 * regex); when adding the next word would overflow the limit, the buffered
 * tokens are decoded back to a string and flushed as one chunk.
 */
export function chunks (text: string, limit: number): string[] {
  const pieces: string[] = []
  let buffer: number[] = []
  for (const match of text.matchAll(pat)) {
    const word = match[0]
    // Re-encode the word bytes into the byte-pair vocabulary alphabet.
    const mapped = encodeStr(word)
      .map((b) => byteEncoder[b])
      .join('')
    const tokenIds: Array<any> = (bpe(mapped) ?? '').split(' ').map((t: any) => (encoder as any)[t] as number)
    if (buffer.length + tokenIds.length > limit) {
      // Flush the current chunk before it would exceed the token limit.
      pieces.push(decode(buffer))
      buffer = []
    }
    buffer = buffer.concat(tokenIds)
  }
  if (buffer.length > 0) {
    pieces.push(decode(buffer))
  }
  return pieces
}
export function decode (tokens: number[]) {
let text = tokens.map((x) => decoder[x]).join('')
text = decodeStr(text.split('').map((x) => byteDecoder[x]))

View File

@ -0,0 +1,5 @@
// Public entry point of the OpenAI server plugin: embeddings indexing stage,
// configuration types, plugin descriptor and trigger resources.
export * from './openai'
export * from './types'
export { default } from './plugin'
export * from './plugin'
export * from './resources'

View File

@ -0,0 +1,354 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import core, {
Class,
Doc,
DocIndexState,
DocumentQuery,
DocumentUpdate,
docUpdKey,
MeasureContext,
Ref,
Storage,
WorkspaceId
} from '@hcengineering/core'
import {
contentStageId,
docKey,
DocUpdateHandler,
fieldStateId,
FullSummaryStage,
FullTextAdapter,
FullTextPipeline,
FullTextPipelineStage,
IndexedDoc,
isIndexingRequired,
RateLimitter
} from '@hcengineering/server-core'
import got from 'got'
import { chunks } from './encoder/encoder'
import openaiPlugin, { openAIRatelimitter } from './plugin'
/**
* @public
*/
export const openAIstage = 'emb-v3'
/**
* @public
*/
export interface OpenAIEmbeddingResponse {
  // One embedding vector per input; this client only reads data[0].
  data: {
    embedding: number[]
  }[]
  // Token accounting as reported by the embeddings endpoint.
  usage: {
    prompt_tokens: number
    total_tokens: number
  }
}
/**
* @public
*/
/**
 * Full-text pipeline stage that computes OpenAI embeddings for document
 * summaries and stores them in the full-text adapter for semantic search.
 *
 * Configuration (token, endpoint, limits) is read from the workspace's
 * OpenAIConfiguration document in {@link initialize}.
 */
export class OpenAIEmbeddingsStage implements FullTextPipelineStage {
  require = [fieldStateId, contentStageId]
  stageId = openAIstage

  // Minimum summary length (characters) worth sending for embedding.
  treshold = 50

  // Set after a 401 response; stops further API calls until reconfigured.
  unauthorized = false

  // Base field names; suffixed with the embedding dimension in initialize().
  field = 'openai_embedding'
  field_enabled = '_use'
  summary_field = 'summary'

  enabled = true

  clearExcept?: string[] = undefined
  updateFields: DocUpdateHandler[] = []

  model = process.env.OPENAI_MODEL ?? 'text-embedding-ada-002'
  // Maximum tokens per request; model dependent, refreshed from configuration.
  tokenLimit = 8191

  endpoint = process.env.OPENAI_HOST ?? 'https://api.openai.com/v1/embeddings'
  token = ''

  // Number of concurrent embedding requests allowed by the local limiter.
  rate = 5
  limitter = new RateLimitter(() => ({ rate: this.rate }))

  async update (doc: DocIndexState, update: DocumentUpdate<DocIndexState>): Promise<void> {}

  constructor (readonly adapter: FullTextAdapter, readonly metrics: MeasureContext, readonly workspaceId: WorkspaceId) {}

  /**
   * Exclude GPT-generated markup from summaries so we do not embed our own
   * generated answers.
   */
  updateSummary (summary: FullSummaryStage): void {
    summary.fieldFilter.push((attr, value) => {
      const lower = value.toLocaleLowerCase()
      // BUGFIX: the second prefix previously was 'gpt Answer:' (capital 'A'),
      // which can never match a lower-cased value.
      if (attr.type._class === core.class.TypeMarkup && (lower.startsWith('gpt:') || lower.startsWith('gpt answer:'))) {
        return false
      }
      return true
    })
  }

  /**
   * Load workspace configuration and, when it changed, probe the API once to
   * learn the embedding dimensionality and register a matching mapping.
   */
  async initialize (storage: Storage, pipeline: FullTextPipeline): Promise<void> {
    try {
      const config = await storage.findAll(openaiPlugin.class.OpenAIConfiguration, {})
      let needCheck = 0
      if (config.length > 0) {
        if (this.enabled !== config[0].embeddings) {
          needCheck++
          this.enabled = config[0].embeddings
        }
        if (this.token !== config[0].token) {
          this.token = config[0].token
          needCheck++
        }
        // BUGFIX: the previous `(endpoint + '/embeddings').replace('//', '/')`
        // also collapsed the protocol separator, turning 'https://…' into
        // 'https:/…'. Join the URL explicitly instead.
        const ep = config[0].endpoint.replace(/\/+$/, '') + '/embeddings'
        if (this.endpoint !== ep) {
          this.endpoint = ep
          needCheck++
        }
        if (this.tokenLimit !== config[0].tokenLimit) {
          this.tokenLimit = config[0].tokenLimit
          needCheck++
        }
      } else {
        // No configuration document — stage is disabled.
        this.enabled = false
      }

      if (needCheck > 0 && this.enabled) {
        // Probe with a dummy input to discover the vector dimension, then
        // register the dense-vector mapping under a dimension-suffixed field.
        const emb = await this.getEmbedding('dummy')
        const dim = emb.data[0].embedding.length
        this.field = `${this.field}_${dim}`
        this.field_enabled = this.field + this.field_enabled
        await this.adapter.initMapping({ key: this.field, dims: dim })
      }
    } catch (err: any) {
      console.error(err)
      this.enabled = false
    }
  }

  /**
   * Fetch an embedding for `text`, splitting it into token-limited chunks and
   * summing per-chunk vectors. Halves the chunk size and retries when the API
   * reports the context length was exceeded.
   */
  async getEmbedding (text: string): Promise<OpenAIEmbeddingResponse> {
    if (this.token === '') {
      // Not configured: return an empty embedding instead of calling the API.
      return {
        data: [
          {
            embedding: []
          }
        ],
        usage: {
          total_tokens: 0,
          prompt_tokens: 0
        }
      }
    }
    let limit = this.tokenLimit
    let response: OpenAIEmbeddingResponse | undefined
    while (true) {
      const parts = chunks(text, limit)
      let chunkChange = false
      for (const part of parts) {
        try {
          const embeddingData = await openAIRatelimitter.exec(
            async () =>
              await got.post(this.endpoint, {
                headers: {
                  'Content-Type': 'application/json',
                  Authorization: `Bearer ${this.token}`
                },
                json: {
                  input: part,
                  model: this.model
                },
                timeout: 15000
              })
          )
          const res = JSON.parse(embeddingData.body) as OpenAIEmbeddingResponse
          if (response === undefined) {
            response = res
          } else {
            // Combine chunk embeddings by element-wise summation.
            response.data[0].embedding = response.data[0].embedding.map((it, idx) => it + res.data[0].embedding[idx])
            response.usage.prompt_tokens += res.usage.prompt_tokens
            response.usage.total_tokens += res.usage.total_tokens
          }
        } catch (err: any) {
          const msg = (err.message ?? '') as string
          if (msg.includes('maximum context length is')) {
            // Still too large: shrink chunks and restart from scratch.
            // BUGFIX: keep the limit a positive integer (was `l = l / 2`,
            // which could become fractional or reach zero).
            limit = Math.max(1, Math.floor(limit / 2))
            chunkChange = true
            response = undefined
            break
          }
          // BUGFIX: rethrow the original error. `throw new Error(err)` lost
          // the stack and rewrote err.message, breaking callers that match on
          // messages like 'Response code 401 (Unauthorized)'.
          throw err
        }
      }
      if (chunkChange) {
        continue
      }
      break
    }
    if (response === undefined) {
      throw new Error('Failed to retrieve embedding')
    }
    return response
  }

  /**
   * Semantic search: embed the query and delegate to the adapter's
   * embedding-aware search. Passes through (pass=true) when disabled or when
   * nothing is found, so the regular full-text search can take over.
   */
  async search (
    _classes: Ref<Class<Doc>>[],
    query: DocumentQuery<Doc>,
    size: number | undefined,
    from?: number
  ): Promise<{ docs: IndexedDoc[], pass: boolean }> {
    if (this.token === '' || !this.enabled) {
      return {
        docs: [],
        pass: true
      }
    }
    if (query.$search === undefined) return { docs: [], pass: true }
    const embeddingData = await this.getEmbedding(query.$search)
    const embedding = embeddingData.data[0].embedding
    const docs = await this.adapter.searchEmbedding(_classes, query, embedding, {
      size,
      from,
      minScore: 0,
      embeddingBoost: 100,
      field: this.field,
      field_enable: this.field_enabled,
      fulltextBoost: 1
    })
    return {
      docs,
      pass: docs.length === 0
    }
  }

  /**
   * Process a batch of documents, bounded by the local rate limiter.
   */
  async collect (toIndex: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    for (const doc of toIndex) {
      if (pipeline.cancelling) {
        return
      }
      await this.limitter.add(() => this.collectDoc(doc, pipeline))
    }
    await this.limitter.waitProcessing()
  }

  /**
   * Embed a single document's summary (if long enough) and record the result
   * both in the adapter and in the pipeline's doc-index state.
   */
  async collectDoc (doc: DocIndexState, pipeline: FullTextPipeline): Promise<void> {
    if (pipeline.cancelling) {
      return
    }
    const needIndex = isIndexingRequired(pipeline, doc)

    const update: DocumentUpdate<DocIndexState> = {}
    // Mark as empty by default and match embedding dimension for elastic.
    await this.update(doc, update)

    // No need to index this class — mark the stage complete with no changes.
    if (!needIndex) {
      await pipeline.update(doc._id, true, {})
      return
    }

    if (this.token === '') {
      // No token configured, nothing to do.
      await pipeline.update(doc._id, true, {})
      return
    }

    try {
      if (this.unauthorized) {
        return
      }
      const embeddingText = (doc.attributes[this.summary_field] as string) ?? ''
      if (embeddingText.length > this.treshold) {
        console.log('calculate embeddings:', doc.objectClass, doc._id)
        let embeddingData: OpenAIEmbeddingResponse | undefined
        while (true) {
          try {
            embeddingData = await this.metrics.with(
              'fetch-embeddings',
              {},
              async () => await this.getEmbedding(embeddingText)
            )
            break
          } catch (err: any) {
            // Transient failures: back off for a second, then retry.
            // BUGFIX: the ECONNREFUSED branch previously slept but then fell
            // through to the rethrow, making the wait pointless.
            if (((err.message as string) ?? '').includes('connect ECONNREFUSED')) {
              await new Promise((resolve) => setTimeout(resolve, 1000))
              continue
            }
            if (err.message === 'Response code 429 (Too Many Requests)') {
              await new Promise((resolve) => setTimeout(resolve, 1000))
              continue
            }
            throw err
          }
        }
        const embedding: number[] = embeddingData.data[0].embedding
        await this.adapter.update(doc._id, {
          [this.field]: embedding,
          [this.field_enabled]: true
        })
        ;(update as any)[docUpdKey(this.field)] = embedding.length
        ;(update as any)[docUpdKey(this.field_enabled)] = true
      }
    } catch (err: any) {
      if (err.message === 'Response code 401 (Unauthorized)') {
        // Token rejected — disable further calls until reconfigured.
        this.unauthorized = true
      }
      const wasError = doc.attributes.error !== undefined
      await pipeline.update(doc._id, false, { [docKey('error')]: JSON.stringify(err) })
      if (wasError) {
        return
      }
      // Print the error only the first time; it is also stored in the index doc.
      console.error(err)
      return
    }
    // Collected all fields — finalize the embedding document state.
    await pipeline.update(doc._id, true, update)
  }

  /**
   * Removal of indexed content is handled by the field processor; just mark
   * the stage complete for each document.
   */
  async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    for (const doc of docs) {
      await pipeline.update(doc._id, true, {})
    }
  }
}

View File

@ -0,0 +1,44 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import type { Plugin, Resource } from '@hcengineering/platform'
import { plugin } from '@hcengineering/platform'
import type { Account, Class, Ref } from '@hcengineering/core'
import { TriggerFunc } from '@hcengineering/server-core'
import type { OpenAIConfiguration } from './types'
export * from './types'
/**
* @public
*/
export const openAIId = 'openai' as Plugin
/**
* @public
*/
const openaiPlugin = plugin(openAIId, {
  trigger: {
    // Server trigger reacting to comments starting with 'gpt:' (resources.ts).
    OnGPTRequest: '' as Resource<TriggerFunc>
  },
  class: {
    // Workspace-level configuration document (token, endpoint, limits).
    OpenAIConfiguration: '' as Ref<Class<OpenAIConfiguration>>
  },
  account: {
    // Virtual account intended as the author of GPT-generated comments.
    GPT: '' as Ref<Account>
  }
})
export default openaiPlugin

View File

@ -0,0 +1,203 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
/* eslint-disable @typescript-eslint/explicit-function-return-type */
import chunter, { Comment } from '@hcengineering/chunter'
import core, {
Doc,
DocIndexState,
Ref,
Tx,
TxCollectionCUD,
TxCreateDoc,
TxCUD,
TxProcessor,
TxUpdateDoc
} from '@hcengineering/core'
import type { TriggerControl } from '@hcengineering/server-core'
import got from 'got'
import { convert } from 'html-to-text'
import { encode } from './encoder/encoder'
import openai, { openAIRatelimitter } from './plugin'
/**
* @public
*/
/**
 * Trigger: when a comment starting with 'gpt:' is created or updated, send its
 * body to the OpenAI completions API and attach the answers as new comments on
 * the same parent document.
 *
 * The first line after 'gpt:' may carry comma-separated key=value overrides
 * (max_tokens, temperature, top_p, n, stop). `${field}` placeholders in the
 * prompt are substituted from the parent document's indexed attributes.
 *
 * @returns the created comment transactions, or [] when not applicable.
 */
export async function OnGPTRequest (tx: Tx, tc: TriggerControl): Promise<Tx[]> {
  const actualTx = TxProcessor.extractTx(tx)
  // Skip transactions authored by the GPT account itself to avoid loops.
  if (tc.hierarchy.isDerived(actualTx._class, core.class.TxCUD) && actualTx.modifiedBy !== openai.account.GPT) {
    const cud: TxCUD<Doc> = actualTx as TxCUD<Doc>

    if (tc.hierarchy.isDerived(cud.objectClass, chunter.class.Comment)) {
      // Extract the created or updated comment markup.
      let msg = ''
      if (actualTx._class === core.class.TxCreateDoc) {
        msg = (cud as TxCreateDoc<Comment>).attributes.message
      } else if (actualTx._class === core.class.TxUpdateDoc) {
        msg = (cud as TxUpdateDoc<Comment>).operations.message ?? ''
      }

      const text = convert(msg, {
        preserveNewlines: true,
        selectors: [{ selector: 'img', format: 'skip' }]
      })
      if (text.toLocaleLowerCase().startsWith('gpt:')) {
        const [config] = await tc.findAll(openai.class.OpenAIConfiguration, {})
        if (config?.enabled ?? false) {
          // Enabled — we can request a completion.
          const split = text.split('\n')
          let prompt = split.slice(1).join('\n').trim()

          // Collect ${field} placeholders used in the prompt.
          const matches: string[] = []
          for (const m of prompt.matchAll(/\${(\w+)}/gm)) {
            for (const mm of m.values()) {
              if (!mm.startsWith('${')) {
                matches.push(mm)
              }
            }
          }
          const parentTx = tx as TxCollectionCUD<Doc, Comment>

          const [indexedData] = await tc.findAll(core.class.DocIndexState, {
            _id: parentTx.objectId as Ref<DocIndexState>
          })
          const [parentDoc] = await tc.findAll(parentTx.objectClass, { _id: parentTx.objectId as Ref<DocIndexState> })
          if (matches.length > 0) {
            if (indexedData !== undefined) {
              // Substitute placeholder values from indexed attributes, falling
              // back to the parent document's own fields.
              for (const m of matches) {
                const val = indexedData.attributes[m] ?? (parentDoc as any)[m]
                if (val !== undefined) {
                  prompt = prompt.replace(`\${${m}}`, val)
                }
              }
            }
          }

          // Completion defaults; overridable via the 'gpt:' header line.
          const options = {
            max_tokens: 4000,
            temperature: 0.9,
            top_p: 1,
            n: 1,
            stop: null as string | null
          }
          const configLine = split[0].slice(4).split(',')
          for (const cfg of configLine) {
            const vals = cfg.trim().split('=')
            if (vals.length === 2) {
              switch (vals[0].trim()) {
                case 'max_tokens':
                  options.max_tokens = parseInt(vals[1])
                  break
                case 'temperature':
                  options.temperature = parseFloat(vals[1])
                  break
                case 'top_p':
                  // BUGFIX: top_p is a nucleus-sampling probability in [0, 1];
                  // parseInt would truncate e.g. '0.5' down to 0.
                  options.top_p = parseFloat(vals[1])
                  break
                case 'n':
                  options.n = parseInt(vals[1])
                  break
                case 'stop':
                  options.stop = vals[1]
                  break
              }
            }
          }
          const ep = config.endpoint + '/completions'

          // Reserve room for the prompt itself inside the token budget.
          const tokens = encode(prompt).length

          let response: any
          try {
            response = await openAIRatelimitter.exec(
              async () =>
                await got
                  .post(ep, {
                    headers: {
                      'Content-Type': 'application/json',
                      Authorization: `Bearer ${config.token}`
                    },
                    json: {
                      model: 'text-davinci-003',
                      prompt,
                      max_tokens: options.max_tokens - tokens,
                      temperature: options.temperature,
                      top_p: options.top_p,
                      n: options.n,
                      stream: false,
                      logprobs: null,
                      stop: options.stop
                    },
                    timeout: 60000
                  })
                  .json()
            )
          } catch (e: any) {
            console.error(e)
          }
          // BUGFIX: on a failed request `response` stayed undefined and the
          // loop below crashed with a TypeError; bail out instead.
          if (response?.choices === undefined) {
            return []
          }

          const result: Tx[] = []

          for (const choices of response.choices) {
            const msgTx = tc.txFactory.createTxCreateDoc(chunter.class.Comment, tx.objectSpace, {
              message: 'gpt Answer:\n<br/>' + (choices.text as string),
              attachedTo: parentTx.objectId,
              attachedToClass: parentTx.objectClass,
              collection: parentTx.collection
            })
            // msgTx.modifiedBy = openai.account.GPT
            const col = tc.txFactory.createTxCollectionCUD(
              parentTx.objectClass,
              parentTx.objectId,
              parentTx.objectSpace,
              parentTx.collection,
              msgTx
            )
            // col.modifiedBy = openai.account.GPT
            result.push(col)
          }

          // Store the generated response transactions.
          await tc.txFx(async (st) => {
            for (const t of result) {
              await st.tx(t)
            }
          })
          return result
        }
      }
    }
  }
  return []
}
/**
* @public
*/
// Resource loader for the OpenAI server plugin: exposes the OnGPTRequest
// trigger implementation to the platform's addLocation() mechanism.
export const openAIPluginImpl = async () => ({
  trigger: {
    OnGPTRequest
  }
})

View File

@ -0,0 +1,33 @@
import { Configuration } from '@hcengineering/core'
import { RateLimitter } from '@hcengineering/server-core'
/**
* @public
*/
export interface OpenAIConfiguration extends Configuration {
  // OpenAI API key; an empty string disables API calls.
  token: string
  // Base API URL, e.g. 'https://api.openai.com/v1'.
  endpoint: string
  // Maximum tokens per embedding request (model dependent).
  tokenLimit: number
  // Whether the embeddings indexing stage is active.
  embeddings: boolean
}
/**
* @public
*/
// Default values applied when creating/normalizing an OpenAIConfiguration.
export const openAIConfigDefaults: {
  [key in keyof Pick<
    OpenAIConfiguration,
    'enabled' | 'endpoint' | 'tokenLimit' | 'embeddings'
  >]: OpenAIConfiguration[key]
} = {
  endpoint: 'https://api.openai.com/v1',
  // 8191 tokens — request limit of the text-embedding-ada-002 model.
  tokenLimit: 8191,
  // Embedding indexing is opt-in.
  embeddings: false,
  enabled: true
}
/**
* @public
*
*/
export const openAIRatelimitter = new RateLimitter(() => ({ rate: 3 }))

View File

@ -179,7 +179,6 @@ export async function backup (transactorUrl: string, workspaceId: WorkspaceId, s
try {
const it = await connection.loadChunk(c, idx)
idx = it.idx
console.log(needRetrieveChunks.length)
const needRetrieve: Ref<Doc>[] = []

View File

@ -24,12 +24,14 @@
"eslint-config-standard-with-typescript": "^23.0.0",
"prettier": "^2.7.1",
"@rushstack/heft": "^0.47.9",
"typescript": "^4.3.5"
"typescript": "^4.3.5",
"@types/html-to-text": "^8.1.1"
},
"dependencies": {
"@hcengineering/core": "^0.6.20",
"@hcengineering/platform": "^0.6.8",
"@hcengineering/minio": "^0.6.0",
"fast-equals": "^2.0.3"
"fast-equals": "^2.0.3",
"html-to-text": "^9.0.3"
}
}

View File

@ -144,7 +144,7 @@ export class FullTextIndex implements WithFind {
classes = classes.filter((it, idx, arr) => arr.indexOf(it) === idx)
const fullTextLimit = 10000
const fullTextLimit = options?.limit ?? 200
let { docs, pass } = await this.indexer.search(classes, query, fullTextLimit)
if (docs.length === 0 && pass) {

View File

@ -21,6 +21,7 @@ import core, {
DocumentUpdate,
MeasureContext,
Ref,
Storage,
WorkspaceId
} from '@hcengineering/core'
import { MinioService } from '@hcengineering/minio'
@ -38,12 +39,14 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
extra = ['content', 'base64']
digest = '^digest'
enabled = true
// Clear all except following.
clearExcept: string[] = [fieldStateId, contentStageId]
updateFields: DocUpdateHandler[] = []
limit = 100
textLimit = 100 * 1024
constructor (
readonly storageAdapter: MinioService | undefined,
@ -52,10 +55,14 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
private readonly contentAdapter: ContentTextAdapter
) {}
async initialize (storage: Storage, pipeline: FullTextPipeline): Promise<void> {
// Just do nothing
}
async search (
_classes: Ref<Class<Doc>>[],
search: DocumentQuery<Doc>,
size: number | undefined,
size?: number,
from?: number
): Promise<{ docs: IndexedDoc[], pass: boolean }> {
return { docs: [], pass: true }
@ -74,7 +81,6 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
const attributes = getFullTextAttributes(pipeline.hierarchy, doc.objectClass)
// Copy content attributes as well.
const update: DocumentUpdate<DocIndexState> = {}
const elasticUpdate: Partial<IndexedDoc> = {}
if (pipeline.cancelling) {
return
@ -92,13 +98,15 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
} catch (err: any) {
// not found.
}
if (docInfo !== undefined) {
if (docInfo !== undefined && docInfo.size < 30 * 1024 * 1024) {
// We have blob, we need to decode it to string.
const contentType = (docInfo.metaData['content-type'] as string) ?? ''
const contentType = ((docInfo.metaData['content-type'] as string) ?? '').split(';')[0]
if (!contentType.includes('image')) {
const digest = docInfo.etag
if (doc.attributes[docKey(val.name + this.digest, { _class: val.attributeOf })] !== digest) {
;(update as any)[docUpdKey(val.name + this.digest, { _class: val.attributeOf })] = digest
const digestKey = docKey(val.name + '.' + val.attributeOf + this.digest)
if (doc.attributes[digestKey] !== digest) {
;(update as any)[docUpdKey(digestKey)] = digest
const readable = await this.storageAdapter?.get(this.workspace, ref)
@ -110,38 +118,28 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
)
textContent = textContent
.split(/ |\t|\f/)
.split(/ +|\t+|\f+/)
.filter((it) => it)
.join(' ')
.split(/\n+/)
.split(/\n\n+/)
.join('\n')
// if (textContent.length > 100 * 1024) {
// textContent = textContent.substring(0, 100 * 1024) // Allow only first 128kb of data.
// }
// trim to large content
if (textContent.length > this.textLimit) {
textContent = textContent.slice(0, this.textLimit)
}
textContent = Buffer.from(textContent).toString('base64')
;(update as any)[docUpdKey(val.name, { _class: val.attributeOf, extra: this.extra })] = textContent
elasticUpdate[docKey(val.name, { _class: val.attributeOf, extra: this.extra })] = textContent
if (doc.attachedTo != null) {
const parentUpdate: DocumentUpdate<DocIndexState> = {}
const parentElasticUpdate: Partial<IndexedDoc> = {}
;(parentUpdate as any)[
docUpdKey(val.name, { _class: val.attributeOf, docId: doc._id, extra: this.extra })
] = textContent
parentElasticUpdate[
docKey(val.name, { _class: val.attributeOf, docId: doc._id, extra: this.extra })
] = textContent
// We do not need to pull stage, just update elastic with document.
await pipeline.update(
doc.attachedTo as Ref<DocIndexState>,
true,
parentUpdate,
parentElasticUpdate
)
await pipeline.update(doc.attachedTo as Ref<DocIndexState>, true, parentUpdate)
}
}
}
@ -153,7 +151,7 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
} catch (err: any) {
const wasError = (doc as any).error !== undefined
await pipeline.update(doc._id, false, { [docKey('error')]: JSON.stringify({ message: err.message, err }) }, {})
await pipeline.update(doc._id, false, { [docKey('error')]: JSON.stringify({ message: err.message, err }) })
if (wasError) {
return
}
@ -162,10 +160,13 @@ export class ContentRetrievalStage implements FullTextPipelineStage {
return
}
await pipeline.update(doc._id, true, update, elasticUpdate)
await pipeline.update(doc._id, true, update)
}
async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
// will be handled by field processor
for (const doc of docs) {
await pipeline.update(doc._id, true, {})
}
}
}

View File

@ -22,7 +22,8 @@ import {
extractDocKey,
MeasureContext,
Ref,
ServerStorage
ServerStorage,
Storage
} from '@hcengineering/core'
import { IndexedDoc } from '../types'
import {
@ -40,20 +41,25 @@ import { docKey, docUpdKey, getContent, getFullTextAttributes, isFullTextAttribu
export class IndexedFieldStage implements FullTextPipelineStage {
require = []
stageId = fieldStageId
clearExcept: string[] = [fieldStageId, contentStageId]
// Do not clear downloaded content
clearExcept: string[] = [contentStageId]
clearField: string[] = []
updateFields: DocUpdateHandler[] = []
limit = 1000
enabled = true
constructor (readonly dbStorage: ServerStorage, readonly metrics: MeasureContext) {}
constructor (private readonly dbStorage: ServerStorage, readonly metrics: MeasureContext) {}
async initialize (storage: Storage, pipeline: FullTextPipeline): Promise<void> {
// Just do nothing
}
async search (
_classes: Ref<Class<Doc>>[],
search: DocumentQuery<Doc>,
size: number | undefined,
size?: number,
from?: number
): Promise<{ docs: IndexedDoc[], pass: boolean }> {
return { docs: [], pass: true }
@ -88,10 +94,8 @@ export class IndexedFieldStage implements FullTextPipelineStage {
const content = getContent(pipeline.hierarchy, attributes, doc)
const docUpdate: DocumentUpdate<DocIndexState> = {}
const elasticUpdate: Partial<IndexedDoc> = {}
const parentDocUpdate: DocumentUpdate<DocIndexState> = {}
const parentDocElasticUpdate: Partial<IndexedDoc> = {}
for (const [, v] of Object.entries(content)) {
// Check for content changes and collect update
@ -99,28 +103,19 @@ export class IndexedFieldStage implements FullTextPipelineStage {
const dUKey = docUpdKey(v.attr.name, { _class: v.attr.attributeOf })
if (docState.attributes[dKey] !== v.value) {
;(docUpdate as any)[dUKey] = v.value
elasticUpdate[dKey] = v.value
// Aswell I need to update my parent with my attributes.
if (docState.attachedTo != null) {
;(parentDocUpdate as any)[docUpdKey(v.attr.name, { _class: v.attr.attributeOf, docId: docState._id })] =
v.value
;(parentDocElasticUpdate as any)[
docKey(v.attr.name, { _class: v.attr.attributeOf, docId: docState._id })
] = v.value
}
}
}
if (docState.attachedTo != null) {
// We need to clear field stage from parent, so it will be re indexed.
await pipeline.update(
docState.attachedTo as Ref<DocIndexState>,
false,
parentDocUpdate,
parentDocElasticUpdate
)
await pipeline.update(docState.attachedTo as Ref<DocIndexState>, false, parentDocUpdate)
}
await pipeline.update(docState._id, true, docUpdate, elasticUpdate)
await pipeline.update(docState._id, true, docUpdate)
} catch (err: any) {
console.error(err)
continue
@ -134,7 +129,6 @@ export class IndexedFieldStage implements FullTextPipelineStage {
if (doc.attachedTo !== undefined) {
const attachedTo = doc.attachedTo as Ref<DocIndexState>
const parentDocUpdate: DocumentUpdate<DocIndexState> = {}
const parentDocElasticUpdate: Partial<IndexedDoc> = {}
for (const [k] of Object.entries(doc.attributes)) {
const { _class, attr, extra, docId } = extractDocKey(k)
@ -143,15 +137,15 @@ export class IndexedFieldStage implements FullTextPipelineStage {
const keyAttr = pipeline.hierarchy.getAttribute(_class, attr)
if (isFullTextAttribute(keyAttr)) {
;(parentDocUpdate as any)[docUpdKey(attr, { _class, docId: doc._id, extra })] = null
;(parentDocElasticUpdate as any)[docKey(attr, { _class, docId: doc._id, extra })] = null
}
}
}
if (Object.keys(parentDocUpdate).length > 0) {
await pipeline.update(attachedTo, false, parentDocUpdate, parentDocElasticUpdate)
await pipeline.update(attachedTo, false, parentDocUpdate)
}
}
await pipeline.update(doc._id, true, {})
}
}
}

View File

@ -0,0 +1,176 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import {
Class,
Doc,
DocIndexState,
DocumentQuery,
DocumentUpdate,
extractDocKey,
MeasureContext,
Ref,
Storage,
WorkspaceId
} from '@hcengineering/core'
import { FullTextAdapter, IndexedDoc } from '../types'
import { summaryStageId } from './summary'
import {
contentStageId,
DocUpdateHandler,
fieldStateId,
FullTextPipeline,
FullTextPipelineStage,
fullTextPushStageId
} from './types'
import { docKey } from './utils'
/**
 * Final indexing stage: converts fully-processed document states into
 * Elastic documents and pushes them to the full text adapter in bulk.
 *
 * @public
 */
export class FullTextPushStage implements FullTextPipelineStage {
  require = [fieldStateId, contentStageId, summaryStageId]
  stageId = fullTextPushStageId

  enabled = true

  updateFields: DocUpdateHandler[] = []

  limit = 100

  // Zero vector per known embedding field, sized from the adapter mapping.
  // Used to pad documents that lack an embedding so the index mapping stays
  // consistent.
  dimmVectors: Record<string, number[]> = {}

  // Suffix of the boolean companion field marking an embedding as real
  // (as opposed to zero-padding added by checkIntegrity).
  field_enabled = '_use'

  constructor (
    readonly fulltextAdapter: FullTextAdapter,
    readonly workspace: WorkspaceId,
    readonly metrics: MeasureContext
  ) {}

  async initialize (storage: Storage, pipeline: FullTextPipeline): Promise<void> {
    // Fetch embedding field dimensions from the adapter mapping and prepare
    // matching zero vectors. Failure is non-fatal: the stage then runs
    // without vector padding.
    try {
      const r = await this.fulltextAdapter.initMapping()
      for (const [k, v] of Object.entries(r)) {
        this.dimmVectors[k] = Array.from(Array(v).keys()).map((it) => 0)
      }
    } catch (err: any) {
      console.error(err)
    }
  }

  async update (doc: DocIndexState, update: DocumentUpdate<DocIndexState>): Promise<void> {}

  /**
   * Ensure every known embedding field is present with the expected
   * dimension; pad missing/mis-sized ones with a zero vector and mark them
   * disabled via the `_use` companion field.
   */
  checkIntegrity (indexedDoc: IndexedDoc): void {
    for (const [k, dimms] of Object.entries(this.dimmVectors)) {
      if (indexedDoc[k] === undefined || indexedDoc[k].length !== dimms.length) {
        indexedDoc[k] = dimms
        indexedDoc[`${k}${this.field_enabled}`] = false
      }
    }
  }

  async search (
    _classes: Ref<Class<Doc>>[],
    search: DocumentQuery<Doc>,
    size?: number,
    from?: number
  ): Promise<{ docs: IndexedDoc[], pass: boolean }> {
    // This stage does not serve queries itself.
    return { docs: [], pass: true }
  }

  async collect (toIndex: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    const bulk: IndexedDoc[] = []
    for (const doc of toIndex) {
      // (was duplicated twice in the original — a single check suffices)
      if (pipeline.cancelling) {
        return
      }
      try {
        const elasticDoc = createElasticDoc(doc)
        updateDoc2Elastic(doc.attributes, elasticDoc)
        this.checkIntegrity(elasticDoc)
        bulk.push(elasticDoc)
      } catch (err: any) {
        const wasError = (doc as any).error !== undefined
        await pipeline.update(doc._id, false, { [docKey('error')]: JSON.stringify({ message: err.message, err }) })
        if (wasError) {
          continue
        }
        // Print error only first time, and update it in doc index
        console.error(err)
        continue
      }
    }
    // Perform bulk update to elastic
    try {
      await this.fulltextAdapter.updateMany(bulk)
    } catch (err: any) {
      console.error(err)
    }
    // NOTE(review): documents that failed conversion above are still marked
    // complete here (same as original behavior) — confirm this is intended.
    for (const doc of toIndex) {
      await pipeline.update(doc._id, true, {})
    }
  }

  async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    await this.fulltextAdapter.remove(docs.map((it) => it._id))
    // will be handled by field processor
    for (const doc of docs) {
      await pipeline.update(doc._id, true, {})
    }
  }
}
/**
 * Build the base Elastic document for a document index state, copying only
 * the identity/system fields (attributes are merged in separately).
 *
 * @public
 */
export function createElasticDoc (upd: DocIndexState): IndexedDoc {
  const { _id, objectClass, modifiedBy, modifiedOn, space, attachedTo, attachedToClass } = upd
  return {
    id: _id,
    _class: objectClass,
    modifiedBy,
    modifiedOn,
    space,
    attachedTo,
    attachedToClass
  }
}
/**
 * Merge stored index-state attributes into an Elastic document.
 *
 * Keys without a docId are copied as-is; keys belonging to attached child
 * documents are collected into per-attribute arrays under a '|'-prefixed
 * key. Values stored with the 'base64' extra marker are decoded first.
 */
function updateDoc2Elastic (attributes: Record<string, any>, doc: IndexedDoc): IndexedDoc {
  for (const [key, raw] of Object.entries(attributes)) {
    const { _class, attr, docId, extra } = extractDocKey(key)
    const value: any = extra.includes('base64') ? Buffer.from(raw, 'base64').toString() : raw
    if (docId === undefined) {
      doc[key] = value
    } else if (value !== null) {
      // Arrays of child values are replaced wholesale, so nulls can be skipped.
      const target = '|' + docKey(attr, { _class, extra: extra.filter((e) => e !== 'base64') })
      doc[target] = [...(doc[target] ?? []), value]
    }
  }
  return doc
}

View File

@ -18,3 +18,6 @@ export * from './field'
export * from './indexer'
export * from './types'
export * from './utils'
export * from './fulltextPush'
export * from './summary'
export * from './limitter'

View File

@ -22,6 +22,7 @@ import core, {
DocumentUpdate,
DOMAIN_DOC_INDEX_STATE,
Hierarchy,
MeasureContext,
Ref,
ServerStorage,
setObjectValue,
@ -29,15 +30,28 @@ import core, {
_getOperator
} from '@hcengineering/core'
import { DbAdapter } from '../adapter'
import type { FullTextAdapter, IndexedDoc } from '../types'
import { fieldStateId, FullTextPipeline, FullTextPipelineStage } from './types'
import { createStateDoc, docKey, extractDocKey, isClassIndexable } from './utils'
import type { IndexedDoc } from '../types'
import { RateLimitter } from './limitter'
import { FullTextPipeline, FullTextPipelineStage } from './types'
import { createStateDoc, isClassIndexable } from './utils'
export * from './content'
export * from './field'
export * from './types'
export * from './utils'
// Global Memory management configuration
/**
 * Process-wide indexer tuning knobs:
 * - allowParallel: how many indexing pipelines may run concurrently
 *   (enforced by the shared rate limiter below).
 * - processingSize: how many document states are fetched per batch.
 *
 * @public
 */
export const globalIndexer = {
  allowParallel: 2,
  processingSize: 1000
}
// Shared limiter: all pipelines in this process compete for `allowParallel` slots.
const rateLimitter = new RateLimitter(() => ({ rate: globalIndexer.allowParallel }))
/**
* @public
*/
@ -47,8 +61,6 @@ export class FullTextIndexPipeline implements FullTextPipeline {
toIndexParents: Map<Ref<DocIndexState>, DocIndexState> = new Map()
stageChanged = 0
pendingElastic: Map<Ref<DocIndexState>, IndexedDoc> = new Map()
cancelling: boolean = false
currentStage: FullTextPipelineStage | undefined
@ -63,9 +75,9 @@ export class FullTextIndexPipeline implements FullTextPipeline {
constructor (
private readonly storage: DbAdapter,
private readonly stages: FullTextPipelineStage[],
private readonly adapter: FullTextAdapter,
readonly hierarchy: Hierarchy,
readonly workspace: WorkspaceId
readonly workspace: WorkspaceId,
readonly metrics: MeasureContext
) {
this.readyStages = stages.map((it) => it.stageId)
this.readyStages.sort()
@ -101,35 +113,8 @@ export class FullTextIndexPipeline implements FullTextPipeline {
async flush (force = false): Promise<void> {
if (this.pending.size > 0 && (this.pending.size >= 50 || force)) {
// Push all pending changes to storage.
// We need convert elastic update to a proper document.
const toUpdate: IndexedDoc[] = []
for (const o of this.pendingElastic.values()) {
const doc: IndexedDoc = {
_class: o._class,
id: o.id,
space: o.space,
modifiedBy: o.modifiedBy,
modifiedOn: o.modifiedOn
}
updateDoc2Elastic(o, doc)
toUpdate.push(doc)
}
const promises: Promise<void>[] = []
if (toUpdate.length > 0) {
promises.push(
this.adapter.updateMany(toUpdate).then(() => {
this.pendingElastic.clear()
})
)
}
// Push all pending changes to storage.
promises.push(
this.storage.update(DOMAIN_DOC_INDEX_STATE, this.pending).then(() => {
this.pending.clear()
})
)
await Promise.all(promises)
await this.storage.update(DOMAIN_DOC_INDEX_STATE, this.pending)
this.pending.clear()
}
}
@ -156,7 +141,11 @@ export class FullTextIndexPipeline implements FullTextPipeline {
if (!create) {
const ops = new Map<Ref<DocIndexState>, DocumentUpdate<DocIndexState>>()
ops.set(docId, { ['stages.' + fieldStateId]: false, removed })
const upd: DocumentUpdate<DocIndexState> = { removed }
for (const st of this.stages) {
;(upd as any)['stages.' + st.stageId] = false
}
ops.set(docId, upd)
await this.storage.update(DOMAIN_DOC_INDEX_STATE, ops)
}
this.triggerIndexing()
@ -167,22 +156,20 @@ export class FullTextIndexPipeline implements FullTextPipeline {
docId: Ref<DocIndexState>,
mark: boolean,
update: DocumentUpdate<DocIndexState>,
elasticUpdate: Partial<IndexedDoc>,
flush?: boolean
): Promise<void> {
let udoc = this.toIndex.get(docId)
if (udoc !== undefined) {
await this.stageUpdate(udoc, update, elasticUpdate)
await this.stageUpdate(udoc, update)
udoc = this.updateDoc(udoc, update, mark)
this.toIndex.set(docId, udoc)
}
// For Elastic we also need to check parent
if (udoc === undefined) {
udoc = this.toIndexParents.get(docId)
if (udoc !== undefined) {
await this.stageUpdate(udoc, update, elasticUpdate)
await this.stageUpdate(udoc, update)
udoc = this.updateDoc(udoc, update, mark)
this.toIndexParents.set(docId, udoc)
}
@ -199,9 +186,11 @@ export class FullTextIndexPipeline implements FullTextPipeline {
update.stages = { ...(udoc.stages ?? {}) }
update.stages[stageId] = mark
for (const [k] of Object.entries(update.stages)) {
if (!this.currentStage.clearExcept.includes(k)) {
update.stages[k] = false
if (this.currentStage.clearExcept !== undefined) {
for (const [k] of Object.entries(update.stages)) {
if (k !== this.currentStage.stageId && !this.currentStage.clearExcept.includes(k)) {
update.stages[k] = false
}
}
}
@ -212,13 +201,6 @@ export class FullTextIndexPipeline implements FullTextPipeline {
this.stageChanged++
}
// Collect elastic update
if (udoc !== undefined && Object.keys(elasticUpdate).length !== 0) {
const currentElastic = await this.getElastic(udoc)
currentElastic.modifiedOn = Date.now()
this.pendingElastic.set(docId, { ...currentElastic, ...elasticUpdate })
}
const current = this.pending.get(docId)
if (current === undefined) {
this.pending.set(docId, update)
@ -229,26 +211,13 @@ export class FullTextIndexPipeline implements FullTextPipeline {
await this.flush(flush ?? false)
}
async getElastic (doc: DocIndexState): Promise<IndexedDoc> {
let current = this.pendingElastic.get(doc._id)
if (current === undefined) {
current = createElasticDoc(doc)
this.pendingElastic.set(doc._id, current)
}
return current
}
triggerIndexing = (): void => {}
waitTimeout: any
stats: Record<string, number> = {}
private async stageUpdate (
udoc: DocIndexState,
update: DocumentUpdate<DocIndexState>,
elasticUpdate: Partial<IndexedDoc>
): Promise<void> {
private async stageUpdate (udoc: DocIndexState, update: DocumentUpdate<DocIndexState>): Promise<void> {
for (const u of this.currentStage?.updateFields ?? []) {
await u(udoc, update, elasticUpdate)
await u(udoc, update)
}
}
@ -256,6 +225,12 @@ export class FullTextIndexPipeline implements FullTextPipeline {
this.indexing = this.doIndexing()
}
async initializeStages (): Promise<void> {
for (const st of this.stages) {
await st.initialize(this.storage, this)
}
}
async doIndexing (): Promise<void> {
// Check model is upgraded to support indexer.
@ -267,8 +242,11 @@ export class FullTextIndexPipeline implements FullTextPipeline {
}
await this.initStates()
while (!this.cancelling) {
await this.initializeStages()
await this.processRemove()
await this.processIndex()
console.log('Indexing:', this.workspace)
await rateLimitter.exec(() => this.processIndex())
if (this.toIndex.size === 0 || this.stageChanged === 0) {
if (this.toIndex.size === 0) {
@ -297,12 +275,17 @@ export class FullTextIndexPipeline implements FullTextPipeline {
}
private async processIndex (): Promise<void> {
let idx = 0
for (const st of this.stages) {
idx++
while (true) {
try {
if (this.cancelling) {
return
}
if (!st.enabled) {
break
}
await this.flush(true)
const toSkip = Array.from(this.skipped.entries())
.filter((it) => it[1] > 3)
@ -316,7 +299,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
removed: false
},
{
limit: st.limit,
limit: globalIndexer.processingSize,
sort: {
modifiedOn: 1
}
@ -347,6 +330,16 @@ export class FullTextIndexPipeline implements FullTextPipeline {
// Do Indexing
this.currentStage = st
await st.collect(toIndex, this)
// go with next stages if they accept it
for (const nst of this.stages.slice(idx)) {
const toIndex2 = this.matchStates(nst)
if (toIndex2.length > 0) {
this.currentStage = nst
await nst.collect(toIndex2, this)
}
}
} else {
break
}
@ -374,12 +367,13 @@ export class FullTextIndexPipeline implements FullTextPipeline {
removed: true
},
{
limit: 1000,
sort: {
modifiedOn: 1
},
lookup: {
attachedTo: core.class.DocIndexState
projection: {
_id: 1,
stages: 1,
objectClass: 1
}
}
)
@ -388,10 +382,9 @@ export class FullTextIndexPipeline implements FullTextPipeline {
this.toIndexParents.clear()
const toRemoveIds = Array.from(this.toIndex.keys())
const toIndex = Array.from(this.toIndex.values())
const toRemoveIds = []
for (const st of this.stages) {
const toIndex = Array.from(this.toIndex.values())
if (toIndex.length > 0) {
// Do Indexing
this.currentStage = st
@ -400,11 +393,17 @@ export class FullTextIndexPipeline implements FullTextPipeline {
break
}
}
// If all stages are complete, remove document
const allStageIds = this.stages.map((it) => it.stageId)
for (const doc of toIndex) {
if (allStageIds.every((it) => doc.stages[it])) {
toRemoveIds.push(doc._id)
}
}
await this.flush(true)
if (toRemoveIds.length > 0) {
await this.storage.clean(DOMAIN_DOC_INDEX_STATE, toRemoveIds)
await this.adapter.remove(toRemoveIds)
} else {
break
}
@ -414,12 +413,14 @@ export class FullTextIndexPipeline implements FullTextPipeline {
private async initStates (): Promise<void> {
const statistics = await this.storage.findAll(core.class.DocIndexState, {}, { projection: { stages: 1 } })
this.stats = {}
const allStageIds = new Set(this.stages.map((it) => it.stageId))
for (const st of this.stages) {
this.stats[st.stageId] = 0
}
for (const st of statistics) {
for (const [s, v] of Object.entries(st.stages ?? {})) {
if (v) {
if (v && allStageIds.has(s)) {
this.stats[s] = (this.stats[s] ?? 0) + 1
}
}
@ -428,9 +429,10 @@ export class FullTextIndexPipeline implements FullTextPipeline {
private matchStates (st: FullTextPipelineStage): DocIndexState[] {
const toIndex: DocIndexState[] = []
const require = [...st.require].filter((it) => this.stages.find((q) => q.stageId === it && q.enabled))
for (const o of this.toIndex.values()) {
// We need to contain all state values
if (st.require.every((it) => o.stages?.[it])) {
if (require.every((it) => o.stages?.[it])) {
toIndex.push(o)
}
}
@ -439,7 +441,6 @@ export class FullTextIndexPipeline implements FullTextPipeline {
async checkIndexConsistency (dbStorage: ServerStorage): Promise<void> {
this.hierarchy.domains()
// await this.rebuildElastic()
const allClasses = this.hierarchy.getDescendants(core.class.Doc)
for (const c of allClasses) {
if (this.cancelling) {
@ -459,7 +460,7 @@ export class FullTextIndexPipeline implements FullTextPipeline {
while (true) {
const newDocs: DocIndexState[] = (
await dbStorage.findAll<Doc>(
this.adapter.metrics(),
this.metrics,
c,
{ _class: c, _id: { $nin: states } },
{ limit: 1000, projection: { _id: 1, attachedTo: 1, attachedToClass: 1 } as any }
@ -490,75 +491,10 @@ export class FullTextIndexPipeline implements FullTextPipeline {
console.log('Updated state for: ', c, newDocs.length)
}
const statesSet = new Set(states)
const docIds = (
await dbStorage.findAll<Doc>(this.adapter.metrics(), c, { _class: c }, { projection: { _id: 1 } })
)
const docIds = (await dbStorage.findAll<Doc>(this.metrics, c, { _class: c }, { projection: { _id: 1 } }))
.filter((it) => !statesSet.has(it._id as Ref<DocIndexState>))
.map((it) => it._id)
await this.storage.clean(DOMAIN_DOC_INDEX_STATE, docIds)
}
}
async rebuildElastic (): Promise<void> {
// rebuild elastic
const allDocs = await this.storage.findAll(core.class.DocIndexState, {})
const toUpdate: DocIndexState[] = allDocs.filter((it) => it.attributes.openai_embedding_use)
while (toUpdate.length > 0) {
this.toIndex = new Map<Ref<DocIndexState>, DocIndexState>(toUpdate.splice(0, 500).map((it) => [it._id, it]))
const elasticDocs = await this.adapter.load(Array.from(this.toIndex.keys()))
let hasUpdates = false
for (const o of elasticDocs) {
const odoc = this.toIndex.get(o.id as Ref<DocIndexState>) as DocIndexState
if (odoc.attributes.openai_embedding_use as boolean) {
hasUpdates = true
odoc.attributes.openai_embedding = o.openai_embedding
o.openai_embedding_use = true
}
}
if (hasUpdates) {
try {
await this.storage.upload(DOMAIN_DOC_INDEX_STATE, Array.from(this.toIndex.values()))
await this.adapter.updateMany(elasticDocs)
} catch (err: any) {
console.error(err)
}
}
}
}
}
/**
* @public
*/
export function createElasticDoc (upd: DocIndexState): IndexedDoc {
const doc = {
...upd.attributes,
id: upd._id,
_class: upd.objectClass,
modifiedBy: upd.modifiedBy,
modifiedOn: upd.modifiedOn,
space: upd.space,
attachedTo: upd.attachedTo,
attachedToClass: upd.attachedToClass
}
return doc
}
function updateDoc2Elastic (attributes: Record<string, any>, doc: IndexedDoc): IndexedDoc {
for (const [k, v] of Object.entries(attributes)) {
const { _class, attr, docId, extra } = extractDocKey(k)
let vv: any = v
if (extra.includes('base64')) {
vv = Buffer.from(v, 'base64').toString()
}
if (docId === undefined) {
doc[k] = vv
continue
}
const docIdAttr = '|' + docKey(attr, { _class, extra: extra.filter((it) => it !== 'base64') })
if (vv !== null) {
// Since we replace array of values, we could ignore null
doc[docIdAttr] = [...(doc[docIdAttr] ?? []), vv]
}
}
return doc
}

View File

@ -0,0 +1,39 @@
/**
 * Limits the number of concurrently running async operations.
 *
 * `exec` waits for a free slot (at most `config().rate` operations in
 * flight), runs the operation and returns its result. `add` is a
 * fire-and-forget variant that only awaits completion when the limiter is
 * already saturated. `waitProcessing` resolves once any in-flight operation
 * settles (immediately if none are running).
 *
 * @public
 */
export class RateLimitter {
  idCounter: number = 0
  processingQueue = new Map<string, Promise<void>>()
  // NOTE(review): `queue` is never used inside this class — presumably kept
  // for external callers; confirm and remove if dead.
  queue: (() => Promise<void>)[] = []

  constructor (readonly config: () => { rate: number }) {}

  async exec<T, B extends Record<string, any> = {}>(op: (args?: B) => Promise<T>, args?: B): Promise<T> {
    const processingId = `${this.idCounter++}`

    // Wait until a slot is free. Using '>=' keeps at most `rate` operations
    // in flight (the previous '>' admitted rate + 1). Rejections are ignored
    // here: a failing operation reports to its own caller, not to waiters.
    while (this.processingQueue.size >= this.config().rate) {
      await Promise.race(this.processingQueue.values()).catch(() => {})
    }
    try {
      const p = op(args)
      this.processingQueue.set(processingId, p as Promise<void>)
      return await p
    } finally {
      this.processingQueue.delete(processingId)
    }
  }

  async add<T, B extends Record<string, any> = {}>(op: (args?: B) => Promise<T>, args?: B): Promise<void> {
    if (this.processingQueue.size < this.config().rate) {
      // Free slot available: start the operation without blocking the caller.
      void this.exec(op, args)
    } else {
      // Saturated: block the caller until the operation itself completes.
      await this.exec(op, args)
    }
  }

  async waitProcessing (): Promise<void> {
    // Promise.race over an empty iterable never settles — guard so callers
    // are not blocked forever when nothing is in flight (original bug; it
    // also double-awaited the race).
    if (this.processingQueue.size > 0) {
      await Promise.race(this.processingQueue.values()).catch(() => {})
    }
  }
}

View File

@ -0,0 +1,234 @@
//
// Copyright © 2022 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import core, {
AnyAttribute,
Class,
Doc,
DocIndexState,
DocumentQuery,
DocumentUpdate,
docUpdKey,
extractDocKey,
FullTextSearchContext,
Hierarchy,
isFullTextAttribute,
Ref,
Storage
} from '@hcengineering/core'
import { translate } from '@hcengineering/platform'
import { IndexedDoc } from '../types'
import { contentStageId, DocUpdateHandler, fieldStateId, FullTextPipeline, FullTextPipelineStage } from './types'
import { convert } from 'html-to-text'
/**
 * Identifier of the summary stage, stored per-document in `stages`.
 * NOTE(review): bumping the version suffix appears to invalidate previously
 * stored stage marks and force re-summarization — confirm.
 *
 * @public
 */
export const summaryStageId = 'sum-v1_1'
/**
 * Pipeline stage that assembles a plain-text summary of a document from its
 * indexed attributes and stores it in the `summary` field of the index state.
 *
 * @public
 */
export class FullSummaryStage implements FullTextPipelineStage {
  require = [fieldStateId, contentStageId]
  stageId = summaryStageId

  enabled = true

  clearExcept?: string[] = undefined

  updateFields: DocUpdateHandler[] = []

  // When non-empty, only attributes carrying all of these extra markers
  // (e.g. 'content', 'base64') are folded into the summary.
  matchExtra: string[] = []

  summaryField = 'summary'

  fieldFilter: ((attr: AnyAttribute, value: string) => boolean)[] = []

  async initialize (storage: Storage, pipeline: FullTextPipeline): Promise<void> {}

  async search (
    _classes: Ref<Class<Doc>>[],
    search: DocumentQuery<Doc>,
    size?: number,
    from?: number
  ): Promise<{ docs: IndexedDoc[], pass: boolean }> {
    // The summary stage does not serve queries on its own.
    return { docs: [], pass: true }
  }

  async collect (toIndex: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    for (const state of toIndex) {
      if (pipeline.cancelling) {
        return
      }
      if (!isIndexingRequired(pipeline, state)) {
        // Class is not marked for summary indexing: just mark the stage done.
        await pipeline.update(state._id, true, {})
        continue
      }
      const summary = await extractIndexedValues(state, pipeline.hierarchy, {
        matchExtra: this.matchExtra,
        fieldFilter: this.fieldFilter
      })
      const update: DocumentUpdate<DocIndexState> = {}
      ;(update as any)[docUpdKey(this.summaryField)] = summary
      await pipeline.update(state._id, true, update)
    }
  }

  async remove (docs: DocIndexState[], pipeline: FullTextPipeline): Promise<void> {
    // Cleanup is handled by the field processor; only mark this stage done.
    for (const state of docs) {
      await pipeline.update(state._id, true, {})
    }
  }
}
/**
 * Resolve whether summary indexing is enabled for the document's class.
 *
 * Walks the class hierarchy upward until a class carrying the
 * `FullTextSearchContext` mixin is found, and returns its `fullTextSummary`
 * flag. Returns `false` when no ancestor declares the mixin, and coerces an
 * unset `fullTextSummary` to `false` (the original could return `undefined`
 * despite the declared `boolean` return type).
 *
 * @public
 */
export function isIndexingRequired (pipeline: FullTextPipeline, doc: DocIndexState): boolean {
  let objClass = pipeline.hierarchy.getClass(doc.objectClass)
  while (true) {
    if (pipeline.hierarchy.hasMixin(objClass, core.mixin.FullTextSearchContext)) {
      return (
        pipeline.hierarchy.as<Class<Doc>, FullTextSearchContext>(objClass, core.mixin.FullTextSearchContext)
          .fullTextSummary ?? false
      )
    }
    if (objClass.extends === undefined) {
      return false
    }
    objClass = pipeline.hierarchy.getClass(objClass.extends)
  }
}
/**
 * Collect all indexable attribute values of a document index state into a
 * single plain-text blob suitable for summarization/embedding.
 *
 * Own-document values are rendered as "<label> is <value>" lines; values
 * contributed by attached children (keys carrying a docId) are appended as
 * raw text. Markup attributes are converted from HTML to text first.
 *
 * @param doc - document index state whose `attributes` are scanned
 * @param hierarchy - used to resolve attribute metadata per class
 * @param opt.matchExtra - if non-empty, only keys whose extra markers include
 *   all of these are considered
 * @param opt.fieldFilter - predicates that must all accept (attr, value)
 * @returns whitespace-normalized summary text
 *
 * @public
 */
export async function extractIndexedValues (
  doc: DocIndexState,
  hierarchy: Hierarchy,
  opt: {
    matchExtra: string[]
    fieldFilter: ((attr: AnyAttribute, value: string) => boolean)[]
  }
): Promise<string> {
  // Values grouped by owning class: own attributes vs. attached-child values.
  const attributes: Record<Ref<Class<Doc>>, Record<string, string>> = {}
  const childAttributes: Record<Ref<Class<Doc>>, Record<string, string>> = {}
  // Per attribute name: the longest 'extra' marker string accepted so far.
  // NOTE(review): a longer variant does not remove the shorter one already
  // stored (keys differ by extra markers) — both may end up in the summary;
  // confirm this is intended.
  const currentReplacement: Record<string, string> = {}
  for (const [k, v] of Object.entries(doc.attributes)) {
    try {
      const { _class, attr, extra, docId } = extractDocKey(k)
      let sourceContent = `${v as string}`.trim()
      // Stored binary/content values carry a 'base64' marker and are decoded.
      if (extra.includes('base64')) {
        sourceContent = Buffer.from(sourceContent, 'base64').toString().trim()
      }
      if (sourceContent.length === 0) {
        continue
      }
      if (_class === undefined) {
        // Skip all helper fields.
        continue
      }
      if (!opt.matchExtra.every((it) => extra.includes(it))) {
        continue
      }
      // Check if attribute is indexable
      let keyAttr: AnyAttribute
      try {
        keyAttr = hierarchy.getAttribute(_class, attr)
      } catch (err: any) {
        // Skip if there is no attribute.
        continue
      }
      // Markup fields hold HTML; convert to plain text, dropping images.
      if (keyAttr.type._class === core.class.TypeMarkup) {
        sourceContent = convert(sourceContent, {
          preserveNewlines: true,
          selectors: [{ selector: 'img', format: 'skip' }]
        })
      }
      if (!opt.fieldFilter.every((it) => it(keyAttr, sourceContent))) {
        // Some of filters not pass value
        continue
      }
      if (!isFullTextAttribute(keyAttr)) {
        continue
      }
      // Prefer the variant with the longest extra-marker chain (e.g. decoded
      // content over raw) for a given attribute name.
      const repl = extra.join('#')
      if ((currentReplacement[attr] ?? '').length <= repl.length) {
        const label = await translate(keyAttr.label, {})
        const cl = _class ?? doc.objectClass
        if (docId === undefined) {
          attributes[cl] = { ...attributes[cl], [k]: `${label} is ${sourceContent}\n` }
        } else {
          childAttributes[cl] = { ...childAttributes[cl], [k]: sourceContent }
        }
        currentReplacement[attr] = repl
      }
    } catch (err: any) {
      // Best-effort: a malformed key/value must not abort the whole summary.
      console.log(err)
    }
  }
  let embeddingText = ''
  for (const [, vv] of Object.entries(attributes)) {
    embeddingText += '\n'
    for (const [, v] of Object.entries(vv)) {
      // Check if attribute is text one.
      embeddingText += ' ' + v + '\n'
    }
  }
  // Extra child attributes
  for (const [, vv] of Object.entries(childAttributes)) {
    for (const [, v] of Object.entries(vv)) {
      // Check if attribute is text one.
      embeddingText += ' ' + v + '\n'
    }
  }
  // Trim empty inner space.
  embeddingText = (embeddingText ?? '')
    .split(/ +|\t+/)
    .filter((it) => it)
    .join(' ')
  embeddingText = (embeddingText ?? '')
    .split(/\n\n+/)
    .filter((it) => it)
    .join('\n\n')
  return embeddingText.trim()
}

View File

@ -13,7 +13,7 @@
// limitations under the License.
//
import { Class, Doc, DocIndexState, DocumentQuery, DocumentUpdate, Hierarchy, Ref } from '@hcengineering/core'
import { Class, Doc, DocIndexState, DocumentQuery, DocumentUpdate, Hierarchy, Ref, Storage } from '@hcengineering/core'
import type { IndexedDoc } from '../types'
/**
@ -25,7 +25,6 @@ export interface FullTextPipeline {
docId: Ref<DocIndexState>,
mark: boolean,
update: DocumentUpdate<DocIndexState>,
elasticUpdate: Partial<IndexedDoc>,
flush?: boolean
) => Promise<void>
@ -42,11 +41,7 @@ export interface FullTextPipeline {
/**
* @public
*/
export type DocUpdateHandler = (
doc: DocIndexState,
update: DocumentUpdate<DocIndexState>,
elastic: Partial<IndexedDoc>
) => Promise<void>
export type DocUpdateHandler = (doc: DocIndexState, update: DocumentUpdate<DocIndexState>) => Promise<void>
/**
* @public
@ -58,13 +53,15 @@ export interface FullTextPipelineStage {
// State to be updated
stageId: string
// Clear all stages except following.
clearExcept: string[]
// If specified, will clear all stages except specified + current
clearExcept?: string[]
// Will propagate some changes for both mark values.
updateFields: DocUpdateHandler[]
limit: number
enabled: boolean
initialize: (storage: Storage, pipeline: FullTextPipeline) => Promise<void>
// Collect all changes related to bulk of document states
collect: (docs: DocIndexState[], pipeline: FullTextPipeline) => Promise<void>
@ -84,8 +81,13 @@ export interface FullTextPipelineStage {
/**
* @public
*/
export const contentStageId = 'cnt-v1'
export const contentStageId = 'cnt-v2b'
/**
* @public
*/
export const fieldStateId = 'fld-v1'
/**
* @public
*/
export const fullTextPushStageId = 'fts-v1'

View File

@ -94,10 +94,6 @@ export function isClassIndexable (hierarchy: Hierarchy, c: Ref<Class<Doc>>): boo
if (indexed !== undefined) {
return indexed as boolean
}
if (hierarchy.isMixin(c)) {
// No need for mixins.
return false
}
const domain = hierarchy.findDomain(c)
if (domain === undefined) {
hierarchy.setClassifierProp(c, 'class_indexed', false)

View File

@ -54,8 +54,6 @@ import { getResource } from '@hcengineering/platform'
import { DbAdapter, DbAdapterConfiguration, TxAdapter } from './adapter'
import { FullTextIndex } from './fulltext'
import { FullTextIndexPipeline } from './indexer'
import { ContentRetrievalStage } from './indexer/content'
import { IndexedFieldStage } from './indexer/field'
import { FullTextPipelineStage } from './indexer/types'
import serverCore from './plugin'
import { Triggers } from './triggers'
@ -73,8 +71,10 @@ import type {
export type FullTextPipelineStageFactory = (
adapter: FullTextAdapter,
stages: FullTextPipelineStage[]
) => FullTextPipelineStage
storage: ServerStorage,
storageAdapter: MinioService,
contentAdapter: ContentTextAdapter
) => FullTextPipelineStage[]
/**
* @public
*/
@ -87,7 +87,7 @@ export interface DbConfiguration {
factory: FullTextAdapterFactory
url: string
metrics: MeasureContext
stages?: FullTextPipelineStageFactory[]
stages: FullTextPipelineStageFactory
}
contentAdapter: {
factory: ContentAdapterFactory
@ -456,7 +456,10 @@ class TServerStorage implements ServerStorage {
},
findAll: fAll(ctx),
modelDb: this.modelDb,
hierarchy: this.hierarchy
hierarchy: this.hierarchy,
txFx: async (f) => {
await f(this.getAdapter(DOMAIN_TX))
}
})
))
]
@ -690,23 +693,23 @@ export async function createServerStorage (
conf.contentAdapter.metrics
)
const docIndexState = adapters.get(conf.defaultAdapter)
if (docIndexState === undefined) {
const defaultAdapter = adapters.get(conf.defaultAdapter)
if (defaultAdapter === undefined) {
throw new Error(`No Adapter for ${DOMAIN_DOC_INDEX_STATE}`)
}
const indexFactory = (storage: ServerStorage): FullTextIndex => {
const stages: FullTextPipelineStage[] = [
new IndexedFieldStage(storage, fulltextAdapter.metrics().newChild('fields', {})),
new ContentRetrievalStage(
storageAdapter,
conf.workspace,
fulltextAdapter.metrics().newChild('content', {}),
contentAdapter
)
]
;(conf.fulltextAdapter.stages ?? []).forEach((it) => stages.push(it(fulltextAdapter, stages)))
const indexer = new FullTextIndexPipeline(docIndexState, stages, fulltextAdapter, hierarchy, conf.workspace)
if (storageAdapter === undefined) {
throw new Error('No storage adapter')
}
const stages = conf.fulltextAdapter.stages(fulltextAdapter, storage, storageAdapter, contentAdapter)
const indexer = new FullTextIndexPipeline(
defaultAdapter,
stages,
hierarchy,
conf.workspace,
fulltextAdapter.metrics()
)
return new FullTextIndex(
hierarchy,
fulltextAdapter,
@ -733,3 +736,24 @@ export async function createServerStorage (
options
)
}
/**
 * Create a `MinioService` stub whose operations are all no-ops.
 *
 * Useful for tooling and tests where no blob storage is available:
 * `exists` always reports `false`, `list` returns nothing, the
 * mutating operations (`make`/`remove`/`delete`/`put`) do nothing,
 * and the readers (`stat`/`get`/`read`/`partial`) return empty
 * placeholder objects.
 *
 * NOTE(review): callers must not rely on the shape of the reader
 * results — they are bare `{}` casts, not real stat/stream objects.
 *
 * @public
 */
export function createNullStorageFactory (): MinioService {
  return {
    // No real client behind this stub.
    client: '' as any,
    exists: async (workspaceId: WorkspaceId) => false,
    make: async (workspaceId: WorkspaceId) => {},
    remove: async (workspaceId: WorkspaceId, objectNames: string[]) => {},
    delete: async (workspaceId: WorkspaceId) => {},
    list: async (workspaceId: WorkspaceId, prefix?: string) => [],
    stat: async (workspaceId: WorkspaceId, objectName: string) => ({} as any),
    get: async (workspaceId: WorkspaceId, objectName: string) => ({} as any),
    // 'metaData' mirrors the MinIO client's putObject signature (was the
    // placeholder name 'qwe'); renaming a parameter is caller-compatible.
    put: async (workspaceId: WorkspaceId, objectName: string, stream: any, size?: number, metaData?: any) => ({} as any),
    read: async (workspaceId: WorkspaceId, name: string) => ({} as any),
    partial: async (workspaceId: WorkspaceId, objectName: string, offset: number, length?: number) => ({} as any)
  }
}

View File

@ -111,6 +111,8 @@ export interface TriggerControl {
// Later can be replaced with generic one with bucket encapsulated inside.
storageFx: (f: (adapter: MinioService, workspaceId: WorkspaceId) => Promise<void>) => void
fx: (f: () => Promise<void>) => void
txFx: (f: (storage: Storage) => Promise<void>) => Promise<void>
}
/**
@ -177,7 +179,8 @@ export interface FullTextAdapter {
close: () => Promise<void>
metrics: () => MeasureContext
initMapping: (field: string, dims: number) => Promise<void>
// If no field is provided, will return the existing mapping of all dims.
initMapping: (field?: { key: string, dims: number }) => Promise<Record<string, number>>
load: (docs: Ref<Doc>[]) => Promise<IndexedDoc[]>
}
@ -186,7 +189,9 @@ export interface FullTextAdapter {
* @public
*/
export class DummyFullTextAdapter implements FullTextAdapter {
async initMapping (field: string, dims: number): Promise<void> {}
// Dummy full-text adapter: no index mapping is kept, so there are no
// dimensions to report — always answer with an empty map, regardless of
// whether a field descriptor was supplied.
async initMapping (field?: { key: string, dims: number }): Promise<Record<string, number>> {
return {}
}
async index (doc: IndexedDoc): Promise<TxResult> {
return {}

View File

@ -32,5 +32,9 @@
"@hcengineering/platform": "^0.6.8",
"@hcengineering/server-core": "^0.6.1",
"@elastic/elasticsearch": "^7.14.0"
},
"repository": "https://github.com/hcengineering/anticrm",
"publishConfig": {
"registry": "https://npm.pkg.github.com"
}
}

Some files were not shown because too many files have changed in this diff Show More