From 5a74bda519b3e3904ce9ff3a30bb786e767438ce Mon Sep 17 00:00:00 2001 From: Andrey Sobolev Date: Wed, 23 Feb 2022 16:09:07 +0700 Subject: [PATCH] Tool recruit update (#1035) Signed-off-by: Andrey Sobolev --- common/config/rush/pnpm-lock.yaml | 34 ++- dev/tool/package.json | 10 +- dev/tool/src/importer.ts | 13 +- dev/tool/src/index.ts | 13 + dev/tool/src/recruit.ts | 228 ++++++++++++++++++ dev/tool/src/utils.ts | 12 + dev/tool/src/workspace.ts | 37 +-- packages/rekoni/src/types.ts | 1 + .../src/components/CreateCandidate.svelte | 4 +- 9 files changed, 319 insertions(+), 33 deletions(-) create mode 100644 dev/tool/src/recruit.ts create mode 100644 dev/tool/src/utils.ts diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index 99c6158dd8..7f4ccdf779 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -164,6 +164,7 @@ specifiers: '@types/mime-types': ~2.1.1 '@types/pdfkit': ~0.12.3 '@types/prosemirror-model': ~1.16.0 + '@types/request': ~2.48.8 '@types/toposort': ^2.0.3 '@types/uuid': ^8.3.1 '@types/xml2js': ~0.4.9 @@ -207,6 +208,7 @@ specifiers: prettier: ^2.4.1 prettier-plugin-svelte: ^2.2.0 prosemirror-model: ~1.16.1 + request: ~2.88.2 sass: ^1.37.5 sass-loader: ^12.1.0 simplytyped: ^3.3.0 @@ -389,6 +391,7 @@ dependencies: '@types/mime-types': 2.1.1 '@types/pdfkit': 0.12.3 '@types/prosemirror-model': 1.16.0 + '@types/request': 2.48.8 '@types/toposort': 2.0.3 '@types/uuid': 8.3.3 '@types/xml2js': 0.4.9 @@ -432,6 +435,7 @@ dependencies: prettier: 2.5.1 prettier-plugin-svelte: 2.5.1_prettier@2.5.1 prosemirror-model: 1.16.1 + request: 2.88.2 sass: 1.45.0 sass-loader: 12.4.0_sass@1.45.0+webpack@5.65.0 simplytyped: 3.3.0_typescript@4.5.4 @@ -2125,6 +2129,10 @@ packages: '@types/node': 17.0.0 dev: false + /@types/caseless/0.12.2: + resolution: {integrity: sha512-6ckxMjBBD8URvjB6J3NcnuAn5Pkl7t3TizAg+xdlzzQGSPSmBcXf8KoIH0ua/i+tio+ZRUHEXp0HEmvaR4kt0w==} + dev: false + /@types/connect-history-api-fallback/1.3.5: resolution: {integrity: sha512-h8QJa8xSb1WD4fpKBDcATDNGXghFj6/3GRWG6dhmRcu0RX1Ubasur2Uvx5aeEwlf0MwblEC2bMzzMQntxnw/Cw==} dependencies: @@ -2451,6 +2459,15 @@ packages: resolution: {integrity: sha512-EEhsLsD6UsDM1yFhAvy0Cjr6VwmpMWqFBCb9w07wVugF7w9nfajxLuVmngTIpgS6svCnm6Vaw+MZhoDCKnOfsw==} dev: false + /@types/request/2.48.8: + resolution: {integrity: sha512-whjk1EDJPcAR2kYHRbFl/lKeeKYTi05A15K9bnLInCVroNDCtXce57xKdI0/rQaA3K+6q0eFyUBPmqfSndUZdQ==} + dependencies: + '@types/caseless': 0.12.2 + '@types/node': 17.0.10 + '@types/tough-cookie': 4.0.1 + form-data: 2.5.1 + dev: false + /@types/retry/0.12.1: resolution: {integrity: sha512-xoDlM2S4ortawSWORYqsdU+2rxdh4LRW9ytc3zmT37RIKQh6IHyKwwtKhKis9ah8ol07DCkZxPt8BBvPjC6v4g==} dev: false @@ -2496,6 +2513,10 @@ packages: resolution: {integrity: sha512-jRtyvEu0Na/sy0oIxBW0f6wPQjidgVqlmCTJVHEGTNEUdL1f0YSvdPzHY7nX7MUWAZS6zcAa0KkqofHjy/xDZQ==} dev: false + /@types/tough-cookie/4.0.1: + resolution: {integrity: sha512-Y0K95ThC3esLEYD6ZuqNek29lNX2EM1qxV8y2FTLUB0ff5wWrk7az+mLrnNFUnaXcgKye22+sFBRXOgpPILZNg==} + dev: false + /@types/uuid/8.3.3: resolution: {integrity: sha512-0LbEEx1zxrYB3pgpd1M5lEhLcXjKJnYghvhTRgaBeUivLHMDM1TzF3IJ6hXU2+8uA4Xz+5BA63mtZo5DjVT8iA==} dev: false @@ -5657,6 +5678,15 @@ packages: mime-types: 2.1.34 dev: false + /form-data/2.5.1: + resolution: {integrity: sha512-m21N3WOmEEURgk6B9GLOE4RuWOFf28Lhh9qGYeNlGq4VDXUlJy2th2slBNU8Gp8EzloYZOibZJ7t5ecIrFSjVA==} + engines: {node: '>= 0.12'} + dependencies: + asynckit: 0.4.0 + combined-stream: 1.0.8 + mime-types: 2.1.34 + dev: false + /forwarded-parse/2.1.2: resolution: {integrity: sha512-alTFZZQDKMporBH77856pXgzhEzaUVmLCDk+egLgIgHst3Tpndzz8MnKe+GzRJRfvVdn69HhpW7cmXzvtLvJAw==} dev: false @@ -14286,7 +14316,7 @@ packages: dev: false file:projects/tool.tgz: - resolution: {integrity: sha512-3L/wr78VWV79qjSBCDWg37qMPdsEzAf5STbeXuHPOaMCvkhij64Nv0wN3vzj8CRwWRuKrX0o3Ts0NLw2SSl7hQ==, tarball: file:projects/tool.tgz} + resolution: {integrity: sha512-mU+wxglCPtQylCz6eJOSTVv0pnsJcZ7F9nWUFIimMVNzAf3SR1vRHgozcESbMWCzDnwNx1rRb5SJUMFP5Uy2cQ==, tarball: file:projects/tool.tgz} name: '@rush-temp/tool' version: 0.0.0 dependencies: @@ -14296,6 +14326,7 @@ packages: '@types/mime-types': 2.1.1 '@types/minio': 7.0.11 '@types/node': 16.11.14 + '@types/request': 2.48.8 '@types/ws': 8.2.2 '@types/xml2js': 0.4.9 '@typescript-eslint/eslint-plugin': 5.7.0_c25e8c1f4f4f7aaed27aa6f9ce042237 @@ -14314,6 +14345,7 @@ packages: minio: 7.0.26 mongodb: 4.2.2 prettier: 2.5.1 + request: 2.88.2 ts-node: 10.4.0_5d12c2add188ff0e728b4ade3dacd39b typescript: 4.5.4 ws: 8.4.2 diff --git a/dev/tool/package.json b/dev/tool/package.json index 956db5bb9a..c07d303abc 100644 --- a/dev/tool/package.json +++ b/dev/tool/package.json @@ -12,7 +12,7 @@ "bundle": "esbuild src/index.ts --bundle --minify --platform=node > bundle.js", "docker:build": "docker build -t anticrm/tool .", "docker:push": "docker push anticrm/tool", - "run-local": "cross-env MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin MINIO_ENDPOINT=localhost MONGO_URL=mongodb://localhost:27017 TRANSACTOR_URL=ws:/localhost:3333 TELEGRAM_DATABASE=telegram-service ELASTIC_URL=http://localhost:9200 ts-node ./src/index.ts", + "run-local": "cross-env MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin MINIO_ENDPOINT=localhost MONGO_URL=mongodb://localhost:27017 TRANSACTOR_URL=ws:/localhost:3333 TELEGRAM_DATABASE=telegram-service ELASTIC_URL=http://localhost:9200 REKONI_URL=http://localhost:4004 ts-node ./src/index.ts", "run-local-node": "cross-env MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin MINIO_ENDPOINT=localhost MONGO_URL=mongodb://localhost:27017 TRANSACTOR_URL=ws:/localhost:3333 TELEGRAM_DATABASE=telegram-service ELASTIC_URL=http://localhost:9200 node ./bundle.js", "lint": "eslint src", "format": "prettier --write src && eslint --fix src" @@ -37,7 +37,8 @@ "typescript": "^4.3.5", "@types/ws": "^8.2.1", "@types/xml2js": "~0.4.9", - "@types/mime-types": "~2.1.1" + "@types/mime-types": "~2.1.1", + "@types/request": "~2.48.8" }, "dependencies": { "mongodb": "^4.1.1", @@ -85,6 +86,9 @@ "@anticrm/server-recruit": "~0.6.0", "@anticrm/server-recruit-resources": "~0.6.0", "@anticrm/server-task": "~0.6.0", - "@anticrm/server-task-resources": "~0.6.0" + "@anticrm/server-task-resources": "~0.6.0", + "@anticrm/rekoni": "~0.6.0", + "request": "~2.88.2", + "@anticrm/tags": "~0.6.0" } } diff --git a/dev/tool/src/importer.ts b/dev/tool/src/importer.ts index f28a7025a9..11cbea5e73 100644 --- a/dev/tool/src/importer.ts +++ b/dev/tool/src/importer.ts @@ -17,7 +17,7 @@ import attachment, { Attachment } from '@anticrm/attachment' import chunter, { Comment } from '@anticrm/chunter' import contact, { Channel, ChannelProvider, EmployeeAccount, Person } from '@anticrm/contact' -import core, { AttachedData, AttachedDoc, Class, Data, Doc, DocumentUpdate, Ref, SortingOrder, Space, TxOperations, TxResult, MixinData } from '@anticrm/core' +import core, { AttachedData, Class, Data, Doc, MixinData, Ref, SortingOrder, Space, TxOperations, TxResult } from '@anticrm/core' import recruit from '@anticrm/model-recruit' import { Applicant, Candidate, Vacancy } from '@anticrm/recruit' import task, { calcRank, DoneState, genRanks, Kanban, State } from '@anticrm/task' @@ -30,6 +30,7 @@ import { dirname, join } from 'path' import { parseStringPromise } from 'xml2js' import { connect } from './connect' import { ElasticTool } from './elastic' +import { findOrUpdateAttached } from './utils' const _ = { candidates: 'Кандидаты', @@ -432,13 +433,3 @@ async function createUpdateSpaceKanban (spaceId: Ref, client: TxOperati ) return states } -async function findOrUpdateAttached (client: TxOperations, space: Ref, _class: Ref>, objectId: Ref, data: AttachedData, attached: {attachedTo: Ref, attachedClass: Ref>, collection: string}): Promise { - let existingObj = await client.findOne(_class, { _id: objectId, space }) as T - if (existingObj !== undefined) { - await client.updateCollection(_class, space, objectId, attached.attachedTo, attached.attachedClass, attached.collection, data as unknown as DocumentUpdate) - } else { - await client.addCollection(_class, space, attached.attachedTo, attached.attachedClass, attached.collection, data, objectId) - existingObj = { _id: objectId, _class, space, ...data, ...attached } as unknown as T - } - return existingObj -} diff --git a/dev/tool/src/index.ts b/dev/tool/src/index.ts index 56741c8666..a9b21ed486 100644 --- a/dev/tool/src/index.ts +++ b/dev/tool/src/index.ts @@ -32,8 +32,10 @@ import { decodeToken, generateToken } from '@anticrm/server-token' import toolPlugin, { prepareTools, version } from '@anticrm/server-tool' import { program } from 'commander' import { Db, MongoClient } from 'mongodb' +import { exit } from 'process' import { rebuildElastic } from './elastic' import { importXml } from './importer' +import { updateCandidates } from './recruit' import { clearTelegramHistory } from './telegram' import { diffWorkspace, dumpWorkspace, restoreWorkspace } from './workspace' @@ -233,5 +235,16 @@ program .action(async (token) => { console.log(decodeToken(token)) }) +program + .command('update-recruit ') + .description('process pdf documents inside minio and update resumes with skills, etc.') + .action(async (workspace) => { + const rekoniUrl = process.env.REKONI_URL + if (rekoniUrl === undefined) { + console.log('Please provide REKONI_URL environment variable') + exit(1) + } + return await updateCandidates(transactorUrl, workspace, minio, mongodbUri, elasticUrl, rekoniUrl) + }) program.parse(process.argv) diff --git a/dev/tool/src/recruit.ts b/dev/tool/src/recruit.ts new file mode 100644 index 0000000000..bb17532adb --- /dev/null +++ b/dev/tool/src/recruit.ts @@ -0,0 +1,228 @@ +// +// Copyright © 2020, 2021 Anticrm Platform Contributors. +// Copyright © 2021 Hardcore Engineering Inc. +// +// Licensed under the Eclipse Public License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. You may +// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// +// See the License for the specific language governing permissions and +// limitations under the License. +// + +import { Attachment } from '@anticrm/attachment' +import contact, { Channel, ChannelProvider, EmployeeAccount } from '@anticrm/contact' +import { Ref, TxOperations, WithLookup } from '@anticrm/core' +import attachment from '@anticrm/model-attachment' +import recruit from '@anticrm/model-recruit' +import { Candidate } from '@anticrm/recruit' +import { ReconiDocument } from '@anticrm/rekoni' +import { generateToken } from '@anticrm/server-token' +import tags, { findTagCategory } from '@anticrm/tags' +import { Client } from 'minio' +import request from 'request' +import { connect } from './connect' +import { ElasticTool } from './elastic' +import { findOrUpdateAttached } from './utils' +import { readMinioData } from './workspace' + +async function recognize (rekoniUrl: string, data: string, token: string): Promise { + return await new Promise((resolve) => { + request.post({ + url: rekoniUrl + '/recognize?format=pdf', + headers: { + Authorization: 'Bearer ' + token, + 'Content-Type': 'application/json' + }, + json: true, + body: { fileUrl: 'document.pdf', dataBlob: data } + }, function (error, response, body) { + if (error != null) { + console.error(error) + } + resolve(body as ReconiDocument) + }) + }) +} + +function isUndef (value?: string): boolean { + if (value == null || value.trim().length === 0) { + return true + } + return false +} + +async function addChannel (client: TxOperations, channels: Channel[], c: Candidate, type: Ref, value?: string): Promise { + if (value !== undefined) { + const provider = channels.find((e) => e.provider === type) + if (provider === undefined) { + await client.addCollection( + contact.class.Channel, + contact.space.Contacts, + c._id, + contact.class.Person, + 'channels', + { + value: value, + provider: type + } + ) + } else { + if (isUndef(provider.value)) { + provider.value = value + await client.update(provider, + { + value: value, + provider: type + } + ) + } + } + } +} + +export async function updateCandidates ( + transactorUrl: string, + dbName: string, + minio: Client, + mongoUrl: string, + elasticUrl: string, + rekoniUrl: string +): Promise { + const connection = await connect(transactorUrl, dbName) + + const tool = new ElasticTool(mongoUrl, dbName, minio, elasticUrl) + const done = await tool.connect() + + const token = generateToken('anticrm@hc.engineering', dbName) + try { + const client = new TxOperations(connection, 'recruit:account:candidate-importer' as Ref) + + const candidates = await client.findAll(recruit.mixin.Candidate, {}) + console.log('candidates', candidates.length) + let cpos = 0 + for (const c of candidates) { + cpos++ + const attachments = await client.findAll(attachment.class.Attachment, { attachedTo: c._id }) + for (const a of attachments) { + if (a.type !== 'application/pdf') { + console.log('skipping', c.name, a.name, `(${cpos}, ${candidates.length})`) + } + if (a.type.includes('application/pdf')) { + console.log('processing', c.name, a.name, `(${cpos}, ${candidates.length})`) + try { + const buffer = Buffer.concat(await readMinioData(minio, dbName, a.file)).toString('base64') + const document = await recognize(rekoniUrl, buffer, token) + if (document !== undefined) { + await updateAvatar(c, document, minio, dbName, client, tool) + + // Update candidate values if applicable + if (isUndef(c.city) && document.city !== undefined) { + await client.update(c, { city: document.city }) + } + + if (isUndef(c.title) && document.title !== undefined) { + await client.update(c, { title: document.title }) + } + + // Update contact + await updateContacts(client, c, document) + + // Update skills + await updateSkills(client, c, document) + } + } catch (err: any) { + console.error('error processing', err) + } + } + } + } + } catch (err: any) { + console.error(err) + } finally { + await done() + await connection.close() + } +} + +async function updateSkills (client: TxOperations, c: Candidate, document: ReconiDocument): Promise { + const skills = await client.findAll(tags.class.TagReference, { attachedTo: c._id }) + const namedSkills = new Set(Array.from(skills.map(it => (it.title.toLowerCase())))) + + const elements = await client.findAll(tags.class.TagElement, { targetClass: recruit.mixin.Candidate }) + const namedElements = new Map(Array.from(elements.map(it => ([it.title.toLowerCase(), it._id])))) + + const categories = await client.findAll(tags.class.TagCategory, {}) + + let pos = 0 + for (const s of document.skills ?? []) { + const title = s.trim().toLowerCase() + // Check if we already had skill added + if (!namedSkills.has(title)) { + // No yet tag with title + const color = pos++ + let tag = namedElements.get(title) + if (tag === undefined) { + const category = findTagCategory(s, categories) + tag = await client.createDoc(tags.class.TagElement, tags.space.Tags, { + title: s, + color, + targetClass: recruit.mixin.Candidate, + description: '', + category: category + }) + } + namedSkills.add(title) + await client.addCollection(tags.class.TagReference, c.space, c._id, recruit.mixin.Candidate, 'skills', { + title: title, + color, + tag + }) + } + } +} +async function updateContacts (client: TxOperations, c: WithLookup, document: ReconiDocument): Promise { + const channels = await client.findAll(contact.class.Channel, { attachedTo: c._id }) + await addChannel(client, channels, c, contact.channelProvider.Email, document.email) + await addChannel(client, channels, c, contact.channelProvider.GitHub, document.github) + await addChannel(client, channels, c, contact.channelProvider.LinkedIn, document.linkedin) + await addChannel(client, channels, c, contact.channelProvider.Phone, document.phone) + await addChannel(client, channels, c, contact.channelProvider.Telegram, document.telegram) + await addChannel(client, channels, c, contact.channelProvider.Twitter, document.twitter) + await addChannel(client, channels, c, contact.channelProvider.Facebook, document.facebook) +} + +async function updateAvatar (c: WithLookup, document: ReconiDocument, minio: Client, dbName: string, client: TxOperations, tool: ElasticTool): Promise { + if (document.format !== 'headhunter' && document.format !== 'podbor') { + // Only update avatar for this kind of resume formats. + return + } + if (c.avatar === undefined && document.avatar !== undefined && document.avatarName !== undefined && document.avatarFormat !== undefined) { + const attachId = (`${c._id}.${document.avatarName}`) as Ref + // Upload new avatar for candidate + const data = Buffer.from(document.avatar, 'base64') + await minio.putObject(dbName, attachId, data, data.length, { + 'Content-Type': document.avatarFormat + }) + + const attachedDoc = await findOrUpdateAttached(client, recruit.space.CandidatesPublic, attachment.class.Photo, attachId, { + name: document.avatarName, + file: attachId, + type: document.avatarFormat, + size: data.length, + lastModified: Date.now() + }, { + attachedTo: c._id, + attachedClass: contact.class.Person, + collection: 'photos' + }) + + await tool.indexAttachmentDoc(attachedDoc, data) + + await client.update(c, { avatar: attachId }) + } +} diff --git a/dev/tool/src/utils.ts b/dev/tool/src/utils.ts new file mode 100644 index 0000000000..29ee97b90b --- /dev/null +++ b/dev/tool/src/utils.ts @@ -0,0 +1,12 @@ +import { AttachedData, AttachedDoc, Class, Doc, DocumentUpdate, Ref, Space, TxOperations } from '@anticrm/core' + +export async function findOrUpdateAttached (client: TxOperations, space: Ref, _class: Ref>, objectId: Ref, data: AttachedData, attached: { attachedTo: Ref, attachedClass: Ref>, collection: string }): Promise { + let existingObj = await client.findOne(_class, { _id: objectId, space }) as T + if (existingObj !== undefined) { + await client.updateCollection(_class, space, objectId, attached.attachedTo, attached.attachedClass, attached.collection, data as unknown as DocumentUpdate) + } else { + await client.addCollection(_class, space, attached.attachedTo, attached.attachedClass, attached.collection, data, objectId) + existingObj = { _id: objectId, _class, space, ...data, ...attached } as unknown as T + } + return existingObj +} diff --git a/dev/tool/src/workspace.ts b/dev/tool/src/workspace.ts index 1db9e8b7d4..b297e9994e 100644 --- a/dev/tool/src/workspace.ts +++ b/dev/tool/src/workspace.ts @@ -77,22 +77,7 @@ export async function dumpWorkspace (mongoUrl: string, dbName: string, fileName: const fileHandle = await open(join(minioDbLocation, d.name), 'w') - const data = await minio.getObject(dbName, d.name) - const chunks: Buffer[] = [] - - await new Promise((resolve) => { - data.on('readable', () => { - let chunk - while ((chunk = data.read()) !== null) { - const b = chunk as Buffer - chunks.push(b) - } - }) - - data.on('end', () => { - resolve(null) - }) - }) + const chunks: Buffer[] = await readMinioData(minio, dbName, d.name) for (const b of chunks) { await fileHandle.write(b) } @@ -106,6 +91,26 @@ export async function dumpWorkspace (mongoUrl: string, dbName: string, fileName: } } +export async function readMinioData (minio: Client, dbName: string, name: string): Promise { + const data = await minio.getObject(dbName, name) + const chunks: Buffer[] = [] + + await new Promise((resolve) => { + data.on('readable', () => { + let chunk + while ((chunk = data.read()) !== null) { + const b = chunk as Buffer + chunks.push(b) + } + }) + + data.on('end', () => { + resolve(null) + }) + }) + return chunks +} + export async function restoreWorkspace ( mongoUrl: string, dbName: string, diff --git a/packages/rekoni/src/types.ts b/packages/rekoni/src/types.ts index 5c446d3216..28d2884b16 100644 --- a/packages/rekoni/src/types.ts +++ b/packages/rekoni/src/types.ts @@ -17,6 +17,7 @@ * @public */ export interface ReconiDocument { + format: string firstName: string lastName: string title?: string diff --git a/plugins/recruit-resources/src/components/CreateCandidate.svelte b/plugins/recruit-resources/src/components/CreateCandidate.svelte index 5e2e05eb79..b5a7a13347 100644 --- a/plugins/recruit-resources/src/components/CreateCandidate.svelte +++ b/plugins/recruit-resources/src/components/CreateCandidate.svelte @@ -30,7 +30,7 @@ } from '@anticrm/presentation' import type { Candidate } from '@anticrm/recruit' import { recognizeDocument } from '@anticrm/rekoni' - import tags, { findTagCategory, findTagCategory, TagElement, TagReference } from '@anticrm/tags' + import tags, { findTagCategory, TagElement, TagReference } from '@anticrm/tags' import { Component, EditBox, @@ -248,7 +248,7 @@ const categories = await client.findAll(tags.class.TagCategory, {}) const categoriesMap = new Map(Array.from(categories.map(it => ([it._id, it])))) - + const newSkills:TagReference[] = [] // Create missing tag elemnts for (const s of doc.skills ?? []) {