Tool recruit update (#1035)

Signed-off-by: Andrey Sobolev <haiodo@gmail.com>
This commit is contained in:
Andrey Sobolev 2022-02-23 16:09:07 +07:00 committed by GitHub
parent 2691ac930e
commit 5a74bda519
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 319 additions and 33 deletions

View File

@ -164,6 +164,7 @@ specifiers:
'@types/mime-types': ~2.1.1
'@types/pdfkit': ~0.12.3
'@types/prosemirror-model': ~1.16.0
'@types/request': ~2.48.8
'@types/toposort': ^2.0.3
'@types/uuid': ^8.3.1
'@types/xml2js': ~0.4.9
@ -207,6 +208,7 @@ specifiers:
prettier: ^2.4.1
prettier-plugin-svelte: ^2.2.0
prosemirror-model: ~1.16.1
request: ~2.88.2
sass: ^1.37.5
sass-loader: ^12.1.0
simplytyped: ^3.3.0
@ -389,6 +391,7 @@ dependencies:
'@types/mime-types': 2.1.1
'@types/pdfkit': 0.12.3
'@types/prosemirror-model': 1.16.0
'@types/request': 2.48.8
'@types/toposort': 2.0.3
'@types/uuid': 8.3.3
'@types/xml2js': 0.4.9
@ -432,6 +435,7 @@ dependencies:
prettier: 2.5.1
prettier-plugin-svelte: 2.5.1_prettier@2.5.1
prosemirror-model: 1.16.1
request: 2.88.2
sass: 1.45.0
sass-loader: 12.4.0_sass@1.45.0+webpack@5.65.0
simplytyped: 3.3.0_typescript@4.5.4
@ -2125,6 +2129,10 @@ packages:
'@types/node': 17.0.0
dev: false
/@types/caseless/0.12.2:
resolution: {integrity: sha512-6ckxMjBBD8URvjB6J3NcnuAn5Pkl7t3TizAg+xdlzzQGSPSmBcXf8KoIH0ua/i+tio+ZRUHEXp0HEmvaR4kt0w==}
dev: false
/@types/connect-history-api-fallback/1.3.5:
resolution: {integrity: sha512-h8QJa8xSb1WD4fpKBDcATDNGXghFj6/3GRWG6dhmRcu0RX1Ubasur2Uvx5aeEwlf0MwblEC2bMzzMQntxnw/Cw==}
dependencies:
@ -2451,6 +2459,15 @@ packages:
resolution: {integrity: sha512-EEhsLsD6UsDM1yFhAvy0Cjr6VwmpMWqFBCb9w07wVugF7w9nfajxLuVmngTIpgS6svCnm6Vaw+MZhoDCKnOfsw==}
dev: false
/@types/request/2.48.8:
resolution: {integrity: sha512-whjk1EDJPcAR2kYHRbFl/lKeeKYTi05A15K9bnLInCVroNDCtXce57xKdI0/rQaA3K+6q0eFyUBPmqfSndUZdQ==}
dependencies:
'@types/caseless': 0.12.2
'@types/node': 17.0.10
'@types/tough-cookie': 4.0.1
form-data: 2.5.1
dev: false
/@types/retry/0.12.1:
resolution: {integrity: sha512-xoDlM2S4ortawSWORYqsdU+2rxdh4LRW9ytc3zmT37RIKQh6IHyKwwtKhKis9ah8ol07DCkZxPt8BBvPjC6v4g==}
dev: false
@ -2496,6 +2513,10 @@ packages:
resolution: {integrity: sha512-jRtyvEu0Na/sy0oIxBW0f6wPQjidgVqlmCTJVHEGTNEUdL1f0YSvdPzHY7nX7MUWAZS6zcAa0KkqofHjy/xDZQ==}
dev: false
/@types/tough-cookie/4.0.1:
resolution: {integrity: sha512-Y0K95ThC3esLEYD6ZuqNek29lNX2EM1qxV8y2FTLUB0ff5wWrk7az+mLrnNFUnaXcgKye22+sFBRXOgpPILZNg==}
dev: false
/@types/uuid/8.3.3:
resolution: {integrity: sha512-0LbEEx1zxrYB3pgpd1M5lEhLcXjKJnYghvhTRgaBeUivLHMDM1TzF3IJ6hXU2+8uA4Xz+5BA63mtZo5DjVT8iA==}
dev: false
@ -5657,6 +5678,15 @@ packages:
mime-types: 2.1.34
dev: false
/form-data/2.5.1:
resolution: {integrity: sha512-m21N3WOmEEURgk6B9GLOE4RuWOFf28Lhh9qGYeNlGq4VDXUlJy2th2slBNU8Gp8EzloYZOibZJ7t5ecIrFSjVA==}
engines: {node: '>= 0.12'}
dependencies:
asynckit: 0.4.0
combined-stream: 1.0.8
mime-types: 2.1.34
dev: false
/forwarded-parse/2.1.2:
resolution: {integrity: sha512-alTFZZQDKMporBH77856pXgzhEzaUVmLCDk+egLgIgHst3Tpndzz8MnKe+GzRJRfvVdn69HhpW7cmXzvtLvJAw==}
dev: false
@ -14286,7 +14316,7 @@ packages:
dev: false
file:projects/tool.tgz:
resolution: {integrity: sha512-3L/wr78VWV79qjSBCDWg37qMPdsEzAf5STbeXuHPOaMCvkhij64Nv0wN3vzj8CRwWRuKrX0o3Ts0NLw2SSl7hQ==, tarball: file:projects/tool.tgz}
resolution: {integrity: sha512-mU+wxglCPtQylCz6eJOSTVv0pnsJcZ7F9nWUFIimMVNzAf3SR1vRHgozcESbMWCzDnwNx1rRb5SJUMFP5Uy2cQ==, tarball: file:projects/tool.tgz}
name: '@rush-temp/tool'
version: 0.0.0
dependencies:
@ -14296,6 +14326,7 @@ packages:
'@types/mime-types': 2.1.1
'@types/minio': 7.0.11
'@types/node': 16.11.14
'@types/request': 2.48.8
'@types/ws': 8.2.2
'@types/xml2js': 0.4.9
'@typescript-eslint/eslint-plugin': 5.7.0_c25e8c1f4f4f7aaed27aa6f9ce042237
@ -14314,6 +14345,7 @@ packages:
minio: 7.0.26
mongodb: 4.2.2
prettier: 2.5.1
request: 2.88.2
ts-node: 10.4.0_5d12c2add188ff0e728b4ade3dacd39b
typescript: 4.5.4
ws: 8.4.2

View File

@ -12,7 +12,7 @@
"bundle": "esbuild src/index.ts --bundle --minify --platform=node > bundle.js",
"docker:build": "docker build -t anticrm/tool .",
"docker:push": "docker push anticrm/tool",
"run-local": "cross-env MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin MINIO_ENDPOINT=localhost MONGO_URL=mongodb://localhost:27017 TRANSACTOR_URL=ws:/localhost:3333 TELEGRAM_DATABASE=telegram-service ELASTIC_URL=http://localhost:9200 ts-node ./src/index.ts",
"run-local": "cross-env MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin MINIO_ENDPOINT=localhost MONGO_URL=mongodb://localhost:27017 TRANSACTOR_URL=ws:/localhost:3333 TELEGRAM_DATABASE=telegram-service ELASTIC_URL=http://localhost:9200 REKONI_URL=http://localhost:4004 ts-node ./src/index.ts",
"run-local-node": "cross-env MINIO_ACCESS_KEY=minioadmin MINIO_SECRET_KEY=minioadmin MINIO_ENDPOINT=localhost MONGO_URL=mongodb://localhost:27017 TRANSACTOR_URL=ws:/localhost:3333 TELEGRAM_DATABASE=telegram-service ELASTIC_URL=http://localhost:9200 node ./bundle.js",
"lint": "eslint src",
"format": "prettier --write src && eslint --fix src"
@ -37,7 +37,8 @@
"typescript": "^4.3.5",
"@types/ws": "^8.2.1",
"@types/xml2js": "~0.4.9",
"@types/mime-types": "~2.1.1"
"@types/mime-types": "~2.1.1",
"@types/request": "~2.48.8"
},
"dependencies": {
"mongodb": "^4.1.1",
@ -85,6 +86,9 @@
"@anticrm/server-recruit": "~0.6.0",
"@anticrm/server-recruit-resources": "~0.6.0",
"@anticrm/server-task": "~0.6.0",
"@anticrm/server-task-resources": "~0.6.0"
"@anticrm/server-task-resources": "~0.6.0",
"@anticrm/rekoni": "~0.6.0",
"request": "~2.88.2",
"@anticrm/tags": "~0.6.0"
}
}

View File

@ -17,7 +17,7 @@
import attachment, { Attachment } from '@anticrm/attachment'
import chunter, { Comment } from '@anticrm/chunter'
import contact, { Channel, ChannelProvider, EmployeeAccount, Person } from '@anticrm/contact'
import core, { AttachedData, AttachedDoc, Class, Data, Doc, DocumentUpdate, Ref, SortingOrder, Space, TxOperations, TxResult, MixinData } from '@anticrm/core'
import core, { AttachedData, Class, Data, Doc, MixinData, Ref, SortingOrder, Space, TxOperations, TxResult } from '@anticrm/core'
import recruit from '@anticrm/model-recruit'
import { Applicant, Candidate, Vacancy } from '@anticrm/recruit'
import task, { calcRank, DoneState, genRanks, Kanban, State } from '@anticrm/task'
@ -30,6 +30,7 @@ import { dirname, join } from 'path'
import { parseStringPromise } from 'xml2js'
import { connect } from './connect'
import { ElasticTool } from './elastic'
import { findOrUpdateAttached } from './utils'
const _ = {
candidates: 'Кандидаты',
@ -432,13 +433,3 @@ async function createUpdateSpaceKanban (spaceId: Ref<Vacancy>, client: TxOperati
)
return states
}
/**
 * Create or update an attached document with a caller-chosen id.
 *
 * Looks the document up by `_id` and `space`. When nothing is found it is added to the
 * collection described by `attached`; otherwise the existing document is updated with `data`.
 *
 * @returns the document as it was found before the update (it is not re-read), or a locally
 * assembled object built from the arguments when a new document was added.
 */
async function findOrUpdateAttached<T extends AttachedDoc> (client: TxOperations, space: Ref<Space>, _class: Ref<Class<T>>, objectId: Ref<T>, data: AttachedData<T>, attached: {attachedTo: Ref<Doc>, attachedClass: Ref<Class<Doc>>, collection: string}): Promise<T> {
  const { attachedTo, attachedClass, collection } = attached
  const found = await client.findOne<Doc>(_class, { _id: objectId, space }) as T

  if (found === undefined) {
    // Nothing stored yet: add it and hand back a locally built copy.
    await client.addCollection(_class, space, attachedTo, attachedClass, collection, data, objectId)
    return { _id: objectId, _class, space, ...data, ...attached } as unknown as T
  }

  await client.updateCollection(_class, space, objectId, attachedTo, attachedClass, collection, data as unknown as DocumentUpdate<T>)
  return found
}

View File

@ -32,8 +32,10 @@ import { decodeToken, generateToken } from '@anticrm/server-token'
import toolPlugin, { prepareTools, version } from '@anticrm/server-tool'
import { program } from 'commander'
import { Db, MongoClient } from 'mongodb'
import { exit } from 'process'
import { rebuildElastic } from './elastic'
import { importXml } from './importer'
import { updateCandidates } from './recruit'
import { clearTelegramHistory } from './telegram'
import { diffWorkspace, dumpWorkspace, restoreWorkspace } from './workspace'
@ -233,5 +235,16 @@ program
.action(async (token) => {
console.log(decodeToken(token))
})
// `update-recruit <workspace>`: runs updateCandidates for the workspace.
// The REKONI_URL environment variable is mandatory; without it the tool prints a hint and exits with code 1.
program
  .command('update-recruit <workspace>')
  .description('process pdf documents inside minio and update resumes with skills, etc.')
  .action(async (workspace) => {
    const { REKONI_URL: rekoniUrl } = process.env
    if (rekoniUrl !== undefined) {
      return await updateCandidates(transactorUrl, workspace, minio, mongodbUri, elasticUrl, rekoniUrl)
    }
    console.log('Please provide REKONI_URL environment variable')
    exit(1)
  })
program.parse(process.argv)

228
dev/tool/src/recruit.ts Normal file
View File

@ -0,0 +1,228 @@
//
// Copyright © 2020, 2021 Anticrm Platform Contributors.
// Copyright © 2021 Hardcore Engineering Inc.
//
// Licensed under the Eclipse Public License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. You may
// obtain a copy of the License at https://www.eclipse.org/legal/epl-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//
// See the License for the specific language governing permissions and
// limitations under the License.
//
import { Attachment } from '@anticrm/attachment'
import contact, { Channel, ChannelProvider, EmployeeAccount } from '@anticrm/contact'
import { Ref, TxOperations, WithLookup } from '@anticrm/core'
import attachment from '@anticrm/model-attachment'
import recruit from '@anticrm/model-recruit'
import { Candidate } from '@anticrm/recruit'
import { ReconiDocument } from '@anticrm/rekoni'
import { generateToken } from '@anticrm/server-token'
import tags, { findTagCategory } from '@anticrm/tags'
import { Client } from 'minio'
import request from 'request'
import { connect } from './connect'
import { ElasticTool } from './elastic'
import { findOrUpdateAttached } from './utils'
import { readMinioData } from './workspace'
/**
 * Send a document to the Rekoni service and return what it recognized.
 *
 * The document is POSTed as JSON to `<rekoniUrl>/recognize?format=pdf` with the content in the
 * `dataBlob` field (the caller in this file passes base64 text).
 *
 * @param rekoniUrl - base URL of the Rekoni service.
 * @param data - document content placed into `dataBlob`.
 * @param token - value sent in the `Authorization: Bearer` header.
 * @returns the response body as a ReconiDocument, or undefined when the request failed, the
 * service answered with a non-2xx status, or the body is not an object. The promise never rejects.
 */
async function recognize (rekoniUrl: string, data: string, token: string): Promise<ReconiDocument | undefined> {
  return await new Promise((resolve) => {
    request.post({
      url: rekoniUrl + '/recognize?format=pdf',
      headers: {
        Authorization: 'Bearer ' + token,
        'Content-Type': 'application/json'
      },
      json: true,
      body: { fileUrl: 'document.pdf', dataBlob: data }
    }, function (error, response, body) {
      if (error != null) {
        // Transport failure: there is no usable body, so report "nothing recognized".
        console.error(error)
        resolve(undefined)
        return
      }
      const status = response.statusCode
      if (status < 200 || status >= 300) {
        // An error payload must not be mistaken for a recognized document.
        console.error('recognize failed with status', status)
        resolve(undefined)
        return
      }
      if (body == null || typeof body !== 'object') {
        // With `json: true` an unparsable response is delivered as a plain string.
        resolve(undefined)
        return
      }
      resolve(body as ReconiDocument)
    })
  })
}
/**
 * Tell whether a string should be treated as missing.
 *
 * @param value - string to inspect; may be omitted.
 * @returns true for null/undefined and for strings that are empty once trimmed, false otherwise.
 */
function isUndef (value?: string): boolean {
  return value == null || value.trim().length === 0
}
/**
 * Store a recognized contact value as a channel of the candidate.
 *
 * - Missing or blank values are ignored, so no empty channels are created.
 * - When the candidate has no channel for `type`, a new one is added to the 'channels' collection.
 * - When a channel exists, it is only filled in if its current value is blank; existing
 *   values are never overwritten.
 *
 * @param client - operations client used for the writes.
 * @param channels - channels already attached to the candidate; a matching entry gets its
 * `value` updated in place when it is filled.
 * @param c - candidate the channel belongs to.
 * @param type - channel provider to look for.
 * @param value - recognized value, if any.
 */
async function addChannel (client: TxOperations, channels: Channel[], c: Candidate, type: Ref<ChannelProvider>, value?: string): Promise<void> {
  // `value == null` narrows the type for the compiler; isUndef additionally rejects blank strings.
  if (value == null || isUndef(value)) {
    return
  }

  const existing = channels.find((e) => e.provider === type)
  if (existing === undefined) {
    await client.addCollection(
      contact.class.Channel,
      contact.space.Contacts,
      c._id,
      contact.class.Person,
      'channels',
      {
        value,
        provider: type
      }
    )
    return
  }

  if (isUndef(existing.value)) {
    // Keep the in-memory copy in sync with what is written.
    existing.value = value
    await client.update(existing, {
      value,
      provider: type
    })
  }
}
/**
 * Run every PDF attachment of every candidate in a workspace through Rekoni and use the
 * result to fill in the avatar, city, title, contact channels and skills.
 *
 * City and title are only written when the candidate's current value is blank.
 * A failure while processing one attachment is logged and the run continues with the next one.
 * Errors raised inside the main loop are logged, not rethrown.
 *
 * @param transactorUrl - transactor endpoint passed to `connect`.
 * @param dbName - workspace name; also used as the minio bucket and for token generation.
 * @param minio - minio client used to read attachments and store avatars.
 * @param mongoUrl - passed to ElasticTool.
 * @param elasticUrl - passed to ElasticTool.
 * @param rekoniUrl - base URL of the Rekoni service.
 */
export async function updateCandidates (
  transactorUrl: string,
  dbName: string,
  minio: Client,
  mongoUrl: string,
  elasticUrl: string,
  rekoniUrl: string
): Promise<void> {
  const connection = await connect(transactorUrl, dbName)

  const tool = new ElasticTool(mongoUrl, dbName, minio, elasticUrl)
  const done = await tool.connect()

  const token = generateToken('anticrm@hc.engineering', dbName)
  try {
    const client = new TxOperations(connection, 'recruit:account:candidate-importer' as Ref<EmployeeAccount>)

    const candidates = await client.findAll(recruit.mixin.Candidate, {})
    console.log('candidates', candidates.length)
    let cpos = 0
    for (const c of candidates) {
      cpos++
      const attachments = await client.findAll(attachment.class.Attachment, { attachedTo: c._id })

      for (const a of attachments) {
        const progress = `(${cpos}, ${candidates.length})`
        // One test decides both outcomes, so an attachment is never reported as skipped and
        // then processed anyway (e.g. a type carrying extra parameters after 'application/pdf').
        if (!a.type.includes('application/pdf')) {
          console.log('skipping', c.name, a.name, progress)
          continue
        }

        console.log('processing', c.name, a.name, progress)
        try {
          const buffer = Buffer.concat(await readMinioData(minio, dbName, a.file)).toString('base64')
          const document = await recognize(rekoniUrl, buffer, token)
          if (document !== undefined) {
            await updateAvatar(c, document, minio, dbName, client, tool)

            // Update candidate values if applicable
            if (isUndef(c.city) && document.city !== undefined) {
              await client.update(c, { city: document.city })
            }
            if (isUndef(c.title) && document.title !== undefined) {
              await client.update(c, { title: document.title })
            }

            // Update contact
            await updateContacts(client, c, document)

            // Update skills
            await updateSkills(client, c, document)
          }
        } catch (err: unknown) {
          // Best effort: one broken attachment must not stop the whole run.
          console.error('error processing', err)
        }
      }
    }
  } catch (err: unknown) {
    console.error(err)
  } finally {
    // Close the transactor connection even when releasing the elastic tool fails.
    try {
      await done()
    } finally {
      await connection.close()
    }
  }
}
/**
 * Attach the skills recognized in a resume to the candidate as tag references.
 *
 * Skills are compared case-insensitively on their trimmed title. A skill the candidate already
 * references is left alone. When no TagElement with that title exists for candidates, one is
 * created first (its category is chosen by findTagCategory). Blank skill entries are ignored.
 *
 * @param client - operations client used for lookups and writes.
 * @param c - candidate that receives the skills.
 * @param document - recognized resume; `skills` may be absent.
 */
async function updateSkills (client: TxOperations, c: Candidate, document: ReconiDocument): Promise<void> {
  const skills = await client.findAll(tags.class.TagReference, { attachedTo: c._id })
  const namedSkills = new Set(skills.map((it) => it.title.trim().toLowerCase()))

  const elements = await client.findAll(tags.class.TagElement, { targetClass: recruit.mixin.Candidate })
  // Keys are trimmed so that elements stored with stray whitespace are still found.
  const namedElements = new Map(elements.map((it) => [it.title.trim().toLowerCase(), it._id]))

  const categories = await client.findAll(tags.class.TagCategory, {})

  let pos = 0
  for (const s of document.skills ?? []) {
    const name = s.trim()
    const title = name.toLowerCase()
    // Skip blank entries and skills the candidate already has.
    if (title.length === 0 || namedSkills.has(title)) {
      continue
    }

    const color = pos++
    let tag = namedElements.get(title)
    if (tag === undefined) {
      // No tag with this title yet: create it.
      // The trimmed title is stored so later lookups by trimmed title find it again
      // instead of creating a duplicate element.
      const category = findTagCategory(s, categories)
      tag = await client.createDoc(tags.class.TagElement, tags.space.Tags, {
        title: name,
        color,
        targetClass: recruit.mixin.Candidate,
        description: '',
        category: category
      })
    }
    namedSkills.add(title)
    await client.addCollection(tags.class.TagReference, c.space, c._id, recruit.mixin.Candidate, 'skills', {
      title: title,
      color,
      tag
    })
  }
}
/**
 * Pass every contact field recognized in the resume to addChannel, one provider at a time.
 *
 * The candidate's channels are loaded once and shared by all calls. Providers are handled
 * sequentially in the order: Email, GitHub, LinkedIn, Phone, Telegram, Twitter, Facebook.
 *
 * @param client - operations client used for lookups and writes.
 * @param c - candidate whose channels are updated.
 * @param document - recognized resume holding the contact values.
 */
async function updateContacts (client: TxOperations, c: WithLookup<Candidate>, document: ReconiDocument): Promise<void> {
  const channels = await client.findAll(contact.class.Channel, { attachedTo: c._id })
  const providers = contact.channelProvider

  const recognized: Array<[Ref<ChannelProvider>, string | undefined]> = [
    [providers.Email, document.email],
    [providers.GitHub, document.github],
    [providers.LinkedIn, document.linkedin],
    [providers.Phone, document.phone],
    [providers.Telegram, document.telegram],
    [providers.Twitter, document.twitter],
    [providers.Facebook, document.facebook]
  ]

  for (const [provider, value] of recognized) {
    await addChannel(client, channels, c, provider, value)
  }
}
/**
 * Give the candidate the avatar found in the resume, if the candidate has none yet.
 *
 * Only resumes whose format is 'headhunter' or 'podbor' are considered, and only when the
 * document carries the avatar data, its name and its format. The image is uploaded to minio,
 * registered as a Photo attachment in the 'photos' collection, indexed through the elastic
 * tool, and finally set as the candidate's avatar.
 *
 * @param c - candidate to update.
 * @param document - recognized resume.
 * @param minio - client used for the upload.
 * @param dbName - bucket the image is stored in.
 * @param client - operations client used for the writes.
 * @param tool - elastic tool used to index the new attachment.
 */
async function updateAvatar (c: WithLookup<Candidate>, document: ReconiDocument, minio: Client, dbName: string, client: TxOperations, tool: ElasticTool): Promise<void> {
  // Avatars are only taken from these resume formats.
  const avatarFormats = ['headhunter', 'podbor']
  if (!avatarFormats.includes(document.format)) {
    return
  }

  const { avatar, avatarName, avatarFormat } = document
  if (c.avatar !== undefined || avatar === undefined || avatarName === undefined || avatarFormat === undefined) {
    return
  }

  const photoId = (`${c._id}.${avatarName}`) as Ref<Attachment>

  // Upload new avatar for candidate
  const image = Buffer.from(avatar, 'base64')
  await minio.putObject(dbName, photoId, image, image.length, {
    'Content-Type': avatarFormat
  })

  const photoData = {
    name: avatarName,
    file: photoId,
    type: avatarFormat,
    size: image.length,
    lastModified: Date.now()
  }
  const owner = {
    attachedTo: c._id,
    attachedClass: contact.class.Person,
    collection: 'photos'
  }
  const photo = await findOrUpdateAttached<Attachment>(client, recruit.space.CandidatesPublic, attachment.class.Photo, photoId, photoData, owner)

  await tool.indexAttachmentDoc(photo, image)
  await client.update(c, { avatar: photoId })
}

12
dev/tool/src/utils.ts Normal file
View File

@ -0,0 +1,12 @@
import { AttachedData, AttachedDoc, Class, Doc, DocumentUpdate, Ref, Space, TxOperations } from '@anticrm/core'
/**
 * Ensure an attached document with the given id exists and carries `data`.
 *
 * The document is searched by `_id` within `space`. If it is missing, it is added to the
 * collection described by `attached` under the given id; if it is present, it is updated.
 *
 * @param client - operations client used for the lookup and the write.
 * @param space - space the document lives in.
 * @param _class - class of the attached document.
 * @param objectId - id to look up, and to use when creating.
 * @param data - values to store.
 * @param attached - parent document, parent class and collection name.
 * @returns for an existing document, the object as found before the update (not re-read);
 * for a new one, an object assembled locally from the arguments.
 */
export async function findOrUpdateAttached<T extends AttachedDoc> (client: TxOperations, space: Ref<Space>, _class: Ref<Class<T>>, objectId: Ref<T>, data: AttachedData<T>, attached: { attachedTo: Ref<Doc>, attachedClass: Ref<Class<Doc>>, collection: string }): Promise<T> {
  const current = await client.findOne<Doc>(_class, { _id: objectId, space }) as T

  if (current === undefined) {
    await client.addCollection(_class, space, attached.attachedTo, attached.attachedClass, attached.collection, data, objectId)
    const created = { _id: objectId, _class, space, ...data, ...attached } as unknown as T
    return created
  }

  const update = data as unknown as DocumentUpdate<T>
  await client.updateCollection(_class, space, objectId, attached.attachedTo, attached.attachedClass, attached.collection, update)
  return current
}

View File

@ -77,7 +77,22 @@ export async function dumpWorkspace (mongoUrl: string, dbName: string, fileName:
const fileHandle = await open(join(minioDbLocation, d.name), 'w')
const data = await minio.getObject(dbName, d.name)
const chunks: Buffer[] = await readMinioData(minio, dbName, d.name)
for (const b of chunks) {
await fileHandle.write(b)
}
await fileHandle.close()
}
}
await writeFile(fileName + '.workspace.json', JSON.stringify(workspaceInfo, undefined, 2))
} finally {
await client.close()
}
}
export async function readMinioData (minio: Client, dbName: string, name: string): Promise<Buffer[]> {
const data = await minio.getObject(dbName, name)
const chunks: Buffer[] = []
await new Promise((resolve) => {
@ -93,17 +108,7 @@ export async function dumpWorkspace (mongoUrl: string, dbName: string, fileName:
resolve(null)
})
})
for (const b of chunks) {
await fileHandle.write(b)
}
await fileHandle.close()
}
}
await writeFile(fileName + '.workspace.json', JSON.stringify(workspaceInfo, undefined, 2))
} finally {
await client.close()
}
return chunks
}
export async function restoreWorkspace (

View File

@ -17,6 +17,7 @@
* @public
*/
export interface ReconiDocument {
format: string
firstName: string
lastName: string
title?: string

View File

@ -30,7 +30,7 @@
} from '@anticrm/presentation'
import type { Candidate } from '@anticrm/recruit'
import { recognizeDocument } from '@anticrm/rekoni'
import tags, { findTagCategory, findTagCategory, TagElement, TagReference } from '@anticrm/tags'
import tags, { findTagCategory, TagElement, TagReference } from '@anticrm/tags'
import {
Component,
EditBox,