1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-09-20 06:17:20 +03:00

perf(tcp server): option to download spaCy models through mirror in China

This commit is contained in:
louistiti 2022-10-07 15:31:24 +08:00
parent 40958f2fb4
commit ea6fc4108a
3 changed files with 77 additions and 22 deletions

View File

@ -7,7 +7,7 @@ import semver from 'semver'
import { version } from '@@/package.json'
import { LogHelper } from '@/helpers/log-helper'
import { PYTHON_BRIDGE_BIN_PATH } from '@/constants'
import { PYTHON_BRIDGE_BIN_PATH, EN_SPACY_MODEL_NAME } from '@/constants'
dotenv.config()
@ -190,7 +190,7 @@ export default () =>
try {
const p = await command(
'pipenv run python -c "import en_core_web_trf"',
`pipenv run python -c "import ${EN_SPACY_MODEL_NAME}"`,
{ shell: true }
)
LogHelper.info(p.command)

View File

@ -3,7 +3,14 @@ import path from 'node:path'
import { command } from 'execa'
import { PYTHON_BRIDGE_SRC_PATH, TCP_SERVER_SRC_PATH } from '@/constants'
import {
EN_SPACY_MODEL_NAME,
EN_SPACY_MODEL_VERSION,
FR_SPACY_MODEL_NAME,
FR_SPACY_MODEL_VERSION,
PYTHON_BRIDGE_SRC_PATH,
TCP_SERVER_SRC_PATH
} from '@/constants'
import { LogHelper } from '@/helpers/log-helper'
import { LoaderHelper } from '@/helpers/loader-helper'
import { OSHelper, OSTypes } from '@/helpers/os-helper'
@ -17,9 +24,25 @@ import { OSHelper, OSTypes } from '@/helpers/os-helper'
* 5. Install spaCy models if the targeted development environment is the TCP server
*/
/**
 * Build the URL of the installation file (wheel) of a spaCy model.
 *
 * @param {string} model - Key of the model in SPACY_MODELS (e.g. 'en', 'fr')
 * @param {string} [mirror] - Optional mirror identifier. 'cn' selects the
 * Chinese mirror; any other value keeps the default GitHub releases URL
 * @returns {string} URL of the `.whl` file to install for the given model
 * @throws {Error} When the given model is not registered in SPACY_MODELS
 */
function getModelInstallationFileUrl(model, mirror = undefined) {
  const spacyModel = SPACY_MODELS.get(model)

  // Fail with an explicit message instead of an opaque destructuring TypeError
  if (!spacyModel) {
    throw new Error(`Unknown spaCy model "${model}"`)
  }

  const { name, version } = spacyModel
  const suffix = 'py3-none-any.whl'
  let urlPrefix = 'https://github.com/explosion/spacy-models/releases/download'

  if (mirror === 'cn') {
    LogHelper.info(
      'Using Chinese mirror to download model installation file...'
    )
    urlPrefix =
      'https://download.fastgit.org/explosion/spacy-models/releases/download'
  }

  // The release tag and the file name both embed "<name>-<version>"
  return `${urlPrefix}/${name}-${version}/${name}-${version}-${suffix}`
}
const SETUP_TARGETS = new Map()
// Find new spaCy models: https://github.com/explosion/spacy-models/releases
const SPACY_MODELS = ['en_core_web_trf-3.4.0', 'fr_core_news_md-3.4.0']
const SPACY_MODELS = new Map()
SETUP_TARGETS.set('python-bridge', {
name: 'Python bridge',
@ -33,11 +56,22 @@ SETUP_TARGETS.set('tcp-server', {
dotVenvPath: path.join(TCP_SERVER_SRC_PATH, '.venv'),
dotProjectPath: path.join(TCP_SERVER_SRC_PATH, '.venv', '.project')
})
// Register, per language code, the name and version of the spaCy model to install
for (const [language, name, version] of [
  ['en', EN_SPACY_MODEL_NAME, EN_SPACY_MODEL_VERSION],
  ['fr', FR_SPACY_MODEL_NAME, FR_SPACY_MODEL_VERSION]
]) {
  SPACY_MODELS.set(language, { name, version })
}
;(async () => {
LoaderHelper.start()
const { argv } = process
const givenSetupTarget = argv[2].toLowerCase()
// Optional download mirror identifier (e.g. "cn" for the Chinese mirror)
const givenMirror = argv[3]?.toLowerCase()
if (!SETUP_TARGETS.has(givenSetupTarget)) {
LogHelper.error(
@ -170,8 +204,13 @@ SETUP_TARGETS.set('tcp-server', {
LogHelper.info('Installing spaCy models...')
// Install models one by one to avoid network throttling
for (const model of SPACY_MODELS) {
await command(`pipenv run spacy download ${model} --direct`, {
for (const modelLanguage of SPACY_MODELS.keys()) {
const modelInstallationFileUrl = getModelInstallationFileUrl(
modelLanguage,
givenMirror
)
await command(`pipenv run pip install ${modelInstallationFileUrl}`, {
shell: true,
stdio: 'inherit'
})
@ -187,11 +226,9 @@ SETUP_TARGETS.set('tcp-server', {
LogHelper.info('Checking whether all spaCy models are installed...')
try {
for (let model of SPACY_MODELS) {
;[model] = model.split('-')
for (const { name: modelName } of SPACY_MODELS.values()) {
const { stderr } = await command(
`pipenv run python -c "import ${model}"`,
`pipenv run python -c "import ${modelName}"`,
{ shell: true }
)

View File

@ -11,6 +11,9 @@ const PRODUCTION_ENV = 'production'
const DEVELOPMENT_ENV = 'development'
const TESTING_ENV = 'testing'
/**
* Binaries / distribution
*/
export const BINARIES_FOLDER_NAME = OSHelper.getBinariesFolderName()
export const PYTHON_BRIDGE_DIST_PATH = path.join('bridges', 'python', 'dist')
export const TCP_SERVER_DIST_PATH = path.join('tcp_server', 'dist')
@ -21,11 +24,37 @@ export const TCP_SERVER_SRC_PATH = path.join('tcp_server', 'src')
export const PYTHON_BRIDGE_BIN_NAME = 'leon-python-bridge'
export const TCP_SERVER_BIN_NAME = 'leon-tcp-server'
export const TCP_SERVER_BIN_PATH = path.join(
TCP_SERVER_DIST_PATH,
BINARIES_FOLDER_NAME,
TCP_SERVER_BIN_NAME
)
export const PYTHON_BRIDGE_BIN_PATH = path.join(
PYTHON_BRIDGE_DIST_PATH,
BINARIES_FOLDER_NAME,
PYTHON_BRIDGE_BIN_NAME
)
/**
 * spaCy models
 * Find new spaCy models: https://github.com/explosion/spacy-models/releases
 *
 * Each model is described by a name and a version. The setup script combines
 * both as "<name>-<version>" to build the model download URL, and uses the
 * name alone as the Python module to import when checking the installation.
 */
// English model
export const EN_SPACY_MODEL_NAME = 'en_core_web_trf'
export const EN_SPACY_MODEL_VERSION = '3.4.0'
// French model
export const FR_SPACY_MODEL_NAME = 'fr_core_news_md'
export const FR_SPACY_MODEL_VERSION = '3.4.0'
/**
* Environments
*/
export const IS_PRODUCTION_ENV = process.env['LEON_NODE_ENV'] === PRODUCTION_ENV
export const IS_DEVELOPMENT_ENV =
process.env['LEON_NODE_ENV'] === DEVELOPMENT_ENV
export const IS_TESTING_ENV = process.env['LEON_NODE_ENV'] === TESTING_ENV
/**
* Leon environment preferences
*/
export const LANG = process.env['LEON_LANG'] as LongLanguageCode
export const HOST = process.env['LEON_HOST']
@ -48,14 +77,3 @@ export const HAS_LOGGER = process.env['LEON_LOGGER'] === 'true'
export const TCP_SERVER_HOST = process.env['LEON_PY_TCP_SERVER_HOST']
export const TCP_SERVER_PORT = process.env['LEON_PY_TCP_SERVER_PORT']
export const TCP_SERVER_BIN_PATH = path.join(
TCP_SERVER_DIST_PATH,
BINARIES_FOLDER_NAME,
TCP_SERVER_BIN_NAME
)
export const PYTHON_BRIDGE_BIN_PATH = path.join(
PYTHON_BRIDGE_DIST_PATH,
BINARIES_FOLDER_NAME,
PYTHON_BRIDGE_BIN_NAME
)