mirror of
https://github.com/leon-ai/leon.git
synced 2024-11-10 15:19:18 +03:00
perf(tcp server): option to download spaCy models through mirror in China
This commit is contained in:
parent
40958f2fb4
commit
ea6fc4108a
@ -7,7 +7,7 @@ import semver from 'semver'
|
||||
|
||||
import { version } from '@@/package.json'
|
||||
import { LogHelper } from '@/helpers/log-helper'
|
||||
import { PYTHON_BRIDGE_BIN_PATH } from '@/constants'
|
||||
import { PYTHON_BRIDGE_BIN_PATH, EN_SPACY_MODEL_NAME } from '@/constants'
|
||||
|
||||
dotenv.config()
|
||||
|
||||
@ -190,7 +190,7 @@ export default () =>
|
||||
|
||||
try {
|
||||
const p = await command(
|
||||
'pipenv run python -c "import en_core_web_trf"',
|
||||
`pipenv run python -c "import ${EN_SPACY_MODEL_NAME}"`,
|
||||
{ shell: true }
|
||||
)
|
||||
LogHelper.info(p.command)
|
||||
|
@ -3,7 +3,14 @@ import path from 'node:path'
|
||||
|
||||
import { command } from 'execa'
|
||||
|
||||
import { PYTHON_BRIDGE_SRC_PATH, TCP_SERVER_SRC_PATH } from '@/constants'
|
||||
import {
|
||||
EN_SPACY_MODEL_NAME,
|
||||
EN_SPACY_MODEL_VERSION,
|
||||
FR_SPACY_MODEL_NAME,
|
||||
FR_SPACY_MODEL_VERSION,
|
||||
PYTHON_BRIDGE_SRC_PATH,
|
||||
TCP_SERVER_SRC_PATH
|
||||
} from '@/constants'
|
||||
import { LogHelper } from '@/helpers/log-helper'
|
||||
import { LoaderHelper } from '@/helpers/loader-helper'
|
||||
import { OSHelper, OSTypes } from '@/helpers/os-helper'
|
||||
@ -17,9 +24,25 @@ import { OSHelper, OSTypes } from '@/helpers/os-helper'
|
||||
* 5. Install spaCy models if the targeted development environment is the TCP server
|
||||
*/
|
||||
|
||||
/**
 * Build the URL of the wheel file used to install a spaCy model with pip.
 *
 * @param {string} model Key of the model in SPACY_MODELS (e.g. 'en', 'fr')
 * @param {string} [mirror] Optional mirror identifier; 'cn' downloads
 * through the FastGit mirror instead of GitHub. Any other value uses GitHub
 * @returns {string} URL of the model installation file
 * @throws {Error} When the given model is not registered in SPACY_MODELS
 */
function getModelInstallationFileUrl(model, mirror = undefined) {
  const spacyModel = SPACY_MODELS.get(model)

  // Fail with an actionable message rather than a destructuring TypeError
  if (!spacyModel) {
    const supportedModels = [...SPACY_MODELS.keys()].join(', ')

    throw new Error(
      `Unknown spaCy model "${model}". Supported models: ${supportedModels}`
    )
  }

  const { name, version } = spacyModel
  const suffix = 'py3-none-any.whl'
  let urlPrefix = 'https://github.com/explosion/spacy-models/releases/download'

  if (mirror === 'cn') {
    LogHelper.info(
      'Using Chinese mirror to download model installation file...'
    )
    urlPrefix =
      'https://download.fastgit.org/explosion/spacy-models/releases/download'
  }

  // Release tag and wheel file name both follow the "<name>-<version>" scheme
  return `${urlPrefix}/${name}-${version}/${name}-${version}-${suffix}`
}
|
||||
|
||||
const SETUP_TARGETS = new Map()
|
||||
// Find new spaCy models: https://github.com/explosion/spacy-models/releases
|
||||
const SPACY_MODELS = ['en_core_web_trf-3.4.0', 'fr_core_news_md-3.4.0']
|
||||
const SPACY_MODELS = new Map()
|
||||
|
||||
SETUP_TARGETS.set('python-bridge', {
|
||||
name: 'Python bridge',
|
||||
@ -33,11 +56,22 @@ SETUP_TARGETS.set('tcp-server', {
|
||||
dotVenvPath: path.join(TCP_SERVER_SRC_PATH, '.venv'),
|
||||
dotProjectPath: path.join(TCP_SERVER_SRC_PATH, '.venv', '.project')
|
||||
})
|
||||
|
||||
SPACY_MODELS.set('en', {
|
||||
name: EN_SPACY_MODEL_NAME,
|
||||
version: EN_SPACY_MODEL_VERSION
|
||||
})
|
||||
SPACY_MODELS.set('fr', {
|
||||
name: FR_SPACY_MODEL_NAME,
|
||||
version: FR_SPACY_MODEL_VERSION
|
||||
})
|
||||
;(async () => {
|
||||
LoaderHelper.start()
|
||||
|
||||
const { argv } = process
|
||||
const givenSetupTarget = argv[2].toLowerCase()
|
||||
// cn
|
||||
const givenMirror = argv[3]?.toLowerCase()
|
||||
|
||||
if (!SETUP_TARGETS.has(givenSetupTarget)) {
|
||||
LogHelper.error(
|
||||
@ -170,8 +204,13 @@ SETUP_TARGETS.set('tcp-server', {
|
||||
LogHelper.info('Installing spaCy models...')
|
||||
|
||||
// Install models one by one to avoid network throttling
|
||||
for (const model of SPACY_MODELS) {
|
||||
await command(`pipenv run spacy download ${model} --direct`, {
|
||||
for (const modelLanguage of SPACY_MODELS.keys()) {
|
||||
const modelInstallationFileUrl = getModelInstallationFileUrl(
|
||||
modelLanguage,
|
||||
givenMirror
|
||||
)
|
||||
|
||||
await command(`pipenv run pip install ${modelInstallationFileUrl}`, {
|
||||
shell: true,
|
||||
stdio: 'inherit'
|
||||
})
|
||||
@ -187,11 +226,9 @@ SETUP_TARGETS.set('tcp-server', {
|
||||
LogHelper.info('Checking whether all spaCy models are installed...')
|
||||
|
||||
try {
|
||||
for (let model of SPACY_MODELS) {
|
||||
;[model] = model.split('-')
|
||||
|
||||
for (const { name: modelName } of SPACY_MODELS.values()) {
|
||||
const { stderr } = await command(
|
||||
`pipenv run python -c "import ${model}"`,
|
||||
`pipenv run python -c "import ${modelName}"`,
|
||||
{ shell: true }
|
||||
)
|
||||
|
||||
|
@ -11,6 +11,9 @@ const PRODUCTION_ENV = 'production'
|
||||
const DEVELOPMENT_ENV = 'development'
|
||||
const TESTING_ENV = 'testing'
|
||||
|
||||
/**
|
||||
* Binaries / distribution
|
||||
*/
|
||||
export const BINARIES_FOLDER_NAME = OSHelper.getBinariesFolderName()
|
||||
export const PYTHON_BRIDGE_DIST_PATH = path.join('bridges', 'python', 'dist')
|
||||
export const TCP_SERVER_DIST_PATH = path.join('tcp_server', 'dist')
|
||||
@ -21,11 +24,37 @@ export const TCP_SERVER_SRC_PATH = path.join('tcp_server', 'src')
|
||||
export const PYTHON_BRIDGE_BIN_NAME = 'leon-python-bridge'
|
||||
export const TCP_SERVER_BIN_NAME = 'leon-tcp-server'
|
||||
|
||||
export const TCP_SERVER_BIN_PATH = path.join(
|
||||
TCP_SERVER_DIST_PATH,
|
||||
BINARIES_FOLDER_NAME,
|
||||
TCP_SERVER_BIN_NAME
|
||||
)
|
||||
export const PYTHON_BRIDGE_BIN_PATH = path.join(
|
||||
PYTHON_BRIDGE_DIST_PATH,
|
||||
BINARIES_FOLDER_NAME,
|
||||
PYTHON_BRIDGE_BIN_NAME
|
||||
)
|
||||
|
||||
/**
|
||||
* spaCy models
|
||||
* Find new spaCy models: https://github.com/explosion/spacy-models/releases
|
||||
*/
|
||||
export const EN_SPACY_MODEL_NAME = 'en_core_web_trf'
|
||||
export const EN_SPACY_MODEL_VERSION = '3.4.0'
|
||||
export const FR_SPACY_MODEL_NAME = 'fr_core_news_md'
|
||||
export const FR_SPACY_MODEL_VERSION = '3.4.0'
|
||||
|
||||
/**
|
||||
* Environments
|
||||
*/
|
||||
export const IS_PRODUCTION_ENV = process.env['LEON_NODE_ENV'] === PRODUCTION_ENV
|
||||
export const IS_DEVELOPMENT_ENV =
|
||||
process.env['LEON_NODE_ENV'] === DEVELOPMENT_ENV
|
||||
export const IS_TESTING_ENV = process.env['LEON_NODE_ENV'] === TESTING_ENV
|
||||
|
||||
/**
|
||||
* Leon environment preferences
|
||||
*/
|
||||
export const LANG = process.env['LEON_LANG'] as LongLanguageCode
|
||||
|
||||
export const HOST = process.env['LEON_HOST']
|
||||
@ -48,14 +77,3 @@ export const HAS_LOGGER = process.env['LEON_LOGGER'] === 'true'
|
||||
|
||||
export const TCP_SERVER_HOST = process.env['LEON_PY_TCP_SERVER_HOST']
|
||||
export const TCP_SERVER_PORT = process.env['LEON_PY_TCP_SERVER_PORT']
|
||||
|
||||
export const TCP_SERVER_BIN_PATH = path.join(
|
||||
TCP_SERVER_DIST_PATH,
|
||||
BINARIES_FOLDER_NAME,
|
||||
TCP_SERVER_BIN_NAME
|
||||
)
|
||||
export const PYTHON_BRIDGE_BIN_PATH = path.join(
|
||||
PYTHON_BRIDGE_DIST_PATH,
|
||||
BINARIES_FOLDER_NAME,
|
||||
PYTHON_BRIDGE_BIN_NAME
|
||||
)
|
||||
|
Loading…
Reference in New Issue
Block a user