mirror of
https://github.com/google/sentencepiece.git
synced 2024-09-11 10:55:42 +03:00
202 lines
6.1 KiB
Python
Executable File
202 lines
6.1 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
# Copyright 2018 Google Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import codecs
|
|
import os
|
|
import string
|
|
import subprocess
|
|
import sys
|
|
from setuptools import Extension, setup
|
|
from setuptools.command.build_ext import build_ext as _build_ext
|
|
from setuptools.command.build_py import build_py as _build_py
|
|
|
|
# Put ./test on the import path; presumably this is what lets the
# test_suite='sentencepiece_test.suite' entry passed to setup() resolve.
_TEST_DIR = os.path.join('.', 'test')
sys.path.append(_TEST_DIR)
|
|
|
|
|
|
def long_description():
    """Return the text of README.md (read from the current directory, UTF-8)."""
    with codecs.open('README.md', 'r', 'utf-8') as readme:
        return readme.read()
|
|
|
|
|
|
# Execute _version.py in this module's namespace. Later code in this file
# reads __version__, which is presumably defined by that file.
# The `with` block closes the file handle as soon as the source is read
# instead of leaving it to the garbage collector.
with open('src/sentencepiece/_version.py') as _version_file:
    exec(_version_file.read())
|
|
|
|
|
|
def run_pkg_config(section, pkg_config_path=None):
    """Query pkg-config for one kind of sentencepiece build flag.

    The command is run without a shell, so a `pkg_config_path` containing
    spaces or shell metacharacters is passed through verbatim.

    Args:
      section: pkg-config option name without the leading dashes, e.g.
        'cflags' or 'libs'.
      pkg_config_path: optional value exported as PKG_CONFIG_PATH for the
        pkg-config call. Falsy values leave the environment untouched.

    Returns:
      The whitespace-separated tokens printed by pkg-config, as a list.

    Raises:
      SystemExit: with status 1 when pkg-config exits with an error or
        cannot be started at all.
    """
    cmd = ['pkg-config', 'sentencepiece', '--{}'.format(section)]
    env = None
    if pkg_config_path:
        # Copy the current environment and override only PKG_CONFIG_PATH.
        env = dict(os.environ, PKG_CONFIG_PATH=pkg_config_path)
    try:
        output = subprocess.check_output(cmd, env=env)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing pkg-config executable, which the shell
        # used to report as a non-zero exit status.
        sys.stderr.write('Failed to find sentencepiece pkg-config\n')
        sys.exit(1)
    if not isinstance(output, str):
        # check_output returns bytes on Python 3.
        output = output.decode('utf-8')
    return output.strip().split()
|
|
|
|
|
|
def is_sentencepiece_installed():
    """Return True when `pkg-config sentencepiece --libs` exits with status 0."""
    exit_status = subprocess.call('pkg-config sentencepiece --libs', shell=True)
    return exit_status == 0
|
|
|
|
|
|
def get_cflags_and_libs(root):
    """Build compiler flags and static library paths for an install tree.

    Args:
      root: directory expected to contain `include` and `lib` or `lib64`.

    Returns:
      A `(cflags, libs)` tuple. `cflags` always holds the C++17 switch and
      the include directory. `libs` holds the two static archives from the
      first layout whose sentencepiece.pc exists, or is empty when neither
      layout is present.
    """
    # (pkg-config marker, core archive, trainer archive); `lib` wins over
    # `lib64` when both exist.
    layouts = (
        (
            'lib/pkgconfig/sentencepiece.pc',
            'lib/libsentencepiece.a',
            'lib/libsentencepiece_train.a',
        ),
        (
            'lib64/pkgconfig/sentencepiece.pc',
            'lib64/libsentencepiece.a',
            'lib64/libsentencepiece_train.a',
        ),
    )
    include_flag = '-I' + os.path.join(root, 'include')
    archives = []
    for marker, core_archive, train_archive in layouts:
        if os.path.exists(os.path.join(root, marker)):
            archives = [
                os.path.join(root, core_archive),
                os.path.join(root, train_archive),
            ]
            break
    return ['-std=c++17', include_flag], archives
|
|
|
|
|
|
class build_ext(_build_ext):
    """build_ext command that resolves sentencepiece flags before building."""

    def build_extension(self, ext):
        """Fill in compile/link arguments on `ext`, then delegate the build.

        Flags are looked up in this order: an install tree at
        ../build/root, a sentencepiece visible to pkg-config, and finally
        the tree under ./build/root after running ./build_bundled.sh.
        """
        compile_args, link_args = get_cflags_and_libs('../build/root')

        if not link_args:
            if is_sentencepiece_installed():
                compile_args = compile_args + run_pkg_config('cflags')
                link_args = run_pkg_config('libs')
            else:
                subprocess.check_call(['./build_bundled.sh', __version__])
                compile_args, link_args = get_cflags_and_libs('./build/root')

        platform = sys.platform
        if platform == 'darwin':
            # Fix compile on some versions of Mac OSX
            # See: https://github.com/neulab/xnmt/issues/199
            compile_args.append('-mmacosx-version-min=10.9')
        else:
            # Everywhere else the strip flag goes on both flag lists.
            for flags in (compile_args, link_args):
                flags.append('-Wl,-strip-all')
            if platform == 'linux':
                link_args.append('-Wl,-Bsymbolic')

        print('## cflags={}'.format(' '.join(compile_args)))
        print('## libs={}'.format(' '.join(link_args)))
        ext.extra_compile_args = compile_args
        ext.extra_link_args = link_args
        _build_ext.build_extension(self, ext)
|
|
|
|
|
|
if os.name == 'nt':
    # Must pre-install sentencepiece into build directory.
    arch = 'amd64' if sys.maxsize > 2**32 else 'win32'

    if os.path.exists('..\\build\\root_{}\\lib'.format(arch)):
        # Architecture-specific install tree next to this package.
        cflags = ['/std:c++17', '/I..\\build\\root_{}\\include'.format(arch)]
        libs = [
            '..\\build\\root_{}\\lib\\sentencepiece.lib'.format(arch),
            '..\\build\\root_{}\\lib\\sentencepiece_train.lib'.format(arch),
        ]
    elif os.path.exists('..\\build\\root\\lib'):
        # Architecture-neutral install tree next to this package.
        cflags = ['/std:c++17', '/I..\\build\\root\\include']
        libs = [
            '..\\build\\root\\lib\\sentencepiece.lib',
            '..\\build\\root\\lib\\sentencepiece_train.lib',
        ]
    else:
        # build library locally with cmake and vc++.
        cmake_arch = 'x64' if arch == 'amd64' else 'Win32'
        configure_cmd = [
            'cmake',
            'sentencepiece',
            '-A',
            cmake_arch,
            '-B',
            'build',
            '-DSPM_ENABLE_SHARED=OFF',
            '-DCMAKE_INSTALL_PREFIX=build\\root',
        ]
        install_cmd = [
            'cmake',
            '--build',
            'build',
            '--config',
            'Release',
            '--target',
            'install',
            '--parallel',
            '8',
        ]
        # Configure first, then build and install into build\root.
        for cmake_cmd in (configure_cmd, install_cmd):
            subprocess.check_call(cmake_cmd)
        cflags = ['/std:c++17', '/I.\\build\\root\\include']
        libs = [
            '.\\build\\root\\lib\\sentencepiece.lib',
            '.\\build\\root\\lib\\sentencepiece_train.lib',
        ]

    # On Windows the flags are fixed here and the stock build_ext is used.
    _ext_options = {'extra_compile_args': cflags, 'extra_link_args': libs}
    cmdclass = {}
else:
    # Elsewhere the custom build_ext command fills in the flags at build time.
    _ext_options = {}
    cmdclass = {'build_ext': build_ext}

SENTENCEPIECE_EXT = Extension(
    'sentencepiece._sentencepiece',
    sources=['src/sentencepiece/sentencepiece_wrap.cxx'],
    **_ext_options
)
|
|
|
|
# Python modules to install, written as paths below the `src` package_dir.
_PY_MODULES = [
    'sentencepiece/__init__',
    'sentencepiece/_version',
    'sentencepiece/sentencepiece_model_pb2',
    'sentencepiece/sentencepiece_pb2',
]

# Trove classifiers published with the package metadata.
_CLASSIFIERS = [
    'Development Status :: 5 - Production/Stable',
    'Environment :: Console',
    'Intended Audience :: Developers',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: Apache Software License',
    'Operating System :: Unix',
    'Programming Language :: Python',
    'Topic :: Text Processing :: Linguistic',
    'Topic :: Software Development :: Libraries :: Python Modules',
]

setup(
    # Identity and metadata.
    name='sentencepiece',
    version=__version__,
    description='SentencePiece python wrapper',
    long_description=long_description(),
    long_description_content_type='text/markdown',
    author='Taku Kudo',
    author_email='taku@google.com',
    url='https://github.com/google/sentencepiece',
    license='Apache',
    platforms='Unix',
    classifiers=_CLASSIFIERS,
    # Package contents and build configuration.
    package_dir={'': 'src'},
    py_modules=_PY_MODULES,
    ext_modules=[SENTENCEPIECE_EXT],
    cmdclass=cmdclass,
    test_suite='sentencepiece_test.suite',
)
|