From f54d8ba070f40ddddf9ef6d7777eaf2b3432f889 Mon Sep 17 00:00:00 2001 From: Taku Kudo Date: Tue, 4 Apr 2023 03:15:11 +0000 Subject: [PATCH] includes the sentencepiece source files in python source package --- .gitignore | 2 ++ python/MANIFEST.in | 1 + python/build_bundled.sh | 12 +++++++----- python/build_sdist.sh | 11 +++++++++++ python/setup.py | 39 ++++++++++++++++++++++++++++++++++----- 5 files changed, 55 insertions(+), 10 deletions(-) create mode 100755 python/build_sdist.sh diff --git a/.gitignore b/.gitignore index ecdb585..743769d 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,5 @@ libsentencepiece_train.so* python/bundled _sentencepiece.*.so third_party/abseil-cpp + +python/sentencepiece diff --git a/python/MANIFEST.in b/python/MANIFEST.in index ddfa0f1..fab3380 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -1,3 +1,4 @@ recursive-include test *.py *.model botchan.txt recursive-include src *.i +recursive-include sentencepiece * include *.md VERSION.* build_bundled.sh diff --git a/python/build_bundled.sh b/python/build_bundled.sh index 996a39e..a3f6861 100755 --- a/python/build_bundled.sh +++ b/python/build_bundled.sh @@ -2,13 +2,15 @@ VERSION="$1" -mkdir -p bundled +mkdir -p build -BUILD_DIR=./bundled -INSTALL_DIR=./bundled/root +BUILD_DIR=./build +INSTALL_DIR=./build/root -if [ -f ../src/CMakeLists.txt ]; then - SRC_DIR=.. +if [ -f ./sentencepiece/src/CMakeLists.txt ]; then + SRC_DIR=./sentencepiece +elif [ -f ../src/CMakeLists.txt ]; then + SRC_DIR=.. else # Try taged version. Othewise, use head. 
git clone https://github.com/google/sentencepiece.git -b v"${VERSION}" --depth 1 || \ diff --git a/python/build_sdist.sh b/python/build_sdist.sh new file mode 100755 index 0000000..92da94b --- /dev/null +++ b/python/build_sdist.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +mkdir -p sentencepiece + +for i in CMakeLists.txt LICENSE README.md VERSION.txt cmake config.h.in sentencepiece.pc.in src third_party +do + echo "copying ../${i} sentencepiece/${i}" + cp -f -R "../${i}" sentencepiece +done + +python3 setup.py sdist diff --git a/python/setup.py b/python/setup.py index e6aa585..5411231 100755 --- a/python/setup.py +++ b/python/setup.py @@ -78,8 +78,6 @@ class build_ext(_build_ext): def build_extension(self, ext): cflags, libs = get_cflags_and_libs('../build/root') - if len(libs) == 0: - cflags, libs = get_cflags_and_libs('./bundled/root') if len(libs) == 0: if is_sentencepiece_installed(): @@ -87,7 +85,7 @@ class build_ext(_build_ext): libs = run_pkg_config('libs') else: subprocess.check_call(['./build_bundled.sh', __version__]) - cflags, libs = get_cflags_and_libs('./bundled/root') + cflags, libs = get_cflags_and_libs('./build/root') # Fix compile on some versions of Mac OSX # See: https://github.com/neulab/xnmt/issues/199 @@ -104,7 +102,7 @@ class build_ext(_build_ext): if os.name == 'nt': - # Must pre-install sentencepice into bundled directory. + # Must pre-install sentencepiece into build directory. arch = 'win32' if sys.maxsize > 2**32: arch = 'amd64' @@ -114,12 +112,43 @@ if os.name == 'nt': '..\\build\\root_{}\\lib\\sentencepiece.lib'.format(arch), '..\\build\\root_{}\\lib\\sentencepiece_train.lib'.format(arch), ] - else: + elif os.path.exists('..\\build\\root\\lib'): cflags = ['/std:c++17', '/I..\\build\\root\\include'] libs = [ '..\\build\\root\\lib\\sentencepiece.lib', '..\\build\\root\\lib\\sentencepiece_train.lib', ] + else: + # build library locally with cmake and vc++. 
+ cmake_arch = 'Win32' + if arch == 'amd64': + cmake_arch = 'x64' + subprocess.check_call([ + 'cmake', + 'sentencepiece', + '-A', + cmake_arch, + '-B', + 'build', + '-DSPM_ENABLE_SHARED=OFF', + '-DCMAKE_INSTALL_PREFIX=build\\root', + ]) + subprocess.check_call([ + 'cmake', + '--build', + 'build', + '--config', + 'Release', + '--target', + 'install', + '--parallel', + '8', + ]) + cflags = ['/std:c++17', '/I.\\build\\root\\include'] + libs = [ + '.\\build\\root\\lib\\sentencepiece.lib', + '.\\build\\root\\lib\\sentencepiece_train.lib', + ] SENTENCEPIECE_EXT = Extension( 'sentencepiece._sentencepiece',