prepare for external absl library

This commit is contained in:
Taku Kudo 2021-01-03 02:28:55 +09:00
parent c4fba494f7
commit 844ac060f8
7 changed files with 76 additions and 26 deletions

1
.gitignore vendored
View File

@ -72,3 +72,4 @@ libsentencepiece.so*
libsentencepiece_train.so*
python/bundled
_sentencepiece.*.so
third_party/abseil-cpp

View File

@ -26,6 +26,7 @@ option(SPM_ENABLE_TCMALLOC "Enable TCMalloc if available." ON)
option(SPM_TCMALLOC_STATIC "Link static library of TCMALLOC." OFF)
option(SPM_NO_THREADLOCAL "Disable thread_local operator" OFF)
option(SPM_USE_BUILTIN_PROTOBUF "Use built-in protobuf" ON)
# When ON, the abseil-cpp ExternalProject checkout at the end of this file is
# enabled. The help text previously said "protobuf" by copy/paste mistake.
option(SPM_USE_EXTERNAL_ABSL "Use external abseil-cpp" OFF)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -97,6 +98,7 @@ endif()
add_subdirectory(src)
add_subdirectory(third_party)
# add_subdirectory(third_party/abseil-cpp)
set(CPACK_SOURCE_GENERATOR "TXZ")
set(CPACK_GENERATOR "7Z")
@ -108,3 +110,22 @@ set(CPACK_PACKAGE_CONTACT "taku@google.com")
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Taku Kudo")
set(CPACK_SOURCE_IGNORE_FILES "/build/;/.git/;/dist/;/sdist/;~$;${CPACK_SOURCE_IGNORE_FILES}")
include(CPack)
# Optionally fetch the upstream abseil-cpp sources and mirror them into
# third_party/abseil-cpp. Nothing is configured, built, installed or tested by
# the external project itself; it only clones the repository and copies it.
if (SPM_USE_EXTERNAL_ABSL)
  include(ExternalProject)
  ExternalProject_Add(abseil-cpp
    PREFIX abseil-cpp
    GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    INSTALL_COMMAND ""
    TEST_COMMAND "")
  # ExternalProject_Get_Property() only accepts ExternalProject property names
  # (such as SOURCE_DIR) and stores the value in a variable of the same name,
  # so query SOURCE_DIR and then alias it to the name used below.
  ExternalProject_Get_Property(abseil-cpp SOURCE_DIR)
  set(ABSL_SOURCE_DIR "${SOURCE_DIR}")
  ExternalProject_Add_Step(
    abseil-cpp
    copySource
    # The checkout must exist before it can be copied.
    DEPENDEES download
    WORKING_DIRECTORY "${ABSL_SOURCE_DIR}"
    # Use cmake -E instead of `mkdir -p` so the step also works on hosts
    # without a POSIX shell.
    COMMAND ${CMAKE_COMMAND} -E make_directory "${PROJECT_SOURCE_DIR}/third_party/abseil-cpp"
    COMMAND ${CMAKE_COMMAND} -E copy_directory "${ABSL_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}/third_party/abseil-cpp")
endif()

View File

@ -12,12 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Source files of the vendored absl flags and strings implementations under
# third_party/absl. They are defined once here, independent of which protobuf
# variant is selected below.
set(ABSL_FLAGS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/flags/flag.cc)
set(ABSL_STRINGS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/strings/string_view.cc)
if (SPM_USE_BUILTIN_PROTOBUF)
set(SPM_PROTO_HDRS builtin_pb/sentencepiece.pb.h)
set(SPM_PROTO_SRCS builtin_pb/sentencepiece.pb.cc)
set(SPM_MODEL_PROTO_HDRS builtin_pb/sentencepiece_model.pb.h)
set(SPM_MODEL_PROTO_SRCS builtin_pb/sentencepiece_model.pb.cc)
set(ABSL_FLAGS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/flags/flag.cc)
set(PROTOBUF_LITE_LIBRARY "")
set(PROTOBUF_LITE_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/arena.cc
@ -51,6 +53,7 @@ if (SPM_USE_BUILTIN_PROTOBUF)
else()
add_definitions("-pthread -DHAVE_PTHREAD=1 -Wno-sign-compare")
endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../third_party)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite)
include_directories(builtin_pb)
else()
@ -58,9 +61,9 @@ else()
include_directories(${Protobuf_INCLUDE_DIRS})
protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto)
protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto)
set(ABSL_FLAGS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/flags/flag.cc)
set(PROTOBUF_LITE_SRCS "")
include_directories(${PROTOBUF_INCLUDE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../third_party)
endif()
include_directories(${CMAKE_CURRENT_BINARY_DIR})
@ -101,8 +104,7 @@ set(SPM_SRCS
sentencepiece_processor.cc
unigram_model.cc
util.cc
word_model.cc
${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/strings/string_view.cc)
word_model.cc)
set(SPM_TRAIN_SRCS
${SPM_PROTO_HDRS}
@ -156,12 +158,12 @@ set(SPM_TEST_SRCS
util_test.cc
word_model_test.cc
word_model_trainer_test.cc
pretokenizer_for_training_test.cc
${ABSL_FLAGS_SRCS})
pretokenizer_for_training_test.cc)
find_package(Threads REQUIRED)
set(SPM_LIBS ${PROTOBUF_LITE_LIBRARY} Threads::Threads)
# absl::strings absl::flags absl::flags_parse)
if (SPM_ENABLE_NFKC_COMPILE)
find_package(ICU 4.4 COMPONENTS i18n data uc REQUIRED)
@ -185,6 +187,9 @@ if (SPM_ENABLE_TCMALLOC)
endif()
endif()
# Build the vendored absl flags/strings sources once as static libraries
# instead of compiling them into every consumer.
add_library(absl_flags-static STATIC ${ABSL_FLAGS_SRCS})
add_library(absl_strings-static STATIC ${ABSL_STRINGS_SRCS})
# absl_strings-static is linked into the shared sentencepiece libraries, so
# request position-independent code explicitly rather than relying on
# globally set compiler flags.
set_target_properties(absl_flags-static absl_strings-static
  PROPERTIES POSITION_INDEPENDENT_CODE ON)
if (SPM_ENABLE_SHARED)
add_library(sentencepiece SHARED ${SPM_SRCS})
add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
@ -193,12 +198,12 @@ endif()
add_library(sentencepiece-static STATIC ${SPM_SRCS})
add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS})
target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS})
target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS})
target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS} absl_strings-static)
target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS} absl_strings-static)
if (SPM_ENABLE_SHARED)
target_link_libraries(sentencepiece ${SPM_LIBS})
target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)
target_link_libraries(sentencepiece ${SPM_LIBS} absl_strings-static)
target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece absl_strings-static)
if ((${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l") OR
(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips") OR
(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "m68k") OR
@ -250,17 +255,17 @@ if (NOT MSVC)
endif()
endif()
add_executable(spm_encode spm_encode_main.cc ${ABSL_FLAGS_SRCS})
add_executable(spm_decode spm_decode_main.cc ${ABSL_FLAGS_SRCS})
add_executable(spm_normalize spm_normalize_main.cc ${ABSL_FLAGS_SRCS})
add_executable(spm_train spm_train_main.cc ${ABSL_FLAGS_SRCS})
add_executable(spm_export_vocab spm_export_vocab_main.cc ${ABSL_FLAGS_SRCS})
add_executable(spm_encode spm_encode_main.cc)
add_executable(spm_decode spm_decode_main.cc)
add_executable(spm_normalize spm_normalize_main.cc)
add_executable(spm_train spm_train_main.cc)
add_executable(spm_export_vocab spm_export_vocab_main.cc)
target_link_libraries(spm_encode sentencepiece)
target_link_libraries(spm_decode sentencepiece)
target_link_libraries(spm_normalize sentencepiece sentencepiece_train)
target_link_libraries(spm_train sentencepiece sentencepiece_train)
target_link_libraries(spm_export_vocab sentencepiece)
target_link_libraries(spm_encode sentencepiece absl_flags-static)
target_link_libraries(spm_decode sentencepiece absl_flags-static)
target_link_libraries(spm_normalize sentencepiece sentencepiece_train absl_flags-static)
target_link_libraries(spm_train sentencepiece sentencepiece_train absl_flags-static)
target_link_libraries(spm_export_vocab sentencepiece absl_flags-static)
if (SPM_ENABLE_NFKC_COMPILE)
add_executable(compile_charsmap compile_charsmap_main.cc)

View File

@ -17,8 +17,11 @@
#include "common.h"
#include "third_party/absl/flags/flag.h"
#include "third_party/absl/flags/parse.h"
ABSL_DECLARE_FLAG(int32, minloglevel);
ABSL_FLAG(int, minloglevel, 0,
"Messages logged at a lower level than this don't actually get "
"logged anywhere");
namespace sentencepiece {

View File

@ -26,9 +26,6 @@
ABSL_FLAG(bool, help, false, "show help");
ABSL_FLAG(bool, version, false, "show version");
ABSL_FLAG(int, minloglevel, 0,
"Messages logged at a lower level than this don't actually get "
"logged anywhere");
namespace absl {
namespace internal {

View File

@ -52,8 +52,6 @@ void SetFlag(Flag<T> *flag, const V &v) {
const T value(v);
flag->set_value(value);
}
std::vector<char *> ParseCommandLine(int argc, char *argv[]);
} // namespace absl
#define ABSL_FLAG(Type, name, defautl_value, help) \

25
third_party/absl/flags/parse.h vendored Normal file
View File

@ -0,0 +1,25 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Declares the command-line parsing entry point of the vendored
// third_party/absl/flags package.
#ifndef ABSL_FLAGS_PARSE_H_
#define ABSL_FLAGS_PARSE_H_
#include <vector>
namespace absl {
// Parses the program arguments given as argc/argv.
// NOTE(review): presumably returns the arguments that remain after flag
// parsing, mirroring upstream absl::ParseCommandLine - confirm against the
// definition, which is not visible from this header.
std::vector<char *> ParseCommandLine(int argc, char *argv[]);
} // namespace absl
#endif // ABSL_FLAGS_PARSE_H_