mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
Add Triton Marian backend running in AzureML Inference Environment (#749)
* Add Triton Marian backend running in AzureML Inference Environment
This commit is contained in:
parent
e274ac76b2
commit
ca7a887aa7
96
contrib/triton-aml/Dockerfile
Normal file
96
contrib/triton-aml/Dockerfile
Normal file
@ -0,0 +1,96 @@
|
||||
# Stage 1 (BUILDER): compile Marian and its third-party dependencies.
# It is recommended to use a machine which supports CUDA to build this image.
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 AS BUILDER

# Refresh the package index, install the toolchain and drop the index in ONE
# layer. Keeping `apt-get update` in its own layer lets Docker reuse a stale
# cached index for a later `apt-get install`.
RUN apt-get update --fix-missing && \
    apt-get install -y curl git autoconf automake libtool make g++ unzip cmake build-essential cpio && \
    apt-get -y clean && \
    rm -rf /var/lib/apt/lists/*

# install zlib
WORKDIR /
RUN git clone --no-checkout https://github.com/madler/zlib
WORKDIR /zlib
RUN git checkout tags/v1.2.10 && \
    ./configure && \
    make install

# protobuf install (static, position independent so it can be linked into a shared object)
WORKDIR /
RUN git clone --no-checkout https://github.com/protocolbuffers/protobuf.git
WORKDIR /protobuf
RUN git checkout tags/v3.8.0 && \
    git submodule update --init --recursive && \
    ./autogen.sh
RUN ./configure --disable-shared --prefix=/usr CFLAGS="-fPIC" CXXFLAGS="-fPIC" && \
    make && \
    make check && \
    make install && \
    ldconfig # refresh shared library cache.

# Intel mkl install
WORKDIR /
RUN curl --tlsv1.2 --output l_mkl_2020.0.166.tgz https://registrationcenter-download.intel.com/akdlm/irc_nas/tec/16318/l_mkl_2020.0.166.tgz
RUN tar zxvf l_mkl_2020.0.166.tgz
WORKDIR /l_mkl_2020.0.166
RUN ./install.sh --silent ./silent.cfg --install_dir /opt/intel/ --accept_eula

# boost install
WORKDIR /
RUN git clone --recursive https://github.com/boostorg/boost --branch boost-1.72.0 /boost
WORKDIR /boost
RUN ./bootstrap.sh
RUN ./b2 install --prefix=/usr --with-system --with-thread --with-date_time --with-regex --with-serialization

# Marian install
WORKDIR /
RUN git clone --no-checkout https://github.com/marian-nmt/marian-dev
# Absolute path, so this step does not depend on the preceding WORKDIR.
WORKDIR /marian-dev
RUN git checkout youki/quantize-embedding
# Pin the exact commit the backend sources in src/ were written against.
RUN git checkout dad48865fd3b7f1d7b891de81040f7651e824510
RUN mkdir src/static
RUN mkdir build
# Overlay the C wrapper, the patched logging code and the patched build file.
COPY src/cmarian.cpp /marian-dev/src/static
COPY src/logging.cpp /marian-dev/src/common
RUN rm src/CMakeLists.txt
COPY src/CMakeLists.txt /marian-dev/src

WORKDIR /marian-dev/build
RUN cmake .. -DCOMPILE_CPU=on -DCOMPILE_CUDA=on -DUSE_SENTENCEPIECE=on -DUSE_STATIC_LIBS=off -DCOMPILE_SERVER=off -DUSE_FBGEMM=on -DCUDA_cublas_device_LIBRARY=/usr/lib/x86_64-linux-gnu/libcublas.so
RUN make -j $(grep -c ^processor /proc/cpuinfo)
|
||||
|
||||
# Stage 2: build the Triton custom backend on top of the Triton server image,
# linking against the static libraries produced by the BUILDER stage.
FROM nvcr.io/nvidia/tritonserver:20.09-py3

RUN mkdir -p /marian-dev/build/src/3rd_party/sentencepiece/src

# Static archives the backend links against (see marian_backend/CMakeLists.txt).
COPY --from=BUILDER /usr/lib/libprotobuf.a /usr/lib
COPY --from=BUILDER /usr/lib/libboost_system.a /usr/lib
COPY --from=BUILDER /marian-dev/build/src/3rd_party/fbgemm/libfbgemm.a /usr/lib
COPY --from=BUILDER /marian-dev/build/src/3rd_party/fbgemm/asmjit/libasmjit.a /usr/lib
COPY --from=BUILDER /marian-dev/build/src/3rd_party/sentencepiece/src/libsentencepiece_train.a /usr/lib
COPY --from=BUILDER /marian-dev/build/src/3rd_party/sentencepiece/src/libsentencepiece.a /usr/lib
# The Marian archives are renamed to the names the backend links with.
COPY --from=BUILDER /marian-dev/build/libmarian.a /usr/lib/libcmarian.a
COPY --from=BUILDER /marian-dev/build/src/libmarian_cuda.a /usr/lib/libcmarian_cuda.a

# build triton custom backend
ENV DEBIAN_FRONTEND=noninteractive

# Update, install and drop the package index in a single layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        software-properties-common \
        build-essential \
        git \
        libopencv-dev \
        libopencv-core-dev \
        libssl-dev \
        libtool \
        pkg-config \
        rapidjson-dev && \
    rm -rf /var/lib/apt/lists/*

# install cmake-3.19.0 (the backend's CMakeLists.txt requires CMake >= 3.17);
# the installer script is removed in the same layer once it has run.
RUN wget https://github.com/Kitware/CMake/releases/download/v3.19.0-rc1/cmake-3.19.0-rc1-Linux-x86_64.sh && \
    sh cmake-3.19.0-rc1-Linux-x86_64.sh --prefix=/usr/local --exclude-subdir && \
    rm cmake-3.19.0-rc1-Linux-x86_64.sh

# COPY is sufficient for a plain local directory.
COPY marian_backend /opt/tritonserver/marian_backend
WORKDIR /opt/tritonserver/marian_backend
RUN mkdir build
# build.sh later extracts build/libtriton_marian.so from this image.
RUN cd build && \
    cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install .. && \
    make install
|
36
contrib/triton-aml/README.md
Normal file
36
contrib/triton-aml/README.md
Normal file
@ -0,0 +1,36 @@
|
||||
Triton-AML
|
||||
======
|
||||
|
||||
*Triton-AML* is a custom Triton backend that runs Marian in the AzureML Inference Environment. It is an implementation of a [Triton Backend Shared Library](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/backend.html#backend-shared-library).
|
||||
|
||||
This backend is compiled against a static library built from a specific version of Marian.
|
||||
|
||||
Layout:
|
||||
|
||||
- marian_backend: Triton Marian backend source code
|
||||
- src: Changed code and CMakeLists.txt of Marian
|
||||
- Dockerfile: Used for compiling the backend with the static library of Marian
|
||||
- build.sh: A simple shell script to run the Dockerfile to get the generated libtriton_marian.so
|
||||
|
||||
## Usage
|
||||
|
||||
Run `./build.sh` to get the Triton Marian backend shared library.
|
||||
|
||||
All users can put `libtriton_marian.so` in either of the following places:
|
||||
|
||||
- <model_repository>/<model_name>/<version_directory>/libtriton_marian.so
|
||||
- <model_repository>/<model_name>/libtriton_marian.so
|
||||
|
||||
AzureML Inference team members can put it in the following place in the *aml-triton* base image:
|
||||
|
||||
- <backend_directory>/marian/libtriton_marian.so
|
||||
|
||||
Where <backend_directory> is by default /opt/tritonserver/backends.
|
||||
|
||||
## Make changes
|
||||
|
||||
If you want to compile with another version of Marian, replace `RUN git checkout youki/quantize-embedding` (and the pinned commit checkout that follows it) in the Dockerfile. Then replace the old CMakeLists.txt in `src` with the one from the new version, add src/cmarian.cpp to it, and make whatever changes are needed so that it still builds a static library of Marian.
|
||||
|
||||
## Limitation
|
||||
|
||||
For now, the backend is only used for the *nlxseq2seq* model: the model path is hard-coded in the `ModelState::SetMarianConfigPath` function, which must be changed if you want to run other models with Marian.
|
10
contrib/triton-aml/build.sh
Normal file
10
contrib/triton-aml/build.sh
Normal file
@ -0,0 +1,10 @@
|
||||
#!/bin/sh
# Build the Triton Marian backend image and copy the resulting
# libtriton_marian.so into the current directory.

# Stop at the first failing command, so a failed image build is never followed
# by extracting an artifact from a stale image.
set -e

echo Building Triton Marian backend ...

docker build -t triton-marian-build .

echo Copying artifacts ...

docker container create --name extract triton-marian-build
# Remove the temporary container on exit, even when the copy below fails.
trap 'docker container rm -f extract' EXIT
docker container cp extract:/opt/tritonserver/marian_backend/build/libtriton_marian.so .
|
156
contrib/triton-aml/marian_backend/CMakeLists.txt
Normal file
156
contrib/triton-aml/marian_backend/CMakeLists.txt
Normal file
@ -0,0 +1,156 @@
|
||||
cmake_minimum_required(VERSION 3.17)

project(tritonmarianbackend LANGUAGES C CXX)

#
# Options
#
# Must include options required for this project as well as any
# projects included in this one by FetchContent.
#
# GPU support is disabled by default because marian backend doesn't
# support GPUs.
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" OFF)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)

# Branch/tag of each Triton repository fetched below; override on the command
# line with -DTRITON_<NAME>_REPO_TAG=<tag>.
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

#
# Dependencies
#
# FetchContent's composability isn't very good. We must include the
# transitive closure of all repos so that we can override the tag.
#
include(FetchContent)

FetchContent_Declare(
  repo-common
  GIT_REPOSITORY https://github.com/triton-inference-server/common.git
  GIT_TAG ${TRITON_COMMON_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-core
  GIT_REPOSITORY https://github.com/triton-inference-server/core.git
  GIT_TAG ${TRITON_CORE_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_Declare(
  repo-backend
  GIT_REPOSITORY https://github.com/triton-inference-server/backend.git
  GIT_TAG ${TRITON_BACKEND_REPO_TAG}
  GIT_SHALLOW ON
)
FetchContent_MakeAvailable(repo-common repo-core repo-backend)

#
# Shared library implementing the Triton Backend API
#
# The linker version script is copied into the build tree so the link step can
# depend on it (see LINK_DEPENDS below).
configure_file(src/libtriton_marian.ldscript libtriton_marian.ldscript COPYONLY)

add_library(
  triton-marian-backend SHARED
  src/marian.cc
)

add_library(
  TritonMarianBackend::triton-marian-backend ALIAS triton-marian-backend
)

target_include_directories(
  triton-marian-backend
  PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/src
)

target_compile_features(triton-marian-backend PRIVATE cxx_std_11)
target_compile_options(
  triton-marian-backend PRIVATE
  $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
    -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror>
)

target_link_libraries(
  triton-marian-backend
  PRIVATE
    triton-backend-utils    # from repo-backend
    triton-core-serverstub  # from repo-core
)

# Static archives copied into /usr/lib by the Dockerfile.
target_link_libraries(
  triton-marian-backend
  PRIVATE  # from marian environment
    cmarian
    cmarian_cuda
    sentencepiece
    sentencepiece_train
    fbgemm
    asmjit
    protobuf
)

# The version script is referenced by its absolute path in the build tree so
# the link does not rely on the linker's working directory.
set_target_properties(
  triton-marian-backend PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  OUTPUT_NAME triton_marian
  LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_marian.ldscript
  LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_marian.ldscript"
)

#
# Install
#
include(GNUInstallDirs)
set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonMarianBackend)

install(
  TARGETS
    triton-marian-backend
  EXPORT
    triton-marian-backend-targets
  LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/marian
  ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/backends/marian
)

install(
  EXPORT
    triton-marian-backend-targets
  FILE
    TritonMarianBackendTargets.cmake
  NAMESPACE
    TritonMarianBackend::
  DESTINATION
    ${INSTALL_CONFIGDIR}
)

include(CMakePackageConfigHelpers)
configure_package_config_file(
  ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonMarianBackendConfig.cmake.in
  ${CMAKE_CURRENT_BINARY_DIR}/TritonMarianBackendConfig.cmake
  INSTALL_DESTINATION ${INSTALL_CONFIGDIR}
)

install(
  FILES
    ${CMAKE_CURRENT_BINARY_DIR}/TritonMarianBackendConfig.cmake
  DESTINATION ${INSTALL_CONFIGDIR}
)

#
# Export from build tree
#
export(
  EXPORT triton-marian-backend-targets
  FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonMarianBackendTargets.cmake
  NAMESPACE TritonMarianBackend::
)

export(PACKAGE TritonMarianBackend)
|
16
contrib/triton-aml/marian_backend/README.md
Normal file
16
contrib/triton-aml/marian_backend/README.md
Normal file
@ -0,0 +1,16 @@
|
||||
Use cmake to build and install in a local directory.
|
||||
|
||||
```
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ..
|
||||
$ make install
|
||||
```
|
||||
|
||||
The following required Triton repositories will be pulled and used in
|
||||
the build. By default the "main" branch/tag will be used for each repo
|
||||
but the listed CMake argument can be used to override.
|
||||
|
||||
* triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag]
|
||||
* triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag]
|
||||
* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag]
|
@ -0,0 +1,13 @@
|
||||
# Package configuration file for TritonMarianBackend: loads the exported
# targets (once) and publishes TRITONMARIANBACKEND_LIBRARIES.
include(CMakeFindDependencyMacro)

# Directory containing this file; the targets file is looked up next to it.
get_filename_component(
  TRITONMARIANBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY
)

list(APPEND CMAKE_MODULE_PATH ${TRITONMARIANBACKEND_CMAKE_DIR})

# Skip the include when the target already exists in the current build.
if(NOT TARGET TritonMarianBackend::triton-marian-backend)
  include("${TRITONMARIANBACKEND_CMAKE_DIR}/TritonMarianBackendTargets.cmake")
endif()

set(TRITONMARIANBACKEND_LIBRARIES TritonMarianBackend::triton-marian-backend)
|
@ -0,0 +1,5 @@
|
||||
# Export only the TRITONBACKEND_* entry points; every other symbol stays local.
{
  global:
    TRITONBACKEND_*;
  local:
    *;
};
|
570
contrib/triton-aml/marian_backend/src/marian.cc
Normal file
570
contrib/triton-aml/marian_backend/src/marian.cc
Normal file
@ -0,0 +1,570 @@
|
||||
#include <algorithm>
|
||||
#include "marian.h"
|
||||
#include "triton/backend/backend_common.h"
|
||||
|
||||
namespace triton { namespace backend { namespace marian {
|
||||
|
||||
// Evaluates X only while the response at RESPONSES[IDX] is still pending
// (non-null). If X yields an error, that error is sent as the final response,
// the slot is set to nullptr to mark the response as already sent, and the
// error object is deleted.
#define GUARDED_RESPOND_IF_ERROR(RESPONSES, IDX, X)                          \
  do {                                                                       \
    if ((RESPONSES)[IDX] == nullptr) {                                       \
      break; /* a response was already sent for this slot */                 \
    }                                                                        \
    TRITONSERVER_Error* guarded_err__ = (X);                                 \
    if (guarded_err__ == nullptr) {                                          \
      break; /* X succeeded */                                               \
    }                                                                        \
    LOG_IF_ERROR(                                                            \
        TRITONBACKEND_ResponseSend(                                          \
            (RESPONSES)[IDX], TRITONSERVER_RESPONSE_COMPLETE_FINAL,          \
            guarded_err__),                                                  \
        "failed to send error response");                                    \
    (RESPONSES)[IDX] = nullptr;                                              \
    TRITONSERVER_ErrorDelete(guarded_err__);                                 \
  } while (false)
|
||||
|
||||
//
|
||||
// ModelState
|
||||
//
|
||||
// State associated with a model that is using this backend. An object
|
||||
// of this class is created and associated with each
|
||||
// TRITONBACKEND_Model.
|
||||
//
|
||||
class ModelState {
|
||||
public:
|
||||
static TRITONSERVER_Error* Create(
|
||||
TRITONBACKEND_Model* triton_model, ModelState** state);
|
||||
|
||||
TRITONSERVER_Error* SetMarianConfigPath();
|
||||
|
||||
// Get the handle to the TRITONBACKEND model.
|
||||
TRITONBACKEND_Model* TritonModel() { return triton_model_; }
|
||||
|
||||
// Get the name of the model.
|
||||
const std::string& Name() const { return name_; }
|
||||
|
||||
// Get the Marian config path of the model.
|
||||
const std::string& MarianConfigPath() const { return marian_config_path_; }
|
||||
|
||||
private:
|
||||
ModelState(
|
||||
TRITONBACKEND_Model* triton_model, const char* name,
|
||||
common::TritonJson::Value&& model_config);
|
||||
|
||||
TRITONBACKEND_Model* triton_model_;
|
||||
const std::string name_;
|
||||
common::TritonJson::Value model_config_;
|
||||
std::string marian_config_path_;
|
||||
};
|
||||
|
||||
// Creates the ModelState for 'triton_model'.
//
// The model configuration is fetched from Triton as a message, serialized to
// JSON and parsed with TritonJson. On success the new object is stored in
// '*state' (the caller owns it) and nullptr is returned; otherwise the first
// error encountered is returned and '*state' is left untouched.
TRITONSERVER_Error*
ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state)
{
  TRITONSERVER_Message* config_message;
  RETURN_IF_ERROR(TRITONBACKEND_ModelConfig(
      triton_model, 1 /* config_version */, &config_message));

  // Get the model configuration as a json string from
  // config_message, parse it with the TritonJson.
  const char* buffer;
  size_t byte_size;
  common::TritonJson::Value model_config;
  TRITONSERVER_Error* err =
      TRITONSERVER_MessageSerializeToJson(config_message, &buffer, &byte_size);
  if (err == nullptr) {
    err = model_config.Parse(buffer, byte_size);
  }

  // The message is released on every path, including a failed serialization;
  // 'buffer' is not used after this point.
  TRITONSERVER_Error* delete_err = TRITONSERVER_MessageDelete(config_message);
  if (err != nullptr) {
    // Report the serialize/parse failure; don't leak the secondary error.
    if (delete_err != nullptr) {
      TRITONSERVER_ErrorDelete(delete_err);
    }
    return err;
  }
  RETURN_IF_ERROR(delete_err);

  const char* model_name;
  RETURN_IF_ERROR(TRITONBACKEND_ModelName(triton_model, &model_name));

  *state = new ModelState(
      triton_model, model_name, std::move(model_config));

  return nullptr;  // success
}
|
||||
|
||||
// Stores the model handle and name and takes ownership of the parsed model
// configuration. The Marian config path stays empty until
// SetMarianConfigPath() is called.
ModelState::ModelState(
    TRITONBACKEND_Model* triton_model, const char* name,
    common::TritonJson::Value&& model_config)
    : triton_model_(triton_model),
      name_(name),
      model_config_(std::move(model_config))
{
}
|
||||
|
||||
// Computes the path of the Marian configuration file and stores it in
// marian_config_path_.
//
// The file name is read from the optional model-config parameter
// "config_filepath" (its "string_value" member). The directory is hard-coded
// for the nlxseq2seq model:
//   /var/azureml-app/$AZUREML_MODEL_DIR/nlxseq2seq/triton/nlxseq2seq/1/data/model/
//
// Returns nullptr on success, or an error if the configuration cannot be
// serialized/read or the AZUREML_MODEL_DIR environment variable is not set.
TRITONSERVER_Error*
ModelState::SetMarianConfigPath()
{
  // Log the full model configuration to ease debugging of deployments.
  common::TritonJson::WriteBuffer buffer;
  RETURN_IF_ERROR(model_config_.PrettyWrite(&buffer));
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      (std::string("model configuration:\n") + buffer.Contents()).c_str());

  std::string config_filepath_str;
  common::TritonJson::Value parameters;
  if (model_config_.Find("parameters", &parameters)) {
    common::TritonJson::Value config_filepath;
    if (parameters.Find("config_filepath", &config_filepath)) {
      RETURN_IF_ERROR(config_filepath.MemberAsString(
          "string_value", &config_filepath_str));
      LOG_MESSAGE(
          TRITONSERVER_LOG_INFO,
          (std::string("model config path is set to : ") + config_filepath_str)
              .c_str());
    }
  }

  // std::getenv returns a null pointer when the variable is unset; appending
  // a null pointer to a std::string is undefined behavior, so fail explicitly.
  const char* model_dir = std::getenv("AZUREML_MODEL_DIR");
  if (model_dir == nullptr) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_INVALID_ARG,
        "environment variable AZUREML_MODEL_DIR is not set");
  }

  // Set the Marian config path.
  std::string config_path("/var/azureml-app/");
  config_path.append(model_dir);
  config_path.append("/nlxseq2seq/triton/nlxseq2seq/1/data/model/");
  config_path.append(config_filepath_str);
  marian_config_path_ = config_path;

  return nullptr;  // success
}
|
||||
|
||||
//
|
||||
// ModelInstanceState
|
||||
//
|
||||
// State associated with a model instance. An object of this class is
|
||||
// created and associated with each TRITONBACKEND_ModelInstance.
|
||||
//
|
||||
class ModelInstanceState {
|
||||
public:
|
||||
static TRITONSERVER_Error* Create(
|
||||
TRITONBACKEND_ModelInstance* triton_model_instance,
|
||||
void* marian, ModelInstanceState **state);
|
||||
|
||||
// Get the handle to the TRITONBACKEND model instance.
|
||||
TRITONBACKEND_ModelInstance* TritonModelInstance()
|
||||
{
|
||||
return triton_model_instance_;
|
||||
}
|
||||
|
||||
// Get the name, kind, device ID and marian instance of the instance.
|
||||
const std::string& Name() const { return name_; }
|
||||
TRITONSERVER_InstanceGroupKind Kind() const { return kind_; }
|
||||
int32_t DeviceId() const { return device_id_; }
|
||||
void* Marian() const { return marian_; }
|
||||
|
||||
private:
|
||||
ModelInstanceState(
|
||||
TRITONBACKEND_ModelInstance* triton_model_instance,
|
||||
void* marian, const char* name,
|
||||
const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id);
|
||||
|
||||
TRITONBACKEND_ModelInstance* triton_model_instance_;
|
||||
void* marian_;
|
||||
const std::string name_;
|
||||
const TRITONSERVER_InstanceGroupKind kind_;
|
||||
const int32_t device_id_;
|
||||
};
|
||||
|
||||
// Queries Triton for the name, kind and device ID of
// 'triton_model_instance' and wraps them, together with the Marian handle,
// in a new ModelInstanceState stored in '*state'. Returns nullptr on
// success or the first error reported by Triton.
TRITONSERVER_Error*
ModelInstanceState::Create(
    TRITONBACKEND_ModelInstance* triton_model_instance,
    void* marian, ModelInstanceState** state)
{
  const char* name;
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelInstanceName(triton_model_instance, &name));

  TRITONSERVER_InstanceGroupKind kind;
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelInstanceKind(triton_model_instance, &kind));

  int32_t device_id;
  RETURN_IF_ERROR(
      TRITONBACKEND_ModelInstanceDeviceId(triton_model_instance, &device_id));

  ModelInstanceState* created = new ModelInstanceState(
      triton_model_instance, marian, name, kind, device_id);
  *state = created;

  return nullptr;  // success
}
|
||||
|
||||
// Plain member-wise initialization; the Marian handle is stored as given.
ModelInstanceState::ModelInstanceState(
    TRITONBACKEND_ModelInstance* triton_model_instance,
    void* marian, const char* name,
    const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id)
    : triton_model_instance_(triton_model_instance),
      marian_(marian),
      name_(name),
      kind_(kind),
      device_id_(device_id)
{
}
|
||||
|
||||
/////////////
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Called by Triton when a model using this backend is loaded. Creates the
// ModelState, resolves the Marian config path and attaches the state to the
// model. Returns nullptr on success.
TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  ModelState* model_state = nullptr;
  RETURN_IF_ERROR(ModelState::Create(model, &model_state));

  TRITONSERVER_Error* err = model_state->SetMarianConfigPath();
  if (err == nullptr) {
    err = TRITONBACKEND_ModelSetState(
        model, reinterpret_cast<void*>(model_state));
  }
  if (err != nullptr) {
    // The state was not handed over to Triton, so nothing else will free it.
    delete model_state;
    return err;
  }

  return nullptr;  // success
}
|
||||
|
||||
// Called by Triton when the model is unloaded: destroys the ModelState that
// was attached to 'model'.
TRITONSERVER_Error*
TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model)
{
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state");

  delete reinterpret_cast<ModelState*>(raw_state);

  return nullptr;  // success
}
|
||||
|
||||
// Called by Triton for every model instance: initializes Marian with the
// model's config path and the instance's device ID, then attaches a
// ModelInstanceState holding the returned Marian handle to the instance.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance)
{
  // Look up the state of the model that owns this instance.
  TRITONBACKEND_Model* owning_model;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &owning_model));

  void* raw_model_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelState(owning_model, &raw_model_state));
  const ModelState* model_state =
      reinterpret_cast<ModelState*>(raw_model_state);

  // Local copy of the path: init() takes a non-const char*.
  std::string config_path(model_state->MarianConfigPath());

  int32_t device_id;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device_id));

  // NOTE(review): the handle returned by init() is not checked here; confirm
  // how init() reports failure.
  void* marian_handle =
      init(const_cast<char*>(config_path.c_str()), device_id);

  ModelInstanceState* instance_state;
  RETURN_IF_ERROR(
      ModelInstanceState::Create(instance, marian_handle, &instance_state));
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState(
      instance, reinterpret_cast<void*>(instance_state)));

  return nullptr;  // success
}
|
||||
|
||||
// Called by Triton when a model instance is removed: destroys the
// ModelInstanceState attached to 'instance'.
// NOTE(review): the Marian handle held by the state is not released here;
// marian.h declares no function for that.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance)
{
  void* raw_state;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &raw_state));

  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      "TRITONBACKEND_ModelInstanceFinalize: delete instance state");

  delete reinterpret_cast<ModelInstanceState*>(raw_state);

  return nullptr;  // success
}
|
||||
|
||||
// Executes a batch of requests on one model instance.
//
// The text of every request is read from its first input, all texts are
// joined with '\n' and translated by Marian in a single call, and the
// translated lines are then handed back to the requests in order (a request
// that contributed k lines receives k lines).
//
// Input and output tensors are expected to hold one length-prefixed string:
// a 4-byte length followed by the bytes (read/written in host byte order).
// NOTE(review): assumes one string element per input buffer — confirm
// against the clients of this backend.
//
// An error is returned only before any response has been sent or any request
// released. After that point, per-request failures are answered with an error
// response, and every request is released exactly once.
TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  LOG_MESSAGE(
      TRITONSERVER_LOG_INFO,
      ("Marian model instance executing " + std::to_string(request_count) +
       " requests").c_str()
  );

  // Size of the length prefix that precedes the string bytes.
  const uint64_t kLengthPrefixSize = 4;

  // Fetch the instance state up front, while returning an error is still
  // allowed (no response sent, no request released yet).
  void* vstate;
  RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate));
  ModelInstanceState* instance_state =
      reinterpret_cast<ModelInstanceState*>(vstate);

  // 'responses' is initialized with the response objects below and
  // if/when an error response is sent the corresponding entry in
  // 'responses' is set to nullptr to indicate that that response has
  // already been sent.
  std::vector<TRITONBACKEND_Response*> responses;
  responses.reserve(request_count);

  // Create a single response object for each request. If something
  // goes wrong when attempting to create the response objects just
  // fail all of the requests by returning an error.
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Response* response;
    RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, requests[r]));
    responses.push_back(response);
  }

  // We will execute all the requests at the same time, and so there
  // will be a single compute-start / compute-end time-range.
  uint64_t total_batch_size = 0;
  uint64_t exec_start_ns = 0;
  SET_TIMESTAMP(exec_start_ns);

  // Reports the per-request statistics and releases the request. Called
  // exactly once for every request, whether it succeeded or failed.
  auto finish_request = [&](const uint32_t r, const bool success) {
    uint64_t request_exec_end_ns = 0;
    SET_TIMESTAMP(request_exec_end_ns);
    LOG_IF_ERROR(
        TRITONBACKEND_ModelInstanceReportStatistics(
            instance_state->TritonModelInstance(), requests[r], success,
            exec_start_ns, exec_start_ns, request_exec_end_ns,
            request_exec_end_ns),
        "failed reporting request statistics"
    );
    LOG_IF_ERROR(
        TRITONBACKEND_RequestRelease(
            requests[r], TRITONSERVER_REQUEST_RELEASE_ALL),
        "failed releasing request"
    );
  };

  // Per-request bookkeeping, indexed by request position so that entries
  // stay aligned with 'requests' even when some requests fail early.
  std::vector<TRITONBACKEND_Input*> request_input(request_count, nullptr);
  std::vector<int> request_batch_size(request_count, 0);
  std::string input_strings;
  // True once at least one request contributed text; an explicit flag is
  // needed because a request may legitimately contribute an empty string.
  bool has_input = false;

  // Collect the input text of every request.
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

    const char* input_name = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestInputName(request, 0 /* index */, &input_name)
    );

    TRITONBACKEND_Input* input = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestInput(request, input_name, &input)
    );

    // If an error response was sent while getting the input name
    // or input then display an error message and move on
    // to next request.
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input or requested output name, error response sent")
              .c_str()
      );
      continue;
    }

    // Get input buffer count.
    uint32_t input_buffer_count = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputProperties(
            input, nullptr /* input_name */, nullptr, nullptr,
            nullptr, nullptr, &input_buffer_count
        )
    );
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input properties, error response sent")
              .c_str()
      );
      continue;
    }

    // Concatenate the string payload of every input buffer.
    std::string content;
    for (uint32_t b = 0; b < input_buffer_count; ++b) {
      const void* input_buffer = nullptr;
      uint64_t buffer_byte_size = 0;
      TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU;
      int64_t input_memory_type_id = 0;
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONBACKEND_InputBuffer(
              input, b, &input_buffer, &buffer_byte_size,
              &input_memory_type, &input_memory_type_id
          )
      );
      if ((responses[r] != nullptr) &&
          (input_memory_type == TRITONSERVER_MEMORY_GPU)) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_UNSUPPORTED,
                "failed to get input buffer in CPU memory"
            )
        );
      }
      if ((responses[r] != nullptr) &&
          ((input_buffer == nullptr) ||
           (buffer_byte_size < kLengthPrefixSize))) {
        GUARDED_RESPOND_IF_ERROR(
            responses, r,
            TRITONSERVER_ErrorNew(
                TRITONSERVER_ERROR_INVALID_ARG,
                "input buffer is too small to hold a length-prefixed string"
            )
        );
      }
      // Never touch the buffer of a request that has already been failed.
      if (responses[r] == nullptr) {
        break;
      }

      // Honor the declared string length, clamped to the bytes actually
      // present in the buffer.
      const char* bytes = reinterpret_cast<const char*>(input_buffer);
      uint32_t declared_size = 0;
      memcpy(&declared_size, bytes, kLengthPrefixSize);
      const uint64_t available_size = buffer_byte_size - kLengthPrefixSize;
      const uint64_t content_size =
          std::min<uint64_t>(declared_size, available_size);
      content.append(bytes + kLengthPrefixSize, content_size);
    }
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input buffer, error response sent")
              .c_str()
      );
      continue;
    }

    // Compose all the requests input to make a batch request,
    // record the sentences count of each request for further process.
    request_input[r] = input;
    request_batch_size[r] =
        static_cast<int>(std::count(content.begin(), content.end(), '\n')) + 1;
    if (has_input) {
      input_strings.append("\n");
    }
    input_strings.append(content);
    has_input = true;

    total_batch_size++;
  }

  // Operate on the entire batch of requests for improved performance.
  char* result = nullptr;
  if (has_input) {
    result = translate(
        instance_state->Marian(), const_cast<char*>(input_strings.c_str()));
  }

  // Assign the results to the corresponding request. 'pos' walks through
  // the translated text without modifying it.
  const char* pos = result;
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

    if ((responses[r] != nullptr) && (result == nullptr)) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_INTERNAL,
              "Marian returned no translation result"
          )
      );
    }
    // Requests that already received an error response only need releasing.
    if (responses[r] == nullptr) {
      finish_request(r, false /* success */);
      continue;
    }

    // Find current output content: the next 'request_batch_size[r]' lines,
    // i.e. the byte range [output_content, output_end).
    const char* output_content = pos;
    const char* output_end = pos;
    for (int line = 0; line < request_batch_size[r]; ++line) {
      const char* newline = strchr(pos, '\n');
      if (newline == nullptr) {
        // Last line of the result (or fewer lines than expected).
        output_end = pos + strlen(pos);
        pos = output_end;
        break;
      }
      output_end = newline;
      // Move to next output content.
      pos = newline + 1;
    }
    const uint64_t output_byte_size =
        static_cast<uint64_t>(output_end - output_content);

    const char* input_name;
    TRITONSERVER_DataType input_datatype;
    const int64_t* input_shape;
    uint32_t input_dims_count;
    uint64_t input_byte_size;
    uint32_t input_buffer_count;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_InputProperties(
            request_input[r], &input_name, &input_datatype, &input_shape,
            &input_dims_count, &input_byte_size, &input_buffer_count
        )
    );
    if (responses[r] == nullptr) {
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to read input properties, error response sent")
              .c_str()
      );
      finish_request(r, false /* success */);
      continue;
    }

    const char* requested_output_name = nullptr;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_RequestOutputName(
            request, 0 /* index */, &requested_output_name
        )
    );

    // Create an output tensor in the response,
    // input and output have same datatype and shape...
    TRITONBACKEND_Response* response = responses[r];
    TRITONBACKEND_Output* output;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_ResponseOutput(
            response, &output, requested_output_name, input_datatype,
            input_shape, input_dims_count
        )
    );

    // Get the output buffer. We request a buffer in CPU memory
    // but we have to handle any returned type. If we get back
    // a buffer in GPU memory we just fail the request.
    void* output_buffer;
    TRITONSERVER_MemoryType output_memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t output_memory_type_id = 0;
    GUARDED_RESPOND_IF_ERROR(
        responses, r,
        TRITONBACKEND_OutputBuffer(
            output, &output_buffer, output_byte_size + kLengthPrefixSize,
            &output_memory_type, &output_memory_type_id
        )
    );
    if ((responses[r] == nullptr) ||
        (output_memory_type == TRITONSERVER_MEMORY_GPU)) {
      GUARDED_RESPOND_IF_ERROR(
          responses, r,
          TRITONSERVER_ErrorNew(
              TRITONSERVER_ERROR_UNSUPPORTED,
              "failed to create output buffer in CPU memory"
          )
      );
      LOG_MESSAGE(
          TRITONSERVER_LOG_ERROR,
          (std::string("request ") + std::to_string(r) +
           ": failed to create output buffer in CPU memory, error request sent")
              .c_str()
      );
      finish_request(r, false /* success */);
      continue;
    }

    // Copy Marian result -> output: 4-byte length prefix, then the bytes.
    const uint32_t length_prefix = static_cast<uint32_t>(output_byte_size);
    memcpy(output_buffer, &length_prefix, kLengthPrefixSize);
    memcpy(
        reinterpret_cast<char*>(output_buffer) + kLengthPrefixSize,
        output_content, output_byte_size);

    // Send the response.
    LOG_IF_ERROR(
        TRITONBACKEND_ResponseSend(
            responses[r], TRITONSERVER_RESPONSE_COMPLETE_FINAL,
            nullptr /* success */),
        "failed sending response"
    );

    // Report statistics for the successful request and release it as soon
    // as the corresponding response has been sent.
    finish_request(r, true /* success */);
  }

  // Report statistics for the entire batch of requests.
  uint64_t exec_end_ns = 0;
  SET_TIMESTAMP(exec_end_ns);
  LOG_IF_ERROR(
      TRITONBACKEND_ModelInstanceReportBatchStatistics(
          instance_state->TritonModelInstance(), total_batch_size,
          exec_start_ns, exec_start_ns, exec_end_ns, exec_end_ns),
      "failed reporting batch request statistics"
  );

  // Release Marian result.
  if (result != nullptr) {
    free_result(result);
  }

  return nullptr;  // success
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
}}} // namespace triton::backend::marian
|
11
contrib/triton-aml/marian_backend/src/marian.h
Normal file
11
contrib/triton-aml/marian_backend/src/marian.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef _WIN32
|
||||
#define DLLEXPORT extern "C" __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT extern "C"
|
||||
#endif
|
||||
|
||||
// Creates a translator from the Marian config file at `path`, bound to device
// `device_num`, and returns it as an opaque handle to pass to translate().
// (In cmarian.cpp from this same change the handle is a heap-allocated CMarian.)
DLLEXPORT void* init(char* path, int device_num);
|
||||
// Translates the NUL-terminated text `sent` using a handle obtained from
// init(). The returned C string is heap-allocated by the implementation and
// must be handed back to free_result() once the caller is done with it.
DLLEXPORT char* translate(void* marian, char* sent);
|
||||
// Releases a buffer previously returned by translate().
DLLEXPORT void free_result(char* to_free);
|
239
contrib/triton-aml/src/CMakeLists.txt
Normal file
239
contrib/triton-aml/src/CMakeLists.txt
Normal file
@ -0,0 +1,239 @@
|
||||
add_subdirectory(3rd_party)
|
||||
|
||||
include_directories(.)
|
||||
include_directories(3rd_party)
|
||||
include_directories(3rd_party/SQLiteCpp/include)
|
||||
include_directories(3rd_party/sentencepiece)
|
||||
include_directories(3rd_party/fbgemm/include)
|
||||
include_directories(${CMAKE_BINARY_DIR}/local/include)
|
||||
|
||||
add_library(marian STATIC
|
||||
static/cmarian.cpp
|
||||
common/aliases.cpp
|
||||
common/fastopt.cpp
|
||||
common/version.cpp
|
||||
common/utils.cpp
|
||||
common/logging.cpp
|
||||
common/cli_helper.cpp
|
||||
common/cli_wrapper.cpp
|
||||
common/config.cpp
|
||||
common/config_parser.cpp
|
||||
common/config_validator.cpp
|
||||
common/options.cpp
|
||||
common/binary.cpp
|
||||
common/io.cpp
|
||||
common/filesystem.cpp
|
||||
common/file_stream.cpp
|
||||
common/types.cpp
|
||||
|
||||
data/alignment.cpp
|
||||
data/vocab.cpp
|
||||
data/default_vocab.cpp
|
||||
data/sentencepiece_vocab.cpp
|
||||
data/factored_vocab.cpp
|
||||
data/corpus_base.cpp
|
||||
data/corpus.cpp
|
||||
data/corpus_sqlite.cpp
|
||||
data/corpus_nbest.cpp
|
||||
data/text_input.cpp
|
||||
|
||||
3rd_party/cnpy/cnpy.cpp
|
||||
3rd_party/ExceptionWithCallStack.cpp
|
||||
|
||||
3rd_party/phf/phf.cc
|
||||
|
||||
tensors/backend.cpp
|
||||
tensors/rand.cpp
|
||||
tensors/tensor.cpp
|
||||
tensors/cpu/device.cpp
|
||||
tensors/cpu/prod.cpp
|
||||
tensors/cpu/tensor_operators.cpp
|
||||
|
||||
tensors/cpu/sharp/int_gemm.cpp
|
||||
tensors/cpu/sharp/avx_gemm.cpp
|
||||
tensors/cpu/sharp/sse_gemm.cpp
|
||||
tensors/cpu/fbgemm/packed_gemm.cpp
|
||||
|
||||
graph/expression_graph.cpp
|
||||
graph/expression_operators.cpp
|
||||
graph/node.cpp
|
||||
graph/node_operators.cpp
|
||||
graph/node_initializers.cpp
|
||||
|
||||
layers/convolution.cpp
|
||||
layers/generic.cpp
|
||||
layers/loss.cpp
|
||||
layers/weight.cpp
|
||||
|
||||
rnn/cells.cpp
|
||||
rnn/attention.cpp
|
||||
|
||||
optimizers/clippers.cpp
|
||||
optimizers/optimizers.cpp
|
||||
|
||||
models/model_factory.cpp
|
||||
models/encoder_decoder.cpp
|
||||
models/transformer_stub.cpp
|
||||
|
||||
rescorer/score_collector.cpp
|
||||
|
||||
translator/history.cpp
|
||||
translator/output_collector.cpp
|
||||
translator/output_printer.cpp
|
||||
translator/nth_element.cpp
|
||||
translator/helpers.cpp
|
||||
translator/scorers.cpp
|
||||
|
||||
training/graph_group_async.cpp
|
||||
training/graph_group_async_drop.cpp
|
||||
training/graph_group_sync.cpp
|
||||
training/graph_group_singleton.cpp
|
||||
training/graph_group_multinode.cpp
|
||||
training/graph_group_multinode_sync.cpp
|
||||
training/validator.cpp
|
||||
training/communicator.cpp
|
||||
training/scheduler.cpp
|
||||
|
||||
# this is only compiled to catch build errors, but not linked
|
||||
microsoft/quicksand.cpp
|
||||
|
||||
$<TARGET_OBJECTS:libyaml-cpp>
|
||||
$<TARGET_OBJECTS:SQLiteCpp>
|
||||
$<TARGET_OBJECTS:pathie-cpp>
|
||||
$<TARGET_OBJECTS:zlib>
|
||||
)
|
||||
target_compile_options(marian PUBLIC ${ALL_WARNINGS})
|
||||
|
||||
# Generate git_revision.h to reflect current git revision information
|
||||
# [https://stackoverflow.com/questions/1435953/how-can-i-pass-git-sha1-to-compiler-as-definition-using-cmake]
|
||||
# Git updates .git/logs/HEAD file whenever you pull or commit something.
|
||||
|
||||
# If Marian is checked out as a submodule in another repository,
|
||||
# there's no .git directory in ${CMAKE_SOURCE_DIR}. Instead .git is a
|
||||
# file that specifies the relative path from ${CMAKE_SOURCE_DIR} to
|
||||
# .git/modules/<MARIAN_ROOT_DIR> in the root of the repository that
|
||||
# contains Marian as a submodule. We set MARIAN_GIT_DIR to the appropriate
|
||||
# path, depending on whether ${CMAKE_SOURCE_DIR}/.git is a directory or file.
|
||||
if(NOT IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.git)
  # Submodule checkout: .git is a text file of the form "gitdir: <path>".
  # Extract <path> and resolve it against the source directory.
  file(READ ${CMAKE_SOURCE_DIR}/.git MARIAN_GIT_DIR)
  string(REGEX REPLACE "gitdir: (.*)\n" "\\1" MARIAN_GIT_DIR ${MARIAN_GIT_DIR})
  get_filename_component(MARIAN_GIT_DIR "${CMAKE_SOURCE_DIR}/${MARIAN_GIT_DIR}" ABSOLUTE)
else()
  # Regular checkout: the git metadata lives directly in .git.
  set(MARIAN_GIT_DIR ${CMAKE_SOURCE_DIR}/.git)
endif()
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/common/git_revision.h
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
COMMAND git log -1 --pretty=format:\#define\ GIT_REVISION\ \"\%h\ \%ai\" > ${CMAKE_CURRENT_SOURCE_DIR}/common/git_revision.h
|
||||
DEPENDS ${MARIAN_GIT_DIR}/logs/HEAD
|
||||
VERBATIM
|
||||
)
|
||||
add_custom_target(marian_version DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/common/git_revision.h)
|
||||
add_dependencies(marian marian_version) # marian must depend on it so that it gets created first
|
||||
# make sure all local dependencies are installed first before this is built
|
||||
add_dependencies(marian 3rd_party_installs)
|
||||
|
||||
if(CUDA_FOUND)
|
||||
cuda_add_library(marian_cuda
|
||||
tensors/gpu/device.cu
|
||||
tensors/gpu/algorithm.cu
|
||||
tensors/gpu/prod.cpp
|
||||
tensors/gpu/element.cu
|
||||
tensors/gpu/add.cu
|
||||
tensors/gpu/add_all.cu
|
||||
tensors/gpu/tensor_operators.cu
|
||||
tensors/gpu/cudnn_wrappers.cu
|
||||
translator/nth_element.cu
|
||||
translator/helpers.cu
|
||||
training/gradient_dropping/gpu/dropper.cu
|
||||
training/gradient_dropping/gpu/sparse_algorithm.cu
|
||||
STATIC)
|
||||
|
||||
target_compile_options(marian_cuda PUBLIC ${ALL_WARNINGS})
|
||||
# make sure all local dependencies are installed first before this is built
|
||||
add_dependencies(marian_cuda 3rd_party_installs)
|
||||
set_target_properties(marian_cuda PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
endif(CUDA_FOUND)
|
||||
|
||||
set_target_properties(marian PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
set_target_properties(marian PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
set_target_properties(marian PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
|
||||
add_executable(marian_train command/marian_main.cpp)
|
||||
set_target_properties(marian_train PROPERTIES OUTPUT_NAME marian)
|
||||
target_compile_options(marian_train PUBLIC ${ALL_WARNINGS})
|
||||
|
||||
add_executable(marian_decoder command/marian_decoder.cpp)
|
||||
set_target_properties(marian_decoder PROPERTIES OUTPUT_NAME marian-decoder)
|
||||
target_compile_options(marian_decoder PUBLIC ${ALL_WARNINGS})
|
||||
|
||||
add_executable(marian_scorer command/marian_scorer.cpp)
|
||||
set_target_properties(marian_scorer PROPERTIES OUTPUT_NAME marian-scorer)
|
||||
target_compile_options(marian_scorer PUBLIC ${ALL_WARNINGS})
|
||||
|
||||
add_executable(marian_vocab command/marian_vocab.cpp)
|
||||
set_target_properties(marian_vocab PROPERTIES OUTPUT_NAME marian-vocab)
|
||||
target_compile_options(marian_vocab PUBLIC ${ALL_WARNINGS})
|
||||
|
||||
add_executable(marian_conv command/marian_conv.cpp)
|
||||
set_target_properties(marian_conv PROPERTIES OUTPUT_NAME marian-conv)
|
||||
target_compile_options(marian_conv PUBLIC ${ALL_WARNINGS})
|
||||
|
||||
set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab marian_conv)
|
||||
|
||||
# marian.zip and marian.tgz
|
||||
# This combines marian, marian_decoder in a single ZIP or TAR file for
|
||||
# execution in MSFT internal tools FLO and Philly.
|
||||
# For Philly submission, we need statically-linked versions to deal with
|
||||
# library dependencies, so this target is only enabled for static builds.
|
||||
if(USE_STATIC_LIBS)
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_BINARY_DIR}/marian.zip"
|
||||
COMMAND zip -v -0 -j "${CMAKE_BINARY_DIR}/marian.zip"
|
||||
"${CMAKE_BINARY_DIR}/marian"
|
||||
"${CMAKE_BINARY_DIR}/marian-decoder"
|
||||
"${CMAKE_BINARY_DIR}/marian-scorer"
|
||||
"${CMAKE_BINARY_DIR}/marian-vocab"
|
||||
"${CMAKE_BINARY_DIR}/marian-conv"
|
||||
DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv)
|
||||
add_custom_target(marian_zip DEPENDS "${CMAKE_BINARY_DIR}/marian.zip")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${CMAKE_BINARY_DIR}/marian.tgz"
|
||||
COMMAND tar -cvvzf "${CMAKE_BINARY_DIR}/marian.tgz" -C "${CMAKE_BINARY_DIR}"
|
||||
"marian"
|
||||
"marian-decoder"
|
||||
"marian-scorer"
|
||||
"marian-vocab"
|
||||
"marian-conv"
|
||||
DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv)
|
||||
add_custom_target(marian_tgz DEPENDS "${CMAKE_BINARY_DIR}/marian.tgz")
|
||||
add_custom_target(philly DEPENDS marian_tgz marian_zip)
|
||||
endif(USE_STATIC_LIBS)
|
||||
|
||||
if(COMPILE_SERVER)
|
||||
add_executable(marian_server command/marian_server.cpp)
|
||||
set_target_properties(marian_server PROPERTIES OUTPUT_NAME marian-server)
|
||||
target_compile_options(marian_server PUBLIC ${ALL_WARNINGS})
|
||||
set(EXECUTABLES ${EXECUTABLES} marian_server)
|
||||
endif(COMPILE_SERVER)
|
||||
|
||||
# Link every executable against the static marian library and place the
# resulting binary directly in the build directory.
foreach(target_name ${EXECUTABLES})
  # EXT_LIBS is listed twice on purpose in the original command line; the
  # duplication is kept unchanged here.
  target_link_libraries(${target_name} marian ${EXT_LIBS} ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})
  if(CUDA_FOUND)
    # GPU builds additionally need the CUDA part of the library.
    target_link_libraries(${target_name} marian marian_cuda ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})
  endif()
  set_target_properties(${target_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
endforeach()
|
||||
|
||||
if(COMPILE_TESTS)
|
||||
set(CATCH_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/3rd_party)
|
||||
add_library(Catch INTERFACE)
|
||||
target_include_directories(Catch INTERFACE ${CATCH_INCLUDE_DIR})
|
||||
|
||||
add_subdirectory(tests)
|
||||
endif(COMPILE_TESTS)
|
||||
|
||||
if(COMPILE_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif(COMPILE_EXAMPLES)
|
74
contrib/triton-aml/src/cmarian.cpp
Normal file
74
contrib/triton-aml/src/cmarian.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
#include "marian.h"
|
||||
#include "translator/beam_search.h"
|
||||
#include "translator/translator.h"
|
||||
#include "common/utils.h"
|
||||
|
||||
#include<stdio.h>
|
||||
#include<string.h>
|
||||
#include<iostream>
|
||||
#include <string>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define DLLEXPORT extern "C" __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT extern "C"
|
||||
#endif
|
||||
|
||||
using namespace marian;
|
||||
|
||||
class CMarian {
|
||||
private:
|
||||
Ptr<Options> options_;
|
||||
char* configPath_;
|
||||
Ptr<TranslateService<BeamSearch>> task_;
|
||||
|
||||
public:
|
||||
CMarian(char* configPath, int device_num) : configPath_(configPath) {
|
||||
int argc = 5;
|
||||
char** argv = new char*[argc];
|
||||
argv[0] = new char[20];
|
||||
strcpy(argv[0], "./marian-decoder");
|
||||
argv[1] = new char[12];
|
||||
strcpy(argv[1], "--config");
|
||||
argv[2] = configPath_;
|
||||
argv[3] = new char[12];
|
||||
strcpy(argv[3], "--devices");
|
||||
argv[4] = new char[sizeof(device_num) + 1];
|
||||
strcpy(argv[4], std::to_string(device_num).c_str());
|
||||
|
||||
options_ = marian::parseOptions(argc, argv, cli::mode::translation, true);
|
||||
task_ = New<TranslateService<BeamSearch>>(options_);
|
||||
|
||||
delete[] argv[0];
|
||||
delete[] argv[1];
|
||||
delete[] argv[3];
|
||||
delete[] argv[4];
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Exposes Marian translation capabilities based on the loaded YAML config associated with this class.
|
||||
* @param sent The sentence to run inference on.
|
||||
* @return A string delimited by ||| with newlines separating beams.
|
||||
*/
|
||||
char* translate(char* sent) {
|
||||
std::string strSent(sent);
|
||||
auto outputText = task_->run(strSent);
|
||||
char* ret = (char*) malloc(outputText.length() + 1);
|
||||
snprintf(ret, outputText.length() + 1, "%s", outputText.c_str());
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * C entry point: allocates a CMarian for the given config path and device and
 * returns it as an opaque handle.
 */
DLLEXPORT void* init(char* path, int device_num) {
  return static_cast<void*>(new CMarian(path, device_num));
}
|
||||
|
||||
/**
 * C entry point: interprets `marian` as the CMarian handle returned by init()
 * and forwards `sent` to its translate() method.
 */
DLLEXPORT char* translate(void* marian, char* sent) {
  return static_cast<CMarian*>(marian)->translate(sent);
}
|
||||
|
||||
/**
 * C entry point: releases a result buffer with free().
 */
DLLEXPORT void free_result(char* to_free) {
  free(to_free);
}
|
157
contrib/triton-aml/src/logging.cpp
Normal file
157
contrib/triton-aml/src/logging.cpp
Normal file
@ -0,0 +1,157 @@
|
||||
#include "logging.h"
|
||||
#include "common/config.h"
|
||||
#include "spdlog/sinks/null_sink.h"
|
||||
#include "3rd_party/ExceptionWithCallStack.h"
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __unix__
|
||||
#include <signal.h>
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define noinline __declspec(noinline)
|
||||
#else
|
||||
#define noinline __attribute__((noinline))
|
||||
#endif
|
||||
|
||||
namespace marian {

// File-local switch; this block only stores it and hands it back through the
// accessor pair below.
static bool throwExceptionOnAbort{false};

/** Returns the current value of the throw-on-abort switch. */
bool getThrowExceptionOnAbort() {
  return throwExceptionOnAbort;
}

/** Overwrites the throw-on-abort switch with `doThrowExceptionOnAbort`. */
void setThrowExceptionOnAbort(bool doThrowExceptionOnAbort) {
  throwExceptionOnAbort = doThrowExceptionOnAbort;
}

}  // namespace marian
|
||||
|
||||
std::shared_ptr<spdlog::logger> createStderrLogger(const std::string& name,
|
||||
const std::string& pattern,
|
||||
const std::vector<std::string>& files,
|
||||
bool quiet) {
|
||||
std::vector<spdlog::sink_ptr> sinks;
|
||||
|
||||
auto stderr_sink = spdlog::sinks::stderr_sink_mt::instance();
|
||||
if(!quiet)
|
||||
sinks.push_back(stderr_sink);
|
||||
|
||||
for(auto&& file : files) {
|
||||
auto file_sink = std::make_shared<spdlog::sinks::simple_file_sink_st>(file, true);
|
||||
sinks.push_back(file_sink);
|
||||
}
|
||||
|
||||
auto existing = spdlog::get(name);
|
||||
if (existing) return existing;
|
||||
|
||||
auto logger = std::make_shared<spdlog::logger>(name, begin(sinks), end(sinks));
|
||||
|
||||
spdlog::register_logger(logger);
|
||||
logger->set_pattern(pattern);
|
||||
return logger;
|
||||
}
|
||||
|
||||
/**
 * Sets the verbosity of `logger` from a textual level name.
 *
 * Accepted names: trace, debug, info, warn, err, error, critical, off.
 *
 * @return true if `level` was recognised and applied; false otherwise, in
 *         which case a warning is written to `logger` and its level is left
 *         unchanged.
 */
bool setLoggingLevel(spdlog::logger& logger, std::string const level) {
  struct NamedLevel {
    const char* name;
    spdlog::level::level_enum value;
  };
  // "err" and "error" are two spellings of the same level.
  static const NamedLevel knownLevels[] = {
      {"trace", spdlog::level::trace},
      {"debug", spdlog::level::debug},
      {"info", spdlog::level::info},
      {"warn", spdlog::level::warn},
      {"err", spdlog::level::err},
      {"error", spdlog::level::err},
      {"critical", spdlog::level::critical},
      {"off", spdlog::level::off},
  };

  for(const auto& candidate : knownLevels) {
    if(level == candidate.name) {
      logger.set_level(candidate.value);
      return true;
    }
  }

  logger.warn("Unknown log level '{}' for logger '{}'", level.c_str(), logger.name().c_str());
  return false;
}
|
||||
|
||||
static void setErrorHandlers();
|
||||
/**
 * Creates the "general" and "valid" loggers and applies the logging-related
 * settings from `config` (log files, quiet mode, log level, time zone), then
 * installs the process-wide error handlers.
 *
 * @param config Configuration to read from; may be null, in which case both
 *               loggers write to stderr only and no further settings apply.
 */
void createLoggers(const marian::Config* config) {
  std::vector<std::string> generalLogs;
  std::vector<std::string> validLogs;
  bool quiet = false;

  if(config) {
    const std::string logFile = config->get<std::string>("log");
    if(!logFile.empty()) {
      generalLogs.push_back(logFile);
#ifndef _WIN32
      // can't open the same file twice in Windows for some reason
      validLogs.push_back(logFile);
#endif
    }

    // valid-log is available only for training
    if(config->has("valid-log")) {
      const std::string validLogFile = config->get<std::string>("valid-log");
      if(!validLogFile.empty())
        validLogs.push_back(validLogFile);
    }

    quiet = config->get<bool>("quiet");
  }

  Logger general{createStderrLogger("general", "[%Y-%m-%d %T] %v", generalLogs, quiet)};
  Logger valid{createStderrLogger("valid", "[%Y-%m-%d %T] [valid] %v", validLogs, quiet)};

  if(config && config->has("log-level")) {
    std::string loglevel = config->get<std::string>("log-level");
    // An unrecognised level ends the setup here: the time zone and the error
    // handlers below are then not configured.
    if(!setLoggingLevel(*general, loglevel))
      return;
    setLoggingLevel(*valid, loglevel);
  }

  if(config) {
    std::string timezone = config->get<std::string>("log-time-zone");
    if(!timezone.empty()) {
#ifdef _WIN32
#define setenv(var, val, over) SetEnvironmentVariableA(var, val) // ignoring over flag
#endif
      setenv("TZ", timezone.c_str(), true);
      tzset();
    }
  }

  setErrorHandlers();
}
|
||||
|
||||
static void unhandledException() {
|
||||
if(std::current_exception()) {
|
||||
try {
|
||||
throw; // rethrow so that we can get access to what()
|
||||
} catch(const std::exception& e) {
|
||||
ABORT("Unhandled exception of type '{}': {}", typeid(e).name(), e.what());
|
||||
} catch(...) {
|
||||
ABORT("Unhandled exception");
|
||||
}
|
||||
} else {
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Installs unhandledException as the terminate handler and, on Unix, signal
 * handlers that turn SIGSEGV and SIGFPE into ABORT calls.
 */
static void setErrorHandlers() {
  // call stack for unhandled exceptions
  std::set_terminate(unhandledException);
#ifdef __unix__
  auto onSegfault = [](int /*signal*/, siginfo_t*, void*) { ABORT("Segmentation fault"); };
  auto onFloatingPoint = [](int /*signal*/, siginfo_t*, void*) { ABORT("Floating-point exception"); };

  // One sigaction record is reused for both signals; only the handler differs.
  struct sigaction action = { 0 };
  sigemptyset(&action.sa_mask);
  action.sa_flags = SA_SIGINFO;

  // catch segfaults
  action.sa_sigaction = onSegfault;
  sigaction(SIGSEGV, &action, NULL);

  action.sa_sigaction = onFloatingPoint;
  sigaction(SIGFPE, &action, NULL);
#endif
}
|
||||
|
||||
// modify the log pattern for the "general" logger to include the MPI rank
|
||||
// This is called upon initializing MPI. It is needed to associate error messages with ranks.
|
||||
void switchtoMultinodeLogging(std::string nodeIdStr) {
|
||||
Logger log = spdlog::get("general");
|
||||
if(log)
|
||||
log->set_pattern("[%Y-%m-%d %T " + nodeIdStr + ":%t] %v");
|
||||
}
|
||||
|
||||
|
||||
namespace marian {
|
||||
std::string noinline getCallStack(size_t skipLevels) {
|
||||
return ::Microsoft::MSR::CNTK::DebugUtil::GetCallStack(skipLevels + 2, /*makeFunctionNamesStandOut=*/true);
|
||||
}
|
||||
|
||||
void noinline logCallStack(size_t skipLevels) {
|
||||
checkedLog("general", "critical", getCallStack(skipLevels));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user