From 00d2e999e3c24c9e8c836e800b3720bbe4a326a1 Mon Sep 17 00:00:00 2001 From: Roman Grundkiewicz Date: Thu, 5 Mar 2020 21:08:23 +0000 Subject: [PATCH] Add support for compiling on Mac (and clang) (#598) * Compile marian on mac and clang. Two linker errors left * MacOS defines has a different definition for unsigned long * Find OpenBLAS on mac * Fix a typo in the BLAS detection * Simplify and add comments * Refactor cpu allocation code. Do not fallback to malloc * Fix compilation warning on gcc * Refactor memory allocation * Make things compile with clang-8 with fewer warnings. * Eliminate clang warnings when compiling examples and when compiling without MKL * added USE_MKL option to compile without MKL for debugging even when MKL is installed * fixed issues with compiling examples with clang * Fix compile errors with clang in src/tests. * Fix missing whitespace in error message in src/tests/sqlite.cpp. * Responding to Frank Seide's code review. * Eliminate clang warnings when compiling with -DUSE_FBGEMM=on. * Fix compilation on gcc 8 * Get Marian to compile with Clang-10. * Fix Clang-8 warnings when compiling with marian-server * Add more comments and explicit unsigned long long for windows * Pull in fbgemm that supports mac * Fix warning flags order in CMakeLists.txt Co-authored-by: Kenneth Heafield Co-authored-by: Ulrich Germann Co-authored-by: Roman Grundkiewicz --- CMakeLists.txt | 43 +++++--- cmake/FindCBLAS.cmake | 2 +- src/3rd_party/CMakeLists.txt | 45 +++++++- src/3rd_party/fbgemm | 2 +- src/3rd_party/half_float/umHalf.inl | 4 +- .../pathie-cpp/src/entry_iterator.cpp | 2 +- src/3rd_party/pathie-cpp/src/path.cpp | 2 +- src/3rd_party/pathie-cpp/src/pathie.cpp | 8 +- src/3rd_party/zstr/strict_fstream.hpp | 2 +- src/CMakeLists.txt | 4 + src/command/marian_server.cpp | 2 +- src/common/definitions.h | 15 +++ src/common/fastopt.cpp | 14 ++- src/common/fastopt.h | 5 +- src/common/file_stream.h | 17 ++- src/common/filesystem.h | 14 ++- src/common/intrusive_ptr.h | 6 +- src/common/logging.cpp | 2 +- src/common/types.h | 40 +++++-- src/common/utils.cpp | 12 ++- src/data/batch.h | 2 +- src/data/corpus_base.h | 1 + src/data/default_vocab.cpp | 3 +- src/data/shortlist.h | 7 +- src/data/text_input.h | 1 + src/data/vocab_base.h | 1 + src/examples/mnist/dataset.h | 3 + src/examples/mnist/model.h | 8 +- src/examples/mnist/validator.h | 4 +- src/functional/operators.h | 101 +++++++++--------- src/graph/node.h | 10 +- src/graph/node_initializers.h | 11 +- src/graph/node_operators_binary.h | 17 +-- src/graph/parameters.h | 8 +- src/layers/generic.h | 2 + src/layers/guided_alignment.h | 8 +- src/layers/loss.h | 19 ++-- src/layers/weight.h | 1 + src/microsoft/quicksand.cpp | 3 +- src/microsoft/quicksand.h | 2 + src/models/costs.h | 9 ++ src/models/encoder_decoder.h | 1 + src/models/model_base.h | 2 + src/models/model_task.h | 2 + src/models/s2s.h | 5 +- src/models/states.h | 7 +- src/optimizers/clippers.h | 1 + src/optimizers/optimizers.h | 4 +- src/rescorer/score_collector.h | 1 + src/rnn/rnn.h | 3 +- src/tensors/backend.h | 2 +- src/tensors/cpu/device.cpp | 49 +++++---- src/tensors/cpu/fbgemm/expanded_gemm.h | 45 ++++---- src/tensors/cpu/tensor_operators.cpp | 16 ++- src/tensors/rand.h | 4 +- src/tensors/tensor_operators.h | 6 +- src/tests/prod.cpp | 2 +- src/tests/sqlite.cpp | 2 + src/training/communicator.cpp | 4 +- src/training/communicator.h | 11 +- .../gradient_dropping/sparse_tensor.h | 2 +- src/training/graph_group.h | 12 +-- src/training/graph_group_async.h | 2 +- 
src/training/graph_group_multinode_sync.h | 1 - src/training/graph_group_sync.h | 1 - src/training/training_state.h | 1 + src/training/validator.h | 8 ++ src/translator/output_collector.h | 1 + src/translator/scorers.h | 7 ++ 69 files changed, 428 insertions(+), 236 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c442931f..c27761cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) option(USE_CUDNN "Use CUDNN library" OFF) option(USE_DOXYGEN "Build documentation with Doxygen" ON) option(USE_FBGEMM "Use FBGEMM" OFF) +option(USE_MKL "Compile with MKL support" ON) option(USE_MPI "Use MPI library" OFF) option(USE_NCCL "Use NCCL library" ON) option(USE_SENTENCEPIECE "Download and compile SentencePiece" OFF) @@ -33,7 +34,7 @@ option(USE_STATIC_LIBS "Link statically against non-system libs" OFF) if(USE_CCACHE) find_program(CCACHE_PROGRAM ccache) if(CCACHE_PROGRAM) - message(STATUS "Found and will be using ccache for faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).") + message(STATUS "Will be using ccache for faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).") set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") else(CCACHE_PROGRAM) message(WARNING "Compilation with ccache requested but no ccache found.") @@ -141,20 +142,32 @@ else(MSVC) add_definitions(-DUSE_FBGEMM=1) endif(USE_FBGEMM) - set(DISABLE_GLOBALLY "-Wno-unused-result") + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + # Clang-10.0.0 complains when CUDA is newer than 10.1 + set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-cuda-version") + endif() + set(DISABLE_GLOBALLY "-Wno-unused-result -Wno-unknown-warning-option ${CLANG_IGNORE_UNKNOWN_CUDA}") # These are used in src/CMakeLists.txt on a per-target basis - list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function; - -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;) + list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated; + -Wno-pragmas; -Wno-unused-parameter; -Wno-unused-function; + -Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; + -Wno-missing-field-initializers;) # This warning does not exist prior to gcc 5.0 if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) - list(APPEND ALL_WARNINGS -Wsuggest-override) + list(APPEND ALL_WARNINGS -Wsuggest-override -Wno-int-in-bool-context) endif() - set(CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wl,--no-as-needed -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -m64 -funroll-loops -ffinite-math-only -g -rdynamic") - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -rdynamic") + if(CMAKE_COMPILER_IS_GNUCC) + # these flags are not known to clang + set(CMAKE_GCC_FLAGS "-Wl,--no-as-needed") + set(CMAKE_RDYNAMIC_FLAG "-rdynamic") + endif(CMAKE_COMPILER_IS_GNUCC) + + set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_CXX_FLAGS_SLIM "-Ofast -m64 -funroll-loops -ffinite-math-only -DNDEBUG") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}") set(CMAKE_CXX_FLAGS_PROFILE 
"${CMAKE_CXX_FLAGS_RELEASE} -pg") @@ -162,9 +175,9 @@ else(MSVC) set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction") # these need to be set separately - set(CMAKE_C_FLAGS "-pthread -Wl,--no-as-needed -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") - set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -ffinite-math-only -g -rdynamic") - set(CMAKE_C_FLAGS_DEBUG "-O0 -g -rdynamic") + set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}") + set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}") + set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}") set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -ffinite-math-only -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}") set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg") @@ -204,7 +217,7 @@ if(CUDA_FOUND) if((CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0") AND (CMAKE_VERSION VERSION_LESS "3.12.2")) message(WARNING "On some Unix systems CUDA 10.0+ requires CMake 3.12.2+; you use CMake ${CMAKE_VERSION}") endif() - + if(COMPILE_CUDA_SM35) LIST(APPEND COMPUTE -arch=sm_35; -gencode=arch=compute_35,code=sm_35;) # Tesla K40 and above endif(COMPILE_CUDA_SM35) @@ -323,13 +336,15 @@ if(USE_MPI) endif(USE_MPI) if(COMPILE_CPU) - find_package(MKL) + if(USE_MKL) + find_package(MKL) + endif(USE_MKL) if(MKL_FOUND) include_directories(${MKL_INCLUDE_DIR}) set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES}) add_definitions(-DBLAS_FOUND=1 -DMKL_FOUND=1) else(MKL_FOUND) - set(BLA_VENDOR "OpenBLAS") + set(BLAS_VENDOR "OpenBLAS") find_package(BLAS) if(BLAS_FOUND) include(FindCBLAS) diff --git a/cmake/FindCBLAS.cmake b/cmake/FindCBLAS.cmake index 631da99b..97b0d3f8 100644 --- a/cmake/FindCBLAS.cmake +++ b/cmake/FindCBLAS.cmake @@ -54,7 +54,7 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES INCLUDE _prefix _name _flags _list _include IF(APPLE) FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library} - PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV + PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 /usr/local/opt/openblas/lib ENV DYLD_LIBRARY_PATH ) ELSE(APPLE) diff --git a/src/3rd_party/CMakeLists.txt b/src/3rd_party/CMakeLists.txt index e100a619..9f3981af 100644 --- a/src/3rd_party/CMakeLists.txt +++ b/src/3rd_party/CMakeLists.txt @@ -15,12 +15,22 @@ if(USE_FBGEMM) if(NOT MSVC) # only locally disabled for the 3rd_party folder - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function") + # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused") endif() set(FBGEMM_BUILD_TESTS OFF CACHE BOOL "Disable fbgemm tests") set(FBGEMM_BUILD_BENCHMARKS OFF CACHE BOOL "Disable fbgemm benchmark") add_subdirectory(./fbgemm) + + # asmjit (3rd-party submodule of fbgemm) sets -Wall -Wextra near the end of + # the compile options, invalidating any -Wno-... flags that we may have set + # earlier. Let's remove them. 
+ get_property(ASMJIT_COMPILE_OPTIONS TARGET asmjit PROPERTY COMPILE_OPTIONS) + list(REMOVE_ITEM ASMJIT_COMPILE_OPTIONS -Wall -Wextra) + set_property(TARGET asmjit PROPERTY COMPILE_OPTIONS ${ASMJIT_COMPILE_OPTIONS}) + message(" ASMJIT COMPILE FLAGS: ${ASMJIT_COMPILE_OPTIONS}") + endif(USE_FBGEMM) if(USE_SENTENCEPIECE) @@ -39,7 +49,7 @@ if(USE_SENTENCEPIECE) message(WARNING "You are compiling SentencePiece binaries with -DUSE_STATIC_LIBS=on. \ This will cause spm_train to segfault. No need to worry if you do not intend to use that binary. \ Marian support for SentencePiece will work fine.") - + set(SPM_ENABLE_SHARED OFF CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE) set(SPM_TCMALLOC_STATIC ON CACHE BOOL "Link static library of TCMALLOC." FORCE) else(USE_STATIC_LIBS) @@ -51,8 +61,19 @@ if(USE_SENTENCEPIECE) include_directories(./sentencepiece) set_target_properties(spm_encode spm_decode spm_train spm_normalize spm_export_vocab - PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") + PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") + + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + foreach(t sentencepiece sentencepiece_train sentencepiece_train-static + spm_decode spm_encode spm_export_vocab spm_normalize spm_train) + set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-tautological-compare -Wno-unused") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-range-loop-construct") + endif() + # get_property(SENTENCEPIECE_COMPILE_FLAGS TARGET ${t} PROPERTY COMPILE_FLAGS) + # message("-- SENTENCPIECE: compile flags for target ${t}: ${SENTENCEPIECE_COMPILE_FLAGS}") + endforeach(t) + endif() if(USE_STATIC_LIBS) set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) @@ -63,6 +84,22 @@ include_directories(./SQLiteCpp/include) include_directories(./CLI) include_directories(./pathie-cpp/include) +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + #set_target_properties(SQLiteCpp PROPERTIES COMPILE_FLAGS + set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -Wno-parentheses-equality -Wno-unused-value") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) + set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -Wno-implicit-int-float-conversion") + endif() + set_property(TARGET libyaml-cpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -fPIC -Wno-unused-value") + set_property(TARGET pathie-cpp APPEND_STRING PROPERTY COMPILE_FLAGS + " -fPIC -Wno-unused-value") +endif() + + + include_directories(./zlib) include(ExternalProject) diff --git a/src/3rd_party/fbgemm b/src/3rd_party/fbgemm index 84e66a97..f78e6098 160000 --- a/src/3rd_party/fbgemm +++ b/src/3rd_party/fbgemm @@ -1 +1 @@ -Subproject commit 84e66a976046180187724aff60a236c5378fde7c +Subproject commit f78e60988329b9207d086c743cafce1ac1bea3ab diff --git a/src/3rd_party/half_float/umHalf.inl b/src/3rd_party/half_float/umHalf.inl index 4f065dad..3f5285a2 100644 --- a/src/3rd_party/half_float/umHalf.inl +++ b/src/3rd_party/half_float/umHalf.inl @@ -186,7 +186,7 @@ inline HalfFloat& HalfFloat::operator= (float other) inline bool HalfFloat::operator== (HalfFloat other) const { // +0 and -0 are considered to be equal - if (!(bits << 1u) && !(other.bits << 1u))return true; + if ((bits << 1u) == 0 && (other.bits << 1u) == 0) return true; return bits == other.bits && !this->IsNaN(); } @@ -194,7 +194,7 @@ inline bool HalfFloat::operator== (HalfFloat other) const inline bool 
HalfFloat::operator!= (HalfFloat other) const { // +0 and -0 are considered to be equal - if (!(bits << 1u) && !(other.bits << 1u))return false; + if ((bits << 1u) == 0 && (other.bits << 1u) == 0) return false; return bits != other.bits || this->IsNaN(); } diff --git a/src/3rd_party/pathie-cpp/src/entry_iterator.cpp b/src/3rd_party/pathie-cpp/src/entry_iterator.cpp index 24e5b110..baaf5394 100644 --- a/src/3rd_party/pathie-cpp/src/entry_iterator.cpp +++ b/src/3rd_party/pathie-cpp/src/entry_iterator.cpp @@ -31,7 +31,7 @@ #include "../include/path.hpp" #include "../include/errors.hpp" -#if defined(__unix__) +#if defined(__unix__) || defined(__APPLE__) #include #include #include diff --git a/src/3rd_party/pathie-cpp/src/path.cpp b/src/3rd_party/pathie-cpp/src/path.cpp index 07fba271..e732e09c 100644 --- a/src/3rd_party/pathie-cpp/src/path.cpp +++ b/src/3rd_party/pathie-cpp/src/path.cpp @@ -902,7 +902,7 @@ Path Path::pwd() */ Path Path::exe() { -#if defined(__linux__) +#if defined(__linux__) || defined(__APPLE__) char buf[PATH_MAX]; ssize_t size = ::readlink("/proc/self/exe", buf, PATH_MAX); diff --git a/src/3rd_party/pathie-cpp/src/pathie.cpp b/src/3rd_party/pathie-cpp/src/pathie.cpp index c0abc689..d8e567e1 100644 --- a/src/3rd_party/pathie-cpp/src/pathie.cpp +++ b/src/3rd_party/pathie-cpp/src/pathie.cpp @@ -143,7 +143,7 @@ std::string Pathie::convert_encodings(const char* from_encoding, const char* to_ errno = 0; errsav = 0; -#ifdef BSD +#if defined(BSD) && ! defined(__APPLE__) //Since MacOS evolved from BSD, it is captured here but the iconv on macos behaves differently // What the heck. FreeBSD violates POSIX.1-2008: it declares iconv() // differently than mandated by POSIX: http://pubs.opengroup.org/onlinepubs/9699919799/functions/iconv.html // (it declares a `const' where it must not be). @@ -181,11 +181,10 @@ std::string Pathie::convert_encodings(const char* from_encoding, const char* to_ std::string Pathie::utf8_to_filename(const std::string& utf8) { bool fs_encoding_is_utf8 = false; - + char* fsencoding = NULL; #if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX) fs_encoding_is_utf8 = true; #else - char* fsencoding = NULL; fsencoding = nl_langinfo(CODESET); fs_encoding_is_utf8 = (strcmp(fsencoding, "UTF-8") == 0); #endif @@ -206,11 +205,10 @@ std::string Pathie::utf8_to_filename(const std::string& utf8) std::string Pathie::filename_to_utf8(const std::string& native_filename) { bool fs_encoding_is_utf8 = false; - + char* fsencoding = NULL; #if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX) fs_encoding_is_utf8 = true; #else - char* fsencoding = NULL; fsencoding = nl_langinfo(CODESET); fs_encoding_is_utf8 = (strcmp(fsencoding, "UTF-8") == 0); #endif diff --git a/src/3rd_party/zstr/strict_fstream.hpp b/src/3rd_party/zstr/strict_fstream.hpp index fbc64164..7b117393 100644 --- a/src/3rd_party/zstr/strict_fstream.hpp +++ b/src/3rd_party/zstr/strict_fstream.hpp @@ -27,7 +27,7 @@ static std::string strerror() { buff = "Unknown error"; } -#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && ! _GNU_SOURCE +#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || __APPLE__) && ! 
_GNU_SOURCE // XSI-compliant strerror_r() if (strerror_r(errno, &buff[0], buff.size()) != 0) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c1a51390..839a64f0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -215,6 +215,10 @@ if(COMPILE_SERVER) set(EXECUTABLES ${EXECUTABLES} marian_server) endif(COMPILE_SERVER) +if(APPLE) # This is a dependency of pathie but I can't seem to link it into that CMakeLists because we're not compiling it as a library. + set(EXT_LIBS ${EXT_LIBS} iconv) +endif() + foreach(exec ${EXECUTABLES}) target_link_libraries(${exec} marian ${EXT_LIBS} ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT}) if(CUDA_FOUND) diff --git a/src/command/marian_server.cpp b/src/command/marian_server.cpp index 5df944d7..e4074bd1 100644 --- a/src/command/marian_server.cpp +++ b/src/command/marian_server.cpp @@ -44,7 +44,7 @@ int main(int argc, char **argv) { // Error Codes for error code meanings // http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html - translate.on_error = [](Ptr connection, + translate.on_error = [](Ptr /*connection*/, const SimpleWeb::error_code &ec) { LOG(error, "Connection error: ({}) {}", ec.value(), ec.message()); }; diff --git a/src/common/definitions.h b/src/common/definitions.h index 45f5d563..2df0f8f3 100755 --- a/src/common/definitions.h +++ b/src/common/definitions.h @@ -10,6 +10,21 @@ #include #include +// The macro MAYBE_UNUSED is used to selectively disable +// unused-variable warnings. C++17 defines the attribute +// [[maybe_unused]], but I don't think we're at C++17 yet. We can add it when we reach C++17. +// The compilers gcc and clang (and maybe others) define +// __has_attribute and support __attribute__(unused) in C++11, +#if defined __has_attribute +# if __has_attribute(unused) +# define MAYBE_UNUSED __attribute__((unused)) +# else +# define MAYBE_UNUSED +# endif +#endif + + + #define THREAD_GUARD(body) [&]() { body; }() // test if THREAD_GUARD is neccessary, remove if no problems occur. #define NodeOp(op) [=]() { op; } diff --git a/src/common/fastopt.cpp b/src/common/fastopt.cpp index ccdd7b54..9af8e844 100644 --- a/src/common/fastopt.cpp +++ b/src/common/fastopt.cpp @@ -84,10 +84,16 @@ std::vector As>::apply(const FastOpt& node) { // specializations for simple vector types template struct As>; template struct As>; -// Windows and Unix based OS have different type definitions for 'unsigned long'. -// So, we need an explicit definition for uint64_t. Otherwise, there's a linking error on windows. +// Windows, Linux based OS and Mac have different type definitions for 'unsigned long'. +// So, we need an explicit definitions for uint64_t, that cover different platforms. +// Otherwise, there's a linking error on windows or Linux or Mac. 
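The platform differences this comment refers to can be checked with a small standalone program; the sketch below is illustrative only and not part of the patch:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <type_traits>

    int main() {
      // Per the notes above: uint64_t is 'unsigned long' on Linux but
      // 'unsigned long long' on macOS and Windows, while size_t is
      // 'unsigned long' on Linux and macOS but 'unsigned long long' on
      // Windows. A template instantiated for only one of the two spellings
      // therefore links on some platforms and fails on the others.
      std::printf("uint64_t == unsigned long      : %d\n",
                  (int)std::is_same<std::uint64_t, unsigned long>::value);
      std::printf("uint64_t == unsigned long long : %d\n",
                  (int)std::is_same<std::uint64_t, unsigned long long>::value);
      std::printf("size_t   == unsigned long      : %d\n",
                  (int)std::is_same<std::size_t, unsigned long>::value);
      return 0;
    }

Instantiating the template for both 'unsigned long' and 'unsigned long long', as the hunk below does, covers size_t and uint64_t on all three platforms.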
// https://software.intel.com/en-us/articles/size-of-long-integer-type-on-different-architecture-and-os/ -template struct As>; +// https://stackoverflow.com/questions/32021860/c-should-you-size-t-with-a-regular-array +// MacOS: size_t = unsigned long (8 bytes), uint64_t = unsigned long long (8 bytes) +// Linux: size_t = unsigned long (8 bytes), uint64_t = unsigned long (8 bytes) +// Windows: size_t = unsigned long long (8 bytes), uint64_t = unsigned long long (8 bytes) +template struct As>; +template struct As>; template struct As>; template struct As>; template struct As>; @@ -103,4 +109,4 @@ std::pair As>::apply(const FastOpt& node) { template struct As>; } -} \ No newline at end of file +} diff --git a/src/common/fastopt.h b/src/common/fastopt.h index 9ddc3c2c..3f735660 100644 --- a/src/common/fastopt.h +++ b/src/common/fastopt.h @@ -367,7 +367,8 @@ public: } const FastOpt& operator[](const char* const key) const { - return operator[](crc::crc(key)); + // MacOS requires explicit cast to size_t before we can use it. + return operator[]((size_t)crc::crc(key)); } const FastOpt& operator[](const std::string& key) const { @@ -375,4 +376,4 @@ public: } }; -} \ No newline at end of file +} diff --git a/src/common/file_stream.h b/src/common/file_stream.h index c8151207..9bbf3359 100644 --- a/src/common/file_stream.h +++ b/src/common/file_stream.h @@ -7,10 +7,21 @@ #include "common/filesystem.h" #include "common/logging.h" +// Even when compiling with clang, __GNUC__ may be defined, so +// we need to add some extra checks to avoid compile errors with +// respect to -Wsuggest-override. #ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wsuggest-override" +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-value" +# if defined(__has_warning) +# if __has_warning("-Wsuggest-override") +# pragma GCC diagnostic ignored "-Wsuggest-override" +# endif +# else +# pragma GCC diagnostic ignored "-Wsuggest-override" +# endif #endif + #ifdef _MSC_VER #pragma warning(push) // 4101: 'identifier' : unreferenced local variable. One parameter variable in zstr.hpp is not used. #pragma warning(disable : 4101) @@ -82,7 +93,7 @@ protected: void NormalizeTempPrefix(std::string& base) const; void MakeTemp(const std::string& base); - + }; } // namespace io diff --git a/src/common/filesystem.h b/src/common/filesystem.h index d95125c5..d7cb3da6 100644 --- a/src/common/filesystem.h +++ b/src/common/filesystem.h @@ -7,9 +7,19 @@ // @TODO: go back to canonical names for functions and objects // as specified in C++17 so it becomes easy to move in the future +// Even when compiling with clang, __GNUC__ may be defined, so +// we need to add some extra checks to avoid compile errors with +// respect to -Wsuggest-override. 
#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wsuggest-override" +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-value" +# if defined(__has_warning) +# if __has_warning("-Wsuggest-override") +# pragma GCC diagnostic ignored "-Wsuggest-override" +# endif +# else +# pragma GCC diagnostic ignored "-Wsuggest-override" +# endif #endif #include "3rd_party/pathie-cpp/include/path.hpp" // @TODO: update to latest Pathie diff --git a/src/common/intrusive_ptr.h b/src/common/intrusive_ptr.h index fa412a8b..d476dbc1 100644 --- a/src/common/intrusive_ptr.h +++ b/src/common/intrusive_ptr.h @@ -169,12 +169,12 @@ inline bool operator!=(const IntrusivePtr& a, std::nullptr_t) { template inline bool operator==(T* a, const IntrusivePtr& b) { - return b.get(); + return a == b.get(); // used to say: return b.get(); That cannot be right. [UG] } template inline bool operator!=(T* a, const IntrusivePtr& b) { - return b.get(); + return a != b.get(); // used to say: return b.get(); That cannot be right. [UG] } template @@ -223,5 +223,3 @@ namespace std { } }; } - - diff --git a/src/common/logging.cpp b/src/common/logging.cpp index 93bfcbd8..62d76fee 100644 --- a/src/common/logging.cpp +++ b/src/common/logging.cpp @@ -124,7 +124,7 @@ static void setErrorHandlers() { std::set_terminate(unhandledException); #ifdef __unix__ // catch segfaults - struct sigaction sa = { 0 }; + struct sigaction sa = { {0} }; sigemptyset(&sa.sa_mask); sa.sa_flags = SA_SIGINFO; sa.sa_sigaction = [](int /*signal*/, siginfo_t*, void*) { ABORT("Segmentation fault"); }; diff --git a/src/common/types.h b/src/common/types.h index 552ae225..4bc4f9ad 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -254,7 +254,7 @@ enum class Type : size_t { packed16 = TypeClass::packed_type + 2u, // special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint16) is meaningless. packed8avx2 = TypeClass::packed_type + 1u + TypeClass::avx2_type, // special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless. packed8avx512 = TypeClass::packed_type + 1u + TypeClass::avx512_type, // special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless. - + }; static inline size_t operator&(TypeClass typeClass, Type type) { @@ -394,7 +394,7 @@ static Type inline typeFromString(const std::string& str) { return Type::float32; if(str == "float64") return Type::float64; - + if(str == "packed16") return Type::packed16; if(str == "packed8avx2") @@ -437,19 +437,35 @@ void matchOrAbort(Type type) { namespace typeFitting { // own namespace instead of in class, otherwise we get error "explicit specialization in non-namespace scope" - // compares max for different types as constexpr, so can be used at compile-time to determine if RequestType type max fits into ReturnType max, see std::conditional below. - template - constexpr bool fitsIntoMax() { return std::numeric_limits::max() <= std::numeric_limits::max(); } // for built-in types everything is constexpr + // Helper function for fitsIntoMax() below + // Returns the 'capacity' of a type: number of digits for integers, + // max_exponent for floats. We ignore the mantissa for floats. 
+ template constexpr int capacity() { + static_assert(std::is_arithmetic::value || std::is_same::value, + "Wrong type for this template"); + return (std::is_integral::value + ? std::numeric_limits::digits + : std::numeric_limits::max_exponent); + } + + + // Compare max for different types as constexpr, so can be used at compile-time to determine if RequestType type max fits into ReturnType max, see std::conditional below. + template + constexpr bool fitsIntoMax() { + // We can't just compare std::numeric_limits<>::max(), because Clang-10 + // complains about rounding errors when implicitly converting int to float + return ((!std::is_integral::value // RequestType is a float + && std::is_integral::value) // ReturnType an integer + ? capacity() < capacity() // special case + : capacity() <= capacity()); // normal case + } // for built-in types everything is constexpr - // add specializations here when needed - template <> constexpr bool fitsIntoMax() { return true; }; // for float16 conversion to float is not constexpr, hence specializations - template <> constexpr bool fitsIntoMax() { return false; }; // for float16 conversion to float is not constexpr, hence specializations } template class NumericLimits { private: - + template void setLimitsMax() { max = (ReturnType)std::numeric_limits::max(); lowest = (ReturnType)std::numeric_limits::lowest(); @@ -459,10 +475,14 @@ private: void setLimits() { // check if the maximum of type RequestType fits into ReturnType constexpr bool fits = typeFitting::fitsIntoMax(); + // sanity check: + static_assert(fits || typeFitting::fitsIntoMax(), + "RequestType doesn't fit into ReturnType, and ReturnType doesn't " + "fit into RequestType. fitsIntoMax is broken!"); // and then use the smaller of each types to determine max, min, lowest. using MaxType = typename std::conditional::type; setLimitsMax(); - // @TODO: should we rather abort if the RequestType does not fit into ReturnType instead of clipping to smaller type? + // @TODO: should we rather abort if the RequestType does not fit into ReturnType instead of clipping to smaller type? 
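The capacity-based comparison introduced above can be exercised outside Marian with a few compile-time checks; a minimal standalone sketch of the same helpers (built-in types only, names mirroring the patch):

    #include <limits>
    #include <type_traits>

    // Integer types are compared by their number of value bits, floating-point
    // types by their maximum exponent; the mantissa is ignored, as in the patch.
    template <typename T> constexpr int capacity() {
      return std::is_integral<T>::value ? std::numeric_limits<T>::digits
                                        : std::numeric_limits<T>::max_exponent;
    }

    template <typename RequestType, typename ReturnType>
    constexpr bool fitsIntoMax() {
      return (!std::is_integral<RequestType>::value && std::is_integral<ReturnType>::value)
                 ? capacity<RequestType>() <  capacity<ReturnType>()   // float-into-integer special case
                 : capacity<RequestType>() <= capacity<ReturnType>();  // normal case
    }

    static_assert(fitsIntoMax<int, float>(),     "INT_MAX is representable below FLT_MAX");
    static_assert(!fitsIntoMax<float, int>(),    "FLT_MAX does not fit into an int");
    static_assert(fitsIntoMax<float, double>(),  "float max fits into double");
    static_assert(!fitsIntoMax<double, float>(), "double max does not fit into float");

The helpers avoid comparing the raw max() values directly because the implicit integer-to-float conversion in such a comparison is exactly what Clang-10 warns about, as noted in the comment above.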
// ABORT_IF(!fits, "Type {} is too small to contain max of type {}", typeId(), typeId()); } diff --git a/src/common/utils.cpp b/src/common/utils.cpp index 80aee431..3acb756d 100755 --- a/src/common/utils.cpp +++ b/src/common/utils.cpp @@ -8,12 +8,22 @@ #include #include #include -#ifdef __unix__ +#if defined(__unix__) || defined(__APPLE__) #include #endif #include #include +// MACOS lacks HOST_NAME_MAX +#ifndef HOST_NAME_MAX +# if defined(_POSIX_HOST_NAME_MAX) +# define HOST_NAME_MAX _POSIX_HOST_NAME_MAX +# elif defined(MAXHOSTNAMELEN) +# define HOST_NAME_MAX MAXHOSTNAMELEN +# endif +#endif + + namespace marian { namespace utils { diff --git a/src/data/batch.h b/src/data/batch.h index b1b10981..3c592b31 100755 --- a/src/data/batch.h +++ b/src/data/batch.h @@ -26,7 +26,7 @@ public: virtual void setGuidedAlignment(std::vector&&) = 0; virtual void setDataWeights(const std::vector&) = 0; - + virtual ~Batch() {}; protected: std::vector sentenceIds_; }; diff --git a/src/data/corpus_base.h b/src/data/corpus_base.h index f0d9b706..e85e378a 100755 --- a/src/data/corpus_base.h +++ b/src/data/corpus_base.h @@ -525,6 +525,7 @@ public: const std::vector>& vocabs, Ptr options); + virtual ~CorpusBase() {} virtual std::vector>& getVocabs() = 0; protected: diff --git a/src/data/default_vocab.cpp b/src/data/default_vocab.cpp index 53f90a5d..590e9931 100644 --- a/src/data/default_vocab.cpp +++ b/src/data/default_vocab.cpp @@ -45,6 +45,7 @@ protected: public: // @TODO: choose between 'virtual' and 'final'. Can we derive from this class? + virtual ~DefaultVocab() {}; virtual const std::string& canonicalExtension() const override { return suffixes_[0]; } virtual const std::vector& suffixes() const override { return suffixes_; } @@ -295,7 +296,7 @@ private: class ClassVocab : public DefaultVocab { private: // Do nothing. - virtual void addRequiredVocabulary(const std::string& vocabPath, bool isJson) override { vocabPath; isJson; } + virtual void addRequiredVocabulary(const std::string& /*vocabPath*/, bool /*isJson*/) override {} // Not adding special class labels, only seen classes. 
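Many of the interface headers in this patch only gain an empty virtual destructor (CorpusBase and DefaultVocab above; ShortlistGenerator, IVocab, ICost and others below). A reduced sketch, with hypothetical class names, of why that matters once objects are deleted through a base-class pointer:

    #include <memory>

    struct IVocabLike {                 // hypothetical stand-in for the patched interfaces
      virtual int size() const = 0;
      virtual ~IVocabLike() {}          // without this, the delete below is undefined behaviour
    };                                  // (compilers can flag it, e.g. -Wdelete-non-virtual-dtor)

    struct DefaultVocabLike : IVocabLike {
      int size() const override { return 0; }
    };

    int main() {
      std::unique_ptr<IVocabLike> v(new DefaultVocabLike()); // destroyed via the base pointer
      return v->size();
    }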
virtual void create(const std::string& vocabPath, diff --git a/src/data/shortlist.h b/src/data/shortlist.h index e5d08494..177272df 100644 --- a/src/data/shortlist.h +++ b/src/data/shortlist.h @@ -36,6 +36,8 @@ public: class ShortlistGenerator { public: + virtual ~ShortlistGenerator() {} + virtual Ptr generate(Ptr batch) const = 0; // Writes text version of (possibly) pruned short list to file @@ -129,7 +131,6 @@ private: Ptr trgVocab_; size_t srcIdx_; - size_t trgIdx_; bool shared_{false}; size_t firstNum_{100}; @@ -183,13 +184,12 @@ public: Ptr srcVocab, Ptr trgVocab, size_t srcIdx = 0, - size_t trgIdx = 1, + size_t /*trgIdx*/ = 1, bool shared = false) : options_(options), srcVocab_(srcVocab), trgVocab_(trgVocab), srcIdx_(srcIdx), - trgIdx_(trgIdx), shared_(shared) { std::vector vals = options_->get>("shortlist"); @@ -235,7 +235,6 @@ public: virtual Ptr generate(Ptr batch) const override { auto srcBatch = (*batch)[srcIdx_]; - // auto trgBatch = (*batch)[trgIdx_]; // add firstNum most frequent words std::unordered_set indexSet; diff --git a/src/data/text_input.h b/src/data/text_input.h index bdbcf64e..db99ef6a 100644 --- a/src/data/text_input.h +++ b/src/data/text_input.h @@ -37,6 +37,7 @@ public: typedef SentenceTuple Sample; TextInput(std::vector inputs, std::vector> vocabs, Ptr options); + virtual ~TextInput() {} Sample next() override; diff --git a/src/data/vocab_base.h b/src/data/vocab_base.h index 6b6869d8..8c214c97 100755 --- a/src/data/vocab_base.h +++ b/src/data/vocab_base.h @@ -57,6 +57,7 @@ public: virtual Word randWord() const { return Word::fromWordIndex(rand() % size()); } + virtual ~IVocab() {}; }; class Options; diff --git a/src/examples/mnist/dataset.h b/src/examples/mnist/dataset.h index 0e73840b..b0492b85 100644 --- a/src/examples/mnist/dataset.h +++ b/src/examples/mnist/dataset.h @@ -62,6 +62,7 @@ private: std::vector inputs_; public: + std::vector& inputs() { return inputs_; } const std::vector& inputs() const { return inputs_; } @@ -144,6 +145,8 @@ public: loadData(); } + virtual ~MNISTData(){} + void loadData() override { ABORT_IF(paths_.size() != 2, "Paths to MNIST data files are not specified"); diff --git a/src/examples/mnist/model.h b/src/examples/mnist/model.h index 24543ad1..f7af1681 100755 --- a/src/examples/mnist/model.h +++ b/src/examples/mnist/model.h @@ -47,6 +47,8 @@ class MNISTLogsoftmax : public ILogProb { public: MNISTLogsoftmax() {} + virtual ~MNISTLogsoftmax(){} + Logits apply(Ptr model, Ptr graph, Ptr batch, @@ -61,13 +63,15 @@ public: typedef data::MNISTData dataset_type; template - MnistFeedForwardNet(Ptr options, Args... args) + MnistFeedForwardNet(Ptr options, Args... 
/*args*/) : options_(options), inference_(options->get("inference", false)) {} + virtual ~MnistFeedForwardNet(){} + virtual Logits build(Ptr graph, Ptr batch, bool /*clean*/ = false) override { - + return Logits(apply(graph, batch, inference_)); } diff --git a/src/examples/mnist/validator.h b/src/examples/mnist/validator.h index 7adc73f0..232bb124 100644 --- a/src/examples/mnist/validator.h +++ b/src/examples/mnist/validator.h @@ -19,7 +19,9 @@ public: builder_ = models::createModelFromOptions(options, models::usage::translation); } - virtual void keepBest(const std::vector>& graphs) override { + virtual ~MNISTAccuracyValidator(){} + + virtual void keepBest(const std::vector>& /*graphs*/) override { LOG(warn, "Keeping best model for MNIST examples is not supported"); } diff --git a/src/functional/operators.h b/src/functional/operators.h index 41fc74b0..be6bb2e5 100755 --- a/src/functional/operators.h +++ b/src/functional/operators.h @@ -7,55 +7,58 @@ namespace marian { namespace functional { // General template, will be used for any type without specializations -// and will fail with an abort message. +// and will fail at runtime with an abort message. Note that the +// general template functions don't have named parameters on purpose, +// because clang will warn about unused parameters during compilation. + template struct Ops { - static HOST_DEVICE_INLINE T tanh(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T sin(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T cos(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T tan(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T log(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T exp(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T abs(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T sqrt(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T neg(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T sgn(const T& x) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T tanh(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T sin(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T cos(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T tan(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T log(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T exp(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T abs(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T sqrt(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T neg(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T sgn(const T&) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T add(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T sub(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T mul(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T div(const T& x, const T& y) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T add(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T sub(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T mul(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T div(const T&, const T&) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T max(const T& x, const T& y) { ABORT("Unknown type"); } - static 
HOST_DEVICE_INLINE T min(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T pow(const T& x, const T& y) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T max(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T min(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T pow(const T&, const T&) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T negate(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T eq(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T neq(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T gt(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T lt(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T geq(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T leq(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T _and(const T& x, const T& y) { ABORT("Unknown type"); } // 'and' is used by gcc - static HOST_DEVICE_INLINE T _or(const T& x, const T& y) { ABORT("Unknown type"); } // 'or' is used by gcc + static HOST_DEVICE_INLINE T negate(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T eq(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T neq(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T gt(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T lt(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T geq(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T leq(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T _and(const T&, const T&) { ABORT("Unknown type"); } // 'and' is used by gcc + static HOST_DEVICE_INLINE T _or(const T&, const T&) { ABORT("Unknown type"); } // 'or' is used by gcc // Neural Networks specific functions - static HOST_DEVICE_INLINE T sigmoid(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T logaddexp(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T clip(const T& x, const T& y) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T sigmoid(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T logaddexp(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T clip(const T&, const T&) { ABORT("Unknown type"); } // derivative of Clip, cut-off function - static HOST_DEVICE_INLINE T bump(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T relu(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T reluBack(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T prelu(const T& x, const T& y) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T preluBack(const T& x, const T& y) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T bump(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T relu(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T reluBack(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T prelu(const T&, const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T preluBack(const T&, const T&) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T if_then_else(const T& x, const T& y, const T& z) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T if_then_else(const T&, const T&, const T&) { 
ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T sumReduce(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T maxReduce(const T& x) { ABORT("Unknown type"); } - static HOST_DEVICE_INLINE T minReduce(const T& x) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T sumReduce(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T maxReduce(const T&) { ABORT("Unknown type"); } + static HOST_DEVICE_INLINE T minReduce(const T&) { ABORT("Unknown type"); } }; // Specialization for float @@ -127,14 +130,14 @@ template <> struct Ops { typedef double Single; - static HOST_DEVICE_INLINE double tanh(const double& x) { return tanh(x); } - static HOST_DEVICE_INLINE double sin(const double& x) { return sin(x); } - static HOST_DEVICE_INLINE double cos(const double& x) { return cos(x); } - static HOST_DEVICE_INLINE double tan(const double& x) { return tan(x); } - static HOST_DEVICE_INLINE double log(const double& x) { return log(x); } - static HOST_DEVICE_INLINE double exp(const double& x) { return exp(x); } - static HOST_DEVICE_INLINE double abs(const double& x) { return abs(x); } - static HOST_DEVICE_INLINE double sqrt(const double& x) { return sqrt(x); } + static HOST_DEVICE_INLINE double tanh(const double& x) { return std::tanh(x); } + static HOST_DEVICE_INLINE double sin(const double& x) { return std::sin(x); } + static HOST_DEVICE_INLINE double cos(const double& x) { return std::cos(x); } + static HOST_DEVICE_INLINE double tan(const double& x) { return std::tan(x); } + static HOST_DEVICE_INLINE double log(const double& x) { return std::log(x); } + static HOST_DEVICE_INLINE double exp(const double& x) { return std::exp(x); } + static HOST_DEVICE_INLINE double abs(const double& x) { return std::abs(x); } + static HOST_DEVICE_INLINE double sqrt(const double& x) { return std::sqrt(x); } static HOST_DEVICE_INLINE double neg(const double& x) { return -x; } static HOST_DEVICE_INLINE double sgn(const double& x) { return (0 < x) - (x < 0); } @@ -145,7 +148,7 @@ struct Ops { static HOST_DEVICE_INLINE double max(const double& x, const double& y) { return x < y ? y : x; } static HOST_DEVICE_INLINE double min(const double& x, const double& y) { return x < y ? x : y; } - static HOST_DEVICE_INLINE double pow(const double& x, const double& y) { return pow(x, y); } + static HOST_DEVICE_INLINE double pow(const double& x, const double& y) { return std::pow(x, y); } static HOST_DEVICE_INLINE double negate(const double& x) { return !(bool)x; } @@ -460,7 +463,7 @@ struct Ops { static DEVICE_INLINE half exp(const half& x) { return hexp(x); } static DEVICE_INLINE half sqrt(const half& x) { return hsqrt(x); } static DEVICE_INLINE half neg(const half& x) { return -x; } - + static DEVICE_INLINE half abs(const half& x) { return fabs((float)x); }// @TODO half has this information somewhere in the struct, right? static DEVICE_INLINE half sgn(const half& x) { half zero = 0.f; return (zero < x) - (x < zero); } // @TODO half has this information somewhere in the struct, right? diff --git a/src/graph/node.h b/src/graph/node.h index 4d591674..c017eeb2 100644 --- a/src/graph/node.h +++ b/src/graph/node.h @@ -40,7 +40,7 @@ protected: std::string debugMessage_; Ptr> subtape_; // a subtape is used to keep track of nodes that need to be freed and recomputed with gradient-checkpointing. - bool isCheckpoint_{false}; // true if this node has been selected to be a checkpoint, currently only done manually. 
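One of the fixes above, in the Ops<double> specialization, is worth a standalone illustration: inside the struct, an unqualified call such as tanh(x) finds the member function of the same name before the <cmath> function, so the function calls itself. The reduced sketch below (hypothetical names, not part of the patch) shows the pattern and the std:: qualification the patch applies:

    #include <cmath>
    #include <cstdio>

    struct MathOps {
      // static double log(const double& x) { return log(x); }    // finds MathOps::log: infinite recursion
      static double log(const double& x) { return std::log(x); }  // reaches the <cmath> overload
    };

    int main() {
      std::printf("%f\n", MathOps::log(2.0));  // 0.693147
      return 0;
    }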
+ bool isCheckpoint_{false}; // true if this node has been selected to be a checkpoint, currently only done manually. Ptr recorder_; size_t recorderHash_; @@ -138,7 +138,7 @@ public: virtual std::string graphviz() override { std::stringstream ss; - ss << "\"" << this << "\" [" + ss << "\"" << this << "\" [" << "shape=\"" << form() << "\", " << "label=" << label() << ", " << "style=\"filled\", " @@ -147,7 +147,7 @@ public: for(auto&& child : children()) ss << "\"" << child << "\" -> \"" << this << "\";" << std::endl; - + if(subtape_) { for(auto&& dep : *subtape_) ss << "\"" << dep << "\" -> \"" << this << "\" [style=dotted];" << std::endl; @@ -188,9 +188,9 @@ struct NaryNodeOp : public Node { // Deduce type automatically, but then all types must be the same // this is called automatically when no output type is specified. - // If the input types are mixed, the output type needs to be specified + // If the input types are mixed, the output type needs to be specified // in the constructor. - Type commonType(const std::vector& nodes) { + static Type commonType(const std::vector& nodes) { ABORT_IF(nodes.size() == 0, "NaryNodeOp has no children"); Type type = nodes[0]->value_type(); for(int i = 1; i < nodes.size(); ++i) diff --git a/src/graph/node_initializers.h b/src/graph/node_initializers.h index 624816b9..21dcc95d 100755 --- a/src/graph/node_initializers.h +++ b/src/graph/node_initializers.h @@ -17,9 +17,9 @@ namespace inits { /** * Base class for specialized NodeInitializers. * - * A NodeInitializer is a functor that is associated with parameters - * and constants, and is invoked on a tensor during node intialization. - * You need to override NodeIntializer::apply(Tensor) with your own + * A NodeInitializer is a functor that is associated with parameters + * and constants, and is invoked on a tensor during node intialization. + * You need to override NodeIntializer::apply(Tensor) with your own * functionality or use a fromLambda intializer. * * See node_initializers.cpp for examples. @@ -31,6 +31,7 @@ protected: public: virtual void apply(Tensor t) = 0; void setAllocator(Ptr allocator) { allocator_ = allocator; } + virtual ~NodeInitializer() {} }; /** @@ -135,7 +136,7 @@ Ptr dropout(float dropoutProbabilty); /** * Intialize with gumbel noise, i.e. -log(-log(u)) where u ~ Uniform(0 + eps, 1 - eps) - * + * * @return A NodeInitializer */ Ptr gumbel(float eps = 1e-5f); @@ -163,7 +164,7 @@ Ptr fromWord2vec(const std::string& file, /** * Computes Google's Transformer-style sinusoidal position embeddings - * starting from position 'start' taking into account batch and time + * starting from position 'start' taking into account batch and time * dimensions of the tensor. 
* * Expected tensor layout {-2: time, -1: model} diff --git a/src/graph/node_operators_binary.h b/src/graph/node_operators_binary.h index d12c2481..63158ffa 100755 --- a/src/graph/node_operators_binary.h +++ b/src/graph/node_operators_binary.h @@ -480,9 +480,12 @@ class CSRDotNodeOp : public NaryNodeOp { bool transS_; bool swapOperands_; public: - CSRDotNodeOp(const Shape& S_shape, Expr S_values, Expr S_indices, Expr S_offsets, Expr D, bool transS, bool swapOperands) - : NaryNodeOp({ S_values, S_indices, S_offsets, D }, newShape(S_shape, S_values, S_indices, S_offsets, D, transS, swapOperands), commonType({S_values, D})), - transS_(transS), swapOperands_(swapOperands) { + CSRDotNodeOp(const Shape& S_shape, Expr S_values, Expr S_indices, + Expr S_offsets, Expr D, bool transS, bool swapOperands) + : NaryNodeOp({ S_values, S_indices, S_offsets, D }, + newShape(S_shape, S_values, S_indices, S_offsets, D, transS, swapOperands), + NaryNodeOp::commonType({S_values, D})), + transS_(transS), swapOperands_(swapOperands) { matchOrAbort(S_indices->value_type()); matchOrAbort(S_offsets->value_type()); } @@ -513,7 +516,7 @@ public: NodeOps backwardOps() override { return { nullptr, // can't backprop into the sparse matrix (the gradient is dense) - nullptr, + nullptr, nullptr, NodeOp(CSRProd(child(3)->grad(), // child(3) = D graph()->allocator(), @@ -527,7 +530,7 @@ public: virtual size_t hash() override { size_t seed = NaryNodeOp::hash(); for(auto s : shape()) - util::hash_combine(seed, s); + util::hash_combine(seed, s); util::hash_combine(seed, transS_); util::hash_combine(seed, swapOperands_); return seed; @@ -1050,8 +1053,8 @@ struct ConcatenateNodeOp : public NaryNodeOp { auto checkShape = shape; for(auto child : nodes) { checkShape.set(ax_, child->shape()[ax_]); // don't abort on different sizes on axis dim. - ABORT_IF(checkShape != child->shape(), - "Child shapes {} and {} cannot be concatenated along axis {}", + ABORT_IF(checkShape != child->shape(), + "Child shapes {} and {} cannot be concatenated along axis {}", shape, child->shape(), ax); sum += child->shape()[ax_]; diff --git a/src/graph/parameters.h b/src/graph/parameters.h index 25eb5db4..8b4af9dd 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -10,10 +10,10 @@ namespace marian { -// @TODO: Currently an ExpressionGraph only supports one Parameters object and +// @TODO: Currently an ExpressionGraph only supports one Parameters object and // the type of parameters has to be the inside on Parameters object. This limits // parameter types to a single chosen type, e.g. only fp32 or only fp16. This should -// be extended to allow multiple sets of parameters. +// be extended to allow multiple sets of parameters. // The reason here is to be able to efficiently compute updates of whole parameter // sets of one type. 
class Parameters { @@ -40,7 +40,7 @@ public: LOG(debug, "Created parameter object of type {}", acceptedElementType_); } - ~Parameters() { + virtual ~Parameters() { LOG(debug, "Destroyed parameter object of type {}", acceptedElementType_); } @@ -88,7 +88,7 @@ public: // sort parameters by name before allocation to make sure the memory layout after allocation is always the same std::sort(params_.begin(), params_.end(), [](Expr n1, Expr n2){ return n1->name() < n2->name(); }); - + for(auto p : params_) { if(!p->val()) { vals_->allocate(p->val(), p->shape(), p->value_type()); diff --git a/src/layers/generic.h b/src/layers/generic.h index e899d8a0..a3b9bac4 100755 --- a/src/layers/generic.h +++ b/src/layers/generic.h @@ -39,6 +39,7 @@ public: // Simplest layer interface: Unary function struct IUnaryLayer { + virtual ~IUnaryLayer() {} virtual Expr apply(Expr) = 0; virtual Expr apply(const std::vector& es) { ABORT_IF(es.size() > 1, "Not implemented"); // simple stub @@ -59,6 +60,7 @@ struct IEmbeddingLayer { // alternative from indices directly virtual Expr applyIndices(const std::vector& embIdx, const Shape& shape) const = 0; + virtual ~IEmbeddingLayer() {} }; // base class for Encoder and Decoder classes, which have embeddings and a batch index (=stream index) diff --git a/src/layers/guided_alignment.h b/src/layers/guided_alignment.h index 48ad6599..f08d3f09 100755 --- a/src/layers/guided_alignment.h +++ b/src/layers/guided_alignment.h @@ -5,14 +5,14 @@ namespace marian { -static inline RationalLoss guidedAlignmentCost(Ptr graph, +static inline RationalLoss guidedAlignmentCost(Ptr /*graph*/, Ptr batch, Ptr options, Expr attention) { // [beam depth=1, max src length, batch size, tgt length] std::string guidedLossType = options->get("guided-alignment-cost"); // @TODO: change "cost" to "loss" float guidedLossWeight = options->get("guided-alignment-weight"); - + const auto& shape = attention->shape(); // [beam depth=1, max src length, batch size, tgt length] float epsilon = 1e-6f; Expr alignmentLoss; // sum up loss over all attention/alignment positions @@ -55,8 +55,8 @@ static inline RationalLoss guidedAlignmentCost(Ptr graph, else ABORT("Unknown alignment cost type: {}", guidedLossType); // every position is a label as they should all agree - // @TODO: there should be positional masking here ... on the other hand, positions that are not - // in a sentence should always agree (both being 0). Lack of masking affects label count only which is + // @TODO: there should be positional masking here ... on the other hand, positions that are not + // in a sentence should always agree (both being 0). Lack of masking affects label count only which is // probably negligible? numLabels = shape.elements(); } diff --git a/src/layers/loss.h b/src/layers/loss.h index d9deaeb8..43e89c1d 100755 --- a/src/layers/loss.h +++ b/src/layers/loss.h @@ -331,6 +331,7 @@ public: : LabelwiseLoss(axes), // cross-entropy already reduces over axis -1 labelSmoothing_(labelSmoothing), factorWeight_(factorWeight) {} + virtual ~CrossEntropyLoss() {} protected: float labelSmoothing_; // interpolation factor for label smoothing, see below float factorWeight_; // give extra weight to factors @@ -368,7 +369,7 @@ protected: if(labelWeights) { // We currently do not know how to use target factors and word-level label weights together - bool wordlevel = labelWeights->shape()[-3] > 1; // Time-dimension is not trivially 1, hence we have word-level weights. 
+ bool wordlevel = labelWeights->shape()[-3] > 1; // Time-dimension is not trivially 1, hence we have word-level weights. ABORT_IF(wordlevel && logits.getNumFactorGroups() > 1, "CE loss with word-level label weights is not implemented for factors"); ce = ce * cast(labelWeights, Type::float32); } @@ -379,15 +380,15 @@ protected: /** - * @brief Unlikelihood loss across last axis, summed up over batch and time dimensions. This is an - * implementation of sequence-level unlikelihood loss from https://arxiv.org/abs/1908.04319. + * @brief Unlikelihood loss across last axis, summed up over batch and time dimensions. This is an + * implementation of sequence-level unlikelihood loss from https://arxiv.org/abs/1908.04319. * We rely on word-level label weights where 1 is correct and 0 is marking an error. If there are not * zeros for a sentence it going to be trained with normal CE loss if there is at least one 0 it is going * to flip over to use SUL for that sentence to penalize the selected word. - * + * * SUL is implemented as: * -log(gather(1 - softmax(logits), -1, indices)) - * + * * Factors are currently not supported. */ class SequenceUnlikelihoodLoss : public CrossEntropyLoss { @@ -411,17 +412,17 @@ protected: ABORT_IF(!mask, "mask is required"); // @TODO: check this, it seems weights for padding are by default 1, which would make this obsolete. // use label weights, where 1 is GOOD and 0 is BAD. After inversion here, now 1 marks, mask again to eliminate padding (might be obsolete) auto errorMask = (1.f - cast(labelWeights, Type::float32)) * cast(mask, Type::float32); - + auto ceUl = logits.applyLossFunction(labels, [&](Expr logits, Expr indices) { return cast(unlikelihood(logits, indices), Type::float32); }); - + // compute if want to use CE or UL. If there are no errors train with CE, otherwise train _only on_ the errors with UL. This is the "mixed" training - // schedule from https://arxiv.org/abs/1908.04319. Providing labels with or without error scores we can easily switch between CE and UL. + // schedule from https://arxiv.org/abs/1908.04319. Providing labels with or without error scores we can easily switch between CE and UL. 
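A scalar restatement of the mixed schedule described above may help; the sketch below is illustrative only (made-up probabilities, outside Marian's expression graph): p[i] is the probability the model assigns to the label at position i, and isError[i] marks labels that the word-level weights flag as wrong.

    #include <cmath>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // If no label in the sentence is marked as an error, train with plain
    // cross-entropy; otherwise train only on the marked errors with the
    // unlikelihood term -log(1 - p), as in https://arxiv.org/abs/1908.04319.
    double mixedLoss(const std::vector<double>& p, const std::vector<int>& isError) {
      bool anyError = false;
      for (int e : isError) anyError = anyError || (e != 0);
      double loss = 0.0;
      for (std::size_t i = 0; i < p.size(); ++i) {
        if (!anyError)
          loss += -std::log(p[i]);        // cross-entropy for clean sentences
        else if (isError[i])
          loss += -std::log(1.0 - p[i]);  // unlikelihood, only at error positions
      }
      return loss;
    }

    int main() {
      std::printf("clean sentence (CE):   %.3f\n", mixedLoss({0.9, 0.8}, {0, 0}));
      std::printf("one marked error (UL): %.3f\n", mixedLoss({0.9, 0.8}, {0, 1}));
      return 0;
    }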
auto onlyCe = eq(sum(errorMask, /*axis=*/-3), 0.f); // [1, 1, dimBatch, 1] - equal 1 if no errors are present ceUl = errorMask * ceUl; // don't use for correct label or padding - auto cost = onlyCe * ce + (1.f - onlyCe) * ceUl; // ce or unlikelihood part are never simultanously used as cost per batch entry + auto cost = onlyCe * ce + (1.f - onlyCe) * ceUl; // ce or unlikelihood part are never simultanously used as cost per batch entry return cost; } diff --git a/src/layers/weight.h b/src/layers/weight.h index 787c2376..74cf47d6 100644 --- a/src/layers/weight.h +++ b/src/layers/weight.h @@ -17,6 +17,7 @@ public: virtual void debugWeighting(std::vector /*weightedMask*/, std::vector /*freqMask*/, Ptr /*batch*/){}; + virtual ~WeightingBase() {} }; class DataWeighting : public WeightingBase { diff --git a/src/microsoft/quicksand.cpp b/src/microsoft/quicksand.cpp index 0f0ea253..2f7687aa 100755 --- a/src/microsoft/quicksand.cpp +++ b/src/microsoft/quicksand.cpp @@ -41,6 +41,7 @@ class VocabWrapper : public IVocabWrapper { Ptr pImpl_; public: VocabWrapper(Ptr vocab) : pImpl_(vocab) {} + virtual ~VocabWrapper() {} WordIndex encode(const std::string& word) const override { return (*pImpl_)[word].toWordIndex(); } std::string decode(WordIndex id) const override { return (*pImpl_)[Word::fromWordIndex(id)]; } size_t size() const override { return pImpl_->size(); } @@ -243,7 +244,7 @@ DecoderCpuAvxVersion parseCpuAvxVersion(std::string name) { } } -// @TODO: clean-up this code and unify with marian-conv. The targetPrec parameter is not clear enought etc. +// @TODO: clean-up this code and unify with marian-conv. The targetPrec parameter is not clear enought etc. bool convertModel(std::string inputFile, std::string outputFile, int32_t targetPrec) { std::cout << "Converting from: " << inputFile << ", to: " << outputFile << std::endl; diff --git a/src/microsoft/quicksand.h b/src/microsoft/quicksand.h index 1089be14..87de1948 100755 --- a/src/microsoft/quicksand.h +++ b/src/microsoft/quicksand.h @@ -54,6 +54,8 @@ public: const std::vector& ptrs) : options_(options), ptrs_(ptrs) {} + virtual ~IBeamSearchDecoder() {} + virtual QSNBestBatch decode(const QSBatch& qsBatch, size_t maxLength, const std::unordered_set& shortlist) diff --git a/src/models/costs.h b/src/models/costs.h index 03b247f0..2a66e549 100755 --- a/src/models/costs.h +++ b/src/models/costs.h @@ -25,6 +25,7 @@ public: Ptr graph, // @TODO: why needed? Can it be gotten from model? 
Ptr batch, bool clearGraph = true) = 0; + virtual ~ICost() {} }; class EncoderDecoderCECost : public ICost { @@ -51,6 +52,8 @@ public: weighter_ = WeightingFactory(options_); } + virtual ~EncoderDecoderCECost() {} + Ptr apply(Ptr model, Ptr graph, Ptr batch, @@ -136,6 +139,8 @@ public: Trainer(Ptr model, Ptr cost) : model_(model), cost_(cost) {} + virtual ~Trainer() {} + Ptr getModel() { return model_; } virtual void load(Ptr graph, @@ -179,6 +184,8 @@ public: Scorer(Ptr model, Ptr cost) : model_(model), logProb_(cost) {} + virtual ~Scorer(){} + Ptr getModel() { return model_; } virtual void load(Ptr graph, @@ -211,6 +218,7 @@ public: class LogSoftmaxStep : public ILogProbStep { public: + virtual ~LogSoftmaxStep() {} virtual Ptr apply(Ptr state) override { // decoder needs normalized probabilities (note: skipped if beam 1 and --skip-cost) state->setLogProbs(state->getLogProbs().applyUnaryFunction(logsoftmax)); @@ -224,6 +232,7 @@ public: // with --output-sampling during translation with marian-decoder class GumbelSoftmaxStep : public ILogProbStep { public: + virtual ~GumbelSoftmaxStep() {} virtual Ptr apply(Ptr state) override { state->setLogProbs(state->getLogProbs().applyUnaryFunctions( [](Expr logits){ // lemma gets gumbelled diff --git a/src/models/encoder_decoder.h b/src/models/encoder_decoder.h index 599dbfdc..92c1647f 100755 --- a/src/models/encoder_decoder.h +++ b/src/models/encoder_decoder.h @@ -11,6 +11,7 @@ namespace marian { class IEncoderDecoder : public models::IModel { public: + virtual ~IEncoderDecoder() {} virtual void load(Ptr graph, const std::string& name, bool markedReloaded = true) override diff --git a/src/models/model_base.h b/src/models/model_base.h index 016f3b8e..5f76b380 100644 --- a/src/models/model_base.h +++ b/src/models/model_base.h @@ -41,6 +41,8 @@ public: // @TODO: Is there a better name? class ICriterionFunction { public: + virtual ~ICriterionFunction() {} + virtual void load(Ptr, const std::string&, bool markReloaded = true) diff --git a/src/models/model_task.h b/src/models/model_task.h index 1fcf6eca..96dfadd0 100644 --- a/src/models/model_task.h +++ b/src/models/model_task.h @@ -5,10 +5,12 @@ namespace marian { struct ModelTask { + virtual ~ModelTask() {} virtual void run() = 0; }; struct ModelServiceTask { + virtual ~ModelServiceTask() {} virtual std::string run(const std::string&) = 0; }; } // namespace marian diff --git a/src/models/s2s.h b/src/models/s2s.h index 4c0fd019..8d12f96e 100755 --- a/src/models/s2s.h +++ b/src/models/s2s.h @@ -11,6 +11,7 @@ namespace marian { class EncoderS2S : public EncoderBase { using EncoderBase::EncoderBase; public: + virtual ~EncoderS2S() {} Expr applyEncoderRNN(Ptr graph, Expr embeddings, Expr mask, @@ -254,7 +255,7 @@ public: auto embeddings = state->getTargetHistoryEmbeddings(); // The batch dimension of the inputs can change due to batch-pruning, in that case - // cached elements need to be rebuilt, in this case the mapped encoder context in the + // cached elements need to be rebuilt, in this case the mapped encoder context in the // attention mechanism of the decoder RNN. 
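The comment above, and the @TODO a few lines below, describe rebuilding the cached decoder RNN whenever the batch dimension changes. One possible shape for the suggested Cached(build, updateIf) helper, purely hypothetical and not part of this patch, is:

#include <functional>
#include <iostream>

// Hypothetical helper for the @TODO below: builds the value on first access and
// rebuilds it whenever the supplied predicate reports that the cached copy is stale.
template <class T>
class Cached {
  std::function<T()> build_;
  std::function<bool()> updateIf_;
  bool valid_{false};
  T value_{};

public:
  Cached(std::function<T()> build, std::function<bool()> updateIf)
      : build_(std::move(build)), updateIf_(std::move(updateIf)) {}

  T& get() {                 // "runs a check and rebuilds if required at dereferencing"
    if(!valid_ || updateIf_()) {
      value_ = build_();
      valid_ = true;
    }
    return value_;
  }
};

int main() {
  int dimBatch = 8, lastDimBatch = -1;
  Cached<int> rnnStandIn(
      /*build=*/[&]() -> int { lastDimBatch = dimBatch; return dimBatch * 2; },
      /*updateIf=*/[&]() { return dimBatch != lastDimBatch; });
  std::cout << rnnStandIn.get() << "\n";  // 16: built on first use
  dimBatch = 4;                           // simulate batch pruning
  std::cout << rnnStandIn.get() << "\n";  // 8: rebuilt because the batch dimension changed
}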
int currDimBatch = embeddings->shape()[-2]; if(!rnn_ || lastDimBatch_ != currDimBatch) // if currDimBatch is different, rebuild the cached RNN @@ -263,7 +264,7 @@ public: // Also @TODO: maybe implement a Cached(build, updateIf) that runs a check and rebuild if required // at dereferecing : // rnn_ = Cached( - // /*build=*/[]{ return constructDecoderRNN(graph, state); }, + // /*build=*/[]{ return constructDecoderRNN(graph, state); }, // /*updateIf=*/[]{ return state->batchDimChanged() }); // rnn_->transduce(...); diff --git a/src/models/states.h b/src/models/states.h index 21cf42a8..c2f9ee05 100755 --- a/src/models/states.h +++ b/src/models/states.h @@ -17,6 +17,7 @@ public: : context_(context), mask_(mask), batch_(batch) {} EncoderState() {} + virtual ~EncoderState() {} virtual Expr getContext() const { return context_; } virtual Expr getAttended() const { return context_; } @@ -53,6 +54,7 @@ public: const std::vector>& encStates, Ptr batch) : states_(states), logProbs_(logProbs), encStates_(encStates), batch_(batch) {} + virtual ~DecoderState() {} // @TODO: Do we need all these to be virtual? virtual const std::vector>& getEncoderStates() const { @@ -68,10 +70,10 @@ public: int beamSize) const { std::vector> newEncStates; - for(auto& es : encStates_) + for(auto& es : encStates_) // If the size of the batch dimension of the encoder state context changed, subselect the correct batch entries newEncStates.push_back(es->getContext()->shape()[-2] == batchIndices.size() ? es : es->select(batchIndices)); - + // hypindices matches batchIndices in terms of batch dimension, so we only need hypIndices auto selectedState = New( states_.select(hypIndices, beamSize, /*isBatchMajor=*/false), logProbs_, newEncStates, batch_); @@ -121,6 +123,7 @@ private: Words targetWords_; public: + virtual ~ClassifierState() {} virtual Expr getLogProbs() const { return logProbs_; } virtual void setLogProbs(Expr logProbs) { logProbs_ = logProbs; } diff --git a/src/optimizers/clippers.h b/src/optimizers/clippers.h index f953c2ec..67a50004 100644 --- a/src/optimizers/clippers.h +++ b/src/optimizers/clippers.h @@ -16,6 +16,7 @@ namespace marian { class ClipperBase { public: virtual void clip(Tensor) = 0; + virtual ~ClipperBase() {} }; typedef std::shared_ptr ClipperPtr; diff --git a/src/optimizers/optimizers.h b/src/optimizers/optimizers.h index 2b70ba84..d10af919 100644 --- a/src/optimizers/optimizers.h +++ b/src/optimizers/optimizers.h @@ -29,6 +29,8 @@ public: LOG(info, "[optimizers] Learning rate gets automatically adjusted as if minibatch size was {}", refMBWordsParam_); } + virtual ~OptimizerBase() {} + static constexpr size_t mbSizeNotProvided = SIZE_MAX; void update(Ptr graph, size_t mbSize = mbSizeNotProvided) { @@ -114,7 +116,7 @@ class Sgd : public OptimizerBase { public: Sgd(float eta, size_t refMBWordsParam = 0, Ptr clipper = nullptr) : OptimizerBase(eta, refMBWordsParam, clipper) {} - + virtual ~Sgd() {} virtual void setParams(const std::vector& /*params*/) override {} private: void updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBWords) override; diff --git a/src/rescorer/score_collector.h b/src/rescorer/score_collector.h index ba488436..38ba7a08 100644 --- a/src/rescorer/score_collector.h +++ b/src/rescorer/score_collector.h @@ -13,6 +13,7 @@ namespace marian { class ScoreCollector { public: ScoreCollector(const Ptr& options); + virtual ~ScoreCollector() {} virtual void Write(long id, const std::string& message); virtual void Write(long id, diff --git a/src/rnn/rnn.h b/src/rnn/rnn.h 
index c9131d87..4efc569c 100644 --- a/src/rnn/rnn.h +++ b/src/rnn/rnn.h @@ -35,7 +35,7 @@ protected: public: BaseRNN(Ptr graph, Ptr options) : graph_(graph), options_(options) {} - + virtual ~BaseRNN() {} virtual Expr transduce(Expr, Expr = nullptr) = 0; virtual Expr transduce(Expr, State, Expr = nullptr) = 0; virtual Expr transduce(Expr, States, Expr = nullptr) = 0; @@ -113,6 +113,7 @@ private: public: friend RNN; + virtual ~SingleLayerRNN() {} // @TODO: benchmark whether this concatenation is a good idea virtual Expr transduce(Expr input, Expr mask = nullptr) override { diff --git a/src/tensors/backend.h b/src/tensors/backend.h index 01d00267..ce8a5e60 100644 --- a/src/tensors/backend.h +++ b/src/tensors/backend.h @@ -17,7 +17,7 @@ protected: public: Backend(DeviceId deviceId, size_t seed) : deviceId_(deviceId), seed_(seed), randomGenerator_(createRandomGenerator(seed, deviceId)) {} - + virtual ~Backend() {}; virtual DeviceId getDeviceId() { return deviceId_; }; virtual Ptr getRandomGenerator() { return randomGenerator_; } diff --git a/src/tensors/cpu/device.cpp b/src/tensors/cpu/device.cpp index 941902a8..40bb558b 100644 --- a/src/tensors/cpu/device.cpp +++ b/src/tensors/cpu/device.cpp @@ -8,29 +8,40 @@ namespace marian { namespace cpu { +namespace { // allocate function for tensor reserve() below. -// Needed for AVX512, while not available on all compilers. It seems clang -// does not have aligned_alloc for all cstlib versions. If AVX512 is not used -// a simple malloc is probably fine. -// Should generate a runtime error otherwise as we have a check in the AVX512 -// functions which tests for alignment. -#ifdef _WIN32 -#define MALLOC(size) _aligned_malloc(size, alignment_) -#elif __GNUC__ -#define MALLOC(size) aligned_alloc(alignment_, size) -#else -#define MALLOC(size) malloc(size) -#endif +// Alignment is needed because we use AVX512 and AVX2 vectors. We should fail if we can't allocate aligned memory. #ifdef _WIN32 -#define FREE(ptr) _aligned_free(ptr) +void *genericMalloc(size_t alignment, size_t size) { + void *ret = _aligned_malloc(size, alignment); + ABORT_IF(!ret, "Failed to allocate memory on CPU"); + return ret; +} +void genericFree(void *ptr) { + _aligned_free(ptr); +} #else -#define FREE(ptr) free(ptr) +// Linux and OS X. There is no fallback to malloc because we need the memory to be aligned. +void *genericMalloc(size_t alignment, size_t size) { + // On macOS, aligned_alloc is available only with C++17. + // Furthermore, it requires that the memory requested is an exact multiple of the alignment, otherwise it fails. + // posix_memalign is available on both Mac (since 2016) and Linux, and in both gcc and clang. + void *result; + // An error can be detected via the return value or by the pointer remaining nullptr.
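As an aside, this portable aligned-allocation pattern can be exercised on its own. The sketch below is illustrative only: the names alignedMalloc/alignedFree are made up, and error handling is reduced to a null check instead of ABORT_IF.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#ifdef _WIN32
#include <malloc.h>
#endif

// Same strategy as genericMalloc/genericFree here: _aligned_malloc on Windows,
// posix_memalign elsewhere, because aligned_alloc is not reliably usable on macOS.
void* alignedMalloc(size_t alignment, size_t size) {
#ifdef _WIN32
  return _aligned_malloc(size, alignment);
#else
  void* p = nullptr;
  // alignment must be a power of two and a multiple of sizeof(void*); returns 0 on success
  return posix_memalign(&p, alignment, size) == 0 ? p : nullptr;
#endif
}

void alignedFree(void* p) {
#ifdef _WIN32
  _aligned_free(p);
#else
  free(p);
#endif
}

int main() {
  void* buf = alignedMalloc(/*alignment=*/64, /*size=*/256);  // 64 bytes suits AVX512 loads/stores
  std::printf("aligned: %d\n", buf && reinterpret_cast<uintptr_t>(buf) % 64 == 0);
  alignedFree(buf);
}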
+ ABORT_IF(posix_memalign(&result, alignment, size), "Failed to allocate memory on CPU"); + return result; +} +void genericFree(void *ptr) { + free(ptr); +} #endif +} // namespace + Device::~Device() { - FREE(data_); + genericFree(data_); } void Device::reserve(size_t size) { @@ -38,14 +49,12 @@ void Device::reserve(size_t size) { ABORT_IF(size < size_ || size == 0, "New size must be larger than old size and larger than 0"); + uint8_t *temp = static_cast(genericMalloc(alignment_, size)); if(data_) { - uint8_t *temp = static_cast(MALLOC(size)); std::copy(data_, data_ + size_, temp); - FREE(data_); - data_ = temp; - } else { - data_ = static_cast(MALLOC(size)); + genericFree(data_); } + data_ = temp; size_ = size; } } // namespace cpu diff --git a/src/tensors/cpu/fbgemm/expanded_gemm.h b/src/tensors/cpu/fbgemm/expanded_gemm.h index cf53fe1a..32cc6b12 100644 --- a/src/tensors/cpu/fbgemm/expanded_gemm.h +++ b/src/tensors/cpu/fbgemm/expanded_gemm.h @@ -17,6 +17,7 @@ #endif using namespace fbgemm; +// @TODO: don't use using namespace ...; in header files. Just don't. [UG] #endif // USE_FBGEMM namespace marian { @@ -96,7 +97,7 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp { const std::string type() override { return "packMatFp16"; } - Shape newShape(Expr a, bool transpose) { + Shape newShape(Expr MAYBE_UNUSED a, bool MAYBE_UNUSED transpose) { #if USE_FBGEMM auto shapeMat = a->shape(); // Should be 2D - weight matrix @@ -115,15 +116,14 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp { packsize_); Shape outShape({(int)packsize_}); - return outShape; -#else // USE_FBGEMM +#else ABORT("Packed GEMM requires a build with USE_FBGEMM enabled"); return Shape(); #endif // USE_FBGEMM } }; - + ; // Pack a matrix (int8) into cache utilization efficient way (block format) together with quantization into int8 // PackMatrix packMat_: the type of packed matrix - A or B matrix // marian::Type packType_: the type the input matrix is packed - packed8avx2 or packed8avx512 @@ -132,6 +132,7 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp { // int ncol_: the number of columns // uint64_t packsize_: the size of the packed matrix // (the size of int8 packed B from fbgemm:PackAWithQuantRowOffset + quantization scale, offset and zero point) + struct FbgemmPacked8PackNodeOp : public UnaryNodeOp { PackMatrix packMat_; marian::Type packType_; @@ -180,19 +181,21 @@ struct FbgemmPacked8PackNodeOp : public UnaryNodeOp { const std::string type() override { return "packMatInt8"; } - Shape newShape(Expr a, bool transpose) { #if USE_FBGEMM + Shape newShape(Expr a, bool transpose) { fbgemmPacked8PackInfo(a->shape(), packType_, transpose, nrow_, ncol_, packsize_); Shape outShape({(int)packsize_}); - return outShape; -#else // USE_FBGEMM + } +#else + Shape newShape(Expr /*a*/, bool /*transpose*/) { ABORT("Packed GEMM requires a build with USE_FBGEMM enabled"); return Shape(); -#endif // USE_FBGEMM } +#endif // USE_FBGEMM }; + // Affine transform (matrix multiplication) using packed B matrix // float scalar_: scalar multiplier // size_t m_: the number of rows in A and C @@ -202,7 +205,6 @@ struct FbgemmPacked8PackNodeOp : public UnaryNodeOp { // bool transB_: transpose B class FbgemmPacked16AffineNodeOp : public NaryNodeOp { private: - float scalar_; size_t m_; size_t n_; size_t k_; @@ -210,9 +212,8 @@ private: bool transB_; public: - FbgemmPacked16AffineNodeOp(const std::vector& nodes, Shape bShape, bool transA, bool transB, float scalar) - : NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), 
Type::float32), - scalar_(scalar) { + FbgemmPacked16AffineNodeOp(const std::vector& nodes, Shape bShape, bool transA, bool transB, float /*scalar*/) + : NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32)/*, scalar_(scalar)*/ { transA_ = transA; transB_ = transB; m_ = nodes[0]->shape().elements() / nodes[0]->shape()[-1]; @@ -281,7 +282,6 @@ public: // bool transB_: transpose B class FbgemmPacked8AffineNodeOp : public NaryNodeOp { private: - float scalar_; size_t m_; size_t n_; size_t k_; @@ -289,9 +289,8 @@ private: bool transB_; public: - FbgemmPacked8AffineNodeOp(const std::vector& nodes, Shape bShape, bool transA, bool transB, float scalar) - : NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32), - scalar_(scalar) { + FbgemmPacked8AffineNodeOp(const std::vector& nodes, Shape bShape, bool transA, bool transB, float /*scalar*/) + : NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32)/*, scalar_(scalar) */ { transA_ = transA; transB_ = transB; m_ = nodes[0]->shape().elements() / nodes[0]->shape()[-1]; @@ -302,7 +301,7 @@ public: size_t l = bShape.elements() / bShape[-1]; n_ = bShape[-1]; if(transB) - std::swap(l, n_); + std::swap(l, n_); } Shape newShape(Expr a, Shape bShape, bool transA, bool transB) { @@ -369,9 +368,9 @@ static inline Expr affine(Expr a, Expr b, Shape bShape, Expr c, bool transA, boo Type elementType = b->value_type(); if (elementType == Type::packed16) - return Expression(nodes, bShape, transA, transB, scalar); + return Expression(nodes, bShape, transA, transB, scalar); else if (isPacked(elementType) && sizeOf(elementType) == 1) - return Expression(nodes, bShape, transA, transB, scalar); + return Expression(nodes, bShape, transA, transB, scalar); else { ABORT("Only int8 and fp16 are available. {}", elementType); return nullptr; @@ -380,9 +379,9 @@ static inline Expr affine(Expr a, Expr b, Shape bShape, Expr c, bool transA, boo static inline Expr pack(Type elementType, Expr a, PackMatrix packMat, bool transpose, float clipValue) { if (elementType == Type::packed16) - return Expression(a, packMat, transpose, clipValue); + return Expression(a, packMat, transpose, clipValue); else if (isPacked(elementType) && sizeOf(elementType) == 1) - return Expression(a, packMat, elementType, transpose, clipValue); + return Expression(a, packMat, elementType, transpose, clipValue); else { ABORT("Only int8 and fp16 are available. {}", elementType); return nullptr; @@ -394,9 +393,9 @@ static inline Expr dot(Expr a, Expr b, Shape bShape, bool transA, bool transB, f Type elementType = b->value_type(); if (elementType == Type::packed16) - return Expression(nodes, bShape, transA, transB, scalar); + return Expression(nodes, bShape, transA, transB, scalar); else if (isPacked(elementType) && sizeOf(elementType) == 1) - return Expression(nodes, bShape, transA, transB, scalar); + return Expression(nodes, bShape, transA, transB, scalar); else { ABORT("Only int8 and fp16 are available. 
{}", elementType); return nullptr; diff --git a/src/tensors/cpu/tensor_operators.cpp b/src/tensors/cpu/tensor_operators.cpp index 91e058b8..ae5eeed5 100755 --- a/src/tensors/cpu/tensor_operators.cpp +++ b/src/tensors/cpu/tensor_operators.cpp @@ -20,7 +20,7 @@ namespace marian { namespace cpu { -void IsNaN(const Tensor in, Ptr allocator, bool& /*isNaN*/, bool& /*isInf*/) { + void IsNaN(const Tensor /*in*/, Ptr /*allocator*/, bool& /*isNaN*/, bool& /*isInf*/) { ABORT("Not implemented"); } @@ -214,9 +214,11 @@ void Transpose0213(Tensor out, Tensor in) { } } +// This function is called only when MKL is available. +#if MKL_FOUND // Given a 4D array, transpose (swap) the initial 3 dimensions while keeping the last dimension. // e.g. 1234 --> 2134, 1234 --> 3214 (4 is always kept). -// This is an optimized version for swapping first 3 dimensions +// This is an optimized version for swapping first 3 dimensions // assuming the last dimension is large enough to get benefits from vectorized copy. // // @param out output tensor @@ -225,14 +227,13 @@ void Transpose0213(Tensor out, Tensor in) { template void TransposeFirst3In4(Tensor out, Tensor in, const std::vector& vAxis) { ABORT_IF(vAxis.size() != 4, "This function handles only 4D arrays."); -#if MKL_FOUND int innermost = in->shape()[-1]; int l1 = in->shape()[vAxis[0]]; int l2 = in->shape()[vAxis[1]]; int l3 = in->shape()[vAxis[2]]; - // find the mapping between the transposed output dimensional indices (oi, oj, ok) + // find the mapping between the transposed output dimensional indices (oi, oj, ok) // and original input dimensional indices (i, j, k) int oi, oj, ok; #pragma omp parallel for @@ -275,11 +276,8 @@ void TransposeFirst3In4(Tensor out, Tensor in, const std::vector& vAxis) { } } } -#else - // it shouldn't come into here. This function is called only when MKL is available. - ABORT("Should not get here"); -#endif // MKL_FOUND } +#endif // MKL_FOUND inline void transpose4x4_SSE(const float* A, float* B, @@ -656,7 +654,7 @@ void SelectAxis2(Tensor out, functional::Shape outShape = out->shape(); functional::Shape inShape = in->shape(); - + auto idxData = indices->data(); auto odata = out->data(); const auto idata = in->data(); diff --git a/src/tensors/rand.h b/src/tensors/rand.h index f3e356fb..94b44a97 100644 --- a/src/tensors/rand.h +++ b/src/tensors/rand.h @@ -15,11 +15,11 @@ protected: public: RandomGenerator(size_t seed) : seed_(seed) { } - + virtual ~RandomGenerator() {} virtual void uniform(Tensor, float a, float b) = 0; virtual void normal(Tensor, float mean, float stddev) = 0; }; Ptr createRandomGenerator(size_t /*seed*/, DeviceId); -} \ No newline at end of file +} diff --git a/src/tensors/tensor_operators.h b/src/tensors/tensor_operators.h index d7565240..227bc953 100644 --- a/src/tensors/tensor_operators.h +++ b/src/tensors/tensor_operators.h @@ -25,7 +25,7 @@ namespace marian { template -void copy(Ptr backend, const InIt beg, const InIt end, OutIt it) { +void copy(Ptr& MAYBE_UNUSED backend, const InIt beg, const InIt end, OutIt it) { #ifdef CUDA_FOUND if(backend->getDeviceId().type == DeviceType::gpu) gpu::copy(backend, beg, end, it); @@ -119,7 +119,7 @@ DISPATCH3(Concatenate, marian::Tensor, const std::vector&, int) // clang-format on -// Bernoulli(tensor, 0.5f, 2.f, -1.f) generates a tensor composed of 50% of 1 and 50% of -1. +// Bernoulli(tensor, 0.5f, 2.f, -1.f) generates a tensor composed of 50% of 1 and 50% of -1. 
static inline void Bernoulli(Tensor resultTensor, float keepProb, float scale = 1.f, float shift = 0.f) { // in-place uniform distribution auto rnd = resultTensor->getBackend()->getRandomGenerator(); @@ -190,7 +190,7 @@ void LayerNormalizationGrad(Tensor gradX, } static inline void LayerNormalizationGrad( - Ptr allocator, + Ptr MAYBE_UNUSED allocator, Tensor gradX, Tensor gradGamma, Tensor gradBeta, diff --git a/src/tests/prod.cpp b/src/tests/prod.cpp index 37809bac..698712b9 100644 --- a/src/tests/prod.cpp +++ b/src/tests/prod.cpp @@ -1,7 +1,7 @@ #include "marian.h" #include "common/timer.h" -int main(int argc, char** argv) { +int main(int /*argc*/, char** /*argv*/) { using namespace marian; { diff --git a/src/tests/sqlite.cpp b/src/tests/sqlite.cpp index 21748514..f822dbc5 100644 --- a/src/tests/sqlite.cpp +++ b/src/tests/sqlite.cpp @@ -8,6 +8,8 @@ #include int main(int argc, char** argv) { + ABORT_IF(argc != 3, "FATAL ERROR: Incorrect number of command line arguments " + "(expected: 2) for command {}.",argv[0]); SQLite::Database db("corpus.db", SQLite::OPEN_READWRITE|SQLite::OPEN_CREATE); db.exec("PRAGMA temp_store_directory = '/data1/marcinjd';"); diff --git a/src/training/communicator.cpp b/src/training/communicator.cpp index be9b23be..98b7be7f 100644 --- a/src/training/communicator.cpp +++ b/src/training/communicator.cpp @@ -38,7 +38,7 @@ Ptr createCommunicator( } // the actual implementation is inside communicator.cu - return New(graphs, mpi); + return New(graphs, mpi); #else // no CUDA or no NCCL noNccl; // (unused) return New(graphs, mpi); @@ -141,7 +141,7 @@ public: FakeMPIWrapper(bool) { LOG(warn, "Compiled without MPI support. Falling back to FakeMPIWrapper"); } - + virtual ~FakeMPIWrapper() {} virtual size_t myMPIRank() const override { return 0; }; virtual size_t numMPIProcesses() const override { return 1; }; diff --git a/src/training/communicator.h b/src/training/communicator.h index 6ccce204..47274491 100644 --- a/src/training/communicator.h +++ b/src/training/communicator.h @@ -156,11 +156,8 @@ public: void scatterReduceAndResetGrads() const override { const_cast(this)->lazyInit(); - int totalSize = (int)graphs_[0]->params()->vals()->size(); - int shardSize = (int)ceil(totalSize / (float)graphs_.size()); - // Gather gradients from different devices into current gradient shards - auto scatter = [this, shardSize](size_t idx, size_t begin, size_t end) { + auto scatter = [this](size_t idx, size_t begin, size_t end) { auto curGrad = graphs_[idx]->params()->grads()->subtensor(begin, end-begin); // collect and sum gradients @@ -176,7 +173,7 @@ public: }; // reset gradients outside current shard - auto reset = [this, shardSize](size_t idx, size_t begin, size_t end) { + auto reset = [this](size_t idx, size_t begin, size_t end) { auto grad = graphs_[idx]->params()->grads(); if (begin > 0) grad->subtensor(0, begin)->set(0); @@ -189,11 +186,9 @@ public: } void allGatherParams() const override { - int totalSize = (int)graphs_[0]->params()->vals()->size(); - int shardSize = (int)ceil(totalSize / (float)graphs_.size()); // Update all graphs with parameter shard - auto gather = [this, shardSize](size_t idx, size_t begin, size_t end) { + auto gather = [this](size_t idx, size_t begin, size_t end) { auto getShard = [&](Ptr graph) { return graph->params()->vals()->subtensor(begin, end-begin); }; diff --git a/src/training/gradient_dropping/sparse_tensor.h b/src/training/gradient_dropping/sparse_tensor.h index 5effa3d7..652a77ec 100644 --- a/src/training/gradient_dropping/sparse_tensor.h +++ 
b/src/training/gradient_dropping/sparse_tensor.h @@ -118,7 +118,7 @@ public: } // Convert a tensor into a sparse tensor format - void fromDense(Tensor t) { + void fromDense(Tensor MAYBE_UNUSED t) { if(backend_->getDeviceId().type == DeviceType::cpu) { ABORT("Gradient Dropping for CPU is not yet supported"); } diff --git a/src/training/graph_group.h b/src/training/graph_group.h index 265f9785..56b8afe3 100644 --- a/src/training/graph_group.h +++ b/src/training/graph_group.h @@ -54,10 +54,10 @@ public: * number of devices, which is passed in as the 'multiplier'. */ // @TODO: Can this be made const? It seems wrong to have a stateful method that still returns a result. - virtual Ptr collectStats(Ptr graph, - Ptr model, - const std::vector>& vocabs, - double multiplier = 1.) { + Ptr collectStats(Ptr graph, + Ptr model, + const std::vector>& vocabs, + double multiplier = 1.) { auto stats = New(); size_t numFiles = options_->get>("train-sets").size(); @@ -92,8 +92,8 @@ public: maxBatch *= 2; } - // Do a binary search for maxmimum batch size that fits into given workspace memory - // for a tested sentence length. + // Do a binary search for the maximum batch size that fits into the given workspace memory + // for a tested sentence length. for(size_t i = step; i <= maxLength; i += step) { size_t start = 1; size_t end = maxBatch; diff --git a/src/training/graph_group_async.h b/src/training/graph_group_async.h index 0e27a665..dd5254bb 100644 --- a/src/training/graph_group_async.h +++ b/src/training/graph_group_async.h @@ -64,7 +64,7 @@ public: void save(Ptr, bool final = false); // @TODO: give it a fake batch generator which owns vocabs instead of passing vocabs - Ptr collectStats(const std::vector>& vocabs) { + virtual Ptr collectStats(const std::vector>& vocabs) { return GraphGroup::collectStats(graphs_[0], builders_[0], vocabs); } diff --git a/src/training/graph_group_multinode_sync.h b/src/training/graph_group_multinode_sync.h index ff1fe4e9..0a089d7b 100644 --- a/src/training/graph_group_multinode_sync.h +++ b/src/training/graph_group_multinode_sync.h @@ -63,7 +63,6 @@ private: Tensor paramsAvg_; std::vector accGradientsSync_cpu; std::vector receiveBuffer_cpu; - bool synchronization_happened{false}; Ptr syncOptimizer_; diff --git a/src/training/graph_group_sync.h b/src/training/graph_group_sync.h index b127296d..067d245c 100644 --- a/src/training/graph_group_sync.h +++ b/src/training/graph_group_sync.h @@ -26,7 +26,6 @@ class SyncGraphGroup : public GraphGroup, public ExponentialSmoothing { // state for update() bool first_{ true }; // gets interpreted and cleared by update() std::vector> pendingBatches_; // in case of dynamic MB-size scaling, we temporarily buffer up batches across update() calls until enough - size_t typicalTrgWords_{}; // typical batch size in words (labels), 0 if unknown (e.g.
specified in sentences) double updateMultiplier_{1}; // multiplier not applied in collectStats() (no multiplier if not mini-batch-fit) void initialize(const Ptr& exampleBatch); diff --git a/src/training/training_state.h b/src/training/training_state.h index d39173be..1eb187b7 100644 --- a/src/training/training_state.h +++ b/src/training/training_state.h @@ -13,6 +13,7 @@ class TrainingState; class TrainingObserver { public: + virtual ~TrainingObserver() {} virtual void init(TrainingState&) {} virtual void actAfterEpoch(TrainingState&) {} virtual void actAfterBatches(TrainingState&) {} diff --git a/src/training/validator.h b/src/training/validator.h index e97f0da6..1658dff3 100755 --- a/src/training/validator.h +++ b/src/training/validator.h @@ -36,6 +36,7 @@ protected: public: ValidatorBase(bool lowerIsBetter) : lowerIsBetter_(lowerIsBetter), lastBest_{initScore()} {} + virtual ~ValidatorBase() {} virtual float validate(const std::vector>& graphs, Ptr state) = 0; @@ -51,6 +52,7 @@ public: template // @TODO: BuilderType doesn't really serve a purpose here? Review and remove. class Validator : public ValidatorBase { public: + virtual ~Validator() {} Validator(std::vector> vocabs, Ptr options, bool lowerIsBetter = true) : ValidatorBase(lowerIsBetter), vocabs_(vocabs), @@ -137,6 +139,7 @@ class CrossEntropyValidator : public Validator> vocabs, Ptr options); + virtual ~CrossEntropyValidator() {} std::string type() override { return options_->get("cost-type"); } @@ -148,6 +151,7 @@ protected: class AccuracyValidator : public Validator { public: AccuracyValidator(std::vector> vocabs, Ptr options); + virtual ~AccuracyValidator() {} std::string type() override { return "accuracy"; } @@ -161,6 +165,7 @@ private: public: BertAccuracyValidator(std::vector> vocabs, Ptr options, bool evalMaskedLM); + virtual ~BertAccuracyValidator() {} std::string type() override { if(evalMaskedLM_) @@ -177,6 +182,7 @@ protected: class ScriptValidator : public Validator { public: ScriptValidator(std::vector> vocabs, Ptr options); + virtual ~ScriptValidator() {} virtual float validate(const std::vector>& graphs, Ptr /*ignored*/) override; @@ -193,6 +199,7 @@ protected: class TranslationValidator : public Validator { public: TranslationValidator(std::vector> vocabs, Ptr options); + virtual ~TranslationValidator() {} virtual float validate(const std::vector>& graphs, Ptr state) override; @@ -212,6 +219,7 @@ protected: class BleuValidator : public Validator { public: BleuValidator(std::vector> vocabs, Ptr options, bool detok = false); + virtual ~BleuValidator() {} virtual float validate(const std::vector>& graphs, Ptr state) override; diff --git a/src/translator/output_collector.h b/src/translator/output_collector.h index 9e0e12b5..ffcbd2d5 100644 --- a/src/translator/output_collector.h +++ b/src/translator/output_collector.h @@ -11,6 +11,7 @@ namespace marian { class PrintingStrategy { public: + virtual ~PrintingStrategy() {} virtual bool shouldBePrinted(long) = 0; }; diff --git a/src/translator/scorers.h b/src/translator/scorers.h index c936514c..a5a0be2c 100755 --- a/src/translator/scorers.h +++ b/src/translator/scorers.h @@ -10,6 +10,8 @@ namespace marian { class ScorerState { public: + virtual ~ScorerState(){} + virtual Logits getLogProbs() const = 0; virtual void blacklist(Expr /*totalCosts*/, Ptr /*batch*/){}; @@ -24,6 +26,8 @@ public: Scorer(const std::string& name, float weight) : name_(name), weight_(weight) {} + virtual ~Scorer(){} + std::string getName() { return name_; } float getWeight() { return weight_; } @@ 
-53,6 +57,7 @@ protected: public: ScorerWrapperState(Ptr state) : state_(state) {} + virtual ~ScorerWrapperState() {} virtual Ptr getState() { return state_; } @@ -88,6 +93,8 @@ public: encdec_(std::static_pointer_cast(encdec)), ptr_{ptr} {} + virtual ~ScorerWrapper() {} + virtual void init(Ptr graph) override { graph->switchParams(getName()); if(ptr_)