mirror of
https://github.com/marian-nmt/marian.git
synced 2024-11-03 20:13:47 +03:00
Add support for compiling on Mac (and clang) (#598)
* Compile marian on mac and clang. Two linker errors left * MacOS has a different definition for unsigned long * Find OpenBLAS on mac * Fix a typo in the BLAS detection * Simplify and add comments * Refactor cpu allocation code. Do not fall back to malloc * Fix compilation warning on gcc * Refactor memory allocation * Make things compile with clang-8 with fewer warnings. * Eliminate clang warnings when compiling examples and when compiling without MKL * added USE_MKL option to compile without MKL for debugging even when MKL is installed * fixed issues with compiling examples with clang * Fix compile errors with clang in src/tests. * Fix missing whitespace in error message in src/tests/sqlite.cpp. * Responding to Frank Seide's code review. * Eliminate clang warnings when compiling with -DUSE_FBGEMM=on. * Fix compilation on gcc 8 * Get Marian to compile with Clang-10. * Fix Clang-8 warnings when compiling with marian-server * Add more comments and explicit unsigned long long for windows * Pull in fbgemm that supports mac * Fix warning flags order in CMakeLists.txt Co-authored-by: Kenneth Heafield <kpu@users.noreply.github.com> Co-authored-by: Ulrich Germann <ulrich.germann@gmail.com> Co-authored-by: Roman Grundkiewicz <romang@amu.edu.pl>
This commit is contained in:
parent
67b055fe4a
commit
00d2e999e3
@ -24,6 +24,7 @@ option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF)
|
||||
option(USE_CUDNN "Use CUDNN library" OFF)
|
||||
option(USE_DOXYGEN "Build documentation with Doxygen" ON)
|
||||
option(USE_FBGEMM "Use FBGEMM" OFF)
|
||||
option(USE_MKL "Compile with MKL support" ON)
|
||||
option(USE_MPI "Use MPI library" OFF)
|
||||
option(USE_NCCL "Use NCCL library" ON)
|
||||
option(USE_SENTENCEPIECE "Download and compile SentencePiece" OFF)
|
||||
@ -33,7 +34,7 @@ option(USE_STATIC_LIBS "Link statically against non-system libs" OFF)
|
||||
if(USE_CCACHE)
|
||||
find_program(CCACHE_PROGRAM ccache)
|
||||
if(CCACHE_PROGRAM)
|
||||
message(STATUS "Found and will be using ccache for faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).")
|
||||
message(STATUS "Will be using ccache for faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).")
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
|
||||
else(CCACHE_PROGRAM)
|
||||
message(WARNING "Compilation with ccache requested but no ccache found.")
|
||||
@ -141,20 +142,32 @@ else(MSVC)
|
||||
add_definitions(-DUSE_FBGEMM=1)
|
||||
endif(USE_FBGEMM)
|
||||
|
||||
set(DISABLE_GLOBALLY "-Wno-unused-result")
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
|
||||
# Clang-10.0.0 complains when CUDA is newer than 10.1
|
||||
set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-cuda-version")
|
||||
endif()
|
||||
set(DISABLE_GLOBALLY "-Wno-unused-result -Wno-unknown-warning-option ${CLANG_IGNORE_UNKNOWN_CUDA}")
|
||||
|
||||
# These are used in src/CMakeLists.txt on a per-target basis
|
||||
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function;
|
||||
-Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;)
|
||||
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated;
|
||||
-Wno-pragmas; -Wno-unused-parameter; -Wno-unused-function;
|
||||
-Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare;
|
||||
-Wno-missing-field-initializers;)
|
||||
|
||||
# This warning does not exist prior to gcc 5.0
|
||||
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
|
||||
list(APPEND ALL_WARNINGS -Wsuggest-override)
|
||||
list(APPEND ALL_WARNINGS -Wsuggest-override -Wno-int-in-bool-context)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wl,--no-as-needed -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -m64 -funroll-loops -ffinite-math-only -g -rdynamic")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -rdynamic")
|
||||
if(CMAKE_COMPILER_IS_GNUCC)
|
||||
# these flags are not known to clang
|
||||
set(CMAKE_GCC_FLAGS "-Wl,--no-as-needed")
|
||||
set(CMAKE_RDYNAMIC_FLAG "-rdynamic")
|
||||
endif(CMAKE_COMPILER_IS_GNUCC)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
|
||||
set(CMAKE_CXX_FLAGS_SLIM "-Ofast -m64 -funroll-loops -ffinite-math-only -DNDEBUG")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg")
|
||||
@ -162,9 +175,9 @@ else(MSVC)
|
||||
set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
|
||||
|
||||
# these need to be set separately
|
||||
set(CMAKE_C_FLAGS "-pthread -Wl,--no-as-needed -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
|
||||
set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -ffinite-math-only -g -rdynamic")
|
||||
set(CMAKE_C_FLAGS_DEBUG "-O0 -g -rdynamic")
|
||||
set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
|
||||
set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}")
|
||||
set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
|
||||
set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -ffinite-math-only -DNDEBUG")
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}")
|
||||
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
|
||||
@ -204,7 +217,7 @@ if(CUDA_FOUND)
|
||||
if((CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0") AND (CMAKE_VERSION VERSION_LESS "3.12.2"))
|
||||
message(WARNING "On some Unix systems CUDA 10.0+ requires CMake 3.12.2+; you use CMake ${CMAKE_VERSION}")
|
||||
endif()
|
||||
|
||||
|
||||
if(COMPILE_CUDA_SM35)
|
||||
LIST(APPEND COMPUTE -arch=sm_35; -gencode=arch=compute_35,code=sm_35;) # Tesla K40 and above
|
||||
endif(COMPILE_CUDA_SM35)
|
||||
@ -323,13 +336,15 @@ if(USE_MPI)
|
||||
endif(USE_MPI)
|
||||
|
||||
if(COMPILE_CPU)
|
||||
find_package(MKL)
|
||||
if(USE_MKL)
|
||||
find_package(MKL)
|
||||
endif(USE_MKL)
|
||||
if(MKL_FOUND)
|
||||
include_directories(${MKL_INCLUDE_DIR})
|
||||
set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES})
|
||||
add_definitions(-DBLAS_FOUND=1 -DMKL_FOUND=1)
|
||||
else(MKL_FOUND)
|
||||
set(BLA_VENDOR "OpenBLAS")
|
||||
set(BLAS_VENDOR "OpenBLAS")
|
||||
find_package(BLAS)
|
||||
if(BLAS_FOUND)
|
||||
include(FindCBLAS)
|
||||
|
@ -54,7 +54,7 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES INCLUDE _prefix _name _flags _list _include
|
||||
IF(APPLE)
|
||||
FIND_LIBRARY(${_prefix}_${_library}_LIBRARY
|
||||
NAMES ${_library}
|
||||
PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV
|
||||
PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 /usr/local/opt/openblas/lib ENV
|
||||
DYLD_LIBRARY_PATH
|
||||
)
|
||||
ELSE(APPLE)
|
||||
|
45
src/3rd_party/CMakeLists.txt
vendored
45
src/3rd_party/CMakeLists.txt
vendored
@ -15,12 +15,22 @@ if(USE_FBGEMM)
|
||||
|
||||
if(NOT MSVC)
|
||||
# only locally disabled for the 3rd_party folder
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function")
|
||||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused")
|
||||
endif()
|
||||
|
||||
set(FBGEMM_BUILD_TESTS OFF CACHE BOOL "Disable fbgemm tests")
|
||||
set(FBGEMM_BUILD_BENCHMARKS OFF CACHE BOOL "Disable fbgemm benchmark")
|
||||
add_subdirectory(./fbgemm)
|
||||
|
||||
# asmjit (3rd-party submodule of fbgemm) sets -Wall -Wextra near the end of
|
||||
# the compile options, invalidating any -Wno-... flags that we may have set
|
||||
# earlier. Let's remove them.
|
||||
get_property(ASMJIT_COMPILE_OPTIONS TARGET asmjit PROPERTY COMPILE_OPTIONS)
|
||||
list(REMOVE_ITEM ASMJIT_COMPILE_OPTIONS -Wall -Wextra)
|
||||
set_property(TARGET asmjit PROPERTY COMPILE_OPTIONS ${ASMJIT_COMPILE_OPTIONS})
|
||||
message(" ASMJIT COMPILE FLAGS: ${ASMJIT_COMPILE_OPTIONS}")
|
||||
|
||||
endif(USE_FBGEMM)
|
||||
|
||||
if(USE_SENTENCEPIECE)
|
||||
@ -39,7 +49,7 @@ if(USE_SENTENCEPIECE)
|
||||
message(WARNING "You are compiling SentencePiece binaries with -DUSE_STATIC_LIBS=on. \
|
||||
This will cause spm_train to segfault. No need to worry if you do not intend to use that binary. \
|
||||
Marian support for SentencePiece will work fine.")
|
||||
|
||||
|
||||
set(SPM_ENABLE_SHARED OFF CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE)
|
||||
set(SPM_TCMALLOC_STATIC ON CACHE BOOL "Link static library of TCMALLOC." FORCE)
|
||||
else(USE_STATIC_LIBS)
|
||||
@ -51,8 +61,19 @@ if(USE_SENTENCEPIECE)
|
||||
include_directories(./sentencepiece)
|
||||
|
||||
set_target_properties(spm_encode spm_decode spm_train spm_normalize spm_export_vocab
|
||||
PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
foreach(t sentencepiece sentencepiece_train sentencepiece_train-static
|
||||
spm_decode spm_encode spm_export_vocab spm_normalize spm_train)
|
||||
set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-tautological-compare -Wno-unused")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
|
||||
set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-range-loop-construct")
|
||||
endif()
|
||||
# get_property(SENTENCEPIECE_COMPILE_FLAGS TARGET ${t} PROPERTY COMPILE_FLAGS)
|
||||
# message("-- SENTENCPIECE: compile flags for target ${t}: ${SENTENCEPIECE_COMPILE_FLAGS}")
|
||||
endforeach(t)
|
||||
endif()
|
||||
|
||||
if(USE_STATIC_LIBS)
|
||||
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
|
||||
@ -63,6 +84,22 @@ include_directories(./SQLiteCpp/include)
|
||||
include_directories(./CLI)
|
||||
include_directories(./pathie-cpp/include)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
#set_target_properties(SQLiteCpp PROPERTIES COMPILE_FLAGS
|
||||
set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS
|
||||
" -Wno-parentheses-equality -Wno-unused-value")
|
||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
|
||||
set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS
|
||||
" -Wno-implicit-int-float-conversion")
|
||||
endif()
|
||||
set_property(TARGET libyaml-cpp APPEND_STRING PROPERTY COMPILE_FLAGS
|
||||
" -fPIC -Wno-unused-value")
|
||||
set_property(TARGET pathie-cpp APPEND_STRING PROPERTY COMPILE_FLAGS
|
||||
" -fPIC -Wno-unused-value")
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
include_directories(./zlib)
|
||||
|
||||
include(ExternalProject)
|
||||
|
2
src/3rd_party/fbgemm
vendored
2
src/3rd_party/fbgemm
vendored
@ -1 +1 @@
|
||||
Subproject commit 84e66a976046180187724aff60a236c5378fde7c
|
||||
Subproject commit f78e60988329b9207d086c743cafce1ac1bea3ab
|
4
src/3rd_party/half_float/umHalf.inl
vendored
4
src/3rd_party/half_float/umHalf.inl
vendored
@ -186,7 +186,7 @@ inline HalfFloat& HalfFloat::operator= (float other)
|
||||
inline bool HalfFloat::operator== (HalfFloat other) const
|
||||
{
|
||||
// +0 and -0 are considered to be equal
|
||||
if (!(bits << 1u) && !(other.bits << 1u))return true;
|
||||
if ((bits << 1u) == 0 && (other.bits << 1u) == 0) return true;
|
||||
|
||||
return bits == other.bits && !this->IsNaN();
|
||||
}
|
||||
@ -194,7 +194,7 @@ inline bool HalfFloat::operator== (HalfFloat other) const
|
||||
inline bool HalfFloat::operator!= (HalfFloat other) const
|
||||
{
|
||||
// +0 and -0 are considered to be equal
|
||||
if (!(bits << 1u) && !(other.bits << 1u))return false;
|
||||
if ((bits << 1u) == 0 && (other.bits << 1u) == 0) return false;
|
||||
|
||||
return bits != other.bits || this->IsNaN();
|
||||
}
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include "../include/path.hpp"
|
||||
#include "../include/errors.hpp"
|
||||
|
||||
#if defined(__unix__)
|
||||
#if defined(__unix__) || defined(__APPLE__)
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <errno.h>
|
||||
|
2
src/3rd_party/pathie-cpp/src/path.cpp
vendored
2
src/3rd_party/pathie-cpp/src/path.cpp
vendored
@ -902,7 +902,7 @@ Path Path::pwd()
|
||||
*/
|
||||
Path Path::exe()
|
||||
{
|
||||
#if defined(__linux__)
|
||||
#if defined(__linux__) || defined(__APPLE__)
|
||||
char buf[PATH_MAX];
|
||||
ssize_t size = ::readlink("/proc/self/exe", buf, PATH_MAX);
|
||||
|
||||
|
8
src/3rd_party/pathie-cpp/src/pathie.cpp
vendored
8
src/3rd_party/pathie-cpp/src/pathie.cpp
vendored
@ -143,7 +143,7 @@ std::string Pathie::convert_encodings(const char* from_encoding, const char* to_
|
||||
errno = 0;
|
||||
errsav = 0;
|
||||
|
||||
#ifdef BSD
|
||||
#if defined(BSD) && ! defined(__APPLE__) //Since MacOS evolved from BSD, it is captured here but the iconv on macos behaves differently
|
||||
// What the heck. FreeBSD violates POSIX.1-2008: it declares iconv()
|
||||
// differently than mandated by POSIX: http://pubs.opengroup.org/onlinepubs/9699919799/functions/iconv.html
|
||||
// (it declares a `const' where it must not be).
|
||||
@ -181,11 +181,10 @@ std::string Pathie::convert_encodings(const char* from_encoding, const char* to_
|
||||
std::string Pathie::utf8_to_filename(const std::string& utf8)
|
||||
{
|
||||
bool fs_encoding_is_utf8 = false;
|
||||
|
||||
char* fsencoding = NULL;
|
||||
#if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX)
|
||||
fs_encoding_is_utf8 = true;
|
||||
#else
|
||||
char* fsencoding = NULL;
|
||||
fsencoding = nl_langinfo(CODESET);
|
||||
fs_encoding_is_utf8 = (strcmp(fsencoding, "UTF-8") == 0);
|
||||
#endif
|
||||
@ -206,11 +205,10 @@ std::string Pathie::utf8_to_filename(const std::string& utf8)
|
||||
std::string Pathie::filename_to_utf8(const std::string& native_filename)
|
||||
{
|
||||
bool fs_encoding_is_utf8 = false;
|
||||
|
||||
char* fsencoding = NULL;
|
||||
#if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX)
|
||||
fs_encoding_is_utf8 = true;
|
||||
#else
|
||||
char* fsencoding = NULL;
|
||||
fsencoding = nl_langinfo(CODESET);
|
||||
fs_encoding_is_utf8 = (strcmp(fsencoding, "UTF-8") == 0);
|
||||
#endif
|
||||
|
2
src/3rd_party/zstr/strict_fstream.hpp
vendored
2
src/3rd_party/zstr/strict_fstream.hpp
vendored
@ -27,7 +27,7 @@ static std::string strerror()
|
||||
{
|
||||
buff = "Unknown error";
|
||||
}
|
||||
#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && ! _GNU_SOURCE
|
||||
#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || __APPLE__) && ! _GNU_SOURCE
|
||||
// XSI-compliant strerror_r()
|
||||
if (strerror_r(errno, &buff[0], buff.size()) != 0)
|
||||
{
|
||||
|
@ -215,6 +215,10 @@ if(COMPILE_SERVER)
|
||||
set(EXECUTABLES ${EXECUTABLES} marian_server)
|
||||
endif(COMPILE_SERVER)
|
||||
|
||||
if(APPLE) # This is a dependency of pathie but I can't seem to link it into that CMakeLists because we're not compiling it as a library.
|
||||
set(EXT_LIBS ${EXT_LIBS} iconv)
|
||||
endif()
|
||||
|
||||
foreach(exec ${EXECUTABLES})
|
||||
target_link_libraries(${exec} marian ${EXT_LIBS} ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
if(CUDA_FOUND)
|
||||
|
@ -44,7 +44,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
// Error Codes for error code meanings
|
||||
// http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html
|
||||
translate.on_error = [](Ptr<WSServer::Connection> connection,
|
||||
translate.on_error = [](Ptr<WSServer::Connection> /*connection*/,
|
||||
const SimpleWeb::error_code &ec) {
|
||||
LOG(error, "Connection error: ({}) {}", ec.value(), ec.message());
|
||||
};
|
||||
|
@ -10,6 +10,21 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// The macro MAYBE_UNUSED is used to selectively disable
|
||||
// unused-variable warnings. C++17 defines the attribute
|
||||
// [[maybe_unused]], but I don't think we're at C++17 yet. We can add it when we reach C++17.
|
||||
// The compilers gcc and clang (and maybe others) define
|
||||
// __has_attribute and support __attribute__((unused)) in C++11.
|
||||
#if defined __has_attribute
|
||||
# if __has_attribute(unused)
|
||||
# define MAYBE_UNUSED __attribute__((unused))
|
||||
# else
|
||||
# define MAYBE_UNUSED
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#define THREAD_GUARD(body) [&]() { body; }() // test if THREAD_GUARD is neccessary, remove if no problems occur.
|
||||
#define NodeOp(op) [=]() { op; }
|
||||
|
||||
|
@ -84,10 +84,16 @@ std::vector<T> As<std::vector<T>>::apply(const FastOpt& node) {
|
||||
// specializations for simple vector types
|
||||
template struct As<std::vector<bool>>;
|
||||
template struct As<std::vector<int>>;
|
||||
// Windows and Unix based OS have different type definitions for 'unsigned long'.
|
||||
// So, we need an explicit definition for uint64_t. Otherwise, there's a linking error on windows.
|
||||
// Windows, Linux based OS and Mac have different type definitions for 'unsigned long'.
|
||||
// So, we need explicit definitions for uint64_t that cover the different platforms.
|
||||
// Otherwise, there's a linking error on Windows, Linux, or Mac.
|
||||
// https://software.intel.com/en-us/articles/size-of-long-integer-type-on-different-architecture-and-os/
|
||||
template struct As<std::vector<uint64_t>>;
|
||||
// https://stackoverflow.com/questions/32021860/c-should-you-size-t-with-a-regular-array
|
||||
// MacOS: size_t = unsigned long (8 bytes), uint64_t = unsigned long long (8 bytes)
|
||||
// Linux: size_t = unsigned long (8 bytes), uint64_t = unsigned long (8 bytes)
|
||||
// Windows: size_t = unsigned long long (8 bytes), uint64_t = unsigned long long (8 bytes)
|
||||
template struct As<std::vector<unsigned long long>>;
|
||||
template struct As<std::vector<unsigned long>>;
|
||||
template struct As<std::vector<float>>;
|
||||
template struct As<std::vector<double>>;
|
||||
template struct As<std::vector<std::string>>;
|
||||
@ -103,4 +109,4 @@ std::pair<T1, T2> As<std::pair<T1, T2>>::apply(const FastOpt& node) {
|
||||
template struct As<std::pair<int, int>>;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -367,7 +367,8 @@ public:
|
||||
}
|
||||
|
||||
const FastOpt& operator[](const char* const key) const {
|
||||
return operator[](crc::crc(key));
|
||||
// MacOS requires explicit cast to size_t before we can use it.
|
||||
return operator[]((size_t)crc::crc(key));
|
||||
}
|
||||
|
||||
const FastOpt& operator[](const std::string& key) const {
|
||||
@ -375,4 +376,4 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -7,10 +7,21 @@
|
||||
#include "common/filesystem.h"
|
||||
#include "common/logging.h"
|
||||
|
||||
// Even when compiling with clang, __GNUC__ may be defined, so
|
||||
// we need to add some extra checks to avoid compile errors with
|
||||
// respect to -Wsuggest-override.
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wunused-value"
|
||||
# if defined(__has_warning)
|
||||
# if __has_warning("-Wsuggest-override")
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
# endif
|
||||
# else
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(push) // 4101: 'identifier' : unreferenced local variable. One parameter variable in zstr.hpp is not used.
|
||||
#pragma warning(disable : 4101)
|
||||
@ -82,7 +93,7 @@ protected:
|
||||
|
||||
void NormalizeTempPrefix(std::string& base) const;
|
||||
void MakeTemp(const std::string& base);
|
||||
|
||||
|
||||
};
|
||||
|
||||
} // namespace io
|
||||
|
@ -7,9 +7,19 @@
|
||||
// @TODO: go back to canonical names for functions and objects
|
||||
// as specified in C++17 so it becomes easy to move in the future
|
||||
|
||||
// Even when compiling with clang, __GNUC__ may be defined, so
|
||||
// we need to add some extra checks to avoid compile errors with
|
||||
// respect to -Wsuggest-override.
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
# pragma GCC diagnostic push
|
||||
# pragma GCC diagnostic ignored "-Wunused-value"
|
||||
# if defined(__has_warning)
|
||||
# if __has_warning("-Wsuggest-override")
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
# endif
|
||||
# else
|
||||
# pragma GCC diagnostic ignored "-Wsuggest-override"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include "3rd_party/pathie-cpp/include/path.hpp" // @TODO: update to latest Pathie
|
||||
|
@ -169,12 +169,12 @@ inline bool operator!=(const IntrusivePtr<T>& a, std::nullptr_t) {
|
||||
|
||||
template<class T>
|
||||
inline bool operator==(T* a, const IntrusivePtr<T>& b) {
|
||||
return b.get();
|
||||
return a == b.get(); // used to say: return b.get(); That cannot be right. [UG]
|
||||
}
|
||||
|
||||
template<class T>
|
||||
inline bool operator!=(T* a, const IntrusivePtr<T>& b) {
|
||||
return b.get();
|
||||
return a != b.get(); // used to say: return b.get(); That cannot be right. [UG]
|
||||
}
|
||||
|
||||
template<class T, class U>
|
||||
@ -223,5 +223,3 @@ namespace std {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
|
@ -124,7 +124,7 @@ static void setErrorHandlers() {
|
||||
std::set_terminate(unhandledException);
|
||||
#ifdef __unix__
|
||||
// catch segfaults
|
||||
struct sigaction sa = { 0 };
|
||||
struct sigaction sa = { {0} };
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
sa.sa_sigaction = [](int /*signal*/, siginfo_t*, void*) { ABORT("Segmentation fault"); };
|
||||
|
@ -254,7 +254,7 @@ enum class Type : size_t {
|
||||
packed16 = TypeClass::packed_type + 2u, // special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint16) is meaningless.
|
||||
packed8avx2 = TypeClass::packed_type + 1u + TypeClass::avx2_type, // special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless.
|
||||
packed8avx512 = TypeClass::packed_type + 1u + TypeClass::avx512_type, // special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed invidually. Internal actual type (uint8) is meaningless.
|
||||
|
||||
|
||||
};
|
||||
|
||||
static inline size_t operator&(TypeClass typeClass, Type type) {
|
||||
@ -394,7 +394,7 @@ static Type inline typeFromString(const std::string& str) {
|
||||
return Type::float32;
|
||||
if(str == "float64")
|
||||
return Type::float64;
|
||||
|
||||
|
||||
if(str == "packed16")
|
||||
return Type::packed16;
|
||||
if(str == "packed8avx2")
|
||||
@ -437,19 +437,35 @@ void matchOrAbort(Type type) {
|
||||
|
||||
namespace typeFitting { // own namespace instead of in class, otherwise we get error "explicit specialization in non-namespace scope"
|
||||
|
||||
// compares max for different types as constexpr, so can be used at compile-time to determine if RequestType type max fits into ReturnType max, see std::conditional below.
|
||||
template <typename RequestType, typename ReturnType>
|
||||
constexpr bool fitsIntoMax() { return std::numeric_limits<RequestType>::max() <= std::numeric_limits<ReturnType>::max(); } // for built-in types everything is constexpr
|
||||
// Helper function for fitsIntoMax() below
|
||||
// Returns the 'capacity' of a type: number of digits for integers,
|
||||
// max_exponent for floats. We ignore the mantissa for floats.
|
||||
template<typename X> constexpr int capacity() {
|
||||
static_assert(std::is_arithmetic<X>::value || std::is_same<X,HalfFloat>::value,
|
||||
"Wrong type for this template");
|
||||
return (std::is_integral<X>::value
|
||||
? std::numeric_limits<X>::digits
|
||||
: std::numeric_limits<X>::max_exponent);
|
||||
}
|
||||
|
||||
|
||||
// Compares two types as a constexpr, so it can be used at compile time to determine whether the max of RequestType fits into the max of ReturnType; see std::conditional below.
|
||||
template <typename RequestType, typename ReturnType>
|
||||
constexpr bool fitsIntoMax() {
|
||||
// We can't just compare std::numeric_limits<>::max(), because Clang-10
|
||||
// complains about rounding errors when implicitly converting int to float
|
||||
return ((!std::is_integral<RequestType>::value // RequestType is a float
|
||||
&& std::is_integral<ReturnType>::value) // ReturnType an integer
|
||||
? capacity<RequestType>() < capacity<ReturnType>() // special case
|
||||
: capacity<RequestType>() <= capacity<ReturnType>()); // normal case
|
||||
} // for built-in types everything is constexpr
|
||||
|
||||
// add specializations here when needed
|
||||
template <> constexpr bool fitsIntoMax<float16, float>() { return true; }; // for float16 conversion to float is not constexpr, hence specializations
|
||||
template <> constexpr bool fitsIntoMax<float, float16>() { return false; }; // for float16 conversion to float is not constexpr, hence specializations
|
||||
}
|
||||
|
||||
template <typename ReturnType>
|
||||
class NumericLimits {
|
||||
private:
|
||||
|
||||
|
||||
template <typename MaxType> void setLimitsMax() {
|
||||
max = (ReturnType)std::numeric_limits<MaxType>::max();
|
||||
lowest = (ReturnType)std::numeric_limits<MaxType>::lowest();
|
||||
@ -459,10 +475,14 @@ private:
|
||||
void setLimits() {
|
||||
// check if the maximum of type RequestType fits into ReturnType
|
||||
constexpr bool fits = typeFitting::fitsIntoMax<RequestType, ReturnType>();
|
||||
// sanity check:
|
||||
static_assert(fits || typeFitting::fitsIntoMax<ReturnType, RequestType>(),
|
||||
"RequestType doesn't fit into ReturnType, and ReturnType doesn't "
|
||||
"fit into RequestType. fitsIntoMax is broken!");
|
||||
// and then use the smaller of the two types to determine max, min, lowest.
|
||||
using MaxType = typename std::conditional<fits, RequestType, ReturnType>::type;
|
||||
setLimitsMax<MaxType>();
|
||||
// @TODO: should we rather abort if the RequestType does not fit into ReturnType instead of clipping to smaller type?
|
||||
// @TODO: should we rather abort if the RequestType does not fit into ReturnType instead of clipping to smaller type?
|
||||
// ABORT_IF(!fits, "Type {} is too small to contain max of type {}", typeId<ReturnType>(), typeId<RequestType>());
|
||||
}
|
||||
|
||||
|
@ -8,12 +8,22 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#ifdef __unix__
|
||||
#if defined(__unix__) || defined(__APPLE__)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <codecvt>
|
||||
#include <cwctype>
|
||||
|
||||
// MACOS lacks HOST_NAME_MAX
|
||||
#ifndef HOST_NAME_MAX
|
||||
# if defined(_POSIX_HOST_NAME_MAX)
|
||||
# define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
|
||||
# elif defined(MAXHOSTNAMELEN)
|
||||
# define HOST_NAME_MAX MAXHOSTNAMELEN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
namespace marian {
|
||||
namespace utils {
|
||||
|
||||
|
@ -26,7 +26,7 @@ public:
|
||||
|
||||
virtual void setGuidedAlignment(std::vector<float>&&) = 0;
|
||||
virtual void setDataWeights(const std::vector<float>&) = 0;
|
||||
|
||||
virtual ~Batch() {};
|
||||
protected:
|
||||
std::vector<size_t> sentenceIds_;
|
||||
};
|
||||
|
@ -525,6 +525,7 @@ public:
|
||||
const std::vector<Ptr<Vocab>>& vocabs,
|
||||
Ptr<Options> options);
|
||||
|
||||
virtual ~CorpusBase() {}
|
||||
virtual std::vector<Ptr<Vocab>>& getVocabs() = 0;
|
||||
|
||||
protected:
|
||||
|
@ -45,6 +45,7 @@ protected:
|
||||
|
||||
public:
|
||||
// @TODO: choose between 'virtual' and 'final'. Can we derive from this class?
|
||||
virtual ~DefaultVocab() {};
|
||||
virtual const std::string& canonicalExtension() const override { return suffixes_[0]; }
|
||||
virtual const std::vector<std::string>& suffixes() const override { return suffixes_; }
|
||||
|
||||
@ -295,7 +296,7 @@ private:
|
||||
class ClassVocab : public DefaultVocab {
|
||||
private:
|
||||
// Do nothing.
|
||||
virtual void addRequiredVocabulary(const std::string& vocabPath, bool isJson) override { vocabPath; isJson; }
|
||||
virtual void addRequiredVocabulary(const std::string& /*vocabPath*/, bool /*isJson*/) override {}
|
||||
|
||||
// Not adding special class labels, only seen classes.
|
||||
virtual void create(const std::string& vocabPath,
|
||||
|
@ -36,6 +36,8 @@ public:
|
||||
|
||||
class ShortlistGenerator {
|
||||
public:
|
||||
virtual ~ShortlistGenerator() {}
|
||||
|
||||
virtual Ptr<Shortlist> generate(Ptr<data::CorpusBatch> batch) const = 0;
|
||||
|
||||
// Writes text version of (possibly) pruned short list to file
|
||||
@ -129,7 +131,6 @@ private:
|
||||
Ptr<const Vocab> trgVocab_;
|
||||
|
||||
size_t srcIdx_;
|
||||
size_t trgIdx_;
|
||||
bool shared_{false};
|
||||
|
||||
size_t firstNum_{100};
|
||||
@ -183,13 +184,12 @@ public:
|
||||
Ptr<const Vocab> srcVocab,
|
||||
Ptr<const Vocab> trgVocab,
|
||||
size_t srcIdx = 0,
|
||||
size_t trgIdx = 1,
|
||||
size_t /*trgIdx*/ = 1,
|
||||
bool shared = false)
|
||||
: options_(options),
|
||||
srcVocab_(srcVocab),
|
||||
trgVocab_(trgVocab),
|
||||
srcIdx_(srcIdx),
|
||||
trgIdx_(trgIdx),
|
||||
shared_(shared) {
|
||||
std::vector<std::string> vals = options_->get<std::vector<std::string>>("shortlist");
|
||||
|
||||
@ -235,7 +235,6 @@ public:
|
||||
|
||||
virtual Ptr<Shortlist> generate(Ptr<data::CorpusBatch> batch) const override {
|
||||
auto srcBatch = (*batch)[srcIdx_];
|
||||
// auto trgBatch = (*batch)[trgIdx_];
|
||||
|
||||
// add firstNum most frequent words
|
||||
std::unordered_set<WordIndex> indexSet;
|
||||
|
@ -37,6 +37,7 @@ public:
|
||||
typedef SentenceTuple Sample;
|
||||
|
||||
TextInput(std::vector<std::string> inputs, std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
|
||||
virtual ~TextInput() {}
|
||||
|
||||
Sample next() override;
|
||||
|
||||
|
@ -57,6 +57,7 @@ public:
|
||||
virtual Word randWord() const {
|
||||
return Word::fromWordIndex(rand() % size());
|
||||
}
|
||||
virtual ~IVocab() {};
|
||||
};
|
||||
|
||||
class Options;
|
||||
|
@ -62,6 +62,7 @@ private:
|
||||
std::vector<Input> inputs_;
|
||||
|
||||
public:
|
||||
|
||||
std::vector<Input>& inputs() { return inputs_; }
|
||||
|
||||
const std::vector<Input>& inputs() const { return inputs_; }
|
||||
@ -144,6 +145,8 @@ public:
|
||||
loadData();
|
||||
}
|
||||
|
||||
virtual ~MNISTData(){}
|
||||
|
||||
void loadData() override {
|
||||
ABORT_IF(paths_.size() != 2, "Paths to MNIST data files are not specified");
|
||||
|
||||
|
@ -47,6 +47,8 @@ class MNISTLogsoftmax : public ILogProb {
|
||||
public:
|
||||
MNISTLogsoftmax() {}
|
||||
|
||||
virtual ~MNISTLogsoftmax(){}
|
||||
|
||||
Logits apply(Ptr<IModel> model,
|
||||
Ptr<ExpressionGraph> graph,
|
||||
Ptr<data::Batch> batch,
|
||||
@ -61,13 +63,15 @@ public:
|
||||
typedef data::MNISTData dataset_type;
|
||||
|
||||
template <class... Args>
|
||||
MnistFeedForwardNet(Ptr<Options> options, Args... args)
|
||||
MnistFeedForwardNet(Ptr<Options> options, Args... /*args*/)
|
||||
: options_(options), inference_(options->get<bool>("inference", false)) {}
|
||||
|
||||
virtual ~MnistFeedForwardNet(){}
|
||||
|
||||
virtual Logits build(Ptr<ExpressionGraph> graph,
|
||||
Ptr<data::Batch> batch,
|
||||
bool /*clean*/ = false) override {
|
||||
|
||||
|
||||
return Logits(apply(graph, batch, inference_));
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,9 @@ public:
|
||||
builder_ = models::createModelFromOptions(options, models::usage::translation);
|
||||
}
|
||||
|
||||
virtual void keepBest(const std::vector<Ptr<ExpressionGraph>>& graphs) override {
|
||||
virtual ~MNISTAccuracyValidator(){}
|
||||
|
||||
virtual void keepBest(const std::vector<Ptr<ExpressionGraph>>& /*graphs*/) override {
|
||||
LOG(warn, "Keeping best model for MNIST examples is not supported");
|
||||
}
|
||||
|
||||
|
@ -7,55 +7,58 @@ namespace marian {
|
||||
namespace functional {
|
||||
|
||||
// General template, will be used for any type without specializations
|
||||
// and will fail with an abort message.
|
||||
// and will fail at runtime with an abort message. Note that the
|
||||
// general template functions don't have named parameters on purpose,
|
||||
// because clang will warn about unused parameters during compilation.
|
||||
|
||||
template <typename T>
|
||||
struct Ops {
|
||||
static HOST_DEVICE_INLINE T tanh(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sin(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T cos(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T tan(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T log(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T exp(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T abs(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sqrt(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T neg(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sgn(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T tanh(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sin(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T cos(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T tan(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T log(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T exp(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T abs(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sqrt(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T neg(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sgn(const T&) { ABORT("Unknown type"); }
|
||||
|
||||
static HOST_DEVICE_INLINE T add(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sub(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T mul(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T div(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T add(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sub(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T mul(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T div(const T&, const T&) { ABORT("Unknown type"); }
|
||||
|
||||
static HOST_DEVICE_INLINE T max(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T min(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T pow(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T max(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T min(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T pow(const T&, const T&) { ABORT("Unknown type"); }
|
||||
|
||||
static HOST_DEVICE_INLINE T negate(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T eq(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T neq(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T gt(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T lt(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T geq(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T leq(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T _and(const T& x, const T& y) { ABORT("Unknown type"); } // 'and' is used by gcc
|
||||
static HOST_DEVICE_INLINE T _or(const T& x, const T& y) { ABORT("Unknown type"); } // 'or' is used by gcc
|
||||
static HOST_DEVICE_INLINE T negate(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T eq(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T neq(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T gt(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T lt(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T geq(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T leq(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T _and(const T&, const T&) { ABORT("Unknown type"); } // 'and' is used by gcc
|
||||
static HOST_DEVICE_INLINE T _or(const T&, const T&) { ABORT("Unknown type"); } // 'or' is used by gcc
|
||||
|
||||
// Neural Networks specific functions
|
||||
static HOST_DEVICE_INLINE T sigmoid(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T logaddexp(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T clip(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sigmoid(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T logaddexp(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T clip(const T&, const T&) { ABORT("Unknown type"); }
|
||||
// derivative of Clip, cut-off function
|
||||
static HOST_DEVICE_INLINE T bump(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T relu(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T reluBack(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T prelu(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T preluBack(const T& x, const T& y) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T bump(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T relu(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T reluBack(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T prelu(const T&, const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T preluBack(const T&, const T&) { ABORT("Unknown type"); }
|
||||
|
||||
static HOST_DEVICE_INLINE T if_then_else(const T& x, const T& y, const T& z) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T if_then_else(const T&, const T&, const T&) { ABORT("Unknown type"); }
|
||||
|
||||
static HOST_DEVICE_INLINE T sumReduce(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T maxReduce(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T minReduce(const T& x) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T sumReduce(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T maxReduce(const T&) { ABORT("Unknown type"); }
|
||||
static HOST_DEVICE_INLINE T minReduce(const T&) { ABORT("Unknown type"); }
|
||||
};
|
||||
|
||||
// Specialization for float
|
||||
@ -127,14 +130,14 @@ template <>
|
||||
struct Ops<double> {
|
||||
typedef double Single;
|
||||
|
||||
static HOST_DEVICE_INLINE double tanh(const double& x) { return tanh(x); }
|
||||
static HOST_DEVICE_INLINE double sin(const double& x) { return sin(x); }
|
||||
static HOST_DEVICE_INLINE double cos(const double& x) { return cos(x); }
|
||||
static HOST_DEVICE_INLINE double tan(const double& x) { return tan(x); }
|
||||
static HOST_DEVICE_INLINE double log(const double& x) { return log(x); }
|
||||
static HOST_DEVICE_INLINE double exp(const double& x) { return exp(x); }
|
||||
static HOST_DEVICE_INLINE double abs(const double& x) { return abs(x); }
|
||||
static HOST_DEVICE_INLINE double sqrt(const double& x) { return sqrt(x); }
|
||||
static HOST_DEVICE_INLINE double tanh(const double& x) { return std::tanh(x); }
|
||||
static HOST_DEVICE_INLINE double sin(const double& x) { return std::sin(x); }
|
||||
static HOST_DEVICE_INLINE double cos(const double& x) { return std::cos(x); }
|
||||
static HOST_DEVICE_INLINE double tan(const double& x) { return std::tan(x); }
|
||||
static HOST_DEVICE_INLINE double log(const double& x) { return std::log(x); }
|
||||
static HOST_DEVICE_INLINE double exp(const double& x) { return std::exp(x); }
|
||||
static HOST_DEVICE_INLINE double abs(const double& x) { return std::abs(x); }
|
||||
static HOST_DEVICE_INLINE double sqrt(const double& x) { return std::sqrt(x); }
|
||||
static HOST_DEVICE_INLINE double neg(const double& x) { return -x; }
|
||||
static HOST_DEVICE_INLINE double sgn(const double& x) { return (0 < x) - (x < 0); }
|
||||
|
||||
@ -145,7 +148,7 @@ struct Ops<double> {
|
||||
|
||||
static HOST_DEVICE_INLINE double max(const double& x, const double& y) { return x < y ? y : x; }
|
||||
static HOST_DEVICE_INLINE double min(const double& x, const double& y) { return x < y ? x : y; }
|
||||
static HOST_DEVICE_INLINE double pow(const double& x, const double& y) { return pow(x, y); }
|
||||
static HOST_DEVICE_INLINE double pow(const double& x, const double& y) { return std::pow(x, y); }
|
||||
|
||||
|
||||
static HOST_DEVICE_INLINE double negate(const double& x) { return !(bool)x; }
|
||||
@ -460,7 +463,7 @@ struct Ops<half> {
|
||||
static DEVICE_INLINE half exp(const half& x) { return hexp(x); }
|
||||
static DEVICE_INLINE half sqrt(const half& x) { return hsqrt(x); }
|
||||
static DEVICE_INLINE half neg(const half& x) { return -x; }
|
||||
|
||||
|
||||
static DEVICE_INLINE half abs(const half& x) { return fabs((float)x); }// @TODO half has this information somewhere in the struct, right?
|
||||
static DEVICE_INLINE half sgn(const half& x) { half zero = 0.f; return (zero < x) - (x < zero); } // @TODO half has this information somewhere in the struct, right?
|
||||
|
||||
|
@ -40,7 +40,7 @@ protected:
|
||||
std::string debugMessage_;
|
||||
|
||||
Ptr<std::list<Expr>> subtape_; // a subtape is used to keep track of nodes that need to be freed and recomputed with gradient-checkpointing.
|
||||
bool isCheckpoint_{false}; // true if this node has been selected to be a checkpoint, currently only done manually.
|
||||
bool isCheckpoint_{false}; // true if this node has been selected to be a checkpoint, currently only done manually.
|
||||
|
||||
Ptr<AutoTunerRecorder> recorder_;
|
||||
size_t recorderHash_;
|
||||
@ -138,7 +138,7 @@ public:
|
||||
|
||||
virtual std::string graphviz() override {
|
||||
std::stringstream ss;
|
||||
ss << "\"" << this << "\" ["
|
||||
ss << "\"" << this << "\" ["
|
||||
<< "shape=\"" << form() << "\", "
|
||||
<< "label=" << label() << ", "
|
||||
<< "style=\"filled\", "
|
||||
@ -147,7 +147,7 @@ public:
|
||||
|
||||
for(auto&& child : children())
|
||||
ss << "\"" << child << "\" -> \"" << this << "\";" << std::endl;
|
||||
|
||||
|
||||
if(subtape_) {
|
||||
for(auto&& dep : *subtape_)
|
||||
ss << "\"" << dep << "\" -> \"" << this << "\" [style=dotted];" << std::endl;
|
||||
@ -188,9 +188,9 @@ struct NaryNodeOp : public Node {
|
||||
|
||||
// Deduce type automatically, but then all types must be the same
|
||||
// this is called automatically when no output type is specified.
|
||||
// If the input types are mixed, the output type needs to be specified
|
||||
// If the input types are mixed, the output type needs to be specified
|
||||
// in the constructor.
|
||||
Type commonType(const std::vector<Expr>& nodes) {
|
||||
static Type commonType(const std::vector<Expr>& nodes) {
|
||||
ABORT_IF(nodes.size() == 0, "NaryNodeOp has no children");
|
||||
Type type = nodes[0]->value_type();
|
||||
for(int i = 1; i < nodes.size(); ++i)
|
||||
|
@ -17,9 +17,9 @@ namespace inits {
|
||||
/**
|
||||
* Base class for specialized NodeInitializers.
|
||||
*
|
||||
* A NodeInitializer is a functor that is associated with parameters
|
||||
* and constants, and is invoked on a tensor during node initialization.
|
||||
* You need to override NodeInitializer::apply(Tensor) with your own
|
||||
* A NodeInitializer is a functor that is associated with parameters
|
||||
* and constants, and is invoked on a tensor during node initialization.
|
||||
* You need to override NodeInitializer::apply(Tensor) with your own
|
||||
* functionality or use a fromLambda initializer.
|
||||
*
|
||||
* See node_initializers.cpp for examples.
|
||||
@ -31,6 +31,7 @@ protected:
|
||||
public:
|
||||
virtual void apply(Tensor t) = 0;
|
||||
void setAllocator(Ptr<Allocator> allocator) { allocator_ = allocator; }
|
||||
virtual ~NodeInitializer() {}
|
||||
};
|
||||
|
||||
/**
|
||||
@ -135,7 +136,7 @@ Ptr<NodeInitializer> dropout(float dropoutProbabilty);
|
||||
|
||||
/**
|
||||
* Initialize with Gumbel noise, i.e. -log(-log(u)) where u ~ Uniform(0 + eps, 1 - eps)
|
||||
*
|
||||
*
|
||||
* @return A NodeInitializer
|
||||
*/
|
||||
Ptr<NodeInitializer> gumbel(float eps = 1e-5f);
|
||||
@ -163,7 +164,7 @@ Ptr<NodeInitializer> fromWord2vec(const std::string& file,
|
||||
|
||||
/**
|
||||
* Computes Google's Transformer-style sinusoidal position embeddings
|
||||
* starting from position 'start' taking into account batch and time
|
||||
* starting from position 'start' taking into account batch and time
|
||||
* dimensions of the tensor.
|
||||
*
|
||||
* Expected tensor layout {-2: time, -1: model}
|
||||
|
@ -480,9 +480,12 @@ class CSRDotNodeOp : public NaryNodeOp {
|
||||
bool transS_;
|
||||
bool swapOperands_;
|
||||
public:
|
||||
CSRDotNodeOp(const Shape& S_shape, Expr S_values, Expr S_indices, Expr S_offsets, Expr D, bool transS, bool swapOperands)
|
||||
: NaryNodeOp({ S_values, S_indices, S_offsets, D }, newShape(S_shape, S_values, S_indices, S_offsets, D, transS, swapOperands), commonType({S_values, D})),
|
||||
transS_(transS), swapOperands_(swapOperands) {
|
||||
CSRDotNodeOp(const Shape& S_shape, Expr S_values, Expr S_indices,
|
||||
Expr S_offsets, Expr D, bool transS, bool swapOperands)
|
||||
: NaryNodeOp({ S_values, S_indices, S_offsets, D },
|
||||
newShape(S_shape, S_values, S_indices, S_offsets, D, transS, swapOperands),
|
||||
NaryNodeOp::commonType({S_values, D})),
|
||||
transS_(transS), swapOperands_(swapOperands) {
|
||||
matchOrAbort<IndexType>(S_indices->value_type());
|
||||
matchOrAbort<IndexType>(S_offsets->value_type());
|
||||
}
|
||||
@ -513,7 +516,7 @@ public:
|
||||
|
||||
NodeOps backwardOps() override {
|
||||
return { nullptr, // can't backprop into the sparse matrix (the gradient is dense)
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
NodeOp(CSRProd(child(3)->grad(), // child(3) = D
|
||||
graph()->allocator(),
|
||||
@ -527,7 +530,7 @@ public:
|
||||
virtual size_t hash() override {
|
||||
size_t seed = NaryNodeOp::hash();
|
||||
for(auto s : shape())
|
||||
util::hash_combine(seed, s);
|
||||
util::hash_combine(seed, s);
|
||||
util::hash_combine(seed, transS_);
|
||||
util::hash_combine(seed, swapOperands_);
|
||||
return seed;
|
||||
@ -1050,8 +1053,8 @@ struct ConcatenateNodeOp : public NaryNodeOp {
|
||||
auto checkShape = shape;
|
||||
for(auto child : nodes) {
|
||||
checkShape.set(ax_, child->shape()[ax_]); // don't abort on different sizes on axis dim.
|
||||
ABORT_IF(checkShape != child->shape(),
|
||||
"Child shapes {} and {} cannot be concatenated along axis {}",
|
||||
ABORT_IF(checkShape != child->shape(),
|
||||
"Child shapes {} and {} cannot be concatenated along axis {}",
|
||||
shape, child->shape(), ax);
|
||||
|
||||
sum += child->shape()[ax_];
|
||||
|
@ -10,10 +10,10 @@
|
||||
|
||||
namespace marian {
|
||||
|
||||
// @TODO: Currently an ExpressionGraph only supports one Parameters object and
|
||||
// @TODO: Currently an ExpressionGraph only supports one Parameters object and
|
||||
// the type of parameters has to be the inside on Parameters object. This limits
|
||||
// parameter types to a single chosen type, e.g. only fp32 or only fp16. This should
|
||||
// be extended to allow multiple sets of parameters.
|
||||
// be extended to allow multiple sets of parameters.
|
||||
// The reason here is to be able to efficiently compute updates of whole parameter
|
||||
// sets of one type.
|
||||
class Parameters {
|
||||
@ -40,7 +40,7 @@ public:
|
||||
LOG(debug, "Created parameter object of type {}", acceptedElementType_);
|
||||
}
|
||||
|
||||
~Parameters() {
|
||||
virtual ~Parameters() {
|
||||
LOG(debug, "Destroyed parameter object of type {}", acceptedElementType_);
|
||||
}
|
||||
|
||||
@ -88,7 +88,7 @@ public:
|
||||
|
||||
// sort parameters by name before allocation to make sure the memory layout after allocation is always the same
|
||||
std::sort(params_.begin(), params_.end(), [](Expr n1, Expr n2){ return n1->name() < n2->name(); });
|
||||
|
||||
|
||||
for(auto p : params_) {
|
||||
if(!p->val()) {
|
||||
vals_->allocate(p->val(), p->shape(), p->value_type());
|
||||
|
@ -39,6 +39,7 @@ public:
|
||||
|
||||
// Simplest layer interface: Unary function
|
||||
struct IUnaryLayer {
|
||||
virtual ~IUnaryLayer() {}
|
||||
virtual Expr apply(Expr) = 0;
|
||||
virtual Expr apply(const std::vector<Expr>& es) {
|
||||
ABORT_IF(es.size() > 1, "Not implemented"); // simple stub
|
||||
@ -59,6 +60,7 @@ struct IEmbeddingLayer {
|
||||
|
||||
// alternative from indices directly
|
||||
virtual Expr applyIndices(const std::vector<WordIndex>& embIdx, const Shape& shape) const = 0;
|
||||
virtual ~IEmbeddingLayer() {}
|
||||
};
|
||||
|
||||
// base class for Encoder and Decoder classes, which have embeddings and a batch index (=stream index)
|
||||
|
@ -5,14 +5,14 @@
|
||||
|
||||
namespace marian {
|
||||
|
||||
static inline RationalLoss guidedAlignmentCost(Ptr<ExpressionGraph> graph,
|
||||
static inline RationalLoss guidedAlignmentCost(Ptr<ExpressionGraph> /*graph*/,
|
||||
Ptr<data::CorpusBatch> batch,
|
||||
Ptr<Options> options,
|
||||
Expr attention) { // [beam depth=1, max src length, batch size, tgt length]
|
||||
|
||||
std::string guidedLossType = options->get<std::string>("guided-alignment-cost"); // @TODO: change "cost" to "loss"
|
||||
float guidedLossWeight = options->get<float>("guided-alignment-weight");
|
||||
|
||||
|
||||
const auto& shape = attention->shape(); // [beam depth=1, max src length, batch size, tgt length]
|
||||
float epsilon = 1e-6f;
|
||||
Expr alignmentLoss; // sum up loss over all attention/alignment positions
|
||||
@ -55,8 +55,8 @@ static inline RationalLoss guidedAlignmentCost(Ptr<ExpressionGraph> graph,
|
||||
else
|
||||
ABORT("Unknown alignment cost type: {}", guidedLossType);
|
||||
// every position is a label as they should all agree
|
||||
// @TODO: there should be positional masking here ... on the other hand, positions that are not
|
||||
// in a sentence should always agree (both being 0). Lack of masking affects label count only which is
|
||||
// @TODO: there should be positional masking here ... on the other hand, positions that are not
|
||||
// in a sentence should always agree (both being 0). Lack of masking affects label count only which is
|
||||
// probably negligible?
|
||||
numLabels = shape.elements();
|
||||
}
|
||||
|
@ -331,6 +331,7 @@ public:
|
||||
: LabelwiseLoss(axes), // cross-entropy already reduces over axis -1
|
||||
labelSmoothing_(labelSmoothing), factorWeight_(factorWeight) {}
|
||||
|
||||
virtual ~CrossEntropyLoss() {}
|
||||
protected:
|
||||
float labelSmoothing_; // interpolation factor for label smoothing, see below
|
||||
float factorWeight_; // give extra weight to factors
|
||||
@ -368,7 +369,7 @@ protected:
|
||||
|
||||
if(labelWeights) {
|
||||
// We currently do not know how to use target factors and word-level label weights together
|
||||
bool wordlevel = labelWeights->shape()[-3] > 1; // Time-dimension is not trivially 1, hence we have word-level weights.
|
||||
bool wordlevel = labelWeights->shape()[-3] > 1; // Time-dimension is not trivially 1, hence we have word-level weights.
|
||||
ABORT_IF(wordlevel && logits.getNumFactorGroups() > 1, "CE loss with word-level label weights is not implemented for factors");
|
||||
ce = ce * cast(labelWeights, Type::float32);
|
||||
}
|
||||
@ -379,15 +380,15 @@ protected:
|
||||
|
||||
|
||||
/**
|
||||
* @brief Unlikelihood loss across last axis, summed up over batch and time dimensions. This is an
|
||||
* implementation of sequence-level unlikelihood loss from https://arxiv.org/abs/1908.04319.
|
||||
* @brief Unlikelihood loss across last axis, summed up over batch and time dimensions. This is an
|
||||
* implementation of sequence-level unlikelihood loss from https://arxiv.org/abs/1908.04319.
|
||||
* We rely on word-level label weights where 1 is correct and 0 is marking an error. If there are no
|
||||
* zeros for a sentence, it is going to be trained with normal CE loss; if there is at least one 0, it is going
|
||||
* to flip over to use SUL for that sentence to penalize the selected word.
|
||||
*
|
||||
*
|
||||
* SUL is implemented as:
|
||||
* -log(gather(1 - softmax(logits), -1, indices))
|
||||
*
|
||||
*
|
||||
* Factors are currently not supported.
|
||||
*/
|
||||
class SequenceUnlikelihoodLoss : public CrossEntropyLoss {
|
||||
@ -411,17 +412,17 @@ protected:
|
||||
ABORT_IF(!mask, "mask is required"); // @TODO: check this, it seems weights for padding are by default 1, which would make this obsolete.
|
||||
// use label weights, where 1 is GOOD and 0 is BAD. After inversion here, now 1 marks, mask again to eliminate padding (might be obsolete)
|
||||
auto errorMask = (1.f - cast(labelWeights, Type::float32)) * cast(mask, Type::float32);
|
||||
|
||||
|
||||
auto ceUl = logits.applyLossFunction(labels, [&](Expr logits, Expr indices) {
|
||||
return cast(unlikelihood(logits, indices), Type::float32);
|
||||
});
|
||||
|
||||
|
||||
// compute if want to use CE or UL. If there are no errors train with CE, otherwise train _only on_ the errors with UL. This is the "mixed" training
|
||||
// schedule from https://arxiv.org/abs/1908.04319. Providing labels with or without error scores we can easily switch between CE and UL.
|
||||
// schedule from https://arxiv.org/abs/1908.04319. Providing labels with or without error scores we can easily switch between CE and UL.
|
||||
auto onlyCe = eq(sum(errorMask, /*axis=*/-3), 0.f); // [1, 1, dimBatch, 1] - equal 1 if no errors are present
|
||||
ceUl = errorMask * ceUl; // don't use for correct label or padding
|
||||
|
||||
auto cost = onlyCe * ce + (1.f - onlyCe) * ceUl; // ce or unlikelihood part are never simultanously used as cost per batch entry
|
||||
auto cost = onlyCe * ce + (1.f - onlyCe) * ceUl; // ce or unlikelihood part are never simultanously used as cost per batch entry
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ public:
|
||||
virtual void debugWeighting(std::vector<float> /*weightedMask*/,
|
||||
std::vector<float> /*freqMask*/,
|
||||
Ptr<data::CorpusBatch> /*batch*/){};
|
||||
virtual ~WeightingBase() {}
|
||||
};
|
||||
|
||||
class DataWeighting : public WeightingBase {
|
||||
|
@ -41,6 +41,7 @@ class VocabWrapper : public IVocabWrapper {
|
||||
Ptr<Vocab> pImpl_;
|
||||
public:
|
||||
VocabWrapper(Ptr<Vocab> vocab) : pImpl_(vocab) {}
|
||||
virtual ~VocabWrapper() {}
|
||||
WordIndex encode(const std::string& word) const override { return (*pImpl_)[word].toWordIndex(); }
|
||||
std::string decode(WordIndex id) const override { return (*pImpl_)[Word::fromWordIndex(id)]; }
|
||||
size_t size() const override { return pImpl_->size(); }
|
||||
@ -243,7 +244,7 @@ DecoderCpuAvxVersion parseCpuAvxVersion(std::string name) {
|
||||
}
|
||||
}
|
||||
|
||||
// @TODO: clean up this code and unify with marian-conv. The targetPrec parameter is not clear enough, etc.
|
||||
// @TODO: clean up this code and unify with marian-conv. The targetPrec parameter is not clear enough, etc.
|
||||
bool convertModel(std::string inputFile, std::string outputFile, int32_t targetPrec) {
|
||||
std::cout << "Converting from: " << inputFile << ", to: " << outputFile << std::endl;
|
||||
|
||||
|
@ -54,6 +54,8 @@ public:
|
||||
const std::vector<const void*>& ptrs)
|
||||
: options_(options), ptrs_(ptrs) {}
|
||||
|
||||
virtual ~IBeamSearchDecoder() {}
|
||||
|
||||
virtual QSNBestBatch decode(const QSBatch& qsBatch,
|
||||
size_t maxLength,
|
||||
const std::unordered_set<WordIndex>& shortlist)
|
||||
|
@ -25,6 +25,7 @@ public:
|
||||
Ptr<ExpressionGraph> graph, // @TODO: why needed? Can it be gotten from model?
|
||||
Ptr<data::Batch> batch,
|
||||
bool clearGraph = true) = 0;
|
||||
virtual ~ICost() {}
|
||||
};
|
||||
|
||||
class EncoderDecoderCECost : public ICost {
|
||||
@ -51,6 +52,8 @@ public:
|
||||
weighter_ = WeightingFactory(options_);
|
||||
}
|
||||
|
||||
virtual ~EncoderDecoderCECost() {}
|
||||
|
||||
Ptr<MultiRationalLoss> apply(Ptr<IModel> model,
|
||||
Ptr<ExpressionGraph> graph,
|
||||
Ptr<data::Batch> batch,
|
||||
@ -136,6 +139,8 @@ public:
|
||||
Trainer(Ptr<IModel> model, Ptr<ICost> cost)
|
||||
: model_(model), cost_(cost) {}
|
||||
|
||||
virtual ~Trainer() {}
|
||||
|
||||
Ptr<IModel> getModel() { return model_; }
|
||||
|
||||
virtual void load(Ptr<ExpressionGraph> graph,
|
||||
@ -179,6 +184,8 @@ public:
|
||||
Scorer(Ptr<IModel> model, Ptr<ILogProb> cost)
|
||||
: model_(model), logProb_(cost) {}
|
||||
|
||||
virtual ~Scorer(){}
|
||||
|
||||
Ptr<IModel> getModel() { return model_; }
|
||||
|
||||
virtual void load(Ptr<ExpressionGraph> graph,
|
||||
@ -211,6 +218,7 @@ public:
|
||||
|
||||
class LogSoftmaxStep : public ILogProbStep {
|
||||
public:
|
||||
virtual ~LogSoftmaxStep() {}
|
||||
virtual Ptr<DecoderState> apply(Ptr<DecoderState> state) override {
|
||||
// decoder needs normalized probabilities (note: skipped if beam 1 and --skip-cost)
|
||||
state->setLogProbs(state->getLogProbs().applyUnaryFunction(logsoftmax));
|
||||
@ -224,6 +232,7 @@ public:
|
||||
// with --output-sampling during translation with marian-decoder
|
||||
class GumbelSoftmaxStep : public ILogProbStep {
|
||||
public:
|
||||
virtual ~GumbelSoftmaxStep() {}
|
||||
virtual Ptr<DecoderState> apply(Ptr<DecoderState> state) override {
|
||||
state->setLogProbs(state->getLogProbs().applyUnaryFunctions(
|
||||
[](Expr logits){ // lemma gets gumbelled
|
||||
|
@ -11,6 +11,7 @@ namespace marian {
|
||||
|
||||
class IEncoderDecoder : public models::IModel {
|
||||
public:
|
||||
virtual ~IEncoderDecoder() {}
|
||||
virtual void load(Ptr<ExpressionGraph> graph,
|
||||
const std::string& name,
|
||||
bool markedReloaded = true) override
|
||||
|
@ -41,6 +41,8 @@ public:
|
||||
// @TODO: Is there a better name?
|
||||
class ICriterionFunction {
|
||||
public:
|
||||
virtual ~ICriterionFunction() {}
|
||||
|
||||
virtual void load(Ptr<ExpressionGraph>,
|
||||
const std::string&,
|
||||
bool markReloaded = true)
|
||||
|
@ -5,10 +5,12 @@
|
||||
namespace marian {
|
||||
|
||||
struct ModelTask {
|
||||
virtual ~ModelTask() {}
|
||||
virtual void run() = 0;
|
||||
};
|
||||
|
||||
struct ModelServiceTask {
|
||||
virtual ~ModelServiceTask() {}
|
||||
virtual std::string run(const std::string&) = 0;
|
||||
};
|
||||
} // namespace marian
|
||||
|
@ -11,6 +11,7 @@ namespace marian {
|
||||
class EncoderS2S : public EncoderBase {
|
||||
using EncoderBase::EncoderBase;
|
||||
public:
|
||||
virtual ~EncoderS2S() {}
|
||||
Expr applyEncoderRNN(Ptr<ExpressionGraph> graph,
|
||||
Expr embeddings,
|
||||
Expr mask,
|
||||
@ -254,7 +255,7 @@ public:
|
||||
auto embeddings = state->getTargetHistoryEmbeddings();
|
||||
|
||||
// The batch dimension of the inputs can change due to batch-pruning, in that case
|
||||
// cached elements need to be rebuilt, in this case the mapped encoder context in the
|
||||
// cached elements need to be rebuilt, in this case the mapped encoder context in the
|
||||
// attention mechanism of the decoder RNN.
|
||||
int currDimBatch = embeddings->shape()[-2];
|
||||
if(!rnn_ || lastDimBatch_ != currDimBatch) // if currDimBatch is different, rebuild the cached RNN
|
||||
@ -263,7 +264,7 @@ public:
|
||||
// Also @TODO: maybe implement a Cached(build, updateIf) that runs a check and rebuild if required
|
||||
// at dereferencing:
|
||||
// rnn_ = Cached<decltype(constructDecoderRNN(graph, state))>(
|
||||
// /*build=*/[]{ return constructDecoderRNN(graph, state); },
|
||||
// /*build=*/[]{ return constructDecoderRNN(graph, state); },
|
||||
// /*updateIf=*/[]{ return state->batchDimChanged() });
|
||||
// rnn_->transduce(...);
|
||||
|
||||
|
@ -17,6 +17,7 @@ public:
|
||||
: context_(context), mask_(mask), batch_(batch) {}
|
||||
|
||||
EncoderState() {}
|
||||
virtual ~EncoderState() {}
|
||||
|
||||
virtual Expr getContext() const { return context_; }
|
||||
virtual Expr getAttended() const { return context_; }
|
||||
@ -53,6 +54,7 @@ public:
|
||||
const std::vector<Ptr<EncoderState>>& encStates,
|
||||
Ptr<data::CorpusBatch> batch)
|
||||
: states_(states), logProbs_(logProbs), encStates_(encStates), batch_(batch) {}
|
||||
virtual ~DecoderState() {}
|
||||
|
||||
// @TODO: Do we need all these to be virtual?
|
||||
virtual const std::vector<Ptr<EncoderState>>& getEncoderStates() const {
|
||||
@ -68,10 +70,10 @@ public:
|
||||
int beamSize) const {
|
||||
|
||||
std::vector<Ptr<EncoderState>> newEncStates;
|
||||
for(auto& es : encStates_)
|
||||
for(auto& es : encStates_)
|
||||
// If the size of the batch dimension of the encoder state context changed, subselect the correct batch entries
|
||||
newEncStates.push_back(es->getContext()->shape()[-2] == batchIndices.size() ? es : es->select(batchIndices));
|
||||
|
||||
|
||||
// hypindices matches batchIndices in terms of batch dimension, so we only need hypIndices
|
||||
auto selectedState = New<DecoderState>(
|
||||
states_.select(hypIndices, beamSize, /*isBatchMajor=*/false), logProbs_, newEncStates, batch_);
|
||||
@ -121,6 +123,7 @@ private:
|
||||
Words targetWords_;
|
||||
|
||||
public:
|
||||
virtual ~ClassifierState() {}
|
||||
virtual Expr getLogProbs() const { return logProbs_; }
|
||||
virtual void setLogProbs(Expr logProbs) { logProbs_ = logProbs; }
|
||||
|
||||
|
@ -16,6 +16,7 @@ namespace marian {
|
||||
class ClipperBase {
|
||||
public:
|
||||
virtual void clip(Tensor) = 0;
|
||||
virtual ~ClipperBase() {}
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<ClipperBase> ClipperPtr;
|
||||
|
@ -29,6 +29,8 @@ public:
|
||||
LOG(info, "[optimizers] Learning rate gets automatically adjusted as if minibatch size was {}", refMBWordsParam_);
|
||||
}
|
||||
|
||||
virtual ~OptimizerBase() {}
|
||||
|
||||
static constexpr size_t mbSizeNotProvided = SIZE_MAX;
|
||||
|
||||
void update(Ptr<ExpressionGraph> graph, size_t mbSize = mbSizeNotProvided) {
|
||||
@ -114,7 +116,7 @@ class Sgd : public OptimizerBase {
|
||||
public:
|
||||
Sgd(float eta, size_t refMBWordsParam = 0, Ptr<ClipperBase> clipper = nullptr)
|
||||
: OptimizerBase(eta, refMBWordsParam, clipper) {}
|
||||
|
||||
virtual ~Sgd() {}
|
||||
virtual void setParams(const std::vector<float>& /*params*/) override {}
|
||||
private:
|
||||
void updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBWords) override;
|
||||
|
@ -13,6 +13,7 @@ namespace marian {
|
||||
class ScoreCollector {
|
||||
public:
|
||||
ScoreCollector(const Ptr<Options>& options);
|
||||
virtual ~ScoreCollector() {}
|
||||
|
||||
virtual void Write(long id, const std::string& message);
|
||||
virtual void Write(long id,
|
||||
|
@ -35,7 +35,7 @@ protected:
|
||||
public:
|
||||
BaseRNN(Ptr<ExpressionGraph> graph, Ptr<Options> options)
|
||||
: graph_(graph), options_(options) {}
|
||||
|
||||
virtual ~BaseRNN() {}
|
||||
virtual Expr transduce(Expr, Expr = nullptr) = 0;
|
||||
virtual Expr transduce(Expr, State, Expr = nullptr) = 0;
|
||||
virtual Expr transduce(Expr, States, Expr = nullptr) = 0;
|
||||
@ -113,6 +113,7 @@ private:
|
||||
|
||||
public:
|
||||
friend RNN;
|
||||
virtual ~SingleLayerRNN() {}
|
||||
|
||||
// @TODO: benchmark whether this concatenation is a good idea
|
||||
virtual Expr transduce(Expr input, Expr mask = nullptr) override {
|
||||
|
@ -17,7 +17,7 @@ protected:
|
||||
public:
|
||||
Backend(DeviceId deviceId, size_t seed)
|
||||
: deviceId_(deviceId), seed_(seed), randomGenerator_(createRandomGenerator(seed, deviceId)) {}
|
||||
|
||||
virtual ~Backend() {};
|
||||
virtual DeviceId getDeviceId() { return deviceId_; };
|
||||
virtual Ptr<RandomGenerator> getRandomGenerator() { return randomGenerator_; }
|
||||
|
||||
|
@ -8,29 +8,40 @@
|
||||
|
||||
namespace marian {
|
||||
namespace cpu {
|
||||
namespace {
|
||||
|
||||
// allocate function for tensor reserve() below.
|
||||
// Needed for AVX512, while not available on all compilers. It seems clang
|
||||
// does not have aligned_alloc for all cstlib versions. If AVX512 is not used
|
||||
// a simple malloc is probably fine.
|
||||
// Should generate a runtime error otherwise as we have a check in the AVX512
|
||||
// functions which tests for alignment.
|
||||
#ifdef _WIN32
|
||||
#define MALLOC(size) _aligned_malloc(size, alignment_)
|
||||
#elif __GNUC__
|
||||
#define MALLOC(size) aligned_alloc(alignment_, size)
|
||||
#else
|
||||
#define MALLOC(size) malloc(size)
|
||||
#endif
|
||||
// Alignment is needed because we use AVX512 and AVX2 vectors. We should fail if we can't allocate aligned memory.
|
||||
|
||||
#ifdef _WIN32
|
||||
#define FREE(ptr) _aligned_free(ptr)
|
||||
void *genericMalloc(size_t alignment, size_t size) {
|
||||
void *ret = _aligned_malloc(size, alignment);
|
||||
ABORT_IF(!ret, "Failed to allocate memory on CPU");
|
||||
return ret;
|
||||
}
|
||||
void genericFree(void *ptr) {
|
||||
_aligned_free(ptr);
|
||||
}
|
||||
#else
|
||||
#define FREE(ptr) free(ptr)
|
||||
// Linux and OS X. There is no fallback to malloc because we need it to be aligned.
|
||||
void *genericMalloc(size_t alignment, size_t size) {
|
||||
// On macos, aligned_alloc is available only on c++17
|
||||
// Furthermore, it requires that the memory requested is an exact multiple of the alignment, otherwise it fails.
|
||||
// posix_memalign is available both Mac (Since 2016) and Linux and in both gcc and clang
|
||||
void *result;
|
||||
// Error could be detected by return value or just remaining nullptr.
|
||||
ABORT_IF(posix_memalign(&result, alignment, size), "Failed to allocate memory on CPU");
|
||||
return result;
|
||||
}
|
||||
void genericFree(void *ptr) {
|
||||
free(ptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace
|
||||
|
||||
Device::~Device() {
|
||||
FREE(data_);
|
||||
genericFree(data_);
|
||||
}
|
||||
|
||||
void Device::reserve(size_t size) {
|
||||
@ -38,14 +49,12 @@ void Device::reserve(size_t size) {
|
||||
ABORT_IF(size < size_ || size == 0,
|
||||
"New size must be larger than old size and larger than 0");
|
||||
|
||||
uint8_t *temp = static_cast<uint8_t*>(genericMalloc(alignment_, size));
|
||||
if(data_) {
|
||||
uint8_t *temp = static_cast<uint8_t*>(MALLOC(size));
|
||||
std::copy(data_, data_ + size_, temp);
|
||||
FREE(data_);
|
||||
data_ = temp;
|
||||
} else {
|
||||
data_ = static_cast<uint8_t*>(MALLOC(size));
|
||||
genericFree(data_);
|
||||
}
|
||||
data_ = temp;
|
||||
size_ = size;
|
||||
}
|
||||
} // namespace cpu
|
||||
|
@ -17,6 +17,7 @@
|
||||
#endif
|
||||
|
||||
using namespace fbgemm;
|
||||
// @TODO: don't use using namespace ...; in header files. Just don't. [UG]
|
||||
#endif // USE_FBGEMM
|
||||
|
||||
namespace marian {
|
||||
@ -96,7 +97,7 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp {
|
||||
|
||||
const std::string type() override { return "packMatFp16"; }
|
||||
|
||||
Shape newShape(Expr a, bool transpose) {
|
||||
Shape newShape(Expr MAYBE_UNUSED a, bool MAYBE_UNUSED transpose) {
|
||||
#if USE_FBGEMM
|
||||
auto shapeMat = a->shape();
|
||||
// Should be 2D - weight matrix
|
||||
@ -115,15 +116,14 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp {
|
||||
packsize_);
|
||||
|
||||
Shape outShape({(int)packsize_});
|
||||
|
||||
return outShape;
|
||||
#else // USE_FBGEMM
|
||||
#else
|
||||
ABORT("Packed GEMM requires a build with USE_FBGEMM enabled");
|
||||
return Shape();
|
||||
#endif // USE_FBGEMM
|
||||
}
|
||||
};
|
||||
|
||||
;
|
||||
// Pack a matrix (int8) into cache utilization efficient way (block format) together with quantization into int8
|
||||
// PackMatrix packMat_: the type of packed matrix - A or B matrix
|
||||
// marian::Type packType_: the type the input matrix is packed - packed8avx2 or packed8avx512
|
||||
@ -132,6 +132,7 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp {
|
||||
// int ncol_: the number of columns
|
||||
// uint64_t packsize_: the size of the packed matrix
|
||||
// (the size of int8 packed B from fbgemm:PackAWithQuantRowOffset + quantization scale, offset and zero point)
|
||||
|
||||
struct FbgemmPacked8PackNodeOp : public UnaryNodeOp {
|
||||
PackMatrix packMat_;
|
||||
marian::Type packType_;
|
||||
@ -180,19 +181,21 @@ struct FbgemmPacked8PackNodeOp : public UnaryNodeOp {
|
||||
|
||||
const std::string type() override { return "packMatInt8"; }
|
||||
|
||||
Shape newShape(Expr a, bool transpose) {
|
||||
#if USE_FBGEMM
|
||||
Shape newShape(Expr a, bool transpose) {
|
||||
fbgemmPacked8PackInfo(a->shape(), packType_, transpose, nrow_, ncol_, packsize_);
|
||||
Shape outShape({(int)packsize_});
|
||||
|
||||
return outShape;
|
||||
#else // USE_FBGEMM
|
||||
}
|
||||
#else
|
||||
Shape newShape(Expr /*a*/, bool /*transpose*/) {
|
||||
ABORT("Packed GEMM requires a build with USE_FBGEMM enabled");
|
||||
return Shape();
|
||||
#endif // USE_FBGEMM
|
||||
}
|
||||
#endif // USE_FBGEMM
|
||||
};
|
||||
|
||||
|
||||
// Affine transform (matrix multiplication) using packed B matrix
|
||||
// float scalar_: scalar multiplier
|
||||
// size_t m_: the number of rows in A and C
|
||||
@ -202,7 +205,6 @@ struct FbgemmPacked8PackNodeOp : public UnaryNodeOp {
|
||||
// bool transB_: transpose B
|
||||
class FbgemmPacked16AffineNodeOp : public NaryNodeOp {
|
||||
private:
|
||||
float scalar_;
|
||||
size_t m_;
|
||||
size_t n_;
|
||||
size_t k_;
|
||||
@ -210,9 +212,8 @@ private:
|
||||
bool transB_;
|
||||
|
||||
public:
|
||||
FbgemmPacked16AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float scalar)
|
||||
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32),
|
||||
scalar_(scalar) {
|
||||
FbgemmPacked16AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float /*scalar*/)
|
||||
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32)/*, scalar_(scalar)*/ {
|
||||
transA_ = transA;
|
||||
transB_ = transB;
|
||||
m_ = nodes[0]->shape().elements() / nodes[0]->shape()[-1];
|
||||
@ -281,7 +282,6 @@ public:
|
||||
// bool transB_: transpose B
|
||||
class FbgemmPacked8AffineNodeOp : public NaryNodeOp {
|
||||
private:
|
||||
float scalar_;
|
||||
size_t m_;
|
||||
size_t n_;
|
||||
size_t k_;
|
||||
@ -289,9 +289,8 @@ private:
|
||||
bool transB_;
|
||||
|
||||
public:
|
||||
FbgemmPacked8AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float scalar)
|
||||
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32),
|
||||
scalar_(scalar) {
|
||||
FbgemmPacked8AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float /*scalar*/)
|
||||
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32)/*, scalar_(scalar) */ {
|
||||
transA_ = transA;
|
||||
transB_ = transB;
|
||||
m_ = nodes[0]->shape().elements() / nodes[0]->shape()[-1];
|
||||
@ -302,7 +301,7 @@ public:
|
||||
size_t l = bShape.elements() / bShape[-1];
|
||||
n_ = bShape[-1];
|
||||
if(transB)
|
||||
std::swap(l, n_);
|
||||
std::swap(l, n_);
|
||||
}
|
||||
|
||||
Shape newShape(Expr a, Shape bShape, bool transA, bool transB) {
|
||||
@ -369,9 +368,9 @@ static inline Expr affine(Expr a, Expr b, Shape bShape, Expr c, bool transA, boo
|
||||
Type elementType = b->value_type();
|
||||
|
||||
if (elementType == Type::packed16)
|
||||
return Expression<cpu::variant::FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
return Expression<FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
else if (isPacked(elementType) && sizeOf(elementType) == 1)
|
||||
return Expression<cpu::variant::FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
return Expression<FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
else {
|
||||
ABORT("Only int8 and fp16 are available. {}", elementType);
|
||||
return nullptr;
|
||||
@ -380,9 +379,9 @@ static inline Expr affine(Expr a, Expr b, Shape bShape, Expr c, bool transA, boo
|
||||
|
||||
static inline Expr pack(Type elementType, Expr a, PackMatrix packMat, bool transpose, float clipValue) {
|
||||
if (elementType == Type::packed16)
|
||||
return Expression<cpu::variant::FbgemmPacked16PackNodeOp>(a, packMat, transpose, clipValue);
|
||||
return Expression<FbgemmPacked16PackNodeOp>(a, packMat, transpose, clipValue);
|
||||
else if (isPacked(elementType) && sizeOf(elementType) == 1)
|
||||
return Expression<cpu::variant::FbgemmPacked8PackNodeOp>(a, packMat, elementType, transpose, clipValue);
|
||||
return Expression<FbgemmPacked8PackNodeOp>(a, packMat, elementType, transpose, clipValue);
|
||||
else {
|
||||
ABORT("Only int8 and fp16 are available. {}", elementType);
|
||||
return nullptr;
|
||||
@ -394,9 +393,9 @@ static inline Expr dot(Expr a, Expr b, Shape bShape, bool transA, bool transB, f
|
||||
Type elementType = b->value_type();
|
||||
|
||||
if (elementType == Type::packed16)
|
||||
return Expression<cpu::variant::FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
return Expression<FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
else if (isPacked(elementType) && sizeOf(elementType) == 1)
|
||||
return Expression<cpu::variant::FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
return Expression<FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
|
||||
else {
|
||||
ABORT("Only int8 and fp16 are available. {}", elementType);
|
||||
return nullptr;
|
||||
|
@ -20,7 +20,7 @@ namespace marian {
|
||||
|
||||
namespace cpu {
|
||||
|
||||
void IsNaN(const Tensor in, Ptr<Allocator> allocator, bool& /*isNaN*/, bool& /*isInf*/) {
|
||||
void IsNaN(const Tensor /*in*/, Ptr<Allocator> /*allocator*/, bool& /*isNaN*/, bool& /*isInf*/) {
|
||||
ABORT("Not implemented");
|
||||
}
|
||||
|
||||
@ -214,9 +214,11 @@ void Transpose0213(Tensor out, Tensor in) {
|
||||
}
|
||||
}
|
||||
|
||||
// This function is called only when MKL is available.
|
||||
#if MKL_FOUND
|
||||
// Given a 4D array, transpose (swap) the initial 3 dimensions while keeping the last dimension.
|
||||
// e.g. 1234 --> 2134, 1234 --> 3214 (4 is always kept).
|
||||
// This is an optimized version for swapping first 3 dimensions
|
||||
// This is an optimized version for swapping first 3 dimensions
|
||||
// assuming the last dimension is large enough to get benefits from vectorized copy.
|
||||
//
|
||||
// @param out output tensor
|
||||
@ -225,14 +227,13 @@ void Transpose0213(Tensor out, Tensor in) {
|
||||
template <bool add>
|
||||
void TransposeFirst3In4(Tensor out, Tensor in, const std::vector<int>& vAxis) {
|
||||
ABORT_IF(vAxis.size() != 4, "This function handles only 4D arrays.");
|
||||
#if MKL_FOUND
|
||||
int innermost = in->shape()[-1];
|
||||
|
||||
int l1 = in->shape()[vAxis[0]];
|
||||
int l2 = in->shape()[vAxis[1]];
|
||||
int l3 = in->shape()[vAxis[2]];
|
||||
|
||||
// find the mapping between the transposed output dimensional indices (oi, oj, ok)
|
||||
// find the mapping between the transposed output dimensional indices (oi, oj, ok)
|
||||
// and original input dimensional indices (i, j, k)
|
||||
int oi, oj, ok;
|
||||
#pragma omp parallel for
|
||||
@ -275,11 +276,8 @@ void TransposeFirst3In4(Tensor out, Tensor in, const std::vector<int>& vAxis) {
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
// it shouldn't come into here. This function is called only when MKL is available.
|
||||
ABORT("Should not get here");
|
||||
#endif // MKL_FOUND
|
||||
}
|
||||
#endif // MKL_FOUND
|
||||
|
||||
inline void transpose4x4_SSE(const float* A,
|
||||
float* B,
|
||||
@ -656,7 +654,7 @@ void SelectAxis2(Tensor out,
|
||||
|
||||
functional::Shape outShape = out->shape();
|
||||
functional::Shape inShape = in->shape();
|
||||
|
||||
|
||||
auto idxData = indices->data<IndexType>();
|
||||
auto odata = out->data();
|
||||
const auto idata = in->data();
|
||||
|
@ -15,11 +15,11 @@ protected:
|
||||
|
||||
public:
|
||||
RandomGenerator(size_t seed) : seed_(seed) { }
|
||||
|
||||
virtual ~RandomGenerator() {}
|
||||
virtual void uniform(Tensor, float a, float b) = 0;
|
||||
virtual void normal(Tensor, float mean, float stddev) = 0;
|
||||
};
|
||||
|
||||
Ptr<RandomGenerator> createRandomGenerator(size_t /*seed*/, DeviceId);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -25,7 +25,7 @@
|
||||
namespace marian {
|
||||
|
||||
template <typename InIt, typename OutIt>
|
||||
void copy(Ptr<Backend> backend, const InIt beg, const InIt end, OutIt it) {
|
||||
void copy(Ptr<Backend>& MAYBE_UNUSED backend, const InIt beg, const InIt end, OutIt it) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(backend->getDeviceId().type == DeviceType::gpu)
|
||||
gpu::copy(backend, beg, end, it);
|
||||
@ -119,7 +119,7 @@ DISPATCH3(Concatenate, marian::Tensor, const std::vector<marian::Tensor>&, int)
|
||||
|
||||
// clang-format on
|
||||
|
||||
// Bernoulli(tensor, 0.5f, 2.f, -1.f) generates a tensor composed of 50% of 1 and 50% of -1.
|
||||
// Bernoulli(tensor, 0.5f, 2.f, -1.f) generates a tensor composed of 50% of 1 and 50% of -1.
|
||||
static inline void Bernoulli(Tensor resultTensor, float keepProb, float scale = 1.f, float shift = 0.f) {
|
||||
// in-place uniform distribution
|
||||
auto rnd = resultTensor->getBackend()->getRandomGenerator();
|
||||
@ -190,7 +190,7 @@ void LayerNormalizationGrad(Tensor gradX,
|
||||
}
|
||||
|
||||
static inline void LayerNormalizationGrad(
|
||||
Ptr<Allocator> allocator,
|
||||
Ptr<Allocator> MAYBE_UNUSED allocator,
|
||||
Tensor gradX,
|
||||
Tensor gradGamma,
|
||||
Tensor gradBeta,
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "marian.h"
|
||||
#include "common/timer.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int main(int /*argc*/, char** /*argv*/) {
|
||||
using namespace marian;
|
||||
|
||||
{
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include <fstream>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
ABORT_IF(argc != 3, "FATAL ERROR: Incorrect number of command line arguments "
|
||||
"(expected: 2) for command {}.",argv[0]);
|
||||
|
||||
SQLite::Database db("corpus.db", SQLite::OPEN_READWRITE|SQLite::OPEN_CREATE);
|
||||
db.exec("PRAGMA temp_store_directory = '/data1/marcinjd';");
|
||||
|
@ -38,7 +38,7 @@ Ptr<ICommunicator> createCommunicator(
|
||||
}
|
||||
|
||||
// the actual implementation is inside communicator.cu
|
||||
return New<NCCLCommunicator>(graphs, mpi);
|
||||
return New<NCCLCommunicator>(graphs, mpi);
|
||||
#else // no CUDA or no NCCL
|
||||
noNccl; // (unused)
|
||||
return New<DefaultCommunicator>(graphs, mpi);
|
||||
@ -141,7 +141,7 @@ public:
|
||||
FakeMPIWrapper(bool) {
|
||||
LOG(warn, "Compiled without MPI support. Falling back to FakeMPIWrapper");
|
||||
}
|
||||
|
||||
virtual ~FakeMPIWrapper() {}
|
||||
virtual size_t myMPIRank() const override { return 0; };
|
||||
virtual size_t numMPIProcesses() const override { return 1; };
|
||||
|
||||
|
@ -156,11 +156,8 @@ public:
|
||||
void scatterReduceAndResetGrads() const override {
|
||||
const_cast<DefaultCommunicator*>(this)->lazyInit();
|
||||
|
||||
int totalSize = (int)graphs_[0]->params()->vals()->size();
|
||||
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
|
||||
|
||||
// Gather gradients from different devices into current gradient shards
|
||||
auto scatter = [this, shardSize](size_t idx, size_t begin, size_t end) {
|
||||
auto scatter = [this](size_t idx, size_t begin, size_t end) {
|
||||
auto curGrad = graphs_[idx]->params()->grads()->subtensor(begin, end-begin);
|
||||
|
||||
// collect and sum gradients
|
||||
@ -176,7 +173,7 @@ public:
|
||||
};
|
||||
|
||||
// reset gradients outside current shard
|
||||
auto reset = [this, shardSize](size_t idx, size_t begin, size_t end) {
|
||||
auto reset = [this](size_t idx, size_t begin, size_t end) {
|
||||
auto grad = graphs_[idx]->params()->grads();
|
||||
if (begin > 0)
|
||||
grad->subtensor(0, begin)->set(0);
|
||||
@ -189,11 +186,9 @@ public:
|
||||
}
|
||||
|
||||
void allGatherParams() const override {
|
||||
int totalSize = (int)graphs_[0]->params()->vals()->size();
|
||||
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
|
||||
|
||||
// Update all graphs with parameter shard
|
||||
auto gather = [this, shardSize](size_t idx, size_t begin, size_t end) {
|
||||
auto gather = [this](size_t idx, size_t begin, size_t end) {
|
||||
auto getShard = [&](Ptr<ExpressionGraph> graph) {
|
||||
return graph->params()->vals()->subtensor(begin, end-begin);
|
||||
};
|
||||
|
@ -118,7 +118,7 @@ public:
|
||||
}
|
||||
|
||||
// Convert a tensor into a sparse tensor format
|
||||
void fromDense(Tensor t) {
|
||||
void fromDense(Tensor MAYBE_UNUSED t) {
|
||||
if(backend_->getDeviceId().type == DeviceType::cpu) {
|
||||
ABORT("Gradient Dropping for CPU is not yet supported");
|
||||
}
|
||||
|
@ -54,10 +54,10 @@ public:
|
||||
* number of devices, which is passed in as the 'multiplier'.
|
||||
*/
|
||||
// @TODO: Can this be made const? It seems wrong to have a stateful method that still returns a result.
|
||||
virtual Ptr<data::BatchStats> collectStats(Ptr<ExpressionGraph> graph,
|
||||
Ptr<models::ICriterionFunction> model,
|
||||
const std::vector<Ptr<Vocab>>& vocabs,
|
||||
double multiplier = 1.) {
|
||||
Ptr<data::BatchStats> collectStats(Ptr<ExpressionGraph> graph,
|
||||
Ptr<models::ICriterionFunction> model,
|
||||
const std::vector<Ptr<Vocab>>& vocabs,
|
||||
double multiplier = 1.) {
|
||||
auto stats = New<data::BatchStats>();
|
||||
|
||||
size_t numFiles = options_->get<std::vector<std::string>>("train-sets").size();
|
||||
@ -92,8 +92,8 @@ public:
|
||||
maxBatch *= 2;
|
||||
}
|
||||
|
||||
// Do a binary search for maxmimum batch size that fits into given workspace memory
|
||||
// for a tested sentence length.
|
||||
// Do a binary search for maxmimum batch size that fits into given workspace memory
|
||||
// for a tested sentence length.
|
||||
for(size_t i = step; i <= maxLength; i += step) {
|
||||
size_t start = 1;
|
||||
size_t end = maxBatch;
|
||||
|
@ -64,7 +64,7 @@ public:
|
||||
void save(Ptr<ExpressionGraph>, bool final = false);
|
||||
|
||||
// @TODO: give it a fake batch generator which own vocabs instead of passing vocabs
|
||||
Ptr<data::BatchStats> collectStats(const std::vector<Ptr<Vocab>>& vocabs) {
|
||||
virtual Ptr<data::BatchStats> collectStats(const std::vector<Ptr<Vocab>>& vocabs) {
|
||||
return GraphGroup::collectStats(graphs_[0], builders_[0], vocabs);
|
||||
}
|
||||
|
||||
|
@ -63,7 +63,6 @@ private:
|
||||
Tensor paramsAvg_;
|
||||
std::vector<float> accGradientsSync_cpu;
|
||||
std::vector<float> receiveBuffer_cpu;
|
||||
bool synchronization_happened{false};
|
||||
|
||||
Ptr<OptimizerBase> syncOptimizer_;
|
||||
|
||||
|
@ -26,7 +26,6 @@ class SyncGraphGroup : public GraphGroup, public ExponentialSmoothing {
|
||||
// state for update()
|
||||
bool first_{ true }; // gets interpreted and cleared by update()
|
||||
std::vector<Ptr<data::Batch>> pendingBatches_; // in case of dynamic MB-size scaling, we temporarly buffer up batches across update() calls until enough
|
||||
size_t typicalTrgWords_{}; // typical batch size in words (labels), 0 if unknown (e.g. specified in sentences)
|
||||
double updateMultiplier_{1}; // multiplier not applied in collectStats() (no multiplier if not mini-batch-fit)
|
||||
|
||||
void initialize(const Ptr<data::Batch>& exampleBatch);
|
||||
|
@ -13,6 +13,7 @@ class TrainingState;
|
||||
|
||||
class TrainingObserver {
|
||||
public:
|
||||
virtual ~TrainingObserver() {}
|
||||
virtual void init(TrainingState&) {}
|
||||
virtual void actAfterEpoch(TrainingState&) {}
|
||||
virtual void actAfterBatches(TrainingState&) {}
|
||||
|
@ -36,6 +36,7 @@ protected:
|
||||
|
||||
public:
|
||||
ValidatorBase(bool lowerIsBetter) : lowerIsBetter_(lowerIsBetter), lastBest_{initScore()} {}
|
||||
virtual ~ValidatorBase() {}
|
||||
|
||||
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
|
||||
Ptr<const TrainingState> state) = 0;
|
||||
@ -51,6 +52,7 @@ public:
|
||||
template <class DataSet, class BuilderType> // @TODO: BuilderType doesn't really serve a purpose here? Review and remove.
|
||||
class Validator : public ValidatorBase {
|
||||
public:
|
||||
virtual ~Validator() {}
|
||||
Validator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options, bool lowerIsBetter = true)
|
||||
: ValidatorBase(lowerIsBetter),
|
||||
vocabs_(vocabs),
|
||||
@ -137,6 +139,7 @@ class CrossEntropyValidator : public Validator<data::Corpus, models::ICriterionF
|
||||
|
||||
public:
|
||||
CrossEntropyValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
|
||||
virtual ~CrossEntropyValidator() {}
|
||||
|
||||
std::string type() override { return options_->get<std::string>("cost-type"); }
|
||||
|
||||
@ -148,6 +151,7 @@ protected:
|
||||
class AccuracyValidator : public Validator<data::Corpus, models::IModel> {
|
||||
public:
|
||||
AccuracyValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
|
||||
virtual ~AccuracyValidator() {}
|
||||
|
||||
std::string type() override { return "accuracy"; }
|
||||
|
||||
@ -161,6 +165,7 @@ private:
|
||||
|
||||
public:
|
||||
BertAccuracyValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options, bool evalMaskedLM);
|
||||
virtual ~BertAccuracyValidator() {}
|
||||
|
||||
std::string type() override {
|
||||
if(evalMaskedLM_)
|
||||
@ -177,6 +182,7 @@ protected:
|
||||
class ScriptValidator : public Validator<data::Corpus, models::IModel> {
|
||||
public:
|
||||
ScriptValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
|
||||
virtual ~ScriptValidator() {}
|
||||
|
||||
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
|
||||
Ptr<const TrainingState> /*ignored*/) override;
|
||||
@ -193,6 +199,7 @@ protected:
|
||||
class TranslationValidator : public Validator<data::Corpus, models::IModel> {
|
||||
public:
|
||||
TranslationValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
|
||||
virtual ~TranslationValidator() {}
|
||||
|
||||
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
|
||||
Ptr<const TrainingState> state) override;
|
||||
@ -212,6 +219,7 @@ protected:
|
||||
class BleuValidator : public Validator<data::Corpus, models::IModel> {
|
||||
public:
|
||||
BleuValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options, bool detok = false);
|
||||
virtual ~BleuValidator() {}
|
||||
|
||||
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
|
||||
Ptr<const TrainingState> state) override;
|
||||
|
@ -11,6 +11,7 @@ namespace marian {
|
||||
|
||||
class PrintingStrategy {
|
||||
public:
|
||||
virtual ~PrintingStrategy() {}
|
||||
virtual bool shouldBePrinted(long) = 0;
|
||||
};
|
||||
|
||||
|
@ -10,6 +10,8 @@ namespace marian {
|
||||
|
||||
class ScorerState {
|
||||
public:
|
||||
virtual ~ScorerState(){}
|
||||
|
||||
virtual Logits getLogProbs() const = 0;
|
||||
|
||||
virtual void blacklist(Expr /*totalCosts*/, Ptr<data::CorpusBatch> /*batch*/){};
|
||||
@ -24,6 +26,8 @@ public:
|
||||
Scorer(const std::string& name, float weight)
|
||||
: name_(name), weight_(weight) {}
|
||||
|
||||
virtual ~Scorer(){}
|
||||
|
||||
std::string getName() { return name_; }
|
||||
float getWeight() { return weight_; }
|
||||
|
||||
@ -53,6 +57,7 @@ protected:
|
||||
|
||||
public:
|
||||
ScorerWrapperState(Ptr<DecoderState> state) : state_(state) {}
|
||||
virtual ~ScorerWrapperState() {}
|
||||
|
||||
virtual Ptr<DecoderState> getState() { return state_; }
|
||||
|
||||
@ -88,6 +93,8 @@ public:
|
||||
encdec_(std::static_pointer_cast<IEncoderDecoder>(encdec)),
|
||||
ptr_{ptr} {}
|
||||
|
||||
virtual ~ScorerWrapper() {}
|
||||
|
||||
virtual void init(Ptr<ExpressionGraph> graph) override {
|
||||
graph->switchParams(getName());
|
||||
if(ptr_)
|
||||
|
Loading…
Reference in New Issue
Block a user