Add support for compiling on Mac (and clang) (#598)

* Compile Marian on Mac with clang. Two linker errors left

* MacOS has a different definition for unsigned long

* Find OpenBLAS on mac

* Fix a typo in the BLAS detection

* Simplify and add comments

* Refactor CPU allocation code. Do not fall back to malloc

* Fix compilation warning on gcc

* Refactor memory allocation

* Make things compile with clang-8 with fewer warnings.

* Eliminate clang warnings when compiling examples and when compiling without MKL

* Added USE_MKL option to compile without MKL for debugging, even when MKL is installed

* Fixed issues with compiling examples with clang

* Fix compile errors with clang in src/tests.

* Fix missing whitespace in error message in src/tests/sqlite.cpp.

* Responding to Frank Seide's code review.

* Eliminate clang warnings when compiling with -DUSE_FBGEMM=on.

* Fix compilation on gcc 8

* Get Marian to compile with Clang-10.

* Fix Clang-8 warnings when compiling with marian-server

* Add more comments and explicit unsigned long long for windows

* Pull in fbgemm that supports mac

* Fix warning flags order in CMakeLists.txt

Co-authored-by: Kenneth Heafield <kpu@users.noreply.github.com>
Co-authored-by: Ulrich Germann <ulrich.germann@gmail.com>
Co-authored-by: Roman Grundkiewicz <romang@amu.edu.pl>
Roman Grundkiewicz 2020-03-05 21:08:23 +00:00 committed by GitHub
parent 67b055fe4a
commit 00d2e999e3
69 changed files with 428 additions and 236 deletions

View File

@ -24,6 +24,7 @@ option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF)
option(USE_CUDNN "Use CUDNN library" OFF)
option(USE_DOXYGEN "Build documentation with Doxygen" ON)
option(USE_FBGEMM "Use FBGEMM" OFF)
option(USE_MKL "Compile with MKL support" ON)
option(USE_MPI "Use MPI library" OFF)
option(USE_NCCL "Use NCCL library" ON)
option(USE_SENTENCEPIECE "Download and compile SentencePiece" OFF)
@ -33,7 +34,7 @@ option(USE_STATIC_LIBS "Link statically against non-system libs" OFF)
if(USE_CCACHE)
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
message(STATUS "Found and will be using ccache for faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).")
message(STATUS "Will be using ccache for faster repeat compilation (use cmake -DUSE_CCACHE=off to disable).")
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
else(CCACHE_PROGRAM)
message(WARNING "Compilation with ccache requested but no ccache found.")
@ -141,20 +142,32 @@ else(MSVC)
add_definitions(-DUSE_FBGEMM=1)
endif(USE_FBGEMM)
set(DISABLE_GLOBALLY "-Wno-unused-result")
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
# Clang-10.0.0 complains when CUDA is newer than 10.1
set(CLANG_IGNORE_UNKNOWN_CUDA "-Wno-unknown-cuda-version")
endif()
set(DISABLE_GLOBALLY "-Wno-unused-result -Wno-unknown-warning-option ${CLANG_IGNORE_UNKNOWN_CUDA}")
# These are used in src/CMakeLists.txt on a per-target basis
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wno-unused-result; -Wno-deprecated; -Wno-pragmas; -Wno-unused-parameter; -Wextra; -Wno-unused-function;
-Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; -Wno-missing-field-initializers;)
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated;
-Wno-pragmas; -Wno-unused-parameter; -Wno-unused-function;
-Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare;
-Wno-missing-field-initializers;)
# This warning does not exist prior to gcc 5.0
if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
list(APPEND ALL_WARNINGS -Wsuggest-override)
list(APPEND ALL_WARNINGS -Wsuggest-override -Wno-int-in-bool-context)
endif()
set(CMAKE_CXX_FLAGS "-std=c++11 -pthread -Wl,--no-as-needed -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -m64 -funroll-loops -ffinite-math-only -g -rdynamic")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -rdynamic")
if(CMAKE_COMPILER_IS_GNUCC)
# these flags are not known to clang
set(CMAKE_GCC_FLAGS "-Wl,--no-as-needed")
set(CMAKE_RDYNAMIC_FLAG "-rdynamic")
endif(CMAKE_COMPILER_IS_GNUCC)
set(CMAKE_CXX_FLAGS "-std=c++11 -pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_CXX_FLAGS_SLIM "-Ofast -m64 -funroll-loops -ffinite-math-only -DNDEBUG")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -pg")
@ -162,9 +175,9 @@ else(MSVC)
set(CMAKE_CXX_FLAGS_PROFUSE "${CMAKE_CXX_FLAGS_RELEASE} -fprofile-use -fprofile-correction")
# these need to be set separately
set(CMAKE_C_FLAGS "-pthread -Wl,--no-as-needed -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -ffinite-math-only -g -rdynamic")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g -rdynamic")
set(CMAKE_C_FLAGS "-pthread ${CMAKE_GCC_FLAGS} -fPIC ${DISABLE_GLOBALLY} -march=${BUILD_ARCH} ${INTRINSICS}")
set(CMAKE_C_FLAGS_RELEASE "-O3 -m64 -funroll-loops -ffinite-math-only -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_RDYNAMIC_FLAG}")
set(CMAKE_C_FLAGS_SLIM "-O3 -m64 -funroll-loops -ffinite-math-only -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE}")
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
@ -204,7 +217,7 @@ if(CUDA_FOUND)
if((CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0") AND (CMAKE_VERSION VERSION_LESS "3.12.2"))
message(WARNING "On some Unix systems CUDA 10.0+ requires CMake 3.12.2+; you use CMake ${CMAKE_VERSION}")
endif()
if(COMPILE_CUDA_SM35)
LIST(APPEND COMPUTE -arch=sm_35; -gencode=arch=compute_35,code=sm_35;) # Tesla K40 and above
endif(COMPILE_CUDA_SM35)
@ -323,13 +336,15 @@ if(USE_MPI)
endif(USE_MPI)
if(COMPILE_CPU)
find_package(MKL)
if(USE_MKL)
find_package(MKL)
endif(USE_MKL)
if(MKL_FOUND)
include_directories(${MKL_INCLUDE_DIR})
set(EXT_LIBS ${EXT_LIBS} ${MKL_LIBRARIES})
add_definitions(-DBLAS_FOUND=1 -DMKL_FOUND=1)
else(MKL_FOUND)
set(BLA_VENDOR "OpenBLAS")
set(BLAS_VENDOR "OpenBLAS")
find_package(BLAS)
if(BLAS_FOUND)
include(FindCBLAS)

View File

@ -54,7 +54,7 @@ MACRO(CHECK_ALL_LIBRARIES LIBRARIES INCLUDE _prefix _name _flags _list _include
IF(APPLE)
FIND_LIBRARY(${_prefix}_${_library}_LIBRARY
NAMES ${_library}
PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 ENV
PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 /usr/local/opt/openblas/lib ENV
DYLD_LIBRARY_PATH
)
ELSE(APPLE)

View File

@ -15,12 +15,22 @@ if(USE_FBGEMM)
if(NOT MSVC)
# only locally disabled for the 3rd_party folder
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-value -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused")
endif()
set(FBGEMM_BUILD_TESTS OFF CACHE BOOL "Disable fbgemm tests")
set(FBGEMM_BUILD_BENCHMARKS OFF CACHE BOOL "Disable fbgemm benchmark")
add_subdirectory(./fbgemm)
# asmjit (3rd-party submodule of fbgemm) sets -Wall -Wextra near the end of
# the compile options, invalidating any -Wno-... flags that we may have set
# earlier. Let's remove them.
get_property(ASMJIT_COMPILE_OPTIONS TARGET asmjit PROPERTY COMPILE_OPTIONS)
list(REMOVE_ITEM ASMJIT_COMPILE_OPTIONS -Wall -Wextra)
set_property(TARGET asmjit PROPERTY COMPILE_OPTIONS ${ASMJIT_COMPILE_OPTIONS})
message(" ASMJIT COMPILE FLAGS: ${ASMJIT_COMPILE_OPTIONS}")
endif(USE_FBGEMM)
if(USE_SENTENCEPIECE)
@ -39,7 +49,7 @@ if(USE_SENTENCEPIECE)
message(WARNING "You are compiling SentencePiece binaries with -DUSE_STATIC_LIBS=on. \
This will cause spm_train to segfault. No need to worry if you do not intend to use that binary. \
Marian support for SentencePiece will work fine.")
set(SPM_ENABLE_SHARED OFF CACHE BOOL "Builds shared libaries in addition to static libraries." FORCE)
set(SPM_TCMALLOC_STATIC ON CACHE BOOL "Link static library of TCMALLOC." FORCE)
else(USE_STATIC_LIBS)
@ -51,8 +61,19 @@ if(USE_SENTENCEPIECE)
include_directories(./sentencepiece)
set_target_properties(spm_encode spm_decode spm_train spm_normalize spm_export_vocab
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
foreach(t sentencepiece sentencepiece_train sentencepiece_train-static
spm_decode spm_encode spm_export_vocab spm_normalize spm_train)
set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-tautological-compare -Wno-unused")
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
set_property(TARGET ${t} APPEND_STRING PROPERTY COMPILE_FLAGS " -Wno-range-loop-construct")
endif()
# get_property(SENTENCEPIECE_COMPILE_FLAGS TARGET ${t} PROPERTY COMPILE_FLAGS)
# message("-- SENTENCPIECE: compile flags for target ${t}: ${SENTENCEPIECE_COMPILE_FLAGS}")
endforeach(t)
endif()
if(USE_STATIC_LIBS)
set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
@ -63,6 +84,22 @@ include_directories(./SQLiteCpp/include)
include_directories(./CLI)
include_directories(./pathie-cpp/include)
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
#set_target_properties(SQLiteCpp PROPERTIES COMPILE_FLAGS
set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS
" -Wno-parentheses-equality -Wno-unused-value")
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0)
set_property(TARGET SQLiteCpp APPEND_STRING PROPERTY COMPILE_FLAGS
" -Wno-implicit-int-float-conversion")
endif()
set_property(TARGET libyaml-cpp APPEND_STRING PROPERTY COMPILE_FLAGS
" -fPIC -Wno-unused-value")
set_property(TARGET pathie-cpp APPEND_STRING PROPERTY COMPILE_FLAGS
" -fPIC -Wno-unused-value")
endif()
include_directories(./zlib)
include(ExternalProject)

@ -1 +1 @@
Subproject commit 84e66a976046180187724aff60a236c5378fde7c
Subproject commit f78e60988329b9207d086c743cafce1ac1bea3ab

View File

@ -186,7 +186,7 @@ inline HalfFloat& HalfFloat::operator= (float other)
inline bool HalfFloat::operator== (HalfFloat other) const
{
// +0 and -0 are considered to be equal
if (!(bits << 1u) && !(other.bits << 1u))return true;
if ((bits << 1u) == 0 && (other.bits << 1u) == 0) return true;
return bits == other.bits && !this->IsNaN();
}
@ -194,7 +194,7 @@ inline bool HalfFloat::operator== (HalfFloat other) const
inline bool HalfFloat::operator!= (HalfFloat other) const
{
// +0 and -0 are considered to be equal
if (!(bits << 1u) && !(other.bits << 1u))return false;
if ((bits << 1u) == 0 && (other.bits << 1u) == 0) return false;
return bits != other.bits || this->IsNaN();
}

View File

@ -31,7 +31,7 @@
#include "../include/path.hpp"
#include "../include/errors.hpp"
#if defined(__unix__)
#if defined(__unix__) || defined(__APPLE__)
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>

View File

@ -902,7 +902,7 @@ Path Path::pwd()
*/
Path Path::exe()
{
#if defined(__linux__)
#if defined(__linux__) || defined(__APPLE__)
char buf[PATH_MAX];
ssize_t size = ::readlink("/proc/self/exe", buf, PATH_MAX);

View File

@ -143,7 +143,7 @@ std::string Pathie::convert_encodings(const char* from_encoding, const char* to_
errno = 0;
errsav = 0;
#ifdef BSD
#if defined(BSD) && ! defined(__APPLE__) // MacOS evolved from BSD, so it is also captured by the BSD define, but iconv on MacOS behaves differently
// What the heck. FreeBSD violates POSIX.1-2008: it declares iconv()
// differently than mandated by POSIX: http://pubs.opengroup.org/onlinepubs/9699919799/functions/iconv.html
// (it declares a `const' where it must not be).
@ -181,11 +181,10 @@ std::string Pathie::convert_encodings(const char* from_encoding, const char* to_
std::string Pathie::utf8_to_filename(const std::string& utf8)
{
bool fs_encoding_is_utf8 = false;
char* fsencoding = NULL;
#if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX)
fs_encoding_is_utf8 = true;
#else
char* fsencoding = NULL;
fsencoding = nl_langinfo(CODESET);
fs_encoding_is_utf8 = (strcmp(fsencoding, "UTF-8") == 0);
#endif
@ -206,11 +205,10 @@ std::string Pathie::utf8_to_filename(const std::string& utf8)
std::string Pathie::filename_to_utf8(const std::string& native_filename)
{
bool fs_encoding_is_utf8 = false;
char* fsencoding = NULL;
#if defined(__APPLE__) || defined(PATHIE_ASSUME_UTF8_ON_UNIX)
fs_encoding_is_utf8 = true;
#else
char* fsencoding = NULL;
fsencoding = nl_langinfo(CODESET);
fs_encoding_is_utf8 = (strcmp(fsencoding, "UTF-8") == 0);
#endif

View File

@ -27,7 +27,7 @@ static std::string strerror()
{
buff = "Unknown error";
}
#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && ! _GNU_SOURCE
#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || __APPLE__) && ! _GNU_SOURCE
// XSI-compliant strerror_r()
if (strerror_r(errno, &buff[0], buff.size()) != 0)
{

View File

@ -215,6 +215,10 @@ if(COMPILE_SERVER)
set(EXECUTABLES ${EXECUTABLES} marian_server)
endif(COMPILE_SERVER)
if(APPLE) # This is a dependency of pathie but I can't seem to link it into that CMakeLists because we're not compiling it as a library.
set(EXT_LIBS ${EXT_LIBS} iconv)
endif()
foreach(exec ${EXECUTABLES})
target_link_libraries(${exec} marian ${EXT_LIBS} ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})
if(CUDA_FOUND)

View File

@ -44,7 +44,7 @@ int main(int argc, char **argv) {
// Error Codes for error code meanings
// http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html
translate.on_error = [](Ptr<WSServer::Connection> connection,
translate.on_error = [](Ptr<WSServer::Connection> /*connection*/,
const SimpleWeb::error_code &ec) {
LOG(error, "Connection error: ({}) {}", ec.value(), ec.message());
};

View File

@ -10,6 +10,21 @@
#include <string>
#include <vector>
// The macro MAYBE_UNUSED is used to selectively disable
// unused-variable warnings. C++17 defines the attribute
// [[maybe_unused]], but we are not on C++17 yet; we can switch to it once we are.
// The compilers gcc and clang (and maybe others) define
// __has_attribute and support __attribute__((unused)) in C++11.
#if defined __has_attribute
# if __has_attribute(unused)
# define MAYBE_UNUSED __attribute__((unused))
# else
# define MAYBE_UNUSED
# endif
#endif
#define THREAD_GUARD(body) [&]() { body; }() // test if THREAD_GUARD is necessary, remove if no problems occur.
#define NodeOp(op) [=]() { op; }
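
A short illustrative sketch (not part of the diff) of how the MAYBE_UNUSED macro above is meant to be used: annotating a parameter that is only referenced in some build configurations keeps -Wunused-parameter quiet on gcc/clang. The `#else` fallback for compilers without `__has_attribute` and the names `addOrIgnore`/`ENABLE_DEBUG_OFFSET` are inventions of this sketch, not of the patch.

```cpp
#include <cstdio>

#if defined __has_attribute
#  if __has_attribute(unused)
#    define MAYBE_UNUSED __attribute__((unused))
#  else
#    define MAYBE_UNUSED
#  endif
#else
#  define MAYBE_UNUSED  // sketch-only fallback for compilers without __has_attribute
#endif

// A parameter that is only referenced in some build configurations can be
// annotated so -Wunused-parameter stays quiet in the configurations that
// ignore it (same usage pattern as in the fbgemm packing code later in this diff).
static int addOrIgnore(int x, int MAYBE_UNUSED debugOffset) {
#ifdef ENABLE_DEBUG_OFFSET
  return x + debugOffset;
#else
  return x;
#endif
}

int main() {
  std::printf("%d\n", addOrIgnore(41, 1));
  return 0;
}
```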

View File

@ -84,10 +84,16 @@ std::vector<T> As<std::vector<T>>::apply(const FastOpt& node) {
// specializations for simple vector types
template struct As<std::vector<bool>>;
template struct As<std::vector<int>>;
// Windows and Unix based OS have different type definitions for 'unsigned long'.
// So, we need an explicit definition for uint64_t. Otherwise, there's a linking error on windows.
// Windows, Linux-based OSes, and Mac have different type definitions for 'unsigned long'.
// So we need explicit instantiations for uint64_t that cover the different platforms.
// Otherwise, there's a linking error on Windows, Linux, or Mac.
// https://software.intel.com/en-us/articles/size-of-long-integer-type-on-different-architecture-and-os/
template struct As<std::vector<uint64_t>>;
// https://stackoverflow.com/questions/32021860/c-should-you-size-t-with-a-regular-array
// MacOS: size_t = unsigned long (8 bytes), uint64_t = unsigned long long (8 bytes)
// Linux: size_t = unsigned long (8 bytes), uint64_t = unsigned long (8 bytes)
// Windows: size_t = unsigned long long (8 bytes), uint64_t = unsigned long long (8 bytes)
template struct As<std::vector<unsigned long long>>;
template struct As<std::vector<unsigned long>>;
template struct As<std::vector<float>>;
template struct As<std::vector<double>>;
template struct As<std::vector<std::string>>;
@ -103,4 +109,4 @@ std::pair<T1, T2> As<std::pair<T1, T2>>::apply(const FastOpt& node) {
template struct As<std::pair<int, int>>;
}
}
}
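
A minimal standalone sketch (separate from the diff) of the type situation described in the comment above: on a 64-bit target, size_t, unsigned long, and unsigned long long are all 8 bytes wide, but which named type uint64_t maps to differs per OS, which is why both explicit instantiations are needed. The static_asserts only document the 64-bit assumption.

```cpp
#include <cstddef>
#include <cstdint>
#include <type_traits>

// All 8 bytes on a 64-bit target, yet distinct types for the purposes of
// template instantiation and overload resolution:
//   Linux:   size_t = unsigned long,      uint64_t = unsigned long
//   macOS:   size_t = unsigned long,      uint64_t = unsigned long long
//   Windows: size_t = unsigned long long, uint64_t = unsigned long long
static_assert(sizeof(std::size_t) == 8 && sizeof(std::uint64_t) == 8,
              "this sketch assumes a 64-bit platform");

// uint64_t is an alias for exactly one of the two wide unsigned types; which
// one it is depends on the platform, hence instantiating As<> for both.
static_assert(std::is_same<std::uint64_t, unsigned long>::value ||
              std::is_same<std::uint64_t, unsigned long long>::value,
              "uint64_t aliases one of the two, but which one is platform-specific");

int main() { return 0; }
```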

View File

@ -367,7 +367,8 @@ public:
}
const FastOpt& operator[](const char* const key) const {
return operator[](crc::crc(key));
// MacOS requires an explicit cast to size_t before we can use it.
return operator[]((size_t)crc::crc(key));
}
const FastOpt& operator[](const std::string& key) const {
@ -375,4 +376,4 @@ public:
}
};
}
}

View File

@ -7,10 +7,21 @@
#include "common/filesystem.h"
#include "common/logging.h"
// Even when compiling with clang, __GNUC__ may be defined, so
// we need to add some extra checks to avoid compile errors with
// respect to -Wsuggest-override.
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-value"
# if defined(__has_warning)
# if __has_warning("-Wsuggest-override")
# pragma GCC diagnostic ignored "-Wsuggest-override"
# endif
# else
# pragma GCC diagnostic ignored "-Wsuggest-override"
# endif
#endif
#ifdef _MSC_VER
#pragma warning(push) // 4101: 'identifier' : unreferenced local variable. One parameter variable in zstr.hpp is not used.
#pragma warning(disable : 4101)
@ -82,7 +93,7 @@ protected:
void NormalizeTempPrefix(std::string& base) const;
void MakeTemp(const std::string& base);
};
} // namespace io

View File

@ -7,9 +7,19 @@
// @TODO: go back to canonical names for functions and objects
// as specified in C++17 so it becomes easy to move in the future
// Even when compiling with clang, __GNUC__ may be defined, so
// we need to add some extra checks to avoid compile errors with
// respect to -Wsuggest-override.
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wunused-value"
# if defined(__has_warning)
# if __has_warning("-Wsuggest-override")
# pragma GCC diagnostic ignored "-Wsuggest-override"
# endif
# else
# pragma GCC diagnostic ignored "-Wsuggest-override"
# endif
#endif
#include "3rd_party/pathie-cpp/include/path.hpp" // @TODO: update to latest Pathie

View File

@ -169,12 +169,12 @@ inline bool operator!=(const IntrusivePtr<T>& a, std::nullptr_t) {
template<class T>
inline bool operator==(T* a, const IntrusivePtr<T>& b) {
return b.get();
return a == b.get(); // used to say: return b.get(); That cannot be right. [UG]
}
template<class T>
inline bool operator!=(T* a, const IntrusivePtr<T>& b) {
return b.get();
return a != b.get(); // used to say: return b.get(); That cannot be right. [UG]
}
template<class T, class U>
@ -223,5 +223,3 @@ namespace std {
}
};
}
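
A small self-contained test sketch (not part of the diff) of the behaviour the corrected raw-pointer comparisons are meant to have; ToyPtr is an invented stand-in for IntrusivePtr with the reference counting stripped out.

```cpp
#include <cassert>

// Invented stand-in for IntrusivePtr<T>; only get() matters for this example.
template <class T>
struct ToyPtr {
  T* ptr;
  T* get() const { return ptr; }
};

// Fixed comparisons: compare the raw pointer against the wrapped pointer.
// The pre-patch versions returned 'b.get()' converted to bool, so for any
// non-null b both 'a == b' and 'a != b' evaluated to true, whatever a was.
template <class T> bool operator==(T* a, const ToyPtr<T>& b) { return a == b.get(); }
template <class T> bool operator!=(T* a, const ToyPtr<T>& b) { return a != b.get(); }

int main() {
  int x = 0, y = 0;
  ToyPtr<int> p{&x};
  assert(&x == p);     // same object: equal
  assert(!(&y == p));  // different object: not equal (the old operator== said "equal" here)
  assert(&y != p);
  return 0;
}
```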

View File

@ -124,7 +124,7 @@ static void setErrorHandlers() {
std::set_terminate(unhandledException);
#ifdef __unix__
// catch segfaults
struct sigaction sa = { 0 };
struct sigaction sa = { {0} };
sigemptyset(&sa.sa_mask);
sa.sa_flags = SA_SIGINFO;
sa.sa_sigaction = [](int /*signal*/, siginfo_t*, void*) { ABORT("Segmentation fault"); };

View File

@ -254,7 +254,7 @@ enum class Type : size_t {
packed16 = TypeClass::packed_type + 2u, // special type for FBGEMM, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint16) is meaningless.
packed8avx2 = TypeClass::packed_type + 1u + TypeClass::avx2_type, // special type for FBGEMM with AVX2, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.
packed8avx512 = TypeClass::packed_type + 1u + TypeClass::avx512_type, // special type for FBGEMM with AVX512, not meant to be used anywhere else, not meant to be accessed individually. Internal actual type (uint8) is meaningless.
};
static inline size_t operator&(TypeClass typeClass, Type type) {
@ -394,7 +394,7 @@ static Type inline typeFromString(const std::string& str) {
return Type::float32;
if(str == "float64")
return Type::float64;
if(str == "packed16")
return Type::packed16;
if(str == "packed8avx2")
@ -437,19 +437,35 @@ void matchOrAbort(Type type) {
namespace typeFitting { // own namespace instead of in class, otherwise we get error "explicit specialization in non-namespace scope"
// compares max for different types as constexpr, so can be used at compile-time to determine if RequestType type max fits into ReturnType max, see std::conditional below.
template <typename RequestType, typename ReturnType>
constexpr bool fitsIntoMax() { return std::numeric_limits<RequestType>::max() <= std::numeric_limits<ReturnType>::max(); } // for built-in types everything is constexpr
// Helper function for fitsIntoMax() below
// Returns the 'capacity' of a type: number of digits for integers,
// max_exponent for floats. We ignore the mantissa for floats.
template<typename X> constexpr int capacity() {
static_assert(std::is_arithmetic<X>::value || std::is_same<X,HalfFloat>::value,
"Wrong type for this template");
return (std::is_integral<X>::value
? std::numeric_limits<X>::digits
: std::numeric_limits<X>::max_exponent);
}
// Compare max for different types as constexpr, so can be used at compile-time to determine if RequestType type max fits into ReturnType max, see std::conditional below.
template <typename RequestType, typename ReturnType>
constexpr bool fitsIntoMax() {
// We can't just compare std::numeric_limits<>::max(), because Clang-10
// complains about rounding errors when implicitly converting int to float
return ((!std::is_integral<RequestType>::value // RequestType is a float
&& std::is_integral<ReturnType>::value) // ReturnType an integer
? capacity<RequestType>() < capacity<ReturnType>() // special case
: capacity<RequestType>() <= capacity<ReturnType>()); // normal case
} // for built-in types everything is constexpr
// add specializations here when needed
template <> constexpr bool fitsIntoMax<float16, float>() { return true; }; // for float16 conversion to float is not constexpr, hence specializations
template <> constexpr bool fitsIntoMax<float, float16>() { return false; }; // for float16 conversion to float is not constexpr, hence specializations
}
template <typename ReturnType>
class NumericLimits {
private:
template <typename MaxType> void setLimitsMax() {
max = (ReturnType)std::numeric_limits<MaxType>::max();
lowest = (ReturnType)std::numeric_limits<MaxType>::lowest();
@ -459,10 +475,14 @@ private:
void setLimits() {
// check if the maximum of type RequestType fits into ReturnType
constexpr bool fits = typeFitting::fitsIntoMax<RequestType, ReturnType>();
// sanity check:
static_assert(fits || typeFitting::fitsIntoMax<ReturnType, RequestType>(),
"RequestType doesn't fit into ReturnType, and ReturnType doesn't "
"fit into RequestType. fitsIntoMax is broken!");
// and then use the smaller of each types to determine max, min, lowest.
using MaxType = typename std::conditional<fits, RequestType, ReturnType>::type;
setLimitsMax<MaxType>();
// @TODO: should we rather abort if the RequestType does not fit into ReturnType instead of clipping to smaller type?
// @TODO: should we rather abort if the RequestType does not fit into ReturnType instead of clipping to smaller type?
// ABORT_IF(!fits, "Type {} is too small to contain max of type {}", typeId<ReturnType>(), typeId<RequestType>());
}
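
A compile-time sketch (separate from the diff, restricted to built-in types so it stands alone) of the capacity()/fitsIntoMax() idea above: integer types are compared by their number of value bits, floating types by max_exponent, and the float-into-integer direction uses strict '<', which sidesteps the implicit int-to-float rounding that Clang-10 warns about when max() values are compared directly.

```cpp
#include <cstdint>
#include <limits>
#include <type_traits>

// "Capacity" of a type: number of value bits for integers, max_exponent for
// floating-point types. The mantissa is ignored on purpose, as in the patch.
template <typename X>
constexpr int capacity() {
  return std::is_integral<X>::value ? std::numeric_limits<X>::digits
                                    : std::numeric_limits<X>::max_exponent;
}

// Does the max of RequestType fit into ReturnType? Comparing the two max()
// values directly makes Clang-10 warn about rounding, so compare capacities
// instead; use '<' when a float is requested but an integer is returned.
template <typename RequestType, typename ReturnType>
constexpr bool fitsIntoMax() {
  return (!std::is_integral<RequestType>::value && std::is_integral<ReturnType>::value)
             ? capacity<RequestType>() < capacity<ReturnType>()
             : capacity<RequestType>() <= capacity<ReturnType>();
}

static_assert(fitsIntoMax<int8_t, int16_t>(),  "int8 max fits into int16");
static_assert(!fitsIntoMax<int16_t, int8_t>(), "int16 max does not fit into int8");
static_assert(fitsIntoMax<int32_t, float>(),   "int32 max fits into float's range");
static_assert(!fitsIntoMax<float, int32_t>(),  "float max does not fit into int32");

int main() { return 0; }
```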

View File

@ -8,12 +8,22 @@
#include <sstream>
#include <string>
#include <set>
#ifdef __unix__
#if defined(__unix__) || defined(__APPLE__)
#include <unistd.h>
#endif
#include <codecvt>
#include <cwctype>
// MACOS lacks HOST_NAME_MAX
#ifndef HOST_NAME_MAX
# if defined(_POSIX_HOST_NAME_MAX)
# define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
# elif defined(MAXHOSTNAMELEN)
# define HOST_NAME_MAX MAXHOSTNAMELEN
# endif
#endif
namespace marian {
namespace utils {
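
A small usage sketch (not part of the diff) of what the HOST_NAME_MAX fallback above enables: a fixed-size buffer for gethostname() that also compiles on macOS, where the glibc constant is missing but _POSIX_HOST_NAME_MAX (or MAXHOSTNAMELEN) is available.

```cpp
#include <cstdio>
#include <limits.h>   // HOST_NAME_MAX on glibc, _POSIX_HOST_NAME_MAX on POSIX systems
#include <unistd.h>   // gethostname()

// Same fallback chain as in src/common/utils.cpp.
#ifndef HOST_NAME_MAX
#  if defined(_POSIX_HOST_NAME_MAX)
#    define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
#  elif defined(MAXHOSTNAMELEN)
#    define HOST_NAME_MAX MAXHOSTNAMELEN
#  endif
#endif

int main() {
  char hostname[HOST_NAME_MAX + 1] = {0};  // +1 for the terminating NUL
  if (gethostname(hostname, sizeof(hostname)) == 0)
    std::printf("host: %s\n", hostname);
  return 0;
}
```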

View File

@ -26,7 +26,7 @@ public:
virtual void setGuidedAlignment(std::vector<float>&&) = 0;
virtual void setDataWeights(const std::vector<float>&) = 0;
virtual ~Batch() {};
protected:
std::vector<size_t> sentenceIds_;
};

View File

@ -525,6 +525,7 @@ public:
const std::vector<Ptr<Vocab>>& vocabs,
Ptr<Options> options);
virtual ~CorpusBase() {}
virtual std::vector<Ptr<Vocab>>& getVocabs() = 0;
protected:

View File

@ -45,6 +45,7 @@ protected:
public:
// @TODO: choose between 'virtual' and 'final'. Can we derive from this class?
virtual ~DefaultVocab() {};
virtual const std::string& canonicalExtension() const override { return suffixes_[0]; }
virtual const std::vector<std::string>& suffixes() const override { return suffixes_; }
@ -295,7 +296,7 @@ private:
class ClassVocab : public DefaultVocab {
private:
// Do nothing.
virtual void addRequiredVocabulary(const std::string& vocabPath, bool isJson) override { vocabPath; isJson; }
virtual void addRequiredVocabulary(const std::string& /*vocabPath*/, bool /*isJson*/) override {}
// Not adding special class labels, only seen classes.
virtual void create(const std::string& vocabPath,

View File

@ -36,6 +36,8 @@ public:
class ShortlistGenerator {
public:
virtual ~ShortlistGenerator() {}
virtual Ptr<Shortlist> generate(Ptr<data::CorpusBatch> batch) const = 0;
// Writes text version of (possibly) pruned short list to file
@ -129,7 +131,6 @@ private:
Ptr<const Vocab> trgVocab_;
size_t srcIdx_;
size_t trgIdx_;
bool shared_{false};
size_t firstNum_{100};
@ -183,13 +184,12 @@ public:
Ptr<const Vocab> srcVocab,
Ptr<const Vocab> trgVocab,
size_t srcIdx = 0,
size_t trgIdx = 1,
size_t /*trgIdx*/ = 1,
bool shared = false)
: options_(options),
srcVocab_(srcVocab),
trgVocab_(trgVocab),
srcIdx_(srcIdx),
trgIdx_(trgIdx),
shared_(shared) {
std::vector<std::string> vals = options_->get<std::vector<std::string>>("shortlist");
@ -235,7 +235,6 @@ public:
virtual Ptr<Shortlist> generate(Ptr<data::CorpusBatch> batch) const override {
auto srcBatch = (*batch)[srcIdx_];
// auto trgBatch = (*batch)[trgIdx_];
// add firstNum most frequent words
std::unordered_set<WordIndex> indexSet;

View File

@ -37,6 +37,7 @@ public:
typedef SentenceTuple Sample;
TextInput(std::vector<std::string> inputs, std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
virtual ~TextInput() {}
Sample next() override;

View File

@ -57,6 +57,7 @@ public:
virtual Word randWord() const {
return Word::fromWordIndex(rand() % size());
}
virtual ~IVocab() {};
};
class Options;

View File

@ -62,6 +62,7 @@ private:
std::vector<Input> inputs_;
public:
std::vector<Input>& inputs() { return inputs_; }
const std::vector<Input>& inputs() const { return inputs_; }
@ -144,6 +145,8 @@ public:
loadData();
}
virtual ~MNISTData(){}
void loadData() override {
ABORT_IF(paths_.size() != 2, "Paths to MNIST data files are not specified");

View File

@ -47,6 +47,8 @@ class MNISTLogsoftmax : public ILogProb {
public:
MNISTLogsoftmax() {}
virtual ~MNISTLogsoftmax(){}
Logits apply(Ptr<IModel> model,
Ptr<ExpressionGraph> graph,
Ptr<data::Batch> batch,
@ -61,13 +63,15 @@ public:
typedef data::MNISTData dataset_type;
template <class... Args>
MnistFeedForwardNet(Ptr<Options> options, Args... args)
MnistFeedForwardNet(Ptr<Options> options, Args... /*args*/)
: options_(options), inference_(options->get<bool>("inference", false)) {}
virtual ~MnistFeedForwardNet(){}
virtual Logits build(Ptr<ExpressionGraph> graph,
Ptr<data::Batch> batch,
bool /*clean*/ = false) override {
return Logits(apply(graph, batch, inference_));
}

View File

@ -19,7 +19,9 @@ public:
builder_ = models::createModelFromOptions(options, models::usage::translation);
}
virtual void keepBest(const std::vector<Ptr<ExpressionGraph>>& graphs) override {
virtual ~MNISTAccuracyValidator(){}
virtual void keepBest(const std::vector<Ptr<ExpressionGraph>>& /*graphs*/) override {
LOG(warn, "Keeping best model for MNIST examples is not supported");
}

View File

@ -7,55 +7,58 @@ namespace marian {
namespace functional {
// General template, will be used for any type without specializations
// and will fail with an abort message.
// and will fail at runtime with an abort message. Note that the
// general template functions don't have named parameters on purpose,
// because clang will warn about unused parameters during compilation.
template <typename T>
struct Ops {
static HOST_DEVICE_INLINE T tanh(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sin(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T cos(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T tan(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T log(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T exp(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T abs(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sqrt(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T neg(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sgn(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T tanh(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sin(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T cos(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T tan(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T log(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T exp(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T abs(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sqrt(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T neg(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sgn(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T add(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sub(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T mul(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T div(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T add(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sub(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T mul(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T div(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T max(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T min(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T pow(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T max(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T min(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T pow(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T negate(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T eq(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T neq(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T gt(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T lt(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T geq(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T leq(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T _and(const T& x, const T& y) { ABORT("Unknown type"); } // 'and' is used by gcc
static HOST_DEVICE_INLINE T _or(const T& x, const T& y) { ABORT("Unknown type"); } // 'or' is used by gcc
static HOST_DEVICE_INLINE T negate(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T eq(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T neq(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T gt(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T lt(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T geq(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T leq(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T _and(const T&, const T&) { ABORT("Unknown type"); } // 'and' is used by gcc
static HOST_DEVICE_INLINE T _or(const T&, const T&) { ABORT("Unknown type"); } // 'or' is used by gcc
// Neural Networks specific functions
static HOST_DEVICE_INLINE T sigmoid(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T logaddexp(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T clip(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sigmoid(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T logaddexp(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T clip(const T&, const T&) { ABORT("Unknown type"); }
// derivative of Clip, cut-off function
static HOST_DEVICE_INLINE T bump(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T relu(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T reluBack(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T prelu(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T preluBack(const T& x, const T& y) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T bump(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T relu(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T reluBack(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T prelu(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T preluBack(const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T if_then_else(const T& x, const T& y, const T& z) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T if_then_else(const T&, const T&, const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sumReduce(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T maxReduce(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T minReduce(const T& x) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T sumReduce(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T maxReduce(const T&) { ABORT("Unknown type"); }
static HOST_DEVICE_INLINE T minReduce(const T&) { ABORT("Unknown type"); }
};
// Specialization for float
@ -127,14 +130,14 @@ template <>
struct Ops<double> {
typedef double Single;
static HOST_DEVICE_INLINE double tanh(const double& x) { return tanh(x); }
static HOST_DEVICE_INLINE double sin(const double& x) { return sin(x); }
static HOST_DEVICE_INLINE double cos(const double& x) { return cos(x); }
static HOST_DEVICE_INLINE double tan(const double& x) { return tan(x); }
static HOST_DEVICE_INLINE double log(const double& x) { return log(x); }
static HOST_DEVICE_INLINE double exp(const double& x) { return exp(x); }
static HOST_DEVICE_INLINE double abs(const double& x) { return abs(x); }
static HOST_DEVICE_INLINE double sqrt(const double& x) { return sqrt(x); }
static HOST_DEVICE_INLINE double tanh(const double& x) { return std::tanh(x); }
static HOST_DEVICE_INLINE double sin(const double& x) { return std::sin(x); }
static HOST_DEVICE_INLINE double cos(const double& x) { return std::cos(x); }
static HOST_DEVICE_INLINE double tan(const double& x) { return std::tan(x); }
static HOST_DEVICE_INLINE double log(const double& x) { return std::log(x); }
static HOST_DEVICE_INLINE double exp(const double& x) { return std::exp(x); }
static HOST_DEVICE_INLINE double abs(const double& x) { return std::abs(x); }
static HOST_DEVICE_INLINE double sqrt(const double& x) { return std::sqrt(x); }
static HOST_DEVICE_INLINE double neg(const double& x) { return -x; }
static HOST_DEVICE_INLINE double sgn(const double& x) { return (0 < x) - (x < 0); }
@ -145,7 +148,7 @@ struct Ops<double> {
static HOST_DEVICE_INLINE double max(const double& x, const double& y) { return x < y ? y : x; }
static HOST_DEVICE_INLINE double min(const double& x, const double& y) { return x < y ? x : y; }
static HOST_DEVICE_INLINE double pow(const double& x, const double& y) { return pow(x, y); }
static HOST_DEVICE_INLINE double pow(const double& x, const double& y) { return std::pow(x, y); }
static HOST_DEVICE_INLINE double negate(const double& x) { return !(bool)x; }
@ -460,7 +463,7 @@ struct Ops<half> {
static DEVICE_INLINE half exp(const half& x) { return hexp(x); }
static DEVICE_INLINE half sqrt(const half& x) { return hsqrt(x); }
static DEVICE_INLINE half neg(const half& x) { return -x; }
static DEVICE_INLINE half abs(const half& x) { return fabs((float)x); }// @TODO half has this information somewhere in the struct, right?
static DEVICE_INLINE half sgn(const half& x) { half zero = 0.f; return (zero < x) - (x < zero); } // @TODO half has this information somewhere in the struct, right?
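
A compact illustration (separate from the diff) of the bug the std:: qualification in Ops&lt;double&gt; fixes: inside the struct, an unqualified call to tanh finds the member currently being defined before it ever reaches the global math function, so the old code recursed into itself; qualifying with std:: picks the &lt;cmath&gt; overload.

```cpp
#include <cmath>
#include <cstdio>

struct Ops {
  // BUG (pre-patch pattern): unqualified 'tanh' names this very member, not
  // std::tanh, so the function calls itself until the stack overflows.
  // static double tanh(const double& x) { return tanh(x); }

  // FIX (post-patch pattern): qualify with std:: to get the <cmath> function.
  static double tanh(const double& x) { return std::tanh(x); }
};

int main() {
  std::printf("%f\n", Ops::tanh(1.0));  // ~0.761594
  return 0;
}
```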

View File

@ -40,7 +40,7 @@ protected:
std::string debugMessage_;
Ptr<std::list<Expr>> subtape_; // a subtape is used to keep track of nodes that need to be freed and recomputed with gradient-checkpointing.
bool isCheckpoint_{false}; // true if this node has been selected to be a checkpoint, currently only done manually.
bool isCheckpoint_{false}; // true if this node has been selected to be a checkpoint, currently only done manually.
Ptr<AutoTunerRecorder> recorder_;
size_t recorderHash_;
@ -138,7 +138,7 @@ public:
virtual std::string graphviz() override {
std::stringstream ss;
ss << "\"" << this << "\" ["
ss << "\"" << this << "\" ["
<< "shape=\"" << form() << "\", "
<< "label=" << label() << ", "
<< "style=\"filled\", "
@ -147,7 +147,7 @@ public:
for(auto&& child : children())
ss << "\"" << child << "\" -> \"" << this << "\";" << std::endl;
if(subtape_) {
for(auto&& dep : *subtape_)
ss << "\"" << dep << "\" -> \"" << this << "\" [style=dotted];" << std::endl;
@ -188,9 +188,9 @@ struct NaryNodeOp : public Node {
// Deduce type automatically, but then all types must be the same
// this is called automatically when no output type is specified.
// If the input types are mixed, the output type needs to be specified
// If the input types are mixed, the output type needs to be specified
// in the constructor.
Type commonType(const std::vector<Expr>& nodes) {
static Type commonType(const std::vector<Expr>& nodes) {
ABORT_IF(nodes.size() == 0, "NaryNodeOp has no children");
Type type = nodes[0]->value_type();
for(int i = 1; i < nodes.size(); ++i)

View File

@ -17,9 +17,9 @@ namespace inits {
/**
* Base class for specialized NodeInitializers.
*
* A NodeInitializer is a functor that is associated with parameters
* and constants, and is invoked on a tensor during node intialization.
* You need to override NodeIntializer::apply(Tensor) with your own
* A NodeInitializer is a functor that is associated with parameters
* and constants, and is invoked on a tensor during node intialization.
* You need to override NodeIntializer::apply(Tensor) with your own
* functionality or use a fromLambda intializer.
*
* See node_initializers.cpp for examples.
@ -31,6 +31,7 @@ protected:
public:
virtual void apply(Tensor t) = 0;
void setAllocator(Ptr<Allocator> allocator) { allocator_ = allocator; }
virtual ~NodeInitializer() {}
};
/**
@ -135,7 +136,7 @@ Ptr<NodeInitializer> dropout(float dropoutProbabilty);
/**
* Intialize with gumbel noise, i.e. -log(-log(u)) where u ~ Uniform(0 + eps, 1 - eps)
*
*
* @return A NodeInitializer
*/
Ptr<NodeInitializer> gumbel(float eps = 1e-5f);
@ -163,7 +164,7 @@ Ptr<NodeInitializer> fromWord2vec(const std::string& file,
/**
* Computes Google's Transformer-style sinusoidal position embeddings
* starting from position 'start' taking into account batch and time
* starting from position 'start' taking into account batch and time
* dimensions of the tensor.
*
* Expected tensor layout {-2: time, -1: model}

View File

@ -480,9 +480,12 @@ class CSRDotNodeOp : public NaryNodeOp {
bool transS_;
bool swapOperands_;
public:
CSRDotNodeOp(const Shape& S_shape, Expr S_values, Expr S_indices, Expr S_offsets, Expr D, bool transS, bool swapOperands)
: NaryNodeOp({ S_values, S_indices, S_offsets, D }, newShape(S_shape, S_values, S_indices, S_offsets, D, transS, swapOperands), commonType({S_values, D})),
transS_(transS), swapOperands_(swapOperands) {
CSRDotNodeOp(const Shape& S_shape, Expr S_values, Expr S_indices,
Expr S_offsets, Expr D, bool transS, bool swapOperands)
: NaryNodeOp({ S_values, S_indices, S_offsets, D },
newShape(S_shape, S_values, S_indices, S_offsets, D, transS, swapOperands),
NaryNodeOp::commonType({S_values, D})),
transS_(transS), swapOperands_(swapOperands) {
matchOrAbort<IndexType>(S_indices->value_type());
matchOrAbort<IndexType>(S_offsets->value_type());
}
@ -513,7 +516,7 @@ public:
NodeOps backwardOps() override {
return { nullptr, // can't backprop into the sparse matrix (the gradient is dense)
nullptr,
nullptr,
nullptr,
NodeOp(CSRProd(child(3)->grad(), // child(3) = D
graph()->allocator(),
@ -527,7 +530,7 @@ public:
virtual size_t hash() override {
size_t seed = NaryNodeOp::hash();
for(auto s : shape())
util::hash_combine(seed, s);
util::hash_combine(seed, s);
util::hash_combine(seed, transS_);
util::hash_combine(seed, swapOperands_);
return seed;
@ -1050,8 +1053,8 @@ struct ConcatenateNodeOp : public NaryNodeOp {
auto checkShape = shape;
for(auto child : nodes) {
checkShape.set(ax_, child->shape()[ax_]); // don't abort on different sizes on axis dim.
ABORT_IF(checkShape != child->shape(),
"Child shapes {} and {} cannot be concatenated along axis {}",
ABORT_IF(checkShape != child->shape(),
"Child shapes {} and {} cannot be concatenated along axis {}",
shape, child->shape(), ax);
sum += child->shape()[ax_];

View File

@ -10,10 +10,10 @@
namespace marian {
// @TODO: Currently an ExpressionGraph only supports one Parameters object and
// @TODO: Currently an ExpressionGraph only supports one Parameters object and
// the type of parameters has to be the inside on Parameters object. This limits
// parameter types to a single chosen type, e.g. only fp32 or only fp16. This should
// be extended to allow multiple sets of parameters.
// be extended to allow multiple sets of parameters.
// The reason here is to be able to efficiently compute updates of whole parameter
// sets of one type.
class Parameters {
@ -40,7 +40,7 @@ public:
LOG(debug, "Created parameter object of type {}", acceptedElementType_);
}
~Parameters() {
virtual ~Parameters() {
LOG(debug, "Destroyed parameter object of type {}", acceptedElementType_);
}
@ -88,7 +88,7 @@ public:
// sort parameters by name before allocation to make sure the memory layout after allocation is always the same
std::sort(params_.begin(), params_.end(), [](Expr n1, Expr n2){ return n1->name() < n2->name(); });
for(auto p : params_) {
if(!p->val()) {
vals_->allocate(p->val(), p->shape(), p->value_type());

View File

@ -39,6 +39,7 @@ public:
// Simplest layer interface: Unary function
struct IUnaryLayer {
virtual ~IUnaryLayer() {}
virtual Expr apply(Expr) = 0;
virtual Expr apply(const std::vector<Expr>& es) {
ABORT_IF(es.size() > 1, "Not implemented"); // simple stub
@ -59,6 +60,7 @@ struct IEmbeddingLayer {
// alternative from indices directly
virtual Expr applyIndices(const std::vector<WordIndex>& embIdx, const Shape& shape) const = 0;
virtual ~IEmbeddingLayer() {}
};
// base class for Encoder and Decoder classes, which have embeddings and a batch index (=stream index)

View File

@ -5,14 +5,14 @@
namespace marian {
static inline RationalLoss guidedAlignmentCost(Ptr<ExpressionGraph> graph,
static inline RationalLoss guidedAlignmentCost(Ptr<ExpressionGraph> /*graph*/,
Ptr<data::CorpusBatch> batch,
Ptr<Options> options,
Expr attention) { // [beam depth=1, max src length, batch size, tgt length]
std::string guidedLossType = options->get<std::string>("guided-alignment-cost"); // @TODO: change "cost" to "loss"
float guidedLossWeight = options->get<float>("guided-alignment-weight");
const auto& shape = attention->shape(); // [beam depth=1, max src length, batch size, tgt length]
float epsilon = 1e-6f;
Expr alignmentLoss; // sum up loss over all attention/alignment positions
@ -55,8 +55,8 @@ static inline RationalLoss guidedAlignmentCost(Ptr<ExpressionGraph> graph,
else
ABORT("Unknown alignment cost type: {}", guidedLossType);
// every position is a label as they should all agree
// @TODO: there should be positional masking here ... on the other hand, positions that are not
// in a sentence should always agree (both being 0). Lack of masking affects label count only which is
// @TODO: there should be positional masking here ... on the other hand, positions that are not
// in a sentence should always agree (both being 0). Lack of masking affects label count only which is
// probably negligible?
numLabels = shape.elements();
}

View File

@ -331,6 +331,7 @@ public:
: LabelwiseLoss(axes), // cross-entropy already reduces over axis -1
labelSmoothing_(labelSmoothing), factorWeight_(factorWeight) {}
virtual ~CrossEntropyLoss() {}
protected:
float labelSmoothing_; // interpolation factor for label smoothing, see below
float factorWeight_; // give extra weight to factors
@ -368,7 +369,7 @@ protected:
if(labelWeights) {
// We currently do not know how to use target factors and word-level label weights together
bool wordlevel = labelWeights->shape()[-3] > 1; // Time-dimension is not trivially 1, hence we have word-level weights.
bool wordlevel = labelWeights->shape()[-3] > 1; // Time-dimension is not trivially 1, hence we have word-level weights.
ABORT_IF(wordlevel && logits.getNumFactorGroups() > 1, "CE loss with word-level label weights is not implemented for factors");
ce = ce * cast(labelWeights, Type::float32);
}
@ -379,15 +380,15 @@ protected:
/**
* @brief Unlikelihood loss across last axis, summed up over batch and time dimensions. This is an
* implementation of sequence-level unlikelihood loss from https://arxiv.org/abs/1908.04319.
* @brief Unlikelihood loss across last axis, summed up over batch and time dimensions. This is an
* implementation of sequence-level unlikelihood loss from https://arxiv.org/abs/1908.04319.
* We rely on word-level label weights where 1 is correct and 0 is marking an error. If there are not
* zeros for a sentence it going to be trained with normal CE loss if there is at least one 0 it is going
* to flip over to use SUL for that sentence to penalize the selected word.
*
*
* SUL is implemented as:
* -log(gather(1 - softmax(logits), -1, indices))
*
*
* Factors are currently not supported.
*/
class SequenceUnlikelihoodLoss : public CrossEntropyLoss {
@ -411,17 +412,17 @@ protected:
ABORT_IF(!mask, "mask is required"); // @TODO: check this, it seems weights for padding are by default 1, which would make this obsolete.
// use label weights, where 1 is GOOD and 0 is BAD. After inversion here, now 1 marks, mask again to eliminate padding (might be obsolete)
auto errorMask = (1.f - cast(labelWeights, Type::float32)) * cast(mask, Type::float32);
auto ceUl = logits.applyLossFunction(labels, [&](Expr logits, Expr indices) {
return cast(unlikelihood(logits, indices), Type::float32);
});
// compute if want to use CE or UL. If there are no errors train with CE, otherwise train _only on_ the errors with UL. This is the "mixed" training
// schedule from https://arxiv.org/abs/1908.04319. Providing labels with or without error scores we can easily switch between CE and UL.
// schedule from https://arxiv.org/abs/1908.04319. Providing labels with or without error scores we can easily switch between CE and UL.
auto onlyCe = eq(sum(errorMask, /*axis=*/-3), 0.f); // [1, 1, dimBatch, 1] - equal 1 if no errors are present
ceUl = errorMask * ceUl; // don't use for correct label or padding
auto cost = onlyCe * ce + (1.f - onlyCe) * ceUl; // ce or unlikelihood part are never simultanously used as cost per batch entry
auto cost = onlyCe * ce + (1.f - onlyCe) * ceUl; // ce or unlikelihood part are never simultanously used as cost per batch entry
return cost;
}

View File

@ -17,6 +17,7 @@ public:
virtual void debugWeighting(std::vector<float> /*weightedMask*/,
std::vector<float> /*freqMask*/,
Ptr<data::CorpusBatch> /*batch*/){};
virtual ~WeightingBase() {}
};
class DataWeighting : public WeightingBase {

View File

@ -41,6 +41,7 @@ class VocabWrapper : public IVocabWrapper {
Ptr<Vocab> pImpl_;
public:
VocabWrapper(Ptr<Vocab> vocab) : pImpl_(vocab) {}
virtual ~VocabWrapper() {}
WordIndex encode(const std::string& word) const override { return (*pImpl_)[word].toWordIndex(); }
std::string decode(WordIndex id) const override { return (*pImpl_)[Word::fromWordIndex(id)]; }
size_t size() const override { return pImpl_->size(); }
@ -243,7 +244,7 @@ DecoderCpuAvxVersion parseCpuAvxVersion(std::string name) {
}
}
// @TODO: clean-up this code and unify with marian-conv. The targetPrec parameter is not clear enought etc.
// @TODO: clean-up this code and unify with marian-conv. The targetPrec parameter is not clear enought etc.
bool convertModel(std::string inputFile, std::string outputFile, int32_t targetPrec) {
std::cout << "Converting from: " << inputFile << ", to: " << outputFile << std::endl;

View File

@ -54,6 +54,8 @@ public:
const std::vector<const void*>& ptrs)
: options_(options), ptrs_(ptrs) {}
virtual ~IBeamSearchDecoder() {}
virtual QSNBestBatch decode(const QSBatch& qsBatch,
size_t maxLength,
const std::unordered_set<WordIndex>& shortlist)

View File

@ -25,6 +25,7 @@ public:
Ptr<ExpressionGraph> graph, // @TODO: why needed? Can it be gotten from model?
Ptr<data::Batch> batch,
bool clearGraph = true) = 0;
virtual ~ICost() {}
};
class EncoderDecoderCECost : public ICost {
@ -51,6 +52,8 @@ public:
weighter_ = WeightingFactory(options_);
}
virtual ~EncoderDecoderCECost() {}
Ptr<MultiRationalLoss> apply(Ptr<IModel> model,
Ptr<ExpressionGraph> graph,
Ptr<data::Batch> batch,
@ -136,6 +139,8 @@ public:
Trainer(Ptr<IModel> model, Ptr<ICost> cost)
: model_(model), cost_(cost) {}
virtual ~Trainer() {}
Ptr<IModel> getModel() { return model_; }
virtual void load(Ptr<ExpressionGraph> graph,
@ -179,6 +184,8 @@ public:
Scorer(Ptr<IModel> model, Ptr<ILogProb> cost)
: model_(model), logProb_(cost) {}
virtual ~Scorer(){}
Ptr<IModel> getModel() { return model_; }
virtual void load(Ptr<ExpressionGraph> graph,
@ -211,6 +218,7 @@ public:
class LogSoftmaxStep : public ILogProbStep {
public:
virtual ~LogSoftmaxStep() {}
virtual Ptr<DecoderState> apply(Ptr<DecoderState> state) override {
// decoder needs normalized probabilities (note: skipped if beam 1 and --skip-cost)
state->setLogProbs(state->getLogProbs().applyUnaryFunction(logsoftmax));
@ -224,6 +232,7 @@ public:
// with --output-sampling during translation with marian-decoder
class GumbelSoftmaxStep : public ILogProbStep {
public:
virtual ~GumbelSoftmaxStep() {}
virtual Ptr<DecoderState> apply(Ptr<DecoderState> state) override {
state->setLogProbs(state->getLogProbs().applyUnaryFunctions(
[](Expr logits){ // lemma gets gumbelled

View File

@ -11,6 +11,7 @@ namespace marian {
class IEncoderDecoder : public models::IModel {
public:
virtual ~IEncoderDecoder() {}
virtual void load(Ptr<ExpressionGraph> graph,
const std::string& name,
bool markedReloaded = true) override

View File

@ -41,6 +41,8 @@ public:
// @TODO: Is there a better name?
class ICriterionFunction {
public:
virtual ~ICriterionFunction() {}
virtual void load(Ptr<ExpressionGraph>,
const std::string&,
bool markReloaded = true)

View File

@ -5,10 +5,12 @@
namespace marian {
struct ModelTask {
virtual ~ModelTask() {}
virtual void run() = 0;
};
struct ModelServiceTask {
virtual ~ModelServiceTask() {}
virtual std::string run(const std::string&) = 0;
};
} // namespace marian

View File

@ -11,6 +11,7 @@ namespace marian {
class EncoderS2S : public EncoderBase {
using EncoderBase::EncoderBase;
public:
virtual ~EncoderS2S() {}
Expr applyEncoderRNN(Ptr<ExpressionGraph> graph,
Expr embeddings,
Expr mask,
@ -254,7 +255,7 @@ public:
auto embeddings = state->getTargetHistoryEmbeddings();
// The batch dimension of the inputs can change due to batch-pruning, in that case
// cached elements need to be rebuilt, in this case the mapped encoder context in the
// cached elements need to be rebuilt, in this case the mapped encoder context in the
// attention mechanism of the decoder RNN.
int currDimBatch = embeddings->shape()[-2];
if(!rnn_ || lastDimBatch_ != currDimBatch) // if currDimBatch is different, rebuild the cached RNN
@ -263,7 +264,7 @@ public:
// Also @TODO: maybe implement a Cached(build, updateIf) that runs a check and rebuild if required
// at dereferecing :
// rnn_ = Cached<decltype(constructDecoderRNN(graph, state))>(
// /*build=*/[]{ return constructDecoderRNN(graph, state); },
// /*build=*/[]{ return constructDecoderRNN(graph, state); },
// /*updateIf=*/[]{ return state->batchDimChanged() });
// rnn_->transduce(...);

View File

@ -17,6 +17,7 @@ public:
: context_(context), mask_(mask), batch_(batch) {}
EncoderState() {}
virtual ~EncoderState() {}
virtual Expr getContext() const { return context_; }
virtual Expr getAttended() const { return context_; }
@ -53,6 +54,7 @@ public:
const std::vector<Ptr<EncoderState>>& encStates,
Ptr<data::CorpusBatch> batch)
: states_(states), logProbs_(logProbs), encStates_(encStates), batch_(batch) {}
virtual ~DecoderState() {}
// @TODO: Do we need all these to be virtual?
virtual const std::vector<Ptr<EncoderState>>& getEncoderStates() const {
@ -68,10 +70,10 @@ public:
int beamSize) const {
std::vector<Ptr<EncoderState>> newEncStates;
for(auto& es : encStates_)
for(auto& es : encStates_)
// If the size of the batch dimension of the encoder state context changed, subselect the correct batch entries
newEncStates.push_back(es->getContext()->shape()[-2] == batchIndices.size() ? es : es->select(batchIndices));
// hypindices matches batchIndices in terms of batch dimension, so we only need hypIndices
auto selectedState = New<DecoderState>(
states_.select(hypIndices, beamSize, /*isBatchMajor=*/false), logProbs_, newEncStates, batch_);
@ -121,6 +123,7 @@ private:
Words targetWords_;
public:
virtual ~ClassifierState() {}
virtual Expr getLogProbs() const { return logProbs_; }
virtual void setLogProbs(Expr logProbs) { logProbs_ = logProbs; }

View File

@ -16,6 +16,7 @@ namespace marian {
class ClipperBase {
public:
virtual void clip(Tensor) = 0;
virtual ~ClipperBase() {}
};
typedef std::shared_ptr<ClipperBase> ClipperPtr;

View File

@ -29,6 +29,8 @@ public:
LOG(info, "[optimizers] Learning rate gets automatically adjusted as if minibatch size was {}", refMBWordsParam_);
}
virtual ~OptimizerBase() {}
static constexpr size_t mbSizeNotProvided = SIZE_MAX;
void update(Ptr<ExpressionGraph> graph, size_t mbSize = mbSizeNotProvided) {
@ -114,7 +116,7 @@ class Sgd : public OptimizerBase {
public:
Sgd(float eta, size_t refMBWordsParam = 0, Ptr<ClipperBase> clipper = nullptr)
: OptimizerBase(eta, refMBWordsParam, clipper) {}
virtual ~Sgd() {}
virtual void setParams(const std::vector<float>& /*params*/) override {}
private:
void updateImpl(Tensor params, Tensor grads, size_t actualMBSize, size_t refMBWords) override;

View File

@ -13,6 +13,7 @@ namespace marian {
class ScoreCollector {
public:
ScoreCollector(const Ptr<Options>& options);
virtual ~ScoreCollector() {}
virtual void Write(long id, const std::string& message);
virtual void Write(long id,

View File

@ -35,7 +35,7 @@ protected:
public:
BaseRNN(Ptr<ExpressionGraph> graph, Ptr<Options> options)
: graph_(graph), options_(options) {}
virtual ~BaseRNN() {}
virtual Expr transduce(Expr, Expr = nullptr) = 0;
virtual Expr transduce(Expr, State, Expr = nullptr) = 0;
virtual Expr transduce(Expr, States, Expr = nullptr) = 0;
@ -113,6 +113,7 @@ private:
public:
friend RNN;
virtual ~SingleLayerRNN() {}
// @TODO: benchmark whether this concatenation is a good idea
virtual Expr transduce(Expr input, Expr mask = nullptr) override {

View File

@ -17,7 +17,7 @@ protected:
public:
Backend(DeviceId deviceId, size_t seed)
: deviceId_(deviceId), seed_(seed), randomGenerator_(createRandomGenerator(seed, deviceId)) {}
virtual ~Backend() {};
virtual DeviceId getDeviceId() { return deviceId_; };
virtual Ptr<RandomGenerator> getRandomGenerator() { return randomGenerator_; }

View File

@ -8,29 +8,40 @@
namespace marian {
namespace cpu {
namespace {
// allocate function for tensor reserve() below.
// Needed for AVX512, while not available on all compilers. It seems clang
// does not have aligned_alloc for all cstlib versions. If AVX512 is not used
// a simple malloc is probably fine.
// Should generate a runtime error otherwise as we have a check in the AVX512
// functions which tests for alignment.
#ifdef _WIN32
#define MALLOC(size) _aligned_malloc(size, alignment_)
#elif __GNUC__
#define MALLOC(size) aligned_alloc(alignment_, size)
#else
#define MALLOC(size) malloc(size)
#endif
// Alignment is needed because we use AVX512 and AVX2 vectors. We should fail if we can't allocate aligned memory.
#ifdef _WIN32
#define FREE(ptr) _aligned_free(ptr)
void *genericMalloc(size_t alignment, size_t size) {
void *ret = _aligned_malloc(size, alignment);
ABORT_IF(!ret, "Failed to allocate memory on CPU");
return ret;
}
void genericFree(void *ptr) {
_aligned_free(ptr);
}
#else
#define FREE(ptr) free(ptr)
// Linux and OS X. There is no fallback to malloc because we need it to be aligned.
void *genericMalloc(size_t alignment, size_t size) {
// On macOS, aligned_alloc is available only with C++17.
// Furthermore, it requires that the requested size is an exact multiple of the alignment, otherwise it fails.
// posix_memalign is available on both Mac (since 2016) and Linux, and in both gcc and clang.
void *result;
// An error can be detected via the return value, or by result simply remaining nullptr.
ABORT_IF(posix_memalign(&result, alignment, size), "Failed to allocate memory on CPU");
return result;
}
void genericFree(void *ptr) {
free(ptr);
}
#endif
} // namespace
Device::~Device() {
FREE(data_);
genericFree(data_);
}
void Device::reserve(size_t size) {
@ -38,14 +49,12 @@ void Device::reserve(size_t size) {
ABORT_IF(size < size_ || size == 0,
"New size must be larger than old size and larger than 0");
uint8_t *temp = static_cast<uint8_t*>(genericMalloc(alignment_, size));
if(data_) {
uint8_t *temp = static_cast<uint8_t*>(MALLOC(size));
std::copy(data_, data_ + size_, temp);
FREE(data_);
data_ = temp;
} else {
data_ = static_cast<uint8_t*>(MALLOC(size));
genericFree(data_);
}
data_ = temp;
size_ = size;
}
} // namespace cpu
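For illustration only (this sketch is not part of the commit), the aligned-allocation pattern described in the comments above, using posix_memalign with a hard failure instead of a malloc fallback, can be reproduced in a few standalone lines; the 512-byte alignment is an arbitrary example value:

// Sketch: aligned allocation that aborts on failure, mirroring the genericMalloc/genericFree pattern.
// Assumes a POSIX system (Linux or macOS).
#include <stdio.h>    // fprintf
#include <stdlib.h>   // posix_memalign, free, abort

void* alignedAlloc(size_t alignment, size_t size) {
  void* result = nullptr;
  // posix_memalign reports failure through its non-zero return value.
  if (posix_memalign(&result, alignment, size) != 0) {
    fprintf(stderr, "Failed to allocate %zu aligned bytes\n", size);
    abort();
  }
  return result;
}

int main() {
  void* p = alignedAlloc(512, 1024);  // example alignment large enough for AVX-512 vectors
  free(p);                            // memory from posix_memalign is released with plain free()
  return 0;
}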

View File

@ -17,6 +17,7 @@
#endif
using namespace fbgemm;
// @TODO: don't use using namespace ...; in header files. Just don't. [UG]
#endif // USE_FBGEMM
namespace marian {
@ -96,7 +97,7 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp {
const std::string type() override { return "packMatFp16"; }
Shape newShape(Expr a, bool transpose) {
Shape newShape(Expr MAYBE_UNUSED a, bool MAYBE_UNUSED transpose) {
#if USE_FBGEMM
auto shapeMat = a->shape();
// Should be 2D - weight matrix
@ -115,15 +116,14 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp {
packsize_);
Shape outShape({(int)packsize_});
return outShape;
#else // USE_FBGEMM
#else
ABORT("Packed GEMM requires a build with USE_FBGEMM enabled");
return Shape();
#endif // USE_FBGEMM
}
};
;
// Pack a matrix (int8) in a cache-utilization-efficient way (block format), together with quantization into int8
// PackMatrix packMat_: the type of packed matrix - A or B matrix
// marian::Type packType_: the type the input matrix is packed into - packed8avx2 or packed8avx512
@ -132,6 +132,7 @@ struct FbgemmPacked16PackNodeOp : public UnaryNodeOp {
// int ncol_: the number of columns
// uint64_t packsize_: the size of the packed matrix
// (the size of int8 packed B from fbgemm::PackAWithQuantRowOffset + quantization scale, offset and zero point)
struct FbgemmPacked8PackNodeOp : public UnaryNodeOp {
PackMatrix packMat_;
marian::Type packType_;
@ -180,19 +181,21 @@ struct FbgemmPacked8PackNodeOp : public UnaryNodeOp {
const std::string type() override { return "packMatInt8"; }
Shape newShape(Expr a, bool transpose) {
#if USE_FBGEMM
Shape newShape(Expr a, bool transpose) {
fbgemmPacked8PackInfo(a->shape(), packType_, transpose, nrow_, ncol_, packsize_);
Shape outShape({(int)packsize_});
return outShape;
#else // USE_FBGEMM
}
#else
Shape newShape(Expr /*a*/, bool /*transpose*/) {
ABORT("Packed GEMM requires a build with USE_FBGEMM enabled");
return Shape();
#endif // USE_FBGEMM
}
#endif // USE_FBGEMM
};
// Affine transform (matrix multiplication) using packed B matrix
// float scalar_: scalar multiplier
// size_t m_: the number of rows in A and C
@ -202,7 +205,6 @@ struct FbgemmPacked8PackNodeOp : public UnaryNodeOp {
// bool transB_: transpose B
class FbgemmPacked16AffineNodeOp : public NaryNodeOp {
private:
float scalar_;
size_t m_;
size_t n_;
size_t k_;
@ -210,9 +212,8 @@ private:
bool transB_;
public:
FbgemmPacked16AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float scalar)
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32),
scalar_(scalar) {
FbgemmPacked16AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float /*scalar*/)
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32)/*, scalar_(scalar)*/ {
transA_ = transA;
transB_ = transB;
m_ = nodes[0]->shape().elements() / nodes[0]->shape()[-1];
@ -281,7 +282,6 @@ public:
// bool transB_: transpose B
class FbgemmPacked8AffineNodeOp : public NaryNodeOp {
private:
float scalar_;
size_t m_;
size_t n_;
size_t k_;
@ -289,9 +289,8 @@ private:
bool transB_;
public:
FbgemmPacked8AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float scalar)
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32),
scalar_(scalar) {
FbgemmPacked8AffineNodeOp(const std::vector<Expr>& nodes, Shape bShape, bool transA, bool transB, float /*scalar*/)
: NaryNodeOp(nodes, newShape(nodes[0], bShape, transA, transB), Type::float32)/*, scalar_(scalar) */ {
transA_ = transA;
transB_ = transB;
m_ = nodes[0]->shape().elements() / nodes[0]->shape()[-1];
@ -302,7 +301,7 @@ public:
size_t l = bShape.elements() / bShape[-1];
n_ = bShape[-1];
if(transB)
std::swap(l, n_);
std::swap(l, n_);
}
Shape newShape(Expr a, Shape bShape, bool transA, bool transB) {
@ -369,9 +368,9 @@ static inline Expr affine(Expr a, Expr b, Shape bShape, Expr c, bool transA, boo
Type elementType = b->value_type();
if (elementType == Type::packed16)
return Expression<cpu::variant::FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
return Expression<FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
else if (isPacked(elementType) && sizeOf(elementType) == 1)
return Expression<cpu::variant::FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
return Expression<FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
else {
ABORT("Only int8 and fp16 are available. {}", elementType);
return nullptr;
@ -380,9 +379,9 @@ static inline Expr affine(Expr a, Expr b, Shape bShape, Expr c, bool transA, boo
static inline Expr pack(Type elementType, Expr a, PackMatrix packMat, bool transpose, float clipValue) {
if (elementType == Type::packed16)
return Expression<cpu::variant::FbgemmPacked16PackNodeOp>(a, packMat, transpose, clipValue);
return Expression<FbgemmPacked16PackNodeOp>(a, packMat, transpose, clipValue);
else if (isPacked(elementType) && sizeOf(elementType) == 1)
return Expression<cpu::variant::FbgemmPacked8PackNodeOp>(a, packMat, elementType, transpose, clipValue);
return Expression<FbgemmPacked8PackNodeOp>(a, packMat, elementType, transpose, clipValue);
else {
ABORT("Only int8 and fp16 are available. {}", elementType);
return nullptr;
@ -394,9 +393,9 @@ static inline Expr dot(Expr a, Expr b, Shape bShape, bool transA, bool transB, f
Type elementType = b->value_type();
if (elementType == Type::packed16)
return Expression<cpu::variant::FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
return Expression<FbgemmPacked16AffineNodeOp>(nodes, bShape, transA, transB, scalar);
else if (isPacked(elementType) && sizeOf(elementType) == 1)
return Expression<cpu::variant::FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
return Expression<FbgemmPacked8AffineNodeOp>(nodes, bShape, transA, transB, scalar);
else {
ABORT("Only int8 and fp16 are available. {}", elementType);
return nullptr;
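The comments above note that the int8-packed B matrix carries a quantization scale, offset and zero point alongside the packed weights. As a rough, fbgemm-independent illustration of per-tensor affine quantization (generic names and formula, not Marian's or fbgemm's actual code):

// Sketch: per-tensor affine quantization to int8,
// q = round(x / scale) + zeroPoint, with scale chosen so that the
// observed [min, max] range maps onto [-128, 127].
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Quantized {
  std::vector<std::int8_t> data;
  float scale;
  int zeroPoint;
};

Quantized quantize(const std::vector<float>& x) {
  float lo = *std::min_element(x.begin(), x.end());
  float hi = *std::max_element(x.begin(), x.end());
  float scale = (hi - lo) / 255.f;
  if (scale == 0.f) scale = 1.f;                        // avoid division by zero for constant input
  int zeroPoint = (int)std::round(-128.f - lo / scale); // maps lo onto -128
  Quantized q{{}, scale, zeroPoint};
  q.data.reserve(x.size());
  for (float v : x) {
    int iv = (int)std::round(v / scale) + zeroPoint;
    q.data.push_back((std::int8_t)std::max(-128, std::min(127, iv))); // clamp to int8 range
  }
  return q;
}

int main() {
  Quantized q = quantize({-1.f, -0.5f, 0.f, 0.5f, 1.f});
  std::printf("scale=%g zeroPoint=%d first=%d\n", q.scale, q.zeroPoint, (int)q.data[0]);
  return 0;
}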

View File

@ -20,7 +20,7 @@ namespace marian {
namespace cpu {
void IsNaN(const Tensor in, Ptr<Allocator> allocator, bool& /*isNaN*/, bool& /*isInf*/) {
void IsNaN(const Tensor /*in*/, Ptr<Allocator> /*allocator*/, bool& /*isNaN*/, bool& /*isInf*/) {
ABORT("Not implemented");
}
@ -214,9 +214,11 @@ void Transpose0213(Tensor out, Tensor in) {
}
}
// This function is called only when MKL is available.
#if MKL_FOUND
// Given a 4D array, transpose (swap) the initial 3 dimensions while keeping the last dimension.
// e.g. 1234 --> 2134, 1234 --> 3214 (4 is always kept).
// This is an optimized version for swapping first 3 dimensions
// This is an optimized version for swapping first 3 dimensions
// assuming the last dimension is large enough to get benefits from vectorized copy.
//
// @param out output tensor
@ -225,14 +227,13 @@ void Transpose0213(Tensor out, Tensor in) {
template <bool add>
void TransposeFirst3In4(Tensor out, Tensor in, const std::vector<int>& vAxis) {
ABORT_IF(vAxis.size() != 4, "This function handles only 4D arrays.");
#if MKL_FOUND
int innermost = in->shape()[-1];
int l1 = in->shape()[vAxis[0]];
int l2 = in->shape()[vAxis[1]];
int l3 = in->shape()[vAxis[2]];
// find the mapping between the transposed output dimensional indices (oi, oj, ok)
// find the mapping between the transposed output dimensional indices (oi, oj, ok)
// and original input dimensional indices (i, j, k)
int oi, oj, ok;
#pragma omp parallel for
@ -275,11 +276,8 @@ void TransposeFirst3In4(Tensor out, Tensor in, const std::vector<int>& vAxis) {
}
}
}
#else
// It should never get here. This function is called only when MKL is available.
ABORT("Should not get here");
#endif // MKL_FOUND
}
#endif // MKL_FOUND
inline void transpose4x4_SSE(const float* A,
float* B,
@ -656,7 +654,7 @@ void SelectAxis2(Tensor out,
functional::Shape outShape = out->shape();
functional::Shape inShape = in->shape();
auto idxData = indices->data<IndexType>();
auto odata = out->data();
const auto idata = in->data();
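As a plain, non-vectorized illustration of the output-to-input index mapping described for TransposeFirst3In4 above (this is not the MKL-backed implementation; the dimension handling and raw-pointer layout are assumptions for illustration only):

// Sketch: permute the first three axes of a row-major [d0 x d1 x d2 x d3] array
// according to perm (a permutation of {0,1,2}), keeping the innermost axis d3
// intact and copying it as one contiguous, easily vectorized block.
#include <cstring>

void transposeFirst3(const float* in, float* out, const int dims[4], const int perm[3]) {
  const int d3 = dims[3];
  const int odims[3] = {dims[perm[0]], dims[perm[1]], dims[perm[2]]};
  for (int oi = 0; oi < odims[0]; ++oi)
    for (int oj = 0; oj < odims[1]; ++oj)
      for (int ok = 0; ok < odims[2]; ++ok) {
        // Map output indices (oi, oj, ok) back to input indices along axes 0..2.
        int in_idx[3];
        in_idx[perm[0]] = oi;
        in_idx[perm[1]] = oj;
        in_idx[perm[2]] = ok;
        const float* src = in  + ((long)(in_idx[0] * dims[1] + in_idx[1]) * dims[2] + in_idx[2]) * d3;
        float*       dst = out + ((long)(oi * odims[1] + oj) * odims[2] + ok) * d3;
        std::memcpy(dst, src, d3 * sizeof(float));  // contiguous copy of the last dimension
      }
}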

View File

@ -15,11 +15,11 @@ protected:
public:
RandomGenerator(size_t seed) : seed_(seed) { }
virtual ~RandomGenerator() {}
virtual void uniform(Tensor, float a, float b) = 0;
virtual void normal(Tensor, float mean, float stddev) = 0;
};
Ptr<RandomGenerator> createRandomGenerator(size_t /*seed*/, DeviceId);
}
}

View File

@ -25,7 +25,7 @@
namespace marian {
template <typename InIt, typename OutIt>
void copy(Ptr<Backend> backend, const InIt beg, const InIt end, OutIt it) {
void copy(Ptr<Backend>& MAYBE_UNUSED backend, const InIt beg, const InIt end, OutIt it) {
#ifdef CUDA_FOUND
if(backend->getDeviceId().type == DeviceType::gpu)
gpu::copy(backend, beg, end, it);
@ -119,7 +119,7 @@ DISPATCH3(Concatenate, marian::Tensor, const std::vector<marian::Tensor>&, int)
// clang-format on
// Bernoulli(tensor, 0.5f, 2.f, -1.f) generates a tensor composed of 50% of 1 and 50% of -1.
// Bernoulli(tensor, 0.5f, 2.f, -1.f) generates a tensor composed of 50% of 1 and 50% of -1.
static inline void Bernoulli(Tensor resultTensor, float keepProb, float scale = 1.f, float shift = 0.f) {
// in-place uniform distribution
auto rnd = resultTensor->getBackend()->getRandomGenerator();
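To make the scale/shift convention in the Bernoulli comment above concrete, a scalar sketch of the same idea (plain C++ rather than the tensor API) could be:

// Sketch: draw keep/drop decisions with probability keepProb, then apply scale and shift.
// With keepProb=0.5, scale=2, shift=-1 the result is +1 or -1 with equal probability.
#include <cstddef>
#include <cstdio>
#include <random>
#include <vector>

std::vector<float> bernoulli(std::size_t n, float keepProb, float scale, float shift, unsigned seed = 42) {
  std::mt19937 rng(seed);
  std::uniform_real_distribution<float> uniform(0.f, 1.f);
  std::vector<float> out(n);
  for (auto& v : out)
    v = (uniform(rng) < keepProb ? 1.f : 0.f) * scale + shift;
  return out;
}

int main() {
  for (float v : bernoulli(8, 0.5f, 2.f, -1.f))
    std::printf("%g ", v);
  std::printf("\n");
  return 0;
}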
@ -190,7 +190,7 @@ void LayerNormalizationGrad(Tensor gradX,
}
static inline void LayerNormalizationGrad(
Ptr<Allocator> allocator,
Ptr<Allocator> MAYBE_UNUSED allocator,
Tensor gradX,
Tensor gradGamma,
Tensor gradBeta,

View File

@ -1,7 +1,7 @@
#include "marian.h"
#include "common/timer.h"
int main(int argc, char** argv) {
int main(int /*argc*/, char** /*argv*/) {
using namespace marian;
{

View File

@ -8,6 +8,8 @@
#include <fstream>
int main(int argc, char** argv) {
ABORT_IF(argc != 3, "FATAL ERROR: Incorrect number of command line arguments "
"(expected: 2) for command {}.",argv[0]);
SQLite::Database db("corpus.db", SQLite::OPEN_READWRITE|SQLite::OPEN_CREATE);
db.exec("PRAGMA temp_store_directory = '/data1/marcinjd';");

View File

@ -38,7 +38,7 @@ Ptr<ICommunicator> createCommunicator(
}
// the actual implementation is inside communicator.cu
return New<NCCLCommunicator>(graphs, mpi);
return New<NCCLCommunicator>(graphs, mpi);
#else // no CUDA or no NCCL
noNccl; // (unused)
return New<DefaultCommunicator>(graphs, mpi);
@ -141,7 +141,7 @@ public:
FakeMPIWrapper(bool) {
LOG(warn, "Compiled without MPI support. Falling back to FakeMPIWrapper");
}
virtual ~FakeMPIWrapper() {}
virtual size_t myMPIRank() const override { return 0; };
virtual size_t numMPIProcesses() const override { return 1; };

View File

@ -156,11 +156,8 @@ public:
void scatterReduceAndResetGrads() const override {
const_cast<DefaultCommunicator*>(this)->lazyInit();
int totalSize = (int)graphs_[0]->params()->vals()->size();
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
// Gather gradients from different devices into current gradient shards
auto scatter = [this, shardSize](size_t idx, size_t begin, size_t end) {
auto scatter = [this](size_t idx, size_t begin, size_t end) {
auto curGrad = graphs_[idx]->params()->grads()->subtensor(begin, end-begin);
// collect and sum gradients
@ -176,7 +173,7 @@ public:
};
// reset gradients outside current shard
auto reset = [this, shardSize](size_t idx, size_t begin, size_t end) {
auto reset = [this](size_t idx, size_t begin, size_t end) {
auto grad = graphs_[idx]->params()->grads();
if (begin > 0)
grad->subtensor(0, begin)->set(0);
@ -189,11 +186,9 @@ public:
}
void allGatherParams() const override {
int totalSize = (int)graphs_[0]->params()->vals()->size();
int shardSize = (int)ceil(totalSize / (float)graphs_.size());
// Update all graphs with parameter shard
auto gather = [this, shardSize](size_t idx, size_t begin, size_t end) {
auto gather = [this](size_t idx, size_t begin, size_t end) {
auto getShard = [&](Ptr<ExpressionGraph> graph) {
return graph->params()->vals()->subtensor(begin, end-begin);
};
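The scatter/gather/reset lambdas above each operate on a [begin, end) shard of the flat parameter or gradient vector. As a rough illustration (not the actual Marian code), the shard boundaries for device idx out of numDevices devices could be derived as follows:

// Sketch: split totalSize elements into numDevices shards of (roughly) equal size;
// the last shard may be smaller when totalSize is not evenly divisible.
#include <algorithm>
#include <cmath>
#include <cstdio>

void shardRange(int totalSize, int numDevices, int idx, int& begin, int& end) {
  int shardSize = (int)std::ceil(totalSize / (float)numDevices);
  begin = std::min(idx * shardSize, totalSize);
  end   = std::min(begin + shardSize, totalSize);
}

int main() {
  int begin, end;
  for (int idx = 0; idx < 4; ++idx) {
    shardRange(10, 4, idx, begin, end);
    std::printf("device %d -> [%d, %d)\n", idx, begin, end);
  }
  return 0;
}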

View File

@ -118,7 +118,7 @@ public:
}
// Convert a tensor into a sparse tensor format
void fromDense(Tensor t) {
void fromDense(Tensor MAYBE_UNUSED t) {
if(backend_->getDeviceId().type == DeviceType::cpu) {
ABORT("Gradient Dropping for CPU is not yet supported");
}

View File

@ -54,10 +54,10 @@ public:
* number of devices, which is passed in as the 'multiplier'.
*/
// @TODO: Can this be made const? It seems wrong to have a stateful method that still returns a result.
virtual Ptr<data::BatchStats> collectStats(Ptr<ExpressionGraph> graph,
Ptr<models::ICriterionFunction> model,
const std::vector<Ptr<Vocab>>& vocabs,
double multiplier = 1.) {
Ptr<data::BatchStats> collectStats(Ptr<ExpressionGraph> graph,
Ptr<models::ICriterionFunction> model,
const std::vector<Ptr<Vocab>>& vocabs,
double multiplier = 1.) {
auto stats = New<data::BatchStats>();
size_t numFiles = options_->get<std::vector<std::string>>("train-sets").size();
@ -92,8 +92,8 @@ public:
maxBatch *= 2;
}
// Do a binary search for the maximum batch size that fits into the given workspace memory
// for a tested sentence length.
// Do a binary search for the maximum batch size that fits into the given workspace memory
// for a tested sentence length.
for(size_t i = step; i <= maxLength; i += step) {
size_t start = 1;
size_t end = maxBatch;
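The comment above describes a binary search for the largest batch size that still fits into the workspace. A simplified sketch of that search, with a hypothetical fits() predicate standing in for building the graph and measuring its memory use:

// Sketch: find the largest batch size in [1, maxBatch] for which fits() returns true,
// assuming fits() is monotone (if a size fits, all smaller sizes fit too).
#include <cstddef>
#include <cstdio>
#include <functional>

std::size_t maxFittingBatch(std::size_t maxBatch, const std::function<bool(std::size_t)>& fits) {
  std::size_t start = 1, end = maxBatch, best = 0;
  while (start <= end) {
    std::size_t mid = (start + end) / 2;
    if (fits(mid)) {     // mid fits: remember it and try larger sizes
      best = mid;
      start = mid + 1;
    } else {             // mid does not fit: try smaller sizes
      end = mid - 1;
    }
  }
  return best;
}

int main() {
  // Hypothetical stand-in for "build a graph with this batch size and check
  // whether it fits into the workspace": here anything up to 37 'fits'.
  auto fits = [](std::size_t batchSize) { return batchSize <= 37; };
  std::printf("largest fitting batch: %zu\n", maxFittingBatch(1000, fits));
  return 0;
}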

View File

@ -64,7 +64,7 @@ public:
void save(Ptr<ExpressionGraph>, bool final = false);
// @TODO: give it a fake batch generator which own vocabs instead of passing vocabs
Ptr<data::BatchStats> collectStats(const std::vector<Ptr<Vocab>>& vocabs) {
virtual Ptr<data::BatchStats> collectStats(const std::vector<Ptr<Vocab>>& vocabs) {
return GraphGroup::collectStats(graphs_[0], builders_[0], vocabs);
}

View File

@ -63,7 +63,6 @@ private:
Tensor paramsAvg_;
std::vector<float> accGradientsSync_cpu;
std::vector<float> receiveBuffer_cpu;
bool synchronization_happened{false};
Ptr<OptimizerBase> syncOptimizer_;

View File

@ -26,7 +26,6 @@ class SyncGraphGroup : public GraphGroup, public ExponentialSmoothing {
// state for update()
bool first_{ true }; // gets interpreted and cleared by update()
std::vector<Ptr<data::Batch>> pendingBatches_; // in case of dynamic MB-size scaling, we temporarily buffer up batches across update() calls until enough
size_t typicalTrgWords_{}; // typical batch size in words (labels), 0 if unknown (e.g. specified in sentences)
double updateMultiplier_{1}; // multiplier not applied in collectStats() (no multiplier if not mini-batch-fit)
void initialize(const Ptr<data::Batch>& exampleBatch);

View File

@ -13,6 +13,7 @@ class TrainingState;
class TrainingObserver {
public:
virtual ~TrainingObserver() {}
virtual void init(TrainingState&) {}
virtual void actAfterEpoch(TrainingState&) {}
virtual void actAfterBatches(TrainingState&) {}

View File

@ -36,6 +36,7 @@ protected:
public:
ValidatorBase(bool lowerIsBetter) : lowerIsBetter_(lowerIsBetter), lastBest_{initScore()} {}
virtual ~ValidatorBase() {}
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
Ptr<const TrainingState> state) = 0;
@ -51,6 +52,7 @@ public:
template <class DataSet, class BuilderType> // @TODO: BuilderType doesn't really serve a purpose here? Review and remove.
class Validator : public ValidatorBase {
public:
virtual ~Validator() {}
Validator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options, bool lowerIsBetter = true)
: ValidatorBase(lowerIsBetter),
vocabs_(vocabs),
@ -137,6 +139,7 @@ class CrossEntropyValidator : public Validator<data::Corpus, models::ICriterionF
public:
CrossEntropyValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
virtual ~CrossEntropyValidator() {}
std::string type() override { return options_->get<std::string>("cost-type"); }
@ -148,6 +151,7 @@ protected:
class AccuracyValidator : public Validator<data::Corpus, models::IModel> {
public:
AccuracyValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
virtual ~AccuracyValidator() {}
std::string type() override { return "accuracy"; }
@ -161,6 +165,7 @@ private:
public:
BertAccuracyValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options, bool evalMaskedLM);
virtual ~BertAccuracyValidator() {}
std::string type() override {
if(evalMaskedLM_)
@ -177,6 +182,7 @@ protected:
class ScriptValidator : public Validator<data::Corpus, models::IModel> {
public:
ScriptValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
virtual ~ScriptValidator() {}
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
Ptr<const TrainingState> /*ignored*/) override;
@ -193,6 +199,7 @@ protected:
class TranslationValidator : public Validator<data::Corpus, models::IModel> {
public:
TranslationValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options);
virtual ~TranslationValidator() {}
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
Ptr<const TrainingState> state) override;
@ -212,6 +219,7 @@ protected:
class BleuValidator : public Validator<data::Corpus, models::IModel> {
public:
BleuValidator(std::vector<Ptr<Vocab>> vocabs, Ptr<Options> options, bool detok = false);
virtual ~BleuValidator() {}
virtual float validate(const std::vector<Ptr<ExpressionGraph>>& graphs,
Ptr<const TrainingState> state) override;

View File

@ -11,6 +11,7 @@ namespace marian {
class PrintingStrategy {
public:
virtual ~PrintingStrategy() {}
virtual bool shouldBePrinted(long) = 0;
};

View File

@ -10,6 +10,8 @@ namespace marian {
class ScorerState {
public:
virtual ~ScorerState(){}
virtual Logits getLogProbs() const = 0;
virtual void blacklist(Expr /*totalCosts*/, Ptr<data::CorpusBatch> /*batch*/){};
@ -24,6 +26,8 @@ public:
Scorer(const std::string& name, float weight)
: name_(name), weight_(weight) {}
virtual ~Scorer(){}
std::string getName() { return name_; }
float getWeight() { return weight_; }
@ -53,6 +57,7 @@ protected:
public:
ScorerWrapperState(Ptr<DecoderState> state) : state_(state) {}
virtual ~ScorerWrapperState() {}
virtual Ptr<DecoderState> getState() { return state_; }
@ -88,6 +93,8 @@ public:
encdec_(std::static_pointer_cast<IEncoderDecoder>(encdec)),
ptr_{ptr} {}
virtual ~ScorerWrapper() {}
virtual void init(Ptr<ExpressionGraph> graph) override {
graph->switchParams(getName());
if(ptr_)