mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
more coda separation
This commit is contained in:
parent
42293372ca
commit
8ec6eef9d5
@ -10,6 +10,7 @@ option(COMPILE_EXAMPLES "Compile examples" OFF)
|
||||
option(COMPILE_TESTS "Compile tests" OFF)
|
||||
option(COMPILE_SERVER "Compile marian-server" ON)
|
||||
option(COMPILE_CPU "Compile CPU version" ON)
|
||||
option(COMPILE_CUDA "Compile GPU version" ON)
|
||||
option(USE_CUDNN "Use CUDNN library" OFF)
|
||||
option(USE_MPI "Use MPI library" OFF)
|
||||
|
||||
@ -20,14 +21,17 @@ include(GetVersionFromFile)
|
||||
message(STATUS "Project name: ${PROJECT_NAME}")
|
||||
message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}")
|
||||
|
||||
|
||||
# Set compilation flags
|
||||
set(CMAKE_CXX_FLAGS_RELEASE " -std=c++11 -O3 -Ofast -m64 -march=native -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC -Wno-unused-result -Wno-deprecated -Wno-deprecated-gpu-targets")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG " -std=c++11 -g -O0 -fPIC -Wno-unused-result -Wno-deprecated -Wno-deprecated-gpu-targets")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE " -std=c++11 -O3 -Ofast -m64 -pthread -march=native -Wl,--no-as-needed -funroll-loops -ffinite-math-only -fPIC -Wno-unused-result -Wno-deprecated -Wno-deprecated-gpu-targets")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG " -std=c++11 -g -O0 -pthread -fPIC -Wno-unused-result -Wno-deprecated -Wno-deprecated-gpu-targets")
|
||||
set(CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_RELEASE} -g -pg")
|
||||
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
|
||||
|
||||
# Find packages
|
||||
|
||||
set(EXT_LIBS ${EXT_LIBS} ${CMAKE_DL_LIBS})
|
||||
|
||||
if(COMPILE_CUDA)
|
||||
find_package(CUDA "8.0" REQUIRED)
|
||||
if(CUDA_FOUND)
|
||||
set(EXT_LIBS ${EXT_LIBS} ${CUDA_curand_LIBRARY} ${CUDA_cusparse_LIBRARY})
|
||||
@ -41,8 +45,15 @@ if(CUDA_FOUND)
|
||||
LIST(APPEND CUDA_NVCC_FLAGS -DCUDNN; )
|
||||
endif(CUDNN_FOUND)
|
||||
endif(USE_CUDNN)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUDA_FOUND")
|
||||
else(CUDA_FOUND)
|
||||
message(WARNING "Cannot find CUDA compiling CPU version only")
|
||||
endif(CUDA_FOUND)
|
||||
|
||||
else(COMPILE_CUDA)
|
||||
message(WARNING "COMPILE_CUDA=off : Building only CPU version")
|
||||
endif(COMPILE_CUDA)
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
list(APPEND CUDA_NVCC_FLAGS -std=c++11; --default-stream per-thread; -O0; -g; -Xcompiler '-fPIC'; -arch=sm_30; -gencode=arch=compute_30,code=sm_30; -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_61,code=compute_61 ;)
|
||||
else(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
|
@ -4,7 +4,7 @@ include_directories(.)
|
||||
include_directories(3rd_party)
|
||||
include_directories(3rd_party/SQLiteCpp/include)
|
||||
|
||||
cuda_add_library(marian
|
||||
add_library(marian STATIC
|
||||
common/utils.cpp
|
||||
common/logging.cpp
|
||||
common/config.cpp
|
||||
@ -26,14 +26,6 @@ cuda_add_library(marian
|
||||
tensors/cpu/prod.cpp
|
||||
tensors/cpu/tensor_operators.cpp
|
||||
|
||||
tensors/gpu/device.cu
|
||||
tensors/gpu/algorithm.cu
|
||||
tensors/gpu/dropout.cu
|
||||
tensors/gpu/prod.cu
|
||||
tensors/gpu/element.cu
|
||||
tensors/gpu/add.cu
|
||||
tensors/gpu/tensor_operators.cu
|
||||
tensors/gpu/cudnn_wrappers.cu
|
||||
|
||||
graph/expression_graph.cpp
|
||||
graph/expression_operators.cpp
|
||||
@ -53,26 +45,37 @@ cuda_add_library(marian
|
||||
|
||||
translator/history.cpp
|
||||
translator/output_collector.cpp
|
||||
translator/nth_element.cu
|
||||
translator/nth_element.cpp
|
||||
translator/helpers.cu
|
||||
translator/helpers.cpp
|
||||
translator/scorers.cpp
|
||||
|
||||
training/dropper.cu
|
||||
training/graph_group_async.cpp
|
||||
training/graph_group_async_drop.cpp
|
||||
training/graph_group_sync.cpp
|
||||
training/graph_group_singleton.cpp
|
||||
training/graph_group_multinode.cpp
|
||||
training/sparse_tensor.cu
|
||||
training/validator.cpp
|
||||
|
||||
rescorer/score_collector.cpp
|
||||
$<TARGET_OBJECTS:libyaml-cpp>
|
||||
$<TARGET_OBJECTS:SQLiteCpp>
|
||||
STATIC
|
||||
)
|
||||
$<TARGET_OBJECTS:SQLiteCpp>)
|
||||
|
||||
if(CUDA_FOUND)
|
||||
cuda_add_library(marian_cuda
|
||||
tensors/gpu/device.cu
|
||||
tensors/gpu/algorithm.cu
|
||||
tensors/gpu/dropout.cu
|
||||
tensors/gpu/prod.cu
|
||||
tensors/gpu/element.cu
|
||||
tensors/gpu/add.cu
|
||||
tensors/gpu/tensor_operators.cu
|
||||
tensors/gpu/cudnn_wrappers.cu
|
||||
translator/nth_element.cu
|
||||
translator/helpers.cu
|
||||
training/dropper.cu
|
||||
training/sparse_tensor.cu
|
||||
STATIC)
|
||||
endif(CUDA_FOUND)
|
||||
|
||||
set_target_properties(marian PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
set_target_properties(marian PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
@ -99,7 +102,10 @@ endif(COMPILE_SERVER)
|
||||
|
||||
foreach(exec ${EXECUTABLES})
|
||||
target_link_libraries(${exec} marian ${EXT_LIBS})
|
||||
cuda_add_cublas_to_target(${exec})
|
||||
if(CUDA_FOUND)
|
||||
target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS} ${CMAKE_THREAD_LIBS_INIT})
|
||||
cuda_add_cublas_to_target(${exec})
|
||||
endif(CUDA_FOUND)
|
||||
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
endforeach(exec)
|
||||
|
||||
|
@ -3,6 +3,9 @@ add_executable(mnist_example mnist/mnist_ffnn.cpp)
|
||||
|
||||
foreach(exec iris_example mnist_example)
|
||||
target_link_libraries(${exec} marian ${EXT_LIBS})
|
||||
cuda_add_cublas_to_target(${exec})
|
||||
if(CUDA_FOUND)
|
||||
target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS})
|
||||
cuda_add_cublas_to_target(${exec})
|
||||
endif(CUDA_FOUND)
|
||||
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
endforeach(exec)
|
||||
|
@ -1,5 +1,4 @@
|
||||
#include "graph/expression_operators.h"
|
||||
//#include "kernels/sparse.h"
|
||||
#include "layers/constructors.h"
|
||||
|
||||
#include "graph/node_operators.h"
|
||||
@ -333,6 +332,8 @@ Expr shift(Expr a, Shape shift) {
|
||||
// return Expression<LexicalProbNodeOp>(logits, att, eps, lf);
|
||||
//}
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
|
||||
Expr avg_pooling(
|
||||
Expr x,
|
||||
int height,
|
||||
@ -410,4 +411,6 @@ Expr pooling_with_masking(Expr x, Expr mask, int width, bool isEven) {
|
||||
return Expression<PoolingWithMaskingOp>(x, mask, width, isEven);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
@ -1,15 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include "tensors/tensor.h"
|
||||
#include "tensors/gpu/backend.h"
|
||||
#include "tensors/backend.h"
|
||||
|
||||
#include "graph/node.h"
|
||||
//#include "kernels/sparse.h"
|
||||
#include "tensors/tensor_operators.h"
|
||||
#include "functional/functional.h"
|
||||
|
||||
#include "tensors/gpu/cudnn_wrappers.h"
|
||||
|
||||
//#include "tensors/gpu/cudnn_wrappers.h"
|
||||
|
||||
namespace marian {
|
||||
|
||||
|
@ -1,14 +1,19 @@
|
||||
#include "tensors/backend.h"
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
#include "tensors/gpu/backend.h"
|
||||
#endif
|
||||
|
||||
#include "tensors/cpu/backend.h"
|
||||
|
||||
namespace marian {
|
||||
|
||||
Ptr<Backend> BackendByDevice(DeviceId deviceId, size_t seed) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(deviceId.type == DeviceType::gpu)
|
||||
return New<gpu::Backend>(deviceId, seed);
|
||||
else
|
||||
#endif
|
||||
return New<cpu::Backend>(deviceId, seed);
|
||||
}
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "tensors/gpu/prod.h"
|
||||
#include "tensors/gpu/backend.h"
|
||||
#include "tensors/tensor.h"
|
||||
#include "tensors/cpu/backend.h"
|
||||
|
||||
#if MKL_FOUND
|
||||
#include <mkl.h>
|
||||
|
@ -59,10 +59,17 @@ namespace cpu {
|
||||
}
|
||||
|
||||
static inline Ptr<Device> DispatchDevice(DeviceId deviceId, size_t alignment = 256) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(deviceId.type == DeviceType::gpu)
|
||||
return New<gpu::Device>(deviceId, alignment);
|
||||
else
|
||||
return New<cpu::Device>(deviceId, alignment);
|
||||
#else
|
||||
if(deviceId.type == DeviceType::gpu)
|
||||
ABORT("CUDA support not compiled into marian");
|
||||
else
|
||||
return New<cpu::Device>(deviceId, alignment);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
|
||||
#define DISPATCH1(Function, Arg1) \
|
||||
namespace gpu { \
|
||||
@ -127,3 +128,78 @@
|
||||
cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define DISPATCH1(Function, Arg1) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1); \
|
||||
} \
|
||||
void Function(Arg1 arg1) { \
|
||||
cpu::Function(arg1); \
|
||||
}
|
||||
|
||||
#define DISPATCH2(Function, Arg1, Arg2) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2) { \
|
||||
cpu::Function(arg1, arg2); \
|
||||
}
|
||||
|
||||
#define DISPATCH3(Function, Arg1, Arg2, Arg3) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2, Arg3); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3) { \
|
||||
cpu::Function(arg1, arg2, arg3); \
|
||||
}
|
||||
|
||||
#define DISPATCH4(Function, Arg1, Arg2, Arg3, Arg4) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2, Arg3, Arg4); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4) { \
|
||||
cpu::Function(arg1, arg2, arg3, arg4); \
|
||||
}
|
||||
|
||||
#define DISPATCH5(Function, Arg1, Arg2, Arg3, Arg4, Arg5) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2, Arg3, Arg4, Arg5); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5) { \
|
||||
cpu::Function(arg1, arg2, arg3, arg4, arg5); \
|
||||
}
|
||||
|
||||
#define DISPATCH6(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6) { \
|
||||
cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6); \
|
||||
}
|
||||
|
||||
#define DISPATCH7(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7) { \
|
||||
cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7); \
|
||||
}
|
||||
|
||||
#define DISPATCH8(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8) { \
|
||||
cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8); \
|
||||
}
|
||||
|
||||
#define DISPATCH9(Function, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9) \
|
||||
namespace cpu { \
|
||||
void Function(Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, Arg9); \
|
||||
} \
|
||||
static inline void Function(Arg1 arg1, Arg2 arg2, Arg3 arg3, Arg4 arg4, Arg5 arg5, Arg6 arg6, Arg7 arg7, Arg8 arg8, Arg9 arg9) { \
|
||||
cpu::Function(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -11,7 +11,10 @@
|
||||
#include "tensors/backend.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
#include "tensors/gpu/algorithm.h"
|
||||
#endif
|
||||
|
||||
namespace marian {
|
||||
|
||||
@ -53,32 +56,40 @@ public:
|
||||
|
||||
float get(size_t i) {
|
||||
float temp;
|
||||
#ifdef CUDA_FOUND
|
||||
if(backend_->getDevice().type == DeviceType::gpu)
|
||||
gpu::copy(backend_, data() + i, data() + i + 1, &temp);
|
||||
else
|
||||
#endif
|
||||
std::copy(data() + i, data() + i + 1, &temp);
|
||||
return temp;
|
||||
}
|
||||
|
||||
void set(size_t i, float value) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(backend_->getDevice().type == DeviceType::gpu)
|
||||
gpu::copy(backend_, &value, &value + 1, data() + i);
|
||||
else
|
||||
#endif
|
||||
std::copy(&value, &value + 1, data() + i);
|
||||
}
|
||||
|
||||
void get(std::vector<float> &v) {
|
||||
v.resize(size());
|
||||
#ifdef CUDA_FOUND
|
||||
if(backend_->getDevice().type == DeviceType::gpu)
|
||||
gpu::copy(backend_, data(), data() + size(), v.data());
|
||||
else
|
||||
#endif
|
||||
std::copy(data(), data() + size(), v.data());
|
||||
}
|
||||
|
||||
void set(const float* begin, const float* end) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(backend_->getDevice().type == DeviceType::gpu)
|
||||
gpu::copy(backend_, begin, end, data());
|
||||
else
|
||||
#endif
|
||||
std::copy(begin, end, data());
|
||||
}
|
||||
|
||||
@ -87,27 +98,32 @@ public:
|
||||
}
|
||||
|
||||
void set(float value) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(backend_->getDevice().type == DeviceType::gpu)
|
||||
gpu::fill(backend_, data(), data() + size(), value);
|
||||
else
|
||||
#endif
|
||||
std::fill(data(), data() + size(), value);
|
||||
}
|
||||
|
||||
void setSparse(const std::vector<size_t> &k,
|
||||
const std::vector<float> &v) {
|
||||
if(backend_->getDevice().type == DeviceType::gpu) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(backend_->getDevice().type == DeviceType::gpu)
|
||||
gpu::setSparse(backend_, k, v, data());
|
||||
} else {
|
||||
else
|
||||
#endif
|
||||
for(int i = 0; i < k.size(); ++i)
|
||||
data()[k[i]] = v[i];
|
||||
}
|
||||
}
|
||||
|
||||
void copyFrom(Tensor in) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(in->getBackend()->getDevice().type == DeviceType::gpu ||
|
||||
backend_->getDevice().type == DeviceType::gpu)
|
||||
gpu::copy(backend_, in->data(), in->data() + in->size(), data());
|
||||
else
|
||||
#endif
|
||||
std::copy(in->data(), in->data() + in->size(), data());
|
||||
}
|
||||
|
||||
|
@ -10,9 +10,11 @@
|
||||
#include "functional/tmp.h"
|
||||
#include "functional/tensor.h"
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
#include "tensors/gpu/element.h"
|
||||
#include "tensors/gpu/add.h"
|
||||
#include "tensors/gpu/prod.h"
|
||||
#endif
|
||||
|
||||
#include "tensors/cpu/element.h"
|
||||
#include "tensors/cpu/add.h"
|
||||
@ -21,9 +23,11 @@ namespace marian {
|
||||
|
||||
template <class Functor, class ...Tensors>
|
||||
void Element(Functor functor, marian::Tensor out, Tensors ...tensors) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(out->getBackend()->getDevice().type == DeviceType::gpu)
|
||||
gpu::Element(functor, out, tensors...);
|
||||
else
|
||||
#endif
|
||||
cpu::Element(functor, out, tensors...);
|
||||
}
|
||||
|
||||
@ -32,9 +36,11 @@ namespace marian {
|
||||
float scale,
|
||||
marian::Tensor out,
|
||||
Tensors... tensors) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(out->getBackend()->getDevice().type == DeviceType::gpu)
|
||||
gpu::Add(functor, scale, out, tensors...);
|
||||
else
|
||||
#endif
|
||||
cpu::Add(functor, scale, out, tensors...);
|
||||
}
|
||||
|
||||
@ -81,21 +87,23 @@ namespace marian {
|
||||
|
||||
DISPATCH3(Concatenate, marian::Tensor, const std::vector<marian::Tensor>&, int)
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
namespace gpu {
|
||||
void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cpu {
|
||||
void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax);
|
||||
}
|
||||
|
||||
static inline void Deconcatenate(std::vector<marian::Tensor>& outputs, const marian::Tensor in, int ax) {
|
||||
if(in->getBackend()->getDevice().type == DeviceType::gpu) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(in->getBackend()->getDevice().type == DeviceType::gpu)
|
||||
gpu::Deconcatenate(outputs, in, ax);
|
||||
}
|
||||
else {
|
||||
else
|
||||
#endif
|
||||
cpu::Deconcatenate(outputs, in, ax);
|
||||
}
|
||||
}
|
||||
|
||||
DISPATCH5(LayerNormalization, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, float)
|
||||
@ -116,11 +124,13 @@ namespace marian {
|
||||
DISPATCH2(LSTMCellForward, marian::Tensor, std::vector<marian::Tensor>)
|
||||
DISPATCH2(LSTMOutputForward, marian::Tensor, std::vector<marian::Tensor>);
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
namespace gpu {
|
||||
void LSTMCellBackward(std::vector<marian::Tensor> outputs,
|
||||
std::vector<marian::Tensor> inputs,
|
||||
marian::Tensor adj);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cpu {
|
||||
void LSTMCellBackward(std::vector<marian::Tensor> outputs,
|
||||
@ -131,19 +141,21 @@ namespace marian {
|
||||
static inline void LSTMCellBackward(std::vector<marian::Tensor> outputs,
|
||||
std::vector<marian::Tensor> inputs,
|
||||
marian::Tensor adj) {
|
||||
if(adj->getBackend()->getDevice().type == DeviceType::gpu) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(adj->getBackend()->getDevice().type == DeviceType::gpu)
|
||||
gpu::LSTMCellBackward(outputs, inputs, adj);
|
||||
}
|
||||
else {
|
||||
else
|
||||
#endif
|
||||
cpu::LSTMCellBackward(outputs, inputs, adj);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
namespace gpu {
|
||||
void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
|
||||
std::vector<marian::Tensor> inputs,
|
||||
marian::Tensor adj);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cpu {
|
||||
void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
|
||||
@ -154,22 +166,24 @@ namespace marian {
|
||||
static inline void LSTMOutputBackward(std::vector<marian::Tensor> outputs,
|
||||
std::vector<marian::Tensor> inputs,
|
||||
marian::Tensor adj) {
|
||||
if(adj->getBackend()->getDevice().type == DeviceType::gpu) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(adj->getBackend()->getDevice().type == DeviceType::gpu)
|
||||
gpu::LSTMOutputBackward(outputs, inputs, adj);
|
||||
}
|
||||
else {
|
||||
else
|
||||
#endif
|
||||
cpu::LSTMOutputBackward(outputs, inputs, adj);
|
||||
}
|
||||
}
|
||||
|
||||
DISPATCH3(GRUFastForward, marian::Tensor, std::vector<marian::Tensor>, bool)
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
namespace gpu {
|
||||
void GRUFastBackward(std::vector<marian::Tensor> outputs,
|
||||
std::vector<marian::Tensor> inputs,
|
||||
marian::Tensor adj,
|
||||
bool final);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cpu {
|
||||
void GRUFastBackward(std::vector<marian::Tensor> outputs,
|
||||
@ -182,35 +196,37 @@ namespace marian {
|
||||
std::vector<marian::Tensor> inputs,
|
||||
marian::Tensor adj,
|
||||
bool final = false) {
|
||||
if(adj->getBackend()->getDevice().type == DeviceType::gpu) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(adj->getBackend()->getDevice().type == DeviceType::gpu)
|
||||
gpu::GRUFastBackward(outputs, inputs, adj, final);
|
||||
}
|
||||
else {
|
||||
else
|
||||
#endif
|
||||
cpu::GRUFastBackward(outputs, inputs, adj, final);
|
||||
}
|
||||
}
|
||||
|
||||
DISPATCH4(Att, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
|
||||
DISPATCH7(AttBack, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor)
|
||||
|
||||
#ifdef CUDA_FOUND
|
||||
namespace gpu {
|
||||
float L2Norm(marian::Tensor in);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace cpu {
|
||||
float L2Norm(marian::Tensor in);
|
||||
}
|
||||
|
||||
static inline float L2Norm(marian::Tensor in) {
|
||||
if(in->getBackend()->getDevice().type == DeviceType::gpu) {
|
||||
#ifdef CUDA_FOUND
|
||||
if(in->getBackend()->getDevice().type == DeviceType::gpu)
|
||||
return gpu::L2Norm(in);
|
||||
}
|
||||
else {
|
||||
else
|
||||
#endif
|
||||
return cpu::L2Norm(in);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
DISPATCH5(PoolingWithMaskingForward, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
|
||||
DISPATCH6(PoolingWithMaskingBackward, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, int, bool)
|
||||
|
||||
|
||||
}
|
||||
|
@ -9,7 +9,11 @@ set(UNIT_TESTS
|
||||
foreach(test ${UNIT_TESTS})
|
||||
add_executable("run_${test}" run_tests.cpp "${test}.cpp")
|
||||
target_link_libraries("run_${test}" marian ${EXT_LIBS} Catch)
|
||||
cuda_add_cublas_to_target("run_${test}")
|
||||
|
||||
if(CUDA_FOUND)
|
||||
target_link_libraries("run_${test}" marian marian_cuda ${EXT_LIBS} Catch)
|
||||
cuda_add_cublas_to_target("run_${test}")
|
||||
endif(CUDA_FOUND)
|
||||
|
||||
add_test(NAME ${test} COMMAND "run_${test}")
|
||||
endforeach(test)
|
||||
@ -18,35 +22,23 @@ endforeach(test)
|
||||
# Testing apps
|
||||
add_executable(logger_test logger_test.cpp)
|
||||
add_executable(dropout_test dropout_test.cpp)
|
||||
#cuda_add_executable(bn_test bn_test.cu)
|
||||
|
||||
if(CUDA_FOUND)
|
||||
cuda_add_executable(pooling_test pooling_test.cu)
|
||||
#cuda_add_executable(marian_test marian_test.cu)
|
||||
cuda_add_executable(tensor_test tensor_test.cu)
|
||||
endif(CUDA_FOUND)
|
||||
|
||||
add_executable(sqlite_test sqlite_test.cpp)
|
||||
|
||||
foreach(exec
|
||||
logger_test
|
||||
dropout_test
|
||||
pooling_test
|
||||
#marian_test
|
||||
#bn_test
|
||||
tensor_test
|
||||
sqlite_test
|
||||
)
|
||||
)
|
||||
target_link_libraries(${exec} marian ${EXT_LIBS})
|
||||
cuda_add_cublas_to_target(${exec})
|
||||
if(CUDA_FOUND)
|
||||
target_link_libraries(${exec} marian marian_cuda ${EXT_LIBS} Catch)
|
||||
cuda_add_cublas_to_target(${exec})
|
||||
endif(CUDA_FOUND)
|
||||
|
||||
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
endforeach(exec)
|
||||
|
||||
# if(CUDNN_FOUND)
|
||||
# cuda_add_executable(conv_test conv_test.cu)
|
||||
|
||||
# foreach(exec
|
||||
# conv_test
|
||||
# )
|
||||
# target_link_libraries(${exec} marian ${EXT_LIBS})
|
||||
# cuda_add_cublas_to_target(${exec})
|
||||
# set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||
# endforeach(exec)
|
||||
# endif(CUDNN_FOUND)
|
||||
|
@ -105,9 +105,11 @@ public:
|
||||
|
||||
// @TODO: unify this
|
||||
Ptr<NthElement> nth;
|
||||
#ifdef CUDA_FOUND
|
||||
if(graph->getDevice().type == DeviceType::gpu)
|
||||
nth = New<NthElementGPU>(localBeamSize, dimBatch, graph->getDevice());
|
||||
else
|
||||
#endif
|
||||
nth = New<NthElementCPU>(localBeamSize, dimBatch);
|
||||
|
||||
Beams beams(dimBatch);
|
||||
|
@ -38,18 +38,22 @@ void suppressUnk(Expr probs) {
|
||||
if(probs->val()->getBackend()->getDevice().type == DeviceType::cpu) {
|
||||
cpu::suppressUnk(probs);
|
||||
}
|
||||
#ifdef CUDA_FOUND
|
||||
else {
|
||||
gpu::suppressUnk(probs);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void suppressWord(Expr probs, Word id) {
|
||||
if(probs->val()->getBackend()->getDevice().type == DeviceType::cpu) {
|
||||
cpu::suppressWord(probs, id);
|
||||
}
|
||||
#ifdef CUDA_FOUND
|
||||
else {
|
||||
gpu::suppressWord(probs, id);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user