mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
enable GPU compilation
This commit is contained in:
parent
e70f0b4fdd
commit
9ed5c0078a
@ -70,28 +70,35 @@ else(MSVC)
|
||||
# only be used with BUILD_ARCH=native. For overridden BUILD_ARCH we
|
||||
# minimally use -msse4.1. This seems to work with MKL.
|
||||
set(INTRINSICS "")
|
||||
list(APPEND INTRINSICS_NVCC)
|
||||
|
||||
if(BUILD_ARCH STREQUAL "native")
|
||||
message(STATUS "Checking support for CPU intrinsics")
|
||||
include(FindSSE)
|
||||
if(SSE2_FOUND)
|
||||
message(STATUS "SSE2 support found")
|
||||
set(INTRINSICS "${INTRINSICS} -msse2")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse2)
|
||||
endif(SSE2_FOUND)
|
||||
if(SSE3_FOUND)
|
||||
message(STATUS "SSE3 support found")
|
||||
set(INTRINSICS "${INTRINSICS} -msse3")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse3)
|
||||
endif(SSE3_FOUND)
|
||||
if(SSE4_1_FOUND)
|
||||
message(STATUS "SSE4.1 support found")
|
||||
set(INTRINSICS "${INTRINSICS} -msse4.1")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -msse4.1)
|
||||
endif(SSE4_1_FOUND)
|
||||
if(AVX_FOUND)
|
||||
message(STATUS "AVX support found")
|
||||
set(INTRINSICS "${INTRINSICS} -mavx")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx)
|
||||
endif(AVX_FOUND)
|
||||
if(AVX2_FOUND)
|
||||
message(STATUS "AVX2 support found")
|
||||
set(INTRINSICS "${INTRINSICS} -mavx2")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx2)
|
||||
endif(AVX2_FOUND)
|
||||
if(AVX512_FOUND)
|
||||
message(STATUS "AVX512 support found")
|
||||
@ -164,7 +171,7 @@ if(USE_STATIC_LIBS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_package(CUDA "8.0")
|
||||
find_package(CUDA "10.0")
|
||||
if(CUDA_FOUND)
|
||||
# CUDA >= 10.0 requires CMake >= 3.12.2
|
||||
if((CUDA_VERSION VERSION_EQUAL "10.0" OR CUDA_VERSION VERSION_GREATER "10.0") AND (CMAKE_VERSION VERSION_LESS "3.12.2"))
|
||||
@ -216,14 +223,16 @@ else(COMPILE_CUDA)
|
||||
message(WARNING "COMPILE_CUDA=off : Building only CPU version")
|
||||
endif(COMPILE_CUDA)
|
||||
|
||||
# TODO: make compatible with older CUDA versions
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O0; -g; -arch=sm_30; -gencode=arch=compute_30,code=sm_30; -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_61,code=compute_61 ;)
|
||||
list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O0; -g; --use_fast_math; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_70,code=sm_70; -gencode=arch=compute_75,code=sm_75; -gencode=arch=compute_75,code=compute_75 ;)
|
||||
else(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O3; -g; --use_fast_math; -arch=sm_30; -gencode=arch=compute_30,code=sm_30; -gencode=arch=compute_50,code=sm_50; -gencode=arch=compute_52,code=sm_52; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_61,code=compute_61 ;)
|
||||
list(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -O3; -g; --use_fast_math; -gencode=arch=compute_60,code=sm_60; -gencode=arch=compute_61,code=sm_61; -gencode=arch=compute_70,code=sm_70; -gencode=arch=compute_75,code=sm_75; -gencode=arch=compute_75,code=compute_75 ;)
|
||||
endif(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
if(NOT MSVC)
|
||||
# @TODO: add warnings here too
|
||||
list(APPEND CUDA_NVCC_FLAGS -ccbin ${CMAKE_C_COMPILER}; -std=c++11; -Xcompiler\ -fPIC; -Xcompiler\ -Wno-unused-result; -Xcompiler\ -Wno-deprecated; -Xcompiler\ -Wno-pragmas; -Xcompiler\ -Wno-unused-value; -Xcompiler\ -Werror;)
|
||||
list(APPEND CUDA_NVCC_FLAGS ${INTRINSICS_NVCC})
|
||||
else()
|
||||
list(APPEND CUDA_NVCC_FLAGS -Xcompiler\ /FS; )
|
||||
endif()
|
||||
|
@ -277,11 +277,10 @@ template <> inline Type typeId<double>() { return Type::float64; }
|
||||
// Abort if the given C++ type does not correspond to the runtime type
|
||||
template <typename T>
|
||||
void matchOrAbort(Type type) {
|
||||
// @TODO: hacky hack for WGNMT, turn this back on
|
||||
// ABORT_IF(!matchType<T>(type),
|
||||
// "Requested type ({}) and underlying type ({}) do not match",
|
||||
// request<T>(),
|
||||
// type);
|
||||
ABORT_IF(!matchType<T>(type),
|
||||
"Requested type ({}) and underlying type ({}) do not match",
|
||||
request<T>(),
|
||||
type);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
123
src/tensors/gpu/add.cu
Normal file → Executable file
123
src/tensors/gpu/add.cu
Normal file → Executable file
@ -11,12 +11,12 @@ namespace marian {
|
||||
|
||||
namespace gpu {
|
||||
|
||||
template <size_t K, class Functor, class AggFunctor>
|
||||
__global__ void gAggregateGeneric(Functor functor, float aggInit, AggFunctor aggFunctor,
|
||||
const functional::Shape full,
|
||||
functional::Tensor<float> out,
|
||||
functional::Array<functional::Tensor<float>, K> ins,
|
||||
float scale = 1.0) {
|
||||
template <size_t K, class Functor, class AggFunctor, typename T, typename AccType>
|
||||
__global__ void gAggregateGeneric(Functor functor, AccType aggInit, AggFunctor aggFunctor,
|
||||
const functional::Shape full,
|
||||
functional::Tensor<T> out,
|
||||
functional::Array<functional::Tensor<T>, K> ins,
|
||||
AccType scale = 1.0) {
|
||||
int outLength = out.shape().elements();
|
||||
bool same = outLength == full.elements();
|
||||
for(int i = 0; i < K; ++i)
|
||||
@ -32,21 +32,21 @@ __global__ void gAggregateGeneric(Functor functor, float aggInit, AggFunctor agg
|
||||
int index = bid + blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if(index < outLength) {
|
||||
if(same) {
|
||||
out[index] = aggFunctor(out[index], functional::apply(functor, ins, index) * scale);
|
||||
out[index] = aggFunctor(out[index], functional::apply(functor, ins, index) * (T)scale);
|
||||
} else {
|
||||
out.shape().dims(index, dims);
|
||||
out[index] = aggFunctor(out[index], functional::loops(functor, aggInit, aggFunctor, ins, len, dims) * scale);
|
||||
out[index] = aggFunctor(out[index], (T)(functional::loops(functor, aggInit, aggFunctor, ins, len, dims) * scale));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t K, class Functor, class AggFunctor>
|
||||
template <size_t K, class Functor, class AggFunctor, typename T, typename AccType>
|
||||
__global__ void gAggregateEqual(Functor functor, AggFunctor aggFunctor,
|
||||
functional::Tensor<float> out,
|
||||
functional::Array<functional::Tensor<float>, K> ins,
|
||||
float scale,
|
||||
bool broadcast) {
|
||||
functional::Tensor<T> out,
|
||||
functional::Array<functional::Tensor<T>, K> ins,
|
||||
AccType scale,
|
||||
bool broadcast) {
|
||||
int length = out.shape().elements();
|
||||
functional::Array<int, functional::Shape::size()> dims;
|
||||
|
||||
@ -62,17 +62,17 @@ __global__ void gAggregateEqual(Functor functor, AggFunctor aggFunctor,
|
||||
indices[i] = ins[i].shape().bindex(dims);
|
||||
}
|
||||
|
||||
out[index] = aggFunctor(out[index], functional::apply(functor, ins, indices) * scale);
|
||||
out[index] = aggFunctor(out[index], functional::apply(functor, ins, indices) * (T)scale);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t K, class Functor, class AggFunctor>
|
||||
__global__ void gAggregateReduce(Functor functor, float aggInit, AggFunctor aggFunctor,
|
||||
const functional::Shape full,
|
||||
functional::Tensor<float> out,
|
||||
functional::Array<functional::Tensor<float>, K> ins,
|
||||
float scale = 1.0) {
|
||||
template <size_t K, class Functor, class AggFunctor, typename T, typename AccType = float>
|
||||
__global__ void gAggregateReduce(Functor functor, AccType aggInit, AggFunctor aggFunctor,
|
||||
const functional::Shape full,
|
||||
functional::Tensor<T> out,
|
||||
functional::Array<functional::Tensor<T>, K> ins,
|
||||
AccType scale = 1.0) {
|
||||
int rows = full.elements() / full.back();
|
||||
int cols = full.back();
|
||||
|
||||
@ -83,15 +83,17 @@ __global__ void gAggregateReduce(Functor functor, float aggInit, AggFunctor aggF
|
||||
for(int bid = 0; bid < rows; bid += gridDim.x) {
|
||||
int j = bid + blockIdx.x;
|
||||
if(j < rows) {
|
||||
extern __shared__ float _share[];
|
||||
float* _sum = _share;
|
||||
// make sure shared memory is the same for different types
|
||||
// by using bytes instead of type T
|
||||
extern __shared__ uint8_t _sharedBytes[];
|
||||
AccType* _sum = (AccType*)_sharedBytes;
|
||||
|
||||
if(same) {
|
||||
_sum[threadIdx.x] = aggInit;
|
||||
for(int tid = 0; tid < cols; tid += blockDim.x) {
|
||||
int id = tid + threadIdx.x;
|
||||
if(id < cols)
|
||||
_sum[threadIdx.x] = aggFunctor(_sum[threadIdx.x], functional::apply(functor, ins, j * cols + id));
|
||||
_sum[threadIdx.x] = aggFunctor(_sum[threadIdx.x], (AccType)functional::apply(functor, ins, j * cols + id));
|
||||
}
|
||||
} else {
|
||||
functional::Array<int, functional::Shape::size()> dims;
|
||||
@ -104,7 +106,7 @@ __global__ void gAggregateReduce(Functor functor, float aggInit, AggFunctor aggF
|
||||
functional::Array<int, K> indices;
|
||||
for(int i = 0; i < K; ++i)
|
||||
indices[i] = ins[i].shape().bindex(dims);
|
||||
_sum[threadIdx.x] = aggFunctor(_sum[threadIdx.x], functional::apply(functor, ins, indices));
|
||||
_sum[threadIdx.x] = aggFunctor(_sum[threadIdx.x], (AccType)functional::apply(functor, ins, indices));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -119,14 +121,14 @@ __global__ void gAggregateReduce(Functor functor, float aggInit, AggFunctor aggF
|
||||
len = (len + 1) >> 1;
|
||||
}
|
||||
__syncthreads();
|
||||
out[j] = aggFunctor(out[j], _sum[0] * scale);
|
||||
out[j] = aggFunctor(out[j], (T)(_sum[0] * scale));
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Functor, class AggFunctor, class... Tensors>
|
||||
void Aggregate(Functor functor, float aggInit, AggFunctor aggFunctor, float scale, marian::Tensor out, Tensors... tensors) {
|
||||
template <typename T, typename AccType, class Functor, class AggFunctor, class... Tensors>
|
||||
void AggregateTyped(Functor functor, AccType aggInit, AggFunctor aggFunctor, AccType scale, marian::Tensor out, Tensors... tensors) {
|
||||
cudaSetDevice(out->getDeviceId().no);
|
||||
|
||||
auto full = marian::Shape::broadcast({out, tensors...});
|
||||
@ -135,8 +137,8 @@ void Aggregate(Functor functor, float aggInit, AggFunctor aggFunctor, float scal
|
||||
|
||||
constexpr size_t K = sizeof...(Tensors);
|
||||
|
||||
functional::Tensor<float> gOut = out;
|
||||
functional::Array<functional::Tensor<float>, K> gIns = {tensors...};
|
||||
functional::Tensor<T> gOut = out;
|
||||
functional::Array<functional::Tensor<T>, K> gIns = {tensors...};
|
||||
|
||||
if(full.back() != 1 && out->shape().back() == 1) {
|
||||
size_t m = full.elements() / length;
|
||||
@ -144,14 +146,13 @@ void Aggregate(Functor functor, float aggInit, AggFunctor aggFunctor, float scal
|
||||
|
||||
int blocks = std::min(MAX_BLOCKS, (int)m);
|
||||
int threads = std::min(MAX_THREADS, (int)k);
|
||||
int shared = sizeof(float) * threads;
|
||||
int shared = sizeof(AccType) * threads;
|
||||
|
||||
gAggregateReduce<<<blocks, threads, shared>>>(functor, aggInit, aggFunctor, full, gOut, gIns, scale);
|
||||
gAggregateReduce<K, Functor, AggFunctor, T, AccType><<<blocks, threads, shared>>>(functor, aggInit, aggFunctor, full, gOut, gIns, scale);
|
||||
|
||||
} else if(out->shape() == full) {
|
||||
int threads = std::min(MAX_THREADS, length);
|
||||
int blocks
|
||||
= std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
|
||||
int blocks = std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
|
||||
|
||||
bool broadcast = false;
|
||||
for(int i = 0; i < K; ++i)
|
||||
@ -159,55 +160,29 @@ void Aggregate(Functor functor, float aggInit, AggFunctor aggFunctor, float scal
|
||||
gAggregateEqual<<<blocks, threads>>>(functor, aggFunctor, gOut, gIns, scale, broadcast);
|
||||
} else {
|
||||
int threads = std::min(MAX_THREADS, length);
|
||||
int blocks
|
||||
= std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
|
||||
int blocks = std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
|
||||
|
||||
gAggregateGeneric<<<blocks, threads>>>(functor, aggInit, aggFunctor, full, gOut, gIns, scale);
|
||||
}
|
||||
}
|
||||
|
||||
// @TODO: this is a duplicate; can be removed, but need to redo all the add.inc entries...
|
||||
// Type-dispatching front end for aggregation on the GPU.
// Inspects the runtime element type of `out` and forwards all arguments unchanged
// to the AggregateTyped<T, AccType> instantiation for that type:
//   - Type::float32 -> AggregateTyped<float, float>
//   - Type::float16 -> AggregateTyped<half, float>, currently compiled out by `#if 0`
// Any other type aborts with "Type {} not yet supported".
// `aggInit` and `scale` are `float` in this signature regardless of the tensor type.
template <class Functor, class AggFunctor, class... Tensors>
|
||||
void Aggregate(Functor functor, float aggInit, AggFunctor aggFunctor, float scale, marian::Tensor out, Tensors... tensors) {
|
||||
if(out->type() == Type::float32) {
|
||||
AggregateTyped<float, float>(functor, aggInit, aggFunctor, scale, out, tensors...);
|
||||
// The float16 branch below is disabled at preprocessing time; while it is compiled
// out, float16 tensors fall through to the ABORT in the final else.
#if 0
|
||||
} else if(out->type() == Type::float16) {
|
||||
AggregateTyped<half, float>(functor, aggInit, aggFunctor, scale, out, tensors...);
|
||||
#endif
|
||||
} else {
|
||||
ABORT("Type {} not yet supported", out->type());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Functor, class... Tensors>
|
||||
void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors) {
|
||||
cudaSetDevice(out->getDeviceId().no);
|
||||
|
||||
auto full = marian::Shape::broadcast({out, tensors...});
|
||||
|
||||
int length = out->shape().elements();
|
||||
|
||||
constexpr size_t K = sizeof...(Tensors);
|
||||
|
||||
functional::Tensor<float> gOut = out;
|
||||
functional::Array<functional::Tensor<float>, K> gIns = {tensors...};
|
||||
|
||||
auto addFunctor = functional::_1 + functional::_2;
|
||||
|
||||
if(full.back() != 1 && out->shape().back() == 1) {
|
||||
size_t m = full.elements() / length;
|
||||
size_t k = full.back();
|
||||
|
||||
int blocks = std::min(MAX_BLOCKS, (int)m);
|
||||
int threads = std::min(MAX_THREADS, (int)k);
|
||||
int shared = sizeof(float) * threads;
|
||||
|
||||
gAggregateReduce<<<blocks, threads, shared>>>(functor, 0, addFunctor, full, gOut, gIns, scale);
|
||||
|
||||
} else if(out->shape() == full) {
|
||||
int threads = std::min(MAX_THREADS, length);
|
||||
int blocks
|
||||
= std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
|
||||
|
||||
bool broadcast = false;
|
||||
for(int i = 0; i < K; ++i)
|
||||
broadcast = broadcast || gOut.shape() != gIns[i].shape();
|
||||
gAggregateEqual<<<blocks, threads>>>(functor, addFunctor, gOut, gIns, scale, broadcast);
|
||||
} else {
|
||||
int threads = std::min(MAX_THREADS, length);
|
||||
int blocks
|
||||
= std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
|
||||
|
||||
gAggregateGeneric<<<blocks, threads>>>(functor, 0, addFunctor, full, gOut, gIns, scale);
|
||||
}
|
||||
Aggregate(functor, 0.f, addFunctor, scale, out, tensors...);
|
||||
}
|
||||
|
||||
#include "tensors/gpu/add.inc"
|
||||
|
1
src/tensors/gpu/add.h
Normal file → Executable file
1
src/tensors/gpu/add.h
Normal file → Executable file
@ -8,6 +8,7 @@ namespace gpu {
|
||||
|
||||
// Declaration of the GPU Add entry point; the template is defined in add.cu and
// explicitly instantiated through add.inc.
// As defined in add.cu, it builds an additive aggregation functor (_1 + _2) and
// forwards to Aggregate with an initial aggregation value of 0.
template <class Functor, class... Tensors>
|
||||
void Add(Functor functor, float scale, marian::Tensor out, Tensors... tensors);
|
||||
|
||||
// Declaration of the generic GPU aggregation entry point; the template is defined
// in add.cu, where it dispatches on out->type() to a typed implementation.
// NOTE(review): the second parameter is named `initAgg` here but `aggInit` in the
// add.cu definition; the two refer to the same argument.
template <class Functor, class AggFunctor, class... Tensors>
|
||||
void Aggregate(Functor functor, float initAgg, AggFunctor aggFunctor, float scale, marian::Tensor out, Tensors... tensors);
|
||||
}
|
||||
|
26
src/tensors/gpu/add.inc
Normal file → Executable file
26
src/tensors/gpu/add.inc
Normal file → Executable file
@ -15,21 +15,23 @@ template void Add<BinaryFunctor<elem::Mult, Assignee<1>, BinaryFunctor<elem::Div
|
||||
template void Add<BinaryFunctor<elem::Mult, Assignee<1>, BinaryFunctor<elem::sPReLUBack, Assignee<2>, Capture>>, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, Assignee<1>, BinaryFunctor<elem::sPReLUBack, Assignee<2>, Capture>>, float, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, Assignee<1>, UnaryFunctor<elem::sReLUBack, Assignee<2>>>, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, Assignee<1>, UnaryFunctor<elem::sReLUBack, Assignee<2>>>, float, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Assignee<1>, Assignee<2>>, BinaryFunctor<elem::Minus, Capture, Assignee<2>>>, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Assignee<1>, Assignee<2>>, BinaryFunctor<elem::Minus, Capture, Assignee<2>>>, float, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, BinaryFunctor<elem::Bump, Assignee<1>, Capture>, Assignee<2>>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(BinaryFunctor<elem::Mult, BinaryFunctor<elem::Bump, Assignee<1>, Capture>, Assignee<2> >, float, std::shared_ptr<marian::TensorBase>, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, BinaryFunctor<elem::Bump, Assignee<1>, Capture>, Assignee<2>>, marian::Tensor, marian::Tensor >(BinaryFunctor<elem::Mult, BinaryFunctor<elem::Bump, Assignee<1>, Capture>, Assignee<2> >, float, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<2>, Assignee<3>>, Assignee<1>>, marian::Tensor, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<2>, Assignee<3>>, Assignee<1>>, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, BinaryFunctor<elem::Geq, Assignee<2>, Assignee<3>>, Assignee<1>>, marian::Tensor, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, BinaryFunctor<elem::Geq, Assignee<2>, Assignee<3>>, Assignee<1>>, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, Assignee<1>, UnaryFunctor<elem::Sigmoid, BinaryFunctor<elem::Minus, Assignee<3>, Assignee<2>>>>, marian::Tensor, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, Assignee<1>, UnaryFunctor<elem::Sigmoid, BinaryFunctor<elem::Minus, Assignee<3>, Assignee<2>>>>, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, BinaryFunctor<elem::Gt, Assignee<2>, Assignee<3>>, Assignee<1>>, marian::Tensor, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, BinaryFunctor<elem::Gt, Assignee<2>, Assignee<3>>, Assignee<1>>, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, BinaryFunctor<elem::Leq, Assignee<2>, Assignee<3>>, Assignee<1>>, marian::Tensor, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, BinaryFunctor<elem::Leq, Assignee<2>, Assignee<3>>, Assignee<1>>, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Mult, Assignee<1>, UnaryFunctor<elem::Sigmoid, BinaryFunctor<elem::Minus, Assignee<2>, Assignee<3>>>>, marian::Tensor, marian::Tensor, marian::Tensor>(BinaryFunctor<elem::Mult, Assignee<1>, UnaryFunctor<elem::Sigmoid, BinaryFunctor<elem::Minus, Assignee<2>, Assignee<3>>>>, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Add<BinaryFunctor<elem::Div, Assignee<1>, Assignee<2> >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(BinaryFunctor<elem::Div, Assignee<1>, Assignee<2> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<1> >, std::shared_ptr<marian::TensorBase> >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<1> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Minimum, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, std::shared_ptr<marian::TensorBase> >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::Minimum, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Maximum, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, std::shared_ptr<marian::TensorBase> >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::Maximum, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, std::shared_ptr<marian::TensorBase> >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::LogAddExp, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, std::shared_ptr<marian::TensorBase> >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::LogAddExp, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Exp, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<2>, marian::functional::Assignee<3> > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Exp, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<2>, marian::functional::Assignee<3> > > >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Capture>, marian::functional::Assignee<2> >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Capture>, marian::functional::Assignee<2> >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > >, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > >, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> > > > > >, float, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void Add<BinaryFunctor<elem::Div, Assignee<1>, Assignee<2> >, marian::Tensor, marian::Tensor >(BinaryFunctor<elem::Div, Assignee<1>, Assignee<2> >, float, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<1> >, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<1> >, float, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Min, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::Tensor >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::Min, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::Tensor >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::Max, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::Tensor >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Aggregate<marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::LogAddExp, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::Tensor >(marian::functional::Assignee<1>, float, marian::functional::BinaryFunctor<marian::functional::elem::LogAddExp, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, marian::Tensor, marian::Tensor, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Exp, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<2>, marian::functional::Assignee<3> > > >, marian::Tensor, marian::Tensor, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::UnaryFunctor<marian::functional::elem::Exp, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<2>, marian::functional::Assignee<3> > > >, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, marian::Tensor, marian::Tensor, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Assignee<2> >, marian::functional::Assignee<3> >, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Capture>, marian::functional::Assignee<2> >, marian::Tensor, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::Capture>, marian::functional::Assignee<2> >, float, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void marian::gpu::Add<marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > >, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> > > > > >, marian::Tensor, marian::Tensor, marian::Tensor >(marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > >, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<3> > > > > >, float, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
#include "tensors/gpu/cuda_helpers.h"
|
||||
// clang-format on
|
||||
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
namespace marian {
|
||||
namespace gpu {
|
||||
|
||||
@ -27,6 +29,7 @@ template void copy<uint32_t>(Ptr<Backend>, const uint32_t*, const uint32_t*, uin
|
||||
template void copy<uint64_t>(Ptr<Backend>, const uint64_t*, const uint64_t*, uint64_t*);
|
||||
|
||||
template void copy<char>(Ptr<Backend>, const char*, const char*, char*);
|
||||
template void copy<float16>(Ptr<Backend>, const float16*, const float16*, float16*);
|
||||
template void copy<float>(Ptr<Backend>, const float*, const float*, float*);
|
||||
template void copy<double>(Ptr<Backend>, const double*, const double*, double*);
|
||||
// clang-format on
|
||||
@ -55,6 +58,18 @@ void fill(Ptr<Backend> backend, T* begin, T* end, T value) {
|
||||
CUDA_CHECK(cudaStreamSynchronize(0));
|
||||
}
|
||||
|
||||
template <>
|
||||
void fill<float16>(Ptr<Backend> backend, float16* begin, float16* end, float16 value) {
|
||||
int size = end - begin;
|
||||
if (size == 0)
|
||||
return;
|
||||
CUDA_CHECK(cudaSetDevice(backend->getDeviceId().no));
|
||||
int threadsPerBlock = std::min(MAX_THREADS, size);
|
||||
int blocks = (size / threadsPerBlock) + (size % threadsPerBlock != 0); // @TODO: (size+threadsPerBlock-1)/threadsPerBlock or CeilDiv(a,b)
|
||||
gFill<<<blocks, threadsPerBlock>>>((__half*)begin, size, (__half)value);
|
||||
CUDA_CHECK(cudaStreamSynchronize(0));
|
||||
}
|
||||
|
||||
template void fill<bool>(Ptr<Backend>, bool*, bool*, bool);
|
||||
template void fill<int8_t>(Ptr<Backend>, int8_t*, int8_t*, int8_t);
|
||||
template void fill<int16_t>(Ptr<Backend>, int16_t*, int16_t*, int16_t);
|
||||
@ -102,7 +117,21 @@ void swap_ranges(Ptr<Backend> backend, T* begin, T* end, T* dest) {
|
||||
CUDA_CHECK(cudaStreamSynchronize(0));
|
||||
}
|
||||
|
||||
template <>
|
||||
void swap_ranges<float16>(Ptr<Backend> backend, float16* begin, float16* end, float16* dest) {
|
||||
int size = end - begin;
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
CUDA_CHECK(cudaSetDevice(backend->getDeviceId().no));
|
||||
int threadsPerBlock = std::min(MAX_THREADS, size);
|
||||
int blocks = (size / threadsPerBlock) + (size % threadsPerBlock != 0); // @TODO: (size+threadsPerBlock-1)/threadsPerBlock or CeilDiv(a,b)
|
||||
gSwap<<<blocks, threadsPerBlock>>>((__half*)begin, (__half*)dest, size);
|
||||
CUDA_CHECK(cudaStreamSynchronize(0));
|
||||
}
|
||||
|
||||
// clang-format off
|
||||
template void swap_ranges<char>(Ptr<Backend>, char*, char*, char*);
|
||||
template void swap_ranges<int8_t>(Ptr<Backend>, int8_t*, int8_t*, int8_t*);
|
||||
template void swap_ranges<int16_t>(Ptr<Backend>, int16_t*, int16_t*, int16_t*);
|
||||
template void swap_ranges<int32_t>(Ptr<Backend>, int32_t*, int32_t*, int32_t*);
|
||||
@ -113,7 +142,6 @@ template void swap_ranges<uint16_t>(Ptr<Backend>, uint16_t*, uint16_t*, uint16_t
|
||||
template void swap_ranges<uint32_t>(Ptr<Backend>, uint32_t*, uint32_t*, uint32_t*);
|
||||
template void swap_ranges<uint64_t>(Ptr<Backend>, uint64_t*, uint64_t*, uint64_t*);
|
||||
|
||||
template void swap_ranges<char>(Ptr<Backend>, char*, char*, char*);
|
||||
template void swap_ranges<float>(Ptr<Backend>, float*, float*, float*);
|
||||
template void swap_ranges<double>(Ptr<Backend>, double*, double*, double*);
|
||||
// clang-format on
|
||||
|
39
src/tensors/gpu/element.cu
Normal file → Executable file
39
src/tensors/gpu/element.cu
Normal file → Executable file
@ -9,10 +9,10 @@
|
||||
namespace marian {
|
||||
namespace gpu {
|
||||
|
||||
template <size_t K, bool broadcast, class Functor>
|
||||
template <size_t K, bool broadcast, class Functor, typename T>
|
||||
__global__ void gElement(
|
||||
Functor functor,
|
||||
functional::Array<functional::Tensor<float>, K> tensors) {
|
||||
functional::Array<functional::Tensor<T>, K> tensors) {
|
||||
int length = tensors[0].shape().elements();
|
||||
functional::Array<int, functional::Shape::size()> dims;
|
||||
functional::Array<int, K> indices;
|
||||
@ -28,32 +28,51 @@ __global__ void gElement(
|
||||
indices[i] = tensors[i].shape().bindex(dims);
|
||||
}
|
||||
|
||||
tensors[0][index] = functional::apply(functor, tensors, indices);
|
||||
tensors[0].data()[index] = functional::apply(functor, tensors, indices);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Functor, class... Tensors>
|
||||
void Element(Functor functor, Tensor out, Tensors... tensors) {
|
||||
|
||||
template <typename T, class Functor, class... Tensors>
|
||||
void ElementTyped(Functor functor, Tensor out, Tensors... tensors) {
|
||||
//matchOrAbort<T>(out->type()); // @TODO: figure out undefined reference
|
||||
|
||||
cudaSetDevice(out->getDeviceId().no);
|
||||
|
||||
constexpr size_t K = sizeof...(tensors) + 1;
|
||||
functional::Array<functional::Tensor<float>, K> gTensors = {out, tensors...};
|
||||
|
||||
int length = gTensors[0].shape().elements();
|
||||
int length = out->shape().elements();
|
||||
int threads = std::min(MAX_THREADS, length);
|
||||
int blocks = std::min(MAX_BLOCKS, length / threads + (length % threads != 0));
|
||||
|
||||
constexpr size_t K = sizeof...(tensors) + 1;
|
||||
functional::Array<functional::Tensor<T>, K> gTensors = {out, tensors...};
|
||||
|
||||
bool broadcast = false;
|
||||
for(int i = 1; i < K; ++i)
|
||||
broadcast = broadcast || gTensors[0].shape() != gTensors[i].shape();
|
||||
|
||||
if(broadcast)
|
||||
gElement<K, true><<<blocks, threads>>>(functor, gTensors);
|
||||
else
|
||||
gElement<K, false><<<blocks, threads>>>(functor, gTensors);
|
||||
}
|
||||
|
||||
template <class Functor, class... Tensors>
|
||||
void Element(Functor functor, Tensor out, Tensors... tensors) {
|
||||
checkCommonType(out, tensors...);
|
||||
|
||||
if(out->type() == Type::float32) {
|
||||
ElementTyped<float>(functor, out, tensors...);
|
||||
#if 0
|
||||
} else if(out->type() == Type::float16) {
|
||||
ElementTyped<half>(functor, out, tensors...);
|
||||
} else if(out->type() == Type::float64) {
|
||||
ElementTyped<double>(functor, out, tensors...);
|
||||
#endif
|
||||
} else {
|
||||
ABORT("Type {} not yet supported", out->type());
|
||||
}
|
||||
}
|
||||
|
||||
#include "tensors/gpu/element.inc"
|
||||
} // namespace gpu
|
||||
} // namespace marian
|
||||
|
31
src/tensors/gpu/element.inc
Normal file → Executable file
31
src/tensors/gpu/element.inc
Normal file → Executable file
@ -39,24 +39,29 @@ template void Element<Assign<Var<1>, TernaryFunctor<elem::IfThenElse, BinaryFunc
|
||||
template void Element<Assign<Var<1>, TernaryFunctor<elem::IfThenElse, BinaryFunctor<elem::Leq, UnaryFunctor<elem::Abs, Assignee<2>>, Capture>, Capture, Capture>>, marian::Tensor>(Assign<Var<1>, TernaryFunctor<elem::IfThenElse, BinaryFunctor<elem::Leq, UnaryFunctor<elem::Abs, Assignee<2>>, Capture>, Capture, Capture>>, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Clip, Assignee<2>, Capture>>, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Clip, Assignee<2>, Capture>>, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::LogAddExp, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::LogAddExp, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Maximum, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Maximum, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minimum, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minimum, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Max, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Max, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Min, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Min, Assignee<2>, Assignee<3>>>, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Div, Assignee<1>, Capture>>>(Assign<Var<1>, BinaryFunctor<elem::Div, Assignee<1>, Capture>>, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Mult, Capture, Assignee<1>>, Capture>>>(Assign<Var<1>, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Mult, Capture, Assignee<1>>, Capture>>, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture>>>(Assign<Var<1>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture>>, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, Assignee<1>>>>>>>(Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, Assignee<1>>>>>>, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, Assignee<1>, Capture>>>, Capture>>>>>(Assign<Var<1>, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Neg, UnaryFunctor<elem::Log, BinaryFunctor<elem::Plus, Assignee<1>, Capture>>>, Capture>>>>, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture>>>(Assign<Var<1>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture> >, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> > > >, Capture>, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> > > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> > > >, Capture>, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> > > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<5> >, Assignee<4> >, Capture> >, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<5> >, Assignee<4> >, Capture> >, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4> >, Assignee<4> >, Capture> > >, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4> >, Capture> > >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture> >, Capture> >, BinaryFunctor<elem::Mult, Capture, Assignee<1> > > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::NEq, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::BinaryFunctor<marian::functional::elem::Gt, marian::functional::Assignee<2>, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Lt, marian::functional::Assignee<2>, marian::functional::Assignee<3> > >, marian::functional::Capture>, marian::functional::Capture> >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase> >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::NEq, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::BinaryFunctor<marian::functional::elem::Gt, marian::functional::Assignee<2>, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Lt, marian::functional::Assignee<2>, marian::functional::Assignee<3> > >, marian::functional::Capture>, marian::functional::Capture> >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqrt, marian::functional::Assignee<1> > >>(marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqrt, marian::functional::Assignee<1> > >, std::shared_ptr<marian::TensorBase>);
|
||||
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<2>, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > > > >, std::shared_ptr<marian::TensorBase> >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<2>, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > > > >, std::shared_ptr<marian::TensorBase>, std::shared_ptr<marian::TensorBase>);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture>>>(Assign<Var<1>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Lt, Assignee<1>, Capture>, Capture>>, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4>>, Assignee<4>>>>, Capture>, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4>>, Assignee<4>>>>, Capture>, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4>>, Assignee<4>>, Capture>>>, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>, Capture>>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4>>, Assignee<4>>, Capture>>>, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>, Capture>>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<5>>, Assignee<4>>, Capture>>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4>>, Assignee<4>>, Capture>>>, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>, Capture>>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, UnaryFunctor<elem::sReLU, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Minus, Assignee<3>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<5>>, Assignee<4>>, Capture>>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, BinaryFunctor<elem::Mult, Capture, Assignee<4>>, Assignee<4>>, Capture>>>, Capture>, BinaryFunctor<elem::Div, BinaryFunctor<elem::Mult, Assignee<4>, Assignee<4>>, Capture>>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
template void Element<Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor>(Assign<Var<1>, BinaryFunctor<elem::Minus, Assignee<1>, BinaryFunctor<elem::Mult, Capture, BinaryFunctor<elem::Plus, BinaryFunctor<elem::Div, BinaryFunctor<elem::Div, Assignee<2>, Capture>, BinaryFunctor<elem::Plus, UnaryFunctor<elem::Sqrt, BinaryFunctor<elem::Div, Assignee<3>, Capture>>, Capture>>, BinaryFunctor<elem::Mult, Capture, Assignee<1>>>>>>, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
|
||||
// Explicit instantiation of marian::gpu::Element for the assignment functor
//   _1 = ((((_2 > _3) - (_2 < _3)) == c) != c)
// where _N denotes Assignee<N> and every c denotes a Capture node.
// Takes three marian::Tensor arguments (two in the template argument list).
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::NEq, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::BinaryFunctor<marian::functional::elem::Gt, marian::functional::Assignee<2>, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Lt, marian::functional::Assignee<2>, marian::functional::Assignee<3> > >, marian::functional::Capture>, marian::functional::Capture> >, marian::Tensor, marian::Tensor >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::NEq, marian::functional::BinaryFunctor<marian::functional::elem::Eq, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::BinaryFunctor<marian::functional::elem::Gt, marian::functional::Assignee<2>, marian::functional::Assignee<3> >, marian::functional::BinaryFunctor<marian::functional::elem::Lt, marian::functional::Assignee<2>, marian::functional::Assignee<3> > >, marian::functional::Capture>, marian::functional::Capture> >, marian::Tensor, marian::Tensor, marian::Tensor);
|
||||
// Explicit instantiation of marian::gpu::Element for the assignment functor
//   _1 = sqrt(_1)
// where _1 denotes Assignee<1>. Takes a single marian::Tensor argument.
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqrt, marian::functional::Assignee<1> > >>(marian::functional::Assign<marian::functional::Var<1>, marian::functional::UnaryFunctor<marian::functional::elem::Sqrt, marian::functional::Assignee<1> > >, marian::Tensor);
|
||||
// Explicit instantiation of marian::gpu::Element for the assignment functor
//   _1 = _2 * sigmoid(c * _2)
// where _N denotes Assignee<N> and c denotes a Capture node.
// Takes two marian::Tensor arguments (one in the template argument list).
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<2>, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > > > >, marian::Tensor >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Assignee<2>, marian::functional::UnaryFunctor<marian::functional::elem::Sigmoid, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<2> > > > >, marian::Tensor, marian::Tensor);
|
||||
|
||||
// Explicit instantiation of marian::gpu::Element for the assignment functor
//   _1 = ((_1 < c) * c) + c
// where _1 denotes Assignee<1> and every c denotes a Capture node.
// Takes a single marian::Tensor argument.
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Lt, marian::functional::Assignee<1>, marian::functional::Capture>, marian::functional::Capture>, marian::functional::Capture> >>(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::BinaryFunctor<marian::functional::elem::Lt, marian::functional::Assignee<1>, marian::functional::Capture>, marian::functional::Capture>, marian::functional::Capture> >, marian::Tensor);
|
||||
|
||||
// Explicit instantiation of marian::gpu::Element for the assignment functor
//   _1 = _1 - c * ((_2 / c) + c * _1)
// where _N denotes Assignee<N> and every c denotes a Capture node.
// Takes two marian::Tensor arguments (one in the template argument list).
// The tensor type is spelled 'marian::Tensor' rather than 'IntrusivePtr<marian::TensorBase>',
// following the instructions at the end of this list and the other instantiations.
template void marian::gpu::Element<marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<2>, marian::functional::Capture>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<1> > > > > >, marian::Tensor >(marian::functional::Assign<marian::functional::Var<1>, marian::functional::BinaryFunctor<marian::functional::elem::Minus, marian::functional::Assignee<1>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::BinaryFunctor<marian::functional::elem::Plus, marian::functional::BinaryFunctor<marian::functional::elem::Div, marian::functional::Assignee<2>, marian::functional::Capture>, marian::functional::BinaryFunctor<marian::functional::elem::Mult, marian::functional::Capture, marian::functional::Assignee<1> > > > > >, marian::Tensor, marian::Tensor);
|
||||
|
||||
// How to add new explicit instantiations:
|
||||
// When you use an instantiation that is not listed here, it will cause a link error of this form (example):
|
||||
@ -64,3 +69,5 @@ template void marian::gpu::Element<marian::functional::Assign<marian::functional
|
||||
// To fix this, copy the line with the error message in here and:
|
||||
// - replace everything up to and including "undefined reference to `" with "template"
|
||||
// - replace the final ' with a semicolon
|
||||
// - replace 'IntrusivePtr<marian::TensorBase>' with 'marian::Tensor'
|
||||
|
||||
|
@ -250,7 +250,7 @@ void CSRProd(marian::Tensor C,
|
||||
ABORT_IF(numOffsets != rowsS, "Unexpected number of rows in CSR argument");
|
||||
ABORT_IF(S_values->shape() != S_indices->shape(), "CSR values and indices must have the same size");
|
||||
float alpha = 1;
|
||||
Ptr<MemoryPiece> St_values, St_indices, St_offsets;
|
||||
MemoryPiece::PtrType St_values, St_indices, St_offsets;
|
||||
if (transS != swapOperands) {
|
||||
// Cusparse gemmi() does not support this specific version of transpose, and csrmm() is non-deterministic.
|
||||
// Hence, we transpose the matrix explicitly.
|
||||
|
@ -1350,8 +1350,9 @@ float L2Norm(Tensor in) {
|
||||
|
||||
uint8_t* data;
|
||||
cudaMalloc(&data, blocks * sizeof(float));
|
||||
Tensor out(new TensorBase(New<MemoryPiece>(data, blocks * sizeof(float)),
|
||||
{1, blocks},
|
||||
Tensor out(TensorBase::New(MemoryPiece::New(data, blocks * sizeof(float)),
|
||||
Shape({1, blocks}),
|
||||
Type::float32,
|
||||
in->getBackend()));
|
||||
|
||||
using namespace functional;
|
||||
|
Loading…
Reference in New Issue
Block a user