mirror of
https://github.com/marian-nmt/marian.git
synced 2024-11-05 01:31:46 +03:00
a number of comments and test
This commit is contained in:
parent
8797b5ffd3
commit
4c8b6bb171
@ -3,7 +3,7 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
|
||||
|
||||
project(marian CXX)
|
||||
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O3 -funroll-loops -Wno-unused-result -Wno-deprecated")
|
||||
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
|
||||
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math; --expt-extended-lambda; -Xcompiler '-fPIC')
|
||||
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
|
||||
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||
|
||||
|
@ -33,8 +33,8 @@ ExpressionGraph build_graph(const std::vector<int>& dims) {
|
||||
layers.emplace_back(x);
|
||||
}
|
||||
else {
|
||||
layers.emplace_back(reluplus(dot(layers.back(), weights.back()), biases.back()));
|
||||
//layers.emplace_back(relu(dot(layers.back(), weights.back()) + biases.back()));
|
||||
//layers.emplace_back(reluplus(dot(layers.back(), weights.back()), biases.back()));
|
||||
layers.emplace_back(relu(dot(layers.back(), weights.back()) + biases.back()));
|
||||
}
|
||||
|
||||
weights.emplace_back(
|
||||
|
@ -454,11 +454,14 @@ struct ReLUPlusNodeOp : public BinaryNodeOp {
|
||||
: BinaryNodeOp(a, b, keywords::shape=a->shape(), args...) { }
|
||||
|
||||
void forward() {
|
||||
// v = f(g(a, b))
|
||||
Element(_1 = ReLU(_2 + _3),
|
||||
val_, a_->val(), b_->val());
|
||||
}
|
||||
|
||||
void backward() {
|
||||
// df/da = adj * f'(g(a, b)) : dg/da * df/dg
|
||||
// df/db = adj * f'(g(a, b)) : dg/db * df/dg
|
||||
Element(_1 += _2 * ReLUback(_3 + _4),
|
||||
a_->grad(), adj_, a_->val(), b_->val());
|
||||
Element(_1 += _2 * ReLUback(_3 + _4),
|
||||
|
57
src/test.cu
57
src/test.cu
@ -30,15 +30,58 @@
|
||||
using namespace marian;
|
||||
using namespace keywords;
|
||||
|
||||
// Element-wise fill kernel: writes functor(i, j) into every cell of the
// rows x cols view `t`.  Blocks stride over rows and threads stride over
// columns, so any <<<blocks, threads>>> configuration covers the whole
// tensor regardless of its size.
template <class Functor>
__global__ void tgElement(Functor functor, TensorView t, int rows, int cols) {
  for(int row = blockIdx.x; row < rows; row += gridDim.x) {
    for(int col = threadIdx.x; col < cols; col += blockDim.x) {
      t(row, col) = functor(row, col);
    }
  }
}
|
||||
|
||||
// Host-side launcher for tgElement: fills tensor `t` element-wise with
// functor(i, j).  Blocks cover rows and threads cover columns (both capped
// at MAX_BLOCKS / MAX_THREADS); the kernel's stride loops handle tensors
// larger than the launch configuration.  Synchronizes the default stream
// before returning, so results are visible to the host afterwards.
template <class Functor>
void tElement(Functor functor, Tensor t) {
  int m = t.shape()[0];
  int n = t.shape()[1];

  // Guard against empty tensors: std::min(..., 0) would yield a zero-sized
  // launch configuration, which CUDA rejects with cudaErrorInvalidConfiguration.
  // There is nothing to compute in that case anyway.
  if(m <= 0 || n <= 0)
    return;

  int blocks = std::min(MAX_BLOCKS, m);
  int threads = std::min(MAX_THREADS, n);
  tgElement<<<blocks, threads>>>(functor, TensorView(t), m, n);
  cudaStreamSynchronize(0);
}
|
||||
|
||||
// Benchmark driver: times 1000 element-wise updates on a 1000x1000 tensor.
int main(int argc, char** argv) {
  // NOTE(review): constructed but never used below — presumably kept for
  // its setup side effects; confirm before removing.
  ExpressionGraph g;

  // 1000x1000 tensor filled with 3, and a 1x1 tensor filled with 2
  // (the second argument appears to be a fill value — confirm Tensor ctor).
  Tensor a({1000, 1000}, 3);
  Tensor b({1, 1}, 2);

  // Time 1000 iterations of a = a + a * b (element-wise, b broadcast).
  // NOTE(review): wall-clock timing assumes Element blocks until the kernel
  // finishes (as tElement does via cudaStreamSynchronize); if Element is
  // asynchronous this mostly measures launch overhead — confirm.
  boost::timer::cpu_timer timer;
  for(int i = 0; i < 1000; ++i)
    Element(_1 += _1 * _2, a, b);
  std::cerr << timer.format(5, "%ws") << std::endl;

  // Earlier experiment kept for reference: composing __device__ lambdas over
  // TensorViews by hand and launching them through tElement.
  //Tensor a({1000, 1000}, 3);
  //Tensor b({1, 1}, 2);
  //
  //TensorView ta(a);
  //TensorView tb(b);
  //
  //boost::timer::cpu_timer timer;
  //
  //
  //auto f = _1 + _2;
  //auto pp1 = [=] __device__ (int i, int j) mutable -> float {
  //  return f(ta(i, j), tb(i, j));
  //};
  //
  //auto pp2 = [=] __device__ (int i, int j) mutable -> float {
  //  return f(pp1(i, j), tb(i, j));
  //};
  //
  //for(int i = 0; i < 1000; ++i)
  //  tElement(pp2, a);

  // std::cerr << timer.format(5, "%ws") << std::endl;
  return 0;
}
|
||||
|
Loading…
Reference in New Issue
Block a user