mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
working separated graphs, added params
This commit is contained in:
parent
976c8039db
commit
94914d56e8
@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.5.1)
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
|
||||
|
||||
project(marian CXX)
|
||||
# Host compiler flags: C++11, debug symbols, and an optimized (-O3) build.
# Only one SET is kept; an earlier -O0 assignment would simply be overwritten.
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O3 -funroll-loops -Wno-unused-result -Wno-deprecated")
# nvcc flags are listed explicitly and appended exactly once, so the list never
# carries two conflicting optimization levels or duplicated -arch/-Xcompiler entries.
# NOTE(review): host flags are presumably not forwarded to nvcc because
# CUDA_PROPAGATE_HOST_FLAGS is switched OFF further down - confirm.
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
|
||||
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
|
||||
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "definitions.h"
|
||||
#include "chainable.h"
|
||||
#include "node_operators.h"
|
||||
@ -65,7 +67,9 @@ class ExpressionGraph {
|
||||
|
||||
template <typename ...Args>
|
||||
inline Expr param(Args ...args) {
|
||||
return Expr(this, new ParamNode(args...));
|
||||
Expr e(this, new ParamNode(args...));
|
||||
params_.emplace_back(e);
|
||||
return e;
|
||||
}
|
||||
|
||||
template <typename ...Args>
|
||||
@ -89,8 +93,28 @@ class ExpressionGraph {
|
||||
return stack_;
|
||||
}
|
||||
|
||||
// Returns a mutable reference to the expression stored under `name`.
// A missing name is reported through UTIL_THROW_IF2.
Expr& operator[](const std::string& name) {
  const auto it = named_.find(name);
  // The condition is spelled exactly as before in case the macro embeds its
  // text in the error message.
  UTIL_THROW_IF2(it == named_.end(), "No such named node in graph: " << name);
  Expr& node = it->second;
  return node;
}
|
||||
|
||||
bool has_node(const std::string& name) const {
|
||||
return named_.count(name) > 0;
|
||||
}
|
||||
|
||||
// Stores expression `e` under `name` in the name -> expression map.
// If the name is already taken the existing entry is left untouched
// (std::map insertion does not overwrite).
void add_named_node(Expr e, const std::string& name) {
  std::map<std::string, Expr>::value_type entry(name, e);
  named_.insert(entry);
}
|
||||
|
||||
std::vector<Expr>& params() {
|
||||
return params_;
|
||||
}
|
||||
|
||||
private:
|
||||
ChainableStackPtr stack_;
|
||||
std::map<std::string, Expr> named_;
|
||||
std::vector<Expr> params_;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,11 @@
|
||||
|
||||
namespace marian {
|
||||
|
||||
// Registers `a` under `name` in the graph that `a` belongs to and hands the
// same expression back, so the call can wrap an expression inline.
Expr named(Expr a, const std::string& name) {
  auto graph = a.graph();
  graph->add_named_node(a, name);
  return a;
}
|
||||
|
||||
// Wraps `a` in a LogitNodeOp and returns the new expression, attached to the
// same graph as `a`.
Expr logit(Expr a) {
  Expr result(a.graph(), new LogitNodeOp(a));
  return result;
}
|
||||
@ -26,6 +31,25 @@ Expr operator-(Expr a) {
|
||||
|
||||
/*********************************************************/
|
||||
|
||||
// Computes the result shape of an elementwise operation on `a` and `b`.
//
// Both operands must have the same number of dimensions. On every axis the
// two sizes have to be equal or one of them has to be 1; otherwise an error
// is raised through UTIL_THROW_IF2. The resulting size is the larger of the
// two, except that an axis where either operand is `whatevs` stays `whatevs`
// (presumably the "size not fixed yet" marker - confirm against definitions.h).
static Shape newShape(ChainPtr a, ChainPtr b) {
  const Shape& shapeA = a->shape();
  const Shape& shapeB = b->shape();

  const size_t dimsA = shapeA.size();
  const size_t dimsB = shapeB.size();
  UTIL_THROW_IF2(dimsA != dimsB,
                 "Tensors have different numbers of dimensions");

  Shape result(dimsA);
  for(size_t axis = 0; axis < dimsA; ++axis) {
    const int dimA = shapeA[axis];
    const int dimB = shapeB[axis];

    const bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
    UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
                   << "operation cannot be broadcasted: " << dimA << " != " << dimB);

    if(dimA == whatevs || dimB == whatevs)
      result[axis] = whatevs;
    else
      result[axis] = std::max(dimA, dimB);
  }
  return result;
}
|
||||
|
||||
Expr broadcast(Shape bShape, Expr a) {
|
||||
const Shape& aShape = a.node()->shape();
|
||||
if(aShape == bShape) {
|
||||
@ -61,30 +85,11 @@ Expr broadcast(Shape bShape, Expr a) {
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): this is a second, textually identical copy of newShape(); the
// same definition also appears ahead of broadcast(). It looks like the
// pre-move location of the function (the removed side of this hunk) rather
// than a live definition - confirm before relying on it.
//
// Computes the shape of an elementwise operation: operand ranks must match,
// each axis must be equal or 1 on one side, the larger size wins, and an axis
// that is `whatevs` on either side stays `whatevs`.
static Shape newShape(ChainPtr a, ChainPtr b) {
|
||||
size_t dimsA = a->shape().size();
|
||||
size_t dimsB = b->shape().size();
|
||||
UTIL_THROW_IF2(dimsA != dimsB,
|
||||
"Tensors have different numbers of dimensions");
|
||||
Shape shape(dimsA);
|
||||
for(size_t i = 0; i < dimsA; ++i) {
|
||||
int dimA = a->shape()[i];
|
||||
int dimB = b->shape()[i];
|
||||
bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
|
||||
UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
|
||||
<< "operation cannot be broadcasted: " << dimA << " != " << dimB);
|
||||
shape[i] = std::max(dimA, dimB);
|
||||
|
||||
if(dimA == whatevs || dimB == whatevs)
|
||||
shape[i] = whatevs;
|
||||
}
|
||||
return shape;
|
||||
}
|
||||
|
||||
// Elementwise addition with broadcasting.
//
// The common result shape is computed with newShape(), both operands are
// passed through broadcast() to that shape, and the PlusNodeOp is built from
// the broadcast operands. Building the node from the raw `a` and `b` instead
// would silently discard the broadcast step.
Expr operator+(Expr a, Expr b) {
  Shape shape = newShape(a, b);
  Expr cast_a = broadcast(shape, a);
  Expr cast_b = broadcast(shape, b);
  return Expr(a.graph(), new PlusNodeOp(cast_a, cast_b));
}
|
||||
|
||||
Expr operator-(Expr a, Expr b) {
|
||||
@ -109,13 +114,7 @@ Expr operator/(Expr a, Expr b) {
|
||||
}
|
||||
|
||||
// Builds a DotNodeOp over `a` and `b` in the graph of `a`.
//
// The operands are handed over unchanged. Elementwise broadcasting via
// newShape()/broadcast() must not be applied here: it requires every axis of
// the two operands to match or be 1, which legitimate operand pairs such as
// {whatevs, IMAGE_SIZE} x {IMAGE_SIZE, LABEL_SIZE} do not satisfy. DotNodeOp
// sets its own output shape in its constructor.
Expr dot(Expr a, Expr b) {
  return Expr(a.graph(), new DotNodeOp(a, b));
}
|
||||
|
||||
/******************************************************/
|
||||
|
||||
|
||||
}
|
||||
|
@ -4,6 +4,8 @@
|
||||
|
||||
namespace marian {
|
||||
|
||||
Expr named(Expr a, const std::string& name);
|
||||
|
||||
Expr logit(Expr a);
|
||||
|
||||
Expr tanh(Expr a);
|
||||
|
@ -194,8 +194,7 @@ struct BinaryNodeOp : public Node {
|
||||
struct DotNodeOp : public BinaryNodeOp {
|
||||
template <typename ...Args>
|
||||
DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
||||
: BinaryNodeOp(
|
||||
a, b,
|
||||
: BinaryNodeOp(a, b,
|
||||
keywords::shape=newShape(a, b),
|
||||
args...) { }
|
||||
|
||||
@ -224,35 +223,6 @@ struct DotNodeOp : public BinaryNodeOp {
|
||||
}
|
||||
};
|
||||
|
||||
//struct BroadcastingNodeOp : public BinaryNodeOp {
|
||||
// template <typename ...Args>
|
||||
// BroadcastingNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
||||
// : BinaryNodeOp(broadcast(newShape(a ,b), a),
|
||||
// broadcast(newShape(a ,b), b),
|
||||
// keywords::shape=newShape(a, b),
|
||||
// args...) {}
|
||||
//
|
||||
// static Shape newShape(ChainPtr a, ChainPtr b) {
|
||||
// size_t dimsA = a->shape().size();
|
||||
// size_t dimsB = b->shape().size();
|
||||
// UTIL_THROW_IF2(dimsA != dimsB,
|
||||
// "Tensors have different numbers of dimensions");
|
||||
// Shape shape(dimsA);
|
||||
// for(size_t i = 0; i < dimsA; ++i) {
|
||||
// int dimA = a->shape()[i];
|
||||
// int dimB = b->shape()[i];
|
||||
// bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
|
||||
// UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
|
||||
// << "operation cannot be broadcasted: " << dimA << " != " << dimB);
|
||||
// shape[i] = std::max(dimA, dimB);
|
||||
// if(dimA == whatevs || dimB == whatevs)
|
||||
// shape[i] = whatevs;
|
||||
// }
|
||||
// return shape;
|
||||
// }
|
||||
//};
|
||||
|
||||
|
||||
struct PlusNodeOp : public BinaryNodeOp {
|
||||
template <typename ...Args>
|
||||
PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
||||
|
@ -43,7 +43,7 @@ std::function<void(Tensor)> uniform(float a = 0.0, float b = 0.1) {
|
||||
}
|
||||
|
||||
std::function<void(Tensor)> from_vector(const std::vector<float>& v) {
|
||||
return [&v](Tensor t) {
|
||||
return [v](Tensor t) {
|
||||
t << v;
|
||||
};
|
||||
}
|
||||
|
@ -6,19 +6,11 @@
|
||||
using namespace marian;
|
||||
using namespace keywords;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
cudaSetDevice(1);
|
||||
|
||||
const size_t IMAGE_SIZE = 784;
|
||||
const size_t LABEL_SIZE = 10;
|
||||
int BATCH_SIZE = 10000;
|
||||
|
||||
std::cerr << "Loading test set...";
|
||||
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
|
||||
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
|
||||
std::cerr << "Done." << std::endl;
|
||||
const size_t IMAGE_SIZE = 784;
|
||||
const size_t LABEL_SIZE = 10;
|
||||
int BATCH_SIZE = 10000;
|
||||
|
||||
ExpressionGraph build_graph() {
|
||||
std::cerr << "Loading model params...";
|
||||
NpzConverter converter("../scripts/test_model/model.npz");
|
||||
|
||||
@ -31,29 +23,43 @@ int main(int argc, char** argv) {
|
||||
std::cerr << "Building model...";
|
||||
|
||||
ExpressionGraph g;
|
||||
auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
|
||||
auto y = g.input(shape={whatevs, LABEL_SIZE});
|
||||
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
||||
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
||||
|
||||
auto w = g.param(shape={IMAGE_SIZE, LABEL_SIZE},
|
||||
init=from_vector(wData));
|
||||
auto b = g.param(shape={1, LABEL_SIZE},
|
||||
init=from_vector(bData));
|
||||
auto w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE},
|
||||
init=from_vector(wData)), "w");
|
||||
|
||||
auto b = named(g.param(shape={1, LABEL_SIZE},
|
||||
init=from_vector(bData)), "b");
|
||||
|
||||
auto probs = softmax(dot(x, w) + b, axis=1);
|
||||
auto cost = -mean(sum(y * log(probs), axis=1), axis=0);
|
||||
auto probs = named(softmax(dot(x, w) + b, axis=1), "probs");
|
||||
auto cost = named(-mean(sum(y * log(probs), axis=1), axis=0), "cost");
|
||||
|
||||
std::cerr << "Done." << std::endl;
|
||||
return g;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
cudaSetDevice(0);
|
||||
|
||||
std::cerr << "Loading test set...";
|
||||
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
|
||||
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
|
||||
std::cerr << "Done." << std::endl;
|
||||
|
||||
ExpressionGraph g = build_graph();
|
||||
|
||||
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
|
||||
Tensor yt({BATCH_SIZE, LABEL_SIZE});
|
||||
|
||||
x = xt << testImages;
|
||||
y = yt << testLabels;
|
||||
g["x"] = (xt << testImages);
|
||||
g["y"] = (yt << testLabels);
|
||||
|
||||
g.forward(BATCH_SIZE);
|
||||
|
||||
std::vector<float> results;
|
||||
results << probs.val();
|
||||
results << g["probs"].val();
|
||||
|
||||
size_t acc = 0;
|
||||
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
||||
@ -65,7 +71,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
acc += (correct == proposed);
|
||||
}
|
||||
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
||||
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
||||
|
||||
float eta = 0.1;
|
||||
for (size_t j = 0; j < 10; ++j) {
|
||||
@ -73,14 +79,14 @@ int main(int argc, char** argv) {
|
||||
g.backward();
|
||||
|
||||
auto update_rule = _1 -= eta * _2;
|
||||
Element(update_rule, w.val(), w.grad());
|
||||
Element(update_rule, b.val(), b.grad());
|
||||
for(auto param : g.params())
|
||||
Element(update_rule, param.val(), param.grad());
|
||||
|
||||
g.forward(BATCH_SIZE);
|
||||
}
|
||||
std::cerr << "Epoch: " << j << std::endl;
|
||||
std::vector<float> results;
|
||||
results << probs.val();
|
||||
results << g["probs"].val();
|
||||
|
||||
size_t acc = 0;
|
||||
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
||||
@ -92,7 +98,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
acc += (correct == proposed);
|
||||
}
|
||||
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
||||
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user