Working separated graphs; added params.

Marcin Junczys-Dowmunt 2016-09-16 00:23:47 +02:00
parent 976c8039db
commit 94914d56e8
7 changed files with 91 additions and 90 deletions

View File

@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.5.1)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
project(marian CXX)
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O0 -funroll-loops -Wno-unused-result -Wno-deprecated")
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O0; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O3 -funroll-loops -Wno-unused-result -Wno-deprecated")
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)

View File

@@ -1,5 +1,7 @@
#pragma once
#include <map>
#include "definitions.h"
#include "chainable.h"
#include "node_operators.h"
@@ -65,7 +67,9 @@ class ExpressionGraph {
template <typename ...Args>
inline Expr param(Args ...args) {
return Expr(this, new ParamNode(args...));
Expr e(this, new ParamNode(args...));
params_.emplace_back(e);
return e;
}
template <typename ...Args>
@@ -89,8 +93,28 @@
return stack_;
}
Expr& operator[](const std::string& name) {
auto it = named_.find(name);
UTIL_THROW_IF2(it == named_.end(), "No such named node in graph: " << name);
return it->second;
}
bool has_node(const std::string& name) const {
return named_.count(name) > 0;
}
void add_named_node(Expr e, const std::string& name) {
named_.emplace(name, e);
}
std::vector<Expr>& params() {
return params_;
}
private:
ChainableStackPtr stack_;
std::map<std::string, Expr> named_;
std::vector<Expr> params_;
};
}
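The accessors added above can be exercised roughly as follows (a minimal sketch, not part of this commit; the shapes and the names "x" and "w" are made up for illustration, and the relevant marian headers are assumed to be included):

// sketch: named() comes from the expression operators, ExpressionGraph from this header
using namespace marian;
using namespace keywords;

ExpressionGraph g;
auto x = named(g.input(shape={whatevs, 4}), "x");           // registers the node under "x"
auto w = named(g.param(shape={4, 2}, init=uniform()), "w");  // param() also records w in g.params()

if (g.has_node("x"))   // look a node up by name; operator[] throws on unknown names
  g["x"] = x;

for (auto& p : g.params()) {
  // every expression created via param() is collected here,
  // e.g. for applying an update rule to p.val() and p.grad()
}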

View File

@@ -4,6 +4,11 @@
namespace marian {
Expr named(Expr a, const std::string& name) {
a.graph()->add_named_node(a, name);
return a;
}
Expr logit(Expr a) {
return Expr(a.graph(), new LogitNodeOp(a));
}
@@ -26,6 +31,25 @@ Expr operator-(Expr a) {
/*********************************************************/
static Shape newShape(ChainPtr a, ChainPtr b) {
size_t dimsA = a->shape().size();
size_t dimsB = b->shape().size();
UTIL_THROW_IF2(dimsA != dimsB,
"Tensors have different numbers of dimensions");
Shape shape(dimsA);
for(size_t i = 0; i < dimsA; ++i) {
int dimA = a->shape()[i];
int dimB = b->shape()[i];
bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
<< "operation cannot be broadcasted: " << dimA << " != " << dimB);
shape[i] = std::max(dimA, dimB);
if(dimA == whatevs || dimB == whatevs)
shape[i] = whatevs;
}
return shape;
}
Expr broadcast(Shape bShape, Expr a) {
const Shape& aShape = a.node()->shape();
if(aShape == bShape) {
@@ -61,30 +85,11 @@ Expr broadcast(Shape bShape, Expr a) {
}
}
static Shape newShape(ChainPtr a, ChainPtr b) {
size_t dimsA = a->shape().size();
size_t dimsB = b->shape().size();
UTIL_THROW_IF2(dimsA != dimsB,
"Tensors have different numbers of dimensions");
Shape shape(dimsA);
for(size_t i = 0; i < dimsA; ++i) {
int dimA = a->shape()[i];
int dimB = b->shape()[i];
bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
<< "operation cannot be broadcasted: " << dimA << " != " << dimB);
shape[i] = std::max(dimA, dimB);
if(dimA == whatevs || dimB == whatevs)
shape[i] = whatevs;
}
return shape;
}
Expr operator+(Expr a, Expr b) {
Shape shape = newShape(a, b);
Expr cast_a = broadcast(shape, a);
Expr cast_b = broadcast(shape, b);
return Expr(a.graph(), new PlusNodeOp(a, b));
return Expr(a.graph(), new PlusNodeOp(cast_a, cast_b));
}
Expr operator-(Expr a, Expr b) {
@@ -109,13 +114,7 @@ Expr operator/(Expr a, Expr b) {
}
Expr dot(Expr a, Expr b) {
Shape shape = newShape(a, b);
Expr cast_a = broadcast(shape, a);
Expr cast_b = broadcast(shape, b);
return Expr(a.graph(), new DotNodeOp(cast_a, cast_b));
return Expr(a.graph(), new DotNodeOp(a, b));
}
/******************************************************/
}
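For context, the broadcasting rule implemented by newShape()/broadcast() above matches dimensions pairwise: each pair must be equal, or one side must be 1 (or whatevs), and the result takes the larger extent. A small sketch of the typical case this enables (illustrative shapes, not from the diff):

// x: {whatevs, 784}  (a batch of row vectors)
// w: {784, 10}   ->  dot(x, w): {whatevs, 10}
// b: {1, 10}          (a bias row)
// operator+ now broadcasts b across the batch axis, so this is valid:
Expr affine(Expr x, Expr w, Expr b) {
  return dot(x, w) + b;   // b is repeated along the first dimension
}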

View File

@@ -4,6 +4,8 @@
namespace marian {
Expr named(Expr a, const std::string& name);
Expr logit(Expr a);
Expr tanh(Expr a);

View File

@@ -194,8 +194,7 @@ struct BinaryNodeOp : public Node {
struct DotNodeOp : public BinaryNodeOp {
template <typename ...Args>
DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
: BinaryNodeOp(
a, b,
: BinaryNodeOp(a, b,
keywords::shape=newShape(a, b),
args...) { }
@@ -224,35 +223,6 @@ struct DotNodeOp : public BinaryNodeOp {
}
};
//struct BroadcastingNodeOp : public BinaryNodeOp {
// template <typename ...Args>
// BroadcastingNodeOp(ChainPtr a, ChainPtr b, Args ...args)
// : BinaryNodeOp(broadcast(newShape(a ,b), a),
// broadcast(newShape(a ,b), b),
// keywords::shape=newShape(a, b),
// args...) {}
//
// static Shape newShape(ChainPtr a, ChainPtr b) {
// size_t dimsA = a->shape().size();
// size_t dimsB = b->shape().size();
// UTIL_THROW_IF2(dimsA != dimsB,
// "Tensors have different numbers of dimensions");
// Shape shape(dimsA);
// for(size_t i = 0; i < dimsA; ++i) {
// int dimA = a->shape()[i];
// int dimB = b->shape()[i];
// bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
// UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
// << "operation cannot be broadcasted: " << dimA << " != " << dimB);
// shape[i] = std::max(dimA, dimB);
// if(dimA == whatevs || dimB == whatevs)
// shape[i] = whatevs;
// }
// return shape;
// }
//};
struct PlusNodeOp : public BinaryNodeOp {
template <typename ...Args>
PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)

View File

@@ -43,7 +43,7 @@ std::function<void(Tensor)> uniform(float a = 0.0, float b = 0.1) {
}
std::function<void(Tensor)> from_vector(const std::vector<float>& v) {
return [&v](Tensor t) {
return [v](Tensor t) {
t << v;
};
}
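The capture change above ([&v] to [v]) matters because the vector handed to from_vector() is typically a local (such as wData/bData in build_graph below) that may no longer exist by the time the initializer is actually invoked; capturing by value gives the lambda its own copy. A small sketch of the failure mode the by-value capture avoids (hypothetical usage, not from the diff):

// with [&v] this lambda would hold a dangling reference once the
// temporary vector is destroyed; with [v] it owns a copy of the data.
auto init = from_vector(std::vector<float>(784, 0.0f));
// ... later, whenever the initializer is invoked, init(t) is still safe.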

View File

@@ -6,19 +6,11 @@
using namespace marian;
using namespace keywords;
int main(int argc, char** argv) {
cudaSetDevice(1);
const size_t IMAGE_SIZE = 784;
const size_t LABEL_SIZE = 10;
int BATCH_SIZE = 10000;
std::cerr << "Loading test set...";
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
std::cerr << "Done." << std::endl;
const size_t IMAGE_SIZE = 784;
const size_t LABEL_SIZE = 10;
int BATCH_SIZE = 10000;
ExpressionGraph build_graph() {
std::cerr << "Loading model params...";
NpzConverter converter("../scripts/test_model/model.npz");
@@ -31,29 +23,43 @@ int main(int argc, char** argv) {
std::cerr << "Building model...";
ExpressionGraph g;
auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
auto y = g.input(shape={whatevs, LABEL_SIZE});
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
auto w = g.param(shape={IMAGE_SIZE, LABEL_SIZE},
init=from_vector(wData));
auto b = g.param(shape={1, LABEL_SIZE},
init=from_vector(bData));
auto w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE},
init=from_vector(wData)), "w");
auto b = named(g.param(shape={1, LABEL_SIZE},
init=from_vector(bData)), "b");
auto probs = softmax(dot(x, w) + b, axis=1);
auto cost = -mean(sum(y * log(probs), axis=1), axis=0);
auto probs = named(softmax(dot(x, w) + b, axis=1), "probs");
auto cost = named(-mean(sum(y * log(probs), axis=1), axis=0), "cost");
std::cerr << "Done." << std::endl;
return g;
}
int main(int argc, char** argv) {
cudaSetDevice(0);
std::cerr << "Loading test set...";
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
std::cerr << "Done." << std::endl;
ExpressionGraph g = build_graph();
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
Tensor yt({BATCH_SIZE, LABEL_SIZE});
x = xt << testImages;
y = yt << testLabels;
g["x"] = (xt << testImages);
g["y"] = (yt << testLabels);
g.forward(BATCH_SIZE);
std::vector<float> results;
results << probs.val();
results << g["probs"].val();
size_t acc = 0;
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
@@ -65,7 +71,7 @@ int main(int argc, char** argv) {
}
acc += (correct == proposed);
}
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
float eta = 0.1;
for (size_t j = 0; j < 10; ++j) {
@@ -73,14 +79,14 @@ int main(int argc, char** argv) {
g.backward();
auto update_rule = _1 -= eta * _2;
Element(update_rule, w.val(), w.grad());
Element(update_rule, b.val(), b.grad());
for(auto param : g.params())
Element(update_rule, param.val(), param.grad());
g.forward(BATCH_SIZE);
}
std::cerr << "Epoch: " << j << std::endl;
std::vector<float> results;
results << probs.val();
results << g["probs"].val();
size_t acc = 0;
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
@@ -92,7 +98,7 @@ int main(int argc, char** argv) {
}
acc += (correct == proposed);
}
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
}
return 0;
}