mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
working separated graphs, added params
This commit is contained in:
parent
976c8039db
commit
94914d56e8
@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.5.1)
|
|||||||
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
|
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
|
||||||
|
|
||||||
project(marian CXX)
|
project(marian CXX)
|
||||||
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O0 -funroll-loops -Wno-unused-result -Wno-deprecated")
|
SET(CMAKE_CXX_FLAGS " -std=c++11 -g -O3 -funroll-loops -Wno-unused-result -Wno-deprecated")
|
||||||
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O0; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
|
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -g; -O3; -arch=sm_35; -lineinfo; --use_fast_math; -Xcompiler '-fPIC')
|
||||||
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
|
add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
|
||||||
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||||
|
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
#include "definitions.h"
|
#include "definitions.h"
|
||||||
#include "chainable.h"
|
#include "chainable.h"
|
||||||
#include "node_operators.h"
|
#include "node_operators.h"
|
||||||
@ -65,7 +67,9 @@ class ExpressionGraph {
|
|||||||
|
|
||||||
template <typename ...Args>
|
template <typename ...Args>
|
||||||
inline Expr param(Args ...args) {
|
inline Expr param(Args ...args) {
|
||||||
return Expr(this, new ParamNode(args...));
|
Expr e(this, new ParamNode(args...));
|
||||||
|
params_.emplace_back(e);
|
||||||
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ...Args>
|
template <typename ...Args>
|
||||||
@ -89,8 +93,28 @@ class ExpressionGraph {
|
|||||||
return stack_;
|
return stack_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Expr& operator[](const std::string& name) {
|
||||||
|
auto it = named_.find(name);
|
||||||
|
UTIL_THROW_IF2(it == named_.end(), "No such named node in graph: " << name);
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_node(const std::string& name) const {
|
||||||
|
return named_.count(name) > 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_named_node(Expr e, const std::string& name) {
|
||||||
|
named_.emplace(name, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<Expr>& params() {
|
||||||
|
return params_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ChainableStackPtr stack_;
|
ChainableStackPtr stack_;
|
||||||
|
std::map<std::string, Expr> named_;
|
||||||
|
std::vector<Expr> params_;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,11 @@
|
|||||||
|
|
||||||
namespace marian {
|
namespace marian {
|
||||||
|
|
||||||
|
Expr named(Expr a, const std::string& name) {
|
||||||
|
a.graph()->add_named_node(a, name);
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
|
||||||
Expr logit(Expr a) {
|
Expr logit(Expr a) {
|
||||||
return Expr(a.graph(), new LogitNodeOp(a));
|
return Expr(a.graph(), new LogitNodeOp(a));
|
||||||
}
|
}
|
||||||
@ -26,6 +31,25 @@ Expr operator-(Expr a) {
|
|||||||
|
|
||||||
/*********************************************************/
|
/*********************************************************/
|
||||||
|
|
||||||
|
static Shape newShape(ChainPtr a, ChainPtr b) {
|
||||||
|
size_t dimsA = a->shape().size();
|
||||||
|
size_t dimsB = b->shape().size();
|
||||||
|
UTIL_THROW_IF2(dimsA != dimsB,
|
||||||
|
"Tensors have different numbers of dimensions");
|
||||||
|
Shape shape(dimsA);
|
||||||
|
for(size_t i = 0; i < dimsA; ++i) {
|
||||||
|
int dimA = a->shape()[i];
|
||||||
|
int dimB = b->shape()[i];
|
||||||
|
bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
|
||||||
|
UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
|
||||||
|
<< "operation cannot be broadcasted: " << dimA << " != " << dimB);
|
||||||
|
shape[i] = std::max(dimA, dimB);
|
||||||
|
if(dimA == whatevs || dimB == whatevs)
|
||||||
|
shape[i] = whatevs;
|
||||||
|
}
|
||||||
|
return shape;
|
||||||
|
}
|
||||||
|
|
||||||
Expr broadcast(Shape bShape, Expr a) {
|
Expr broadcast(Shape bShape, Expr a) {
|
||||||
const Shape& aShape = a.node()->shape();
|
const Shape& aShape = a.node()->shape();
|
||||||
if(aShape == bShape) {
|
if(aShape == bShape) {
|
||||||
@ -61,30 +85,11 @@ Expr broadcast(Shape bShape, Expr a) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static Shape newShape(ChainPtr a, ChainPtr b) {
|
|
||||||
size_t dimsA = a->shape().size();
|
|
||||||
size_t dimsB = b->shape().size();
|
|
||||||
UTIL_THROW_IF2(dimsA != dimsB,
|
|
||||||
"Tensors have different numbers of dimensions");
|
|
||||||
Shape shape(dimsA);
|
|
||||||
for(size_t i = 0; i < dimsA; ++i) {
|
|
||||||
int dimA = a->shape()[i];
|
|
||||||
int dimB = b->shape()[i];
|
|
||||||
bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
|
|
||||||
UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
|
|
||||||
<< "operation cannot be broadcasted: " << dimA << " != " << dimB);
|
|
||||||
shape[i] = std::max(dimA, dimB);
|
|
||||||
if(dimA == whatevs || dimB == whatevs)
|
|
||||||
shape[i] = whatevs;
|
|
||||||
}
|
|
||||||
return shape;
|
|
||||||
}
|
|
||||||
|
|
||||||
Expr operator+(Expr a, Expr b) {
|
Expr operator+(Expr a, Expr b) {
|
||||||
Shape shape = newShape(a, b);
|
Shape shape = newShape(a, b);
|
||||||
Expr cast_a = broadcast(shape, a);
|
Expr cast_a = broadcast(shape, a);
|
||||||
Expr cast_b = broadcast(shape, b);
|
Expr cast_b = broadcast(shape, b);
|
||||||
return Expr(a.graph(), new PlusNodeOp(a, b));
|
return Expr(a.graph(), new PlusNodeOp(cast_a, cast_b));
|
||||||
}
|
}
|
||||||
|
|
||||||
Expr operator-(Expr a, Expr b) {
|
Expr operator-(Expr a, Expr b) {
|
||||||
@ -109,13 +114,7 @@ Expr operator/(Expr a, Expr b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Expr dot(Expr a, Expr b) {
|
Expr dot(Expr a, Expr b) {
|
||||||
Shape shape = newShape(a, b);
|
return Expr(a.graph(), new DotNodeOp(a, b));
|
||||||
Expr cast_a = broadcast(shape, a);
|
|
||||||
Expr cast_b = broadcast(shape, b);
|
|
||||||
return Expr(a.graph(), new DotNodeOp(cast_a, cast_b));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/******************************************************/
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
namespace marian {
|
namespace marian {
|
||||||
|
|
||||||
|
Expr named(Expr a, const std::string& name);
|
||||||
|
|
||||||
Expr logit(Expr a);
|
Expr logit(Expr a);
|
||||||
|
|
||||||
Expr tanh(Expr a);
|
Expr tanh(Expr a);
|
||||||
|
@ -194,8 +194,7 @@ struct BinaryNodeOp : public Node {
|
|||||||
struct DotNodeOp : public BinaryNodeOp {
|
struct DotNodeOp : public BinaryNodeOp {
|
||||||
template <typename ...Args>
|
template <typename ...Args>
|
||||||
DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
DotNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
||||||
: BinaryNodeOp(
|
: BinaryNodeOp(a, b,
|
||||||
a, b,
|
|
||||||
keywords::shape=newShape(a, b),
|
keywords::shape=newShape(a, b),
|
||||||
args...) { }
|
args...) { }
|
||||||
|
|
||||||
@ -224,35 +223,6 @@ struct DotNodeOp : public BinaryNodeOp {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//struct BroadcastingNodeOp : public BinaryNodeOp {
|
|
||||||
// template <typename ...Args>
|
|
||||||
// BroadcastingNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
|
||||||
// : BinaryNodeOp(broadcast(newShape(a ,b), a),
|
|
||||||
// broadcast(newShape(a ,b), b),
|
|
||||||
// keywords::shape=newShape(a, b),
|
|
||||||
// args...) {}
|
|
||||||
//
|
|
||||||
// static Shape newShape(ChainPtr a, ChainPtr b) {
|
|
||||||
// size_t dimsA = a->shape().size();
|
|
||||||
// size_t dimsB = b->shape().size();
|
|
||||||
// UTIL_THROW_IF2(dimsA != dimsB,
|
|
||||||
// "Tensors have different numbers of dimensions");
|
|
||||||
// Shape shape(dimsA);
|
|
||||||
// for(size_t i = 0; i < dimsA; ++i) {
|
|
||||||
// int dimA = a->shape()[i];
|
|
||||||
// int dimB = b->shape()[i];
|
|
||||||
// bool broadcastable = (dimA == dimB || dimA == 1 || dimB == 1);
|
|
||||||
// UTIL_THROW_IF2(!broadcastable, "Different dimensions in elementwise "
|
|
||||||
// << "operation cannot be broadcasted: " << dimA << " != " << dimB);
|
|
||||||
// shape[i] = std::max(dimA, dimB);
|
|
||||||
// if(dimA == whatevs || dimB == whatevs)
|
|
||||||
// shape[i] = whatevs;
|
|
||||||
// }
|
|
||||||
// return shape;
|
|
||||||
// }
|
|
||||||
//};
|
|
||||||
|
|
||||||
|
|
||||||
struct PlusNodeOp : public BinaryNodeOp {
|
struct PlusNodeOp : public BinaryNodeOp {
|
||||||
template <typename ...Args>
|
template <typename ...Args>
|
||||||
PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
PlusNodeOp(ChainPtr a, ChainPtr b, Args ...args)
|
||||||
|
@ -43,7 +43,7 @@ std::function<void(Tensor)> uniform(float a = 0.0, float b = 0.1) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::function<void(Tensor)> from_vector(const std::vector<float>& v) {
|
std::function<void(Tensor)> from_vector(const std::vector<float>& v) {
|
||||||
return [&v](Tensor t) {
|
return [v](Tensor t) {
|
||||||
t << v;
|
t << v;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -6,19 +6,11 @@
|
|||||||
using namespace marian;
|
using namespace marian;
|
||||||
using namespace keywords;
|
using namespace keywords;
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
const size_t IMAGE_SIZE = 784;
|
||||||
|
const size_t LABEL_SIZE = 10;
|
||||||
cudaSetDevice(1);
|
int BATCH_SIZE = 10000;
|
||||||
|
|
||||||
const size_t IMAGE_SIZE = 784;
|
|
||||||
const size_t LABEL_SIZE = 10;
|
|
||||||
int BATCH_SIZE = 10000;
|
|
||||||
|
|
||||||
std::cerr << "Loading test set...";
|
|
||||||
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
|
|
||||||
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
|
|
||||||
std::cerr << "Done." << std::endl;
|
|
||||||
|
|
||||||
|
ExpressionGraph build_graph() {
|
||||||
std::cerr << "Loading model params...";
|
std::cerr << "Loading model params...";
|
||||||
NpzConverter converter("../scripts/test_model/model.npz");
|
NpzConverter converter("../scripts/test_model/model.npz");
|
||||||
|
|
||||||
@ -31,29 +23,43 @@ int main(int argc, char** argv) {
|
|||||||
std::cerr << "Building model...";
|
std::cerr << "Building model...";
|
||||||
|
|
||||||
ExpressionGraph g;
|
ExpressionGraph g;
|
||||||
auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
|
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
||||||
auto y = g.input(shape={whatevs, LABEL_SIZE});
|
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
||||||
|
|
||||||
auto w = g.param(shape={IMAGE_SIZE, LABEL_SIZE},
|
auto w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE},
|
||||||
init=from_vector(wData));
|
init=from_vector(wData)), "w");
|
||||||
auto b = g.param(shape={1, LABEL_SIZE},
|
|
||||||
init=from_vector(bData));
|
auto b = named(g.param(shape={1, LABEL_SIZE},
|
||||||
|
init=from_vector(bData)), "b");
|
||||||
|
|
||||||
auto probs = softmax(dot(x, w) + b, axis=1);
|
auto probs = named(softmax(dot(x, w) + b, axis=1), "probs");
|
||||||
auto cost = -mean(sum(y * log(probs), axis=1), axis=0);
|
auto cost = named(-mean(sum(y * log(probs), axis=1), axis=0), "cost");
|
||||||
|
|
||||||
std::cerr << "Done." << std::endl;
|
std::cerr << "Done." << std::endl;
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
|
cudaSetDevice(0);
|
||||||
|
|
||||||
|
std::cerr << "Loading test set...";
|
||||||
|
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
|
||||||
|
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
|
||||||
|
std::cerr << "Done." << std::endl;
|
||||||
|
|
||||||
|
ExpressionGraph g = build_graph();
|
||||||
|
|
||||||
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
|
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
|
||||||
Tensor yt({BATCH_SIZE, LABEL_SIZE});
|
Tensor yt({BATCH_SIZE, LABEL_SIZE});
|
||||||
|
|
||||||
x = xt << testImages;
|
g["x"] = (xt << testImages);
|
||||||
y = yt << testLabels;
|
g["y"] = (yt << testLabels);
|
||||||
|
|
||||||
g.forward(BATCH_SIZE);
|
g.forward(BATCH_SIZE);
|
||||||
|
|
||||||
std::vector<float> results;
|
std::vector<float> results;
|
||||||
results << probs.val();
|
results << g["probs"].val();
|
||||||
|
|
||||||
size_t acc = 0;
|
size_t acc = 0;
|
||||||
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
||||||
@ -65,7 +71,7 @@ int main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
acc += (correct == proposed);
|
acc += (correct == proposed);
|
||||||
}
|
}
|
||||||
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
||||||
|
|
||||||
float eta = 0.1;
|
float eta = 0.1;
|
||||||
for (size_t j = 0; j < 10; ++j) {
|
for (size_t j = 0; j < 10; ++j) {
|
||||||
@ -73,14 +79,14 @@ int main(int argc, char** argv) {
|
|||||||
g.backward();
|
g.backward();
|
||||||
|
|
||||||
auto update_rule = _1 -= eta * _2;
|
auto update_rule = _1 -= eta * _2;
|
||||||
Element(update_rule, w.val(), w.grad());
|
for(auto param : g.params())
|
||||||
Element(update_rule, b.val(), b.grad());
|
Element(update_rule, param.val(), param.grad());
|
||||||
|
|
||||||
g.forward(BATCH_SIZE);
|
g.forward(BATCH_SIZE);
|
||||||
}
|
}
|
||||||
std::cerr << "Epoch: " << j << std::endl;
|
std::cerr << "Epoch: " << j << std::endl;
|
||||||
std::vector<float> results;
|
std::vector<float> results;
|
||||||
results << probs.val();
|
results << g["probs"].val();
|
||||||
|
|
||||||
size_t acc = 0;
|
size_t acc = 0;
|
||||||
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
||||||
@ -92,7 +98,7 @@ int main(int argc, char** argv) {
|
|||||||
}
|
}
|
||||||
acc += (correct == proposed);
|
acc += (correct == proposed);
|
||||||
}
|
}
|
||||||
std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user