changed files for new graph format, adapted test.cu to not use pointers to Expr

Marcin Junczys-Dowmunt 2016-09-16 08:36:29 +02:00
parent 02629ca13b
commit 036284d39c
6 changed files with 59 additions and 63 deletions

View File

@@ -5,6 +5,7 @@ cuda_add_library(marian_lib
   cnpy/cnpy.cpp
   exception.cpp
   expression_graph.cu
+  sgd.cu
   tensor.cu
   tensor_operators.cu
   expression_operators.cu

View File

@@ -7,15 +7,11 @@
 using namespace std;

 namespace marian {

-SGD::SGD(Expr& cost_func, Expr& inX, Expr& inY,
-         const std::vector<Expr*> params, float eta,
+SGD::SGD(ExpressionGraph& g, float eta,
          std::vector<float>& xData, size_t numFeatures,
          std::vector<float>& yData, size_t numClasses,
          size_t epochs, size_t batchSize)
-: cost_function_(&cost_func),
-  inX_(&inX),
-  inY_(&inY),
-  params_(params),
+: graph_(g),
   eta_(eta),
   xData_(xData),
   numFeatures_(numFeatures),
@@ -45,11 +41,11 @@ void SGD::Run()
     size_t endId = startId + batchSize;

     PrepareBatch(startId, endId, batchSize, shuffle, xt, yt);

-    *inX_ = xt;
-    *inY_ = yt;
+    graph_["x"] = xt;
+    graph_["y"] = yt;

-    cost_function_->forward(maxBatchSize_);
-    cost_function_->backward();
+    graph_.forward(maxBatchSize_);
+    graph_.backward();

     UpdateModel();
@@ -136,9 +132,9 @@ void SGD::PrepareBatch(
 }

 void SGD::UpdateModel() {
-  for (auto& param : params_) {
+  for (auto& param : graph_.params()) {
     using namespace thrust::placeholders;
-    Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
+    Element(_1 -= eta_ * _2, param.val(), param.grad());
   }
 }
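The rewritten update loop is plain elementwise SGD over every parameter the graph owns. As a rough standalone C++ sketch of what the thrust-placeholder expression _1 -= eta_ * _2 computes per parameter, assuming Element applies its functor elementwise over the two tensors (the std::vector stand-ins for marian's Tensor are ours, not the library's):

    #include <cstddef>
    #include <vector>

    // Sketch only: value and gradient stand in for param.val() and param.grad().
    void sgd_update(std::vector<float>& value,
                    const std::vector<float>& gradient,
                    float eta) {
      for (std::size_t i = 0; i < value.size(); ++i)
        value[i] -= eta * gradient[i];  // w <- w - eta * dC/dw
    }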

View File

@@ -3,7 +3,7 @@
 #include <memory>
 #include <iostream>

-#include "expressions.h"
+#include "expression_graph.h"
 #include "thrust_functions.h"
 #include "tensor_operators.h"
@@ -11,8 +11,7 @@ namespace marian {

 class SGD {
   public:
-    SGD(Expr& cost_func, Expr& inX, Expr& inY,
-        const std::vector<Expr*> params, float eta,
+    SGD(ExpressionGraph& g, float eta,
         std::vector<float>& xData, size_t numFeatures,
         std::vector<float>& yData, size_t numClasses,
         size_t epochs, size_t batchSize);
@@ -20,10 +19,7 @@ class SGD {
     void Run();

   private:
-    Expr *cost_function_;
-    Expr *inX_;
-    Expr *inY_;
-    std::vector<Expr*> params_;
+    ExpressionGraph& graph_;
     const float eta_;
     std::vector<float>& xData_;
     const size_t numFeatures_;

View File

@@ -14,42 +14,41 @@ int main(int argc, char** argv) {
   int hidden_size = 5;
   int num_inputs = 8;

-  std::vector<Expr*> X(num_inputs);
-  std::vector<Expr*> Y(num_inputs);
-  std::vector<Expr*> H(num_inputs);
+  std::vector<Expr> X;
+  std::vector<Expr> Y;
+  std::vector<Expr> H;
+
+  ExpressionGraph g;

   for (int t = 0; t < num_inputs; ++t) {
-    X[t] = new Expr(input(shape={batch_size, input_size}));
-    Y[t] = new Expr(input(shape={batch_size, output_size}));
+    X.emplace_back(g.input(shape={batch_size, input_size}));
+    Y.emplace_back(g.input(shape={batch_size, output_size}));
   }

-  Expr Wxh = param(shape={input_size, hidden_size}, init=uniform(), name="Wxh");
-  Expr Whh = param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh");
-  Expr bh = param(shape={1, hidden_size}, init=uniform(), name="bh");
-  Expr h0 = param(shape={1, hidden_size}, init=uniform(), name="h0");
+  Expr Wxh = g.param(shape={input_size, hidden_size}, init=uniform(), name="Wxh");
+  Expr Whh = g.param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh");
+  Expr bh = g.param(shape={1, hidden_size}, init=uniform(), name="bh");
+  Expr h0 = g.param(shape={1, hidden_size}, init=uniform(), name="h0");

   std::cerr << "Building RNN..." << std::endl;
-  H[0] = new Expr(tanh(dot(*X[0], Wxh) + dot(h0, Whh) + bh));
+  H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh));
   for (int t = 1; t < num_inputs; ++t) {
-    H[t] = new Expr(tanh(dot(*X[t], Wxh) + dot(*H[t-1], Whh) + bh));
+    H.emplace_back(tanh(dot(X[t], Wxh) + dot(H[t-1], Whh) + bh));
   }

-  Expr Why = param(shape={hidden_size, output_size}, init=uniform(), name="Why");
-  Expr by = param(shape={1, output_size}, init=uniform(), name="by");
+  Expr Why = g.param(shape={hidden_size, output_size}, init=uniform(), name="Why");
+  Expr by = g.param(shape={1, output_size}, init=uniform(), name="by");

   std::cerr << "Building output layer..." << std::endl;
-  std::vector<Expr*> Yp(num_inputs);
+  std::vector<Expr> Yp;

-  Expr* cross_entropy = NULL;
-  for (int t = 0; t < num_inputs; ++t) {
-    Yp[t] = new Expr(softmax_fast(dot(*H[t], Why) + by, name="pred"));
-    if (!cross_entropy) {
-      cross_entropy = new Expr(sum(*Y[t] * log(*Yp[t]), axis=1));
-    } else {
-      *cross_entropy = *cross_entropy + sum(*Y[t] * log(*Yp[t]), axis=1);
-    }
+  Yp.emplace_back(softmax_fast(dot(H[0], Why) + by));
+  Expr cross_entropy = sum(Y[0] * log(Yp[0]), axis=1);
+  for (int t = 1; t < num_inputs; ++t) {
+    Yp.emplace_back(softmax_fast(dot(H[t], Why) + by));
+    cross_entropy = cross_entropy + sum(Y[t] * log(Yp[t]), axis=1);
   }

-  auto graph = -mean(*cross_entropy, axis=0, name="cost");
+  auto graph = -mean(cross_entropy, axis=0, name="cost");

   for (int t = 0; t < num_inputs; ++t) {
     Tensor Xt({batch_size, input_size});
@@ -72,17 +71,17 @@ int main(int argc, char** argv) {
     thrust::copy(values.begin(), values.end(), Xt.begin());
     thrust::copy(classes.begin(), classes.end(), Yt.begin());

-    *X[t] = Xt;
-    *Y[t] = Yt;
+    X[t] = Xt;
+    Y[t] = Yt;
   }

-  graph.forward(batch_size);
-  graph.backward();
+  g.forward(batch_size);
+  g.backward();

   std::cerr << graph.val().Debug() << std::endl;
-  std::cerr << X[0]->val().Debug() << std::endl;
-  std::cerr << Y[0]->val().Debug() << std::endl;
+  std::cerr << X[0].val().Debug() << std::endl;
+  std::cerr << Y[0].val().Debug() << std::endl;

   std::cerr << Whh.grad().Debug() << std::endl;
   std::cerr << bh.grad().Debug() << std::endl;
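Read off the new API, the test unrolls the standard Elman recurrence with a softmax readout and a summed per-step cross-entropy. As a sketch of the math this code encodes (the t = 0 step uses the learned initial state h0 in place of h_{t-1}; B = batch_size, T = num_inputs):

    h_t = \tanh(x_t W_{xh} + h_{t-1} W_{hh} + b_h)
    \hat{y}_t = \operatorname{softmax}(h_t W_{hy} + b_y)
    \mathrm{cost} = -\frac{1}{B} \sum_{b=1}^{B} \sum_{t=0}^{T-1} \sum_{j} y_{t,b,j} \log \hat{y}_{t,b,j}

where W_{hy} is the Why parameter; sum(..., axis=1) supplies the sum over classes j, the loop accumulates over t, and -mean(..., axis=0) averages over the batch.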

View File

@@ -16,22 +16,24 @@ int main(int argc, char** argv) {
   using namespace marian;
   using namespace keywords;

-  Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
-  Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
+  ExpressionGraph g;

-  Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
-  Expr b = param(shape={1, LABEL_SIZE}, name="b0");
+  Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
+  Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
+
+  Expr w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE}), "w");
+  Expr b = named(g.param(shape={1, LABEL_SIZE}), "b");

   std::vector<Expr*> params;
   params.push_back(&w);
   params.push_back(&b);

   auto scores = dot(x, w) + b;
-  auto lr = softmax_fast(scores, axis=1, name="pred");
-  auto cost = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
+  auto lr = softmax_fast(scores);
+  auto cost = named(-mean(sum(y * log(lr), axis=1), axis=0), "cost");

   cerr << "lr=" << lr.Debug() << endl;

-  SGD opt(cost, x, y, params, 0.9, trainImages, IMAGE_SIZE, trainLabels, LABEL_SIZE, 3, 24);
+  SGD opt(g, 0.9, trainImages, IMAGE_SIZE, trainLabels, LABEL_SIZE, 3, 24);
   opt.Run();

   return 0;
 }
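The named(...) wrapper is the other half of the new SGD contract: SGD::Run in sgd.cu above binds each batch by name (graph_["x"] = xt; graph_["y"] = yt;) before calling forward/backward, which only works if the inputs were registered under exactly those strings. A hypothetical minimal pairing, assuming named() registers the node for that graph-level lookup (the diff implies this but does not show named()'s definition):

    // Hypothetical: the strings passed to named() must match what SGD::Run
    // looks up, i.e. graph_["x"] and graph_["y"].
    Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");  // rebound each batch
    Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");  // rebound each batch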

View File

@@ -59,13 +59,15 @@ int main(int argc, char** argv) {
   std::cerr << "\tDone." << std::endl;

-  auto x = input(shape={whatevs, IMAGE_SIZE}, name="X");
-  auto y = input(shape={whatevs, LABEL_SIZE}, name="Y");
-  auto w1 = param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1);
-  auto b1 = param(shape={1, 100}, name="b0", init=initB1);
-  auto w2 = param(shape={100, LABEL_SIZE}, name="W1", init=initW2);
-  auto b2 = param(shape={1, LABEL_SIZE}, name="b1", init=initB2);
+  ExpressionGraph g;
+
+  auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
+  auto y = g.input(shape={whatevs, LABEL_SIZE}, name="Y");
+
+  auto w1 = g.param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1);
+  auto b1 = g.param(shape={1, 100}, name="b0", init=initB1);
+  auto w2 = g.param(shape={100, LABEL_SIZE}, name="W1", init=initW2);
+  auto b2 = g.param(shape={1, LABEL_SIZE}, name="b1", init=initB2);

   std::cerr << "Building model...";
   auto layer1 = tanh(dot(x, w1) + b1);
@@ -86,7 +88,7 @@ int main(int argc, char** argv) {
     xt << tmp;
     x = xt;

-    predict.forward(BATCH_SIZE);
+    g.forward(BATCH_SIZE);

     std::vector<float> results(LABEL_SIZE * BATCH_SIZE);
     results << predict.val();
@@ -113,7 +115,7 @@ int main(int argc, char** argv) {
     xt << tmp;
     x = xt;

-    predict.forward(endId - startId);
+    g.forward(endId - startId);

     std::vector<float> results(LABEL_SIZE * BATCH_SIZE);
     results << predict.val();