mirror of https://github.com/marian-nmt/marian.git
synced 2024-11-04 14:04:24 +03:00

commit 036284d39c (parent 02629ca13b)

    changed files for new graph format, adapted test.cu to not use pointers to Expr
[file name not preserved in this view]

@@ -5,6 +5,7 @@ cuda_add_library(marian_lib
   cnpy/cnpy.cpp
   exception.cpp
   expression_graph.cu
+  sgd.cu
   tensor.cu
   tensor_operators.cu
   expression_operators.cu
src/sgd.cu (20 changed lines)

@@ -7,15 +7,11 @@
 using namespace std;
 
 namespace marian {
-SGD::SGD(Expr& cost_func, Expr& inX, Expr& inY,
-         const std::vector<Expr*> params, float eta,
+SGD::SGD(ExpressionGraph& g, float eta,
          std::vector<float>& xData, size_t numFeatures,
          std::vector<float>& yData, size_t numClasses,
          size_t epochs, size_t batchSize)
-: cost_function_(&cost_func),
-  inX_(&inX),
-  inY_(&inY),
-  params_(params),
+: graph_(g),
   eta_(eta),
   xData_(xData),
   numFeatures_(numFeatures),

@@ -45,11 +41,11 @@ void SGD::Run()
       size_t endId = startId + batchSize;
 
       PrepareBatch(startId, endId, batchSize, shuffle, xt, yt);
-      *inX_ = xt;
-      *inY_ = yt;
+      graph_["x"] = xt;
+      graph_["y"] = yt;
 
-      cost_function_->forward(maxBatchSize_);
-      cost_function_->backward();
+      graph_.forward(maxBatchSize_);
+      graph_.backward();
 
       UpdateModel();
 

@@ -136,9 +132,9 @@ void SGD::PrepareBatch(
 }
 
 void SGD::UpdateModel() {
-  for (auto& param : params_) {
+  for (auto& param : graph_.params()) {
     using namespace thrust::placeholders;
-    Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
+    Element(_1 -= eta_ * _2, param.val(), param.grad());
   }
 }
 
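
The loop in SGD::Run() now drives everything through the graph: the named
inputs are filled per batch, graph_.forward()/graph_.backward() compute values
and gradients, and UpdateModel() applies plain SGD over graph_.params(). As a
minimal CPU sketch of the rule the thrust Element(...) call applies elementwise
on the GPU -- buffer names here are illustrative, not marian's API:

    #include <cstddef>
    #include <vector>

    // w <- w - eta * dw, the same elementwise update as
    // Element(_1 -= eta_ * _2, param.val(), param.grad()).
    void sgd_step(std::vector<float>& w, const std::vector<float>& dw, float eta) {
      for (std::size_t i = 0; i < w.size(); ++i)
        w[i] -= eta * dw[i];
    }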
src/sgd.h (10 changed lines)

@@ -3,7 +3,7 @@
 #include <memory>
 #include <iostream>
 
-#include "expressions.h"
+#include "expression_graph.h"
 #include "thrust_functions.h"
 #include "tensor_operators.h"
 

@@ -11,8 +11,7 @@ namespace marian {
 
 class SGD {
   public:
-    SGD(Expr& cost_func, Expr& inX, Expr& inY,
-        const std::vector<Expr*> params, float eta,
+    SGD(ExpressionGraph& g, float eta,
         std::vector<float>& xData, size_t numFeatures,
         std::vector<float>& yData, size_t numClasses,
         size_t epochs, size_t batchSize);

@@ -20,10 +19,7 @@ class SGD {
     void Run();
 
   private:
-    Expr *cost_function_;
-    Expr *inX_;
-    Expr *inY_;
-    std::vector<Expr*> params_;
+    ExpressionGraph& graph_;
     const float eta_;
     std::vector<float>& xData_;
     const size_t numFeatures_;
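
Pieced together from the hunks above, the slimmed-down declaration after this
commit reads roughly as follows; members outside the shown context lines are
not visible in the diff and may differ:

    class SGD {
      public:
        SGD(ExpressionGraph& g, float eta,
            std::vector<float>& xData, size_t numFeatures,
            std::vector<float>& yData, size_t numClasses,
            size_t epochs, size_t batchSize);

        void Run();

      private:
        ExpressionGraph& graph_;
        const float eta_;
        std::vector<float>& xData_;
        const size_t numFeatures_;
        // ... remaining members not shown in this diff
    };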
src/test.cu (57 changed lines)

@@ -14,42 +14,41 @@ int main(int argc, char** argv) {
   int hidden_size = 5;
   int num_inputs = 8;
 
-  std::vector<Expr*> X(num_inputs);
-  std::vector<Expr*> Y(num_inputs);
-  std::vector<Expr*> H(num_inputs);
+  std::vector<Expr> X;
+  std::vector<Expr> Y;
+  std::vector<Expr> H;
 
+  ExpressionGraph g;
+
   for (int t = 0; t < num_inputs; ++t) {
-    X[t] = new Expr(input(shape={batch_size, input_size}));
-    Y[t] = new Expr(input(shape={batch_size, output_size}));
+    X.emplace_back(g.input(shape={batch_size, input_size}));
+    Y.emplace_back(g.input(shape={batch_size, output_size}));
   }
 
-  Expr Wxh = param(shape={input_size, hidden_size}, init=uniform(), name="Wxh");
-  Expr Whh = param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh");
-  Expr bh = param(shape={1, hidden_size}, init=uniform(), name="bh");
-  Expr h0 = param(shape={1, hidden_size}, init=uniform(), name="h0");
+  Expr Wxh = g.param(shape={input_size, hidden_size}, init=uniform(), name="Wxh");
+  Expr Whh = g.param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh");
+  Expr bh = g.param(shape={1, hidden_size}, init=uniform(), name="bh");
+  Expr h0 = g.param(shape={1, hidden_size}, init=uniform(), name="h0");
 
   std::cerr << "Building RNN..." << std::endl;
-  H[0] = new Expr(tanh(dot(*X[0], Wxh) + dot(h0, Whh) + bh));
+  H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh));
   for (int t = 1; t < num_inputs; ++t) {
-    H[t] = new Expr(tanh(dot(*X[t], Wxh) + dot(*H[t-1], Whh) + bh));
+    H.emplace_back(tanh(dot(X[t], Wxh) + dot(H[t-1], Whh) + bh));
   }
 
-  Expr Why = param(shape={hidden_size, output_size}, init=uniform(), name="Why");
-  Expr by = param(shape={1, output_size}, init=uniform(), name="by");
+  Expr Why = g.param(shape={hidden_size, output_size}, init=uniform(), name="Why");
+  Expr by = g.param(shape={1, output_size}, init=uniform(), name="by");
 
   std::cerr << "Building output layer..." << std::endl;
-  std::vector<Expr*> Yp(num_inputs);
+  std::vector<Expr> Yp;
 
-  Expr* cross_entropy = NULL;
-  for (int t = 0; t < num_inputs; ++t) {
-    Yp[t] = new Expr(softmax_fast(dot(*H[t], Why) + by, name="pred"));
-    if (!cross_entropy) {
-      cross_entropy = new Expr(sum(*Y[t] * log(*Yp[t]), axis=1));
-    } else {
-      *cross_entropy = *cross_entropy + sum(*Y[t] * log(*Yp[t]), axis=1);
-    }
+  Yp.emplace_back(softmax_fast(dot(H[0], Why) + by));
+  Expr cross_entropy = sum(Y[0] * log(Yp[0]), axis=1);
+  for (int t = 1; t < num_inputs; ++t) {
+    Yp.emplace_back(softmax_fast(dot(H[t], Why) + by));
+    cross_entropy = cross_entropy + sum(Y[t] * log(Yp[t]), axis=1);
   }
-  auto graph = -mean(*cross_entropy, axis=0, name="cost");
+  auto graph = -mean(cross_entropy, axis=0, name="cost");
 
   for (int t = 0; t < num_inputs; ++t) {
     Tensor Xt({batch_size, input_size});

@@ -72,17 +71,17 @@ int main(int argc, char** argv) {
     thrust::copy(values.begin(), values.end(), Xt.begin());
     thrust::copy(classes.begin(), classes.end(), Yt.begin());
 
-    *X[t] = Xt;
-    *Y[t] = Yt;
+    X[t] = Xt;
+    Y[t] = Yt;
   }
 
-  graph.forward(batch_size);
-  graph.backward();
+  g.forward(batch_size);
+  g.backward();
 
   std::cerr << graph.val().Debug() << std::endl;
 
-  std::cerr << X[0]->val().Debug() << std::endl;
-  std::cerr << Y[0]->val().Debug() << std::endl;
+  std::cerr << X[0].val().Debug() << std::endl;
+  std::cerr << Y[0].val().Debug() << std::endl;
 
   std::cerr << Whh.grad().Debug() << std::endl;
   std::cerr << bh.grad().Debug() << std::endl;
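
The main cleanup in test.cu is dropping heap-allocated Expr*: the containers
hold Expr by value, and the NULL-sentinel accumulation of cross_entropy becomes
seed-with-t=0, fold-from-t=1. The same shape as a self-contained sketch, with
plain floats standing in for Expr:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Seed-then-fold, as in the rewritten cross_entropy loop: initialize from
    // element 0, then accumulate 1..n-1 -- no pointer sentinel needed.
    float fold_sum(const std::vector<float>& xs) {
      assert(!xs.empty());
      float acc = xs[0];
      for (std::size_t t = 1; t < xs.size(); ++t)
        acc = acc + xs[t];
      return acc;
    }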
[file name not preserved in this view]

@@ -16,22 +16,24 @@ int main(int argc, char** argv) {
   using namespace marian;
   using namespace keywords;
 
-  Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
-  Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
+  ExpressionGraph g;
+
+  Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
+  Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
 
-  Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
-  Expr b = param(shape={1, LABEL_SIZE}, name="b0");
+  Expr w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE}), "w");
+  Expr b = named(g.param(shape={1, LABEL_SIZE}), "b");
 
   std::vector<Expr*> params;
   params.push_back(&w);
   params.push_back(&b);
 
   auto scores = dot(x, w) + b;
-  auto lr = softmax_fast(scores, axis=1, name="pred");
-  auto cost = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
+  auto lr = softmax_fast(scores);
+  auto cost = named(-mean(sum(y * log(lr), axis=1), axis=0), "cost");
   cerr << "lr=" << lr.Debug() << endl;
 
-  SGD opt(cost, x, y, params, 0.9, trainImages, IMAGE_SIZE, trainLabels, LABEL_SIZE, 3, 24);
+  SGD opt(g, 0.9, trainImages, IMAGE_SIZE, trainLabels, LABEL_SIZE, 3, 24);
   opt.Run();
   return 0;
 }
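
The named(...) wrappers here pair with the graph_["x"] and graph_["y"]
assignments in sgd.cu: registering an expression under a string key is what
lets SGD::Run() feed each batch into the right input. A hypothetical toy of
that registry, assuming a string-to-Expr map inside the graph; the real
expression_graph.h is not part of this diff:

    #include <iostream>
    #include <map>
    #include <string>

    struct Expr { std::string tag; };  // stand-in for marian's Expr handle

    struct Graph {
      std::map<std::string, Expr> named;  // assumed registry, not marian's code
      Expr& operator[](const std::string& k) { return named.at(k); }
    };

    // Toy named(): register the expression under a key, hand it back.
    Expr named(Graph& g, Expr e, const std::string& key) {
      g.named[key] = e;
      return e;
    }

    int main() {
      Graph g;
      Expr x = named(g, Expr{"input"}, "x");
      g["x"] = Expr{"batch 0"};              // what SGD::Run() does per batch
      std::cout << x.tag << " -> " << g["x"].tag << '\n';  // input -> batch 0
    }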
[file name not preserved in this view]

@@ -59,13 +59,15 @@ int main(int argc, char** argv) {
   std::cerr << "\tDone." << std::endl;
 
 
-  auto x = input(shape={whatevs, IMAGE_SIZE}, name="X");
-  auto y = input(shape={whatevs, LABEL_SIZE}, name="Y");
-
-  auto w1 = param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1);
-  auto b1 = param(shape={1, 100}, name="b0", init=initB1);
-  auto w2 = param(shape={100, LABEL_SIZE}, name="W1", init=initW2);
-  auto b2 = param(shape={1, LABEL_SIZE}, name="b1", init=initB2);
+  ExpressionGraph g;
+
+  auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
+  auto y = g.input(shape={whatevs, LABEL_SIZE}, name="Y");
+
+  auto w1 = g.param(shape={IMAGE_SIZE, 100}, name="W0", init=initW1);
+  auto b1 = g.param(shape={1, 100}, name="b0", init=initB1);
+  auto w2 = g.param(shape={100, LABEL_SIZE}, name="W1", init=initW2);
+  auto b2 = g.param(shape={1, LABEL_SIZE}, name="b1", init=initB2);
 
   std::cerr << "Building model...";
   auto layer1 = tanh(dot(x, w1) + b1);

@@ -86,7 +88,7 @@ int main(int argc, char** argv) {
     xt << tmp;
     x = xt;
 
-    predict.forward(BATCH_SIZE);
+    g.forward(BATCH_SIZE);
 
     std::vector<float> results(LABEL_SIZE * BATCH_SIZE);
     results << predict.val();

@@ -113,7 +115,7 @@ int main(int argc, char** argv) {
     xt << tmp;
     x = xt;
 
-    predict.forward(endId - startId);
+    g.forward(endId - startId);
 
     std::vector<float> results(LABEL_SIZE * BATCH_SIZE);
     results << predict.val();