From 3d177ccc28bad0f2990293c617ce64b30a3f150e Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Wed, 14 Sep 2016 14:43:27 +0200 Subject: [PATCH 1/3] small changes to test.cu --- src/graph.h | 4 ++-- src/tensor.h | 2 +- src/test.cu | 53 +++++++--------------------------------------------- 3 files changed, 10 insertions(+), 49 deletions(-) diff --git a/src/graph.h b/src/graph.h index 15b4721d..4df7f37c 100644 --- a/src/graph.h +++ b/src/graph.h @@ -19,11 +19,11 @@ struct Chainable { virtual const Shape& shape() = 0; virtual DataType val() = 0; virtual DataType grad() = 0; - virtual void setVal(Tensor t) { + virtual void setVal(DataType t) { UTIL_THROW2("Tensors can only be assigned to input nodes"); }; - typedef std::vector*> ChainableStack; + typedef std::vector*> ChainableStack; static ChainableStack stack; }; diff --git a/src/tensor.h b/src/tensor.h index 487a553a..cd4f642c 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -111,7 +111,7 @@ class TensorImpl { value_type operator[](size_t i) const { return data_[i]; } - + auto begin() -> decltype( data_.begin() ) { return data_.begin(); } diff --git a/src/test.cu b/src/test.cu index 4a2445fd..b5dbba18 100644 --- a/src/test.cu +++ b/src/test.cu @@ -8,20 +8,21 @@ int main(int argc, char** argv) { using namespace marian; using namespace keywords; - Expr x = input(shape={whatevs, 784}, name="X"); - Expr y = input(shape={whatevs, 10}, name="Y"); + Expr x = input(name="X"); + Expr y = input(name="Y"); Expr w = param(shape={784, 10}, name="W0"); Expr b = param(shape={1, 10}, name="b0"); - auto scores = dot(x, w) + b; - auto lr = softmax(scores, axis=1, name="pred"); - auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); - cerr << "lr=" << lr.Debug() << endl; + Expr pred = softmax(dot(x, w) + b, axis=1); + cerr << "lr=" << pred.Debug() << endl; + Expr graph = -mean(sum(y * log(pred), axis=1), + axis=0, name="cost"); Tensor tx({500, 784}, 1); Tensor ty({500, 10}, 1); + cerr << "tx=" << tx.Debug() << endl; cerr << "ty=" << ty.Debug() << endl; @@ -29,47 +30,7 @@ int main(int argc, char** argv) { y = ty; graph.forward(500); - - std::cerr << "Result: "; - for (auto val : scores.val().shape()) { - std::cerr << val << " "; - } - std::cerr << std::endl; - std::cerr << "Result: "; - for (auto val : lr.val().shape()) { - std::cerr << val << " "; - } - std::cerr << std::endl; - std::cerr << "Log-likelihood: "; - for (auto val : graph.val().shape()) { - std::cerr << val << " "; - } - std::cerr << std::endl; - graph.backward(); - //std::cerr << graph["pred"].val()[0] << std::endl; - -#if 0 - hook0(graph); - graph.autodiff(); - std::cerr << graph["cost"].val()[0] << std::endl; - //hook1(graph); - for(auto p : graph.params()) { - auto update = _1 = _1 - alpha * _2; - Element(update, p.val(), p.grad()); - } - hook2(graph); - - auto opt = adadelta(cost_function=cost, - eta=0.9, gamma=0.1, - set_batch=set, - before_update=before, - after_update=after, - set_valid=valid, - validation_freq=100, - verbose=1, epochs=3, early_stopping=10); - opt.run(); -#endif return 0; } From dcdfcd80cc71be3f6532879cf9de6f01cb7a78ec Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 14:34:26 +0000 Subject: [PATCH 2/3] less debug --- src/test.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test.cu b/src/test.cu index a78e182f..0285e3a5 100644 --- a/src/test.cu +++ b/src/test.cu @@ -68,7 +68,7 @@ int main(int argc, char** argv) { std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; //std::cerr << "scores=" << scores.val().Debug() << endl; - std::cerr << "lr=" << lr.val().Debug() << endl; + //std::cerr << "lr=" << lr.val().Debug() << endl; graph.backward(); From 37a73e20be23af49a53707fb1799e176187cf8bc Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Wed, 14 Sep 2016 16:48:01 +0200 Subject: [PATCH 3/3] changes names --- src/graph_operators.h | 33 +++++++++++++++++++++++++++++++++ src/test.cu | 26 +++++++++++++++----------- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/src/graph_operators.h b/src/graph_operators.h index 5a12f807..eb30ff29 100644 --- a/src/graph_operators.h +++ b/src/graph_operators.h @@ -101,6 +101,39 @@ struct TanhNodeOp : public UnaryNodeOp { } }; +struct ArgmaxOp : public UnaryNodeOp { + template + ArgmaxOp(ChainPtr a, Args ...args) + : UnaryNodeOp(a, keywords::shape=newShape(a, -1), args...), + axis_(-1) { } + + Shape newShape(ChainPtr a, int axis) { + Shape shape1 = a->shape(); + UTIL_THROW_IF2(shape1.size() > 2, + "Tensors with more than 2 dimensions not supported yet"); + if(axis == 0) { + shape1[0] = 1; + } + else if(axis == 1) { + shape1[1] = 1; + } + else { + shape1 = {1, 1}; + } + return shape1; + } + + void forward() { + //val_ = Argmax(a_->val(), axis_); + } + + void backward() {} + + private: + int axis_; +}; + + struct SoftmaxNodeOp : public UnaryNodeOp { template SoftmaxNodeOp(ChainPtr a, Args ...args) diff --git a/src/test.cu b/src/test.cu index a71939b4..0e9f9752 100644 --- a/src/test.cu +++ b/src/test.cu @@ -21,10 +21,14 @@ int main(int argc, char** argv) { Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); Expr b = param(shape={1, LABEL_SIZE}, name="b0"); - auto scores = dot(x, w) + b; - auto lr = softmax_fast(scores, axis=1, name="pred"); - auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); - cerr << "lr=" << lr.Debug() << endl; + auto z = dot(x, w) + b; + auto pred = softmax(z); + //auto decision = argmax(pred, axis=1); + + auto cost = -mean(sum(y * log(pred), axis=1), + axis=0); + + cerr << "pred=" << pred.Debug() << endl; #if 0 int numofdata; @@ -49,27 +53,27 @@ int main(int argc, char** argv) { x = tx; y = ty; - graph.forward(500); + cost.forward(500); std::cerr << "Result: "; - for (auto val : scores.val().shape()) { + for (auto val : pred.val().shape()) { std::cerr << val << " "; } std::cerr << std::endl; std::cerr << "Result: "; - for (auto val : lr.val().shape()) { + for (auto val : pred.val().shape()) { std::cerr << val << " "; } std::cerr << std::endl; - lr.val().Print(); + pred.val().Print(); std::cerr << "Log-likelihood: "; - for (auto val : graph.val().shape()) { + for (auto val : cost.val().shape()) { std::cerr << val << " "; } std::cerr << std::endl; - graph.val().Print(); + cost.val().Print(); - graph.backward(); + cost.backward(); //std::cerr << graph["pred"].val()[0] << std::endl;