diff --git a/src/expression_operators.h b/src/expression_operators.h
index 878f9882..253047d3 100644
--- a/src/expression_operators.h
+++ b/src/expression_operators.h
@@ -32,8 +32,8 @@ inline Expr zeroes(Args ...args) {
 
 /*********************************************************/
 
-inline Expr sigmoid(Expr a) {
-  return Expr(new SigmoidNodeOp(a));
+inline Expr logit(Expr a) {
+  return Expr(new LogitNodeOp(a));
 }
 
 inline Expr tanh(Expr a) {
diff --git a/src/graph_operators.h b/src/graph_operators.h
index f231103b..c7c0a057 100644
--- a/src/graph_operators.h
+++ b/src/graph_operators.h
@@ -76,9 +76,9 @@ struct UnaryNodeOp : public Node {
     a_(a) {}
 };
 
-struct SigmoidNodeOp : public UnaryNodeOp {
+struct LogitNodeOp : public UnaryNodeOp {
   template <typename ...Args>
-  SigmoidNodeOp(Args ...args)
+  LogitNodeOp(Args ...args)
   : UnaryNodeOp(args...) { }
 
   void forward() {
diff --git a/src/param_initializers.h b/src/param_initializers.h
index 5a04a25c..3e442c6e 100644
--- a/src/param_initializers.h
+++ b/src/param_initializers.h
@@ -22,7 +22,7 @@ void ones(Tensor t) {
 void randreal(Tensor t) {
   std::random_device device;
   std::default_random_engine engine(device());
-  std::uniform_real_distribution<> dist(0, 0.1);
+  std::uniform_real_distribution<> dist(0, 0.01);
   auto gen = std::bind(dist, engine);
 
   std::vector<float> vals(t.size());
diff --git a/src/tensor.h b/src/tensor.h
index b13e55fe..a32e9b04 100644
--- a/src/tensor.h
+++ b/src/tensor.h
@@ -221,6 +221,7 @@ class Tensor {
   }
 
   void get(std::vector<float> &vout) const {
+    vout.resize(size());
     pimpl_->get(vout.begin());
   }
 };
diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu
index 2226eb16..e9b5735d 100644
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@@ -41,7 +41,7 @@ int main(int argc, char** argv) {
     init=[bData](Tensor t) { t.set(bData); });
 
   auto probs = softmax(dot(x, w) + b, axis=1);
-  auto graph = -mean(sum(y * log(probs), axis=1), axis=0);
+  auto cost = -mean(sum(y * log(probs), axis=1), axis=0);
 
   std::cerr << "Done."
             << std::endl;
@@ -51,50 +51,49 @@ int main(int argc, char** argv) {
   x = xt << testImages;
   y = yt << testLabels;
 
-  graph.forward(BATCH_SIZE);
-  auto results = probs.val();
-  std::vector<float> resultsv(results.size());
-  resultsv << results;
+  cost.forward(BATCH_SIZE);
+
+  std::vector<float> results;
+  results << probs.val();
 
   size_t acc = 0;
   for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
     size_t correct = 0;
-    size_t probsed = 0;
+    size_t proposed = 0;
     for (size_t j = 0; j < LABEL_SIZE; ++j) {
       if (testLabels[i+j]) correct = j;
-      if (resultsv[i + j] > resultsv[i + probsed]) probsed = j;
+      if (results[i + j] > results[i + proposed]) proposed = j;
     }
-    acc += (correct == probsed);
+    acc += (correct == proposed);
   }
-  std::cerr << "Cost: " << graph.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
+  std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
   float eta = 0.1;
   for (size_t j = 0; j < 10; ++j) {
     for(size_t i = 0; i < 60; ++i) {
-      graph.backward();
+      cost.backward();
 
       auto update_rule = _1 -= eta * _2;
       Element(update_rule, w.val(), w.grad());
      Element(update_rule, b.val(), b.grad());
 
-      graph.forward(BATCH_SIZE);
+      cost.forward(BATCH_SIZE);
     }
     std::cerr << "Epoch: " << j << std::endl;
 
-    auto results = probs.val();
-    std::vector<float> resultsv(results.size());
-    resultsv << results;
+    std::vector<float> results;
+    results << probs.val();
 
     size_t acc = 0;
     for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
       size_t correct = 0;
-      size_t probsed = 0;
+      size_t proposed = 0;
       for (size_t j = 0; j < LABEL_SIZE; ++j) {
         if (testLabels[i+j]) correct = j;
-        if (resultsv[i + j] > resultsv[i + probsed]) probsed = j;
+        if (results[i + j] > results[i + proposed]) proposed = j;
       }
-      acc += (correct == probsed);
+      acc += (correct == proposed);
     }
-    std::cerr << "Cost: " << graph.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
+    std::cerr << "Cost: " << cost.val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
   }
   return 0;
 }
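
For reference, here is a minimal standalone sketch of the argmax-accuracy check that the two loops above duplicate (the helper name accuracy and its signature are illustrative only, not part of this patch). It assumes flat row-major probabilities and one-hot labels, and it divides by the number of label rows rather than by BATCH_SIZE; the two coincide only when the test set holds exactly BATCH_SIZE examples.

    #include <cstddef>
    #include <vector>

    // probs and labels are flat row-major arrays of [rows x labelSize];
    // each label row is one-hot.
    float accuracy(const std::vector<float>& probs,
                   const std::vector<float>& labels,
                   std::size_t labelSize) {
      std::size_t acc = 0;
      for (std::size_t i = 0; i < labels.size(); i += labelSize) {
        std::size_t correct = 0;   // position of the 1 in the label row
        std::size_t proposed = 0;  // running argmax of the predicted row
        for (std::size_t j = 0; j < labelSize; ++j) {
          if (labels[i + j]) correct = j;
          if (probs[i + j] > probs[i + proposed]) proposed = j;
        }
        acc += (correct == proposed);
      }
      return float(acc) / float(labels.size() / labelSize);
    }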