This commit is contained in:
Hieu Hoang 2016-09-13 18:30:56 +02:00
commit 8f25a1d4bf
6 changed files with 173 additions and 56 deletions

1
.gitignore vendored
View File

@@ -39,3 +39,4 @@ build
# Examples
examples/*/*.gz
examples/mnist/*ubyte

View File

@ -2,10 +2,13 @@
all: download
download: train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx3-ubyte.gz
download: train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
%.gz:
wget http://yann.lecun.com/exdb/mnist/$*.gz -O $@
%-ubyte: %-ubyte.gz
gzip -d < $^ > $@
%-ubyte.gz:
wget http://yann.lecun.com/exdb/mnist/$*-ubyte.gz -O $@
clean:
rm -f *.gz
rm -f *.gz *-ubyte

View File

@ -134,6 +134,7 @@ inline Expr sum(Expr a, Args ...args) {
else if(ax == 1) {
auto lshape = [n]() -> Shape {
int cols = n->val().shape()[1];
//std::cerr << "Shape will be " << cols << " by 1." << std::endl;
return {cols, 1};
};
Expr one = ones(shape={n->shape()[1], 1},
@ -153,6 +154,20 @@ inline Expr sum(Expr a, Args ...args) {
// Softmax of expression `a`: exp(a) normalized by sum(exp(a), args...).
// Extra keyword arguments (e.g. axis=1) are forwarded to sum(), which
// determines the normalization axis.
// NOTE(review): no max-subtraction is performed before exp(), so this can
// overflow/produce inf for large activations — confirm inputs are bounded.
template <typename ...Args>
inline Expr softmax(Expr a, Args ...args) {
Expr e = exp(a);
#if 0
// Compiled-out debug scaffolding: lazily prints the shape of `a` by
// attaching a dummy ones() node whose lazy_shape callback does the printing.
ChainPtr n = a.node();
auto print_shape = [n]() -> Shape {
std::cerr << "Shape: ";
for (auto val : n->val().shape()) {
std::cerr << val << " ";
}
std::cerr << std::endl;
return {1,1};
};
using namespace keywords;
Expr one = ones(shape={1, 1}, lazy_shape=print_shape);
#endif
return e / sum(e, args...);
}
@ -187,4 +202,4 @@ inline Expr mean(Expr a, Args ...args) {
}
}
}
}

View File

@ -118,9 +118,15 @@ struct LogNodeOp : public UnaryNodeOp {
struct ExpNodeOp : public UnaryNodeOp {
template <typename ...Args>
ExpNodeOp(Args ...args)
: UnaryNodeOp(args...) { }
ExpNodeOp(ChainPtr a, Args ...args)
: UnaryNodeOp(a, keywords::shape=newShape(a),
args...) { }
Shape newShape(ChainPtr a) {
Shape shape = a->shape();
return shape;
}
void forward() {
Element(_1 = Exp(_2), val_, a_->val());
}
@ -289,4 +295,4 @@ struct DivNodeOp : public BroadcastingNodeOp {
}
};
}
}

94
src/mnist.h Normal file
View File

@@ -0,0 +1,94 @@
#pragma once
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
namespace datasets {
namespace mnist {

typedef unsigned char uchar;

// Byte-swap a 32-bit integer. MNIST/IDX file headers are stored big-endian,
// so they must be reversed on little-endian hosts.
auto reverseInt = [](int i) {
  unsigned char c1 = i & 255;
  unsigned char c2 = (i >> 8) & 255;
  unsigned char c3 = (i >> 16) & 255;
  unsigned char c4 = (i >> 24) & 255;
  return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4;
};

// Reads an MNIST image file (idx3-ubyte format).
// Returns one vector per image (row-major pixels), with each pixel scaled
// from 0..255 to [0, 1].
// Throws std::runtime_error if the file cannot be opened or the magic
// number does not identify an MNIST image file.
std::vector<std::vector<float>> ReadImages(const std::string& full_path) {
  // Open in binary mode: the file is raw bytes, and text mode would
  // translate/strip bytes on some platforms (e.g. Windows CRLF handling).
  std::ifstream file(full_path, std::ios::binary);

  if (! file.is_open())
    throw std::runtime_error("Cannot open file `" + full_path + "`!");

  int magic_number = 0, n_rows = 0, n_cols = 0;

  file.read((char *)&magic_number, sizeof(magic_number));
  magic_number = reverseInt(magic_number);

  if (magic_number != 2051)
    throw std::runtime_error("Invalid MNIST image file!");

  int number_of_images = 0;
  file.read((char *)&number_of_images, sizeof(number_of_images));
  number_of_images = reverseInt(number_of_images);
  file.read((char *)&n_rows, sizeof(n_rows));
  n_rows = reverseInt(n_rows);
  file.read((char *)&n_cols, sizeof(n_cols));
  n_cols = reverseInt(n_cols);

  int image_size = n_rows * n_cols;
  std::vector<std::vector<float>> _dataset(number_of_images, std::vector<float>(image_size));
  unsigned char pixel = 0;
  for (int i = 0; i < number_of_images; i++) {
    for (int j = 0; j < image_size; j++) {
      file.read((char*)&pixel, sizeof(pixel));
      _dataset[i][j] = pixel / 255.0f;  // normalize 0..255 -> [0, 1]
    }
  }
  return _dataset;
}

// Reads an MNIST label file (idx1-ubyte format) and returns one int label
// (0..9) per example.
// Throws std::runtime_error if the file cannot be opened or the magic
// number does not identify an MNIST label file.
std::vector<int> ReadLabels(const std::string& full_path) {
  // Binary mode for the same reason as ReadImages.
  std::ifstream file(full_path, std::ios::binary);

  if (! file.is_open())
    throw std::runtime_error("Cannot open file `" + full_path + "`!");

  int magic_number = 0;
  file.read((char *)&magic_number, sizeof(magic_number));
  magic_number = reverseInt(magic_number);

  if (magic_number != 2049)
    throw std::runtime_error("Invalid MNIST label file!");

  int number_of_labels = 0;
  file.read((char *)&number_of_labels, sizeof(number_of_labels));
  number_of_labels = reverseInt(number_of_labels);

  std::vector<int> _dataset(number_of_labels);
  for (int i = 0; i < number_of_labels; i++) {
    // Read the single label byte into an unsigned char and assign it,
    // instead of aliasing the first byte of an int — the old code only
    // worked on little-endian machines (and relied on zero-initialization).
    unsigned char label = 0;
    file.read((char*)&label, sizeof(label));
    _dataset[i] = label;
  }
  return _dataset;
}

} // namespace mnist
} // namespace datasets
//int main(int argc, const char *argv[]) {
//auto images = datasets::mnist::ReadImages("t10k-images-idx3-ubyte");
//auto labels = datasets::mnist::ReadLabels("t10k-labels-idx1-ubyte");
//std::cout
//<< "Number of images: " << images.size() << std::endl
//<< "Image size: " << images[0].size() << std::endl;
//for (int i = 0; i < 3; i++) {
//for (int j = 0; j < images[i].size(); j++) {
//std::cout << images[i][j] << ",";
//}
//std::cout << " label=" << (int)labels[i] << std::endl;
//}
//return 0;
//}

View File

@ -1,26 +1,28 @@
#include "marian.h"
#include "mnist.h"
using namespace std;
int main(int argc, char** argv) {
/*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte");*/
/*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte");*/
/*std::cerr << images.size() << " " << images[0].size() << std::endl;*/
using namespace marian;
using namespace keywords;
/*
Expr x = input(shape={whatevs, 784}, name="X");
Expr y = input(shape={whatevs, 10}, name="Y");
Expr w = param(shape={784, 10}, name="W0");
Expr b = param(shape={1, 10}, name="b0");
Expr n5 = dot(x, w);
Expr n6 = n5 + b;
Expr lr = softmax(n6, axis=1, name="pred");
auto scores = dot(x, w) + b;
auto lr = softmax(scores, axis=1, name="pred");
auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
cerr << "lr=" << lr.Debug() << endl;
Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
Tensor tx({500, 784}, 1);
Tensor ty({500, 10}, 1);
@ -31,51 +33,47 @@ int main(int argc, char** argv) {
y = ty;
graph.forward(500);
std::cerr << "Result: ";
for (auto val : scores.val().shape()) {
std::cerr << val << " ";
}
std::cerr << std::endl;
std::cerr << "Result: ";
for (auto val : lr.val().shape()) {
std::cerr << val << " ";
}
std::cerr << std::endl;
std::cerr << "Log-likelihood: ";
for (auto val : graph.val().shape()) {
std::cerr << val << " ";
}
std::cerr << std::endl;
graph.backward();
//std::cerr << graph["pred"].val()[0] << std::endl;
*/
Expr x = input(shape={whatevs, 2}, name="X");
Expr y = input(shape={whatevs, 2}, name="Y");
Expr w = param(shape={2, 1}, name="W0");
Expr b = param(shape={1, 1}, name="b0");
Expr n5 = dot(x, w);
Expr n6 = n5 + b;
Expr lr = softmax(n6, axis=1, name="pred");
cerr << "lr=" << lr.Debug() << endl;
Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
Tensor tx({4, 2}, 1);
Tensor ty({4, 1}, 1);
cerr << "tx=" << tx.Debug() << endl;
cerr << "ty=" << ty.Debug() << endl;
tx.Load("../examples/xor/train.txt");
ty.Load("../examples/xor/label.txt");
//hook0(graph);
//graph.autodiff();
//std::cerr << graph["cost"].val()[0] << std::endl;
#if 0
hook0(graph);
graph.autodiff();
std::cerr << graph["cost"].val()[0] << std::endl;
//hook1(graph);
//for(auto p : graph.params()) {
// auto update = _1 = _1 - alpha * _2;
// Element(update, p.val(), p.grad());
//}
//hook2(graph);
//
//auto opt = adadelta(cost_function=cost,
// eta=0.9, gamma=0.1,
// set_batch=set,
// before_update=before,
// after_update=after,
// set_valid=valid,
// validation_freq=100,
// verbose=1, epochs=3, early_stopping=10);
//opt.run();
for(auto p : graph.params()) {
auto update = _1 = _1 - alpha * _2;
Element(update, p.val(), p.grad());
}
hook2(graph);
auto opt = adadelta(cost_function=cost,
eta=0.9, gamma=0.1,
set_batch=set,
before_update=before,
after_update=after,
set_valid=valid,
validation_freq=100,
verbose=1, epochs=3, early_stopping=10);
opt.run();
#endif
return 0;
}