From 0b64c9639d150ce279f56560e8979183f8a0672a Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 17:08:57 +0200 Subject: [PATCH 01/19] simple xor load --- src/test.cu | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/test.cu b/src/test.cu index 8716e8c4..977cb8fb 100644 --- a/src/test.cu +++ b/src/test.cu @@ -53,6 +53,17 @@ int main(int argc, char** argv) { // validation_freq=100, // verbose=1, epochs=3, early_stopping=10); //opt.run(); + + Expr x2 = input(shape={whatevs, 2}, name="X2"); + Expr y2 = input(shape={whatevs, 2}, name="Y2"); + Expr w2 = param(shape={2, 1}, name="W02"); + Expr b2 = param(shape={1, 1}, name="b02"); + + Expr n52 = dot(x2, w2); + Expr n62 = n52 + b2; + Expr lr2 = softmax(n62, axis=1, name="pred2"); + cerr << "lr=" << lr2.Debug() << endl; + return 0; } From 75d81a0c1516cbee30da70748309c0ac2a9d63fd Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 17:32:31 +0200 Subject: [PATCH 02/19] load text --- marian/.cproject | 6 +++--- src/CMakeLists.txt | 1 + src/tensor.cu | 23 +++++++++++++++++++++++ src/tensor.h | 2 ++ src/test.cu | 35 ++++++++++++++++++++++------------- 5 files changed, 51 insertions(+), 16 deletions(-) create mode 100644 src/tensor.cu diff --git a/marian/.cproject b/marian/.cproject index 29c37771..195ef668 100644 --- a/marian/.cproject +++ b/marian/.cproject @@ -56,11 +56,11 @@ - - + + - + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 244977db..49832492 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,6 +10,7 @@ cuda_add_executable( test.cu expressions.cu tensor_operators.cu + tensor.cu $ ) diff --git a/src/tensor.cu b/src/tensor.cu new file mode 100644 index 00000000..a8a3418a --- /dev/null +++ b/src/tensor.cu @@ -0,0 +1,23 @@ +#include +#include "tensor.h" + +using namespace std; + +namespace marian { + +void Tensor::Load(const std::string &path) +{ + fstream strm; + strm.open(path.c_str()); + + string line; + while ( getline (strm, line) ) + { + cerr << line << '\n'; + } + strm.close(); + +} + +} + diff --git a/src/tensor.h b/src/tensor.h index 487a553a..dfb289ea 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -240,6 +240,8 @@ class Tensor { return pimpl_->Debug(); } + void Load(const std::string &path); + }; } diff --git a/src/test.cu b/src/test.cu index 977cb8fb..d8fb1423 100644 --- a/src/test.cu +++ b/src/test.cu @@ -8,6 +8,7 @@ int main(int argc, char** argv) { using namespace marian; using namespace keywords; + /* Expr x = input(shape={whatevs, 784}, name="X"); Expr y = input(shape={whatevs, 10}, name="Y"); @@ -30,10 +31,29 @@ int main(int argc, char** argv) { y = ty; graph.forward(500); - graph.backward(); //std::cerr << graph["pred"].val()[0] << std::endl; - + */ + + Expr x = input(shape={whatevs, 2}, name="X"); + Expr y = input(shape={whatevs, 2}, name="Y"); + + Expr w = param(shape={2, 1}, name="W0"); + Expr b = param(shape={1, 1}, name="b0"); + + Expr n5 = dot(x, w); + Expr n6 = n5 + b; + Expr lr = softmax(n6, axis=1, name="pred"); + cerr << "lr=" << lr.Debug() << endl; + + Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); + + Tensor tx({4, 2}, 1); + Tensor ty({4, 1}, 1); + tx.Load("/Users/hieu/workspace/experiment/issues/marian/1st/train.txt"); + ty.Load("/Users/hieu/workspace/experiment/issues/marian/1st/label.txt"); + + //hook0(graph); //graph.autodiff(); //std::cerr << graph["cost"].val()[0] << std::endl; @@ -54,16 +74,5 @@ int main(int argc, char** argv) { // verbose=1, epochs=3, early_stopping=10); //opt.run(); - Expr x2 = input(shape={whatevs, 2}, name="X2"); - Expr y2 = input(shape={whatevs, 2}, name="Y2"); - - Expr w2 = param(shape={2, 1}, name="W02"); - Expr b2 = param(shape={1, 1}, name="b02"); - - Expr n52 = dot(x2, w2); - Expr n62 = n52 + b2; - Expr lr2 = softmax(n62, axis=1, name="pred2"); - cerr << "lr=" << lr2.Debug() << endl; - return 0; } From 6d74d57453d54d83916d9072b67a1297ae913f70 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 17:34:20 +0200 Subject: [PATCH 03/19] add xor example --- examples/xor/label.txt | 4 ++++ examples/xor/train.txt | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 examples/xor/label.txt create mode 100644 examples/xor/train.txt diff --git a/examples/xor/label.txt b/examples/xor/label.txt new file mode 100644 index 00000000..d9ff83f1 --- /dev/null +++ b/examples/xor/label.txt @@ -0,0 +1,4 @@ +1 +1 +0 +0 diff --git a/examples/xor/train.txt b/examples/xor/train.txt new file mode 100644 index 00000000..e6610d1d --- /dev/null +++ b/examples/xor/train.txt @@ -0,0 +1,4 @@ +0 0 +1 1 +1 0 +0 1 \ No newline at end of file From 057d200a406888d236b9a3ab1d2f646c4eb01c49 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 17:35:02 +0200 Subject: [PATCH 04/19] load text --- src/test.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test.cu b/src/test.cu index d8fb1423..5657bf20 100644 --- a/src/test.cu +++ b/src/test.cu @@ -50,8 +50,8 @@ int main(int argc, char** argv) { Tensor tx({4, 2}, 1); Tensor ty({4, 1}, 1); - tx.Load("/Users/hieu/workspace/experiment/issues/marian/1st/train.txt"); - ty.Load("/Users/hieu/workspace/experiment/issues/marian/1st/label.txt"); + tx.Load("../examples/xor/train.txt"); + ty.Load("../examples/xor/label.txt"); //hook0(graph); From 9ee6a60a7f76e498d49626d72d3ed119cb5c0c7e Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 17:52:57 +0200 Subject: [PATCH 05/19] load text --- src/tensor.cu | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/test.cu | 3 +++ 2 files changed, 62 insertions(+) diff --git a/src/tensor.cu b/src/tensor.cu index a8a3418a..cd57c3b0 100644 --- a/src/tensor.cu +++ b/src/tensor.cu @@ -5,15 +5,74 @@ using namespace std; namespace marian { +inline std::vector Tokenize(const std::string& str, + const std::string& delimiters = " \t") +{ + std::vector tokens; + // Skip delimiters at beginning. + std::string::size_type lastPos = str.find_first_not_of(delimiters, 0); + // Find first "non-delimiter". + std::string::size_type pos = str.find_first_of(delimiters, lastPos); + + while (std::string::npos != pos || std::string::npos != lastPos) { + // Found a token, add it to the vector. + tokens.push_back(str.substr(lastPos, pos - lastPos)); + // Skip delimiters. Note the "not_of" + lastPos = str.find_first_not_of(delimiters, pos); + // Find next "non-delimiter" + pos = str.find_first_of(delimiters, lastPos); + } + + return tokens; +} + +//! convert string to variable of type T. Used to reading floats, int etc from files +template +T Scan(const std::string &input) +{ + std::stringstream stream(input); + T ret; + stream >> ret; + return ret; +} + +//! convert vectors of string to vectors of type T variables +template +inline std::vector Scan(const std::vector< std::string > &input) +{ + std::vector output(input.size()); + for (size_t i = 0 ; i < input.size() ; i++) { + output[i] = Scan( input[i] ); + } + return output; +} + +//! tokenise input string to vector of type T +template +inline std::vector Tokenize( const std::string &input + , const std::string& delimiters = " \t") +{ + std::vector stringVector = Tokenize(input, delimiters); + return Scan( stringVector ); +} + + void Tensor::Load(const std::string &path) { fstream strm; strm.open(path.c_str()); + size_t lineNum = 0; string line; while ( getline (strm, line) ) { cerr << line << '\n'; + vector toks = Tokenize(line); + for (size_t i = 0; i < toks.size(); ++i) { + //(*this)[lineNum][i] = toks[i]; + } + + ++lineNum; } strm.close(); diff --git a/src/test.cu b/src/test.cu index 5657bf20..4e382141 100644 --- a/src/test.cu +++ b/src/test.cu @@ -50,6 +50,9 @@ int main(int argc, char** argv) { Tensor tx({4, 2}, 1); Tensor ty({4, 1}, 1); + cerr << "tx=" << tx.Debug() << endl; + cerr << "ty=" << ty.Debug() << endl; + tx.Load("../examples/xor/train.txt"); ty.Load("../examples/xor/label.txt"); From cc7a48310f19423f269f19330c32090b866d3c90 Mon Sep 17 00:00:00 2001 From: romang Date: Tue, 13 Sep 2016 18:08:45 +0200 Subject: [PATCH 06/19] add functions loading MNIST dataset --- .gitignore | 1 + examples/mnist/Makefile | 7 ++- src/mnist.h | 94 +++++++++++++++++++++++++++++++++++++++++ src/test.cu | 4 ++ 4 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 src/mnist.h diff --git a/.gitignore b/.gitignore index 4dfd397b..53468680 100644 --- a/.gitignore +++ b/.gitignore @@ -39,3 +39,4 @@ build # Examples examples/*/*.gz +examples/mnist/*ubyte diff --git a/examples/mnist/Makefile b/examples/mnist/Makefile index 7e4e812f..26f65554 100644 --- a/examples/mnist/Makefile +++ b/examples/mnist/Makefile @@ -2,9 +2,12 @@ all: download -download: train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx3-ubyte.gz +download: train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte -%.gz: +%-ubyte: %-ubyte.gz + gzip -d < $^ > $@ + +%-ubyte.gz: wget http://yann.lecun.com/exdb/mnist/$*.gz -O $@ clean: diff --git a/src/mnist.h b/src/mnist.h new file mode 100644 index 00000000..7727bacc --- /dev/null +++ b/src/mnist.h @@ -0,0 +1,94 @@ +#pragma once + +#include +#include +#include +#include + +namespace datasets { +namespace mnist { + +typedef unsigned char uchar; + +auto reverseInt = [](int i) { + unsigned char c1, c2, c3, c4; + c1 = i & 255, c2 = (i >> 8) & 255, c3 = (i >> 16) & 255, c4 = (i >> 24) & 255; + return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4; +}; + +std::vector> ReadImages(const std::string& full_path) { + std::ifstream file(full_path); + + if (! file.is_open()) + throw std::runtime_error("Cannot open file `" + full_path + "`!"); + + int magic_number = 0, n_rows = 0, n_cols = 0; + + file.read((char *)&magic_number, sizeof(magic_number)); + magic_number = reverseInt(magic_number); + + if (magic_number != 2051) + throw std::runtime_error("Invalid MNIST image file!"); + + int number_of_images = 0; + file.read((char *)&number_of_images, sizeof(number_of_images)), number_of_images = reverseInt(number_of_images); + file.read((char *)&n_rows, sizeof(n_rows)), n_rows = reverseInt(n_rows); + file.read((char *)&n_cols, sizeof(n_cols)), n_cols = reverseInt(n_cols); + + int image_size = n_rows * n_cols; + std::vector> _dataset(number_of_images, std::vector(image_size)); + unsigned char pixel = 0; + + for (int i = 0; i < number_of_images; i++) { + for (int j = 0; j < image_size; j++) { + file.read((char*)&pixel, sizeof(pixel)); + _dataset[i][j] = pixel / 255.0f; + } + } + return _dataset; +} + +std::vector ReadLabels(const std::string& full_path) { + std::ifstream file(full_path); + + if (! file.is_open()) + throw std::runtime_error("Cannot open file `" + full_path + "`!"); + + int magic_number = 0; + file.read((char *)&magic_number, sizeof(magic_number)); + magic_number = reverseInt(magic_number); + + if (magic_number != 2049) + throw std::runtime_error("Invalid MNIST label file!"); + + int number_of_labels = 0; + file.read((char *)&number_of_labels, sizeof(number_of_labels)), number_of_labels = reverseInt(number_of_labels); + + std::vector _dataset(number_of_labels); + for (int i = 0; i < number_of_labels; i++) { + file.read((char*)&_dataset[i], 1); + } + + return _dataset; +} + +} // namespace mnist +} // namespace datasets + + +//int main(int argc, const char *argv[]) { + //auto images = datasets::mnist::ReadImages("t10k-images-idx3-ubyte"); + //auto labels = datasets::mnist::ReadLabels("t10k-labels-idx1-ubyte"); + + //std::cout + //<< "Number of images: " << images.size() << std::endl + //<< "Image size: " << images[0].size() << std::endl; + + //for (int i = 0; i < 3; i++) { + //for (int j = 0; j < images[i].size(); j++) { + //std::cout << images[i][j] << ","; + //} + //std::cout << " label=" << (int)labels[i] << std::endl; + //} + //return 0; +//} diff --git a/src/test.cu b/src/test.cu index 4a2445fd..c2b0d62e 100644 --- a/src/test.cu +++ b/src/test.cu @@ -1,9 +1,13 @@ #include "marian.h" +#include "mnist.h" using namespace std; int main(int argc, char** argv) { + /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte");*/ + /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte");*/ + /*std::cerr << images.size() << " " << images[0].size() << std::endl;*/ using namespace marian; using namespace keywords; From d45f88af6db0fa7fca3877448c810533347ba6f9 Mon Sep 17 00:00:00 2001 From: Roman Grundkiewicz Date: Tue, 13 Sep 2016 18:20:44 +0200 Subject: [PATCH 07/19] fix unpacking data --- examples/mnist/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/mnist/Makefile b/examples/mnist/Makefile index 26f65554..051d5a60 100644 --- a/examples/mnist/Makefile +++ b/examples/mnist/Makefile @@ -8,7 +8,7 @@ download: train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte gzip -d < $^ > $@ %-ubyte.gz: - wget http://yann.lecun.com/exdb/mnist/$*.gz -O $@ + wget http://yann.lecun.com/exdb/mnist/$*-ubyte.gz -O $@ clean: - rm -f *.gz + rm -f *.gz *-ubyte From a8a664ca36a9a5d36132ab7fbbb5088ede08de59 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 18:27:14 +0200 Subject: [PATCH 08/19] set value in tensor --- src/tensor.cu | 4 ++-- src/tensor.h | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/tensor.cu b/src/tensor.cu index cd57c3b0..4711e62b 100644 --- a/src/tensor.cu +++ b/src/tensor.cu @@ -67,9 +67,9 @@ void Tensor::Load(const std::string &path) while ( getline (strm, line) ) { cerr << line << '\n'; - vector toks = Tokenize(line); + vector toks = Tokenize(line); for (size_t i = 0; i < toks.size(); ++i) { - //(*this)[lineNum][i] = toks[i]; + pimpl_->set(toks[i], lineNum, i); } ++lineNum; diff --git a/src/tensor.h b/src/tensor.h index dfb289ea..fea9398c 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -152,6 +152,12 @@ class TensorImpl { thrust::fill(data_.begin(), data_.end(), value); } + void set(value_type value, size_t x, size_t y) { + assert(shape().size() == 2); + size_t sizeRow = sizeof(Float) * shape()[1]; + data_[x + sizeRow * y] = value; + } + std::string Debug() const { std::stringstream strm; From 6935334b64bcbd351b883d7a010742225d1b86ab Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 18:33:44 +0200 Subject: [PATCH 09/19] xor --- src/test.cu | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/test.cu b/src/test.cu index c2b0d62e..ed43f052 100644 --- a/src/test.cu +++ b/src/test.cu @@ -12,6 +12,7 @@ int main(int argc, char** argv) { using namespace marian; using namespace keywords; + /* Expr x = input(shape={whatevs, 784}, name="X"); Expr y = input(shape={whatevs, 10}, name="Y"); @@ -53,7 +54,29 @@ int main(int argc, char** argv) { graph.backward(); //std::cerr << graph["pred"].val()[0] << std::endl; + */ + Expr x = input(shape={whatevs, 2}, name="X"); + Expr y = input(shape={whatevs, 2}, name="Y"); + + Expr w = param(shape={2, 1}, name="W0"); + Expr b = param(shape={1, 1}, name="b0"); + + Expr n5 = dot(x, w); + Expr n6 = n5 + b; + Expr lr = softmax(n6, axis=1, name="pred"); + cerr << "lr=" << lr.Debug() << endl; + + Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); + + Tensor tx({4, 2}, 1); + Tensor ty({4, 1}, 1); + cerr << "tx=" << tx.Debug() << endl; + cerr << "ty=" << ty.Debug() << endl; + + tx.Load("../examples/xor/train.txt"); + ty.Load("../examples/xor/label.txt"); + #if 0 hook0(graph); graph.autodiff(); From 8dcdf8f28a9bd619a6d67929d0947517945801a1 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 18:55:48 +0200 Subject: [PATCH 10/19] set value in tensor --- src/tensor.cu | 12 +++++++++--- src/tensor.h | 6 ++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/tensor.cu b/src/tensor.cu index 4711e62b..95c684b3 100644 --- a/src/tensor.cu +++ b/src/tensor.cu @@ -59,23 +59,29 @@ inline std::vector Tokenize( const std::string &input void Tensor::Load(const std::string &path) { + size_t totSize = std::accumulate(pimpl_->shape().begin(), pimpl_->shape().end(), + 1, std::multiplies()); + cerr << "totSize=" << totSize << endl; + std::vector hostData(totSize); + fstream strm; strm.open(path.c_str()); - size_t lineNum = 0; string line; + size_t ind = 0; while ( getline (strm, line) ) { cerr << line << '\n'; vector toks = Tokenize(line); for (size_t i = 0; i < toks.size(); ++i) { - pimpl_->set(toks[i], lineNum, i); + hostData[ind] = toks[i]; } - ++lineNum; + ++ind; } strm.close(); + } } diff --git a/src/tensor.h b/src/tensor.h index fea9398c..e8ff92bf 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -152,10 +152,8 @@ class TensorImpl { thrust::fill(data_.begin(), data_.end(), value); } - void set(value_type value, size_t x, size_t y) { - assert(shape().size() == 2); - size_t sizeRow = sizeof(Float) * shape()[1]; - data_[x + sizeRow * y] = value; + void set(const std::vector &values) { + thrust::copy(values.begin(), values.end(), data_.begin()); } std::string Debug() const From 86a58060bca41a0273e74f2fab7f9cafc2bbdaae Mon Sep 17 00:00:00 2001 From: romang Date: Tue, 13 Sep 2016 18:56:19 +0200 Subject: [PATCH 11/19] MNIST loader returns 1d vector --- src/mnist.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/mnist.h b/src/mnist.h index 7727bacc..8e94931f 100644 --- a/src/mnist.h +++ b/src/mnist.h @@ -1,4 +1,4 @@ -#pragma once +//#pragma once #include #include @@ -16,7 +16,7 @@ auto reverseInt = [](int i) { return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4; }; -std::vector> ReadImages(const std::string& full_path) { +std::vector ReadImages(const std::string& full_path, int& number_of_images, int& image_size) { std::ifstream file(full_path); if (! file.is_open()) @@ -30,20 +30,18 @@ std::vector> ReadImages(const std::string& full_path) { if (magic_number != 2051) throw std::runtime_error("Invalid MNIST image file!"); - int number_of_images = 0; file.read((char *)&number_of_images, sizeof(number_of_images)), number_of_images = reverseInt(number_of_images); file.read((char *)&n_rows, sizeof(n_rows)), n_rows = reverseInt(n_rows); file.read((char *)&n_cols, sizeof(n_cols)), n_cols = reverseInt(n_cols); - int image_size = n_rows * n_cols; - std::vector> _dataset(number_of_images, std::vector(image_size)); + image_size = n_rows * n_cols; + int n = number_of_images * image_size; + std::vector _dataset(n); unsigned char pixel = 0; - for (int i = 0; i < number_of_images; i++) { - for (int j = 0; j < image_size; j++) { - file.read((char*)&pixel, sizeof(pixel)); - _dataset[i][j] = pixel / 255.0f; - } + for (int i = 0; i < n; i++) { + file.read((char*)&pixel, sizeof(pixel)); + _dataset[i] = pixel / 255.0f; } return _dataset; } @@ -77,16 +75,17 @@ std::vector ReadLabels(const std::string& full_path) { //int main(int argc, const char *argv[]) { - //auto images = datasets::mnist::ReadImages("t10k-images-idx3-ubyte"); - //auto labels = datasets::mnist::ReadLabels("t10k-labels-idx1-ubyte"); + //int numImg, imgSize; + //auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg, imgSize); + //auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); //std::cout - //<< "Number of images: " << images.size() << std::endl - //<< "Image size: " << images[0].size() << std::endl; + //<< "Number of images: " << numImg << std::endl + //<< "Image size: " << imgSize << std::endl; //for (int i = 0; i < 3; i++) { - //for (int j = 0; j < images[i].size(); j++) { - //std::cout << images[i][j] << ","; + //for (int j = 0; j < imgSize; j++) { + //std::cout << images[(i * imgSize) + j] << ","; //} //std::cout << " label=" << (int)labels[i] << std::endl; //} From 482c0df90a14e83fddca26202d83c8eabbc60250 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 19:07:12 +0200 Subject: [PATCH 12/19] load --- marian/.project | 5 +++++ src/tensor.cu | 5 +++++ src/tensor.h | 1 + src/test.cu | 7 +++++++ 4 files changed, 18 insertions(+) diff --git a/marian/.project b/marian/.project index 215485f6..e5b195c5 100644 --- a/marian/.project +++ b/marian/.project @@ -85,6 +85,11 @@ 1 PARENT-1-PROJECT_LOC/src/marian.h + + mnist.h + 1 + PARENT-1-PROJECT_LOC/src/mnist.h + tensor.cu 1 diff --git a/src/tensor.cu b/src/tensor.cu index 95c684b3..6048bb43 100644 --- a/src/tensor.cu +++ b/src/tensor.cu @@ -81,7 +81,12 @@ void Tensor::Load(const std::string &path) } strm.close(); + Load(hostData); +} +void Tensor::Load(const std::vector &values) +{ + pimpl_->set(values); } } diff --git a/src/tensor.h b/src/tensor.h index e8ff92bf..a801cd2a 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -245,6 +245,7 @@ class Tensor { } void Load(const std::string &path); + void Load(const std::vector &values); }; diff --git a/src/test.cu b/src/test.cu index ed43f052..1e07bdb3 100644 --- a/src/test.cu +++ b/src/test.cu @@ -27,6 +27,11 @@ int main(int argc, char** argv) { Tensor tx({500, 784}, 1); Tensor ty({500, 10}, 1); + + int numImg, imgSize; + vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg, imgSize); + vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); + cerr << "tx=" << tx.Debug() << endl; cerr << "ty=" << ty.Debug() << endl; @@ -56,6 +61,7 @@ int main(int argc, char** argv) { //std::cerr << graph["pred"].val()[0] << std::endl; */ + // XOR Expr x = input(shape={whatevs, 2}, name="X"); Expr y = input(shape={whatevs, 2}, name="Y"); @@ -77,6 +83,7 @@ int main(int argc, char** argv) { tx.Load("../examples/xor/train.txt"); ty.Load("../examples/xor/label.txt"); + #if 0 hook0(graph); graph.autodiff(); From 93eb3ca7abd7b11bd5043b955ef384ed52ac742d Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Tue, 13 Sep 2016 19:13:49 +0200 Subject: [PATCH 13/19] debug --- src/tensor.h | 6 +++++- src/test.cu | 9 ++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/tensor.h b/src/tensor.h index a801cd2a..2cf4c267 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -153,7 +153,11 @@ class TensorImpl { } void set(const std::vector &values) { - thrust::copy(values.begin(), values.end(), data_.begin()); + size_t totSize = std::accumulate(shape().begin(), shape().end(), + 1, std::multiplies()); + std::cerr << "totSize=" << totSize << " " << values.size() << std::endl; + assert(totSize == values.size()); + thrust::copy(values.begin(), values.end(), data_.begin()); } std::string Debug() const diff --git a/src/test.cu b/src/test.cu index 1e07bdb3..d87c9d1a 100644 --- a/src/test.cu +++ b/src/test.cu @@ -12,7 +12,7 @@ int main(int argc, char** argv) { using namespace marian; using namespace keywords; - /* + Expr x = input(shape={whatevs, 784}, name="X"); Expr y = input(shape={whatevs, 10}, name="Y"); @@ -31,6 +31,8 @@ int main(int argc, char** argv) { int numImg, imgSize; vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg, imgSize); vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); + tx.Load(images); + //ty.Load(labels); cerr << "tx=" << tx.Debug() << endl; cerr << "ty=" << ty.Debug() << endl; @@ -59,9 +61,10 @@ int main(int argc, char** argv) { graph.backward(); //std::cerr << graph["pred"].val()[0] << std::endl; - */ + // XOR + /* Expr x = input(shape={whatevs, 2}, name="X"); Expr y = input(shape={whatevs, 2}, name="Y"); @@ -82,7 +85,7 @@ int main(int argc, char** argv) { tx.Load("../examples/xor/train.txt"); ty.Load("../examples/xor/label.txt"); - + */ #if 0 hook0(graph); From 5c7c044dd99f85fcaa63ca7a862b6604af0d0376 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 08:36:31 +0200 Subject: [PATCH 14/19] debug --- src/tensor.h | 2 +- src/test.cu | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tensor.h b/src/tensor.h index 2cf4c267..4c6b89dd 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -155,7 +155,7 @@ class TensorImpl { void set(const std::vector &values) { size_t totSize = std::accumulate(shape().begin(), shape().end(), 1, std::multiplies()); - std::cerr << "totSize=" << totSize << " " << values.size() << std::endl; + std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl; assert(totSize == values.size()); thrust::copy(values.begin(), values.end(), data_.begin()); } diff --git a/src/test.cu b/src/test.cu index d87c9d1a..dd65d256 100644 --- a/src/test.cu +++ b/src/test.cu @@ -31,6 +31,7 @@ int main(int argc, char** argv) { int numImg, imgSize; vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg, imgSize); vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); + cerr << "images=" << images.size() << " labels=" << labels.size() << endl; tx.Load(images); //ty.Load(labels); From 6a8d5c19970a2893e70e1ef25adb66e3ed50d06a Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 08:41:05 +0200 Subject: [PATCH 15/19] ReadLabels() return vector of floats --- marian/.project | 96 ++----------------------------------------------- src/mnist.h | 8 +++-- src/test.cu | 2 +- 3 files changed, 9 insertions(+), 97 deletions(-) diff --git a/marian/.project b/marian/.project index e5b195c5..d1163076 100644 --- a/marian/.project +++ b/marian/.project @@ -26,99 +26,9 @@ - CMakeLists.txt - 1 - PARENT-1-PROJECT_LOC/src/CMakeLists.txt - - - compile_time_crc32.h - 1 - PARENT-1-PROJECT_LOC/src/compile_time_crc32.h - - - definitions.h - 1 - PARENT-1-PROJECT_LOC/src/definitions.h - - - exception.cpp - 1 - PARENT-1-PROJECT_LOC/src/exception.cpp - - - exception.h - 1 - PARENT-1-PROJECT_LOC/src/exception.h - - - expression_operators.h - 1 - PARENT-1-PROJECT_LOC/src/expression_operators.h - - - expressions.cu - 1 - PARENT-1-PROJECT_LOC/src/expressions.cu - - - expressions.h - 1 - PARENT-1-PROJECT_LOC/src/expressions.h - - - graph.h - 1 - PARENT-1-PROJECT_LOC/src/graph.h - - - graph_operators.h - 1 - PARENT-1-PROJECT_LOC/src/graph_operators.h - - - keywords.h - 1 - PARENT-1-PROJECT_LOC/src/keywords.h - - - marian.h - 1 - PARENT-1-PROJECT_LOC/src/marian.h - - - mnist.h - 1 - PARENT-1-PROJECT_LOC/src/mnist.h - - - tensor.cu - 1 - PARENT-1-PROJECT_LOC/src/tensor.cu - - - tensor.h - 1 - PARENT-1-PROJECT_LOC/src/tensor.h - - - tensor_operators.cu - 1 - PARENT-1-PROJECT_LOC/src/tensor_operators.cu - - - tensor_operators.h - 1 - PARENT-1-PROJECT_LOC/src/tensor_operators.h - - - test.cu - 1 - PARENT-1-PROJECT_LOC/src/test.cu - - - thrust_functions.h - 1 - PARENT-1-PROJECT_LOC/src/thrust_functions.h + src + 2 + PARENT-1-PROJECT_LOC/src diff --git a/src/mnist.h b/src/mnist.h index 8e94931f..9f867ee8 100644 --- a/src/mnist.h +++ b/src/mnist.h @@ -46,7 +46,7 @@ std::vector ReadImages(const std::string& full_path, int& number_of_image return _dataset; } -std::vector ReadLabels(const std::string& full_path) { +std::vector ReadLabels(const std::string& full_path) { std::ifstream file(full_path); if (! file.is_open()) @@ -62,9 +62,11 @@ std::vector ReadLabels(const std::string& full_path) { int number_of_labels = 0; file.read((char *)&number_of_labels, sizeof(number_of_labels)), number_of_labels = reverseInt(number_of_labels); - std::vector _dataset(number_of_labels); + std::vector _dataset(number_of_labels); for (int i = 0; i < number_of_labels; i++) { - file.read((char*)&_dataset[i], 1); + int label; + file.read((char*)&label, 1); + _dataset[i] = label; } return _dataset; diff --git a/src/test.cu b/src/test.cu index dd65d256..c1f53c02 100644 --- a/src/test.cu +++ b/src/test.cu @@ -30,7 +30,7 @@ int main(int argc, char** argv) { int numImg, imgSize; vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg, imgSize); - vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); + vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); cerr << "images=" << images.size() << " labels=" << labels.size() << endl; tx.Load(images); //ty.Load(labels); From 14e536c7794121ad220bb17a8e99d4fc3e501496 Mon Sep 17 00:00:00 2001 From: Hieu Hoang Date: Wed, 14 Sep 2016 07:53:43 +0100 Subject: [PATCH 16/19] load images. segfaults on labels --- src/test.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/test.cu b/src/test.cu index c1f53c02..4ef9953f 100644 --- a/src/test.cu +++ b/src/test.cu @@ -24,16 +24,16 @@ int main(int argc, char** argv) { auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); cerr << "lr=" << lr.Debug() << endl; - - Tensor tx({500, 784}, 1); - Tensor ty({500, 10}, 1); - int numImg, imgSize; vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg, imgSize); vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); cerr << "images=" << images.size() << " labels=" << labels.size() << endl; + + Tensor tx({numImg, 784}, 1); + Tensor ty({numImg, 10}, 1); + tx.Load(images); - //ty.Load(labels); + ty.Load(labels); cerr << "tx=" << tx.Debug() << endl; cerr << "ty=" << ty.Debug() << endl; From d2b50d6a6c2b65f3d03874cb767bf5a9e8cf05b0 Mon Sep 17 00:00:00 2001 From: romang Date: Wed, 14 Sep 2016 09:23:51 +0200 Subject: [PATCH 17/19] update MNIST label loading --- src/mnist.h | 102 +++++++++++++++++++++++++++++----------------------- src/test.cu | 6 ++-- 2 files changed, 61 insertions(+), 47 deletions(-) diff --git a/src/mnist.h b/src/mnist.h index 8e94931f..2f7e7d06 100644 --- a/src/mnist.h +++ b/src/mnist.h @@ -4,49 +4,26 @@ #include #include #include +#include namespace datasets { namespace mnist { typedef unsigned char uchar; +const size_t IMAGE_SIZE = 784; +const size_t LABEL_SIZE = 10; + +const size_t IMAGE_MAGIC_NUMBER = 2051; +const size_t LABEL_MAGIC_NUMBER = 2049; + auto reverseInt = [](int i) { unsigned char c1, c2, c3, c4; c1 = i & 255, c2 = (i >> 8) & 255, c3 = (i >> 16) & 255, c4 = (i >> 24) & 255; return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4; }; -std::vector ReadImages(const std::string& full_path, int& number_of_images, int& image_size) { - std::ifstream file(full_path); - - if (! file.is_open()) - throw std::runtime_error("Cannot open file `" + full_path + "`!"); - - int magic_number = 0, n_rows = 0, n_cols = 0; - - file.read((char *)&magic_number, sizeof(magic_number)); - magic_number = reverseInt(magic_number); - - if (magic_number != 2051) - throw std::runtime_error("Invalid MNIST image file!"); - - file.read((char *)&number_of_images, sizeof(number_of_images)), number_of_images = reverseInt(number_of_images); - file.read((char *)&n_rows, sizeof(n_rows)), n_rows = reverseInt(n_rows); - file.read((char *)&n_cols, sizeof(n_cols)), n_cols = reverseInt(n_cols); - - image_size = n_rows * n_cols; - int n = number_of_images * image_size; - std::vector _dataset(n); - unsigned char pixel = 0; - - for (int i = 0; i < n; i++) { - file.read((char*)&pixel, sizeof(pixel)); - _dataset[i] = pixel / 255.0f; - } - return _dataset; -} - -std::vector ReadLabels(const std::string& full_path) { +std::vector ReadImages(const std::string& full_path, int& number_of_images) { std::ifstream file(full_path); if (! file.is_open()) @@ -56,15 +33,50 @@ std::vector ReadLabels(const std::string& full_path) { file.read((char *)&magic_number, sizeof(magic_number)); magic_number = reverseInt(magic_number); - if (magic_number != 2049) + if (magic_number != IMAGE_MAGIC_NUMBER) + throw std::runtime_error("Invalid MNIST image file!"); + + int n_rows = 0; + int n_cols = 0; + file.read((char *)&number_of_images, sizeof(number_of_images)), number_of_images = reverseInt(number_of_images); + file.read((char *)&n_rows, sizeof(n_rows)), n_rows = reverseInt(n_rows); + file.read((char *)&n_cols, sizeof(n_cols)), n_cols = reverseInt(n_cols); + + assert(n_rows * n_cols == IMAGE_SIZE); + + int n = number_of_images * IMAGE_SIZE; + std::vector _dataset(n); + + for (int i = 0; i < n; i++) { + unsigned char pixel = 0; + file.read((char*)&pixel, sizeof(pixel)); + _dataset[i] = pixel / 255.0f; + } + return _dataset; +} + +std::vector ReadLabels(const std::string& full_path, int& number_of_labels) { + std::ifstream file(full_path); + + if (! file.is_open()) + throw std::runtime_error("Cannot open file `" + full_path + "`!"); + + int magic_number = 0; + file.read((char *)&magic_number, sizeof(magic_number)); + magic_number = reverseInt(magic_number); + + if (magic_number != LABEL_MAGIC_NUMBER) throw std::runtime_error("Invalid MNIST label file!"); - int number_of_labels = 0; file.read((char *)&number_of_labels, sizeof(number_of_labels)), number_of_labels = reverseInt(number_of_labels); - std::vector _dataset(number_of_labels); + int n = number_of_labels * LABEL_SIZE; + std::vector _dataset(n, 0.0f); + for (int i = 0; i < number_of_labels; i++) { - file.read((char*)&_dataset[i], 1); + unsigned char label; + file.read((char*)&label, 1); + _dataset[(i * 10) + (int)(label)] = 1.0f; } return _dataset; @@ -75,19 +87,21 @@ std::vector ReadLabels(const std::string& full_path) { //int main(int argc, const char *argv[]) { - //int numImg, imgSize; - //auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg, imgSize); - //auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte"); + //int numImg = 0; + //auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg); + //auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg); - //std::cout - //<< "Number of images: " << numImg << std::endl - //<< "Image size: " << imgSize << std::endl; + //std::cout << "Number of images: " << numImg << std::endl; //for (int i = 0; i < 3; i++) { - //for (int j = 0; j < imgSize; j++) { - //std::cout << images[(i * imgSize) + j] << ","; + //for (int j = 0; j < datasets::mnist::IMAGE_SIZE; j++) { + //std::cout << images[(i * datasets::mnist::IMAGE_SIZE) + j] << ","; //} - //std::cout << " label=" << (int)labels[i] << std::endl; + //std::cout << "\nlabels= "; + //for (int k = 0; k < 10; k++) { + //std::cout << labels[(i * 10) + k] << ","; + //} + //std::cout << std::endl; //} //return 0; //} diff --git a/src/test.cu b/src/test.cu index ed43f052..23971051 100644 --- a/src/test.cu +++ b/src/test.cu @@ -5,9 +5,9 @@ using namespace std; int main(int argc, char** argv) { - /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte");*/ - /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte");*/ - /*std::cerr << images.size() << " " << images[0].size() << std::endl;*/ + /*int numImg = 0;*/ + /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);*/ + /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);*/ using namespace marian; using namespace keywords; From 03809913932ccd148e62261741163597ad72fe70 Mon Sep 17 00:00:00 2001 From: Andre Martins Date: Wed, 14 Sep 2016 08:36:01 +0100 Subject: [PATCH 18/19] Implemented fast softmax. --- src/expression_operators.h | 7 ++++++ src/graph_operators.h | 24 +++++++++++++++++++ src/tensor.h | 7 ++++++ src/tensor_operators.cu | 47 ++++++++++++++++++++++++++++++++++++++ src/tensor_operators.h | 4 ++++ src/test.cu | 6 +++-- 6 files changed, 93 insertions(+), 2 deletions(-) diff --git a/src/expression_operators.h b/src/expression_operators.h index 8eabbd04..3d42400f 100644 --- a/src/expression_operators.h +++ b/src/expression_operators.h @@ -171,6 +171,13 @@ inline Expr softmax(Expr a, Args ...args) { return e / sum(e, args...); } +template +inline Expr softmax_fast(Expr a, Args ...args) { + Expr e = Expr(new SoftmaxNodeOp(a, args...)); + return e; +} + + // inefficient template inline Expr mean(Expr a, Args ...args) { diff --git a/src/graph_operators.h b/src/graph_operators.h index 30456153..5a12f807 100644 --- a/src/graph_operators.h +++ b/src/graph_operators.h @@ -101,6 +101,30 @@ struct TanhNodeOp : public UnaryNodeOp { } }; +struct SoftmaxNodeOp : public UnaryNodeOp { + template + SoftmaxNodeOp(ChainPtr a, Args ...args) + : UnaryNodeOp(a, keywords::shape=newShape(a), + args...) { } + + Shape newShape(ChainPtr a) { + Shape shape = a->shape(); + return shape; + } + + void forward() { + // B = softmax(A). + val_ = a_->val(); + Softmax(&val_); + } + + void backward() { + // TODO + Element(_1 += _2 * Exp(_3), + a_->grad(), adj_, a_->val()); + } +}; + struct LogNodeOp : public UnaryNodeOp { template LogNodeOp(Args ...args) diff --git a/src/tensor.h b/src/tensor.h index 487a553a..bf6b8ef8 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -240,6 +240,13 @@ class Tensor { return pimpl_->Debug(); } + void Print() const { + for (int i = 0; i < size(); ++i) { + std::cerr << (*this)[i] << " "; + } + std::cerr << std::endl; + } + }; } diff --git a/src/tensor_operators.cu b/src/tensor_operators.cu index a8f72893..2d1d541d 100644 --- a/src/tensor_operators.cu +++ b/src/tensor_operators.cu @@ -2,6 +2,53 @@ namespace marian { +// TODO: implement this. +__global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols) { + for(int bid = 0; bid < rows; bid += gridDim.x) { + int j = bid + blockIdx.x; + if(j < rows) { + extern __shared__ float _share[]; + float* _sum = _share + blockDim.x; + float* sp = softMaxP + j * cols; + _sum[threadIdx.x] = 0.0; + for(int tid = 0; tid < cols; tid += blockDim.x) { + int id = tid + threadIdx.x; + if(id < cols) { + sp[id] = __expf(sp[id]); + _sum[threadIdx.x] += sp[id]; + } + } + __syncthreads(); + int len = blockDim.x; + while(len != 1) { + __syncthreads(); + int skip = (len + 1) >> 1; + if(threadIdx.x < (len >> 1)) + _sum[threadIdx.x] += _sum[threadIdx.x + skip]; + len = (len + 1) >> 1; + } + __syncthreads(); + for(int tid = 0; tid < cols; tid += blockDim.x){ + int id = tid + threadIdx.x; + if(id < cols) + sp[id] /= _sum[0]; + } + } + } +} + +// TODO: implement this. +void Softmax(Tensor* Out) { + size_t m = Out->shape()[0]; + size_t k = Out->shape()[1]; + + int blocks = std::min(MAX_BLOCKS, (int) m); + int threads = std::min(MAX_THREADS, (int) k); + int shared = sizeof(float) * threads * 2; + gSoftMax<<>>(Out->data(), m, k); + cudaStreamSynchronize(0); +} + Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B, bool transA, bool transB, Float beta) { Float alpha = 1.0; diff --git a/src/tensor_operators.h b/src/tensor_operators.h index 7ec4ca68..a0c30104 100644 --- a/src/tensor_operators.h +++ b/src/tensor_operators.h @@ -142,6 +142,10 @@ void Element(Functor functor, cudaStreamSynchronize(0); } +__global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols); + +void Softmax(Tensor* Out); + Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B, bool transA, bool transB, Float beta); diff --git a/src/test.cu b/src/test.cu index 4a2445fd..1948b74f 100644 --- a/src/test.cu +++ b/src/test.cu @@ -15,7 +15,7 @@ int main(int argc, char** argv) { Expr b = param(shape={1, 10}, name="b0"); auto scores = dot(x, w) + b; - auto lr = softmax(scores, axis=1, name="pred"); + auto lr = softmax_fast(scores, axis=1, name="pred"); auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); cerr << "lr=" << lr.Debug() << endl; @@ -40,12 +40,14 @@ int main(int argc, char** argv) { std::cerr << val << " "; } std::cerr << std::endl; + lr.val().Print(); std::cerr << "Log-likelihood: "; for (auto val : graph.val().shape()) { std::cerr << val << " "; } std::cerr << std::endl; - + graph.val().Print(); + graph.backward(); //std::cerr << graph["pred"].val()[0] << std::endl; From 2c39bad1d603af02bc2dc74647a60fcb2457a2f3 Mon Sep 17 00:00:00 2001 From: Maximiliana Behnke Date: Wed, 14 Sep 2016 14:27:08 +0200 Subject: [PATCH 19/19] Add test model training script --- scripts/train_test_model.py | 91 +++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100755 scripts/train_test_model.py diff --git a/scripts/train_test_model.py b/scripts/train_test_model.py new file mode 100755 index 00000000..4f3236a9 --- /dev/null +++ b/scripts/train_test_model.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +import sys +import os +import numpy as np +from keras.datasets import mnist +from keras.utils import np_utils +from keras.models import Sequential +from keras.layers import Dense +from keras.layers import Dropout + +def softmax(x): + return np.exp(x) / np.sum(np.exp(x), axis=1)[:, None] + + +def baseline_model(pixels_count, classes_count): + model = Sequential() + # model.add(Dense(pixels_count, input_dim=pixels_count, init='normal', activation='relu')) + model.add(Dense(classes_count, input_dim=pixels_count, init='normal', activation='softmax')) + model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) + return model + + +if __name__ == "__main__": + ### Load trainset from mnist + + (X_train, y_train), (X_test, y_test) = mnist.load_data() + + ### Flatten pictures into vectors + + pixels_count = X_train.shape[1] * X_train.shape[2] + X_train = X_train.reshape(X_train.shape[0], pixels_count).astype('float32') + print "X shape: ", X_train.shape + + X_test = X_test.reshape(X_test.shape[0], pixels_count).astype('float32') + + ### Normalize data to (0, 1) + + X_train = X_train / 255 + X_test = X_test / 255 + + ### Change classes to one hot encoding matrixes + + y_train = np_utils.to_categorical(y_train) + classes_count = y_train.shape[1] + print "Y shape: ", y_train.shape + + y_test = np_utils.to_categorical(y_test) + + # Train weight matrix + + # Build the model + model = baseline_model(pixels_count, classes_count) + # Fit the model + model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2) + # Final evaluation of the model + scores = model.evaluate(X_test, y_test, verbose=0) + print("Baseline Error: %.2f%%" % (100-scores[1]*100)) + + ### Weight and bias matrixes - we extract them from the model + + # weights_ones = np.ones((pixels_count, classes_count)) + # print weights_ones.shape + + weights, bias = model.get_weights() + print weights.shape + print bias.shape + print bias + + ### We calculate lr using softmax! + + dot_out = np.dot(X_train, weights) + print "dot_out shape: ", dot_out.shape + # print dot_out[:10] + + add_out = np.add(bias, dot_out) + print "add_out shape: ", add_out.shape + # print add_out[:10] + + # lr = np.around(softmax(add_out), decimals = 6) + lr = softmax(add_out) + print "lr shape: ", lr.shape + # print lr[:10] + # print np.count_nonzero(lr)i + + ### Save model to npz files + if not os.path.exists("test_model"): + os.makedirs("test_model") + np.savez("test_model/model", weights = weights, bias = bias) + + print "Model saved! Check test_model directory"