diff --git a/.gitignore b/.gitignore
index 4dfd397b..53468680 100644
--- a/.gitignore
+++ b/.gitignore
@@ -39,3 +39,4 @@ build
 
 # Examples
 examples/*/*.gz
+examples/mnist/*ubyte
diff --git a/examples/mnist/Makefile b/examples/mnist/Makefile
index 7e4e812f..051d5a60 100644
--- a/examples/mnist/Makefile
+++ b/examples/mnist/Makefile
@@ -2,10 +2,13 @@
 
 all: download
 
-download: train-images-idx3-ubyte.gz train-labels-idx1-ubyte.gz t10k-images-idx3-ubyte.gz t10k-labels-idx3-ubyte.gz
+download: train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
 
-%.gz:
-	wget http://yann.lecun.com/exdb/mnist/$*.gz -O $@
+%-ubyte: %-ubyte.gz
+	gzip -d < $^ > $@
+
+%-ubyte.gz:
+	wget http://yann.lecun.com/exdb/mnist/$*-ubyte.gz -O $@
 
 clean:
-	rm -f *.gz
+	rm -f *.gz *-ubyte
diff --git a/examples/xor/label.txt b/examples/xor/label.txt
new file mode 100644
index 00000000..d9ff83f1
--- /dev/null
+++ b/examples/xor/label.txt
@@ -0,0 +1,4 @@
+1
+1
+0
+0
diff --git a/examples/xor/train.txt b/examples/xor/train.txt
new file mode 100644
index 00000000..e6610d1d
--- /dev/null
+++ b/examples/xor/train.txt
@@ -0,0 +1,4 @@
+0 0
+1 1
+1 0
+0 1
\ No newline at end of file
diff --git a/marian/.cproject b/marian/.cproject
index 29c37771..195ef668 100644
--- a/marian/.cproject
+++ b/marian/.cproject
@@ -56,11 +56,11 @@
-
-
+
+
-
+
diff --git a/marian/.project b/marian/.project
index 215485f6..d1163076 100644
--- a/marian/.project
+++ b/marian/.project
@@ -26,94 +26,9 @@
-		<link>
-			<name>CMakeLists.txt</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/CMakeLists.txt</locationURI>
-		</link>
-		<link>
-			<name>compile_time_crc32.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/compile_time_crc32.h</locationURI>
-		</link>
-		<link>
-			<name>definitions.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/definitions.h</locationURI>
-		</link>
-		<link>
-			<name>exception.cpp</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/exception.cpp</locationURI>
-		</link>
-		<link>
-			<name>exception.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/exception.h</locationURI>
-		</link>
-		<link>
-			<name>expression_operators.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/expression_operators.h</locationURI>
-		</link>
-		<link>
-			<name>expressions.cu</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/expressions.cu</locationURI>
-		</link>
-		<link>
-			<name>expressions.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/expressions.h</locationURI>
-		</link>
-		<link>
-			<name>graph.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/graph.h</locationURI>
-		</link>
-		<link>
-			<name>graph_operators.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/graph_operators.h</locationURI>
-		</link>
-		<link>
-			<name>keywords.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/keywords.h</locationURI>
-		</link>
-		<link>
-			<name>marian.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/marian.h</locationURI>
-		</link>
-		<link>
-			<name>tensor.cu</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/tensor.cu</locationURI>
-		</link>
-		<link>
-			<name>tensor.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/tensor.h</locationURI>
-		</link>
-		<link>
-			<name>tensor_operators.cu</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/tensor_operators.cu</locationURI>
-		</link>
-		<link>
-			<name>tensor_operators.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/tensor_operators.h</locationURI>
-		</link>
-		<link>
-			<name>test.cu</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/test.cu</locationURI>
-		</link>
-		<link>
-			<name>thrust_functions.h</name>
-			<type>1</type>
-			<locationURI>PARENT-1-PROJECT_LOC/src/thrust_functions.h</locationURI>
-		</link>
+		<link>
+			<name>src</name>
+			<type>2</type>
+			<locationURI>PARENT-1-PROJECT_LOC/src</locationURI>
+		</link>
diff --git a/scripts/train_test_model.py b/scripts/train_test_model.py
new file mode 100755
index 00000000..4f3236a9
--- /dev/null
+++ b/scripts/train_test_model.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import numpy as np
+from keras.datasets import mnist
+from keras.utils import np_utils
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.layers import Dropout
+
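+# Row-wise softmax over a batch of logits. Note: there is no max-subtraction
+# for numerical stability, so very large logits could overflow np.exp;
+# that is acceptable for this small test script.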
+def softmax(x):
+    return np.exp(x) / np.sum(np.exp(x), axis=1)[:, None]
+
+
+def baseline_model(pixels_count, classes_count):
+    model = Sequential()
+    # model.add(Dense(pixels_count, input_dim=pixels_count, init='normal', activation='relu'))
+    model.add(Dense(classes_count, input_dim=pixels_count, init='normal', activation='softmax'))
+    model.compile(loss='categorical_crossentropy', optimizer='adam',
+                  metrics=['accuracy'])
+    return model
+
+
+if __name__ == "__main__":
+    ### Load the training set from MNIST
+
+    (X_train, y_train), (X_test, y_test) = mnist.load_data()
+
+    ### Flatten pictures into vectors
+
+    pixels_count = X_train.shape[1] * X_train.shape[2]
+    X_train = X_train.reshape(X_train.shape[0], pixels_count).astype('float32')
+    print "X shape: ", X_train.shape
+
+    X_test = X_test.reshape(X_test.shape[0], pixels_count).astype('float32')
+
+    ### Normalize data to (0, 1)
+
+    X_train = X_train / 255
+    X_test = X_test / 255
+
+    ### Change classes to one-hot encoding matrices
+
+    y_train = np_utils.to_categorical(y_train)
+    classes_count = y_train.shape[1]
+    print "Y shape: ", y_train.shape
+
+    y_test = np_utils.to_categorical(y_test)
+
+    # Train weight matrix
+
+    # Build the model
+    model = baseline_model(pixels_count, classes_count)
+    # Fit the model
+    model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2)
+    # Final evaluation of the model
+    scores = model.evaluate(X_test, y_test, verbose=0)
+    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))
+
+    ### Weight and bias matrices - we extract them from the model
+
+    # weights_ones = np.ones((pixels_count, classes_count))
+    # print weights_ones.shape
+
+    weights, bias = model.get_weights()
+    print weights.shape
+    print bias.shape
+    print bias
+
+    ### We calculate lr using softmax!
+
+    dot_out = np.dot(X_train, weights)
+    print "dot_out shape: ", dot_out.shape
+    # print dot_out[:10]
+
+    add_out = np.add(bias, dot_out)
+    print "add_out shape: ", add_out.shape
+    # print add_out[:10]
+
+    # lr = np.around(softmax(add_out), decimals=6)
+    lr = softmax(add_out)
+    print "lr shape: ", lr.shape
+    # print lr[:10]
+    # print np.count_nonzero(lr)
+
+    ### Save model to npz files
+    if not os.path.exists("test_model"):
+        os.makedirs("test_model")
+    np.savez("test_model/model", weights=weights, bias=bias)
+
+    print "Model saved! Check test_model directory"
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 244977db..49832492 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -10,6 +10,7 @@ cuda_add_executable(
   test.cu
   expressions.cu
   tensor_operators.cu
+  tensor.cu
   $
 )
diff --git a/src/expression_operators.h b/src/expression_operators.h
index 8eabbd04..3d42400f 100644
--- a/src/expression_operators.h
+++ b/src/expression_operators.h
@@ -171,6 +171,13 @@ inline Expr softmax(Expr a, Args ...args) {
   return e / sum(e, args...);
 }
 
+template <typename ...Args>
+inline Expr softmax_fast(Expr a, Args ...args) {
+  Expr e = Expr(new SoftmaxNodeOp(a, args...));
+  return e;
+}
+
+
 // inefficient
 template <typename ...Args>
 inline Expr mean(Expr a, Args ...args) {
diff --git a/src/graph_operators.h b/src/graph_operators.h
index 30456153..5a12f807 100644
--- a/src/graph_operators.h
+++ b/src/graph_operators.h
@@ -101,6 +101,30 @@ struct TanhNodeOp : public UnaryNodeOp {
   }
 };
 
+struct SoftmaxNodeOp : public UnaryNodeOp {
+  template <typename ...Args>
+  SoftmaxNodeOp(ChainPtr a, Args ...args)
+    : UnaryNodeOp(a, keywords::shape=newShape(a),
+                  args...) { }
+
+  Shape newShape(ChainPtr a) {
+    Shape shape = a->shape();
+    return shape;
+  }
+
+  void forward() {
+    // B = softmax(A).
+    val_ = a_->val();
+    Softmax(&val_);
+  }
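+
+  // TODO: the Element() update in backward() is a placeholder; it matches
+  // exp's gradient pattern, not softmax's. The correct row-wise update is
+  //   dA_i += p_i * (dB_i - sum_j dB_j * p_j),  with p = softmax(A).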
+  void backward() {
+    // TODO
+    Element(_1 += _2 * Exp(_3),
+            a_->grad(), adj_, a_->val());
+  }
+};
+
 struct LogNodeOp : public UnaryNodeOp {
   template <typename ...Args>
   LogNodeOp(Args ...args)
diff --git a/src/mnist.h b/src/mnist.h
new file mode 100644
index 00000000..43931ed2
--- /dev/null
+++ b/src/mnist.h
@@ -0,0 +1,105 @@
+//#pragma once
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+#include <vector>
+
+namespace datasets {
+namespace mnist {
+
+typedef unsigned char uchar;
+
+
+const size_t IMAGE_MAGIC_NUMBER = 2051;
+const size_t LABEL_MAGIC_NUMBER = 2049;
+
+auto reverseInt = [](int i) {
+  unsigned char c1, c2, c3, c4;
+  c1 = i & 255, c2 = (i >> 8) & 255, c3 = (i >> 16) & 255, c4 = (i >> 24) & 255;
+  return ((int)c1 << 24) + ((int)c2 << 16) + ((int)c3 << 8) + c4;
+};
+
+std::vector<float> ReadImages(const std::string& full_path, int& number_of_images, int imgSize) {
+  std::ifstream file(full_path);
+
+  if (! file.is_open())
+    throw std::runtime_error("Cannot open file `" + full_path + "`!");
+
+  int magic_number = 0;
+  file.read((char *)&magic_number, sizeof(magic_number));
+  magic_number = reverseInt(magic_number);
+
+  if (magic_number != IMAGE_MAGIC_NUMBER)
+    throw std::runtime_error("Invalid MNIST image file!");
+
+  int n_rows = 0;
+  int n_cols = 0;
+  file.read((char *)&number_of_images, sizeof(number_of_images)), number_of_images = reverseInt(number_of_images);
+  file.read((char *)&n_rows, sizeof(n_rows)), n_rows = reverseInt(n_rows);
+  file.read((char *)&n_cols, sizeof(n_cols)), n_cols = reverseInt(n_cols);
+
+  assert(n_rows * n_cols == imgSize);
+
+  int n = number_of_images * imgSize;
+  std::vector<float> _dataset(n);
+
+  for (int i = 0; i < n; i++) {
+    unsigned char pixel = 0;
+    file.read((char*)&pixel, sizeof(pixel));
+    _dataset[i] = pixel / 255.0f;
+  }
+  return _dataset;
+}
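+
+// The IDX label file stores one byte per example; each label byte is
+// expanded below into a one-hot row of length label_size.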
+std::vector<float> ReadLabels(const std::string& full_path, int& number_of_labels, int label_size) {
+  std::ifstream file(full_path);
+
+  if (! file.is_open())
+    throw std::runtime_error("Cannot open file `" + full_path + "`!");
+
+  int magic_number = 0;
+  file.read((char *)&magic_number, sizeof(magic_number));
+  magic_number = reverseInt(magic_number);
+
+  if (magic_number != LABEL_MAGIC_NUMBER)
+    throw std::runtime_error("Invalid MNIST label file!");
+
+  file.read((char *)&number_of_labels, sizeof(number_of_labels)), number_of_labels = reverseInt(number_of_labels);
+
+  int n = number_of_labels * label_size;
+  std::vector<float> _dataset(n, 0.0f);
+
+  for (int i = 0; i < number_of_labels; i++) {
+    unsigned char label;
+    file.read((char*)&label, 1);
+    _dataset[(i * label_size) + (int)(label)] = 1.0f;
+  }
+
+  return _dataset;
+}
+
+} // namespace mnist
+} // namespace datasets
+
+
+//int main(int argc, const char *argv[]) {
+  //int numImg = 0;
+  //auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);
+  //auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);
+
+  //std::cout << "Number of images: " << numImg << std::endl;
+
+  //for (int i = 0; i < 3; i++) {
+    //for (int j = 0; j < datasets::mnist::IMAGE_SIZE; j++) {
+      //std::cout << images[(i * datasets::mnist::IMAGE_SIZE) + j] << ",";
+    //}
+    //std::cout << "\nlabels= ";
+    //for (int k = 0; k < 10; k++) {
+      //std::cout << labels[(i * 10) + k] << ",";
+    //}
+    //std::cout << std::endl;
+  //}
+  //return 0;
+//}
diff --git a/src/tensor.cu b/src/tensor.cu
new file mode 100644
index 00000000..6048bb43
--- /dev/null
+++ b/src/tensor.cu
@@ -0,0 +1,93 @@
+#include <fstream>
+#include <numeric>
+#include <sstream>
+#include "tensor.h"
+
+using namespace std;
+
+namespace marian {
+
+inline std::vector<std::string> Tokenize(const std::string& str,
+                                         const std::string& delimiters = " \t")
+{
+  std::vector<std::string> tokens;
+  // Skip delimiters at beginning.
+  std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
+  // Find first "non-delimiter".
+  std::string::size_type pos = str.find_first_of(delimiters, lastPos);
+
+  while (std::string::npos != pos || std::string::npos != lastPos) {
+    // Found a token, add it to the vector.
+    tokens.push_back(str.substr(lastPos, pos - lastPos));
+    // Skip delimiters. Note the "not_of"
+    lastPos = str.find_first_not_of(delimiters, pos);
+    // Find next "non-delimiter"
+    pos = str.find_first_of(delimiters, lastPos);
+  }
+
+  return tokens;
+}
+
+//! convert a string to a variable of type T. Used for reading floats, ints etc. from files
+template<typename T>
+T Scan(const std::string &input)
+{
+  std::stringstream stream(input);
+  T ret;
+  stream >> ret;
+  return ret;
+}
+
+//! convert a vector of strings to a vector of type T variables
+template<typename T>
+inline std::vector<T> Scan(const std::vector< std::string > &input)
+{
+  std::vector<T> output(input.size());
+  for (size_t i = 0 ; i < input.size() ; i++) {
+    output[i] = Scan<T>( input[i] );
+  }
+  return output;
+}
+
+//! tokenise an input string into a vector of type T
+template<typename T>
+inline std::vector<T> Tokenize( const std::string &input
+                              , const std::string& delimiters = " \t")
+{
+  std::vector<std::string> stringVector = Tokenize(input, delimiters);
+  return Scan<T>( stringVector );
+}
+
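+// Load a tensor from a plain-text file: one line per row, with values
+// separated by spaces or tabs, filled into the tensor in row-major order.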
+void Tensor::Load(const std::string &path)
+{
+  size_t totSize = std::accumulate(pimpl_->shape().begin(), pimpl_->shape().end(),
+                                   1, std::multiplies<int>());
+  cerr << "totSize=" << totSize << endl;
+  std::vector<float> hostData(totSize);
+
+  fstream strm;
+  strm.open(path.c_str());
+
+  string line;
+  size_t ind = 0;
+  while ( getline (strm, line) )
+  {
+    cerr << line << '\n';
+    vector<float> toks = Tokenize<float>(line);
+    for (size_t i = 0; i < toks.size(); ++i) {
+      hostData[ind] = toks[i];
+      ++ind;
+    }
+  }
+  strm.close();
+
+  Load(hostData);
+}
+
+void Tensor::Load(const std::vector<float> &values)
+{
+  pimpl_->set(values);
+}
+
+}
diff --git a/src/tensor.h b/src/tensor.h
index cd4f642c..6792e9ac 100644
--- a/src/tensor.h
+++ b/src/tensor.h
@@ -152,6 +152,14 @@
     thrust::fill(data_.begin(), data_.end(), value);
   }
 
+  void set(const std::vector<float> &values) {
+    size_t totSize = std::accumulate(shape().begin(), shape().end(),
+                                     1, std::multiplies<int>());
+    std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl;
+    assert(totSize == values.size());
+    thrust::copy(values.begin(), values.end(), data_.begin());
+  }
+
   std::string Debug() const
   {
     std::stringstream strm;
@@ -240,6 +248,16 @@
     return pimpl_->Debug();
   }
 
+  void Print() const {
+    for (int i = 0; i < size(); ++i) {
+      std::cerr << (*this)[i] << " ";
+    }
+    std::cerr << std::endl;
+  }
+
+  void Load(const std::string &path);
+  void Load(const std::vector<float> &values);
+
 };
 
 }
diff --git a/src/tensor_operators.cu b/src/tensor_operators.cu
index a8f72893..2d1d541d 100644
--- a/src/tensor_operators.cu
+++ b/src/tensor_operators.cu
@@ -2,6 +2,53 @@
 
 namespace marian {
 
+// Row-wise softmax, in place: each row of the rows x cols matrix is
+// exponentiated and then normalized by its row sum (no max-subtraction).
+__global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols) {
+  for(int bid = 0; bid < rows; bid += gridDim.x) {
+    int j = bid + blockIdx.x;
+    if(j < rows) {
+      extern __shared__ float _share[];
+      float* _sum = _share + blockDim.x;
+      float* sp = softMaxP + j * cols;
+      _sum[threadIdx.x] = 0.0;
+      for(int tid = 0; tid < cols; tid += blockDim.x) {
+        int id = tid + threadIdx.x;
+        if(id < cols) {
+          sp[id] = __expf(sp[id]);
+          _sum[threadIdx.x] += sp[id];
+        }
+      }
+      __syncthreads();
+      int len = blockDim.x;
+      while(len != 1) {
+        __syncthreads();
+        int skip = (len + 1) >> 1;
+        if(threadIdx.x < (len >> 1))
+          _sum[threadIdx.x] += _sum[threadIdx.x + skip];
+        len = (len + 1) >> 1;
+      }
+      __syncthreads();
+      for(int tid = 0; tid < cols; tid += blockDim.x){
+        int id = tid + threadIdx.x;
+        if(id < cols)
+          sp[id] /= _sum[0];
+      }
+    }
+  }
+}
+
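+// Launch configuration: one block per row (capped at MAX_BLOCKS) and one
+// thread per column (capped at MAX_THREADS); the dynamic shared memory
+// provides 2 * threads floats, whose upper half holds the per-thread
+// partial sums for the reduction in gSoftMax.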
+void Softmax(Tensor* Out) {
+  size_t m = Out->shape()[0];
+  size_t k = Out->shape()[1];
+
+  int blocks = std::min(MAX_BLOCKS, (int) m);
+  int threads = std::min(MAX_THREADS, (int) k);
+  int shared = sizeof(float) * threads * 2;
+  gSoftMax<<<blocks, threads, shared>>>(Out->data(), m, k);
+  cudaStreamSynchronize(0);
+}
+
 Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
             bool transA, bool transB, Float beta) {
   Float alpha = 1.0;
diff --git a/src/tensor_operators.h b/src/tensor_operators.h
index 7ec4ca68..a0c30104 100644
--- a/src/tensor_operators.h
+++ b/src/tensor_operators.h
@@ -142,6 +142,10 @@ void Element(Functor functor,
   cudaStreamSynchronize(0);
 }
 
+__global__ void gSoftMax(float* softMaxP, size_t rows, size_t cols);
+
+void Softmax(Tensor* Out);
+
 Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
             bool transA, bool transB, Float beta);
diff --git a/src/test.cu b/src/test.cu
index b5dbba18..a71939b4 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -1,36 +1,123 @@
 #include "marian.h"
+#include "mnist.h"
 
 using namespace std;
 
 int main(int argc, char** argv) {
+  /*int numImg = 0;*/
+  /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);*/
+  /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);*/
 
   using namespace marian;
   using namespace keywords;
 
-  Expr x = input(name="X");
-  Expr y = input(name="Y");
-
-  Expr w = param(shape={784, 10}, name="W0");
-  Expr b = param(shape={1, 10}, name="b0");
-
-  Expr pred = softmax(dot(x, w) + b, axis=1);
-  cerr << "lr=" << pred.Debug() << endl;
+  const size_t IMAGE_SIZE = 784;
+  const size_t LABEL_SIZE = 10;
 
-  Expr graph = -mean(sum(y * log(pred), axis=1),
-                     axis=0, name="cost");
+  Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
+  Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
 
-  Tensor tx({500, 784}, 1);
-  Tensor ty({500, 10}, 1);
+  Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
+  Expr b = param(shape={1, LABEL_SIZE}, name="b0");
 
+  auto scores = dot(x, w) + b;
+  auto lr = softmax_fast(scores, axis=1, name="pred");
+  auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
+  cerr << "lr=" << lr.Debug() << endl;
+
+#if 0
+  int numofdata;
+  vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
+  vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
+  cerr << "images=" << images.size() << " labels=" << labels.size() << endl;
+  cerr << "numofdata=" << numofdata << endl;
+
+  Tensor tx({numofdata, IMAGE_SIZE}, 1);
+  Tensor ty({numofdata, LABEL_SIZE}, 1);
+
+  tx.Load(images);
+  ty.Load(labels);
+
   cerr << "tx=" << tx.Debug() << endl;
   cerr << "ty=" << ty.Debug() << endl;
+#else
+  Tensor tx({500, 784}, 1);
+  Tensor ty({500, 10}, 1);
+#endif
 
   x = tx;
   y = ty;
 
   graph.forward(500);
+
+  std::cerr << "Result: ";
+  for (auto val : scores.val().shape()) {
+    std::cerr << val << " ";
+  }
+  std::cerr << std::endl;
+  std::cerr << "Result: ";
+  for (auto val : lr.val().shape()) {
+    std::cerr << val << " ";
+  }
+  std::cerr << std::endl;
+  lr.val().Print();
+  std::cerr << "Log-likelihood: ";
+  for (auto val : graph.val().shape()) {
+    std::cerr << val << " ";
+  }
+  std::cerr << std::endl;
+  graph.val().Print();
+
   graph.backward();
+
+  //std::cerr << graph["pred"].val()[0] << std::endl;
+
+
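+  // The commented-out block below wires the same pipeline to the XOR data
+  // added above in examples/xor (train.txt / label.txt).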
+  // XOR
+  /*
+  Expr x = input(shape={whatevs, 2}, name="X");
+  Expr y = input(shape={whatevs, 2}, name="Y");
+
+  Expr w = param(shape={2, 1}, name="W0");
+  Expr b = param(shape={1, 1}, name="b0");
+
+  Expr n5 = dot(x, w);
+  Expr n6 = n5 + b;
+  Expr lr = softmax(n6, axis=1, name="pred");
+  cerr << "lr=" << lr.Debug() << endl;
+
+  Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
+
+  Tensor tx({4, 2}, 1);
+  Tensor ty({4, 1}, 1);
+  cerr << "tx=" << tx.Debug() << endl;
+  cerr << "ty=" << ty.Debug() << endl;
+
+  tx.Load("../examples/xor/train.txt");
+  ty.Load("../examples/xor/label.txt");
+  */
+
+#if 0
+  hook0(graph);
+  graph.autodiff();
+  std::cerr << graph["cost"].val()[0] << std::endl;
+  //hook1(graph);
+  for(auto p : graph.params()) {
+    auto update = _1 = _1 - alpha * _2;
+    Element(update, p.val(), p.grad());
+  }
+  hook2(graph);
+
+  auto opt = adadelta(cost_function=cost,
+                      eta=0.9, gamma=0.1,
+                      set_batch=set,
+                      before_update=before,
+                      after_update=after,
+                      set_valid=valid,
+                      validation_freq=100,
+                      verbose=1, epochs=3, early_stopping=10);
+  opt.run();
+#endif
 
   return 0;
 }