From 823a3a624a240aedfc3b8fc765e228c40e81a9d8 Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Wed, 14 Sep 2016 19:51:26 +0200 Subject: [PATCH] found not-working conf --- src/tensor.h | 103 ++++++++++---------- src/tensor_operators.cu | 6 +- src/test.cu | 207 +++++++++++++++++++--------------------- src/validate_mnist.cu | 44 ++++++--- 4 files changed, 182 insertions(+), 178 deletions(-) diff --git a/src/tensor.h b/src/tensor.h index b9c81a91..0f6029d8 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -13,27 +12,27 @@ namespace marian { -struct Handles { - cudnnHandle_t cudnnHandle; - cublasHandle_t cublasHandle; - - cudnnOpTensorDescriptor_t add; - - Handles() { - cudnnCreate(&cudnnHandle); - cublasCreate(&cublasHandle); - cudnnCreateOpTensorDescriptor(&add); - cudnnSetOpTensorDescriptor(add, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN); - } - - ~Handles() { - cudnnDestroy(cudnnHandle); - cublasDestroy(cublasHandle); - cudnnDestroyOpTensorDescriptor(add); - } -}; - -const Handles handles; +//struct Handles { +// //cudnnHandle_t cudnnHandle; +// //cublasHandle_t cublasHandle; +// +// //cudnnOpTensorDescriptor_t add; +// +// Handles() { +// cudnnCreate(&cudnnHandle); +// cublasCreate(&cublasHandle); +// cudnnCreateOpTensorDescriptor(&add); +// cudnnSetOpTensorDescriptor(add, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN); +// } +// +// ~Handles() { +// cudnnDestroy(cudnnHandle); +// cublasDestroy(cublasHandle); +// cudnnDestroyOpTensorDescriptor(add); +// } +//}; +// +//const Handles handles; // typedef std::vector Shape; @@ -60,17 +59,17 @@ class TensorImpl { private: Shape shape_; thrust::device_vector data_; - cudnnTensorDescriptor_t desc_; + //cudnnTensorDescriptor_t desc_; size_t tno_; static size_t tensorCounter; - cudnnDataType_t dataType() { - switch(sizeof(Float)) { - case 2: return CUDNN_DATA_HALF; - case 8: return CUDNN_DATA_DOUBLE; - default: return CUDNN_DATA_FLOAT; - } - } + //cudnnDataType_t dataType() { + // switch(sizeof(Float)) { + // case 2: return CUDNN_DATA_HALF; + // case 8: return CUDNN_DATA_DOUBLE; + // default: return CUDNN_DATA_FLOAT; + // } + //} public: typedef Float value_type; @@ -90,28 +89,28 @@ class TensorImpl { int size = GetTotalSize(shape_); data_.resize(size, value); - cudnnCreateTensorDescriptor(&desc_); - switch (shape_.size()) { - case 1: - cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), - shape_[0], 1, 1, 1); break; - case 2: - cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), - shape_[0], shape_[1], 1, 1); break; - case 3: - cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), - shape_[0], shape_[1], shape_[2], 1); break; - case 4: - cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), - shape_[0], shape_[1], shape_[2], shape_[3]); break; - } + //cudnnCreateTensorDescriptor(&desc_); + //switch (shape_.size()) { + // case 1: + // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), + // shape_[0], 1, 1, 1); break; + // case 2: + // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), + // shape_[0], shape_[1], 1, 1); break; + // case 3: + // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), + // shape_[0], shape_[1], shape_[2], 1); break; + // case 4: + // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), + // shape_[0], shape_[1], shape_[2], shape_[3]); break; + //} } TensorImpl(const TensorImpl&) = delete; TensorImpl(TensorImpl&&) = delete; ~TensorImpl() { - cudnnDestroyTensorDescriptor(desc_); + //cudnnDestroyTensorDescriptor(desc_); } value_type operator[](size_t i) const { @@ -146,9 +145,9 @@ class TensorImpl { return thrust::raw_pointer_cast(data_.data()); } - cudnnTensorDescriptor_t desc() const { - return desc_; - } + //cudnnTensorDescriptor_t desc() const { + // return desc_; + //} size_t id() const { return tno_; @@ -246,9 +245,9 @@ class Tensor { return pimpl_->shape(); } - cudnnTensorDescriptor_t desc() const { - return pimpl_->desc(); - } + //cudnnTensorDescriptor_t desc() const { + // return pimpl_->desc(); + //} void set(value_type value) { pimpl_->set(value); diff --git a/src/tensor_operators.cu b/src/tensor_operators.cu index 2aa96331..e9e09ee6 100644 --- a/src/tensor_operators.cu +++ b/src/tensor_operators.cu @@ -130,7 +130,11 @@ Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B, Tensor Prod(Tensor C, const Tensor A, const Tensor B, bool transA, bool transB, Float beta) { - return Prod(handles.cublasHandle, C, A, B, transA, transB, beta); + cublasHandle_t cublasHandle; + cublasCreate(&cublasHandle); + Tensor temp = Prod(cublasHandle, C, A, B, transA, transB, beta); + cublasDestroy(cublasHandle); + return temp; } } \ No newline at end of file diff --git a/src/test.cu b/src/test.cu index 0285e3a5..629c1bc2 100644 --- a/src/test.cu +++ b/src/test.cu @@ -2,9 +2,9 @@ #include "marian.h" #include "mnist.h" -using namespace std; - int main(int argc, char** argv) { + cudaSetDevice(0); + /*int numImg = 0;*/ /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);*/ /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);*/ @@ -12,117 +12,104 @@ int main(int argc, char** argv) { using namespace marian; using namespace keywords; - const size_t BATCH_SIZE = 500; - const size_t IMAGE_SIZE = 784; - const size_t LABEL_SIZE = 10; - - Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); - Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); + Expr x = input(shape={1, 2}); + Expr y = input(shape={1, 2}); - Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); - Expr b = param(shape={1, LABEL_SIZE}, name="b0"); - - Expr z = dot(x, w) + b; - Expr lr = softmax(z, axis=1, name="pred"); - Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); - //cerr << "x=" << Debug(lr.val().shape()) << endl; + Expr w = param(shape={2, 2}, name="W0"); + //Expr b = param(shape={1, 2}, name="b0"); - int numofdata; - //vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); - //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); - vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); - vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); - cerr << "images=" << images.size() << " labels=" << labels.size() << endl; - cerr << "numofdata=" << numofdata << endl; - - size_t startInd = 0; - size_t startIndData = 0; - while (startInd < numofdata) { - size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd; - cerr << "startInd=" << startInd - << " startIndData=" << startIndData - << " batchSize=" << batchSize << endl; - - Tensor tx({numofdata, IMAGE_SIZE}, 1); - Tensor ty({numofdata, LABEL_SIZE}, 1); - - tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE); - ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize); - - //cerr << "tx=" << Debug(tx.shape()) << endl; - //cerr << "ty=" << Debug(ty.shape()) << endl; - - x = tx; - y = ty; - - cerr << "x=" << Debug(x.val().shape()) << endl; - cerr << "y=" << Debug(y.val().shape()) << endl; - - - graph.forward(batchSize); - - cerr << "w=" << Debug(w.val().shape()) << endl; - cerr << "b=" << Debug(b.val().shape()) << endl; - std::cerr << "z: " << Debug(z.val().shape()) << endl; - std::cerr << "lr: " << Debug(lr.val().shape()) << endl; - std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; - - //std::cerr << "scores=" << scores.val().Debug() << endl; - //std::cerr << "lr=" << lr.val().Debug() << endl; - - graph.backward(); - - //std::cerr << graph["pred"].val()[0] << std::endl; - - startInd += batchSize; - startIndData += batchSize * IMAGE_SIZE; - } + std::cerr << "Building model..."; + auto predict = softmax(dot(x, w), + axis=1, name="pred"); + auto graph = -mean(sum(y * log(predict), axis=1), + axis=0, name="cost"); - - // XOR - /* - Expr x = input(shape={whatevs, 2}, name="X"); - Expr y = input(shape={whatevs, 2}, name="Y"); - - Expr w = param(shape={2, 1}, name="W0"); - Expr b = param(shape={1, 1}, name="b0"); - - Expr n5 = dot(x, w); - Expr n6 = n5 + b; - Expr lr = softmax(n6, axis=1, name="pred"); - cerr << "lr=" << lr.Debug() << endl; - - Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); - - Tensor tx({4, 2}, 1); - Tensor ty({4, 1}, 1); - cerr << "tx=" << tx.Debug() << endl; - cerr << "ty=" << ty.Debug() << endl; - - tx.Load("../examples/xor/train.txt"); - ty.Load("../examples/xor/label.txt"); - */ - -#if 0 - hook0(graph); - graph.autodiff(); - std::cerr << graph["cost"].val()[0] << std::endl; - //hook1(graph); - for(auto p : graph.params()) { - auto update = _1 = _1 - alpha * _2; - Element(update, p.val(), p.grad()); - } - hook2(graph); + Tensor x1t({1, 2}); + std::vector xv = { 0.6, 0.1 }; + thrust::copy(xv.begin(), xv.end(), x1t.begin()); + + Tensor x2t({1, 2}); + std::vector yv = { 0, 1 }; + thrust::copy(yv.begin(), yv.end(), x2t.begin()); + + x = x1t; + y = x2t; + + graph.forward(1); + graph.backward(); + + std::cerr << graph.val().Debug() << std::endl; + std::cerr << w.grad().Debug() << std::endl; + //std::cerr << b.grad().Debug() << std::endl; + +// using namespace marian; +// using namespace keywords; +// +// const size_t BATCH_SIZE = 500; +// const size_t IMAGE_SIZE = 784; +// const size_t LABEL_SIZE = 10; +// +// Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); +// Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); +// +// Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); +// Expr b = param(shape={1, LABEL_SIZE}, name="b0"); +// +// Expr z = dot(x, w) + b; +// Expr lr = softmax(z, axis=1, name="pred"); +// Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); +// //cerr << "x=" << Debug(lr.val().shape()) << endl; +// +// int numofdata; +// //vector images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); +// //vector labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); +// vector images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); +// vector labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); +// cerr << "images=" << images.size() << " labels=" << labels.size() << endl; +// cerr << "numofdata=" << numofdata << endl; +// +// size_t startInd = 0; +// size_t startIndData = 0; +// while (startInd < numofdata) { +// size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd; +// cerr << "startInd=" << startInd +// << " startIndData=" << startIndData +// << " batchSize=" << batchSize << endl; +// +// Tensor tx({numofdata, IMAGE_SIZE}, 1); +// Tensor ty({numofdata, LABEL_SIZE}, 1); +// +// tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE); +// ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize); +// +// //cerr << "tx=" << Debug(tx.shape()) << endl; +// //cerr << "ty=" << Debug(ty.shape()) << endl; +// +// x = tx; +// y = ty; +// +// cerr << "x=" << Debug(x.val().shape()) << endl; +// cerr << "y=" << Debug(y.val().shape()) << endl; +// +// +// graph.forward(batchSize); +// +// cerr << "w=" << Debug(w.val().shape()) << endl; +// cerr << "b=" << Debug(b.val().shape()) << endl; +// std::cerr << "z: " << Debug(z.val().shape()) << endl; +// std::cerr << "lr: " << Debug(lr.val().shape()) << endl; +// std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ; +// +// //std::cerr << "scores=" << scores.val().Debug() << endl; +// //std::cerr << "lr=" << lr.val().Debug() << endl; +// +// //graph.backward(); +// +// //std::cerr << graph["pred"].val()[0] << std::endl; +// +// startInd += batchSize; +// startIndData += batchSize * IMAGE_SIZE; +// } - auto opt = adadelta(cost_function=cost, - eta=0.9, gamma=0.1, - set_batch=set, - before_update=before, - after_update=after, - set_valid=valid, - validation_freq=100, - verbose=1, epochs=3, early_stopping=10); - opt.run(); -#endif return 0; } diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu index a42fa881..023aba8b 100644 --- a/src/validate_mnist.cu +++ b/src/validate_mnist.cu @@ -7,13 +7,16 @@ using namespace marian; using namespace keywords; int main(int argc, char** argv) { + + cudaSetDevice(0); + const size_t IMAGE_SIZE = 784; const size_t LABEL_SIZE = 10; int numofdata; std::cerr << "Loading test set..."; std::vector testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); - std::vectortestLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); + std::vector testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); std::cerr << "\tDone." << std::endl; std::cerr << "Loading model params..."; @@ -27,11 +30,11 @@ int main(int argc, char** argv) { Shape bShape; converter.Load("bias", bData, bShape); - auto initW = [&wData](Tensor t) { + auto initW = [wData](Tensor t) { thrust::copy(wData.begin(), wData.end(), t.begin()); }; - auto initB = [&bData](Tensor t) { + auto initB = [bData](Tensor t) { thrust::copy(bData.begin(), bData.end(), t.begin()); }; @@ -39,24 +42,35 @@ int main(int argc, char** argv) { Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); - + Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); + Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW); Expr b = param(shape={1, LABEL_SIZE}, name="b0", init=initB); std::cerr << "Building model..."; - auto scores = dot(x, w) + b; - auto predict = softmax(scores, axis=1, name="pred"); + auto predict = softmax(dot(x, w) + b, + axis=1, name="pred"); + auto graph = -mean(sum(y * log(predict), axis=1), + axis=0, name="cost"); + std::cerr << "\tDone." << std::endl; Tensor xt({numofdata, IMAGE_SIZE}); xt.Load(testImages); - - predict.forward(numofdata); - + + Tensor yt({numofdata, LABEL_SIZE}); + yt.Load(testLabels); + + x = xt; + y = yt; + + graph.forward(numofdata); auto results = predict.val(); + graph.backward(); + + std::cerr << b.grad().Debug() << std::endl; size_t acc = 0; - for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) { size_t correct = 0; size_t predicted = 0; @@ -65,11 +79,11 @@ int main(int argc, char** argv) { if (results[i + j] > results[i + predicted]) predicted = j; } acc += (correct == predicted); - std::cerr << "corect: " << correct << " | " << predicted << "("; - for (size_t j = 0; j < LABEL_SIZE; ++j) { - std::cerr << results[i+j] << " "; - } - std::cerr << std::endl; + //std::cerr << "corect: " << correct << " | " << predicted << "("; + //for (size_t j = 0; j < LABEL_SIZE; ++j) { + // std::cerr << results[i+j] << " "; + //} + //std::cerr << std::endl; } std::cerr << "ACC: " << float(acc)/numofdata << std::endl;