Found a non-working configuration

This commit is contained in:
Marcin Junczys-Dowmunt 2016-09-14 19:51:26 +02:00
parent 74626d347f
commit 823a3a624a
4 changed files with 182 additions and 178 deletions

View File

@@ -1,6 +1,5 @@
#pragma once #pragma once
#include <cudnn.h>
#include <cublas_v2.h> #include <cublas_v2.h>
#include <thrust/device_vector.h> #include <thrust/device_vector.h>
#include <thrust/functional.h> #include <thrust/functional.h>
@@ -13,27 +12,27 @@
namespace marian { namespace marian {
struct Handles { //struct Handles {
cudnnHandle_t cudnnHandle; // //cudnnHandle_t cudnnHandle;
cublasHandle_t cublasHandle; // //cublasHandle_t cublasHandle;
//
cudnnOpTensorDescriptor_t add; // //cudnnOpTensorDescriptor_t add;
//
Handles() { // Handles() {
cudnnCreate(&cudnnHandle); // cudnnCreate(&cudnnHandle);
cublasCreate(&cublasHandle); // cublasCreate(&cublasHandle);
cudnnCreateOpTensorDescriptor(&add); // cudnnCreateOpTensorDescriptor(&add);
cudnnSetOpTensorDescriptor(add, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN); // cudnnSetOpTensorDescriptor(add, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN);
} // }
//
~Handles() { // ~Handles() {
cudnnDestroy(cudnnHandle); // cudnnDestroy(cudnnHandle);
cublasDestroy(cublasHandle); // cublasDestroy(cublasHandle);
cudnnDestroyOpTensorDescriptor(add); // cudnnDestroyOpTensorDescriptor(add);
} // }
}; //};
//
const Handles handles; //const Handles handles;
// typedef std::vector<int> Shape; // typedef std::vector<int> Shape;
@@ -60,17 +59,17 @@ class TensorImpl {
private: private:
Shape shape_; Shape shape_;
thrust::device_vector<Float> data_; thrust::device_vector<Float> data_;
cudnnTensorDescriptor_t desc_; //cudnnTensorDescriptor_t desc_;
size_t tno_; size_t tno_;
static size_t tensorCounter; static size_t tensorCounter;
cudnnDataType_t dataType() { //cudnnDataType_t dataType() {
switch(sizeof(Float)) { // switch(sizeof(Float)) {
case 2: return CUDNN_DATA_HALF; // case 2: return CUDNN_DATA_HALF;
case 8: return CUDNN_DATA_DOUBLE; // case 8: return CUDNN_DATA_DOUBLE;
default: return CUDNN_DATA_FLOAT; // default: return CUDNN_DATA_FLOAT;
} // }
} //}
public: public:
typedef Float value_type; typedef Float value_type;
@@ -90,28 +89,28 @@ class TensorImpl {
int size = GetTotalSize(shape_); int size = GetTotalSize(shape_);
data_.resize(size, value); data_.resize(size, value);
cudnnCreateTensorDescriptor(&desc_); //cudnnCreateTensorDescriptor(&desc_);
switch (shape_.size()) { //switch (shape_.size()) {
case 1: // case 1:
cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
shape_[0], 1, 1, 1); break; // shape_[0], 1, 1, 1); break;
case 2: // case 2:
cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
shape_[0], shape_[1], 1, 1); break; // shape_[0], shape_[1], 1, 1); break;
case 3: // case 3:
cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
shape_[0], shape_[1], shape_[2], 1); break; // shape_[0], shape_[1], shape_[2], 1); break;
case 4: // case 4:
cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(), // cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
shape_[0], shape_[1], shape_[2], shape_[3]); break; // shape_[0], shape_[1], shape_[2], shape_[3]); break;
} //}
} }
TensorImpl(const TensorImpl&) = delete; TensorImpl(const TensorImpl&) = delete;
TensorImpl(TensorImpl&&) = delete; TensorImpl(TensorImpl&&) = delete;
~TensorImpl() { ~TensorImpl() {
cudnnDestroyTensorDescriptor(desc_); //cudnnDestroyTensorDescriptor(desc_);
} }
value_type operator[](size_t i) const { value_type operator[](size_t i) const {
@@ -146,9 +145,9 @@ class TensorImpl {
return thrust::raw_pointer_cast(data_.data()); return thrust::raw_pointer_cast(data_.data());
} }
cudnnTensorDescriptor_t desc() const { //cudnnTensorDescriptor_t desc() const {
return desc_; // return desc_;
} //}
size_t id() const { size_t id() const {
return tno_; return tno_;
@@ -246,9 +245,9 @@ class Tensor {
return pimpl_->shape(); return pimpl_->shape();
} }
cudnnTensorDescriptor_t desc() const { //cudnnTensorDescriptor_t desc() const {
return pimpl_->desc(); // return pimpl_->desc();
} //}
void set(value_type value) { void set(value_type value) {
pimpl_->set(value); pimpl_->set(value);

View File

@@ -130,7 +130,11 @@ Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
Tensor Prod(Tensor C, const Tensor A, const Tensor B, Tensor Prod(Tensor C, const Tensor A, const Tensor B,
bool transA, bool transB, Float beta) { bool transA, bool transB, Float beta) {
return Prod(handles.cublasHandle, C, A, B, transA, transB, beta); cublasHandle_t cublasHandle;
cublasCreate(&cublasHandle);
Tensor temp = Prod(cublasHandle, C, A, B, transA, transB, beta);
cublasDestroy(cublasHandle);
return temp;
} }
} }

View File

@@ -2,9 +2,9 @@
#include "marian.h" #include "marian.h"
#include "mnist.h" #include "mnist.h"
using namespace std;
int main(int argc, char** argv) { int main(int argc, char** argv) {
cudaSetDevice(0);
/*int numImg = 0;*/ /*int numImg = 0;*/
/*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);*/ /*auto images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numImg);*/
/*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);*/ /*auto labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numImg);*/
@@ -12,117 +12,104 @@ int main(int argc, char** argv) {
using namespace marian; using namespace marian;
using namespace keywords; using namespace keywords;
const size_t BATCH_SIZE = 500; Expr x = input(shape={1, 2});
const size_t IMAGE_SIZE = 784; Expr y = input(shape={1, 2});
const size_t LABEL_SIZE = 10;
Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); Expr w = param(shape={2, 2}, name="W0");
Expr b = param(shape={1, LABEL_SIZE}, name="b0"); //Expr b = param(shape={1, 2}, name="b0");
Expr z = dot(x, w) + b;
Expr lr = softmax(z, axis=1, name="pred");
Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
//cerr << "x=" << Debug(lr.val().shape()) << endl;
int numofdata; std::cerr << "Building model...";
//vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); auto predict = softmax(dot(x, w),
//vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); axis=1, name="pred");
vector<float> images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE); auto graph = -mean(sum(y * log(predict), axis=1),
vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE); axis=0, name="cost");
cerr << "images=" << images.size() << " labels=" << labels.size() << endl;
cerr << "numofdata=" << numofdata << endl;
size_t startInd = 0;
size_t startIndData = 0;
while (startInd < numofdata) {
size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd;
cerr << "startInd=" << startInd
<< " startIndData=" << startIndData
<< " batchSize=" << batchSize << endl;
Tensor tx({numofdata, IMAGE_SIZE}, 1);
Tensor ty({numofdata, LABEL_SIZE}, 1);
tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE);
ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize);
//cerr << "tx=" << Debug(tx.shape()) << endl;
//cerr << "ty=" << Debug(ty.shape()) << endl;
x = tx;
y = ty;
cerr << "x=" << Debug(x.val().shape()) << endl;
cerr << "y=" << Debug(y.val().shape()) << endl;
graph.forward(batchSize);
cerr << "w=" << Debug(w.val().shape()) << endl;
cerr << "b=" << Debug(b.val().shape()) << endl;
std::cerr << "z: " << Debug(z.val().shape()) << endl;
std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
//std::cerr << "scores=" << scores.val().Debug() << endl;
//std::cerr << "lr=" << lr.val().Debug() << endl;
graph.backward();
//std::cerr << graph["pred"].val()[0] << std::endl;
startInd += batchSize;
startIndData += batchSize * IMAGE_SIZE;
}
Tensor x1t({1, 2});
// XOR std::vector<float> xv = { 0.6, 0.1 };
/* thrust::copy(xv.begin(), xv.end(), x1t.begin());
Expr x = input(shape={whatevs, 2}, name="X");
Expr y = input(shape={whatevs, 2}, name="Y"); Tensor x2t({1, 2});
std::vector<float> yv = { 0, 1 };
Expr w = param(shape={2, 1}, name="W0"); thrust::copy(yv.begin(), yv.end(), x2t.begin());
Expr b = param(shape={1, 1}, name="b0");
x = x1t;
Expr n5 = dot(x, w); y = x2t;
Expr n6 = n5 + b;
Expr lr = softmax(n6, axis=1, name="pred"); graph.forward(1);
cerr << "lr=" << lr.Debug() << endl; graph.backward();
Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); std::cerr << graph.val().Debug() << std::endl;
std::cerr << w.grad().Debug() << std::endl;
Tensor tx({4, 2}, 1); //std::cerr << b.grad().Debug() << std::endl;
Tensor ty({4, 1}, 1);
cerr << "tx=" << tx.Debug() << endl; // using namespace marian;
cerr << "ty=" << ty.Debug() << endl; // using namespace keywords;
//
tx.Load("../examples/xor/train.txt"); // const size_t BATCH_SIZE = 500;
ty.Load("../examples/xor/label.txt"); // const size_t IMAGE_SIZE = 784;
*/ // const size_t LABEL_SIZE = 10;
//
#if 0 // Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
hook0(graph); // Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
graph.autodiff(); //
std::cerr << graph["cost"].val()[0] << std::endl; // Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
//hook1(graph); // Expr b = param(shape={1, LABEL_SIZE}, name="b0");
for(auto p : graph.params()) { //
auto update = _1 = _1 - alpha * _2; // Expr z = dot(x, w) + b;
Element(update, p.val(), p.grad()); // Expr lr = softmax(z, axis=1, name="pred");
} // Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
hook2(graph); // //cerr << "x=" << Debug(lr.val().shape()) << endl;
//
// int numofdata;
// //vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
// //vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
// vector<float> images = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", numofdata, IMAGE_SIZE);
// vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", numofdata, LABEL_SIZE);
// cerr << "images=" << images.size() << " labels=" << labels.size() << endl;
// cerr << "numofdata=" << numofdata << endl;
//
// size_t startInd = 0;
// size_t startIndData = 0;
// while (startInd < numofdata) {
// size_t batchSize = (startInd + BATCH_SIZE < numofdata) ? BATCH_SIZE : numofdata - startInd;
// cerr << "startInd=" << startInd
// << " startIndData=" << startIndData
// << " batchSize=" << batchSize << endl;
//
// Tensor tx({numofdata, IMAGE_SIZE}, 1);
// Tensor ty({numofdata, LABEL_SIZE}, 1);
//
// tx.Load(images.begin() + startIndData, images.begin() + startIndData + batchSize * IMAGE_SIZE);
// ty.Load(labels.begin() + startInd, labels.begin() + startInd + batchSize);
//
// //cerr << "tx=" << Debug(tx.shape()) << endl;
// //cerr << "ty=" << Debug(ty.shape()) << endl;
//
// x = tx;
// y = ty;
//
// cerr << "x=" << Debug(x.val().shape()) << endl;
// cerr << "y=" << Debug(y.val().shape()) << endl;
//
//
// graph.forward(batchSize);
//
// cerr << "w=" << Debug(w.val().shape()) << endl;
// cerr << "b=" << Debug(b.val().shape()) << endl;
// std::cerr << "z: " << Debug(z.val().shape()) << endl;
// std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
// std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
//
// //std::cerr << "scores=" << scores.val().Debug() << endl;
// //std::cerr << "lr=" << lr.val().Debug() << endl;
//
// //graph.backward();
//
// //std::cerr << graph["pred"].val()[0] << std::endl;
//
// startInd += batchSize;
// startIndData += batchSize * IMAGE_SIZE;
// }
auto opt = adadelta(cost_function=cost,
eta=0.9, gamma=0.1,
set_batch=set,
before_update=before,
after_update=after,
set_valid=valid,
validation_freq=100,
verbose=1, epochs=3, early_stopping=10);
opt.run();
#endif
return 0; return 0;
} }

View File

@@ -7,13 +7,16 @@ using namespace marian;
using namespace keywords; using namespace keywords;
int main(int argc, char** argv) { int main(int argc, char** argv) {
cudaSetDevice(0);
const size_t IMAGE_SIZE = 784; const size_t IMAGE_SIZE = 784;
const size_t LABEL_SIZE = 10; const size_t LABEL_SIZE = 10;
int numofdata; int numofdata;
std::cerr << "Loading test set..."; std::cerr << "Loading test set...";
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
std::vector<float>testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
std::cerr << "\tDone." << std::endl; std::cerr << "\tDone." << std::endl;
std::cerr << "Loading model params..."; std::cerr << "Loading model params...";
@@ -27,11 +30,11 @@ int main(int argc, char** argv) {
Shape bShape; Shape bShape;
converter.Load("bias", bData, bShape); converter.Load("bias", bData, bShape);
auto initW = [&wData](Tensor t) { auto initW = [wData](Tensor t) {
thrust::copy(wData.begin(), wData.end(), t.begin()); thrust::copy(wData.begin(), wData.end(), t.begin());
}; };
auto initB = [&bData](Tensor t) { auto initB = [bData](Tensor t) {
thrust::copy(bData.begin(), bData.end(), t.begin()); thrust::copy(bData.begin(), bData.end(), t.begin());
}; };
@@ -39,24 +42,35 @@ int main(int argc, char** argv) {
Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW); Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
Expr b = param(shape={1, LABEL_SIZE}, name="b0", init=initB); Expr b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);
std::cerr << "Building model..."; std::cerr << "Building model...";
auto scores = dot(x, w) + b; auto predict = softmax(dot(x, w) + b,
auto predict = softmax(scores, axis=1, name="pred"); axis=1, name="pred");
auto graph = -mean(sum(y * log(predict), axis=1),
axis=0, name="cost");
std::cerr << "\tDone." << std::endl; std::cerr << "\tDone." << std::endl;
Tensor xt({numofdata, IMAGE_SIZE}); Tensor xt({numofdata, IMAGE_SIZE});
xt.Load(testImages); xt.Load(testImages);
predict.forward(numofdata); Tensor yt({numofdata, LABEL_SIZE});
yt.Load(testLabels);
x = xt;
y = yt;
graph.forward(numofdata);
auto results = predict.val(); auto results = predict.val();
graph.backward();
std::cerr << b.grad().Debug() << std::endl;
size_t acc = 0; size_t acc = 0;
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) { for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
size_t correct = 0; size_t correct = 0;
size_t predicted = 0; size_t predicted = 0;
@@ -65,11 +79,11 @@ int main(int argc, char** argv) {
if (results[i + j] > results[i + predicted]) predicted = j; if (results[i + j] > results[i + predicted]) predicted = j;
} }
acc += (correct == predicted); acc += (correct == predicted);
std::cerr << "corect: " << correct << " | " << predicted << "("; //std::cerr << "corect: " << correct << " | " << predicted << "(";
for (size_t j = 0; j < LABEL_SIZE; ++j) { //for (size_t j = 0; j < LABEL_SIZE; ++j) {
std::cerr << results[i+j] << " "; // std::cerr << results[i+j] << " ";
} //}
std::cerr << std::endl; //std::cerr << std::endl;
} }
std::cerr << "ACC: " << float(acc)/numofdata << std::endl; std::cerr << "ACC: " << float(acc)/numofdata << std::endl;