From 0be1b07308f70af085f55b8a3a56a74209185bb7 Mon Sep 17 00:00:00 2001
From: Marcin Junczys-Dowmunt <junczys@amu.edu.pl>
Date: Wed, 14 Sep 2016 23:17:53 +0200
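Subject: [PATCH] Tensor: faster set/get via bulk copies

Rename the vector and iterator-range overloads of Tensor::Load to
Tensor::set, and remove the file-based Tensor::Load(path) together with
its tokenizer helpers. Add TensorImpl::get/Tensor::get, which copy the
tensor's device data into a host std::vector with a single thrust::copy.
validate_mnist now reads predictions from that host copy instead of
indexing the tensor element by element. Also drop the commented-out
cuDNN code from tensor.h.
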
---
 src/sgd.h             |  4 +--
 src/tensor.cu         | 83 ++-----------------------------------------
 src/tensor.h          | 80 ++++++++---------------------------------
 src/validate_mnist.cu | 29 ++++++++-------
 4 files changed, 34 insertions(+), 162 deletions(-)
diff --git a/src/sgd.h b/src/sgd.h
index 298cd358..0dab8df0 100644
--- a/src/sgd.h
+++ b/src/sgd.h
@@ -60,8 +60,8 @@ class SGD {
       std::vector<float> y(yData_.begin() + startId * numClasses_,
                            yData_.begin() + endId * numClasses_);
 
-      xt.Load(x);
-      yt.Load(y);
+      xt.set(x);
+      yt.set(y);
     }
 
     void UpdateModel() {
diff --git a/src/tensor.cu b/src/tensor.cu
index 09355b21..fea21926 100644
--- a/src/tensor.cu
+++ b/src/tensor.cu
@@ -5,91 +5,12 @@ using namespace std;
 
 namespace marian {
 
-inline std::vector<std::string> Tokenize(const std::string& str,
-    const std::string& delimiters = " \t")
-{
-  std::vector<std::string> tokens;
-  // Skip delimiters at beginning.
-  std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
-  // Find first "non-delimiter".
-  std::string::size_type pos     = str.find_first_of(delimiters, lastPos);
-
-  while (std::string::npos != pos || std::string::npos != lastPos) {
-    // Found a token, add it to the vector.
-    tokens.push_back(str.substr(lastPos, pos - lastPos));
-    // Skip delimiters.  Note the "not_of"
-    lastPos = str.find_first_not_of(delimiters, pos);
-    // Find next "non-delimiter"
-    pos = str.find_first_of(delimiters, lastPos);
-  }
-
-  return tokens;
-}
-
-//! convert string to variable of type T. Used to reading floats, int etc from files
-template<typename T>
-T Scan(const std::string &input)
-{
-  std::stringstream stream(input);
-  T ret;
-  stream >> ret;
-  return ret;
-}
-
-//! convert vectors of string to vectors of type T variables
-template<typename T>
-inline std::vector<T> Scan(const std::vector< std::string > &input)
-{
-  std::vector<T> output(input.size());
-  for (size_t i = 0 ; i < input.size() ; i++) {
-    output[i] = Scan<T>( input[i] );
-  }
-  return output;
-}
-
-//! tokenise input string to vector of type T
-template<typename T>
-inline std::vector<T> Tokenize( const std::string &input
-                                , const std::string& delimiters = " \t")
-{
-  std::vector<std::string> stringVector = Tokenize(input, delimiters);
-  return Scan<T>( stringVector );
-}
-
-
-void Tensor::Load(const std::string &path)
-{
-  size_t totSize = GetTotalSize(pimpl_->shape());
-  cerr << "totSize=" << totSize << endl;
-  std::vector<float> hostData(totSize);
-
-  fstream strm;
-  strm.open(path.c_str());
-
-  string line;
-  size_t ind = 0;
-  while ( getline (strm, line) )
-  {
-	cerr << line << '\n';
-	vector<Float> toks = Tokenize<Float>(line);
-	for (size_t i = 0; i < toks.size(); ++i) {
-		hostData[ind] = toks[i];
-	}
-
-	++ind;
-  }
-  strm.close();
-
-  Load(hostData.begin(), hostData.begin());
-}
-
-void Tensor::Load(const std::vector<float>& data)
+void Tensor::set(const std::vector<float>& data)
 {
 	pimpl_->set(data.begin(), data.end());
 }
 
-
-void Tensor::Load(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end)
+void Tensor::set(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end)
 {
 	pimpl_->set(begin, end);
 }
diff --git a/src/tensor.h b/src/tensor.h
index 0f6029d8..af9069de 100644
--- a/src/tensor.h
+++ b/src/tensor.h
@@ -12,30 +12,6 @@
 
 namespace marian {
 
-//struct Handles {
-//  //cudnnHandle_t cudnnHandle;
-//  //cublasHandle_t cublasHandle;
-//
-//  //cudnnOpTensorDescriptor_t add;
-//
-//  Handles() {
-//    cudnnCreate(&cudnnHandle);
-//    cublasCreate(&cublasHandle);
-//    cudnnCreateOpTensorDescriptor(&add);
-//    cudnnSetOpTensorDescriptor(add, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN);
-//  }
-//
-//  ~Handles() {
-//    cudnnDestroy(cudnnHandle);
-//    cublasDestroy(cublasHandle);
-//    cudnnDestroyOpTensorDescriptor(add);
-//  }
-//};
-//
-//const Handles handles;
-
-// typedef std::vector<int> Shape;
-
 inline std::string Debug(const Shape &shape)
 {
 	std::stringstream strm;
@@ -59,18 +35,9 @@ class TensorImpl {
   private:
     Shape shape_;
     thrust::device_vector<Float> data_;
-    //cudnnTensorDescriptor_t desc_;
     size_t tno_;
     static size_t tensorCounter;
 
-    //cudnnDataType_t dataType() {
-    //  switch(sizeof(Float)) {
-    //    case 2: return CUDNN_DATA_HALF;
-    //    case 8: return CUDNN_DATA_DOUBLE;
-    //    default: return CUDNN_DATA_FLOAT;
-    //  }
-    //}
-
   public:
     typedef Float value_type;
 
@@ -89,30 +56,11 @@ class TensorImpl {
 
       int size = GetTotalSize(shape_);
       data_.resize(size, value);
-      //cudnnCreateTensorDescriptor(&desc_);
-      //switch (shape_.size()) {
-      //  case 1:
-      //    cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
-      //                               shape_[0], 1, 1, 1); break;
-      //  case 2:
-      //    cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
-      //                               shape_[0], shape_[1], 1, 1); break;
-      //  case 3:
-      //    cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
-      //                               shape_[0], shape_[1], shape_[2], 1); break;
-      //  case 4:
-      //    cudnnSetTensor4dDescriptor(desc_, CUDNN_TENSOR_NCHW, dataType(),
-      //                               shape_[0], shape_[1], shape_[2], shape_[3]); break;
-      //}
     }
 
     TensorImpl(const TensorImpl&) = delete;
     TensorImpl(TensorImpl&&) = delete;
 
-    ~TensorImpl() {
-      //cudnnDestroyTensorDescriptor(desc_);
-    }
-
    value_type operator[](size_t i) const {
       return data_[i];
     }
@@ -145,10 +93,6 @@ class TensorImpl {
       return thrust::raw_pointer_cast(data_.data());
     }
 
-    //cudnnTensorDescriptor_t desc() const {
-    //  return desc_;
-    //}
-
     size_t id() const {
       return tno_;
     }
@@ -158,12 +102,13 @@ class TensorImpl {
     }
 
     void set(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end) {
-	  size_t totSize = GetTotalSize(shape());
-	  //std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl;
-	  //assert(totSize == values.size());
 	  thrust::copy(begin, end, data_.begin());
     }
 
+    void get(std::vector<float>::iterator out) {
+	  thrust::copy(data_.begin(), data_.end(), out);      
+    }
+    
     std::string Debug() const
     {
     	std::stringstream strm;
@@ -245,10 +190,6 @@ class Tensor {
       return pimpl_->shape();
     }
 
-    //cudnnTensorDescriptor_t desc() const {
-    //  return pimpl_->desc();
-    //}
-
     void set(value_type value) {
       pimpl_->set(value);
     }
@@ -273,10 +214,17 @@ class Tensor {
       std::cerr << std::endl;
     }
 
-    void Load(const std::string &path);
-    void Load(const std::vector<float>& data);
-    void Load(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end);
+    //void Load(const std::string &path);
+    void set(const std::vector<float>& data);
+    void set(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end);
 
+    void get(std::vector<float>::iterator out) {
+      pimpl_->get(out);
+    }
+    
+    void get(std::vector<float> &vout) {
+      pimpl_->get(vout.begin());
+    }
 };
 
 }
diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu
index 023aba8b..58697d46 100644
--- a/src/validate_mnist.cu
+++ b/src/validate_mnist.cu
@@ -31,21 +31,21 @@ int main(int argc, char** argv) {
   converter.Load("bias", bData, bShape);
 
   auto initW = [wData](Tensor t) {
-    thrust::copy(wData.begin(), wData.end(), t.begin());
+    t.set(wData.begin(), wData.end());
   };
 
   auto initB = [bData](Tensor t) {
-    thrust::copy(bData.begin(), bData.end(), t.begin());
+    t.set(bData.begin(), bData.end());
   };
 
   std::cerr << "\tDone." << std::endl;
 
 
-  Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
-  Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
+  auto x = input(shape={whatevs, IMAGE_SIZE}, name="X");
+  auto y = input(shape={whatevs, LABEL_SIZE}, name="Y");
   
-  Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
-  Expr b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);
+  auto w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
+  auto b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);
 
   std::cerr << "Building model...";
   auto predict = softmax(dot(x, w) + b,
@@ -53,13 +53,13 @@ int main(int argc, char** argv) {
   auto graph = -mean(sum(y * log(predict), axis=1),
                      axis=0, name="cost");
   
-  std::cerr << "\tDone." << std::endl;
+  std::cerr << "Done." << std::endl;
 
   Tensor xt({numofdata, IMAGE_SIZE});
-  xt.Load(testImages);
+  xt.set(testImages);
   
   Tensor yt({numofdata, LABEL_SIZE});
-  yt.Load(testLabels);
+  yt.set(testLabels);
   
   x = xt;
   y = yt;
@@ -68,6 +68,9 @@ int main(int argc, char** argv) {
   auto results = predict.val();
   graph.backward();
   
+  std::vector<float> resultsv(results.size());
+  results.get(resultsv);
+  
   std::cerr << b.grad().Debug() << std::endl;
 
   size_t acc = 0;
@@ -76,14 +79,14 @@ int main(int argc, char** argv) {
     size_t predicted = 0;
     for (size_t j = 0; j < LABEL_SIZE; ++j) {
       if (testLabels[i+j]) correct = j;
-      if (results[i + j] > results[i + predicted]) predicted = j;
+      if (resultsv[i + j] > resultsv[i + predicted]) predicted = j;
     }
     acc += (correct == predicted);
-    //std::cerr << "corect: " << correct << " | " << predicted <<  "(";
+    //std::cerr << correct << " | " << predicted <<  " ( ";
     //for (size_t j = 0; j < LABEL_SIZE; ++j) {
-    //  std::cerr << results[i+j] << " ";
+    //  std::cerr << resultsv[i+j] << " ";
     //}
-    //std::cerr << std::endl;
+    //std::cerr << ")" << std::endl;
   }
   std::cerr << "ACC: " << float(acc)/numofdata << std::endl;