Andre Martins 2016-09-14 14:30:48 +01:00
commit f1fce72a0e
4 changed files with 129 additions and 36 deletions

scripts/train_test_model.py Executable file

@@ -0,0 +1,91 @@
#!/usr/bin/env python
import sys
import os
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout


def softmax(x):
    # Row-wise softmax; note that np.exp can overflow for large inputs
    return np.exp(x) / np.sum(np.exp(x), axis=1)[:, None]


def baseline_model(pixels_count, classes_count):
    model = Sequential()
    # model.add(Dense(pixels_count, input_dim=pixels_count, init='normal', activation='relu'))
    model.add(Dense(classes_count, input_dim=pixels_count, init='normal', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


if __name__ == "__main__":
    ### Load training set from MNIST
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    ### Flatten pictures into vectors
    pixels_count = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], pixels_count).astype('float32')
    print "X shape: ", X_train.shape
    X_test = X_test.reshape(X_test.shape[0], pixels_count).astype('float32')

    ### Normalize data to (0, 1)
    X_train = X_train / 255
    X_test = X_test / 255

    ### Change classes to one-hot encoded matrices
    y_train = np_utils.to_categorical(y_train)
    classes_count = y_train.shape[1]
    print "Y shape: ", y_train.shape
    y_test = np_utils.to_categorical(y_test)

    ### Train the weight matrix
    # Build the model
    model = baseline_model(pixels_count, classes_count)
    # Fit the model
    model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2)
    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))

    ### Weight and bias matrices - we extract them from the model
    weights, bias = model.get_weights()
    print weights.shape
    print bias.shape
    print bias

    ### We calculate lr by hand using softmax
    dot_out = np.dot(X_train, weights)
    print "dot_out shape: ", dot_out.shape
    add_out = np.add(bias, dot_out)
    print "add_out shape: ", add_out.shape
    lr = softmax(add_out)
    print "lr shape: ", lr.shape

    ### Save model to npz files
    if not os.path.exists("test_model"):
        os.makedirs("test_model")
    np.savez("test_model/model", weights=weights, bias=bias)
    print "Model saved! Check test_model directory"

@@ -59,8 +59,7 @@ inline std::vector<T> Tokenize( const std::string &input
 void Tensor::Load(const std::string &path)
 {
-  size_t totSize = std::accumulate(pimpl_->shape().begin(), pimpl_->shape().end(),
-                                   1, std::multiplies<int>());
+  size_t totSize = GetTotalSize(pimpl_->shape());
   cerr << "totSize=" << totSize << endl;
   std::vector<float> hostData(totSize);

@@ -48,6 +48,13 @@ inline std::string Debug(const Shape &shape)
   return strm.str();
 }
 
+inline size_t GetTotalSize(const Shape &shape)
+{
+  size_t ret = std::accumulate(shape.begin(), shape.end(),
+                               1, std::multiplies<int>());
+  return ret;
+}
+
 template<class Float>
 class TensorImpl {
   private:
@@ -81,8 +88,7 @@ class TensorImpl {
       std::cerr << "Allocating : " << shape[0] << " " << shape[1] << std::endl;
 
-      int size = std::accumulate(shape_.begin(), shape_.end(),
-                                 1, std::multiplies<int>());
+      int size = GetTotalSize(shape_);
       data_.resize(size, value);
       cudnnCreateTensorDescriptor(&desc_);
       switch (shape_.size()) {
@@ -153,8 +159,7 @@ class TensorImpl {
     }
 
     void set(const std::vector<Float> &values) {
-      size_t totSize = std::accumulate(shape().begin(), shape().end(),
-                                       1, std::multiplies<int>());
+      size_t totSize = GetTotalSize(shape());
       std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl;
       assert(totSize == values.size());
       thrust::copy(values.begin(), values.end(), data_.begin());
@@ -164,7 +169,21 @@ class TensorImpl {
     {
       std::stringstream strm;
       assert(shape_.size());
-      strm << "shape=" << marian::Debug(shape_);
+      strm << "shape=" << marian::Debug(shape_) << std::endl;
+
+      // values
+      size_t totSize = GetTotalSize(shape());
+      std::vector<Float> values(totSize);
+      thrust::copy(data_.begin(), data_.end(), values.begin());
+
+      size_t ind = 0;
+      for (size_t i = 0; i < shape()[0]; ++i) {
+        for (size_t j = 0; j < shape()[1]; ++j) {
+          strm << values[ind] << " ";
+          ++ind;
+        }
+        strm << std::endl;
+      }
       return strm.str();
     }
   };
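
In NumPy terms, the new Debug output is roughly the following sketch; the exact formatting of marian::Debug(shape) is not shown in the diff, so the shape line here is an assumption:

import numpy as np

def debug(t):
    # Hypothetical Python mirror of TensorImpl::Debug for a 2-D tensor:
    # one shape line, then the values in row-major order, one row per line
    lines = ["shape=" + " ".join(str(d) for d in t.shape)]
    for row in t.reshape(t.shape[0], -1):
        lines.append(" ".join(str(v) for v in row))
    return "\n".join(lines)

# e.g. debug(np.ones((500, 10), dtype=np.float32))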

@@ -21,12 +21,11 @@ int main(int argc, char** argv) {
   Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
   Expr b = param(shape={1, LABEL_SIZE}, name="b0");
 
-  auto scores = dot(x, w) + b;
-  auto lr = softmax_fast(scores, axis=1, name="pred");
-  auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
-  cerr << "lr=" << lr.Debug() << endl;
-
+  Expr z = dot(x, w) + b;
+  Expr lr = softmax(z, axis=1, name="pred");
+  Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
+  //cerr << "lr=" << Debug(lr.val().shape()) << endl;
 #if 0
   int numofdata;
   vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
   vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
@@ -39,36 +38,21 @@ int main(int argc, char** argv) {
   tx.Load(images);
   ty.Load(labels);
 
-  cerr << "tx=" << tx.Debug() << endl;
-  cerr << "ty=" << ty.Debug() << endl;
 #else
   Tensor tx({500, 784}, 1);
   Tensor ty({500, 10}, 1);
 #endif
+  cerr << "tx=" << Debug(tx.shape()) << endl;
+  cerr << "ty=" << Debug(ty.shape()) << endl;
 
   x = tx;
   y = ty;
 
   graph.forward(500);
 
-  std::cerr << "Result: ";
-  for (auto val : scores.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-
-  std::cerr << "Result: ";
-  for (auto val : lr.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-  lr.val().Print();
-
-  std::cerr << "Log-likelihood: ";
-  for (auto val : graph.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-  graph.val().Print();
+  std::cerr << "z: " << Debug(z.val().shape()) << endl;
+  std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
+  std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl;
+
+  //std::cerr << "scores=" << scores.val().Debug() << endl;
+  std::cerr << "lr=" << lr.val().Debug() << endl;
 
   graph.backward();
 
   //std::cerr << graph["pred"].val()[0] << std::endl;
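
For reference, the forward pass this graph encodes matches the following NumPy sketch. The shapes come from the #else branch above; the all-ones inputs and zero-initialized parameters are assumptions for illustration, since the diff does not show how W0 and b0 are initialized:

import numpy as np

x = np.ones((500, 784), dtype=np.float32)   # tx({500, 784}, 1)
y = np.ones((500, 10), dtype=np.float32)    # ty({500, 10}, 1)
w = np.zeros((784, 10), dtype=np.float32)   # W0, init assumed
b = np.zeros((1, 10), dtype=np.float32)     # b0, init assumed

z = np.dot(x, w) + b                        # z = dot(x, w) + b
e = np.exp(z - z.max(axis=1, keepdims=True))
lr = e / e.sum(axis=1, keepdims=True)       # lr = softmax(z, axis=1)
cost = -np.mean(np.sum(y * np.log(lr), axis=1))  # -mean(sum(y * log(lr), axis=1), axis=0)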