From c28ba2e67f62aad1d9a3acbe6a15d82ac2bb9792 Mon Sep 17 00:00:00 2001
From: Roman Grundkiewicz <romang@amu.edu.pl>
Date: Wed, 14 Sep 2016 18:18:58 +0200
Subject: [PATCH 1/2] add param random initializers

---
 src/marian.h             |  1 +
 src/param_initializers.h | 34 ++++++++++++++++++++++++++++++++++
 src/test.cu              |  1 +
 3 files changed, 36 insertions(+)
 create mode 100644 src/param_initializers.h
diff --git a/src/marian.h b/src/marian.h
index 8c987ccf..0876d4cd 100644
--- a/src/marian.h
+++ b/src/marian.h
@@ -5,4 +5,5 @@
 #include "graph_operators.h"
 #include "expressions.h"
 #include "expression_operators.h"
+#include "param_initializers.h"
 
diff --git a/src/param_initializers.h b/src/param_initializers.h
new file mode 100644
index 00000000..ab781064
--- /dev/null
+++ b/src/param_initializers.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <random>
+#include <algorithm>
+#include <iterator>
+#include <functional>
+
+#include "tensor.h"
+
+namespace marian {
+
+inline void zeros(Tensor t) {  // Fill all t.size() elements of t with 0.0f. inline: header-defined, must not emit one symbol per includer.
+  std::vector<float> vals(t.size(), 0.0f);  // staging buffer holding the fill value
+  thrust::copy(vals.begin(), vals.end(), t.begin());
+}
+
+inline void ones(Tensor t) {  // Fill all t.size() elements of t with 1.0f. inline: header-defined, must not emit one symbol per includer.
+  std::vector<float> vals(t.size(), 1.0f);  // staging buffer holding the fill value
+  thrust::copy(vals.begin(), vals.end(), t.begin());
+}
+
+inline void randreal(Tensor t) {  // Fill t with uniform random floats drawn from [0, 1). inline: header-defined function.
+  std::random_device device;  // source of a fresh seed on every call
+  std::default_random_engine engine(device());
+  std::uniform_real_distribution<float> dist(0.0f, 1.0f);  // float matches the element type of vals (no double->float narrowing)
+  auto gen = std::bind(dist, engine);  // bind stores its own copies of dist and engine
+
+  std::vector<float> vals(t.size());
+  std::generate(begin(vals), end(vals), gen);
+
+  thrust::copy(vals.begin(), vals.end(), t.begin());
+}
+
+} // namespace marian
diff --git a/src/test.cu b/src/test.cu
index 0285e3a5..a86c60ee 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -20,6 +20,7 @@ int main(int argc, char** argv) {
   Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
   
   Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
+  // Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=randreal);
   Expr b = param(shape={1, LABEL_SIZE}, name="b0");
     
   Expr z = dot(x, w) + b;

From ea04f8a6baf692520aeed5de56dd1cefd18df712 Mon Sep 17 00:00:00 2001
From: Maximiliana Behnke <maxib@fatboy.lan>
Date: Wed, 14 Sep 2016 18:56:13 +0200
Subject: [PATCH 2/2] Modify single layer training script, add 2-layer training
 script

---
 scripts/train_test_model_multi.py             | 72 +++++++++++++++++++
 ...st_model.py => train_test_model_single.py} |  8 +--
 2 files changed, 76 insertions(+), 4 deletions(-)
 create mode 100755 scripts/train_test_model_multi.py
 rename scripts/{train_test_model.py => train_test_model_single.py} (91%)

diff --git a/scripts/train_test_model_multi.py b/scripts/train_test_model_multi.py
new file mode 100755
index 00000000..67ae0131
--- /dev/null
+++ b/scripts/train_test_model_multi.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import numpy as np
+from keras.datasets import mnist
+from keras.utils import np_utils
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.layers import Dropout
+
+def softmax(x):  # Row-wise softmax of a 2-D array, computed as exp(x - logsumexp(x)) so large logits cannot overflow exp().
+    return np.exp(x - np.logaddexp.reduce(x, axis=1)[:, None])
+
+
+def baseline_model(pixels_count, classes_count):  # Build and compile the MLP: pixels_count -> 100 (tanh) -> classes_count (softmax).
+    hidden = Dense(100, input_dim=pixels_count, init='normal', activation='tanh')
+    output = Dense(classes_count, input_dim=100, init='normal', activation='softmax')
+    model = Sequential([hidden, output])  # same layer order as adding them one by one
+    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
+    return model
+
+
+if __name__ == "__main__":  # Train the 2-layer MNIST classifier and save its weights to test_model_multi/model.
+    ### Load trainset from mnist
+
+    (X_train, y_train), (X_test, y_test) = mnist.load_data()
+
+    ### Flatten pictures into vectors
+
+    pixels_count = X_train.shape[1] * X_train.shape[2]
+    X_train = X_train.reshape(X_train.shape[0], pixels_count).astype('float32')
+    print("X shape:  %s" % (X_train.shape,))  # single-argument call form: same output on Python 2 and 3
+
+    X_test = X_test.reshape(X_test.shape[0], pixels_count).astype('float32')
+
+    ### Scale pixel values by 1/255 (presumably 8-bit intensities, giving the range [0, 1])
+
+    X_train = X_train / 255
+    X_test = X_test / 255
+
+    ### Change classes to one-hot encoded matrices
+
+    y_train = np_utils.to_categorical(y_train)
+    classes_count = y_train.shape[1]
+    print("Y shape:  %s" % (y_train.shape,))
+
+    y_test = np_utils.to_categorical(y_test, classes_count)  # force the same width as y_train
+
+    # Train weight matrix
+
+    # Build the model
+    model = baseline_model(pixels_count, classes_count)
+    # Fit the model
+    model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2)
+    # Final evaluation of the model
+    scores = model.evaluate(X_test, y_test, verbose=0)
+    print("Baseline Error: %.2f%%" % (100-scores[1]*100))
+
+    ### Weight and bias matrices - we extract them from the model
+
+    # The unpacking below expects exactly four arrays, in the order:
+    # hidden-layer weights, hidden-layer bias, output-layer weights, output-layer bias.
+
+    weights1, bias1, weights2, bias2 = model.get_weights()
+    ### Save model to npz files
+    if not os.path.exists("test_model_multi"):
+        os.makedirs("test_model_multi")
+    # Each array is stored in the archive under the keyword name used below.
+    np.savez("test_model_multi/model", weights1 = weights1, bias1 = bias1, weights2 = weights2, bias2 = bias2)
+
+    print("Model saved! Check test_model_multi directory")
diff --git a/scripts/train_test_model.py b/scripts/train_test_model_single.py
similarity index 91%
rename from scripts/train_test_model.py
rename to scripts/train_test_model_single.py
index 4f3236a9..f3a769b4 100755
--- a/scripts/train_test_model.py
+++ b/scripts/train_test_model_single.py
@@ -84,8 +84,8 @@ if __name__ == "__main__":
     # print np.count_nonzero(lr)i
 
     ### Save model to npz files
-    if not os.path.exists("test_model"):
-        os.makedirs("test_model")
-    np.savez("test_model/model", weights = weights, bias = bias)
+    if not os.path.exists("test_model_single"):
+        os.makedirs("test_model_single")
+    np.savez("test_model_single/model", weights = weights, bias = bias)
 
-    print "Model saved! Check test_model directory"
+    print "Model saved! Check test_model_single directory"