From c28ba2e67f62aad1d9a3acbe6a15d82ac2bb9792 Mon Sep 17 00:00:00 2001 From: Roman Grundkiewicz Date: Wed, 14 Sep 2016 18:18:58 +0200 Subject: [PATCH 1/2] add param random initializers --- src/marian.h | 1 + src/param_initializers.h | 34 ++++++++++++++++++++++++++++++++++ src/test.cu | 1 + 3 files changed, 36 insertions(+) create mode 100644 src/param_initializers.h diff --git a/src/marian.h b/src/marian.h index 8c987ccf..0876d4cd 100644 --- a/src/marian.h +++ b/src/marian.h @@ -5,4 +5,5 @@ #include "graph_operators.h" #include "expressions.h" #include "expression_operators.h" +#include "param_initializers.h" diff --git a/src/param_initializers.h b/src/param_initializers.h new file mode 100644 index 00000000..ab781064 --- /dev/null +++ b/src/param_initializers.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include +#include +#include + +#include "tensor.h" + +namespace marian { + +void zeros(Tensor t) { + std::vector vals(t.size(), 0.0f); + thrust::copy(vals.begin(), vals.end(), t.begin()); +} + +void ones(Tensor t) { + std::vector vals(t.size(), 1.0f); + thrust::copy(vals.begin(), vals.end(), t.begin()); +} + +void randreal(Tensor t) { + std::random_device device; + std::default_random_engine engine(device()); + std::uniform_real_distribution<> dist(0, 1); + auto gen = std::bind(dist, engine); + + std::vector vals(t.size()); + std::generate(begin(vals), end(vals), gen); + + thrust::copy(vals.begin(), vals.end(), t.begin()); +} + +} // namespace marian diff --git a/src/test.cu b/src/test.cu index 0285e3a5..a86c60ee 100644 --- a/src/test.cu +++ b/src/test.cu @@ -20,6 +20,7 @@ int main(int argc, char** argv) { Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); + // Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=randreal); Expr b = param(shape={1, LABEL_SIZE}, name="b0"); Expr z = dot(x, w) + b; From ea04f8a6baf692520aeed5de56dd1cefd18df712 Mon Sep 17 00:00:00 2001 From: Maximiliana Behnke Date: Wed, 14 Sep 2016 18:56:13 +0200 Subject: [PATCH 2/2] Modify single layer training script, add 2-layer training script --- scripts/train_test_model_multi.py | 72 +++++++++++++++++++ ...st_model.py => train_test_model_single.py} | 8 +-- 2 files changed, 76 insertions(+), 4 deletions(-) create mode 100755 scripts/train_test_model_multi.py rename scripts/{train_test_model.py => train_test_model_single.py} (91%) diff --git a/scripts/train_test_model_multi.py b/scripts/train_test_model_multi.py new file mode 100755 index 00000000..67ae0131 --- /dev/null +++ b/scripts/train_test_model_multi.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python + +import sys +import os +import numpy as np +from keras.datasets import mnist +from keras.utils import np_utils +from keras.models import Sequential +from keras.layers import Dense +from keras.layers import Dropout + +def softmax(x): + return np.exp(x) / np.sum(np.exp(x), axis=1)[:, None] + + +def baseline_model(pixels_count, classes_count): + model = Sequential() + model.add(Dense(100, input_dim=pixels_count, init='normal', activation='tanh')) + model.add(Dense(classes_count, input_dim=100, init='normal', activation='softmax')) + model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) + return model + + +if __name__ == "__main__": + ### Load trainset from mnist + + (X_train, y_train), (X_test, y_test) = mnist.load_data() + + ### Flatten pictures into vectors + + pixels_count = X_train.shape[1] * X_train.shape[2] + X_train = X_train.reshape(X_train.shape[0], pixels_count).astype('float32') + print "X shape: ", X_train.shape + + X_test = X_test.reshape(X_test.shape[0], pixels_count).astype('float32') + + ### Normalize data to (0, 1) + + X_train = X_train / 255 + X_test = X_test / 255 + + ### Change classes to one hot encoding matrixes + + y_train = np_utils.to_categorical(y_train) + classes_count = y_train.shape[1] + print "Y shape: ", y_train.shape + + y_test = np_utils.to_categorical(y_test) + + # Train weight matrix + + # Build the model + model = baseline_model(pixels_count, classes_count) + # Fit the model + model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2) + # Final evaluation of the model + scores = model.evaluate(X_test, y_test, verbose=0) + print("Baseline Error: %.2f%%" % (100-scores[1]*100)) + + ### Weight and bias matrixes - we extract them from the model + + # weights_ones = np.ones((pixels_count, classes_count)) + # print weights_ones.shape + + weights1, bias1, weights2, bias2 = model.get_weights() + ### Save model to npz files + if not os.path.exists("test_model_multi"): + os.makedirs("test_model_multi") + # np.savez("test_model_multi/model", *model) + np.savez("test_model_multi/model", weights1 = weights1, bias1 = bias1, weights2 = weights2, bias2 = bias2) + + print "Model saved! Check test_model_multi directory" diff --git a/scripts/train_test_model.py b/scripts/train_test_model_single.py similarity index 91% rename from scripts/train_test_model.py rename to scripts/train_test_model_single.py index 4f3236a9..f3a769b4 100755 --- a/scripts/train_test_model.py +++ b/scripts/train_test_model_single.py @@ -84,8 +84,8 @@ if __name__ == "__main__": # print np.count_nonzero(lr)i ### Save model to npz files - if not os.path.exists("test_model"): - os.makedirs("test_model") - np.savez("test_model/model", weights = weights, bias = bias) + if not os.path.exists("test_model_single"): + os.makedirs("test_model_single") + np.savez("test_model_single/model", weights = weights, bias = bias) - print "Model saved! Check test_model directory" + print "Model saved! Check test_model_single directory"