Mirror of https://github.com/marian-nmt/marian.git (synced 2024-09-17 09:47:34 +03:00)

Commit f1fce72a0e
Merge branch 'master' of https://github.com/emjotde/Marian
scripts/train_test_model.py (new executable file, 91 lines added)
@@ -0,0 +1,91 @@
#!/usr/bin/env python

import sys
import os
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout


def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=1)[:, None]


def baseline_model(pixels_count, classes_count):
    model = Sequential()
    # model.add(Dense(pixels_count, input_dim=pixels_count, init='normal', activation='relu'))
    model.add(Dense(classes_count, input_dim=pixels_count, init='normal', activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


if __name__ == "__main__":
    ### Load trainset from mnist

    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    ### Flatten pictures into vectors

    pixels_count = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], pixels_count).astype('float32')
    print "X shape: ", X_train.shape

    X_test = X_test.reshape(X_test.shape[0], pixels_count).astype('float32')

    ### Normalize data to (0, 1)

    X_train = X_train / 255
    X_test = X_test / 255

    ### Change classes to one hot encoding matrixes

    y_train = np_utils.to_categorical(y_train)
    classes_count = y_train.shape[1]
    print "Y shape: ", y_train.shape

    y_test = np_utils.to_categorical(y_test)

    # Train weight matrix

    # Build the model
    model = baseline_model(pixels_count, classes_count)
    # Fit the model
    model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2)
    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100-scores[1]*100))

    ### Weight and bias matrixes - we extract them from the model

    # weights_ones = np.ones((pixels_count, classes_count))
    # print weights_ones.shape

    weights, bias = model.get_weights()
    print weights.shape
    print bias.shape
    print bias

    ### We calculate lr using softmax!

    dot_out = np.dot(X_train, weights)
    print "dot_out shape: ", dot_out.shape
    # print dot_out[:10]

    add_out = np.add(bias, dot_out)
    print "add_out shape: ", add_out.shape
    # print add_out[:10]

    # lr = np.around(softmax(add_out), decimals = 6)
    lr = softmax(add_out)
    print "lr shape: ", lr.shape
    # print lr[:10]
    # print np.count_nonzero(lr)

    ### Save model to npz files
    if not os.path.exists("test_model"):
        os.makedirs("test_model")
    np.savez("test_model/model", weights = weights, bias = bias)

    print "Model saved! Check test_model directory"
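For context (not part of this commit): since the script stores the trained matrices with np.savez under the keys "weights" and "bias", they can be read back with numpy and the same manual dot + bias + softmax pass repeated on the MNIST test set. A minimal sketch, assuming Keras and the MNIST data are available locally:

    import numpy as np
    from keras.datasets import mnist

    def softmax(x):
        return np.exp(x) / np.sum(np.exp(x), axis=1)[:, None]

    # np.savez("test_model/model", ...) above writes test_model/model.npz
    data = np.load("test_model/model.npz")
    weights, bias = data["weights"], data["bias"]

    # same preprocessing as in the training script
    (_, _), (X_test, y_test) = mnist.load_data()
    X_test = X_test.reshape(X_test.shape[0], 28 * 28).astype('float32') / 255

    # manual forward pass: dot + bias + row-wise softmax
    probs = softmax(np.dot(X_test, weights) + bias)
    accuracy = np.mean(np.argmax(probs, axis=1) == y_test)
    print("Manual accuracy: %.4f" % accuracy)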
@@ -59,8 +59,7 @@ inline std::vector<T> Tokenize( const std::string &input

 void Tensor::Load(const std::string &path)
 {
-  size_t totSize = std::accumulate(pimpl_->shape().begin(), pimpl_->shape().end(),
-                                   1, std::multiplies<int>());
+  size_t totSize = GetTotalSize(pimpl_->shape());
   cerr << "totSize=" << totSize << endl;
   std::vector<float> hostData(totSize);
src/tensor.h (29 changed lines)
@@ -48,6 +48,13 @@ inline std::string Debug(const Shape &shape)
   return strm.str();
 }

+inline size_t GetTotalSize(const Shape &shape)
+{
+  size_t ret = std::accumulate(shape.begin(), shape.end(),
+                               1, std::multiplies<int>());
+  return ret;
+}
+
 template<class Float>
 class TensorImpl {
   private:
@@ -81,8 +88,7 @@ class TensorImpl {

       std::cerr << "Allocating : " << shape[0] << " " << shape[1] << std::endl;

-      int size = std::accumulate(shape_.begin(), shape_.end(),
-                                 1, std::multiplies<int>());
+      int size = GetTotalSize(shape_);
       data_.resize(size, value);
       cudnnCreateTensorDescriptor(&desc_);
       switch (shape_.size()) {
@@ -153,8 +159,7 @@ class TensorImpl {
     }

     void set(const std::vector<Float> &values) {
-      size_t totSize = std::accumulate(shape().begin(), shape().end(),
-                                       1, std::multiplies<int>());
+      size_t totSize = GetTotalSize(shape());
       std::cerr << "tensor size=" << totSize << " vector size=" << values.size() << std::endl;
       assert(totSize == values.size());
       thrust::copy(values.begin(), values.end(), data_.begin());
@@ -164,7 +169,21 @@ class TensorImpl {
     {
       std::stringstream strm;
       assert(shape_.size());
-      strm << "shape=" << marian::Debug(shape_);
+      strm << "shape=" << marian::Debug(shape_) << std::endl;
+
+      // values
+      size_t totSize = GetTotalSize(shape());
+      std::vector<Float> values(totSize);
+      thrust::copy(data_.begin(), data_.end(), values.begin());
+
+      size_t ind = 0;
+      for (size_t i = 0; i < shape()[0]; ++i) {
+        for (size_t j = 0; j < shape()[1]; ++j) {
+          strm << values[ind] << " ";
+          ++ind;
+        }
+        strm << std::endl;
+      }
       return strm.str();
     }
 };
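For intuition only (a numpy analogue, not marian code): the new GetTotalSize helper is just the product of the shape's dimensions, and the extended TensorImpl::Debug() prints the shape followed by the values row by row. For the 500x784 dummy batch used in src/test.cu below this is 392000 values:

    import numpy as np

    shape = (500, 784)                 # the Tensor tx({500, 784}, 1) batch from src/test.cu
    tot_size = int(np.prod(shape))     # what GetTotalSize(shape) computes
    assert tot_size == 392000

    # roughly what the extended Debug() emits: shape line, then values row by row
    values = np.ones(shape, dtype=np.float32)
    print("shape=%s" % (shape,))
    for row in values[:2]:             # first two rows, for brevity
        print(" ".join(str(v) for v in row))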
src/test.cu (40 changed lines)
@@ -21,12 +21,11 @@ int main(int argc, char** argv) {
   Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
   Expr b = param(shape={1, LABEL_SIZE}, name="b0");

-  auto scores = dot(x, w) + b;
-  auto lr = softmax_fast(scores, axis=1, name="pred");
-  auto graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
-  cerr << "lr=" << lr.Debug() << endl;
+  Expr z = dot(x, w) + b;
+  Expr lr = softmax(z, axis=1, name="pred");
+  Expr graph = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
+  //cerr << "lr=" << Debug(lr.val().shape()) << endl;

 #if 0
   int numofdata;
   vector<float> images = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
   vector<float> labels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
@@ -39,35 +38,20 @@ int main(int argc, char** argv) {
   tx.Load(images);
   ty.Load(labels);

-  cerr << "tx=" << tx.Debug() << endl;
-  cerr << "ty=" << ty.Debug() << endl;
 #else
   Tensor tx({500, 784}, 1);
   Tensor ty({500, 10}, 1);
 #endif
+  cerr << "tx=" << Debug(tx.shape()) << endl;
+  cerr << "ty=" << Debug(ty.shape()) << endl;

   x = tx;
   y = ty;

   graph.forward(500);

-  std::cerr << "Result: ";
-  for (auto val : scores.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-  std::cerr << "Result: ";
-  for (auto val : lr.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-  lr.val().Print();
-  std::cerr << "Log-likelihood: ";
-  for (auto val : graph.val().shape()) {
-    std::cerr << val << " ";
-  }
-  std::cerr << std::endl;
-  graph.val().Print();
+  std::cerr << "z: " << Debug(z.val().shape()) << endl;
+  std::cerr << "lr: " << Debug(lr.val().shape()) << endl;
+  std::cerr << "Log-likelihood: " << Debug(graph.val().shape()) << endl ;
+
+  //std::cerr << "scores=" << scores.val().Debug() << endl;
+  std::cerr << "lr=" << lr.val().Debug() << endl;

   graph.backward();

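To see what the rewritten graph in src/test.cu evaluates, here is a rough numpy transcription (illustration only; it mirrors the expressions above, not marian's API): z = x·W + b, lr is the row-wise softmax of z, and the cost is the batch mean of -sum(y * log(lr)). The dummy tensors are all ones, as in the #else branch; how param() initializes W0 and b0 is not shown in this diff, so zeros are used as a stand-in.

    import numpy as np

    IMAGE_SIZE, LABEL_SIZE, BATCH = 784, 10, 500

    x = np.ones((BATCH, IMAGE_SIZE), dtype=np.float32)        # mirrors Tensor tx({500, 784}, 1)
    y = np.ones((BATCH, LABEL_SIZE), dtype=np.float32)        # mirrors Tensor ty({500, 10}, 1)
    w = np.zeros((IMAGE_SIZE, LABEL_SIZE), dtype=np.float32)  # stand-in for param "W0"
    b = np.zeros((1, LABEL_SIZE), dtype=np.float32)           # stand-in for param "b0"

    z = np.dot(x, w) + b                                            # dot(x, w) + b
    lr = np.exp(z) / np.sum(np.exp(z), axis=1, keepdims=True)       # softmax(z, axis=1)
    cost = -np.mean(np.sum(y * np.log(lr), axis=1), axis=0)         # -mean(sum(y * log(lr), axis=1), axis=0)

    print("z:", z.shape, "lr:", lr.shape, "cost:", cost)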