mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
preparing mnist benchmark
This commit is contained in:
parent
1d11b4a40e
commit
bfd9c369f8
@ -3,6 +3,7 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import time
|
||||||
from keras.datasets import mnist
|
from keras.datasets import mnist
|
||||||
from keras.utils import np_utils
|
from keras.utils import np_utils
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
@ -15,7 +16,11 @@ def softmax(x):
|
|||||||
|
|
||||||
def baseline_model(pixels_count, classes_count):
|
def baseline_model(pixels_count, classes_count):
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
model.add(Dense(100, input_dim=pixels_count, init='normal', activation='tanh'))
|
model.add(Dense(2000, input_dim=pixels_count, init='normal', activation='tanh'))
|
||||||
|
model.add(Dense(2000, init='normal', activation='tanh'))
|
||||||
|
model.add(Dense(2000, init='normal', activation='tanh'))
|
||||||
|
model.add(Dense(2000, init='normal', activation='tanh'))
|
||||||
|
model.add(Dense(2000, init='normal', activation='tanh'))
|
||||||
model.add(Dense(classes_count, input_dim=100, init='normal', activation='softmax'))
|
model.add(Dense(classes_count, input_dim=100, init='normal', activation='softmax'))
|
||||||
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
|
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
|
||||||
return model
|
return model
|
||||||
@ -52,21 +57,24 @@ if __name__ == "__main__":
|
|||||||
# Build the model
|
# Build the model
|
||||||
model = baseline_model(pixels_count, classes_count)
|
model = baseline_model(pixels_count, classes_count)
|
||||||
# Fit the model
|
# Fit the model
|
||||||
model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2)
|
|
||||||
|
start = time.time();
|
||||||
|
model.fit(X_train, y_train, nb_epoch=10, batch_size=200, verbose=2)
|
||||||
|
print "Time elapsed", time.time() - start, "s"
|
||||||
# Final evaluation of the model
|
# Final evaluation of the model
|
||||||
scores = model.evaluate(X_test, y_test, verbose=0)
|
scores = model.evaluate(X_test, y_test, verbose=0)
|
||||||
print("Baseline Error: %.2f%%" % (100-scores[1]*100))
|
print("Accuracy: %.2f%%" % (scores[1] * 100))
|
||||||
|
|
||||||
### Weight and bias matrixes - we extract them from the model
|
### Weight and bias matrixes - we extract them from the model
|
||||||
|
|
||||||
# weights_ones = np.ones((pixels_count, classes_count))
|
# weights_ones = np.ones((pixels_count, classes_count))
|
||||||
# print weights_ones.shape
|
# print weights_ones.shape
|
||||||
|
|
||||||
weights1, bias1, weights2, bias2 = model.get_weights()
|
#weights1, bias1, weights2, bias2 = model.get_weights()
|
||||||
### Save model to npz files
|
### Save model to npz files
|
||||||
if not os.path.exists("test_model_multi"):
|
#if not os.path.exists("test_model_multi"):
|
||||||
os.makedirs("test_model_multi")
|
# os.makedirs("test_model_multi")
|
||||||
# np.savez("test_model_multi/model", *model)
|
# np.savez("test_model_multi/model", *model)
|
||||||
np.savez("test_model_multi/model", weights1 = weights1, bias1 = bias1, weights2 = weights2, bias2 = bias2)
|
#np.savez("test_model_multi/model", weights1 = weights1, bias1 = bias1, weights2 = weights2, bias2 = bias2)
|
||||||
|
|
||||||
print "Model saved! Check test_model_multi directory"
|
#print "Model saved! Check test_model_multi directory"
|
||||||
|
@ -21,30 +21,25 @@ cuda_add_executable(
|
|||||||
target_link_libraries(marian marian_lib)
|
target_link_libraries(marian marian_lib)
|
||||||
|
|
||||||
cuda_add_executable(
|
cuda_add_executable(
|
||||||
train_mnist
|
mnist_benchmark
|
||||||
train_mnist.cu
|
mnist_benchmark.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(train_mnist marian_lib)
|
|
||||||
|
|
||||||
cuda_add_executable(
|
|
||||||
validate_mnist
|
|
||||||
validate_mnist.cu
|
|
||||||
)
|
|
||||||
cuda_add_executable(
|
cuda_add_executable(
|
||||||
validate_mnist_batch
|
validate_mnist_batch
|
||||||
validate_mnist_batch.cu
|
validate_mnist_batch.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
cuda_add_executable(
|
cuda_add_executable(
|
||||||
validate_encoder_decoder
|
validate_encoder_decoder
|
||||||
validate_encoder_decoder.cu
|
validate_encoder_decoder.cu
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(validate_mnist marian_lib)
|
target_link_libraries(mnist_benchmark marian_lib)
|
||||||
target_link_libraries(validate_mnist_batch marian_lib)
|
target_link_libraries(validate_mnist_batch marian_lib)
|
||||||
target_link_libraries(validate_encoder_decoder marian_lib)
|
target_link_libraries(validate_encoder_decoder marian_lib)
|
||||||
|
|
||||||
foreach(exec marian train_mnist validate_mnist validate_mnist_batch validate_encoder_decoder)
|
foreach(exec marian mnist_benchmark validate_mnist_batch validate_encoder_decoder)
|
||||||
target_link_libraries(${exec} ${EXT_LIBS} cuda cudnn)
|
target_link_libraries(${exec} ${EXT_LIBS} cuda cudnn)
|
||||||
cuda_add_cublas_to_target(${exec})
|
cuda_add_cublas_to_target(${exec})
|
||||||
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
|
||||||
|
164
src/mnist_benchmark.cu
Normal file
164
src/mnist_benchmark.cu
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
#include <algorithm>
|
||||||
|
#include <chrono>
|
||||||
|
#include <boost/timer/timer.hpp>
|
||||||
|
|
||||||
|
#include "marian.h"
|
||||||
|
#include "mnist.h"
|
||||||
|
#include "npz_converter.h"
|
||||||
|
#include "optimizers.h"
|
||||||
|
|
||||||
|
using namespace marian;
|
||||||
|
using namespace keywords;
|
||||||
|
|
||||||
|
const size_t IMAGE_SIZE = 784;
|
||||||
|
const size_t LABEL_SIZE = 10;
|
||||||
|
int BATCH_SIZE = 200;
|
||||||
|
|
||||||
|
ExpressionGraph build_graph(const std::vector<int>& dims) {
|
||||||
|
std::cerr << "Building model... ";
|
||||||
|
boost::timer::cpu_timer timer;
|
||||||
|
|
||||||
|
ExpressionGraph g;
|
||||||
|
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
||||||
|
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
||||||
|
|
||||||
|
std::vector<Expr> layers, weights, biases;
|
||||||
|
for(int i = 0; i < dims.size()-1; ++i) {
|
||||||
|
int in = dims[i];
|
||||||
|
int out = dims[i+1];
|
||||||
|
|
||||||
|
if(i == 0) {
|
||||||
|
layers.emplace_back(x);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
layers.emplace_back(tanh(dot(layers.back(), weights.back())) + biases.back());
|
||||||
|
}
|
||||||
|
|
||||||
|
weights.emplace_back(
|
||||||
|
g.param(shape={in, out},
|
||||||
|
init=normal()));
|
||||||
|
biases.emplace_back(
|
||||||
|
g.param(shape={1, out},
|
||||||
|
init=normal()));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
auto probs = named(
|
||||||
|
softmax(dot(layers.back(), weights.back()) + biases.back()),
|
||||||
|
"probs"
|
||||||
|
);
|
||||||
|
|
||||||
|
auto cost = -mean(sum(y * log(probs), axis=1), axis=0);
|
||||||
|
auto costreg = named(
|
||||||
|
cost, "cost"
|
||||||
|
);
|
||||||
|
|
||||||
|
std::cerr << timer.format(5, "%ws") << std::endl;
|
||||||
|
return g;
|
||||||
|
}
|
||||||
|
|
||||||
|
void shuffle(std::vector<float>& x, std::vector<float>& y, size_t dimx, size_t dimy) {
|
||||||
|
std::srand(std::time(0));
|
||||||
|
std::vector<size_t> ind;
|
||||||
|
for(size_t i = 0; i < x.size() / dimx; ++i) {
|
||||||
|
ind.push_back(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::random_shuffle(ind.begin(), ind.end());
|
||||||
|
|
||||||
|
std::vector<float> xShuffled(x.size());
|
||||||
|
std::vector<float> yShuffled(y.size());
|
||||||
|
|
||||||
|
int j = 0;
|
||||||
|
for(auto i : ind) {
|
||||||
|
std::copy(x.begin() + j * dimx, x.begin() + j * dimx + dimx, xShuffled.begin() + i * dimx);
|
||||||
|
std::copy(y.begin() + j * dimy, y.begin() + j * dimy + dimy, yShuffled.begin() + i * dimy);
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
x = xShuffled;
|
||||||
|
y = yShuffled;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
|
int trainRows, testRows;
|
||||||
|
|
||||||
|
std::cerr << "Loading train set...";
|
||||||
|
std::vector<float> trainImages = datasets::mnist::ReadImages("../examples/mnist/train-images-idx3-ubyte", trainRows, IMAGE_SIZE);
|
||||||
|
std::vector<float> trainLabels = datasets::mnist::ReadLabels("../examples/mnist/train-labels-idx1-ubyte", trainRows, LABEL_SIZE);
|
||||||
|
std::cerr << "Done." << std::endl;
|
||||||
|
|
||||||
|
std::cerr << "Loading test set...";
|
||||||
|
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", testRows, IMAGE_SIZE);
|
||||||
|
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", testRows, LABEL_SIZE);
|
||||||
|
std::cerr << "Done." << std::endl;
|
||||||
|
|
||||||
|
ExpressionGraph g = build_graph({IMAGE_SIZE, 2000, 2000, 2000, 2000, 2000, LABEL_SIZE});
|
||||||
|
|
||||||
|
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
|
||||||
|
Tensor yt({BATCH_SIZE, LABEL_SIZE});
|
||||||
|
|
||||||
|
|
||||||
|
boost::timer::cpu_timer total;
|
||||||
|
Adam opt;
|
||||||
|
for(int i = 1; i <= 10; ++i) {
|
||||||
|
boost::timer::cpu_timer timer;
|
||||||
|
shuffle(trainImages, trainLabels, IMAGE_SIZE, LABEL_SIZE);
|
||||||
|
float cost = 0;
|
||||||
|
for(int j = 0; j < trainRows / BATCH_SIZE; j++) {
|
||||||
|
size_t xBatch = IMAGE_SIZE * BATCH_SIZE;
|
||||||
|
auto xbegin = trainImages.begin() + j * xBatch;
|
||||||
|
auto xend = xbegin + xBatch;
|
||||||
|
xt.set(xbegin, xend);
|
||||||
|
|
||||||
|
size_t yBatch = LABEL_SIZE * BATCH_SIZE;
|
||||||
|
auto ybegin = trainLabels.begin() + j * yBatch;
|
||||||
|
auto yend = ybegin + yBatch;
|
||||||
|
yt.set(ybegin, yend);
|
||||||
|
|
||||||
|
g["x"] = xt;
|
||||||
|
g["y"] = yt;
|
||||||
|
|
||||||
|
opt(g, BATCH_SIZE);
|
||||||
|
cost += g["cost"].val()[0];
|
||||||
|
}
|
||||||
|
std::cerr << "Epoch: " << i << " - Cost: " << cost / trainRows * BATCH_SIZE << " - " << timer.format(3, "%ws") << std::endl;
|
||||||
|
}
|
||||||
|
std::cerr << "Total: " << total.format(3, "%ws") << std::endl;
|
||||||
|
|
||||||
|
std::vector<float> results;
|
||||||
|
for(int j = 0; j < testRows / BATCH_SIZE; j++) {
|
||||||
|
size_t xBatch = IMAGE_SIZE * BATCH_SIZE;
|
||||||
|
auto xbegin = testImages.begin() + j * xBatch;
|
||||||
|
auto xend = xbegin + xBatch;
|
||||||
|
xt.set(xbegin, xend);
|
||||||
|
yt.set(0);
|
||||||
|
|
||||||
|
g["x"] = xt;
|
||||||
|
g["y"] = yt;
|
||||||
|
|
||||||
|
g.forward(BATCH_SIZE);
|
||||||
|
|
||||||
|
std::vector<float> bResults;
|
||||||
|
bResults << g["probs"].val();
|
||||||
|
results.insert(results.end(), bResults.begin(), bResults.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t acc = 0;
|
||||||
|
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
||||||
|
size_t correct = 0;
|
||||||
|
size_t proposed = 0;
|
||||||
|
for (size_t j = 0; j < LABEL_SIZE; ++j) {
|
||||||
|
if (testLabels[i + j])
|
||||||
|
correct = j;
|
||||||
|
if (results[i + j] > results[i + proposed])
|
||||||
|
proposed = j;
|
||||||
|
}
|
||||||
|
acc += (correct == proposed);
|
||||||
|
}
|
||||||
|
std::cerr << "Accuracy: " << float(acc) / testRows << std::endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -31,7 +31,7 @@ class Sgd {
|
|||||||
// @TODO: Add serialization for historic gradients and parameters
|
// @TODO: Add serialization for historic gradients and parameters
|
||||||
class Adagrad {
|
class Adagrad {
|
||||||
public:
|
public:
|
||||||
Adagrad(float eta=0.01, float eps=10e-8)
|
Adagrad(float eta=0.01, float eps=1e-8)
|
||||||
: eta_(eta), eps_(eps) {}
|
: eta_(eta), eps_(eps) {}
|
||||||
|
|
||||||
void operator()(ExpressionGraph& graph, int batchSize) {
|
void operator()(ExpressionGraph& graph, int batchSize) {
|
||||||
@ -61,7 +61,7 @@ class Adagrad {
|
|||||||
// https://arxiv.org/pdf/1412.6980v8.pdf
|
// https://arxiv.org/pdf/1412.6980v8.pdf
|
||||||
class Adam {
|
class Adam {
|
||||||
public:
|
public:
|
||||||
Adam(float eta=0.001, float beta1=0.999, float beta2=0.999, float eps=10e-8)
|
Adam(float eta=0.001, float beta1=0.9, float beta2=0.999, float eps=1e-8)
|
||||||
: eta_(eta), beta1_(beta1), beta2_(beta2), eps_(eps), t_(0) {}
|
: eta_(eta), beta1_(beta1), beta2_(beta2), eps_(eps), t_(0) {}
|
||||||
|
|
||||||
void operator()(ExpressionGraph& graph, int batchSize) {
|
void operator()(ExpressionGraph& graph, int batchSize) {
|
||||||
@ -101,4 +101,4 @@ class Adam {
|
|||||||
std::vector<Tensor> vt_;
|
std::vector<Tensor> vt_;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
@ -33,13 +33,13 @@ void distribution(Tensor t, float a, float b) {
|
|||||||
t << vals;
|
t << vals;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::function<void(Tensor)> normal(float mean = 0.0, float std = 0.1) {
|
std::function<void(Tensor)> normal(float mean = 0.0, float std = 0.05) {
|
||||||
return [mean, std](Tensor t) {
|
return [mean, std](Tensor t) {
|
||||||
distribution<std::normal_distribution<float>>(t, mean, std);
|
distribution<std::normal_distribution<float>>(t, mean, std);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::function<void(Tensor)> uniform(float a = 0.0, float b = 0.1) {
|
std::function<void(Tensor)> uniform(float a = 0.0, float b = 0.05) {
|
||||||
return [a, b](Tensor t) {
|
return [a, b](Tensor t) {
|
||||||
distribution<std::uniform_real_distribution<float>>(t, a, b);
|
distribution<std::uniform_real_distribution<float>>(t, a, b);
|
||||||
};
|
};
|
||||||
|
@ -4,6 +4,14 @@ using namespace std;
|
|||||||
|
|
||||||
namespace marian {
|
namespace marian {
|
||||||
|
|
||||||
|
// @TODO: handle this better, maybe per thread?
|
||||||
|
static cublasHandle_t create_handle() {
|
||||||
|
cublasHandle_t cublasHandle;
|
||||||
|
cublasCreate(&cublasHandle);
|
||||||
|
return cublasHandle;
|
||||||
|
}
|
||||||
|
cublasHandle_t cublasHandle = create_handle();
|
||||||
|
|
||||||
__global__ void gSubtractMean(float* out, float* weights,
|
__global__ void gSubtractMean(float* out, float* weights,
|
||||||
size_t rows, size_t cols) {
|
size_t rows, size_t cols) {
|
||||||
for(int bid = 0; bid < rows; bid += gridDim.x) {
|
for(int bid = 0; bid < rows; bid += gridDim.x) {
|
||||||
@ -212,10 +220,7 @@ Tensor Prod(cublasHandle_t handle, Tensor C, const Tensor A, const Tensor B,
|
|||||||
Tensor Prod(Tensor C, const Tensor A, const Tensor B,
|
Tensor Prod(Tensor C, const Tensor A, const Tensor B,
|
||||||
bool transA, bool transB, Float beta) {
|
bool transA, bool transB, Float beta) {
|
||||||
|
|
||||||
cublasHandle_t cublasHandle;
|
|
||||||
cublasCreate(&cublasHandle);
|
|
||||||
Tensor temp = Prod(cublasHandle, C, A, B, transA, transB, beta);
|
Tensor temp = Prod(cublasHandle, C, A, B, transA, transB, beta);
|
||||||
cublasDestroy(cublasHandle);
|
|
||||||
return temp;
|
return temp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,34 +0,0 @@
|
|||||||
|
|
||||||
#include "marian.h"
|
|
||||||
#include "mnist.h"
|
|
||||||
#include "optimizers.h"
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
const size_t IMAGE_SIZE = 784;
|
|
||||||
const size_t LABEL_SIZE = 10;
|
|
||||||
int numofdata;
|
|
||||||
|
|
||||||
std::vector<float> trainImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
|
|
||||||
std::vector<float> trainLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
|
|
||||||
|
|
||||||
using namespace marian;
|
|
||||||
using namespace keywords;
|
|
||||||
|
|
||||||
ExpressionGraph g;
|
|
||||||
|
|
||||||
Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
|
||||||
Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
|
||||||
|
|
||||||
Expr w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE}), "w");
|
|
||||||
Expr b = named(g.param(shape={1, LABEL_SIZE}), "b");
|
|
||||||
|
|
||||||
auto scores = dot(x, w) + b;
|
|
||||||
auto lr = softmax(scores);
|
|
||||||
auto cost = named(-mean(sum(y * log(lr), axis=1), axis=0), "cost");
|
|
||||||
std::cerr << "lr=" << lr.Debug() << std::endl;
|
|
||||||
|
|
||||||
Adam opt;
|
|
||||||
opt(g, 300);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
@ -1,90 +0,0 @@
|
|||||||
|
|
||||||
#include "marian.h"
|
|
||||||
#include "mnist.h"
|
|
||||||
#include "npz_converter.h"
|
|
||||||
#include "optimizers.h"
|
|
||||||
|
|
||||||
using namespace marian;
|
|
||||||
using namespace keywords;
|
|
||||||
|
|
||||||
const size_t IMAGE_SIZE = 784;
|
|
||||||
const size_t LABEL_SIZE = 10;
|
|
||||||
int BATCH_SIZE = 10000;
|
|
||||||
|
|
||||||
ExpressionGraph build_graph() {
|
|
||||||
std::cerr << "Loading model params...";
|
|
||||||
NpzConverter converter("../scripts/test_model_single/model.npz");
|
|
||||||
|
|
||||||
std::vector<float> wData, bData;
|
|
||||||
Shape wShape, bShape;
|
|
||||||
converter.Load("weights", wData, wShape);
|
|
||||||
converter.Load("bias", bData, bShape);
|
|
||||||
std::cerr << "Done." << std::endl;
|
|
||||||
|
|
||||||
std::cerr << "Building model...";
|
|
||||||
|
|
||||||
ExpressionGraph g;
|
|
||||||
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
|
||||||
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
|
||||||
|
|
||||||
auto w = named(g.param(shape={IMAGE_SIZE, LABEL_SIZE},
|
|
||||||
init=from_vector(wData)), "w");
|
|
||||||
auto b = named(g.param(shape={1, LABEL_SIZE},
|
|
||||||
init=from_vector(bData)), "b");
|
|
||||||
|
|
||||||
|
|
||||||
auto probs = named(
|
|
||||||
softmax(dot(x, w) + b),
|
|
||||||
"probs"
|
|
||||||
);
|
|
||||||
|
|
||||||
auto cost = named(
|
|
||||||
-mean(sum(y * log(probs), axis=1), axis=0),
|
|
||||||
"cost"
|
|
||||||
);
|
|
||||||
|
|
||||||
std::cerr << "Done." << std::endl;
|
|
||||||
return g;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
|
|
||||||
std::cerr << "Loading test set...";
|
|
||||||
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
|
|
||||||
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
|
|
||||||
std::cerr << "Done." << std::endl;
|
|
||||||
|
|
||||||
ExpressionGraph g = build_graph();
|
|
||||||
|
|
||||||
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
|
|
||||||
Tensor yt({BATCH_SIZE, LABEL_SIZE});
|
|
||||||
|
|
||||||
g["x"] = (xt << testImages);
|
|
||||||
g["y"] = (yt << testLabels);
|
|
||||||
|
|
||||||
Adagrad opt;
|
|
||||||
for(size_t j = 0; j < 20; ++j) {
|
|
||||||
for(size_t i = 0; i < 60; ++i) {
|
|
||||||
opt(g, BATCH_SIZE);
|
|
||||||
}
|
|
||||||
std::cerr << g["cost"].val()[0] << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> results;
|
|
||||||
results << g["probs"].val();
|
|
||||||
|
|
||||||
size_t acc = 0;
|
|
||||||
for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
|
|
||||||
size_t correct = 0;
|
|
||||||
size_t proposed = 0;
|
|
||||||
for (size_t j = 0; j < LABEL_SIZE; ++j) {
|
|
||||||
if (testLabels[i+j])
|
|
||||||
correct = j;
|
|
||||||
if (results[i + j] > results[i + proposed])
|
|
||||||
proposed = j;
|
|
||||||
}
|
|
||||||
acc += (correct == proposed);
|
|
||||||
}
|
|
||||||
std::cerr << "Cost: " << g["cost"].val()[0] << " - Accuracy: " << float(acc) / BATCH_SIZE << std::endl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user