diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 365df389..cb121111 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -29,10 +29,15 @@ cuda_add_executable(
   validate_mnist
   validate_mnist.cu
 )
+cuda_add_executable(
+  validate_mnist_batch
+  validate_mnist_batch.cu
+)
 
 target_link_libraries(validate_mnist marian_lib)
+target_link_libraries(validate_mnist_batch marian_lib)
 
-foreach(exec marian train_mnist validate_mnist)
+foreach(exec marian train_mnist validate_mnist validate_mnist_batch )
   target_link_libraries(${exec} ${EXT_LIBS} cuda cudnn)
   cuda_add_cublas_to_target(${exec})
   set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
diff --git a/src/validate_mnist_batch.cu b/src/validate_mnist_batch.cu
new file mode 100644
index 00000000..ac4e7359
--- /dev/null
+++ b/src/validate_mnist_batch.cu
@@ -0,0 +1,112 @@
+// Validates a single-layer softmax classifier on the MNIST test set.
+//
+// Weights and bias are loaded from an .npz file, the test images are pushed
+// through the expression graph in mini-batches of BATCH_SIZE rows, and the
+// fraction of correctly classified images is printed to stderr.
+
+#include <algorithm>
+
+#include "marian.h"
+#include "mnist.h"
+#include "npz_converter.h"
+
+using namespace marian;
+using namespace keywords;
+
+int main(int argc, char** argv) {
+  // Fail early with a readable message if the device cannot be selected.
+  const cudaError_t status = cudaSetDevice(0);
+  if (status != cudaSuccess) {
+    std::cerr << "cudaSetDevice(0) failed: " << cudaGetErrorString(status)
+              << std::endl;
+    return 1;
+  }
+
+  const size_t IMAGE_SIZE = 784;
+  const size_t LABEL_SIZE = 10;
+  const size_t BATCH_SIZE = 24;
+  int numofdata = 0;
+
+  // NOTE(review): float element type assumed for all std::vector buffers - confirm.
+  std::cerr << "Loading test set...";
+  std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
+  std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
+  std::cerr << "\tDone." << std::endl;
+
+  // An empty data set would turn the accuracy below into a division by zero.
+  if (numofdata <= 0) {
+    std::cerr << "No test data loaded." << std::endl;
+    return 1;
+  }
+  const size_t total = static_cast<size_t>(numofdata);
+
+  std::cerr << "Loading model params...";
+  NpzConverter converter("../scripts/test_model/model.npz");
+
+  std::vector<float> wData;
+  Shape wShape;
+  converter.Load("weights", wData, wShape);
+
+  std::vector<float> bData;
+  Shape bShape;
+  converter.Load("bias", bData, bShape);
+
+  auto initW = [wData](Tensor t) {
+    t.set(wData);
+  };
+
+  auto initB = [bData](Tensor t) {
+    t.set(bData);
+  };
+
+  std::cerr << "\tDone." << std::endl;
+
+  auto x = input(shape={whatevs, IMAGE_SIZE}, name="X");
+  auto y = input(shape={whatevs, LABEL_SIZE}, name="Y");
+
+  auto w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
+  auto b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);
+
+  std::cerr << "Building model...";
+  auto predict = softmax(dot(x, w) + b, axis=1, name="pred");
+
+  std::cerr << "Done." << std::endl;
+
+  Tensor xt({BATCH_SIZE, IMAGE_SIZE});
+
+  size_t acc = 0;
+
+  // Walk the data in BATCH_SIZE strides. The last stride is clamped to the
+  // end of the data, so a trailing partial batch and a final full batch
+  // (when the data size is an exact multiple of BATCH_SIZE) are both scored.
+  for (size_t startId = 0; startId < total; startId += BATCH_SIZE) {
+    const size_t endId = std::min(startId + BATCH_SIZE, total);
+    const size_t rows = endId - startId;
+
+    std::vector<float> tmp(testImages.begin() + (startId * IMAGE_SIZE),
+                           testImages.begin() + (endId * IMAGE_SIZE));
+    xt << tmp;
+    x = xt;
+
+    predict.forward(rows);
+
+    std::vector<float> results(LABEL_SIZE * BATCH_SIZE);
+    results << predict.val();
+
+    for (size_t row = 0; row < rows; ++row) {
+      const size_t labelBase = (startId + row) * LABEL_SIZE;
+      const size_t scoreBase = row * LABEL_SIZE;
+      size_t correct = 0;
+      size_t predicted = 0;
+      for (size_t j = 0; j < LABEL_SIZE; ++j) {
+        if (testLabels[labelBase + j]) correct = j;
+        if (results[scoreBase + j] > results[scoreBase + predicted]) predicted = j;
+      }
+      acc += (correct == predicted);
+    }
+  }
+
+  std::cerr << "ACC: " << float(acc) / total << std::endl;
+
+  return 0;
+}