This commit is contained in:
Maximiliana Behnke 2016-09-14 18:56:31 +02:00
commit e5dc4c0c2d
17 changed files with 946 additions and 53 deletions

View File

@ -3,6 +3,7 @@ include_directories(.)
add_library(libcommon OBJECT
exception.cpp
cnpy/cnpy.cpp
)
cuda_add_executable(
@ -14,7 +15,25 @@ cuda_add_executable(
$<TARGET_OBJECTS:libcommon>
)
foreach(exec marian)
cuda_add_executable(
train_mnist
train_mnist.cu
expressions.cu
tensor_operators.cu
tensor.cu
$<TARGET_OBJECTS:libcommon>
)
cuda_add_executable(
validate_mnist
validate_mnist.cu
expressions.cu
tensor_operators.cu
tensor.cu
$<TARGET_OBJECTS:libcommon>
)
foreach(exec marian train_mnist validate_mnist)
target_link_libraries(${exec} ${EXT_LIBS} cuda cudnn)
cuda_add_cublas_to_target(${exec})
set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")

21
src/cnpy/LICENSE Normal file
View File

@ -0,0 +1,21 @@
The MIT License
Copyright (c) Carl Rogers, 2011
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

251
src/cnpy/cnpy.cpp Normal file
View File

@ -0,0 +1,251 @@
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#include"cnpy.h"
#include<complex>
#include<cstdlib>
#include<algorithm>
#include<cstring>
#include<iomanip>
// Returns the numpy byte-order character for the host machine:
// '<' for little-endian, '>' for big-endian.
char cnpy::BigEndianTest() {
    unsigned short probe = 1;
    // On a little-endian host the least-significant byte comes first.
    return *reinterpret_cast<unsigned char*>(&probe) == 1 ? '<' : '>';
}
// Maps a C++ type to its numpy dtype kind character:
// 'f' float, 'i' signed int, 'u' unsigned int, 'b' bool, 'c' complex,
// '?' for anything unrecognized.
char cnpy::map_type(const std::type_info& t)
{
    //floating point types
    if(t == typeid(float) || t == typeid(double) || t == typeid(long double))
        return 'f';
    //signed integer types (plain char follows the historical cnpy mapping)
    if(t == typeid(int) || t == typeid(char) || t == typeid(short) ||
       t == typeid(long) || t == typeid(long long))
        return 'i';
    //unsigned integer types
    if(t == typeid(unsigned char) || t == typeid(unsigned short) ||
       t == typeid(unsigned int) || t == typeid(unsigned long) ||
       t == typeid(unsigned long long))
        return 'u';
    if(t == typeid(bool))
        return 'b';
    //complex types
    if(t == typeid(std::complex<float>) || t == typeid(std::complex<double>) ||
       t == typeid(std::complex<long double>))
        return 'c';
    return '?';
}
//append every character of the string to the byte buffer
template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs) {
    for(std::string::size_type idx = 0; idx < rhs.size(); ++idx)
        lhs.push_back(rhs[idx]);
    return lhs;
}
//append the raw bytes of the C string (without the terminating NUL)
template<> std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs) {
    //write in little endian
    size_t len = strlen(rhs);
    // The original called lhs.reserve(len), which is a no-op once the vector
    // already holds >= len bytes; reserve room for the appended data instead.
    lhs.reserve(lhs.size() + len);
    lhs.insert(lhs.end(), rhs, rhs + len);
    return lhs;
}
// Parses the header of the .npy stream fp points at and returns, via the out
// parameters, the element word size, the shape (heap array the caller must
// delete[]), the number of dimensions and the on-disk storage order.
// Throws std::runtime_error on a short read or truncated header.
void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order) {
    char buffer[256];
    // skip the fixed-size preamble (magic string, version, header length)
    size_t res = fread(buffer,sizeof(char),11,fp);
    if(res != 11)
        throw std::runtime_error("parse_npy_header: failed fread");
    // read the dictionary line; the original dereferenced fgets' result
    // unchecked, which is undefined behavior on a truncated file
    char* line = fgets(buffer,256,fp);
    if(line == NULL)
        throw std::runtime_error("parse_npy_header: failed fgets");
    std::string header = line;
    assert(header[header.size()-1] == '\n');

    int loc1, loc2;

    //fortran order
    loc1 = header.find("fortran_order")+16;
    fortran_order = (header.substr(loc1,5) == "True" ? true : false);

    //shape: the parenthesized tuple, e.g. "(3, 4)" or "(5,)" or "()"
    loc1 = header.find("(");
    loc2 = header.find(")");
    std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
    if(str_shape.length() == 0) ndims = 0;
    else if(str_shape[str_shape.size()-1] == ',') ndims = 1;
    else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
    shape = new unsigned int[ndims];
    for(unsigned int i = 0;i < ndims;i++) {
        loc1 = str_shape.find(",");
        shape[i] = atoi(str_shape.substr(0,loc1).c_str());
        str_shape = str_shape.substr(loc1+1);
    }

    //endian, word size, data type
    //byte order code | stands for not applicable.
    //not sure when this applies except for byte array
    loc1 = header.find("descr")+9;
    bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false);
    assert(littleEndian);

    //char type = header[loc1+1];
    //assert(type == map_type(T));

    // word size is the number right after the dtype kind character
    std::string str_ws = header.substr(loc1+2);
    loc2 = str_ws.find("'");
    word_size = atoi(str_ws.substr(0,loc2).c_str());
}
// Reads the 22-byte zip "end of central directory" record at the end of the
// archive and returns the record count plus the size and offset of the
// global (central directory) header. Throws std::runtime_error on short read.
void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset)
{
    std::vector<char> footer(22);
    fseek(fp,-22,SEEK_END);
    size_t res = fread(&footer[0],sizeof(char),22,fp);
    if(res != 22)
        throw std::runtime_error("parse_zip_footer: failed fread");

    // Fields are little-endian at fixed offsets. memcpy instead of the
    // original *(unsigned short*)&footer[k] casts, which were unaligned
    // reads with undefined behavior under strict aliasing.
    unsigned short disk_no, disk_start, nrecs_on_disk, comment_len;
    memcpy(&disk_no,              &footer[4],  sizeof(disk_no));
    memcpy(&disk_start,           &footer[6],  sizeof(disk_start));
    memcpy(&nrecs_on_disk,        &footer[8],  sizeof(nrecs_on_disk));
    memcpy(&nrecs,                &footer[10], sizeof(nrecs));
    memcpy(&global_header_size,   &footer[12], sizeof(global_header_size));
    memcpy(&global_header_offset, &footer[16], sizeof(global_header_offset));
    memcpy(&comment_len,          &footer[20], sizeof(comment_len));

    // multi-disk archives and trailing comments are not supported
    assert(disk_no == 0);
    assert(disk_start == 0);
    assert(nrecs_on_disk == nrecs);
    assert(comment_len == 0);
}
// Reads one .npy payload (header + raw element data) starting at the current
// position of fp and returns it as a NpyArray. The caller owns arr.data and
// must release it via NpyArray::destruct().
cnpy::NpyArray load_the_npy_file(FILE* fp) {
    unsigned int* shape;
    unsigned int ndims, word_size;
    bool fortran_order;
    cnpy::parse_npy_header(fp,word_size,shape,ndims,fortran_order);
    unsigned long long size = 1; //long long so no overflow when multiplying by word_size
    for(unsigned int i = 0;i < ndims;i++) size *= shape[i];

    cnpy::NpyArray arr;
    arr.word_size = word_size;
    arr.shape = std::vector<unsigned int>(shape,shape+ndims);
    delete[] shape;
    arr.data = new char[size*word_size];
    arr.fortran_order = fortran_order;
    size_t nread = fread(arr.data,word_size,size,fp);
    if(nread != size) {
        // free the buffer before throwing — the original leaked it here
        delete[] arr.data;
        throw std::runtime_error("load_the_npy_file: failed fread");
    }
    return arr;
}
// Loads every array stored in a .npz archive (a zip of .npy files) into an
// npz_t map keyed by variable name (the trailing ".npy" is stripped).
// Aborts via assert if the file cannot be opened.
// NOTE(review): fp is leaked if one of the freads throws mid-archive —
// consider RAII for the FILE*.
cnpy::npz_t cnpy::npz_load(std::string fname) {
FILE* fp = fopen(fname.c_str(),"rb");
if(!fp) printf("npz_load: Error! Unable to open file %s!\n",fname.c_str());
assert(fp);
cnpy::npz_t arrays;
while(1) {
// each stored entry begins with a 30-byte zip local-file header
std::vector<char> local_header(30);
size_t headerres = fread(&local_header[0],sizeof(char),30,fp);
if(headerres != 30)
throw std::runtime_error("npz_load: failed fread");
//if we've reached the global header, stop reading
if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
//read in the variable name (its length is at offset 26 of the local header)
unsigned short name_len = *(unsigned short*) &local_header[26];
std::string varname(name_len,' ');
size_t vname_res = fread(&varname[0],sizeof(char),name_len,fp);
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
//erase the lagging .npy
varname.erase(varname.end()-4,varname.end());
//read in the extra field (skipped; only its length at offset 28 is needed)
unsigned short extra_field_len = *(unsigned short*) &local_header[28];
if(extra_field_len > 0) {
std::vector<char> buff(extra_field_len);
size_t efield_res = fread(&buff[0],sizeof(char),extra_field_len,fp);
if(efield_res != extra_field_len)
throw std::runtime_error("npz_load: failed fread");
}
arrays[varname] = load_the_npy_file(fp);
}
fclose(fp);
return arrays;
}
// Loads a single named array from a .npz archive by scanning the local file
// headers; entries other than `varname` are skipped via fseek without being
// read into memory. Aborts if the file cannot be opened or the name is not
// present. NOTE(review): fp is leaked if a short read makes this throw.
cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) {
FILE* fp = fopen(fname.c_str(),"rb");
if(!fp) {
printf("npz_load: Error! Unable to open file %s!\n",fname.c_str());
abort();
}
while(1) {
// 30-byte zip local-file header precedes each stored entry
std::vector<char> local_header(30);
size_t header_res = fread(&local_header[0],sizeof(char),30,fp);
if(header_res != 30)
throw std::runtime_error("npz_load: failed fread");
//if we've reached the global header, stop reading
if(local_header[2] != 0x03 || local_header[3] != 0x04) break;
//read in the variable name (length at offset 26)
unsigned short name_len = *(unsigned short*) &local_header[26];
std::string vname(name_len,' ');
size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp);
if(vname_res != name_len)
throw std::runtime_error("npz_load: failed fread");
vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy
//read in the extra field
unsigned short extra_field_len = *(unsigned short*) &local_header[28];
fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field
if(vname == varname) {
NpyArray array = load_the_npy_file(fp);
fclose(fp);
return array;
}
else {
//skip past the data; offset 22 holds the entry's compressed size
//(equal to the uncompressed size — entries are stored, not deflated)
unsigned int size = *(unsigned int*) &local_header[22];
fseek(fp,size,SEEK_CUR);
}
}
fclose(fp);
printf("npz_load: Error! Variable name %s not found in %s!\n",varname.c_str(),fname.c_str());
abort();
}
// Loads a standalone .npy file into a NpyArray; the caller owns the returned
// buffer (see NpyArray::destruct()). Aborts if the file cannot be opened.
cnpy::NpyArray cnpy::npy_load(std::string fname) {
    FILE* fp = fopen(fname.c_str(), "rb");

    if(fp == NULL) {
        printf("npy_load: Error! Unable to open file %s!\n",fname.c_str());
        abort();
    }

    NpyArray result = load_the_npy_file(fp);
    fclose(fp);
    return result;
}

241
src/cnpy/cnpy.h Normal file
View File

@ -0,0 +1,241 @@
//Copyright (C) 2011 Carl Rogers
//Released under MIT License
//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
#ifndef LIBCNPY_H_
#define LIBCNPY_H_
#include<string>
#include<stdexcept>
#include<sstream>
#include<vector>
#include<cstdio>
#include<typeinfo>
#include<iostream>
#include<cassert>
#include<zlib.h>
#include<map>
namespace cnpy {
// One loaded numpy array: a raw heap buffer plus its metadata. `data` is
// allocated with new char[] and is NOT freed automatically — callers must
// invoke destruct() exactly once; copies of this struct share the pointer.
struct NpyArray {
char* data;
std::vector<unsigned int> shape;
unsigned int word_size; // bytes per element (from the npy 'descr' field)
bool fortran_order; // true if the data is column-major on disk
void destruct() {delete[] data;}
};
struct npz_t : public std::map<std::string, NpyArray>
{
void destruct()
{
npz_t::iterator it = this->begin();
for(; it != this->end(); ++it) (*it).second.destruct();
}
};
char BigEndianTest();
char map_type(const std::type_info& t);
template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims);
void parse_npy_header(FILE* fp,unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order);
void parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset);
npz_t npz_load(std::string fname);
NpyArray npz_load(std::string fname, std::string varname);
NpyArray npy_load(std::string fname);
// Appends the in-memory byte representation of rhs to the buffer
// (host order — little-endian on the platforms this library targets).
template<typename T> std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs) {
    //write in little endian
    const char* first = (const char*)&rhs;
    lhs.insert(lhs.end(), first, first + sizeof(T));
    return lhs;
}
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);
template<> std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
// Converts i to its decimal/stream representation. If the result is shorter
// than `pad`, it is left-padded with `padval` up to that width.
// (The original accepted pad/padval but silently ignored them.)
template<typename T> std::string tostring(T i, int pad = 0, char padval = ' ') {
    std::stringstream s;
    s << i;
    std::string out = s.str();
    if((int)out.size() < pad)
        out.insert(out.begin(), pad - (int)out.size(), padval);
    return out;
}
// Saves `data` (rank `ndims`, extents in `shape`) to fname in .npy format.
// mode "w" overwrites; mode "a" appends rows along the first axis, which
// requires the existing file to match in word size, rank and trailing
// extents.
template<typename T> void npy_save(std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w") {
    FILE* fp = NULL;

    if(mode == "a") fp = fopen(fname.c_str(),"r+b");

    if(fp) {
        //file exists. we need to append to it. read the header, modify the array size
        unsigned int word_size, tmp_dims;
        unsigned int* tmp_shape = 0;
        bool fortran_order;
        parse_npy_header(fp,word_size,tmp_shape,tmp_dims,fortran_order);
        assert(!fortran_order);

        if(word_size != sizeof(T)) {
            std::cout<<"libnpy error: "<<fname<<" has word size "<<word_size<<" but npy_save appending data sized "<<sizeof(T)<<"\n";
            assert( word_size == sizeof(T) );
        }
        if(tmp_dims != ndims) {
            std::cout<<"libnpy error: npy_save attempting to append misdimensioned data to "<<fname<<"\n";
            assert(tmp_dims == ndims);
        }

        // every extent except the first must agree (unsigned loop index —
        // the original compared a signed int against unsigned ndims)
        for(unsigned int i = 1; i < ndims; i++) {
            if(shape[i] != tmp_shape[i]) {
                std::cout<<"libnpy error: npy_save attempting to append misshaped data to "<<fname<<"\n";
                assert(shape[i] == tmp_shape[i]);
            }
        }
        tmp_shape[0] += shape[0];

        // rewrite the header with the grown first extent, then seek to the
        // end to append the new rows.
        // NOTE(review): this assumes the padded header length is unchanged
        // by the larger extent — confirm for very large appends.
        fseek(fp,0,SEEK_SET);
        std::vector<char> header = create_npy_header(data,tmp_shape,ndims);
        fwrite(&header[0],sizeof(char),header.size(),fp);
        fseek(fp,0,SEEK_END);

        delete[] tmp_shape;
    }
    else {
        fp = fopen(fname.c_str(),"wb");
        // the original wrote through a NULL FILE* when fopen failed
        if(!fp)
            throw std::runtime_error("npy_save: unable to open " + fname);
        std::vector<char> header = create_npy_header(data,shape,ndims);
        fwrite(&header[0],sizeof(char),header.size(),fp);
    }

    unsigned int nels = 1;
    for(unsigned int i = 0;i < ndims;i++) nels *= shape[i];

    fwrite(data,sizeof(T),nels,fp);
    fclose(fp);
}
// Adds array `fname` (a ".npy" suffix is appended) to the zip archive
// `zipname`, producing numpy's .npz layout. mode "w" creates a fresh
// archive; mode "a" appends to an existing one by overwriting its global
// header and re-emitting it (plus the footer) after the new entry.
// Data is stored uncompressed (compression method 0 below), which is why
// compressed size == uncompressed size.
template<typename T> void npz_save(std::string zipname, std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w")
{
//first, append a .npy to the fname
fname += ".npy";
//now, on with the show
FILE* fp = NULL;
unsigned short nrecs = 0;
unsigned int global_header_offset = 0;
std::vector<char> global_header;
if(mode == "a") fp = fopen(zipname.c_str(),"r+b");
if(fp) {
//zip file exists. we need to add a new npy file to it.
//first read the footer. this gives us the offset and size of the global header
//then read and store the global header.
//below, we will write the the new data at the start of the global header then append the global header and footer below it
unsigned int global_header_size;
parse_zip_footer(fp,nrecs,global_header_size,global_header_offset);
fseek(fp,global_header_offset,SEEK_SET);
global_header.resize(global_header_size);
size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp);
if(res != global_header_size){
throw std::runtime_error("npz_save: header read error while adding to existing zip");
}
fseek(fp,global_header_offset,SEEK_SET);
}
else {
// NOTE(review): fopen result is unchecked — the fwrites below would
// crash if the archive cannot be created.
fp = fopen(zipname.c_str(),"wb");
}
std::vector<char> npy_header = create_npy_header(data,shape,ndims);
unsigned long nels = 1;
for (int m=0; m<ndims; m++ ) nels *= shape[m];
int nbytes = nels*sizeof(T) + npy_header.size();
//get the CRC of the data to be added
unsigned int crc = crc32(0L,(unsigned char*)&npy_header[0],npy_header.size());
crc = crc32(crc,(unsigned char*)data,nels*sizeof(T));
//build the local header (PK\x03\x04 record preceding the entry data)
std::vector<char> local_header;
local_header += "PK"; //first part of sig
local_header += (unsigned short) 0x0403; //second part of sig
local_header += (unsigned short) 20; //min version to extract
local_header += (unsigned short) 0; //general purpose bit flag
local_header += (unsigned short) 0; //compression method
local_header += (unsigned short) 0; //file last mod time
local_header += (unsigned short) 0; //file last mod date
local_header += (unsigned int) crc; //crc
local_header += (unsigned int) nbytes; //compressed size
local_header += (unsigned int) nbytes; //uncompressed size
local_header += (unsigned short) fname.size(); //fname length
local_header += (unsigned short) 0; //extra field length
local_header += fname;
//build global header (PK\x01\x02 central-directory record for this entry;
//bytes 4..30 of the local header are reused verbatim)
global_header += "PK"; //first part of sig
global_header += (unsigned short) 0x0201; //second part of sig
global_header += (unsigned short) 20; //version made by
global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30);
global_header += (unsigned short) 0; //file comment length
global_header += (unsigned short) 0; //disk number where file starts
global_header += (unsigned short) 0; //internal file attributes
global_header += (unsigned int) 0; //external file attributes
global_header += (unsigned int) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin
global_header += fname;
//build footer (PK\x05\x06 end-of-central-directory record)
std::vector<char> footer;
footer += "PK"; //first part of sig
footer += (unsigned short) 0x0605; //second part of sig
footer += (unsigned short) 0; //number of this disk
footer += (unsigned short) 0; //disk where footer starts
footer += (unsigned short) (nrecs+1); //number of records on this disk
footer += (unsigned short) (nrecs+1); //total number of records
footer += (unsigned int) global_header.size(); //nbytes of global headers
footer += (unsigned int) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array
footer += (unsigned short) 0; //zip file comment length
//write everything: entry, then the (re-grown) central directory, then footer
fwrite(&local_header[0],sizeof(char),local_header.size(),fp);
fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp);
fwrite(data,sizeof(T),nels,fp);
fwrite(&global_header[0],sizeof(char),global_header.size(),fp);
fwrite(&footer[0],sizeof(char),footer.size(),fp);
fclose(fp);
}
// Builds the .npy header (magic, format version, padded python dict) that
// describes an array of T with the given extents. `data` is only used to
// deduce T.
template<typename T> std::vector<char> create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) {

    std::vector<char> dict;
    dict += "{'descr': '";
    dict += BigEndianTest();
    dict += map_type(typeid(T));
    dict += tostring(sizeof(T));
    dict += "', 'fortran_order': False, 'shape': (";
    // guard ndims == 0: the original read shape[0] unconditionally,
    // which is out of bounds for a 0-d array (numpy shape "()")
    if(ndims > 0) {
        dict += tostring(shape[0]);
        for(unsigned int i = 1;i < ndims;i++) {
            dict += ", ";
            dict += tostring(shape[i]);
        }
    }
    if(ndims == 1) dict += ",";
    dict += "), }";
    //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n
    int remainder = 16 - (10 + dict.size()) % 16;
    dict.insert(dict.end(),remainder,' ');
    dict.back() = '\n';

    std::vector<char> header;
    header += (char) 0x93;
    header += "NUMPY";
    header += (char) 0x01; //major version of numpy format
    header += (char) 0x00; //minor version of numpy format
    header += (unsigned short) dict.size();
    header.insert(header.end(),dict.begin(),dict.end());

    return header;
}
}
#endif

View File

@ -5,13 +5,13 @@
#include <functional>
namespace marian {
typedef float Float;
typedef float Float;
typedef std::vector<int> Shape;
const int whatevs{-1};
}
#include "keywords.h"
#include "tensor.h"
// #include "tensor.h"
namespace marian {
class Tensor;

View File

@ -10,7 +10,7 @@ Expr::Expr(Chainable<Tensor>* chainable) : pimpl_(chainable) {}
Expr::Expr(Float v) : pimpl_(new ConstantNode(keywords::value=v,
keywords::shape={1,1})) {}
Tensor &Expr::val() {
Tensor Expr::val() {
return pimpl_->val();
}

View File

@ -9,25 +9,25 @@ class Expr {
public:
Expr(Chainable<Tensor>* chainable);
Expr(Float v);
Expr operator=(Tensor t) {
pimpl_->setVal(t);
return *this;
}
Tensor &val();
Tensor val();
Tensor grad();
void forward(size_t batchSize);
void backward();
ChainPtr node();
operator ChainPtr();
std::string Debug() const;
private:
ChainPtr pimpl_;
ChainPtr pimpl_;
};
}

View File

@ -5,4 +5,5 @@
#include "graph_operators.h"
#include "expressions.h"
#include "expression_operators.h"
#include "param_initializers.h"

39
src/npz_converter.cpp Normal file
View File

@ -0,0 +1,39 @@
#include "common/npz_converter.h"
// Loads the entire .npz archive into memory; the buffers are released by
// Destruct() or the destructor.
NpzConverter::NpzConverter(const std::string& file)
: model_(cnpy::npz_load(file)),
destructed_(false) {
}
// Frees the cnpy buffers unless Destruct() was already called.
NpzConverter::~NpzConverter() {
if(!destructed_)
model_.destruct();
}
// Releases the cnpy buffers early; the converter must not be used afterwards.
void NpzConverter::Destruct() {
model_.destruct();
destructed_ = true;
}
// Returns array `key` copied into an mblas::Matrix (via a non-owning blaze
// wrapper around the cnpy buffer); returns an empty matrix and prints to
// stderr when the key is missing.
mblas::Matrix NpzConverter::operator[](const std::string& key) const {
  typedef blaze::CustomMatrix<float, blaze::unaligned,
    blaze::unpadded, blaze::rowMajor> BlazeWrapper;
  mblas::Matrix matrix;
  auto it = model_.find(key);
  if(it != model_.end()) {
    NpyMatrixWrapper np(it->second);
    matrix = BlazeWrapper(np.data(), np.size1(), np.size2());
  }
  else {
    std::cerr << "Missing " << key << std::endl;
  }
  // return the local by value: enables NRVO (and still moves otherwise);
  // the original's `return std::move(matrix)` inhibited copy elision
  return matrix;
}
// Like operator[], but transposes the result when `transpose` is true.
// (The original ignored the flag and transposed unconditionally.)
mblas::Matrix NpzConverter::operator()(const std::string& key, bool transpose) const {
  mblas::Matrix matrix = (*this)[key];
  if(transpose)
    mblas::Trans(matrix);
  return matrix;
}

77
src/npz_converter.h Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#include "cnpy/cnpy.h"
#include "tensor.h"
// Loads a numpy .npz archive and exposes the arrays in it, copied into a
// flat std::vector<float> plus a marian::Shape via Load().
class NpzConverter {
  private:
    // Non-owning float view over one cnpy array; rank-1 arrays are exposed
    // as a single-column matrix. Valid only while the owning converter
    // (and its model_) is alive.
    class NpyMatrixWrapper {
      public:
        NpyMatrixWrapper(const cnpy::NpyArray& npy)
        : npy_(npy) {}

        // total number of floats
        size_t size() const {
          return size1() * size2();
        }

        // raw pointer into the cnpy buffer
        float* data() const {
          return (float*)npy_.data;
        }

        // element access, row-major
        float operator()(size_t i, size_t j) const {
          return ((float*)npy_.data)[i * size2() + j];
        }

        // rows
        size_t size1() const {
          return npy_.shape[0];
        }

        // columns (1 for rank-1 arrays)
        size_t size2() const {
          if(npy_.shape.size() == 1)
            return 1;
          else
            return npy_.shape[1];
        }

      private:
        const cnpy::NpyArray& npy_;
    };

  public:
    NpzConverter(const std::string& file)
      : model_(cnpy::npz_load(file)),
        destructed_(false) {
      }

    // model_ owns raw heap buffers (cnpy), so a copied converter would free
    // the same buffers twice in its destructor — forbid copying.
    NpzConverter(const NpzConverter&) = delete;
    NpzConverter& operator=(const NpzConverter&) = delete;

    ~NpzConverter() {
      if(!destructed_)
        model_.destruct();
    }

    // Releases the cnpy buffers early; the converter must not be used
    // afterwards.
    void Destruct() {
      model_.destruct();
      destructed_ = true;
    }

    // Copies array `key` into `data` and its (rows, cols) extents into
    // `shape`. Prints to stderr and leaves the outputs untouched when the
    // key is missing.
    void Load(const std::string& key, std::vector<float>& data, marian::Shape& shape) const {
      auto it = model_.find(key);
      if(it != model_.end()) {
        NpyMatrixWrapper np(it->second);
        data.clear();
        data.resize(np.size());
        std::copy(np.data(), np.data() + np.size(), data.begin());

        shape.clear();
        shape.push_back(np.size1());
        shape.push_back(np.size2());
      }
      else {
        std::cerr << "Missing " << key << std::endl;
      }
    }

  private:
    cnpy::npz_t model_;
    bool destructed_;
};

34
src/param_initializers.h Normal file
View File

@ -0,0 +1,34 @@
#pragma once
#include <random>
#include <algorithm>
#include <iterator>
#include <functional>
#include "tensor.h"
namespace marian {

// These tensor initializers live in a header, so they must be `inline` to
// avoid multiple-definition (ODR) errors when the header is included from
// more than one translation unit.

// Fills t with zeros.
inline void zeros(Tensor t) {
  std::vector<float> vals(t.size(), 0.0f);
  thrust::copy(vals.begin(), vals.end(), t.begin());
}

// Fills t with ones.
inline void ones(Tensor t) {
  std::vector<float> vals(t.size(), 1.0f);
  thrust::copy(vals.begin(), vals.end(), t.begin());
}

// Fills t with uniform random reals from [0, 1): generated on the host,
// seeded from the OS entropy source, then copied to the tensor.
inline void randreal(Tensor t) {
  std::random_device device;
  std::default_random_engine engine(device());
  std::uniform_real_distribution<> dist(0, 1);
  auto gen = std::bind(dist, engine);

  std::vector<float> vals(t.size());
  std::generate(begin(vals), end(vals), gen);

  thrust::copy(vals.begin(), vals.end(), t.begin());
}

} // namespace marian

88
src/sgd.h Normal file
View File

@ -0,0 +1,88 @@
#pragma once
#include <memory>
#include <iostream>
#include "expressions.h"
#include "thrust_functions.h"
namespace marian {
class SGD {
public:
SGD(Expr& cost_func, Expr& inX, Expr& inY,
const std::vector<Expr*> params, float eta,
std::vector<float>& xData, size_t numFeatures,
std::vector<float>& yData, size_t numClasses,
size_t epochs, size_t batchSize)
: cost_function_(&cost_func),
inX_(&inX),
inY_(&inY),
params_(params),
eta_(eta),
xData_(xData),
numFeatures_(numFeatures),
yData_(yData),
numClasses_(numClasses),
epochs_(epochs),
batchSize_(batchSize)
{}
void Run() {
size_t numExamples = xData_.size()/ numFeatures_;
Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f);
Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f);
for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) {
std::cerr << "Starting epoch #" << numEpoch << std::endl;
size_t startId = 0;
size_t endId = startId + batchSize_;
while (endId < numExamples) {
PrepareBatch(startId, endId, xt, yt);
*inX_ = xt;
*inY_ = yt;
cost_function_->forward(batchSize_);
cost_function_->backward();
UpdateModel();
startId += batchSize_;
endId += batchSize_;
}
}
}
void PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt) {
std::vector<float> x(xData_.begin() + startId * numFeatures_,
xData_.begin() + endId * numFeatures_);
std::vector<float> y(yData_.begin() + startId * numClasses_,
yData_.begin() + endId * numClasses_);
xt.Load(x);
yt.Load(y);
}
void UpdateModel() {
for (auto& param : params_) {
using namespace thrust::placeholders;
Element(_1 = _1 - eta_ * _2, param->val(), param->grad());
}
}
private:
std::shared_ptr<Expr> cost_function_;
std::shared_ptr<Expr> inX_;
std::shared_ptr<Expr> inY_;
std::vector<Expr*> params_;
const float eta_;
std::vector<float>& xData_;
const size_t numFeatures_;
std::vector<float>& yData_;
const size_t numClasses_;
const size_t epochs_;
const size_t batchSize_;
};
} // namespace marian

View File

@ -83,6 +83,12 @@ void Tensor::Load(const std::string &path)
Load(hostData.begin(), hostData.begin());
}
// Copies the whole host vector into the tensor's storage via the impl.
void Tensor::Load(const std::vector<float>& data)
{
pimpl_->set(data.begin(), data.end());
}
void Tensor::Load(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end)
{
pimpl_->set(begin, end);

View File

@ -16,16 +16,16 @@ namespace marian {
struct Handles {
cudnnHandle_t cudnnHandle;
cublasHandle_t cublasHandle;
cudnnOpTensorDescriptor_t add;
cudnnOpTensorDescriptor_t add;
Handles() {
cudnnCreate(&cudnnHandle);
cublasCreate(&cublasHandle);
cudnnCreateOpTensorDescriptor(&add);
cudnnSetOpTensorDescriptor(add, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN);
}
~Handles() {
cudnnDestroy(cudnnHandle);
cublasDestroy(cublasHandle);
@ -35,7 +35,7 @@ struct Handles {
const Handles handles;
typedef std::vector<int> Shape;
// typedef std::vector<int> Shape;
inline std::string Debug(const Shape &shape)
{
@ -63,7 +63,7 @@ class TensorImpl {
cudnnTensorDescriptor_t desc_;
size_t tno_;
static size_t tensorCounter;
cudnnDataType_t dataType() {
switch(sizeof(Float)) {
case 2: return CUDNN_DATA_HALF;
@ -74,15 +74,15 @@ class TensorImpl {
public:
typedef Float value_type;
TensorImpl(const Shape& shape, value_type value = 0)
: shape_(shape), tno_(tensorCounter++)
{
// @TODO:
// @TODO:
UTIL_THROW_IF2(shape_.size() != 2,
"For now, only 2D Tensors, will be fixed later.");
UTIL_THROW_IF2(shape_.size() < 1 || shape_.size() > 4,
"Wrong number of dimensions: " << shape_.size());
@ -106,54 +106,54 @@ class TensorImpl {
shape_[0], shape_[1], shape_[2], shape_[3]); break;
}
}
TensorImpl(const TensorImpl&) = delete;
TensorImpl(TensorImpl&&) = delete;
~TensorImpl() {
cudnnDestroyTensorDescriptor(desc_);
}
value_type operator[](size_t i) const {
return data_[i];
}
auto begin() -> decltype( data_.begin() ) {
return data_.begin();
}
auto begin() const -> decltype( data_.begin() ) {
return data_.begin();
}
auto end() -> decltype( data_.end() ) {
return data_.end();
}
auto end() const -> decltype( data_.end() ) {
return data_.end();
}
const Shape& shape() const {
return shape_;
}
size_t size() const {
return data_.size();
}
value_type* data() {
return thrust::raw_pointer_cast(data_.data());
}
cudnnTensorDescriptor_t desc() const {
return desc_;
}
size_t id() const {
return tno_;
}
void set(value_type value) {
thrust::fill(data_.begin(), data_.end(), value);
}
@ -194,70 +194,70 @@ size_t TensorImpl<Type>::tensorCounter = 0;
class Tensor {
private:
std::shared_ptr<TensorImpl<Float>> pimpl_;
public:
typedef TensorImpl<Float>::value_type value_type;
Tensor() {}
Tensor(Shape shape, value_type value = 0) {
Tensor(const Shape& shape, value_type value = 0) {
allocate(shape, value);
}
~Tensor() {}
void allocate(Shape shape, value_type value = 0) {
void allocate(const Shape& shape, value_type value = 0) {
if(!pimpl_)
pimpl_.reset(new TensorImpl<Float>(shape, value));
}
value_type operator[](size_t i) const {
return (*pimpl_)[i];
}
size_t size() const {
return pimpl_->size();
}
value_type* data() {
return pimpl_->data();
}
const value_type* data() const {
return pimpl_->data();
}
auto begin() -> decltype( pimpl_->begin() ) {
return pimpl_->begin();
}
auto begin() const -> decltype( pimpl_->begin() ) {
return pimpl_->begin();
}
auto end() -> decltype( pimpl_->begin() ) {
return pimpl_->begin();
}
auto end() const -> decltype( pimpl_->begin() ) {
return pimpl_->begin();
}
const Shape& shape() const {
return pimpl_->shape();
}
cudnnTensorDescriptor_t desc() const {
return pimpl_->desc();
}
void set(value_type value) {
pimpl_->set(value);
}
size_t id() const {
return pimpl_->id();
}
operator bool() {
return pimpl_ != nullptr;
}
@ -275,6 +275,7 @@ class Tensor {
}
void Load(const std::string &path);
void Load(const std::vector<float>& data);
void Load(const std::vector<float>::const_iterator &begin, const std::vector<float>::const_iterator &end);
};

View File

@ -20,6 +20,7 @@ int main(int argc, char** argv) {
Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
// Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=randreal);
Expr b = param(shape={1, LABEL_SIZE}, name="b0");
Expr z = dot(x, w) + b;

37
src/train_mnist.cu Normal file
View File

@ -0,0 +1,37 @@
#include "marian.h"
#include "mnist.h"
#include "sgd.h"
using namespace std;
// Trains a single-layer softmax classifier on MNIST with plain SGD
// (3 epochs, batch size 24, learning rate 0.9).
int main(int argc, char** argv) {
const size_t IMAGE_SIZE = 784;
const size_t LABEL_SIZE = 10;
int numofdata;
// NOTE(review): these are the t10k (test-set) files — presumably a
// placeholder; confirm whether train-images/train-labels were intended.
vector<float> trainImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
vector<float>trainLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
using namespace marian;
using namespace keywords;
// model: softmax(x W + b) with a cross-entropy cost
Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");
Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y");
Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0");
Expr b = param(shape={1, LABEL_SIZE}, name="b0");
// parameters the optimizer will update
std::vector<Expr*> params;
params.push_back(&w);
params.push_back(&b);
auto scores = dot(x, w) + b;
auto lr = softmax_fast(scores, axis=1, name="pred");
auto cost = -mean(sum(y * log(lr), axis=1), axis=0, name="cost");
cerr << "lr=" << lr.Debug() << endl;
SGD opt(cost, x, y, params, 0.9, trainImages, IMAGE_SIZE, trainLabels, LABEL_SIZE, 3, 24);
opt.Run();
return 0;
}

77
src/validate_mnist.cu Normal file
View File

@ -0,0 +1,77 @@
#include "marian.h"
#include "mnist.h"
#include "npz_converter.h"
using namespace marian;
using namespace keywords;
// Loads a pre-trained single-layer softmax model from model.npz and reports
// its classification accuracy on the MNIST test set.
int main(int argc, char** argv) {
  const size_t IMAGE_SIZE = 784;
  const size_t LABEL_SIZE = 10;
  int numofdata;

  std::cerr << "Loading test set...";
  std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE);
  std::vector<float>testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE);
  std::cerr << "\tDone." << std::endl;

  std::cerr << "Loading model params...";
  NpzConverter converter("../scripts/test_model/model.npz");

  std::vector<float> wData;
  Shape wShape;
  converter.Load("weights", wData, wShape);

  std::vector<float> bData;
  Shape bShape;
  converter.Load("bias", bData, bShape);

  // initializers copy the loaded host weights into the parameter tensors
  auto initW = [&wData](Tensor t) {
    thrust::copy(wData.begin(), wData.end(), t.begin());
  };

  auto initB = [&bData](Tensor t) {
    thrust::copy(bData.begin(), bData.end(), t.begin());
  };
  std::cerr << "\tDone." << std::endl;

  Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X");

  Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW);
  Expr b = param(shape={1, LABEL_SIZE}, name="b0", init=initB);

  std::cerr << "Building model...";
  auto scores = dot(x, w) + b;
  auto predict = softmax(scores, axis=1, name="pred");
  std::cerr << "\tDone." << std::endl;

  Tensor xt({numofdata, IMAGE_SIZE});
  xt.Load(testImages);

  predict.forward(numofdata);

  auto results = predict.val();

  size_t acc = 0;
  // labels are one-hot: `correct` is the gold class, `predicted` the argmax
  for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) {
    size_t correct = 0;
    size_t predicted = 0;
    for (size_t j = 0; j < LABEL_SIZE; ++j) {
      if (testLabels[i+j]) correct = j;
      if (results[i + j] > results[i + predicted]) predicted = j;
    }
    acc += (correct == predicted);
    // fixed the "corect" typo and the unbalanced '(' of the original output
    std::cerr << "correct: " << correct << " | " << predicted << " (";
    for (size_t j = 0; j < LABEL_SIZE; ++j) {
      std::cerr << results[i+j] << " ";
    }
    std::cerr << ")" << std::endl;
  }
  std::cerr << "ACC: " << float(acc)/numofdata << std::endl;

  return 0;
}