diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 49832492..2aa3fdc9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,6 +3,7 @@ include_directories(.) add_library(libcommon OBJECT exception.cpp + cnpy/cnpy.cpp ) cuda_add_executable( @@ -14,7 +15,25 @@ cuda_add_executable( $ ) -foreach(exec marian) +cuda_add_executable( + train_mnist + train_mnist.cu + expressions.cu + tensor_operators.cu + tensor.cu + $ +) + +cuda_add_executable( + validate_mnist + validate_mnist.cu + expressions.cu + tensor_operators.cu + tensor.cu + $ +) + +foreach(exec marian train_mnist validate_mnist) target_link_libraries(${exec} ${EXT_LIBS} cuda cudnn) cuda_add_cublas_to_target(${exec}) set_target_properties(${exec} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") diff --git a/src/cnpy/LICENSE b/src/cnpy/LICENSE new file mode 100644 index 00000000..e60eadbc --- /dev/null +++ b/src/cnpy/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) Carl Rogers, 2011 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/src/cnpy/cnpy.cpp b/src/cnpy/cnpy.cpp new file mode 100644 index 00000000..85978dc8 --- /dev/null +++ b/src/cnpy/cnpy.cpp @@ -0,0 +1,251 @@ +//Copyright (C) 2011 Carl Rogers +//Released under MIT License +//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php + +#include"cnpy.h" +#include +#include +#include +#include +#include + +char cnpy::BigEndianTest() { + unsigned char x[] = {1,0}; + short y = *(short*) x; + return y == 1 ? '<' : '>'; +} + +char cnpy::map_type(const std::type_info& t) +{ + if(t == typeid(float) ) return 'f'; + if(t == typeid(double) ) return 'f'; + if(t == typeid(long double) ) return 'f'; + + if(t == typeid(int) ) return 'i'; + if(t == typeid(char) ) return 'i'; + if(t == typeid(short) ) return 'i'; + if(t == typeid(long) ) return 'i'; + if(t == typeid(long long) ) return 'i'; + + if(t == typeid(unsigned char) ) return 'u'; + if(t == typeid(unsigned short) ) return 'u'; + if(t == typeid(unsigned long) ) return 'u'; + if(t == typeid(unsigned long long) ) return 'u'; + if(t == typeid(unsigned int) ) return 'u'; + + if(t == typeid(bool) ) return 'b'; + + if(t == typeid(std::complex) ) return 'c'; + if(t == typeid(std::complex) ) return 'c'; + if(t == typeid(std::complex) ) return 'c'; + + else return '?'; +} + +template<> std::vector& cnpy::operator+=(std::vector& lhs, const std::string rhs) { + lhs.insert(lhs.end(),rhs.begin(),rhs.end()); + return lhs; +} + +template<> std::vector& cnpy::operator+=(std::vector& lhs, const char* rhs) { + //write in little endian + size_t len = strlen(rhs); + lhs.reserve(len); + for(size_t byte = 0; byte < len; byte++) { + lhs.push_back(rhs[byte]); + } + return lhs; +} + +void cnpy::parse_npy_header(FILE* fp, unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order) { + char buffer[256]; + size_t res = fread(buffer,sizeof(char),11,fp); + if(res != 11) + throw std::runtime_error("parse_npy_header: failed fread"); + std::string header = fgets(buffer,256,fp); + assert(header[header.size()-1] == '\n'); + + int loc1, loc2; + + //fortran order + loc1 = header.find("fortran_order")+16; + fortran_order = (header.substr(loc1,5) == "True" ? true : false); + + //shape + loc1 = header.find("("); + loc2 = header.find(")"); + std::string str_shape = header.substr(loc1+1,loc2-loc1-1); + if(str_shape.length() == 0) ndims = 0; + else if(str_shape[str_shape.size()-1] == ',') ndims = 1; + else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1; + shape = new unsigned int[ndims]; + for(unsigned int i = 0;i < ndims;i++) { + loc1 = str_shape.find(","); + shape[i] = atoi(str_shape.substr(0,loc1).c_str()); + str_shape = str_shape.substr(loc1+1); + } + + //endian, word size, data type + //byte order code | stands for not applicable. + //not sure when this applies except for byte array + loc1 = header.find("descr")+9; + bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); + assert(littleEndian); + + //char type = header[loc1+1]; + //assert(type == map_type(T)); + + std::string str_ws = header.substr(loc1+2); + loc2 = str_ws.find("'"); + word_size = atoi(str_ws.substr(0,loc2).c_str()); +} + +void cnpy::parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset) +{ + std::vector footer(22); + fseek(fp,-22,SEEK_END); + size_t res = fread(&footer[0],sizeof(char),22,fp); + if(res != 22) + throw std::runtime_error("parse_zip_footer: failed fread"); + + unsigned short disk_no, disk_start, nrecs_on_disk, comment_len; + disk_no = *(unsigned short*) &footer[4]; + disk_start = *(unsigned short*) &footer[6]; + nrecs_on_disk = *(unsigned short*) &footer[8]; + nrecs = *(unsigned short*) &footer[10]; + global_header_size = *(unsigned int*) &footer[12]; + global_header_offset = *(unsigned int*) &footer[16]; + comment_len = *(unsigned short*) &footer[20]; + + assert(disk_no == 0); + assert(disk_start == 0); + assert(nrecs_on_disk == nrecs); + assert(comment_len == 0); +} + +cnpy::NpyArray load_the_npy_file(FILE* fp) { + unsigned int* shape; + unsigned int ndims, word_size; + bool fortran_order; + cnpy::parse_npy_header(fp,word_size,shape,ndims,fortran_order); + unsigned long long size = 1; //long long so no overflow when multiplying by word_size + for(unsigned int i = 0;i < ndims;i++) size *= shape[i]; + + cnpy::NpyArray arr; + arr.word_size = word_size; + arr.shape = std::vector(shape,shape+ndims); + delete[] shape; + arr.data = new char[size*word_size]; + arr.fortran_order = fortran_order; + size_t nread = fread(arr.data,word_size,size,fp); + if(nread != size) + throw std::runtime_error("load_the_npy_file: failed fread"); + return arr; +} + +cnpy::npz_t cnpy::npz_load(std::string fname) { + FILE* fp = fopen(fname.c_str(),"rb"); + + if(!fp) printf("npz_load: Error! Unable to open file %s!\n",fname.c_str()); + assert(fp); + + cnpy::npz_t arrays; + + while(1) { + std::vector local_header(30); + size_t headerres = fread(&local_header[0],sizeof(char),30,fp); + if(headerres != 30) + throw std::runtime_error("npz_load: failed fread"); + + //if we've reached the global header, stop reading + if(local_header[2] != 0x03 || local_header[3] != 0x04) break; + + //read in the variable name + unsigned short name_len = *(unsigned short*) &local_header[26]; + std::string varname(name_len,' '); + size_t vname_res = fread(&varname[0],sizeof(char),name_len,fp); + if(vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + + //erase the lagging .npy + varname.erase(varname.end()-4,varname.end()); + + //read in the extra field + unsigned short extra_field_len = *(unsigned short*) &local_header[28]; + if(extra_field_len > 0) { + std::vector buff(extra_field_len); + size_t efield_res = fread(&buff[0],sizeof(char),extra_field_len,fp); + if(efield_res != extra_field_len) + throw std::runtime_error("npz_load: failed fread"); + } + + arrays[varname] = load_the_npy_file(fp); + } + + fclose(fp); + return arrays; +} + +cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { + FILE* fp = fopen(fname.c_str(),"rb"); + + if(!fp) { + printf("npz_load: Error! Unable to open file %s!\n",fname.c_str()); + abort(); + } + + while(1) { + std::vector local_header(30); + size_t header_res = fread(&local_header[0],sizeof(char),30,fp); + if(header_res != 30) + throw std::runtime_error("npz_load: failed fread"); + + //if we've reached the global header, stop reading + if(local_header[2] != 0x03 || local_header[3] != 0x04) break; + + //read in the variable name + unsigned short name_len = *(unsigned short*) &local_header[26]; + std::string vname(name_len,' '); + size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp); + if(vname_res != name_len) + throw std::runtime_error("npz_load: failed fread"); + vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy + + //read in the extra field + unsigned short extra_field_len = *(unsigned short*) &local_header[28]; + fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field + + if(vname == varname) { + NpyArray array = load_the_npy_file(fp); + fclose(fp); + return array; + } + else { + //skip past the data + unsigned int size = *(unsigned int*) &local_header[22]; + fseek(fp,size,SEEK_CUR); + } + } + + fclose(fp); + printf("npz_load: Error! Variable name %s not found in %s!\n",varname.c_str(),fname.c_str()); + abort(); +} + +cnpy::NpyArray cnpy::npy_load(std::string fname) { + + FILE* fp = fopen(fname.c_str(), "rb"); + + if(!fp) { + printf("npy_load: Error! Unable to open file %s!\n",fname.c_str()); + abort(); + } + + NpyArray arr = load_the_npy_file(fp); + + fclose(fp); + return arr; +} + + + diff --git a/src/cnpy/cnpy.h b/src/cnpy/cnpy.h new file mode 100644 index 00000000..b11013b9 --- /dev/null +++ b/src/cnpy/cnpy.h @@ -0,0 +1,241 @@ +//Copyright (C) 2011 Carl Rogers +//Released under MIT License +//license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php + +#ifndef LIBCNPY_H_ +#define LIBCNPY_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cnpy { + + struct NpyArray { + char* data; + std::vector shape; + unsigned int word_size; + bool fortran_order; + void destruct() {delete[] data;} + }; + + struct npz_t : public std::map + { + void destruct() + { + npz_t::iterator it = this->begin(); + for(; it != this->end(); ++it) (*it).second.destruct(); + } + }; + + char BigEndianTest(); + char map_type(const std::type_info& t); + template std::vector create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims); + void parse_npy_header(FILE* fp,unsigned int& word_size, unsigned int*& shape, unsigned int& ndims, bool& fortran_order); + void parse_zip_footer(FILE* fp, unsigned short& nrecs, unsigned int& global_header_size, unsigned int& global_header_offset); + npz_t npz_load(std::string fname); + NpyArray npz_load(std::string fname, std::string varname); + NpyArray npy_load(std::string fname); + + template std::vector& operator+=(std::vector& lhs, const T rhs) { + //write in little endian + for(char byte = 0; byte < sizeof(T); byte++) { + char val = *((char*)&rhs+byte); + lhs.push_back(val); + } + return lhs; + } + + template<> std::vector& operator+=(std::vector& lhs, const std::string rhs); + template<> std::vector& operator+=(std::vector& lhs, const char* rhs); + + + template std::string tostring(T i, int pad = 0, char padval = ' ') { + std::stringstream s; + s << i; + return s.str(); + } + + template void npy_save(std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w") { + FILE* fp = NULL; + + if(mode == "a") fp = fopen(fname.c_str(),"r+b"); + + if(fp) { + //file exists. we need to append to it. read the header, modify the array size + unsigned int word_size, tmp_dims; + unsigned int* tmp_shape = 0; + bool fortran_order; + parse_npy_header(fp,word_size,tmp_shape,tmp_dims,fortran_order); + assert(!fortran_order); + + if(word_size != sizeof(T)) { + std::cout<<"libnpy error: "< header = create_npy_header(data,tmp_shape,ndims); + fwrite(&header[0],sizeof(char),header.size(),fp); + fseek(fp,0,SEEK_END); + + delete[] tmp_shape; + } + else { + fp = fopen(fname.c_str(),"wb"); + std::vector header = create_npy_header(data,shape,ndims); + fwrite(&header[0],sizeof(char),header.size(),fp); + } + + unsigned int nels = 1; + for(int i = 0;i < ndims;i++) nels *= shape[i]; + + fwrite(data,sizeof(T),nels,fp); + fclose(fp); + } + + template void npz_save(std::string zipname, std::string fname, const T* data, const unsigned int* shape, const unsigned int ndims, std::string mode = "w") + { + //first, append a .npy to the fname + fname += ".npy"; + + //now, on with the show + FILE* fp = NULL; + unsigned short nrecs = 0; + unsigned int global_header_offset = 0; + std::vector global_header; + + if(mode == "a") fp = fopen(zipname.c_str(),"r+b"); + + if(fp) { + //zip file exists. we need to add a new npy file to it. + //first read the footer. this gives us the offset and size of the global header + //then read and store the global header. + //below, we will write the the new data at the start of the global header then append the global header and footer below it + unsigned int global_header_size; + parse_zip_footer(fp,nrecs,global_header_size,global_header_offset); + fseek(fp,global_header_offset,SEEK_SET); + global_header.resize(global_header_size); + size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp); + if(res != global_header_size){ + throw std::runtime_error("npz_save: header read error while adding to existing zip"); + } + fseek(fp,global_header_offset,SEEK_SET); + } + else { + fp = fopen(zipname.c_str(),"wb"); + } + + std::vector npy_header = create_npy_header(data,shape,ndims); + + unsigned long nels = 1; + for (int m=0; m local_header; + local_header += "PK"; //first part of sig + local_header += (unsigned short) 0x0403; //second part of sig + local_header += (unsigned short) 20; //min version to extract + local_header += (unsigned short) 0; //general purpose bit flag + local_header += (unsigned short) 0; //compression method + local_header += (unsigned short) 0; //file last mod time + local_header += (unsigned short) 0; //file last mod date + local_header += (unsigned int) crc; //crc + local_header += (unsigned int) nbytes; //compressed size + local_header += (unsigned int) nbytes; //uncompressed size + local_header += (unsigned short) fname.size(); //fname length + local_header += (unsigned short) 0; //extra field length + local_header += fname; + + //build global header + global_header += "PK"; //first part of sig + global_header += (unsigned short) 0x0201; //second part of sig + global_header += (unsigned short) 20; //version made by + global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30); + global_header += (unsigned short) 0; //file comment length + global_header += (unsigned short) 0; //disk number where file starts + global_header += (unsigned short) 0; //internal file attributes + global_header += (unsigned int) 0; //external file attributes + global_header += (unsigned int) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin + global_header += fname; + + //build footer + std::vector footer; + footer += "PK"; //first part of sig + footer += (unsigned short) 0x0605; //second part of sig + footer += (unsigned short) 0; //number of this disk + footer += (unsigned short) 0; //disk where footer starts + footer += (unsigned short) (nrecs+1); //number of records on this disk + footer += (unsigned short) (nrecs+1); //total number of records + footer += (unsigned int) global_header.size(); //nbytes of global headers + footer += (unsigned int) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array + footer += (unsigned short) 0; //zip file comment length + + //write everything + fwrite(&local_header[0],sizeof(char),local_header.size(),fp); + fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); + fwrite(data,sizeof(T),nels,fp); + fwrite(&global_header[0],sizeof(char),global_header.size(),fp); + fwrite(&footer[0],sizeof(char),footer.size(),fp); + fclose(fp); + } + + template std::vector create_npy_header(const T* data, const unsigned int* shape, const unsigned int ndims) { + + std::vector dict; + dict += "{'descr': '"; + dict += BigEndianTest(); + dict += map_type(typeid(T)); + dict += tostring(sizeof(T)); + dict += "', 'fortran_order': False, 'shape': ("; + dict += tostring(shape[0]); + for(int i = 1;i < ndims;i++) { + dict += ", "; + dict += tostring(shape[i]); + } + if(ndims == 1) dict += ","; + dict += "), }"; + //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n + int remainder = 16 - (10 + dict.size()) % 16; + dict.insert(dict.end(),remainder,' '); + dict.back() = '\n'; + + std::vector header; + header += (char) 0x93; + header += "NUMPY"; + header += (char) 0x01; //major version of numpy format + header += (char) 0x00; //minor version of numpy format + header += (unsigned short) dict.size(); + header.insert(header.end(),dict.begin(),dict.end()); + + return header; + } + + +} + +#endif diff --git a/src/definitions.h b/src/definitions.h index ea52024e..c1f24663 100644 --- a/src/definitions.h +++ b/src/definitions.h @@ -5,13 +5,13 @@ #include namespace marian { - typedef float Float; + typedef float Float; typedef std::vector Shape; const int whatevs{-1}; } #include "keywords.h" -#include "tensor.h" +// #include "tensor.h" namespace marian { class Tensor; diff --git a/src/expressions.cu b/src/expressions.cu index 2d656ce1..a95b1bef 100644 --- a/src/expressions.cu +++ b/src/expressions.cu @@ -10,7 +10,7 @@ Expr::Expr(Chainable* chainable) : pimpl_(chainable) {} Expr::Expr(Float v) : pimpl_(new ConstantNode(keywords::value=v, keywords::shape={1,1})) {} -Tensor &Expr::val() { +Tensor Expr::val() { return pimpl_->val(); } diff --git a/src/expressions.h b/src/expressions.h index 09d0edfa..43016dac 100644 --- a/src/expressions.h +++ b/src/expressions.h @@ -9,25 +9,25 @@ class Expr { public: Expr(Chainable* chainable); Expr(Float v); - + Expr operator=(Tensor t) { pimpl_->setVal(t); return *this; } - - Tensor &val(); + + Tensor val(); Tensor grad(); - + void forward(size_t batchSize); void backward(); - + ChainPtr node(); operator ChainPtr(); - + std::string Debug() const; private: - ChainPtr pimpl_; + ChainPtr pimpl_; }; } diff --git a/src/marian.h b/src/marian.h index 8c987ccf..0876d4cd 100644 --- a/src/marian.h +++ b/src/marian.h @@ -5,4 +5,5 @@ #include "graph_operators.h" #include "expressions.h" #include "expression_operators.h" +#include "param_initializers.h" diff --git a/src/npz_converter.cpp b/src/npz_converter.cpp new file mode 100644 index 00000000..1ecbc11c --- /dev/null +++ b/src/npz_converter.cpp @@ -0,0 +1,39 @@ +#include "common/npz_converter.h" + + + +NpzConverter::NpzConverter(const std::string& file) + : model_(cnpy::npz_load(file)), + destructed_(false) { + } + +NpzConverter::~NpzConverter() { + if(!destructed_) + model_.destruct(); +} + +void NpzConverter::Destruct() { + model_.destruct(); + destructed_ = true; +} + +mblas::Matrix NpzConverter::operator[](const std::string& key) const { + typedef blaze::CustomMatrix BlazeWrapper; + mblas::Matrix matrix; + auto it = model_.find(key); + if(it != model_.end()) { + NpyMatrixWrapper np(it->second); + matrix = BlazeWrapper(np.data(), np.size1(), np.size2()); + } + else { + std::cerr << "Missing " << key << std::endl; + } + return std::move(matrix); +} + +mblas::Matrix NpzConverter::operator()(const std::string& key, bool transpose) const { + mblas::Matrix matrix = (*this)[key]; + mblas::Trans(matrix); + return std::move(matrix); +} diff --git a/src/npz_converter.h b/src/npz_converter.h new file mode 100644 index 00000000..96060cfc --- /dev/null +++ b/src/npz_converter.h @@ -0,0 +1,77 @@ +#pragma once + +#include "cnpy/cnpy.h" +#include "tensor.h" + +class NpzConverter { + private: + class NpyMatrixWrapper { + public: + NpyMatrixWrapper(const cnpy::NpyArray& npy) + : npy_(npy) {} + + size_t size() const { + return size1() * size2(); + } + + float* data() const { + return (float*)npy_.data; + } + + float operator()(size_t i, size_t j) const { + return ((float*)npy_.data)[i * size2() + j]; + } + + size_t size1() const { + return npy_.shape[0]; + } + + size_t size2() const { + if(npy_.shape.size() == 1) + return 1; + else + return npy_.shape[1]; + } + + private: + const cnpy::NpyArray& npy_; + }; + + public: + NpzConverter(const std::string& file) + : model_(cnpy::npz_load(file)), + destructed_(false) { + } + + ~NpzConverter() { + if(!destructed_) + model_.destruct(); + } + + void Destruct() { + model_.destruct(); + destructed_ = true; + } + + void Load(const std::string& key, std::vector& data, marian::Shape& shape) const { + auto it = model_.find(key); + if(it != model_.end()) { + NpyMatrixWrapper np(it->second); + data.clear(); + data.resize(np.size()); + std::copy(np.data(), np.data() + np.size(), data.begin()); + + shape.clear(); + shape.push_back(np.size1()); + shape.push_back(np.size2()); + + } + else { + std::cerr << "Missing " << key << std::endl; + } + } + + private: + cnpy::npz_t model_; + bool destructed_; +}; diff --git a/src/param_initializers.h b/src/param_initializers.h new file mode 100644 index 00000000..ab781064 --- /dev/null +++ b/src/param_initializers.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include +#include +#include + +#include "tensor.h" + +namespace marian { + +void zeros(Tensor t) { + std::vector vals(t.size(), 0.0f); + thrust::copy(vals.begin(), vals.end(), t.begin()); +} + +void ones(Tensor t) { + std::vector vals(t.size(), 1.0f); + thrust::copy(vals.begin(), vals.end(), t.begin()); +} + +void randreal(Tensor t) { + std::random_device device; + std::default_random_engine engine(device()); + std::uniform_real_distribution<> dist(0, 1); + auto gen = std::bind(dist, engine); + + std::vector vals(t.size()); + std::generate(begin(vals), end(vals), gen); + + thrust::copy(vals.begin(), vals.end(), t.begin()); +} + +} // namespace marian diff --git a/src/sgd.h b/src/sgd.h new file mode 100644 index 00000000..298cd358 --- /dev/null +++ b/src/sgd.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include + +#include "expressions.h" +#include "thrust_functions.h" + +namespace marian { + +class SGD { + public: + SGD(Expr& cost_func, Expr& inX, Expr& inY, + const std::vector params, float eta, + std::vector& xData, size_t numFeatures, + std::vector& yData, size_t numClasses, + size_t epochs, size_t batchSize) + : cost_function_(&cost_func), + inX_(&inX), + inY_(&inY), + params_(params), + eta_(eta), + xData_(xData), + numFeatures_(numFeatures), + yData_(yData), + numClasses_(numClasses), + epochs_(epochs), + batchSize_(batchSize) + {} + + void Run() { + size_t numExamples = xData_.size()/ numFeatures_; + Tensor xt({(int)batchSize_, (int)numExamples}, 0.0f); + Tensor yt({(int)batchSize_, (int)numClasses_}, 0.0f); + + for (size_t numEpoch = 0; numEpoch < epochs_; ++numEpoch) { + std::cerr << "Starting epoch #" << numEpoch << std::endl; + size_t startId = 0; + size_t endId = startId + batchSize_; + + while (endId < numExamples) { + PrepareBatch(startId, endId, xt, yt); + *inX_ = xt; + *inY_ = yt; + + cost_function_->forward(batchSize_); + cost_function_->backward(); + + UpdateModel(); + + startId += batchSize_; + endId += batchSize_; + } + } + } + + void PrepareBatch(size_t startId, size_t endId, Tensor& xt, Tensor& yt) { + std::vector x(xData_.begin() + startId * numFeatures_, + xData_.begin() + endId * numFeatures_); + std::vector y(yData_.begin() + startId * numClasses_, + yData_.begin() + endId * numClasses_); + + xt.Load(x); + yt.Load(y); + } + + void UpdateModel() { + for (auto& param : params_) { + using namespace thrust::placeholders; + Element(_1 = _1 - eta_ * _2, param->val(), param->grad()); + } + } + + private: + std::shared_ptr cost_function_; + std::shared_ptr inX_; + std::shared_ptr inY_; + std::vector params_; + const float eta_; + std::vector& xData_; + const size_t numFeatures_; + std::vector& yData_; + const size_t numClasses_; + const size_t epochs_; + const size_t batchSize_; +}; + +} // namespace marian diff --git a/src/tensor.cu b/src/tensor.cu index 398b696a..09355b21 100644 --- a/src/tensor.cu +++ b/src/tensor.cu @@ -83,6 +83,12 @@ void Tensor::Load(const std::string &path) Load(hostData.begin(), hostData.begin()); } +void Tensor::Load(const std::vector& data) +{ + pimpl_->set(data.begin(), data.end()); +} + + void Tensor::Load(const std::vector::const_iterator &begin, const std::vector::const_iterator &end) { pimpl_->set(begin, end); diff --git a/src/tensor.h b/src/tensor.h index ff6ecd0b..b9c81a91 100644 --- a/src/tensor.h +++ b/src/tensor.h @@ -16,16 +16,16 @@ namespace marian { struct Handles { cudnnHandle_t cudnnHandle; cublasHandle_t cublasHandle; - - cudnnOpTensorDescriptor_t add; - + + cudnnOpTensorDescriptor_t add; + Handles() { cudnnCreate(&cudnnHandle); cublasCreate(&cublasHandle); cudnnCreateOpTensorDescriptor(&add); cudnnSetOpTensorDescriptor(add, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN); } - + ~Handles() { cudnnDestroy(cudnnHandle); cublasDestroy(cublasHandle); @@ -35,7 +35,7 @@ struct Handles { const Handles handles; -typedef std::vector Shape; +// typedef std::vector Shape; inline std::string Debug(const Shape &shape) { @@ -63,7 +63,7 @@ class TensorImpl { cudnnTensorDescriptor_t desc_; size_t tno_; static size_t tensorCounter; - + cudnnDataType_t dataType() { switch(sizeof(Float)) { case 2: return CUDNN_DATA_HALF; @@ -74,15 +74,15 @@ class TensorImpl { public: typedef Float value_type; - + TensorImpl(const Shape& shape, value_type value = 0) : shape_(shape), tno_(tensorCounter++) { - - // @TODO: + + // @TODO: UTIL_THROW_IF2(shape_.size() != 2, "For now, only 2D Tensors, will be fixed later."); - + UTIL_THROW_IF2(shape_.size() < 1 || shape_.size() > 4, "Wrong number of dimensions: " << shape_.size()); @@ -106,54 +106,54 @@ class TensorImpl { shape_[0], shape_[1], shape_[2], shape_[3]); break; } } - + TensorImpl(const TensorImpl&) = delete; TensorImpl(TensorImpl&&) = delete; - + ~TensorImpl() { cudnnDestroyTensorDescriptor(desc_); } - + value_type operator[](size_t i) const { return data_[i]; } - + auto begin() -> decltype( data_.begin() ) { return data_.begin(); } - + auto begin() const -> decltype( data_.begin() ) { return data_.begin(); } - + auto end() -> decltype( data_.end() ) { return data_.end(); } - + auto end() const -> decltype( data_.end() ) { return data_.end(); } - + const Shape& shape() const { return shape_; } - + size_t size() const { return data_.size(); } - + value_type* data() { return thrust::raw_pointer_cast(data_.data()); } - + cudnnTensorDescriptor_t desc() const { return desc_; } - + size_t id() const { return tno_; } - + void set(value_type value) { thrust::fill(data_.begin(), data_.end(), value); } @@ -194,70 +194,70 @@ size_t TensorImpl::tensorCounter = 0; class Tensor { private: std::shared_ptr> pimpl_; - + public: typedef TensorImpl::value_type value_type; - + Tensor() {} - Tensor(Shape shape, value_type value = 0) { + Tensor(const Shape& shape, value_type value = 0) { allocate(shape, value); } - + ~Tensor() {} - - void allocate(Shape shape, value_type value = 0) { + + void allocate(const Shape& shape, value_type value = 0) { if(!pimpl_) pimpl_.reset(new TensorImpl(shape, value)); } - + value_type operator[](size_t i) const { return (*pimpl_)[i]; } - + size_t size() const { return pimpl_->size(); } - + value_type* data() { return pimpl_->data(); } - + const value_type* data() const { return pimpl_->data(); } - + auto begin() -> decltype( pimpl_->begin() ) { return pimpl_->begin(); } - + auto begin() const -> decltype( pimpl_->begin() ) { return pimpl_->begin(); } - + auto end() -> decltype( pimpl_->begin() ) { return pimpl_->begin(); } - + auto end() const -> decltype( pimpl_->begin() ) { return pimpl_->begin(); } - + const Shape& shape() const { return pimpl_->shape(); } - + cudnnTensorDescriptor_t desc() const { return pimpl_->desc(); } - + void set(value_type value) { pimpl_->set(value); } - + size_t id() const { return pimpl_->id(); } - + operator bool() { return pimpl_ != nullptr; } @@ -275,6 +275,7 @@ class Tensor { } void Load(const std::string &path); + void Load(const std::vector& data); void Load(const std::vector::const_iterator &begin, const std::vector::const_iterator &end); }; diff --git a/src/test.cu b/src/test.cu index 0285e3a5..a86c60ee 100644 --- a/src/test.cu +++ b/src/test.cu @@ -20,6 +20,7 @@ int main(int argc, char** argv) { Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); + // Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=randreal); Expr b = param(shape={1, LABEL_SIZE}, name="b0"); Expr z = dot(x, w) + b; diff --git a/src/train_mnist.cu b/src/train_mnist.cu new file mode 100644 index 00000000..aa21597a --- /dev/null +++ b/src/train_mnist.cu @@ -0,0 +1,37 @@ + +#include "marian.h" +#include "mnist.h" +#include "sgd.h" + +using namespace std; + +int main(int argc, char** argv) { + const size_t IMAGE_SIZE = 784; + const size_t LABEL_SIZE = 10; + int numofdata; + + vector trainImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); + vectortrainLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); + + using namespace marian; + using namespace keywords; + + Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); + Expr y = input(shape={whatevs, LABEL_SIZE}, name="Y"); + + Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0"); + Expr b = param(shape={1, LABEL_SIZE}, name="b0"); + + std::vector params; + params.push_back(&w); + params.push_back(&b); + + auto scores = dot(x, w) + b; + auto lr = softmax_fast(scores, axis=1, name="pred"); + auto cost = -mean(sum(y * log(lr), axis=1), axis=0, name="cost"); + cerr << "lr=" << lr.Debug() << endl; + + SGD opt(cost, x, y, params, 0.9, trainImages, IMAGE_SIZE, trainLabels, LABEL_SIZE, 3, 24); + opt.Run(); + return 0; +} diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu new file mode 100644 index 00000000..a42fa881 --- /dev/null +++ b/src/validate_mnist.cu @@ -0,0 +1,77 @@ + +#include "marian.h" +#include "mnist.h" +#include "npz_converter.h" + +using namespace marian; +using namespace keywords; + +int main(int argc, char** argv) { + const size_t IMAGE_SIZE = 784; + const size_t LABEL_SIZE = 10; + int numofdata; + + std::cerr << "Loading test set..."; + std::vector testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", numofdata, IMAGE_SIZE); + std::vectortestLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", numofdata, LABEL_SIZE); + std::cerr << "\tDone." << std::endl; + + std::cerr << "Loading model params..."; + NpzConverter converter("../scripts/test_model/model.npz"); + + std::vector wData; + Shape wShape; + converter.Load("weights", wData, wShape); + + std::vector bData; + Shape bShape; + converter.Load("bias", bData, bShape); + + auto initW = [&wData](Tensor t) { + thrust::copy(wData.begin(), wData.end(), t.begin()); + }; + + auto initB = [&bData](Tensor t) { + thrust::copy(bData.begin(), bData.end(), t.begin()); + }; + + std::cerr << "\tDone." << std::endl; + + + Expr x = input(shape={whatevs, IMAGE_SIZE}, name="X"); + + Expr w = param(shape={IMAGE_SIZE, LABEL_SIZE}, name="W0", init=initW); + Expr b = param(shape={1, LABEL_SIZE}, name="b0", init=initB); + + std::cerr << "Building model..."; + auto scores = dot(x, w) + b; + auto predict = softmax(scores, axis=1, name="pred"); + std::cerr << "\tDone." << std::endl; + + Tensor xt({numofdata, IMAGE_SIZE}); + xt.Load(testImages); + + predict.forward(numofdata); + + auto results = predict.val(); + + size_t acc = 0; + + for (size_t i = 0; i < testLabels.size(); i += LABEL_SIZE) { + size_t correct = 0; + size_t predicted = 0; + for (size_t j = 0; j < LABEL_SIZE; ++j) { + if (testLabels[i+j]) correct = j; + if (results[i + j] > results[i + predicted]) predicted = j; + } + acc += (correct == predicted); + std::cerr << "corect: " << correct << " | " << predicted << "("; + for (size_t j = 0; j < LABEL_SIZE; ++j) { + std::cerr << results[i+j] << " "; + } + std::cerr << std::endl; + } + std::cerr << "ACC: " << float(acc)/numofdata << std::endl; + + return 0; +}