From 9ee6a60a7f76e498d49626d72d3ed119cb5c0c7e Mon Sep 17 00:00:00 2001
From: Hieu Hoang
Date: Tue, 13 Sep 2016 17:52:57 +0200
Subject: [PATCH] load text

---
 src/tensor.cu | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/test.cu   |  3 +++
 2 files changed, 62 insertions(+)

diff --git a/src/tensor.cu b/src/tensor.cu
index a8a3418a..cd57c3b0 100644
--- a/src/tensor.cu
+++ b/src/tensor.cu
@@ -5,15 +5,74 @@ using namespace std;
 
 namespace marian {
 
+inline std::vector<std::string> Tokenize(const std::string& str,
+    const std::string& delimiters = " \t")
+{
+  std::vector<std::string> tokens;
+  // Skip delimiters at beginning.
+  std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
+  // Find the first delimiter after the token start (end of the first token).
+  std::string::size_type pos = str.find_first_of(delimiters, lastPos);
+
+  while (std::string::npos != pos || std::string::npos != lastPos) {
+    // Found a token, add it to the vector.
+    tokens.push_back(str.substr(lastPos, pos - lastPos));
+    // Skip delimiters. Note the "not_of"
+    lastPos = str.find_first_not_of(delimiters, pos);
+    // Find the next delimiter (end of the next token), npos if none.
+    pos = str.find_first_of(delimiters, lastPos);
+  }
+
+  return tokens;
+}
+
+//! convert string to variable of type T. Used for reading floats, ints etc. from files
+template<typename T>
+T Scan(const std::string &input)
+{
+  std::stringstream stream(input);
+  T ret;
+  stream >> ret;
+  return ret;
+}
+
+//! convert vectors of string to vectors of type T variables
+template<typename T>
+inline std::vector<T> Scan(const std::vector< std::string > &input)
+{
+  std::vector<T> output(input.size());
+  for (size_t i = 0 ; i < input.size() ; i++) {
+    output[i] = Scan<T>( input[i] );
+  }
+  return output;
+}
+
+//! tokenise input string to vector of type T
+template<typename T>
+inline std::vector<T> Tokenize( const std::string &input
+                                , const std::string& delimiters = " \t")
+{
+  std::vector<std::string> stringVector = Tokenize(input, delimiters);
+  return Scan<T>( stringVector );
+}
+
+
 void Tensor::Load(const std::string &path)
 {
   fstream strm;
   strm.open(path.c_str());
 
+  size_t lineNum = 0;
   string line;
   while ( getline (strm, line) )
   {
     cerr << line << '\n';
+    vector<float> toks = Tokenize<float>(line); // NOTE(review): element type assumed to be float - confirm
+    for (size_t i = 0; i < toks.size(); ++i) {
+      //(*this)[lineNum][i] = toks[i];
+    }
+
+    ++lineNum;
   }
   strm.close();
 
diff --git a/src/test.cu b/src/test.cu
index 5657bf20..4e382141 100644
--- a/src/test.cu
+++ b/src/test.cu
@@ -50,6 +50,9 @@ int main(int argc, char** argv) {
   Tensor tx({4, 2}, 1);
   Tensor ty({4, 1}, 1);
 
+  cerr << "tx=" << tx.Debug() << endl;
+  cerr << "ty=" << ty.Debug() << endl;
+
   tx.Load("../examples/xor/train.txt");
   ty.Load("../examples/xor/label.txt");
 