mirror of
https://github.com/marian-nmt/marian.git
synced 2024-11-03 20:13:47 +03:00
load text
This commit is contained in:
parent
057d200a40
commit
9ee6a60a7f
@ -5,15 +5,74 @@ using namespace std;
|
||||
|
||||
namespace marian {
|
||||
|
||||
//! Split `str` into the maximal runs of characters that are not in `delimiters`.
//!
//! Leading, trailing and repeated delimiters are skipped, so no empty token is
//! ever produced; an empty or all-delimiter input yields an empty vector.
//!
//! \param str         text to split
//! \param delimiters  set of single-character separators (default: space and tab)
//! \return the tokens, in order of appearance
inline std::vector<std::string> Tokenize(const std::string& str,
                                         const std::string& delimiters = " \t")
{
  std::vector<std::string> tokens;

  // Start of the first token: the first character that is NOT a delimiter.
  std::string::size_type tokenStart = str.find_first_not_of(delimiters, 0);

  while (tokenStart != std::string::npos) {
    // End of the current token: the next delimiter, or npos when the token
    // runs to the end of the string.
    const std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);

    // With tokenEnd == npos the length is oversized; substr() clamps it to
    // the end of str.
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));

    // Skip the delimiter run that follows. Searching from npos yields npos,
    // which terminates the loop.
    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
  }

  return tokens;
}
|
||||
|
||||
//! Convert a string to a variable of type T using stream extraction
//! (operator>>). Used for reading floats, ints etc. from files.
//!
//! \param input  text holding one value of type T
//! \return the extracted value; a value-initialized T (0 for arithmetic
//!         types) when nothing could be extracted
template<typename T>
T Scan(const std::string &input)
{
  std::stringstream stream(input);
  // Value-initialize: for empty or whitespace-only input the extraction never
  // writes to `ret`, so a plain `T ret;` would return an indeterminate value.
  T ret{};
  stream >> ret;
  return ret;
}
|
||||
|
||||
//! Convert a vector of strings to a vector of type T values.
//!
//! Each element is converted independently with Scan<T>(const std::string&);
//! the result has the same length and order as `input`.
template<typename T>
inline std::vector<T> Scan(const std::vector< std::string > &input)
{
  std::vector<T> converted;
  converted.reserve(input.size());
  for (const std::string &text : input) {
    converted.push_back(Scan<T>(text));
  }
  return converted;
}
|
||||
|
||||
//! tokenise input string to vector of type T
|
||||
template<typename T>
|
||||
inline std::vector<T> Tokenize( const std::string &input
|
||||
, const std::string& delimiters = " \t")
|
||||
{
|
||||
std::vector<std::string> stringVector = Tokenize(input, delimiters);
|
||||
return Scan<T>( stringVector );
|
||||
}
|
||||
|
||||
|
||||
void Tensor::Load(const std::string &path)
|
||||
{
|
||||
fstream strm;
|
||||
strm.open(path.c_str());
|
||||
|
||||
size_t lineNum = 0;
|
||||
string line;
|
||||
while ( getline (strm, line) )
|
||||
{
|
||||
cerr << line << '\n';
|
||||
vector<float> toks = Tokenize<float>(line);
|
||||
for (size_t i = 0; i < toks.size(); ++i) {
|
||||
//(*this)[lineNum][i] = toks[i];
|
||||
}
|
||||
|
||||
++lineNum;
|
||||
}
|
||||
strm.close();
|
||||
|
||||
|
@ -50,6 +50,9 @@ int main(int argc, char** argv) {
|
||||
|
||||
Tensor tx({4, 2}, 1);
|
||||
Tensor ty({4, 1}, 1);
|
||||
cerr << "tx=" << tx.Debug() << endl;
|
||||
cerr << "ty=" << ty.Debug() << endl;
|
||||
|
||||
tx.Load("../examples/xor/train.txt");
|
||||
ty.Load("../examples/xor/label.txt");
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user