diff --git a/moses/TranslationModel/ProbingPT/line_splitter.cpp b/moses/TranslationModel/ProbingPT/line_splitter.cpp
index 1eeeb1899..5539d0f33 100644
--- a/moses/TranslationModel/ProbingPT/line_splitter.cpp
+++ b/moses/TranslationModel/ProbingPT/line_splitter.cpp
@@ -2,41 +2,47 @@
 
+// Splits textin into its "|||"-separated fields and returns them in a
+// line_text. The first three fields (source phrase, target phrase,
+// probabilities) are read unconditionally; word alignment, counts, sparse
+// score and property are optional, and parsing stops at the first one that
+// is missing. Every field is stored after being passed through Trim().
 line_text splitLine(StringPiece textin)
 {
-  const char delim[] = " ||| ";
+  // Split on the bare delimiter; Trim() strips the padding around each field.
+  const char delim[] = "|||";
   line_text output;
 
   //Tokenize
   util::TokenIter<util::MultiCharacter> it(textin, util::MultiCharacter(delim));
   //Get source phrase
-  output.source_phrase = *it;
+  output.source_phrase = Trim(*it);
 
   //Get target_phrase
   it++;
-  output.target_phrase = *it;
+  output.target_phrase = Trim(*it);
 
   //Get probabilities
   it++;
-  output.prob = *it;
+  output.prob = Trim(*it);
 
   //Get WordAllignment
   it++;
   if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
-  output.word_align = *it;
+  output.word_align = Trim(*it);
 
   //Get count
   it++;
   if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
-  output.counts = *it;
+  output.counts = Trim(*it);
 
   //Get sparse_score
   it++;
   if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
-  output.sparse_score = *it;
+  output.sparse_score = Trim(*it);
 
   //Get property
   it++;
   if (it == util::TokenIter<util::MultiCharacter>::end()) return output;
-  output.property = *it;
+  output.property = Trim(*it);
 
   return output;
 }
diff --git a/util/string_piece.hh b/util/string_piece.hh
index 9c4798217..428808692 100644
--- a/util/string_piece.hh
+++ b/util/string_piece.hh
@@ -257,6 +257,22 @@ inline bool operator>=(const StringPiece& x, const StringPiece& y) {
   return !(x < y);
 }
 
+// Returns the part of str that remains after dropping every leading and
+// trailing character contained in dropChars (space, tab, newline and
+// carriage return by default). The result is produced with str.substr().
+// If str is empty or consists only of dropChars, an empty piece is returned.
+inline StringPiece Trim(const StringPiece& str, const std::string& dropChars = " \t\n\r")
+{
+  const StringPiece::size_type startPos = str.find_first_not_of(dropChars);
+  if (startPos == StringPiece::npos) {
+    // Nothing to keep: do not feed npos into the length arithmetic below.
+    return str.substr(str.size(), 0);
+  }
+  // A character to keep exists, so this search cannot come back empty-handed.
+  const StringPiece::size_type endPos = str.find_last_not_of(dropChars);
+  return str.substr(startPos, endPos - startPos + 1);
+}
+
 // allow StringPiece to be logged (needed for unit testing).
 inline std::ostream& operator<<(std::ostream& o, const StringPiece& piece) {
   return o.write(piece.data(), static_cast<std::streamsize>(piece.size()));