#include "SplitPoint.h"

// NOTE(review): the three system include targets were lost in extraction.
// The headers below cover every standard-library name this file uses;
// confirm against version control.
#include <cstddef>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"

#include "syntax-common/exception.h"

namespace MosesTraining
{
namespace Syntax
{
namespace PostprocessEgretForests
{

namespace
{

// Character positions (within one token) at which a connector occurs.
// NOTE(review): element/key types reconstructed as int from the surrounding
// arithmetic (`int offset`, `int pos`) -- confirm against SplitPoint.h.
typedef std::set<int> CharPosSet;

// Maps a token position to the set of character positions to mark in it.
typedef std::map<int, CharPosSet> SplitPointMap;

// Groups the split points by token position and records the connector.
//
// FIXME Assumes all split points have same connector: `connector` ends up
// holding the connector of the last element of `splitPoints`.
void CollectSplitPoints(const std::vector<SplitPoint> &splitPoints,
                        std::string &connector, SplitPointMap &points)
{
  for (std::vector<SplitPoint>::const_iterator p = splitPoints.begin();
       p != splitPoints.end(); ++p) {
    points[p->tokenPos].insert(p->charPos);
    connector = p->connector;
  }
}

// Rewrites `word` in place, replacing connector.size() characters at each
// recorded position with "@" + connector + "@".
//
// Positions are visited in ascending order (std::set iteration order).  Each
// replacement makes the word two characters longer, so later positions are
// shifted by a running offset.  std::string::replace throws
// std::out_of_range if a position lies beyond the end of the word.
void InsertMarkers(const CharPosSet &charPositions,
                   const std::string &connector, std::string &word)
{
  const std::string marked = std::string("@") + connector + std::string("@");
  int offset = 0;
  for (CharPosSet::const_iterator q = charPositions.begin();
       q != charPositions.end(); ++q) {
    word.replace(*q + offset, connector.size(), marked);
    offset += 2;
  }
}

}  // namespace

// Marks every split point in a plain-text sentence.
//
// The sentence is split on spaces and tabs, the connector at each split point
// is wrapped in '@' characters, and the tokens are re-joined with single
// spaces.  An empty `splitPoints` leaves `sentence` untouched.
//
// Throws std::out_of_range if a split point refers to a token position that
// does not exist in the sentence (previously undefined behaviour) or to a
// character position beyond the end of its token.  `sentence` is only
// overwritten after all split points have been applied successfully.
void MarkSplitPoints(const std::vector<SplitPoint> &splitPoints,
                     std::string &sentence)
{
  if (splitPoints.empty()) {
    return;
  }

  std::string connector;
  SplitPointMap points;
  CollectSplitPoints(splitPoints, connector, points);

  // Split the sentence in to a sequence of tokens.
  // NOTE(review): the TokenIter template arguments were lost in extraction;
  // <util::AnyCharacter, true> (skip empty tokens) is a reconstruction --
  // confirm against version control.
  std::vector<std::string> terminals;
  const util::AnyCharacter delim(" \t");
  for (util::TokenIter<util::AnyCharacter, true> p(sentence, delim); p; ++p) {
    terminals.resize(terminals.size()+1);
    p->CopyToString(&terminals.back());
  }

  // Mark the split points.
  for (SplitPointMap::const_iterator p = points.begin();
       p != points.end(); ++p) {
    const int tokenPos = p->first;
    if (tokenPos < 0 ||
        static_cast<std::size_t>(tokenPos) >= terminals.size()) {
      throw std::out_of_range(
        "MarkSplitPoints: split point token position is outside the sentence");
    }
    InsertMarkers(p->second, connector, terminals[tokenPos]);
  }

  // Rebuild the sentence from the (possibly modified) tokens.
  sentence.clear();
  for (std::size_t i = 0; i < terminals.size(); ++i) {
    if (i > 0) {
      sentence += " ";
    }
    sentence += terminals[i];
  }
}

// Marks every split point in the terminal vertices of a forest.
//
// Terminal vertices are those with no incoming hyperedges; they are indexed
// by their `start` value.  The connector at each split point inside a
// terminal's symbol value is wrapped in '@' characters.  An empty
// `splitPoints` leaves `forest` untouched.
//
// Throws std::out_of_range if a terminal vertex has a negative start, if a
// split point refers to a position with no terminal vertex (previously
// undefined behaviour), or if a character position lies beyond the end of
// the terminal's symbol value.  Terminals processed before the failing split
// point keep their modifications.
void MarkSplitPoints(const std::vector<SplitPoint> &splitPoints,
                     Forest &forest)
{
  if (splitPoints.empty()) {
    return;
  }

  std::string connector;
  SplitPointMap points;
  CollectSplitPoints(splitPoints, connector, points);

  // Get the terminal vertices in sentence order.
  // NOTE(review): element type reconstructed as Forest::Vertex* (the value
  // returned by get() on each entry of forest.vertices) -- confirm against
  // the Forest declaration.
  std::vector<Forest::Vertex *> terminals;
  for (std::size_t i = 0; i < forest.vertices.size(); ++i) {
    Forest::Vertex *vertex = forest.vertices[i].get();
    if (!vertex->incoming.empty()) {
      continue;
    }
    const int pos = vertex->start;
    if (pos < 0) {
      throw std::out_of_range(
        "MarkSplitPoints: terminal vertex has a negative start position");
    }
    if (static_cast<std::size_t>(pos) >= terminals.size()) {
      // New slots are value-initialised to null pointers.
      terminals.resize(pos+1);
    }
    terminals[pos] = vertex;
  }

  // Mark the split points.
  for (SplitPointMap::const_iterator p = points.begin();
       p != points.end(); ++p) {
    const int tokenPos = p->first;
    if (tokenPos < 0 ||
        static_cast<std::size_t>(tokenPos) >= terminals.size() ||
        !terminals[tokenPos]) {
      throw std::out_of_range(
        "MarkSplitPoints: split point token position has no terminal vertex");
    }
    InsertMarkers(p->second, connector, terminals[tokenPos]->symbol.value);
  }
}

}  // namespace PostprocessEgretForests
}  // namespace Syntax
}  // namespace MosesTraining