mosesdecoder/phrase-extract/extract-ghkm/Alignment.cpp
Phil Williams 940591a1a3 extract-ghkm: allow trailing whitespace in alignment file
Thanks to Matt Post for reporting the problem.
2013-09-26 15:49:08 +01:00

66 lines
1.9 KiB
C++

/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Alignment.h"
#include "Exception.h"
#include <cassert>
#include <cstdlib>
namespace Moses
{
namespace GHKM
{

/**
 * Parse a string of "src-tgt" alignment points and store them in `a`.
 *
 * `a` is cleared first, then one (src, tgt) pair is appended for every
 * '-' separated point found in `s`, in the order they appear.  A single
 * character is skipped after each target index before looking for the
 * next point, and parsing stops silently once no further '-' is found,
 * so trailing text without a dash (e.g. trailing whitespace) is ignored.
 *
 * The source index is converted with std::atoi from the text preceding
 * the dash and is not validated further (std::atoi tolerates leading
 * whitespace and yields 0 for non-numeric text).  The target index must
 * begin with a decimal digit immediately after the dash.
 *
 * @param s  the alignment string to parse
 * @param a  receives the parsed pairs; any previous contents are discarded
 * @throws Exception  if a dash is not immediately followed by a digit.
 *                    Pairs parsed before the error remain in `a`.
 */
void ReadAlignment(const std::string &s, Alignment &a)
{
  const std::string digits = "0123456789";

  a.clear();

  std::string::size_type begin = 0;
  while (true) {
    // No (further) dash: nothing left that could be an alignment point.
    std::string::size_type end = s.find('-', begin);
    if (end == std::string::npos) {
      return;
    }
    const int src = std::atoi(s.substr(begin, end-begin).c_str());

    // Move to the first character after the dash.
    begin = end+1;
    if (begin == s.size()) {
      throw Exception("Target index missing");
    }

    // Scan from the first target character (not the second) so that a dash
    // followed by whitespace or other non-digit text is reported instead of
    // being silently converted to a target index by std::atoi.
    end = s.find_first_not_of(digits, begin);
    if (end == begin) {
      throw Exception("Target index missing");
    }

    const bool atEndOfString = (end == std::string::npos);
    const std::string tgtText =
        atEndOfString ? s.substr(begin) : s.substr(begin, end-begin);
    a.push_back(std::make_pair(src, std::atoi(tgtText.c_str())));
    if (atEndOfString) {
      return;
    }

    // Skip the single character that terminated the target index.
    begin = end+1;
  }
}

}  // namespace GHKM
}  // namespace Moses