mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-05 03:24:07 +03:00
171 lines
4.7 KiB
C++
171 lines
4.7 KiB
C++
/***********************************************************************
 Moses - statistical machine translation system
 Copyright (C) 2006-2011 University of Edinburgh

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or (at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 ***********************************************************************/
|
|
#include <algorithm>
|
|
#include <set>
|
|
#include "AlignmentInfo.h"
|
|
#include "TypeDef.h"
|
|
#include "StaticData.h"
|
|
#include "Util.h"
|
|
#include "util/exception.hh"
|
|
|
|
namespace Moses
|
|
{
|
|
|
|
/**
 * Construct from a ready-made set of alignment points.
 *
 * Each pair is (source position, target position). The set is copied into
 * m_collection, after which the non-terminal index maps are built from it.
 *
 * @param pairs alignment points to store.
 */
AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
  : m_collection(pairs)
{
  BuildNonTermIndexMaps();
}
|
|
|
|
/**
 * Construct from a flat byte sequence of alignment points.
 *
 * Consecutive entries are read as pairs: aln[0],aln[1] is the first point,
 * aln[2],aln[3] the second, and so on. Each byte is widened to size_t and
 * the pair is inserted into m_collection; the non-terminal index maps are
 * built afterwards.
 *
 * @param aln flattened alignment points; the length is expected to be even.
 */
AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
{
  assert(aln.size()%2==0);
  // The assert disappears under NDEBUG, so the loop itself must never read
  // aln[i+1] past the end: an unpaired trailing byte is left unread instead.
  for (size_t i = 0; i + 1 < aln.size(); i += 2) {
    m_collection.insert(std::make_pair(static_cast<size_t>(aln[i]),
                                       static_cast<size_t>(aln[i+1])));
  }
  BuildNonTermIndexMaps();
}
|
|
|
|
/**
 * Construct from a textual list of alignment points.
 *
 * The string is tokenized on " "; each resulting token is tokenized on "-"
 * into size_t values and must yield exactly two of them, which are passed
 * to Add() as one alignment point.
 *
 * A token that does not yield exactly two values is reported through
 * UTIL_THROW_IF2 with the offending token in the message.
 *
 * NOTE(review): unlike the other two constructors, this one does not call
 * BuildNonTermIndexMaps() - confirm that this is intended.
 *
 * @param str alignment points in text form.
 */
AlignmentInfo::AlignmentInfo(const std::string &str)
{
  const std::vector<std::string> points = Tokenize(str, " ");
  for (std::vector<std::string>::const_iterator iter = points.begin();
       iter != points.end(); ++iter) {
    const std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
    UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
    Add(point[0], point[1]);
  }
}
|
|
|
|
void AlignmentInfo::BuildNonTermIndexMaps()
|
|
{
|
|
if (m_collection.empty()) {
|
|
return;
|
|
}
|
|
const_iterator p = begin();
|
|
size_t maxIndex = p->second;
|
|
for (++p; p != end(); ++p) {
|
|
if (p->second > maxIndex) {
|
|
maxIndex = p->second;
|
|
}
|
|
}
|
|
m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
|
|
m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
|
|
size_t i = 0;
|
|
for (p = begin(); p != end(); ++p) {
|
|
if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
|
|
// 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
|
|
m_nonTermIndexMap.clear();
|
|
m_nonTermIndexMap2.clear();
|
|
return;
|
|
}
|
|
m_nonTermIndexMap[p->second] = i++;
|
|
m_nonTermIndexMap2[p->second] = p->first;
|
|
}
|
|
}
|
|
|
|
std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
|
|
{
|
|
std::set<size_t> ret;
|
|
CollType::const_iterator iter;
|
|
for (iter = begin(); iter != end(); ++iter) {
|
|
// const std::pair<size_t,size_t> &align = *iter;
|
|
if (iter->first == sourcePos) {
|
|
ret.insert(iter->second);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
|
|
{
|
|
std::set<size_t> ret;
|
|
CollType::const_iterator iter;
|
|
for (iter = begin(); iter != end(); ++iter) {
|
|
// const std::pair<size_t,size_t> &align = *iter;
|
|
if (iter->second == targetPos) {
|
|
ret.insert(iter->first);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
|
|
/**
 * Ordering predicate for alignment points: by target position first, then
 * by source position.
 *
 * @param a left-hand point (must not be null; it is dereferenced).
 * @param b right-hand point (must not be null; it is dereferenced).
 * @return true if *a orders strictly before *b, i.e. a->second is smaller,
 *         or the `second` members are equal and a->first is smaller.
 */
bool
compare_target(std::pair<size_t,size_t> const* a,
               std::pair<size_t,size_t> const* b)
{
  if(a->second < b->second) return true;
  if(a->second == b->second) return (a->first < b->first);
  return false;
}
|
|
|
|
|
|
std::vector< const std::pair<size_t,size_t>* >
|
|
AlignmentInfo::
|
|
GetSortedAlignments(WordAlignmentSort SortOrder) const
|
|
{
|
|
std::vector< const std::pair<size_t,size_t>* > ret;
|
|
|
|
CollType::const_iterator iter;
|
|
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
|
const std::pair<size_t,size_t> &alignPair = *iter;
|
|
ret.push_back(&alignPair);
|
|
}
|
|
|
|
switch (SortOrder) {
|
|
case NoSort:
|
|
break;
|
|
|
|
case TargetOrder:
|
|
std::sort(ret.begin(), ret.end(), compare_target);
|
|
break;
|
|
|
|
default:
|
|
UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
|
|
<< SortOrder);
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
|
|
{
|
|
std::set<size_t> sourcePoses;
|
|
|
|
CollType::const_iterator iter;
|
|
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
|
|
size_t sourcePos = iter->first;
|
|
sourcePoses.insert(sourcePos);
|
|
}
|
|
std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
|
|
return ret;
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo)
|
|
{
|
|
AlignmentInfo::const_iterator iter;
|
|
for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter) {
|
|
out << iter->first << "-" << iter->second << " ";
|
|
}
|
|
return out;
|
|
}
|
|
|
|
}
|