/*********************************************************************** Moses - statistical machine translation system Copyright (C) 2006-2011 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #include #include #include "AlignmentInfo.h" #include "TypeDef.h" #include "StaticData.h" #include "util/exception.hh" namespace Moses { AlignmentInfo::AlignmentInfo(const std::set > &pairs) : m_collection(pairs) { BuildNonTermIndexMap(); } AlignmentInfo::AlignmentInfo(const std::vector &aln) { assert(aln.size()%2==0); for (size_t i = 0; i < aln.size(); i+= 2) m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1]))); BuildNonTermIndexMap(); } void AlignmentInfo::BuildNonTermIndexMap() { if (m_collection.empty()) { return; } const_iterator p = begin(); size_t maxIndex = p->second; for (++p; p != end(); ++p) { if (p->second > maxIndex) { maxIndex = p->second; } } m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND); size_t i = 0; for (p = begin(); p != end(); ++p) { if (m_nonTermIndexMap[p->second] != NOT_FOUND) { // 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map m_nonTermIndexMap.clear(); return; } m_nonTermIndexMap[p->second] = i++; } } std::set AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const { std::set ret; CollType::const_iterator iter; for (iter = begin(); iter != end(); ++iter) { // const std::pair &align = *iter; if (iter->first == sourcePos) { ret.insert(iter->second); } } return ret; } std::set AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const { std::set ret; CollType::const_iterator iter; for (iter = begin(); iter != end(); ++iter) { // const std::pair &align = *iter; if (iter->second == targetPos) { ret.insert(iter->first); } } return ret; } bool compare_target(const std::pair *a, const std::pair *b) { if(a->second < b->second) return true; if(a->second == b->second) return (a->first < b->first); return false; } std::vector< const std::pair* > AlignmentInfo::GetSortedAlignments() const { std::vector< const std::pair* > ret; CollType::const_iterator iter; for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) { const std::pair &alignPair = *iter; ret.push_back(&alignPair); } const StaticData &staticData = StaticData::Instance(); WordAlignmentSort wordAlignmentSort = staticData.GetWordAlignmentSort(); switch (wordAlignmentSort) { case NoSort: break; case TargetOrder: std::sort(ret.begin(), ret.end(), compare_target); break; default: UTIL_THROW(util::Exception, "Unknown alignment sort option: " << wordAlignmentSort); } return ret; } std::vector AlignmentInfo::GetSourceIndex2PosMap() const { std::set sourcePoses; CollType::const_iterator iter; for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) { size_t sourcePos = iter->first; sourcePoses.insert(sourcePos); } std::vector ret(sourcePoses.begin(), sourcePoses.end()); return ret; } std::ostream& operator<<(std::ostream &out, const AlignmentInfo &alignmentInfo) { AlignmentInfo::const_iterator iter; for (iter = alignmentInfo.begin(); iter != alignmentInfo.end(); ++iter) { out << iter->first << "-" << iter->second << " "; } return out; } }