mosesdecoder/moses/AlignmentInfo.h

144 lines
4.3 KiB
C
Raw Permalink Normal View History

/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
2013-05-29 21:16:15 +04:00
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
2013-05-29 21:16:15 +04:00
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
2013-05-29 21:16:15 +04:00
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <ostream>
#include <set>
#include <vector>
#include <cstdlib>
2012-10-12 01:03:32 +04:00
#include <boost/functional/hash.hpp>
#include "TypeDef.h"
namespace Moses
{
class AlignmentInfoCollection;
/** Collection of non-terminal alignment pairs, ordered by source index.
* Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
*/
class AlignmentInfo
{
friend std::ostream& operator<<(std::ostream &, const AlignmentInfo &);
friend struct AlignmentInfoOrderer;
2012-10-12 01:03:32 +04:00
friend struct AlignmentInfoHasher;
friend class AlignmentInfoCollection;
2015-01-09 16:24:16 +03:00
friend class VW;
2013-05-29 21:16:15 +04:00
public:
typedef std::set<std::pair<size_t,size_t> > CollType;
typedef std::vector<size_t> NonTermIndexMap;
typedef CollType::const_iterator const_iterator;
2013-05-29 21:16:15 +04:00
const_iterator begin() const {
return m_collection.begin();
}
const_iterator end() const {
return m_collection.end();
}
void Add(size_t sourcePos, size_t targetPos) {
2013-05-29 21:16:15 +04:00
m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
}
2012-07-02 23:01:07 +04:00
/** Provides a map from target-side to source-side non-terminal indices.
* The target-side index should be the rule symbol index (COUNTING terminals).
* The index returned is the rule non-terminal index (IGNORING terminals).
2012-07-02 23:01:07 +04:00
*/
const NonTermIndexMap &GetNonTermIndexMap() const {
return m_nonTermIndexMap;
}
/** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
* the index counting both terminals and non-terminals) */
const NonTermIndexMap &GetNonTermIndexMap2() const {
return m_nonTermIndexMap2;
}
const CollType &GetAlignments() const {
return m_collection;
}
2013-05-29 21:16:15 +04:00
2013-07-19 18:38:13 +04:00
std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
2013-09-22 17:36:50 +04:00
std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
2013-07-19 18:38:13 +04:00
2013-05-29 21:16:15 +04:00
size_t GetSize() const {
return m_collection.size();
}
2015-11-02 03:00:37 +03:00
std::vector< const std::pair<size_t,size_t>* >
GetSortedAlignments(WordAlignmentSort SortOrder) const;
std::vector<size_t> GetSourceIndex2PosMap() const;
2013-05-29 21:16:15 +04:00
bool operator==(const AlignmentInfo& rhs) const {
return m_collection == rhs.m_collection &&
m_nonTermIndexMap == rhs.m_nonTermIndexMap;
}
2013-05-29 21:16:15 +04:00
private:
2012-07-02 23:01:07 +04:00
//! AlignmentInfo objects should only be created by an AlignmentInfoCollection
explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
explicit AlignmentInfo(const std::vector<unsigned char> &aln);
2015-01-08 16:53:46 +03:00
2015-01-09 16:24:16 +03:00
// used only by VW to load word alignment between sentences
explicit AlignmentInfo(const std::string &str);
void BuildNonTermIndexMaps();
CollType m_collection;
NonTermIndexMap m_nonTermIndexMap;
NonTermIndexMap m_nonTermIndexMap2;
};
/** Define an arbitrary strict weak ordering between AlignmentInfo objects
* for use by AlignmentInfoCollection.
*/
2013-05-29 21:16:15 +04:00
struct AlignmentInfoOrderer {
bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
2013-05-29 21:16:15 +04:00
if (a.m_collection == b.m_collection) {
return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
} else {
return a.m_collection < b.m_collection;
}
}
};
2013-05-29 21:16:15 +04:00
/**
2012-10-12 01:03:32 +04:00
* Hashing functoid
**/
2013-05-29 21:16:15 +04:00
struct AlignmentInfoHasher {
size_t operator()(const AlignmentInfo& a) const {
2012-10-12 01:03:32 +04:00
size_t seed = 0;
boost::hash_combine(seed,a.m_collection);
boost::hash_combine(seed,a.m_nonTermIndexMap);
return seed;
}
};
2013-05-29 21:16:15 +04:00
inline size_t hash_value(const AlignmentInfo& a)
{
static AlignmentInfoHasher hasher;
return hasher(a);
}
}