/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
|
|
|
|
|
2010-04-08 21:16:10 +04:00
|
|
|
#pragma once
|
|
|
|
|
2012-04-19 22:08:06 +04:00
|
|
|
#include <iostream>
|
2010-04-08 21:16:10 +04:00
|
|
|
#include <ostream>
|
2011-06-17 01:20:20 +04:00
|
|
|
#include <set>
|
|
|
|
#include <vector>
|
2012-04-19 22:08:06 +04:00
|
|
|
#include <cstdlib>
|
2010-04-08 21:16:10 +04:00
|
|
|
|
2012-10-12 01:03:32 +04:00
|
|
|
#include <boost/functional/hash.hpp>
|
|
|
|
|
2010-04-08 21:16:10 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
2011-06-17 01:20:20 +04:00
|
|
|
// Forward declaration only: AlignmentInfo names this class as a friend,
// so the full definition is not needed in this header.
class AlignmentInfoCollection;
|
|
|
|
|
2012-06-27 03:45:02 +04:00
|
|
|
/** Collection of non-terminal alignment pairs, ordered by source index.
 *  Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models.
 *
 *  The pairs are (source position, target position) and are stored in a
 *  std::set, so iteration visits them in ascending (source, target) order.
 *  All constructors are private; instances are created through the friend
 *  classes declared below.
 */
class AlignmentInfo
{
  friend std::ostream& operator<<(std::ostream &, const AlignmentInfo &);
  friend struct AlignmentInfoOrderer;
  friend struct AlignmentInfoHasher;
  friend class AlignmentInfoCollection;
  friend class VW;

public:
  //! Underlying container of (source position, target position) pairs.
  typedef std::set<std::pair<size_t,size_t> > CollType;
  //! Index table type used for the two non-terminal index maps.
  typedef std::vector<size_t> NonTermIndexMap;
  //! Read-only iterator over the alignment pairs.
  typedef CollType::const_iterator const_iterator;

  //! Iterator to the first alignment pair.
  const_iterator begin() const {
    return m_collection.begin();
  }

  //! Past-the-end iterator of the alignment pairs.
  const_iterator end() const {
    return m_collection.end();
  }

  /** Insert the pair (sourcePos, targetPos) into the collection.
   *  A pair that is already present is not duplicated (std::set semantics).
   *  Only m_collection is touched here; the non-terminal index maps are
   *  not updated by this call.
   */
  void Add(size_t sourcePos, size_t targetPos) {
    m_collection.insert(std::make_pair(sourcePos, targetPos));
  }

  /** Provides a map from target-side to source-side non-terminal indices.
   *  The target-side index should be the rule symbol index (COUNTING terminals).
   *  The index returned is the rule non-terminal index (IGNORING terminals).
   */
  const NonTermIndexMap &GetNonTermIndexMap() const {
    return m_nonTermIndexMap;
  }

  /** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
   *  the index counting both terminals and non-terminals). */
  const NonTermIndexMap &GetNonTermIndexMap2() const {
    return m_nonTermIndexMap2;
  }

  //! Direct read-only access to the full set of alignment pairs.
  const CollType &GetAlignments() const {
    return m_collection;
  }

  // The two lookups below are defined out of line.
  // NOTE(review): presumably they return the positions on the opposite side
  // that are aligned to the given position -- confirm in the implementation.
  std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
  std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;

  //! Number of alignment pairs currently stored.
  size_t GetSize() const {
    return m_collection.size();
  }

  // Defined out of line. The returned pointers have type
  // const std::pair<size_t,size_t>*.
  // NOTE(review): presumably they point into m_collection and the sort
  // criterion is chosen by the implementation -- confirm there.
  std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;

  // Defined out of line.
  // NOTE(review): presumably maps a source non-terminal index to its
  // position -- confirm in the implementation.
  std::vector<size_t> GetSourceIndex2PosMap() const;

  /** Equality over the alignment pairs and the first non-terminal index map.
   *  m_nonTermIndexMap2 does not take part in the comparison.
   */
  bool operator==(const AlignmentInfo& rhs) const {
    if (m_collection != rhs.m_collection) {
      return false;
    }
    return m_nonTermIndexMap == rhs.m_nonTermIndexMap;
  }

private:
  //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
  explicit AlignmentInfo(const CollType &pairs);
  explicit AlignmentInfo(const std::vector<unsigned char> &aln);

  // used only by VW to load word alignment between sentences
  explicit AlignmentInfo(const std::string &str);

  // Defined out of line.
  // NOTE(review): presumably fills m_nonTermIndexMap and m_nonTermIndexMap2
  // from m_collection -- confirm in the implementation.
  void BuildNonTermIndexMaps();

  CollType m_collection;               //!< the alignment pairs themselves
  NonTermIndexMap m_nonTermIndexMap;   //!< exposed by GetNonTermIndexMap()
  NonTermIndexMap m_nonTermIndexMap2;  //!< exposed by GetNonTermIndexMap2()
};
|
|
|
|
|
2012-06-27 03:45:02 +04:00
|
|
|
/** Define an arbitrary strict weak ordering between AlignmentInfo objects
|
|
|
|
* for use by AlignmentInfoCollection.
|
|
|
|
*/
|
2013-05-29 21:16:15 +04:00
|
|
|
struct AlignmentInfoOrderer {
|
2011-06-17 01:20:20 +04:00
|
|
|
bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
|
2013-05-29 21:16:15 +04:00
|
|
|
if (a.m_collection == b.m_collection) {
|
|
|
|
return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
|
|
|
|
} else {
|
|
|
|
return a.m_collection < b.m_collection;
|
|
|
|
}
|
2011-06-17 01:20:20 +04:00
|
|
|
}
|
2010-04-08 21:16:10 +04:00
|
|
|
};
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
/**
|
2012-10-12 01:03:32 +04:00
|
|
|
* Hashing functoid
|
|
|
|
**/
|
2013-05-29 21:16:15 +04:00
|
|
|
struct AlignmentInfoHasher {
|
|
|
|
size_t operator()(const AlignmentInfo& a) const {
|
2012-10-12 01:03:32 +04:00
|
|
|
size_t seed = 0;
|
|
|
|
boost::hash_combine(seed,a.m_collection);
|
|
|
|
boost::hash_combine(seed,a.m_nonTermIndexMap);
|
|
|
|
return seed;
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
inline size_t hash_value(const AlignmentInfo& a)
|
|
|
|
{
|
2012-10-12 01:18:56 +04:00
|
|
|
static AlignmentInfoHasher hasher;
|
|
|
|
return hasher(a);
|
|
|
|
}
|
|
|
|
|
2011-06-17 01:20:20 +04:00
|
|
|
}
|