mosesdecoder/moses/AlignmentInfo.h
Phil Williams 5240c430ce Merge s2t branch
This adds a new string-to-tree decoder, which can be enabled with the -s2t
option.  It's intended to be faster and simpler than the generic chart
decoder, and is designed to support lattice input (still WIP).  For an en-de
system trained on WMT14 data, it's approximately 40% faster in practice.

For background information, see the decoding section of the EMNLP tutorial
on syntax-based MT:

  http://www.emnlp2014.org/tutorials/5_notes.pdf

Some features are not implemented yet, including support for internal tree
structure and soft source-syntactic constraints.
2014-11-04 13:13:56 +00:00

138 lines
4.1 KiB
C++

/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <ostream>
#include <set>
#include <vector>
#include <cstdlib>
#include <boost/functional/hash.hpp>
namespace Moses
{
class AlignmentInfoCollection;
/** A set of (source position, target position) alignment points. The
 * underlying std::set keeps the points ordered by source position first and
 * by target position second.
 * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
 */
class AlignmentInfo
{
  // Friends that need direct access to the private constructors and members.
  friend class AlignmentInfoCollection;
  friend struct AlignmentInfoHasher;
  friend struct AlignmentInfoOrderer;
  friend std::ostream& operator<<(std::ostream &, const AlignmentInfo &);

public:
  typedef std::set<std::pair<size_t,size_t> > CollType;
  typedef std::vector<size_t> NonTermIndexMap;
  typedef CollType::const_iterator const_iterator;

  //! Iterator to the first alignment point (in set order).
  const_iterator begin() const {
    return m_collection.begin();
  }

  //! Past-the-end iterator over the alignment points.
  const_iterator end() const {
    return m_collection.end();
  }

  /** Insert the alignment point (sourcePos, targetPos). Inserting a point
   * that is already present leaves the set unchanged. Only the point set is
   * modified here; the non-terminal index maps are not touched.
   */
  void Add(size_t sourcePos, size_t targetPos) {
    m_collection.insert(CollType::value_type(sourcePos, targetPos));
  }

  /** Map from target-side to source-side non-terminal indices.
   * Look up with the target-side rule symbol index (COUNTING terminals);
   * the stored value is the rule non-terminal index (IGNORING terminals).
   */
  const NonTermIndexMap &GetNonTermIndexMap() const {
    return m_nonTermIndexMap;
  }

  /** Same lookup as GetNonTermIndexMap, but the stored value is the symbol
   * index (i.e. the index counting both terminals and non-terminals).
   */
  const NonTermIndexMap &GetNonTermIndexMap2() const {
    return m_nonTermIndexMap2;
  }

  //! Read-only access to the full set of alignment points.
  const CollType &GetAlignments() const {
    return m_collection;
  }

  // The following queries are defined out of line.
  std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
  std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;

  //! Number of distinct alignment points.
  size_t GetSize() const {
    return m_collection.size();
  }

  // Defined out of line; the returned pointers refer to alignment pairs
  // (presumably elements of this object's own set -- confirm against the
  // implementation before relying on their lifetime).
  std::vector<const CollType::value_type*> GetSortedAlignments() const;
  std::vector<size_t> GetSourceIndex2PosMap() const;

  /** Equality over the alignment points and the first non-terminal index
   * map. m_nonTermIndexMap2 takes no part in the comparison.
   */
  bool operator==(const AlignmentInfo& rhs) const {
    if (!(m_collection == rhs.m_collection)) {
      return false;
    }
    return m_nonTermIndexMap == rhs.m_nonTermIndexMap;
  }

private:
  //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
  explicit AlignmentInfo(const CollType &pairs);
  explicit AlignmentInfo(const std::vector<unsigned char> &aln);

  void BuildNonTermIndexMaps();

  CollType m_collection;              // the alignment points themselves
  NonTermIndexMap m_nonTermIndexMap;  // returned by GetNonTermIndexMap()
  NonTermIndexMap m_nonTermIndexMap2; // returned by GetNonTermIndexMap2()
};
/** Arbitrary strict weak ordering between AlignmentInfo objects, for use by
 * AlignmentInfoCollection. Objects are ranked by their alignment sets; when
 * those are equal, the first non-terminal index map breaks the tie.
 */
struct AlignmentInfoOrderer {
  bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
    // Differing alignment sets decide the order on their own.
    if (a.m_collection != b.m_collection) {
      return a.m_collection < b.m_collection;
    }
    // Same alignment points: fall back to the non-terminal index map.
    return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
  }
};
/**
* Hash functor for AlignmentInfo. Folds the alignment set and then the first
* non-terminal index map into one value, i.e. the same two members that
* AlignmentInfo::operator== compares.
**/
struct AlignmentInfoHasher {
  size_t operator()(const AlignmentInfo& a) const {
    size_t digest = 0;
    // The order of the two combine steps determines the resulting value.
    boost::hash_combine(digest, a.m_collection);
    boost::hash_combine(digest, a.m_nonTermIndexMap);
    return digest;
  }
};
inline size_t hash_value(const AlignmentInfo& a)
{
static AlignmentInfoHasher hasher;
return hasher(a);
}
}