2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
2010-02-24 14:15:44 +03:00
|
|
|
#ifndef moses_Word_h
|
|
|
|
#define moses_Word_h
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2009-07-23 14:29:30 +04:00
|
|
|
#include <cstring>
|
2008-06-11 14:52:57 +04:00
|
|
|
#include <iostream>
|
|
|
|
#include <vector>
|
|
|
|
#include <list>
|
2011-11-18 14:17:16 +04:00
|
|
|
|
|
|
|
#include "util/murmur_hash.hh"
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "TypeDef.h"
|
|
|
|
#include "Util.h"
|
2012-10-14 20:35:58 +04:00
|
|
|
#include "util/string_piece.hh"
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-10-04 18:40:23 +04:00
|
|
|
class Factor;
|
2013-07-04 18:13:29 +04:00
|
|
|
class FactorMask;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2012-06-29 02:29:46 +04:00
|
|
|
/** Represent a word (terminal or non-term)
|
|
|
|
* Wrapper around hold a set of factors for a single word
|
2008-06-11 14:52:57 +04:00
|
|
|
*/
|
|
|
|
class Word
|
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
friend std::ostream& operator<<(std::ostream&, const Word&);
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
protected:
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
typedef const Factor * FactorArray[MAX_NUM_FACTORS];
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
FactorArray m_factorArray; /**< set of factors */
|
|
|
|
bool m_isNonTerminal;
|
2013-06-24 17:45:20 +04:00
|
|
|
bool m_isOOV;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
public:
|
2011-02-24 16:14:42 +03:00
|
|
|
/** deep copy */
|
|
|
|
Word(const Word ©)
|
2013-06-24 17:45:20 +04:00
|
|
|
:m_isNonTerminal(copy.m_isNonTerminal)
|
2013-07-04 23:19:51 +04:00
|
|
|
,m_isOOV(copy.m_isOOV) {
|
2011-02-24 16:14:42 +03:00
|
|
|
std::memcpy(m_factorArray, copy.m_factorArray, sizeof(FactorArray));
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
/** empty word */
|
|
|
|
explicit Word(bool isNonTerminal = false) {
|
|
|
|
std::memset(m_factorArray, 0, sizeof(FactorArray));
|
|
|
|
m_isNonTerminal = isNonTerminal;
|
2013-06-24 17:45:20 +04:00
|
|
|
m_isOOV = false;
|
2011-02-24 16:14:42 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
~Word() {}
|
|
|
|
|
|
|
|
//! returns Factor pointer for particular FactorType
|
|
|
|
const Factor*& operator[](FactorType index) {
|
|
|
|
return m_factorArray[index];
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
const Factor * const & operator[](FactorType index) const {
|
|
|
|
return m_factorArray[index];
|
|
|
|
}
|
|
|
|
|
|
|
|
//! Deprecated. should use operator[]
|
|
|
|
inline const Factor* GetFactor(FactorType factorType) const {
|
|
|
|
return m_factorArray[factorType];
|
|
|
|
}
|
|
|
|
inline void SetFactor(FactorType factorType, const Factor *factor) {
|
|
|
|
m_factorArray[factorType] = factor;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool IsNonTerminal() const {
|
|
|
|
return m_isNonTerminal;
|
|
|
|
}
|
|
|
|
inline void SetIsNonTerminal(bool val) {
|
|
|
|
m_isNonTerminal = val;
|
|
|
|
}
|
|
|
|
|
2013-06-24 17:45:20 +04:00
|
|
|
inline bool IsOOV() const {
|
|
|
|
return m_isOOV;
|
|
|
|
}
|
|
|
|
inline void SetIsOOV(bool val) {
|
|
|
|
m_isOOV = val;
|
|
|
|
}
|
|
|
|
|
2013-10-04 17:18:11 +04:00
|
|
|
bool IsEpsilon() const;
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
/** add the factors from sourceWord into this representation,
|
|
|
|
* NULL elements in sourceWord will be skipped */
|
|
|
|
void Merge(const Word &sourceWord);
|
|
|
|
|
|
|
|
/** get string representation of list of factors. Used by PDTimp so supposed
|
|
|
|
* to be invariant to changes in format of debuggin output, therefore, doesn't
|
|
|
|
* use streaming output or ToString() from any class so not dependant on
|
|
|
|
* these debugging functions.
|
|
|
|
*/
|
|
|
|
std::string GetString(const std::vector<FactorType> factorType,bool endWithBlank) const;
|
2013-04-25 22:42:30 +04:00
|
|
|
StringPiece GetString(FactorType factorType) const;
|
2011-02-24 16:14:42 +03:00
|
|
|
TO_STRING();
|
|
|
|
|
|
|
|
//! transitive comparison of Word objects
|
|
|
|
inline bool operator< (const Word &compare) const {
|
|
|
|
// needed to store word in GenerationDictionary map
|
|
|
|
// uses comparison of FactorKey
|
|
|
|
// 'proper' comparison, not address/id comparison
|
|
|
|
return Compare(*this, compare) < 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool operator== (const Word &compare) const {
|
|
|
|
// needed to store word in GenerationDictionary map
|
|
|
|
// uses comparison of FactorKey
|
|
|
|
// 'proper' comparison, not address/id comparison
|
|
|
|
return Compare(*this, compare) == 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
inline bool operator!= (const Word &compare) const {
|
|
|
|
return Compare(*this, compare) != 0;
|
|
|
|
}
|
|
|
|
|
2013-07-21 02:41:49 +04:00
|
|
|
int Compare(const Word &other) const {
|
|
|
|
return Compare(*this, other);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
/* static functions */
|
|
|
|
|
|
|
|
/** transitive comparison of 2 word objects. Used by operator<.
|
|
|
|
* Only compare the co-joined factors, ie. where factor exists for both words.
|
|
|
|
* Should make it non-static
|
|
|
|
*/
|
|
|
|
static int Compare(const Word &targetWord, const Word &sourceWord);
|
|
|
|
|
|
|
|
void CreateFromString(FactorDirection direction
|
|
|
|
, const std::vector<FactorType> &factorOrder
|
2012-10-14 20:35:58 +04:00
|
|
|
, const StringPiece &str
|
2014-04-11 18:24:52 +04:00
|
|
|
, bool isNonTerminal
|
|
|
|
, bool strict = true);
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
void CreateUnknownWord(const Word &sourceWord);
|
|
|
|
|
2013-07-04 18:13:29 +04:00
|
|
|
void OnlyTheseFactors(const FactorMask &factors);
|
|
|
|
|
2011-11-18 14:17:16 +04:00
|
|
|
inline size_t hash() const {
|
2011-11-18 14:27:19 +04:00
|
|
|
return util::MurmurHashNative(m_factorArray, MAX_NUM_FACTORS*sizeof(Factor*), m_isNonTerminal);
|
2011-11-18 14:17:16 +04:00
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
};
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
struct WordComparer {
|
|
|
|
//! returns true if hypoA can be recombined with hypoB
|
|
|
|
bool operator()(const Word *a, const Word *b) const {
|
|
|
|
return *a < *b;
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
};
|
|
|
|
|
2011-11-18 14:17:16 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
inline size_t hash_value(const Word& word)
|
|
|
|
{
|
|
|
|
return word.hash();
|
2011-11-18 14:17:16 +04:00
|
|
|
}
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
|
|
|
|
2010-02-24 14:15:44 +03:00
|
|
|
#endif
|