2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
// vim:tabstop=2
|
|
|
|
|
|
|
|
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
|
|
|
|
|
2010-02-24 14:15:44 +03:00
|
|
|
#ifndef moses_Phrase_h
|
|
|
|
#define moses_Phrase_h
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <vector>
|
|
|
|
#include <list>
|
|
|
|
#include <string>
|
2011-11-18 14:17:16 +04:00
|
|
|
|
|
|
|
#include <boost/functional/hash.hpp>
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "Word.h"
|
|
|
|
#include "WordsBitmap.h"
|
|
|
|
#include "TypeDef.h"
|
|
|
|
#include "Util.h"
|
|
|
|
|
2011-10-14 20:40:30 +04:00
|
|
|
#include "util/string_piece.hh"
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
2013-04-26 15:20:49 +04:00
|
|
|
// Forward declaration: this header only takes FactorMask by const reference
// (see Phrase::OnlyTheseFactors), so the full definition is not needed here.
class FactorMask;
|
2008-10-09 03:51:26 +04:00
|
|
|
|
2012-06-29 02:29:46 +04:00
|
|
|
/** Representation of a phrase, i.e. a contiguous sequence of words.
 *  Wrapper for vector of words
 */
|
2008-06-11 14:52:57 +04:00
|
|
|
class Phrase
|
|
|
|
{
|
2011-02-24 16:14:42 +03:00
|
|
|
friend std::ostream& operator<<(std::ostream&, const Phrase&);
|
|
|
|
private:
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
std::vector<Word> m_words;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
public:
|
2011-02-24 16:14:42 +03:00
|
|
|
/** No longer does anything as not using mem pool for Phrase class anymore */
|
|
|
|
static void InitializeMemPool();
|
|
|
|
static void FinalizeMemPool();
|
|
|
|
|
|
|
|
/** create empty phrase
|
|
|
|
*/
|
2012-10-22 21:17:46 +04:00
|
|
|
Phrase();
|
2012-10-22 20:40:23 +04:00
|
|
|
explicit Phrase(size_t reserveSize);
|
2011-02-24 16:14:42 +03:00
|
|
|
/** create phrase from vectors of words */
|
2012-10-22 20:40:23 +04:00
|
|
|
explicit Phrase(const std::vector< const Word* > &mergeWords);
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
/** destructor */
|
|
|
|
virtual ~Phrase();
|
|
|
|
|
|
|
|
/** Fills phrase with words from format string, typically from phrase table or sentence input
|
|
|
|
* \param factorOrder factor types of each element in 2D string vector
|
|
|
|
* \param phraseString formatted input string to parse
|
2013-05-29 21:16:15 +04:00
|
|
|
* \param factorDelimiter delimiter between factors.
|
2011-02-24 16:14:42 +03:00
|
|
|
*/
|
2013-03-08 23:10:28 +04:00
|
|
|
void CreateFromString(FactorDirection direction
|
|
|
|
, const std::vector<FactorType> &factorOrder
|
2013-05-29 21:16:15 +04:00
|
|
|
, const StringPiece &phraseString
|
|
|
|
, const StringPiece &factorDelimiter
|
|
|
|
, Word **lhs);
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
/** copy factors from the other phrase to this phrase.
|
|
|
|
IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten
|
|
|
|
*/
|
|
|
|
void MergeFactors(const Phrase ©);
|
|
|
|
//! copy a single factor (specified by factorType)
|
|
|
|
void MergeFactors(const Phrase ©, FactorType factorType);
|
|
|
|
//! copy all factors specified in factorVec and none others
|
|
|
|
void MergeFactors(const Phrase ©, const std::vector<FactorType>& factorVec);
|
|
|
|
|
|
|
|
/** compare 2 phrases to ensure no factors are lost if the phrases are merged
|
|
|
|
* must run IsCompatible() to ensure incompatible factors aren't being overwritten
|
|
|
|
*/
|
|
|
|
bool IsCompatible(const Phrase &inputPhrase) const;
|
|
|
|
bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
|
|
|
|
bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
|
|
|
|
|
|
|
|
//! number of words
|
|
|
|
inline size_t GetSize() const {
|
|
|
|
return m_words.size();
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! word at a particular position
|
|
|
|
inline const Word &GetWord(size_t pos) const {
|
|
|
|
return m_words[pos];
|
|
|
|
}
|
|
|
|
inline Word &GetWord(size_t pos) {
|
|
|
|
return m_words[pos];
|
|
|
|
}
|
|
|
|
//! particular factor at a particular position
|
|
|
|
inline const Factor *GetFactor(size_t pos, FactorType factorType) const {
|
|
|
|
const Word &ptr = m_words[pos];
|
|
|
|
return ptr[factorType];
|
|
|
|
}
|
|
|
|
inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor) {
|
|
|
|
Word &ptr = m_words[pos];
|
|
|
|
ptr[factorType] = factor;
|
|
|
|
}
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
size_t GetNumTerminals() const;
|
2010-04-08 21:16:10 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! whether the 2D vector is a substring of this phrase
|
|
|
|
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
|
|
|
|
, const std::vector<FactorType> &inputFactor) const;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! create an empty word at the end of the phrase
|
|
|
|
Word &AddWord();
|
|
|
|
//! create copy of input word at the end of the phrase
|
|
|
|
void AddWord(const Word &newWord) {
|
2008-06-11 14:52:57 +04:00
|
|
|
AddWord() = newWord;
|
|
|
|
}
|
2013-05-29 21:16:15 +04:00
|
|
|
|
|
|
|
/** appends a phrase at the end of current phrase **/
|
|
|
|
void Append(const Phrase &endPhrase);
|
|
|
|
void PrependWord(const Word &newWord);
|
|
|
|
|
|
|
|
void Clear() {
|
|
|
|
m_words.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
void RemoveWord(size_t pos) {
|
|
|
|
CHECK(pos < m_words.size());
|
|
|
|
m_words.erase(m_words.begin() + pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
//! create new phrase class that is a substring of this phrase
|
|
|
|
Phrase GetSubString(const WordsRange &wordsRange) const;
|
2011-12-09 13:30:48 +04:00
|
|
|
Phrase GetSubString(const WordsRange &wordsRange, FactorType factorType) const;
|
2013-05-29 21:16:15 +04:00
|
|
|
|
|
|
|
//! return a string rep of the phrase. Each factor is separated by the factor delimiter as specified in StaticData class
|
|
|
|
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
|
|
|
|
|
|
|
|
TO_STRING();
|
|
|
|
|
|
|
|
|
|
|
|
int Compare(const Phrase &other) const;
|
|
|
|
|
|
|
|
/** transitive comparison between 2 phrases
|
|
|
|
* used to insert & find phrase in dictionary
|
|
|
|
*/
|
|
|
|
bool operator< (const Phrase &compare) const {
|
|
|
|
return Compare(compare) < 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool operator== (const Phrase &compare) const {
|
|
|
|
return Compare(compare) == 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void OnlyTheseFactors(const FactorMask &factors);
|
2013-04-26 15:20:49 +04:00
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
};
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
inline size_t hash_value(const Phrase& phrase)
|
|
|
|
{
|
2011-11-18 14:17:16 +04:00
|
|
|
size_t seed = 0;
|
|
|
|
for (size_t i = 0; i < phrase.GetSize(); ++i) {
|
|
|
|
boost::hash_combine(seed, phrase.GetWord(i));
|
|
|
|
}
|
|
|
|
return seed;
|
|
|
|
}
|
2008-10-09 03:51:26 +04:00
|
|
|
|
2013-03-01 02:48:41 +04:00
|
|
|
struct PhrasePtrComparator {
|
|
|
|
inline bool operator()(const Phrase* lhs, const Phrase* rhs) const {
|
|
|
|
return *lhs == *rhs;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct PhrasePtrHasher {
|
|
|
|
inline size_t operator()(const Phrase* phrase) const {
|
|
|
|
size_t seed = 0;
|
|
|
|
boost::hash_combine(seed,*phrase);
|
|
|
|
return seed;
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
2013-03-01 02:48:41 +04:00
|
|
|
|
2010-02-24 14:15:44 +03:00
|
|
|
#endif
|