mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 15:48:05 +03:00
added comments
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@861 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
d9e67b804b
commit
7a1414e4c1
@ -375,7 +375,7 @@ public:
|
||||
Range newRange(curr.begin(),curr.end()+1);
|
||||
float newScore=curr.GetScore()+currCol[colidx].second; // CN score
|
||||
Phrase newSrc(curr.src);
|
||||
if(!isEpsilon) newSrc.push_back(w);
|
||||
if(!isEpsilon) newSrc.AddWord(w);
|
||||
if(newRange.second<srcSize && newScore>LOWEST_SCORE)
|
||||
{
|
||||
// if there is more room to grow, add a new state onto the queue
|
||||
|
@ -27,18 +27,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "memory.h"
|
||||
#include "FactorCollection.h"
|
||||
#include "Phrase.h"
|
||||
#include "Util.h" //malloc() replacement
|
||||
#include "StaticData.h" // GetMaxNumFactors
|
||||
|
||||
using namespace std;
|
||||
|
||||
// std::vector<mempool*> Phrase::s_memPool;
|
||||
|
||||
Phrase::Phrase(const Phrase &copy)
|
||||
:m_direction(copy.m_direction)
|
||||
,m_phraseSize(copy.m_phraseSize)
|
||||
,m_arraySize(copy.m_arraySize)
|
||||
//,m_memPoolIndex(copy.m_memPoolIndex)
|
||||
,m_words(copy.m_words)
|
||||
{
|
||||
}
|
||||
@ -46,15 +42,13 @@ Phrase::Phrase(const Phrase &copy)
|
||||
Phrase& Phrase::operator=(const Phrase& x)
|
||||
{
|
||||
if(this!=&x)
|
||||
{
|
||||
{
|
||||
m_direction=x.m_direction;
|
||||
m_phraseSize=x.m_phraseSize;
|
||||
m_arraySize=x.m_arraySize;
|
||||
|
||||
m_direction=x.m_direction;
|
||||
m_phraseSize=x.m_phraseSize;
|
||||
m_arraySize=x.m_arraySize;
|
||||
// m_memPoolIndex=x.m_memPoolIndex;
|
||||
|
||||
m_words = x.m_words;
|
||||
}
|
||||
m_words = x.m_words;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -63,19 +57,17 @@ Phrase::Phrase(FactorDirection direction)
|
||||
: m_direction(direction)
|
||||
, m_phraseSize(0)
|
||||
, m_arraySize(ARRAY_SIZE_INCR)
|
||||
// , m_memPoolIndex(0)
|
||||
, m_words(ARRAY_SIZE_INCR)
|
||||
{
|
||||
}
|
||||
|
||||
Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
|
||||
:m_direction(direction)
|
||||
,m_phraseSize(mergeWords.size())
|
||||
,m_words(mergeWords.size())
|
||||
{
|
||||
for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++)
|
||||
for (size_t currPos = 0 ; currPos < mergeWords.size() ; currPos++)
|
||||
{
|
||||
m_words[currPos] = *mergeWords[currPos];
|
||||
AddWord(*mergeWords[currPos]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -209,7 +201,7 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const string &phraseString
|
||||
, FactorCollection &factorCollection
|
||||
, const string &factorDelimiter)
|
||||
, const string &factorDelimiter)
|
||||
{
|
||||
vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter);
|
||||
CreateFromString(factorOrder, phraseVector, factorCollection);
|
||||
@ -353,29 +345,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp
|
||||
|
||||
void Phrase::InitializeMemPool()
|
||||
{
|
||||
#if 0
|
||||
s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 ));
|
||||
s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
|
||||
s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
|
||||
s_memPool.push_back( new mempool(4 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 100 ));
|
||||
s_memPool.push_back( new mempool(5 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
|
||||
s_memPool.push_back( new mempool(6 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
|
||||
s_memPool.push_back( new mempool(7 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
|
||||
|
||||
for (size_t i = 8 ; i < 30 ; ++i)
|
||||
s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 ));
|
||||
#endif
|
||||
}
|
||||
|
||||
void Phrase::FinalizeMemPool()
|
||||
{
|
||||
#if 0
|
||||
std::vector<mempool*>::iterator iter;
|
||||
for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter)
|
||||
{
|
||||
delete *iter;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
TO_STRING_BODY(Phrase);
|
||||
|
@ -36,55 +36,91 @@ class Phrase
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const Phrase&);
|
||||
private:
|
||||
// static std::vector<mempool*> s_memPool;
|
||||
|
||||
FactorDirection m_direction;
|
||||
FactorDirection m_direction; /** Reusing Direction enum to really mean which language
|
||||
Input = Source, Output = Target.
|
||||
Not really used, but nice to know for debugging purposes
|
||||
*/
|
||||
size_t m_phraseSize; //number of words
|
||||
size_t m_arraySize;
|
||||
// size_t m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
|
||||
size_t m_arraySize; /** current size of vector m_words. This number is equal to or bigger
|
||||
than m_phraseSize. Used for faster allocation of m_words */
|
||||
std::vector<Word> m_words;
|
||||
|
||||
public:
|
||||
/** No longer does anything as not using mem pool for Phrase class anymore */
|
||||
static void InitializeMemPool();
|
||||
static void FinalizeMemPool();
|
||||
|
||||
/** copy constructor */
|
||||
Phrase(const Phrase &copy);
|
||||
Phrase& operator=(const Phrase&);
|
||||
|
||||
/** create empty phrase
|
||||
* \param direction = language (Input = Source, Output = Target)
|
||||
*/
|
||||
Phrase(FactorDirection direction);
|
||||
/** create phrase from vectors of words */
|
||||
Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);
|
||||
|
||||
/** destructor */
|
||||
virtual ~Phrase();
|
||||
|
||||
static std::vector< std::vector<std::string> > Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter);
|
||||
/** parse a string from phrase table or sentence input and create a 2D vector of strings
|
||||
* \param phraseString string to parse
|
||||
* \param factorOrder factors in the parse string. This argument is not fully used, only as a check to ensure
|
||||
* number of factors is what was promised
|
||||
* \param factorDelimiter what char to use to separate factor strings from each other. Usually use '|'. Can be multi-char
|
||||
*/
|
||||
static std::vector< std::vector<std::string> > Parse(
|
||||
const std::string &phraseString
|
||||
, const std::vector<FactorType> &factorOrder
|
||||
, const std::string& factorDelimiter);
|
||||
/** Fills phrase with words from 2D string vector
|
||||
* \param factorOrder factor types of each element in 2D string vector
|
||||
* \param phraseVector 2D string vector
|
||||
*/
|
||||
void CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const std::vector< std::vector<std::string> > &phraseVector
|
||||
, FactorCollection &factorCollection);
|
||||
/** Fills phrase with words from format string, typically from phrase table or sentence input
|
||||
* \param factorOrder factor types of each element in 2D string vector
|
||||
* \param phraseString formatted input string to parse
|
||||
* \param factorDelimiter delimiter, as used by Parse()
|
||||
*/
|
||||
void CreateFromString(const std::vector<FactorType> &factorOrder
|
||||
, const std::string &phraseString
|
||||
, FactorCollection &factorCollection
|
||||
, const std::string &factorDelimiter);
|
||||
|
||||
/** copy factors from the other phrase to this phrase.
|
||||
IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten
|
||||
*/
|
||||
void MergeFactors(const Phrase &copy);
|
||||
//! copy a single factor (specified by factorType)
|
||||
void MergeFactors(const Phrase &copy, FactorType factorType);
|
||||
//! copy all factors specified in factorVec and none others
|
||||
void MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec);
|
||||
|
||||
// must run IsCompatible() to ensure incompatible factors aren't being overwritten
|
||||
/** compare 2 phrases to ensure no factors are lost if the phrases are merged
|
||||
* must run IsCompatible() to ensure incompatible factors aren't being overwritten
|
||||
*/
|
||||
bool IsCompatible(const Phrase &inputPhrase) const;
|
||||
bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
|
||||
bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
|
||||
|
||||
|
||||
//! really means what language. Input = Source, Output = Target
|
||||
inline FactorDirection GetDirection() const
|
||||
{
|
||||
return m_direction;
|
||||
}
|
||||
|
||||
//! number of words
|
||||
inline size_t GetSize() const
|
||||
{
|
||||
return m_phraseSize;
|
||||
}
|
||||
|
||||
//! word at a particular position
|
||||
inline const Word &GetWord(size_t pos) const
|
||||
{
|
||||
return m_words[pos];
|
||||
@ -93,6 +129,7 @@ public:
|
||||
{
|
||||
return m_words[pos];
|
||||
}
|
||||
//! particular factor at a particular position
|
||||
inline const Factor *GetFactor(size_t pos, FactorType factorType) const
|
||||
{
|
||||
const Word &ptr = m_words[pos];
|
||||
@ -104,22 +141,28 @@ public:
|
||||
ptr[factorType] = factor;
|
||||
}
|
||||
|
||||
//! whether the 2D vector is a substring of this phrase
|
||||
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
|
||||
, const std::vector<FactorType> &inputFactor) const;
|
||||
|
||||
//! create an empty word at the end of the phrase
|
||||
Word &AddWord();
|
||||
|
||||
//! create copy of input word at the end of the phrase
|
||||
void AddWord(const Word &newWord)
|
||||
{
|
||||
AddWord() = newWord;
|
||||
}
|
||||
//! create new phrase class that is a substring of this phrase
|
||||
Phrase GetSubString(const WordsRange &wordsRange) const;
|
||||
|
||||
//! return a string rep of the phrase. Each factor is separated by the factor delimiter as specified in StaticData class
|
||||
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
|
||||
|
||||
void push_back(Word const& w) {
|
||||
AddWord() = w;
|
||||
}
|
||||
|
||||
TO_STRING;
|
||||
|
||||
// used to insert & find phrase in dictionary
|
||||
/** transitive comparison between 2 phrases
|
||||
* used to insert & find phrase in dictionary
|
||||
*/
|
||||
bool operator< (const Phrase &compare) const;
|
||||
};
|
||||
|
||||
|
@ -33,13 +33,11 @@ using namespace std;
|
||||
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase)
|
||||
: m_targetPhrase(targetPhrase),m_sourcePhrase(targetPhrase.GetSourcePhrase())
|
||||
,m_sourceWordsRange (wordsRange)
|
||||
{ // used by initial translation step
|
||||
|
||||
{
|
||||
// set score
|
||||
m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
|
||||
}
|
||||
|
||||
// used to create trans opt from unknown word
|
||||
//TODO this should be a factory function!
|
||||
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int /*whatever*/)
|
||||
: m_targetPhrase(targetPhrase)
|
||||
|
@ -70,7 +70,9 @@ protected:
|
||||
ScoreComponentCollection2 m_scoreBreakdown;
|
||||
|
||||
public:
|
||||
/** constructor. Used by initial translation step */
|
||||
TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase);
|
||||
/** constructor. Used to create trans opt from unknown word */
|
||||
TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int);
|
||||
|
||||
/** used by initial translation step */
|
||||
@ -100,6 +102,7 @@ public:
|
||||
return m_sourcePhrase;
|
||||
}
|
||||
|
||||
/** whether source span overlaps with those of a hypothesis */
|
||||
bool Overlap(const Hypothesis &hypothesis) const;
|
||||
|
||||
/** return start index of source phrase */
|
||||
@ -143,6 +146,7 @@ public:
|
||||
return m_scoreBreakdown;
|
||||
}
|
||||
|
||||
/** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
|
||||
void CalcScore();
|
||||
|
||||
TO_STRING;
|
||||
|
Loading…
Reference in New Issue
Block a user