added comments

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@861 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2006-10-05 18:50:21 +00:00
parent d9e67b804b
commit 7a1414e4c1
5 changed files with 71 additions and 53 deletions

View File

@ -375,7 +375,7 @@ public:
Range newRange(curr.begin(),curr.end()+1);
float newScore=curr.GetScore()+currCol[colidx].second; // CN score
Phrase newSrc(curr.src);
if(!isEpsilon) newSrc.push_back(w);
if(!isEpsilon) newSrc.AddWord(w);
if(newRange.second<srcSize && newScore>LOWEST_SCORE)
{
// if there is more room to grow, add a new state onto the queue

View File

@ -27,18 +27,14 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "memory.h"
#include "FactorCollection.h"
#include "Phrase.h"
#include "Util.h" //malloc() replacement
#include "StaticData.h" // GetMaxNumFactors
using namespace std;
// std::vector<mempool*> Phrase::s_memPool;
/** Copy constructor.
 *  Member-wise copy of the direction, phrase size, array size and word
 *  vector of \p copy.
 *  \param copy phrase to duplicate
 */
Phrase::Phrase(const Phrase &copy)
:m_direction(copy.m_direction)
,m_phraseSize(copy.m_phraseSize)
,m_arraySize(copy.m_arraySize)
//,m_memPoolIndex(copy.m_memPoolIndex)
,m_words(copy.m_words)
{
}
@ -46,15 +42,13 @@ Phrase::Phrase(const Phrase &copy)
/** Copy assignment.
 *  When \p x is a different object, copies its direction, phrase size,
 *  array size and word vector into this phrase; self-assignment leaves
 *  the phrase untouched.
 *  \param x phrase to copy from
 *  \return reference to this phrase
 */
Phrase& Phrase::operator=(const Phrase& x)
{
if(this!=&x)
{
{
m_direction=x.m_direction;
m_phraseSize=x.m_phraseSize;
m_arraySize=x.m_arraySize;
m_direction=x.m_direction;
m_phraseSize=x.m_phraseSize;
m_arraySize=x.m_arraySize;
// m_memPoolIndex=x.m_memPoolIndex;
m_words = x.m_words;
}
m_words = x.m_words;
}
return *this;
}
@ -63,19 +57,17 @@ Phrase::Phrase(FactorDirection direction)
: m_direction(direction)
, m_phraseSize(0)
, m_arraySize(ARRAY_SIZE_INCR)
// , m_memPoolIndex(0)
, m_words(ARRAY_SIZE_INCR)
{
}
Phrase::Phrase(FactorDirection direction, const vector< const Word* > &mergeWords)
:m_direction(direction)
,m_phraseSize(mergeWords.size())
,m_words(mergeWords.size())
{
for (size_t currPos = 0 ; currPos < m_phraseSize ; currPos++)
for (size_t currPos = 0 ; currPos < mergeWords.size() ; currPos++)
{
m_words[currPos] = *mergeWords[currPos];
AddWord(*mergeWords[currPos]);
}
}
@ -209,7 +201,7 @@ void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
void Phrase::CreateFromString(const std::vector<FactorType> &factorOrder
, const string &phraseString
, FactorCollection &factorCollection
, const string &factorDelimiter)
, const string &factorDelimiter)
{
vector< vector<string> > phraseVector = Parse(phraseString, factorOrder, factorDelimiter);
CreateFromString(factorOrder, phraseVector, factorCollection);
@ -353,29 +345,10 @@ bool Phrase::IsCompatible(const Phrase &inputPhrase, const std::vector<FactorTyp
/** Currently a no-op: the whole body is compiled out by the #if 0 guard.
 *  The disabled code pushed a series of mempool objects, sized in multiples
 *  of ARRAY_SIZE_INCR * sizeof(FactorArray), onto s_memPool.
 */
void Phrase::InitializeMemPool()
{
#if 0
s_memPool.push_back( new mempool(1 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 50000 ));
s_memPool.push_back( new mempool(2 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
s_memPool.push_back( new mempool(3 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 1000 ));
s_memPool.push_back( new mempool(4 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 100 ));
s_memPool.push_back( new mempool(5 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
s_memPool.push_back( new mempool(6 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
s_memPool.push_back( new mempool(7 * ARRAY_SIZE_INCR * sizeof(FactorArray) , 10 ));
for (size_t i = 8 ; i < 30 ; ++i)
s_memPool.push_back( new mempool(i * ARRAY_SIZE_INCR * sizeof(FactorArray) , 2 ));
#endif
}
/** Currently a no-op: the whole body is compiled out by the #if 0 guard.
 *  The disabled code deleted every mempool pointer held in s_memPool.
 */
void Phrase::FinalizeMemPool()
{
#if 0
std::vector<mempool*>::iterator iter;
for (iter = s_memPool.begin() ; iter != s_memPool.end() ; ++iter)
{
delete *iter;
}
#endif
}
TO_STRING_BODY(Phrase);

View File

@ -36,55 +36,91 @@ class Phrase
{
friend std::ostream& operator<<(std::ostream&, const Phrase&);
private:
// static std::vector<mempool*> s_memPool;
FactorDirection m_direction;
FactorDirection m_direction; /** Reusing Direction enum to really mean which language
Input = Source, Output = Target.
Not really used, but nice to know for debugging purposes
*/
size_t m_phraseSize; //number of words
size_t m_arraySize;
// size_t m_memPoolIndex; //TODO is this supposed to be the number of mempools allocated?
size_t m_arraySize; /** current size of vector m_words. This number is equal or bigger
than m_phraseSize. Used for faster allocation of m_words */
std::vector<Word> m_words;
public:
/** No longer does anything as not using mem pool for Phrase class anymore */
static void InitializeMemPool();
static void FinalizeMemPool();
/** copy constructor */
Phrase(const Phrase &copy);
Phrase& operator=(const Phrase&);
/** create empty phrase
* \param direction = language (Input = Source, Output = Target)
*/
Phrase(FactorDirection direction);
/** create phrase from vectors of words */
Phrase(FactorDirection direction, const std::vector< const Word* > &mergeWords);
/** destructor */
virtual ~Phrase();
static std::vector< std::vector<std::string> > Parse(const std::string &phraseString, const std::vector<FactorType> &factorOrder, const std::string& factorDelimiter);
/** parse a string from phrase table or sentence input and create a 2D vector of strings
* \param phraseString string to parse
* \param factorOrder factors in the parse string. This argument is not fully used; it only serves as a check to ensure
* the number of factors is what was promised
* \param factorDelimiter what char to use to separate factor strings from each other. Usually use '|'. Can be multi-char
*/
static std::vector< std::vector<std::string> > Parse(
const std::string &phraseString
, const std::vector<FactorType> &factorOrder
, const std::string& factorDelimiter);
/** Fills phrase with words from 2D string vector
* \param factorOrder factor types of each element in 2D string vector
* \param phraseVector 2D string vector
*/
void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::vector< std::vector<std::string> > &phraseVector
, FactorCollection &factorCollection);
/** Fills phrase with words from format string, typically from phrase table or sentence input
* \param factorOrder factor types of each element in 2D string vector
* \param phraseString formatted input string to parse
* \param factorDelimiter delimiter, as used by Parse()
*/
void CreateFromString(const std::vector<FactorType> &factorOrder
, const std::string &phraseString
, FactorCollection &factorCollection
, const std::string &factorDelimiter);
/** copy factors from the other phrase to this phrase.
IsCompatible() must be run beforehand to ensure incompatible factors aren't overwritten
*/
void MergeFactors(const Phrase &copy);
//! copy a single factor (specified by factorType)
void MergeFactors(const Phrase &copy, FactorType factorType);
//! copy all factors specified in factorVec and none others
void MergeFactors(const Phrase &copy, const std::vector<FactorType>& factorVec);
// must run IsCompatible() to ensure incompatible factors aren't being overwritten
/** compare 2 phrases to ensure no factors are lost if the phrases are merged
* must be run before MergeFactors() to ensure incompatible factors aren't being overwritten
*/
bool IsCompatible(const Phrase &inputPhrase) const;
bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
//! really means what language. Input = Source, Output = Target
inline FactorDirection GetDirection() const
{
return m_direction;
}
//! number of words
inline size_t GetSize() const
{
return m_phraseSize;
}
//! word at a particular position
inline const Word &GetWord(size_t pos) const
{
return m_words[pos];
@ -93,6 +129,7 @@ public:
{
return m_words[pos];
}
//! particular factor at a particular position
inline const Factor *GetFactor(size_t pos, FactorType factorType) const
{
const Word &ptr = m_words[pos];
@ -104,22 +141,28 @@ public:
ptr[factorType] = factor;
}
//! whether the 2D vector is a substring of this phrase
bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
, const std::vector<FactorType> &inputFactor) const;
//! create an empty word at the end of the phrase
Word &AddWord();
//! create copy of input word at the end of the phrase
void AddWord(const Word &newWord)
{
AddWord() = newWord;
}
//! create new phrase class that is a substring of this phrase
Phrase GetSubString(const WordsRange &wordsRange) const;
//! return a string rep of the phrase. Each factor is separated by the factor delimiter as specified in StaticData class
std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
void push_back(Word const& w) {
AddWord() = w;
}
TO_STRING;
// used to insert & find phrase in dictionary
/** transitive comparison between 2 phrases
* used to insert & find phrase in dictionary
*/
bool operator< (const Phrase &compare) const;
};

View File

@ -33,13 +33,11 @@ using namespace std;
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase)
: m_targetPhrase(targetPhrase),m_sourcePhrase(targetPhrase.GetSourcePhrase())
,m_sourceWordsRange (wordsRange)
{ // used by initial translation step
{
// set score
m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
}
// used to create trans opt from unknown word
//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int /*whatever*/)
: m_targetPhrase(targetPhrase)

View File

@ -70,7 +70,9 @@ protected:
ScoreComponentCollection2 m_scoreBreakdown;
public:
/** constructor. Used by initial translation step */
TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase);
/** constructor. Used to create trans opt from unknown word */
TranslationOption(const WordsRange &wordsRange, const TargetPhrase &targetPhrase, int);
/** used by initial translation step */
@ -100,6 +102,7 @@ public:
return m_sourcePhrase;
}
/** whether source span overlaps with those of a hypothesis */
bool Overlap(const Hypothesis &hypothesis) const;
/** return start index of source phrase */
@ -143,6 +146,7 @@ public:
return m_scoreBreakdown;
}
/** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
void CalcScore();
TO_STRING;