2010-07-18 03:23:09 +04:00
|
|
|
// $Id$
|
2010-04-12 14:15:49 +04:00
|
|
|
// vim:tabstop=2
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2010 Hieu Hoang
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <map>
|
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
|
|
|
#include "PhraseDictionary.h"
|
|
|
|
#include "../../OnDiskPt/src/OnDiskWrapper.h"
|
|
|
|
#include "../../OnDiskPt/src/Word.h"
|
|
|
|
#include "../../OnDiskPt/src/PhraseNode.h"
|
|
|
|
|
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
class TargetPhraseCollection;
|
|
|
|
class ProcessedRuleStackOnDisk;
|
|
|
|
class CellCollection;
|
2010-08-10 17:12:00 +04:00
|
|
|
class WordPenaltyProducer;
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
class PhraseDictionaryOnDisk : public PhraseDictionary
|
|
|
|
{
|
|
|
|
typedef PhraseDictionary MyBase;
|
|
|
|
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryOnDisk&);
|
|
|
|
|
|
|
|
protected:
|
2010-08-10 17:12:00 +04:00
|
|
|
const LMList* m_languageModels;
|
|
|
|
const WordPenaltyProducer* m_wpProducer;;
|
2010-04-08 21:16:10 +04:00
|
|
|
std::vector<FactorType> m_inputFactorsVec, m_outputFactorsVec;
|
|
|
|
std::vector<float> m_weight;
|
|
|
|
std::string m_filePath;
|
|
|
|
|
|
|
|
mutable OnDiskPt::OnDiskWrapper m_dbWrapper;
|
|
|
|
|
|
|
|
mutable std::map<UINT64, const TargetPhraseCollection*> m_cache;
|
|
|
|
mutable std::list<const OnDiskPt::PhraseNode*> m_sourcePhraseNode;
|
|
|
|
|
|
|
|
mutable std::vector<ProcessedRuleStackOnDisk*> m_runningNodesVec;
|
|
|
|
|
|
|
|
void LoadTargetLookup();
|
|
|
|
|
|
|
|
public:
|
|
|
|
PhraseDictionaryOnDisk(size_t numScoreComponent, PhraseDictionaryFeature* feature)
|
2010-08-10 17:12:00 +04:00
|
|
|
: MyBase(numScoreComponent, feature), m_languageModels(NULL)
|
2010-04-08 21:16:10 +04:00
|
|
|
{}
|
|
|
|
virtual ~PhraseDictionaryOnDisk();
|
|
|
|
|
|
|
|
PhraseTableImplementation GetPhraseTableImplementation() const
|
2010-04-08 21:57:38 +04:00
|
|
|
{ return OnDisk; }
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
bool Load(const std::vector<FactorType> &input
|
|
|
|
, const std::vector<FactorType> &output
|
|
|
|
, const std::string &filePath
|
|
|
|
, const std::vector<float> &weight
|
2010-08-10 17:12:00 +04:00
|
|
|
, size_t tableLimit,
|
|
|
|
const LMList& languageModels,
|
|
|
|
const WordPenaltyProducer* wpProducer);
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
std::string GetScoreProducerDescription() const
|
|
|
|
{ return "BerkeleyPt"; }
|
|
|
|
|
|
|
|
// PhraseDictionary impl
|
|
|
|
//! find list of translations that can translates src. Only for phrase input
|
|
|
|
virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const;
|
|
|
|
|
|
|
|
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
|
|
|
|
|
|
|
|
//! Create entry for translation of source to targetPhrase
|
|
|
|
virtual void AddEquivPhrase(const Phrase &source, TargetPhrase *targetPhrase);
|
|
|
|
|
2010-09-23 21:39:32 +04:00
|
|
|
virtual void GetChartRuleCollection(ChartTranslationOptionList &outColl
|
2010-09-23 19:18:11 +04:00
|
|
|
,InputType const& src
|
|
|
|
,WordsRange const& range
|
|
|
|
,bool adhereTableLimit
|
|
|
|
,const CellCollection &cellColl) const;
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
void InitializeForInput(const InputType& input);
|
|
|
|
void CleanUp();
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|