2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
// vim:tabstop=2
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#include "PhraseDictionary.h"
|
2009-08-07 20:47:54 +04:00
|
|
|
#include "PhraseDictionaryTreeAdaptor.h"
|
2008-06-11 14:52:57 +04:00
|
|
|
#include "StaticData.h"
|
|
|
|
#include "InputType.h"
|
2009-02-06 18:43:06 +03:00
|
|
|
#include "TranslationOption.h"
|
2010-04-06 15:37:50 +04:00
|
|
|
#include "PhraseDictionaryDynSuffixArray.h"
|
2010-01-28 15:12:57 +03:00
|
|
|
#include "UserMessage.h"
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
namespace Moses {
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
/**
 * Look up the target phrases for a span of the input.
 *
 * Takes the sub-string of \p src covered by \p range and forwards it to the
 * GetTargetPhraseCollection overload that accepts that sub-string.
 *
 * \param src    input being translated
 * \param range  span of \p src to look up
 * \return whatever the forwarded-to overload returns
 */
const TargetPhraseCollection *PhraseDictionary::GetTargetPhraseCollection(
  InputType const& src,
  WordsRange const& range) const
{
  return this->GetTargetPhraseCollection(src.GetSubString(range));
}
|
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
/**
 * Build a phrase-table feature and register it as a score producer.
 *
 * Exactly one of three things happens, depending on the arguments and on
 * which files exist:
 *  - \p targetFile and \p alignmentsFile are both non-empty: a
 *    PhraseDictionaryDynSuffixArray is created and loaded immediately;
 *  - otherwise, if "<filePath>.binphr.idx" does not exist: a
 *    PhraseDictionaryMemory is created and loaded immediately (using
 *    "<filePath>.gz" when the plain file is missing but the gzipped one
 *    exists);
 *  - otherwise nothing is loaded here; GetDictionary() creates the tree
 *    dictionary the first time it is needed.
 *
 * \param numScoreComponent  stored; handed to the dictionary constructors
 * \param numInputScores     stored; handed to the tree dictionary adaptor
 * \param input              input factor types, forwarded to Load()
 * \param output             output factor types, forwarded to Load()
 * \param filePath           phrase table path (without ".binphr.idx"/".gz")
 * \param weight             weights forwarded to Load()
 * \param tableLimit         table limit forwarded to Load()
 * \param targetFile         target-side file for the suffix-array table
 * \param alignmentsFile     alignment file for the suffix-array table
 */
PhraseDictionaryFeature::PhraseDictionaryFeature
( size_t numScoreComponent
, unsigned numInputScores
, const std::vector<FactorType> &input
, const std::vector<FactorType> &output
, const std::string &filePath
, const std::vector<float> &weight
, size_t tableLimit
, const std::string targetFile  // default param
, const std::string alignmentsFile): // default param
  m_numScoreComponent(numScoreComponent),
  m_numInputScores(numInputScores),
  m_input(input),
  m_output(output),
  m_filePath(filePath),
  m_weight(weight),
  m_tableLimit(tableLimit)
{
  const StaticData& staticData = StaticData::Instance();
  const_cast<ScoreIndexManager&>(staticData.GetScoreIndexManager()).AddScoreProducer(this);

  // HACKING IN DYN SA PHRASE TABLES
  if((targetFile != "") && (alignmentsFile != "")) {
    PhraseDictionaryDynSuffixArray *pd = new PhraseDictionaryDynSuffixArray(numScoreComponent, this);
    if (pd->Load(filePath, targetFile, alignmentsFile
                 , weight, tableLimit
                 , staticData.GetAllLM()
                 , staticData.GetWeightWordPenalty()))
    {
      // Ownership is transferred only once the table is actually usable.
      m_memoryDictionary.reset(pd);
      std::cerr << "Suffix array phrase table loaded" << std::endl;
    }
    else
    {
      std::cerr << "FAILED TO LOAD\n" << std::endl;
      // The freed pointer must never reach m_memoryDictionary: it would be
      // dereferenced by GetDictionary() and deleted a second time later.
      delete pd;
      // Same convention as the other load failures in this constructor.
      assert(false);
    }
  }
  //if we're using an in-memory phrase table, then load it now, otherwise wait
  else if (!FileExists(filePath+".binphr.idx"))
  { // memory phrase table
    VERBOSE(2,"using standard phrase tables" << std::endl);
    if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
      m_filePath += ".gz";
      VERBOSE(2,"Using gzipped file" << std::endl);
    }
    if (staticData.GetInputType() != SentenceInput)
    {
      UserMessage::Add("Must use binary phrase table for this input type");
      assert(false);
    }

    PhraseDictionaryMemory* pdm = new PhraseDictionaryMemory(m_numScoreComponent,this);
    // Load() has side effects, so it must run outside assert(): with NDEBUG
    // the whole assert expression is discarded and nothing would be loaded.
    const bool loaded = pdm->Load(m_input
                                  , m_output
                                  , m_filePath
                                  , m_weight
                                  , m_tableLimit
                                  , staticData.GetAllLM()
                                  , staticData.GetWeightWordPenalty());
    assert(loaded);
    (void) loaded; // silence the unused-variable warning in NDEBUG builds
    m_memoryDictionary.reset(pdm);
  }
  else
  {
    //don't initialise the tree dictionary until it's required
  }
}
|
|
|
|
|
|
|
|
/**
 * Return the dictionary to use for \p source, prepared for that input.
 *
 * If the constructor installed an in-memory dictionary, that one is used.
 * Otherwise the tree dictionary is created and loaded on first use and
 * reused on later calls. In both cases InitializeForInput(source) is called
 * on the dictionary before it is returned.
 *
 * \param source  input the dictionary is initialised for
 * \return the dictionary; ownership stays with this feature
 */
PhraseDictionary* PhraseDictionaryFeature::GetDictionary
(const InputType& source) {
  PhraseDictionary* dict = NULL;
  if (m_memoryDictionary.get()) {
    dict = m_memoryDictionary.get();
  } else {
    if (!m_treeDictionary.get()) {
      //load the tree dictionary for this thread
      const StaticData& staticData = StaticData::Instance();
      PhraseDictionaryTreeAdaptor* pdta = new PhraseDictionaryTreeAdaptor(m_numScoreComponent, m_numInputScores,this);
      // Load() has side effects, so it must run outside assert(): with
      // NDEBUG the assert expression is discarded and the adaptor would be
      // installed without ever being loaded.
      const bool loaded = pdta->Load(m_input
                                     , m_output
                                     , m_filePath
                                     , m_weight
                                     , m_tableLimit
                                     , staticData.GetAllLM()
                                     , staticData.GetWeightWordPenalty());
      assert(loaded);
      (void) loaded; // silence the unused-variable warning in NDEBUG builds
      m_treeDictionary.reset(pdta);
    }
    dict = m_treeDictionary.get();
  }
  dict->InitializeForInput(source);
  return dict;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Destructor; nothing is released explicitly here. */
PhraseDictionaryFeature::~PhraseDictionaryFeature()
{
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::string PhraseDictionaryFeature::GetScoreProducerDescription() const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
2008-10-05 20:37:42 +04:00
|
|
|
return "PhraseModel";
|
2008-06-11 14:52:57 +04:00
|
|
|
}
|
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
size_t PhraseDictionaryFeature::GetNumScoreComponents() const
|
2008-06-11 14:52:57 +04:00
|
|
|
{
|
|
|
|
return m_numScoreComponent;
|
|
|
|
}
|
2008-10-09 03:51:26 +04:00
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
size_t PhraseDictionaryFeature::GetNumInputScores() const
|
|
|
|
{
|
|
|
|
return m_numInputScores;
|
|
|
|
}
|
2009-02-06 18:43:06 +03:00
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
/** Always returns true for phrase dictionary features. */
bool PhraseDictionaryFeature::ComputeValueInTranslationOption() const
{
  return true;
}
|
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
/**
 * \return the value of m_feature.
 * NOTE(review): presumably the PhraseDictionaryFeature handed to this
 * dictionary's constructor -- confirm against the header.
 */
const PhraseDictionaryFeature* PhraseDictionary::GetFeature() const
{
  return this->m_feature;
}
|
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
|
|
|
|