2013-09-25 02:56:47 +04:00
|
|
|
// -*- c++ -*-
|
2008-06-11 14:52:57 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (C) 2006 University of Edinburgh
|
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
2010-02-24 14:15:44 +03:00
|
|
|
#ifndef moses_PhraseDictionary_h
|
|
|
|
#define moses_PhraseDictionary_h
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <map>
|
2009-08-11 13:37:29 +04:00
|
|
|
#include <memory>
|
2008-06-11 14:52:57 +04:00
|
|
|
#include <list>
|
2012-09-13 21:16:13 +04:00
|
|
|
#include <stdexcept>
|
2008-06-11 14:52:57 +04:00
|
|
|
#include <vector>
|
|
|
|
#include <string>
|
2014-02-11 07:43:58 +04:00
|
|
|
#include <boost/unordered_map.hpp>
|
2009-08-07 20:47:54 +04:00
|
|
|
|
|
|
|
#ifdef WITH_THREADS
|
|
|
|
#include <boost/thread/tss.hpp>
|
2013-08-21 18:58:17 +04:00
|
|
|
#else
|
|
|
|
#include <boost/scoped_ptr.hpp>
|
2013-12-19 00:15:39 +04:00
|
|
|
#include <time.h>
|
2009-08-07 20:47:54 +04:00
|
|
|
#endif
|
|
|
|
|
2012-11-27 19:08:31 +04:00
|
|
|
#include "moses/Phrase.h"
|
|
|
|
#include "moses/TargetPhrase.h"
|
|
|
|
#include "moses/TargetPhraseCollection.h"
|
2013-07-09 17:19:35 +04:00
|
|
|
#include "moses/InputPath.h"
|
2013-08-30 19:03:06 +04:00
|
|
|
#include "moses/FF/DecodeFeature.h"
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
class StaticData;
|
|
|
|
class InputType;
|
|
|
|
class WordsRange;
|
2012-10-11 17:27:30 +04:00
|
|
|
class ChartCellCollectionBase;
|
2011-04-13 14:38:27 +04:00
|
|
|
class ChartRuleLookupManager;
|
2013-07-31 15:25:34 +04:00
|
|
|
class ChartParser;
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2014-02-11 07:43:58 +04:00
|
|
|
/**
 * Cache container used by PhraseDictionary (see PhraseDictionary::m_cache).
 *
 * It is a boost::unordered_map whose entries are laid out as follows:
 *   - key (size_t)        : hash of source phrase / address of phrase-table node
 *   - value.first         : all translations (a TargetPhraseCollection pointer)
 *   - value.second        : time of last access (clock_t)
 *
 * NOTE(review): clock_t comes from <time.h>, which this header includes
 * directly only when WITH_THREADS is *not* defined; in threaded builds the
 * declaration is presumably picked up transitively -- confirm, or include
 * <time.h> unconditionally.
 */
class CacheColl : public boost::unordered_map<size_t, std::pair<const TargetPhraseCollection*, clock_t> >
{
public:
  // Defined out of line; its body is not visible in this header.
  // NOTE(review): presumably releases the cached collections -- confirm
  // against the implementation before relying on ownership semantics.
  ~CacheColl();
};
|
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
/**
|
|
|
|
* Abstract base class for phrase dictionaries (tables).
|
|
|
|
**/
|
2013-02-22 23:17:57 +04:00
|
|
|
class PhraseDictionary : public DecodeFeature
|
2011-02-24 16:14:42 +03:00
|
|
|
{
|
|
|
|
public:
|
2014-03-25 05:49:24 +04:00
|
|
|
virtual bool ProvidesPrefixCheck() const;
|
|
|
|
|
2013-11-15 21:26:26 +04:00
|
|
|
static const std::vector<PhraseDictionary*>& GetColl() {
|
2014-01-15 19:42:02 +04:00
|
|
|
return s_staticColl;
|
2013-11-15 21:26:26 +04:00
|
|
|
}
|
|
|
|
|
2013-10-29 22:20:55 +04:00
|
|
|
PhraseDictionary(const std::string &line);
|
2013-02-22 23:17:57 +04:00
|
|
|
|
2013-06-26 20:12:22 +04:00
|
|
|
virtual ~PhraseDictionary() {
|
|
|
|
}
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! table limit number.
|
|
|
|
size_t GetTableLimit() const {
|
|
|
|
return m_tableLimit;
|
|
|
|
}
|
2013-02-05 22:47:25 +04:00
|
|
|
|
2014-08-05 13:26:42 +04:00
|
|
|
//! continguous id for each pt, starting from 0
|
2015-01-14 14:07:42 +03:00
|
|
|
size_t GetId() const {
|
|
|
|
return m_id;
|
|
|
|
}
|
2014-08-05 13:26:42 +04:00
|
|
|
|
2014-03-24 16:29:46 +04:00
|
|
|
virtual
|
|
|
|
void
|
2014-03-25 05:49:24 +04:00
|
|
|
Release(TargetPhraseCollection const* tpc) const;
|
|
|
|
|
2014-05-19 17:34:27 +04:00
|
|
|
/// return true if phrase table entries starting with /phrase/
|
2014-03-25 05:49:24 +04:00
|
|
|
// exist in the table.
|
|
|
|
virtual
|
|
|
|
bool
|
|
|
|
PrefixExists(Phrase const& phrase) const;
|
2014-03-24 16:29:46 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
// LEGACY!
|
2013-09-25 02:56:47 +04:00
|
|
|
// The preferred method is to override GetTargetPhraseCollectionBatch().
|
2013-07-11 23:05:08 +04:00
|
|
|
// See class PhraseDictionaryMemory or PhraseDictionaryOnDisk for details
|
2011-02-24 16:14:42 +03:00
|
|
|
//! find list of translations that can translates src. Only for phrase input
|
2012-12-24 20:52:40 +04:00
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
virtual
|
2013-09-25 02:56:47 +04:00
|
|
|
TargetPhraseCollection const *
|
|
|
|
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
|
|
|
|
2013-09-27 12:35:24 +04:00
|
|
|
virtual
|
|
|
|
void
|
2013-10-03 21:58:45 +04:00
|
|
|
GetTargetPhraseCollectionBatch(const InputPathList &inputPathQueue) const;
|
2013-07-11 23:05:08 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! Create entry for translation of source to targetPhrase
|
2013-06-26 20:12:22 +04:00
|
|
|
virtual void InitializeForInput(InputType const& source) {
|
|
|
|
}
|
2012-12-24 19:17:13 +04:00
|
|
|
// clean up temporary memory, called after processing each sentence
|
2013-06-26 20:12:22 +04:00
|
|
|
virtual void CleanUpAfterSentenceProcessing(const InputType& source) {
|
|
|
|
}
|
2011-02-24 16:14:42 +03:00
|
|
|
|
|
|
|
//! Create a sentence-specific manager for SCFG rule lookup.
|
|
|
|
virtual ChartRuleLookupManager *CreateRuleLookupManager(
|
2013-07-31 15:25:34 +04:00
|
|
|
const ChartParser &,
|
2014-03-14 12:49:09 +04:00
|
|
|
const ChartCellCollectionBase &,
|
|
|
|
std::size_t) = 0;
|
2011-02-24 16:14:42 +03:00
|
|
|
|
2013-06-26 20:12:22 +04:00
|
|
|
const std::string &GetFilePath() const {
|
|
|
|
return m_filePath;
|
2012-09-13 21:16:13 +04:00
|
|
|
}
|
|
|
|
|
2013-06-26 20:12:22 +04:00
|
|
|
const std::vector<FeatureFunction*> &GetFeaturesToApply() const {
|
|
|
|
return m_featuresToApply;
|
2012-09-13 21:16:13 +04:00
|
|
|
}
|
|
|
|
|
2013-06-26 20:12:22 +04:00
|
|
|
void SetParameter(const std::string& key, const std::string& value);
|
2013-02-25 18:50:33 +04:00
|
|
|
|
2013-08-07 17:10:42 +04:00
|
|
|
// LEGACY
|
|
|
|
//! find list of translations that can translates a portion of src. Used by confusion network decoding
|
2013-08-24 00:34:10 +04:00
|
|
|
virtual const TargetPhraseCollectionWithSourcePhrase* GetTargetPhraseCollectionLEGACY(InputType const& src,WordsRange const& range) const;
|
2013-08-07 17:10:42 +04:00
|
|
|
|
2013-02-22 23:17:57 +04:00
|
|
|
protected:
|
2013-11-15 21:26:26 +04:00
|
|
|
static std::vector<PhraseDictionary*> s_staticColl;
|
|
|
|
|
2013-02-22 23:17:57 +04:00
|
|
|
size_t m_tableLimit;
|
2011-02-24 16:14:42 +03:00
|
|
|
std::string m_filePath;
|
|
|
|
|
2013-06-26 20:12:22 +04:00
|
|
|
// features to apply evaluate target phrase when loading.
|
|
|
|
// NOT when creating translation options. Those are in DecodeStep
|
|
|
|
std::vector<FeatureFunction*> m_featuresToApply;
|
|
|
|
|
|
|
|
// MUST be called at the start of Load()
|
|
|
|
void SetFeaturesToApply();
|
2013-08-15 23:50:22 +04:00
|
|
|
|
2014-05-12 18:40:18 +04:00
|
|
|
bool SatisfyBackoff(const InputPath &inputPath) const;
|
|
|
|
|
2013-08-15 23:50:22 +04:00
|
|
|
// cache
|
2013-08-16 18:05:36 +04:00
|
|
|
size_t m_maxCacheSize; // 0 = no caching
|
2013-08-21 18:58:17 +04:00
|
|
|
|
2013-08-15 23:50:22 +04:00
|
|
|
#ifdef WITH_THREADS
|
|
|
|
//reader-writer lock
|
2013-08-21 18:58:17 +04:00
|
|
|
mutable boost::thread_specific_ptr<CacheColl> m_cache;
|
|
|
|
#else
|
|
|
|
mutable boost::scoped_ptr<CacheColl> m_cache;
|
2013-08-15 23:50:22 +04:00
|
|
|
#endif
|
|
|
|
|
2013-08-24 00:08:53 +04:00
|
|
|
virtual const TargetPhraseCollection *GetTargetPhraseCollectionNonCacheLEGACY(const Phrase& src) const;
|
2013-08-16 18:05:36 +04:00
|
|
|
void ReduceCache() const;
|
2013-08-21 18:58:17 +04:00
|
|
|
|
|
|
|
protected:
|
|
|
|
CacheColl &GetCache() const;
|
2014-08-05 13:26:42 +04:00
|
|
|
size_t m_id;
|
2013-08-21 18:58:17 +04:00
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
};
|
2009-02-06 18:43:06 +03:00
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
2010-02-24 14:15:44 +03:00
|
|
|
#endif
|