2015-03-15 23:38:31 +03:00
|
|
|
// -*- c++ -*-
|
2006-07-04 22:04:38 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (c) 2006 University of Edinburgh
|
|
|
|
All rights reserved.
|
|
|
|
|
2011-02-24 15:39:29 +03:00
|
|
|
Redistribution and use in source and binary forms, with or without modification,
|
2006-07-04 22:04:38 +04:00
|
|
|
are permitted provided that the following conditions are met:
|
|
|
|
|
2011-02-24 15:39:29 +03:00
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
2006-07-04 22:04:38 +04:00
|
|
|
this list of conditions and the following disclaimer.
|
2011-02-24 15:39:29 +03:00
|
|
|
* Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer in the documentation
|
2006-07-04 22:04:38 +04:00
|
|
|
and/or other materials provided with the distribution.
|
2011-02-24 15:39:29 +03:00
|
|
|
* Neither the name of the University of Edinburgh nor the names of its contributors
|
|
|
|
may be used to endorse or promote products derived from this software
|
2006-07-04 22:04:38 +04:00
|
|
|
without specific prior written permission.
|
|
|
|
|
2011-02-24 15:39:29 +03:00
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
|
|
|
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
|
|
|
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
2006-07-04 22:04:38 +04:00
|
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
// example file on how to use moses library
|
|
|
|
|
2014-10-13 15:46:58 +04:00
|
|
|
#pragma once
|
2006-07-04 22:04:38 +04:00
|
|
|
|
2015-03-21 19:12:52 +03:00
|
|
|
#ifdef WITH_THREADS
|
|
|
|
#include <boost/thread.hpp>
|
|
|
|
#endif
|
|
|
|
|
2012-05-28 20:29:46 +04:00
|
|
|
#include <cassert>
|
2006-07-04 22:04:38 +04:00
|
|
|
#include <fstream>
|
2010-02-17 20:25:56 +03:00
|
|
|
#include <ostream>
|
2006-07-04 22:04:38 +04:00
|
|
|
#include <vector>
|
2015-05-11 02:34:24 +03:00
|
|
|
#include <list>
|
|
|
|
#include <iomanip>
|
2010-02-17 20:25:56 +03:00
|
|
|
|
2012-11-13 00:21:32 +04:00
|
|
|
#include "moses/TypeDef.h"
|
|
|
|
#include "moses/Sentence.h"
|
2015-01-08 14:30:07 +03:00
|
|
|
#include "moses/TabbedSentence.h"
|
2012-11-13 00:21:32 +04:00
|
|
|
#include "moses/FactorTypeSet.h"
|
|
|
|
#include "moses/FactorCollection.h"
|
|
|
|
#include "moses/Hypothesis.h"
|
|
|
|
#include "moses/OutputCollector.h"
|
|
|
|
#include "moses/TrellisPathList.h"
|
|
|
|
#include "moses/InputFileStream.h"
|
|
|
|
#include "moses/InputType.h"
|
|
|
|
#include "moses/WordLattice.h"
|
2014-09-29 20:34:11 +04:00
|
|
|
#include "moses/LatticeMBR.h"
|
2014-10-09 19:42:16 +04:00
|
|
|
#include "moses/ChartKBestExtractor.h"
|
2014-11-07 22:51:18 +03:00
|
|
|
#include "moses/Syntax/KBestExtractor.h"
|
2014-10-09 19:42:16 +04:00
|
|
|
|
|
|
|
#include "search/applied.hh"
|
2006-07-04 22:04:38 +04:00
|
|
|
|
2012-12-07 17:34:44 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
// Forward declarations of types that are only named (not defined) here.
class ScoreComponentCollection;
class Hypothesis;
class ChartHypothesis;
class Factor;
class TranslationTask;

namespace Syntax
{
struct SHyperedge;
}
|
|
|
|
|
2012-06-29 07:19:28 +04:00
|
|
|
/** Helper class that holds misc variables to write data out to command line.
|
|
|
|
*/
|
2008-08-05 04:24:45 +04:00
|
|
|
class IOWrapper
|
2006-07-04 22:04:38 +04:00
|
|
|
{
|
|
|
|
protected:
|
2014-12-01 18:26:47 +03:00
|
|
|
const std::vector<Moses::FactorType> *m_inputFactorOrder;
|
2014-12-04 22:16:30 +03:00
|
|
|
std::string m_inputFilePath;
|
|
|
|
Moses::InputFileStream *m_inputFile;
|
|
|
|
std::istream *m_inputStream;
|
2014-10-02 17:57:04 +04:00
|
|
|
std::ostream *m_nBestStream;
|
|
|
|
std::ostream *m_outputWordGraphStream;
|
2014-12-04 22:16:30 +03:00
|
|
|
std::ostream *m_outputSearchGraphStream;
|
2014-10-02 17:57:04 +04:00
|
|
|
std::ostream *m_detailedTranslationReportingStream;
|
2014-12-04 22:16:30 +03:00
|
|
|
std::ostream *m_unknownsStream;
|
|
|
|
std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
|
2014-10-02 14:22:52 +04:00
|
|
|
std::ofstream *m_alignmentInfoStream;
|
2014-10-03 18:30:06 +04:00
|
|
|
std::ofstream *m_latticeSamplesStream;
|
2011-02-24 15:39:29 +03:00
|
|
|
|
2015-03-07 02:37:07 +03:00
|
|
|
std::auto_ptr<Moses::OutputCollector> m_singleBestOutputCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_nBestOutputCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_unknownsCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_alignmentInfoCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_searchGraphOutputCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_detailedTranslationCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_wordGraphCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_latticeSamplesCollector;
|
|
|
|
std::auto_ptr<Moses::OutputCollector> m_detailTreeFragmentsOutputCollector;
|
2014-10-10 14:35:14 +04:00
|
|
|
|
2014-12-04 22:16:30 +03:00
|
|
|
bool m_surpressSingleBestOutput;
|
|
|
|
|
2015-03-21 19:12:52 +03:00
|
|
|
#ifdef WITH_THREADS
|
|
|
|
boost::mutex m_lock;
|
|
|
|
#endif
|
|
|
|
size_t m_currentLine; /* line counter, initialized from static data at construction
|
|
|
|
* incremented with every call to ReadInput */
|
|
|
|
|
|
|
|
InputTypeEnum m_inputType; // initialized from StaticData at construction
|
2015-05-11 02:34:24 +03:00
|
|
|
std::list<boost::shared_ptr<InputType> > m_past_input;
|
|
|
|
std::list<boost::shared_ptr<InputType> > m_future_input;
|
|
|
|
size_t m_look_ahead; /// for context-sensitive decoding: # of wrds to look ahead
|
|
|
|
size_t m_look_back; /// for context-sensitive decoding: # of wrds to look back
|
|
|
|
size_t m_buffered_ahead; /// number of words buffered ahead
|
|
|
|
// For context-sensitive decoding:
|
|
|
|
// Number of context words ahead and before the current sentence.
|
2006-07-04 22:04:38 +04:00
|
|
|
public:
|
2014-12-01 18:26:47 +03:00
|
|
|
IOWrapper();
|
2011-02-24 15:39:29 +03:00
|
|
|
~IOWrapper();
|
|
|
|
|
2015-03-21 19:12:52 +03:00
|
|
|
// Moses::InputType* GetInput(Moses::InputType *inputType);
|
|
|
|
boost::shared_ptr<InputType> ReadInput();
|
2011-02-24 15:39:29 +03:00
|
|
|
|
2014-10-01 20:43:51 +04:00
|
|
|
Moses::OutputCollector *GetSingleBestOutputCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_singleBestOutputCollector.get();
|
|
|
|
}
|
|
|
|
|
|
|
|
void SetOutputStream2SingleBestOutputCollector(std::ostream* outStream) {
|
|
|
|
if (m_singleBestOutputCollector.get())
|
|
|
|
m_singleBestOutputCollector->SetOutputStream(outStream);
|
|
|
|
else
|
|
|
|
m_singleBestOutputCollector.reset(new Moses::OutputCollector(outStream));
|
2014-10-01 20:43:51 +04:00
|
|
|
}
|
2014-10-01 21:21:57 +04:00
|
|
|
|
|
|
|
Moses::OutputCollector *GetNBestOutputCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_nBestOutputCollector.get();
|
2014-10-01 21:21:57 +04:00
|
|
|
}
|
|
|
|
|
2014-10-01 22:24:58 +04:00
|
|
|
Moses::OutputCollector *GetUnknownsCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_unknownsCollector.get();
|
2014-10-01 22:24:58 +04:00
|
|
|
}
|
2014-10-02 14:22:52 +04:00
|
|
|
|
|
|
|
Moses::OutputCollector *GetAlignmentInfoCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_alignmentInfoCollector.get();
|
2014-10-02 14:22:52 +04:00
|
|
|
}
|
|
|
|
|
2014-10-02 15:20:49 +04:00
|
|
|
Moses::OutputCollector *GetSearchGraphOutputCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_searchGraphOutputCollector.get();
|
2014-10-02 15:20:49 +04:00
|
|
|
}
|
2014-10-02 14:22:52 +04:00
|
|
|
|
2014-10-02 17:18:12 +04:00
|
|
|
Moses::OutputCollector *GetDetailedTranslationCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_detailedTranslationCollector.get();
|
2014-10-02 17:18:12 +04:00
|
|
|
}
|
|
|
|
|
2014-10-02 17:57:04 +04:00
|
|
|
Moses::OutputCollector *GetWordGraphCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_wordGraphCollector.get();
|
2014-10-02 17:57:04 +04:00
|
|
|
}
|
|
|
|
|
2014-10-03 15:30:18 +04:00
|
|
|
Moses::OutputCollector *GetLatticeSamplesCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_latticeSamplesCollector.get();
|
2014-10-03 15:30:18 +04:00
|
|
|
}
|
|
|
|
|
2014-12-04 21:35:19 +03:00
|
|
|
Moses::OutputCollector *GetDetailTreeFragmentsOutputCollector() {
|
2015-03-07 02:37:07 +03:00
|
|
|
return m_detailTreeFragmentsOutputCollector.get();
|
|
|
|
}
|
|
|
|
|
2015-05-02 13:45:24 +03:00
|
|
|
void SetInputStreamFromString(std::istringstream &input) {
|
2015-03-07 02:37:07 +03:00
|
|
|
m_inputStream = &input;
|
2014-12-04 21:35:19 +03:00
|
|
|
}
|
|
|
|
|
2014-11-20 14:21:50 +03:00
|
|
|
// post editing
|
|
|
|
std::ifstream *spe_src, *spe_trg, *spe_aln;
|
|
|
|
|
2015-05-11 02:34:24 +03:00
|
|
|
private:
|
|
|
|
template<class itype>
|
|
|
|
boost::shared_ptr<InputType>
|
|
|
|
BufferInput();
|
|
|
|
|
|
|
|
boost::shared_ptr<InputType>
|
|
|
|
GetBufferedInput();
|
|
|
|
|
|
|
|
void
|
|
|
|
set_context_for(InputType& source);
|
2006-11-22 02:06:30 +03:00
|
|
|
};
|
2009-08-07 20:47:54 +04:00
|
|
|
|
2015-05-11 02:34:24 +03:00
|
|
|
template<class itype>
|
|
|
|
boost::shared_ptr<InputType>
|
|
|
|
IOWrapper::
|
|
|
|
BufferInput()
|
|
|
|
{
|
|
|
|
boost::shared_ptr<itype> source;
|
|
|
|
boost::shared_ptr<InputType> ret;
|
|
|
|
if (m_future_input.size())
|
|
|
|
{
|
|
|
|
ret = m_future_input.front();
|
|
|
|
m_future_input.pop_front();
|
|
|
|
m_buffered_ahead -= ret->GetSize();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
source.reset(new itype);
|
|
|
|
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
|
|
|
|
return ret;
|
|
|
|
ret = source;
|
|
|
|
}
|
|
|
|
|
|
|
|
while (m_buffered_ahead < m_look_ahead)
|
|
|
|
{
|
|
|
|
source.reset(new itype);
|
|
|
|
if (!source->Read(*m_inputStream, *m_inputFactorOrder)) break;
|
|
|
|
m_future_input.push_back(source);
|
|
|
|
m_buffered_ahead += source->GetSize();
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
2013-09-19 18:33:21 +04:00
|
|
|
|
2014-10-09 19:42:16 +04:00
|
|
|
|
2012-07-02 20:05:11 +04:00
|
|
|
}
|
|
|
|
|