2006-07-04 22:04:38 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (c) 2006 University of Edinburgh
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer in the documentation
|
|
|
|
and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of the University of Edinburgh nor the names of its contributors
|
|
|
|
may be used to endorse or promote products derived from this software
|
|
|
|
without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
|
|
|
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
|
|
|
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
// example file on how to use moses library
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include "TypeDef.h"
|
|
|
|
#include "Util.h"
|
2008-08-05 04:24:45 +04:00
|
|
|
#include "IOWrapper.h"
|
2006-07-04 22:04:38 +04:00
|
|
|
#include "Hypothesis.h"
|
2006-07-25 03:48:18 +04:00
|
|
|
#include "WordsRange.h"
|
2007-05-16 17:08:55 +04:00
|
|
|
#include "TrellisPathList.h"
|
2006-07-28 05:15:09 +04:00
|
|
|
#include "StaticData.h"
|
2006-07-29 05:11:45 +04:00
|
|
|
#include "DummyScoreProducers.h"
|
2006-11-22 02:06:30 +03:00
|
|
|
#include "InputFileStream.h"
|
2006-07-04 22:04:38 +04:00
|
|
|
|
|
|
|
using namespace std;
|
2008-10-09 03:51:26 +04:00
|
|
|
using namespace Moses;
|
2006-07-04 22:04:38 +04:00
|
|
|
|
2008-08-05 04:24:45 +04:00
|
|
|
/**
 * Build an IOWrapper whose input stream is std::cin.
 *
 * No input file is opened (m_inputFile stays NULL) and every optional
 * output stream starts out as NULL; Initialization() then decides which of
 * them are actually created.
 *
 * \param inputFactorOrder   copied into m_inputFactorOrder
 * \param outputFactorOrder  copied into m_outputFactorOrder
 * \param inputFactorUsed    copied into m_inputFactorUsed
 * \param nBestSize          forwarded to Initialization()
 * \param nBestFilePath      forwarded to Initialization()
 */
IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder,
                     const std::vector<FactorType> &outputFactorOrder,
                     const FactorMask &inputFactorUsed,
                     size_t nBestSize,
                     const std::string &nBestFilePath)
  : m_inputFactorOrder(inputFactorOrder),
    m_outputFactorOrder(outputFactorOrder),
    m_inputFactorUsed(inputFactorUsed),
    m_inputFile(NULL),
    m_inputStream(&std::cin),
    m_nBestStream(NULL),
    m_outputWordGraphStream(NULL),
    m_outputSearchGraphStream(NULL),
    m_detailedTranslationReportingStream(NULL)
{
  // Opens whichever output streams the configuration asks for.
  Initialization(inputFactorOrder, outputFactorOrder, inputFactorUsed,
                 nBestSize, nBestFilePath);
}
|
|
|
|
|
2008-08-05 04:24:45 +04:00
|
|
|
/**
 * Build an IOWrapper that reads its input from a file.
 *
 * An InputFileStream is allocated for \p inputFilePath and owned by this
 * object (it is deleted in the destructor). Every optional output stream
 * starts out as NULL; Initialization() decides which of them are created.
 *
 * \param inputFactorOrder   copied into m_inputFactorOrder
 * \param outputFactorOrder  copied into m_outputFactorOrder
 * \param inputFactorUsed    copied into m_inputFactorUsed
 * \param nBestSize          forwarded to Initialization()
 * \param nBestFilePath      forwarded to Initialization()
 * \param inputFilePath      path handed to the InputFileStream constructor
 */
IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder,
                     const std::vector<FactorType> &outputFactorOrder,
                     const FactorMask &inputFactorUsed,
                     size_t nBestSize,
                     const std::string &nBestFilePath,
                     const std::string &inputFilePath)
  : m_inputFactorOrder(inputFactorOrder),
    m_outputFactorOrder(outputFactorOrder),
    m_inputFactorUsed(inputFactorUsed),
    m_inputFilePath(inputFilePath),
    m_inputFile(new InputFileStream(inputFilePath)),
    m_nBestStream(NULL),
    m_outputWordGraphStream(NULL),
    m_outputSearchGraphStream(NULL),
    m_detailedTranslationReportingStream(NULL)
{
  Initialization(inputFactorOrder, outputFactorOrder, inputFactorUsed,
                 nBestSize, nBestFilePath);

  // All reads go through m_inputStream; point it at the file just opened.
  m_inputStream = m_inputFile;
}
|
|
|
|
|
2008-08-05 04:24:45 +04:00
|
|
|
/**
 * Destroy the streams owned by this wrapper.
 *
 * Applying delete to a NULL pointer is a no-op, so streams that were never
 * created need no explicit test.
 */
IOWrapper::~IOWrapper()
{
  delete m_inputFile;

  // Initialization() sets m_surpressSingleBestOutput only when the n-best
  // list is written to std::cout; in that case m_nBestStream is not a
  // heap-allocated file stream and must not be deleted.
  if (!m_surpressSingleBestOutput) {
    delete m_nBestStream;
  }

  delete m_outputWordGraphStream;
  delete m_outputSearchGraphStream;
  delete m_detailedTranslationReportingStream;
}
|
2006-11-22 02:06:30 +03:00
|
|
|
|
2010-05-11 01:18:47 +04:00
|
|
|
void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder*/
|
|
|
|
, const std::vector<FactorType> &/*outputFactorOrder*/
|
|
|
|
, const FactorMask &/*inputFactorUsed*/
|
2008-03-01 02:08:58 +03:00
|
|
|
, size_t nBestSize
|
|
|
|
, const std::string &nBestFilePath)
|
|
|
|
{
|
|
|
|
const StaticData &staticData = StaticData::Instance();
|
|
|
|
|
|
|
|
// n-best
|
|
|
|
m_surpressSingleBestOutput = false;
|
2010-01-28 16:09:10 +03:00
|
|
|
|
2006-11-22 02:06:30 +03:00
|
|
|
if (nBestSize > 0)
|
2010-01-28 16:09:10 +03:00
|
|
|
{
|
|
|
|
if (nBestFilePath == "-" || nBestFilePath == "/dev/stdout")
|
|
|
|
{
|
|
|
|
m_nBestStream = &std::cout;
|
|
|
|
m_surpressSingleBestOutput = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
std::ofstream *file = new std::ofstream;
|
|
|
|
m_nBestStream = file;
|
|
|
|
file->open(nBestFilePath.c_str());
|
|
|
|
}
|
|
|
|
}
|
2006-11-22 02:06:30 +03:00
|
|
|
|
2008-03-01 02:08:58 +03:00
|
|
|
// wordgraph output
|
|
|
|
if (staticData.GetOutputWordGraph())
|
|
|
|
{
|
|
|
|
string fileName = staticData.GetParam("output-word-graph")[0];
|
|
|
|
std::ofstream *file = new std::ofstream;
|
|
|
|
m_outputWordGraphStream = file;
|
|
|
|
file->open(fileName.c_str());
|
2007-01-21 16:38:03 +03:00
|
|
|
}
|
2008-03-18 00:34:19 +03:00
|
|
|
|
|
|
|
// search graph output
|
|
|
|
if (staticData.GetOutputSearchGraph())
|
|
|
|
{
|
2010-01-28 18:32:04 +03:00
|
|
|
string fileName;
|
|
|
|
if (staticData.GetOutputSearchGraphExtended())
|
|
|
|
fileName = staticData.GetParam("output-search-graph-extended")[0];
|
|
|
|
else
|
|
|
|
fileName = staticData.GetParam("output-search-graph")[0];
|
2008-09-12 22:09:06 +04:00
|
|
|
std::ofstream *file = new std::ofstream;
|
2008-03-18 00:34:19 +03:00
|
|
|
m_outputSearchGraphStream = file;
|
|
|
|
file->open(fileName.c_str());
|
|
|
|
}
|
2010-05-08 19:51:59 +04:00
|
|
|
|
|
|
|
// detailed translation reporting
|
|
|
|
if (staticData.IsDetailedTranslationReportingEnabled())
|
|
|
|
{
|
|
|
|
const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
|
|
|
|
m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
|
2010-05-19 20:42:18 +04:00
|
|
|
assert(m_detailedTranslationReportingStream->good());
|
2010-05-08 19:51:59 +04:00
|
|
|
}
|
2006-11-22 02:06:30 +03:00
|
|
|
}
|
|
|
|
|
2008-08-05 04:24:45 +04:00
|
|
|
/**
 * Read the next input item from the wrapper's input stream.
 *
 * Takes ownership of \p inputType on failure: when Read() reports that
 * nothing could be read, the object is deleted and NULL is returned.
 *
 * On success the translation id is reconciled with the running counter:
 * an item that already carries a non-zero id pushes m_translationId past
 * that id if necessary; an item with id 0 is assigned the next counter
 * value.
 *
 * \param inputType  freshly allocated, empty input object to fill
 * \return \p inputType on success, NULL when no input could be read
 */
InputType*IOWrapper::GetInput(InputType* inputType)
{
  if (!inputType->Read(*m_inputStream, m_inputFactorOrder)) {
    delete inputType;
    return NULL;
  }

  const long presetId = inputType->GetTranslationId();
  if (presetId) {
    // keep the counter ahead of any id supplied with the input
    if (presetId >= m_translationId) {
      m_translationId = presetId + 1;
    }
  } else {
    inputType->SetTranslationId(m_translationId++);
  }

  return inputType;
}
|
|
|
|
|
2006-08-12 01:04:38 +04:00
|
|
|
/***
|
|
|
|
* print surface factor only for the given phrase
|
|
|
|
*/
|
2006-08-02 00:02:39 +04:00
|
|
|
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
|
2006-07-04 22:04:38 +04:00
|
|
|
{
|
2006-08-02 00:12:58 +04:00
|
|
|
assert(outputFactorOrder.size() > 0);
|
2006-08-21 18:24:09 +04:00
|
|
|
if (reportAllFactors == true)
|
2006-08-02 00:02:39 +04:00
|
|
|
{
|
|
|
|
out << phrase;
|
2006-08-21 18:24:09 +04:00
|
|
|
}
|
2006-08-02 00:02:39 +04:00
|
|
|
else
|
|
|
|
{
|
|
|
|
size_t size = phrase.GetSize();
|
|
|
|
for (size_t pos = 0 ; pos < size ; pos++)
|
|
|
|
{
|
2006-08-02 00:12:58 +04:00
|
|
|
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
|
|
|
|
out << *factor;
|
|
|
|
|
|
|
|
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++)
|
2006-08-02 00:02:39 +04:00
|
|
|
{
|
|
|
|
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
2006-08-02 00:12:58 +04:00
|
|
|
out << "|" << *factor;
|
2006-08-02 00:02:39 +04:00
|
|
|
}
|
2006-08-02 00:12:58 +04:00
|
|
|
out << " ";
|
2006-08-02 00:02:39 +04:00
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-02 00:02:39 +04:00
|
|
|
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
|
2006-08-30 23:51:07 +04:00
|
|
|
,bool reportSegmentation, bool reportAllFactors)
|
2006-07-04 22:04:38 +04:00
|
|
|
{
|
|
|
|
if ( hypo != NULL)
|
|
|
|
{
|
2006-08-30 23:51:07 +04:00
|
|
|
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
|
2007-02-12 14:05:13 +03:00
|
|
|
OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
|
2006-07-25 03:48:18 +04:00
|
|
|
|
2006-08-30 23:51:07 +04:00
|
|
|
if (reportSegmentation == true
|
2007-02-12 14:05:13 +03:00
|
|
|
&& hypo->GetCurrTargetPhrase().GetSize() > 0) {
|
2006-08-21 18:24:09 +04:00
|
|
|
out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
|
|
|
|
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
|
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-05-11 01:18:47 +04:00
|
|
|
/**
 * Write the translation represented by a trellis path as one line.
 *
 * The path's edges are walked from the last element to the first, the
 * target phrase of each edge is written with OutputSurface(), and the line
 * is terminated with std::endl. With reportSegmentation set, each
 * non-empty target phrase is followed by "|start-end| " for its source
 * words range. The translation id argument is unused.
 */
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out)
{
  typedef std::vector<const Hypothesis *> EdgeList;

  const EdgeList &edges = path.GetEdges();
  const std::vector<FactorType> &factorOrder = StaticData::Instance().GetOutputFactorOrder();

  for (EdgeList::const_reverse_iterator it = edges.rbegin(); it != edges.rend(); ++it) {
    const Hypothesis &edge = **it;
    OutputSurface(out, edge.GetCurrTargetPhrase(), factorOrder, reportAllFactors);

    if (reportSegmentation && edge.GetCurrTargetPhrase().GetSize() > 0) {
      const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
      out << "|" << sourceRange.GetStartPos()
          << "-" << sourceRange.GetEndPos() << "| ";
    }
  }
  out << std::endl;
}
|
2008-09-12 22:09:06 +04:00
|
|
|
|
2008-08-05 04:24:45 +04:00
|
|
|
void IOWrapper::Backtrack(const Hypothesis *hypo){
|
2006-07-15 01:51:05 +04:00
|
|
|
|
2006-07-17 19:05:00 +04:00
|
|
|
if (hypo->GetPrevHypo() != NULL) {
|
2006-10-17 15:07:17 +04:00
|
|
|
VERBOSE(3,hypo->GetId() << " <= ");
|
2006-07-15 01:51:05 +04:00
|
|
|
Backtrack(hypo->GetPrevHypo());
|
|
|
|
}
|
|
|
|
}
|
2007-05-16 00:54:39 +04:00
|
|
|
|
2010-05-11 01:18:47 +04:00
|
|
|
void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, bool /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out)
|
2010-02-03 20:04:05 +03:00
|
|
|
{
|
2010-02-12 18:56:37 +03:00
|
|
|
|
2010-02-03 20:04:05 +03:00
|
|
|
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++)
|
|
|
|
{
|
2010-02-17 20:25:56 +03:00
|
|
|
const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
|
|
|
|
if (i>0) out << " ";
|
|
|
|
out << *factor;
|
2010-02-03 20:04:05 +03:00
|
|
|
}
|
2010-02-17 20:25:56 +03:00
|
|
|
out << endl;
|
2010-02-12 18:56:37 +03:00
|
|
|
}
|
|
|
|
|
2010-02-03 20:04:05 +03:00
|
|
|
|
2007-09-28 20:43:33 +04:00
|
|
|
/**
 * Collect the source phrases used along a hypothesis chain.
 *
 * For every hypothesis in the chain that has a predecessor, its source
 * phrase is stored in \p map at the start position of its source words
 * range. Earlier hypotheses are processed first. The hypothesis without a
 * predecessor contributes nothing. \p map must be large enough to be
 * indexed by every start position; \p hypo must not be NULL.
 */
void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
{
  const Hypothesis *previous = hypo->GetPrevHypo();
  if (!previous) {
    return;
  }

  OutputInput(map, previous);
  map[hypo->GetCurrSourceWordsRange().GetStartPos()] = hypo->GetSourcePhrase();
}
|
|
|
|
|
|
|
|
void OutputInput(std::ostream& os, const Hypothesis* hypo)
|
|
|
|
{
|
2009-08-07 20:47:54 +04:00
|
|
|
size_t len = hypo->GetInput().GetSize();
|
2007-09-28 20:43:33 +04:00
|
|
|
std::vector<const Phrase*> inp_phrases(len, 0);
|
|
|
|
OutputInput(inp_phrases, hypo);
|
|
|
|
for (size_t i=0; i<len; ++i)
|
|
|
|
if (inp_phrases[i]) os << *inp_phrases[i];
|
|
|
|
}
|
|
|
|
|
2008-08-05 04:24:45 +04:00
|
|
|
void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
|
2006-07-04 22:04:38 +04:00
|
|
|
{
|
|
|
|
if (hypo != NULL)
|
|
|
|
{
|
2007-02-23 02:44:38 +03:00
|
|
|
VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
|
2006-08-30 23:51:07 +04:00
|
|
|
VERBOSE(3,"Best path: ");
|
2006-07-17 19:05:00 +04:00
|
|
|
Backtrack(hypo);
|
2006-08-30 23:51:07 +04:00
|
|
|
VERBOSE(3,"0" << std::endl);
|
2007-01-08 17:46:48 +03:00
|
|
|
if (!m_surpressSingleBestOutput)
|
|
|
|
{
|
2007-09-28 20:43:33 +04:00
|
|
|
if (StaticData::Instance().IsPathRecoveryEnabled()) {
|
|
|
|
OutputInput(cout, hypo);
|
|
|
|
cout << "||| ";
|
|
|
|
}
|
2007-01-08 17:46:48 +03:00
|
|
|
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
|
|
|
|
cout << endl;
|
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
else
|
2006-07-14 22:13:50 +04:00
|
|
|
{
|
2007-02-23 02:44:38 +03:00
|
|
|
VERBOSE(1, "NO BEST TRANSLATION" << endl);
|
2007-03-14 22:47:51 +03:00
|
|
|
if (!m_surpressSingleBestOutput)
|
|
|
|
{
|
|
|
|
cout << endl;
|
|
|
|
}
|
2006-07-14 22:13:50 +04:00
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
|
2009-08-31 16:55:33 +04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Write an n-best list, one line per trellis path.
 *
 * Line layout (fields are produced in this order):
 *   <translationId> ||| <surface> ||| <feature scores> ||| <total score>
 *   [ ||| <phrase alignment>] [|||<recovered input>]
 *
 * Feature scores are written for the stateful feature functions, then the
 * stateless ones, then the phrase dictionaries, then the generation
 * dictionaries. With labelled output each group is preceded by its short
 * name (" tm:", " g: ", " I:" for input scores).
 *
 * For input types other than SentenceInput, the first phrase dictionary's
 * leading GetNumInputScores() scores are written separately as input
 * scores, and every dictionary's translation scores start after its own
 * input scores.
 *
 * Everything that does not depend on the individual path (configuration
 * flags, feature function lists, dictionary lists) is looked up once
 * before the loop rather than once per n-best entry.
 *
 * \param out                stream receiving the list; flushed at the end
 * \param nBestList          paths to write
 * \param outputFactorOrder  factors written for each target word
 * \param translationId      id written at the start of every line
 */
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder,long translationId)
{
  const StaticData &staticData = StaticData::Instance();
  const bool labeledOutput = staticData.IsLabeledNBestList();
  const bool reportAllFactors = staticData.GetReportAllFactorsNBest();
  const bool includeAlignment = staticData.NBestIncludesAlignment();
  const bool recoverInputPath = staticData.IsPathRecoveryEnabled();
  const bool isSentenceInput = (staticData.GetInputType() == SentenceInput);

  const std::vector<const StatefulFeatureFunction*>& sff =
    staticData.GetScoreIndexManager().GetStatefulFeatureFunctions();
  const std::vector<const StatelessFeatureFunction*>& slf =
    staticData.GetScoreIndexManager().GetStatelessFeatureFunctions();
  const std::vector<PhraseDictionaryFeature*> &pds = staticData.GetPhraseDictionaries();
  const std::vector<GenerationDictionary*> &gds = staticData.GetGenerationDictionaries();

  for (TrellisPathList::const_iterator iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // print the surface factor of the translation
    out << translationId << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge.GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
    }
    out << " |||";

    // feature functions; a label is written whenever the short name changes
    std::string lastName = "";
    for (size_t i = 0; i < sff.size(); i++) {
      if (labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName()) {
        lastName = sff[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      const std::vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(sff[i]);
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }
    for (size_t i = 0; i < slf.size(); i++) {
      if (labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName()) {
        lastName = slf[i]->GetScoreProducerWeightShortName();
        out << " " << lastName << ":";
      }
      const std::vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(slf[i]);
      for (size_t j = 0; j < scores.size(); ++j) {
        out << " " << scores[j];
      }
    }

    // translation components
    if (pds.size() > 0) {
      if (!isSentenceInput) {
        // the first translation component has GetNumInputScores() scores
        // from the input at the beginning of its score vector
        PhraseDictionaryFeature *firstDictionary = *pds.begin();
        const size_t numInputScores = firstDictionary->GetNumInputScores();
        if (numInputScores) {
          const std::vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(firstDictionary);
          if (labeledOutput) {
            out << " I:";
          }
          for (size_t j = 0; j < numInputScores; ++j) {
            out << " " << scores[j];
          }
        }
      }

      if (labeledOutput) {
        out << " tm:";
      }
      std::vector<PhraseDictionaryFeature*>::const_iterator pd;
      for (pd = pds.begin(); pd != pds.end(); ++pd) {
        const std::vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*pd);
        // text input prints every score; other input types skip the
        // leading input scores of each dictionary
        const size_t firstTmScore = isSentenceInput ? 0 : (*pd)->GetNumInputScores();
        for (size_t j = firstTmScore; j < scores.size(); ++j) {
          out << " " << scores[j];
        }
      }
    }

    // generation
    if (gds.size() > 0) {
      if (labeledOutput) {
        out << " g: ";
      }
      std::vector<GenerationDictionary*>::const_iterator gd;
      for (gd = gds.begin(); gd != gds.end(); ++gd) {
        const std::vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*gd);
        for (size_t j = 0; j < scores.size(); j++) {
          out << scores[j] << " ";
        }
      }
    }

    // total
    out << " ||| " << path.GetTotalScore();

    // phrase-to-phrase alignment; the last element of edges is skipped
    if (includeAlignment) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
        WordsRange targetRange = path.GetTargetWordsRange(edge);

        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out << "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out << "-" << targetRange.GetEndPos();
        }
      }
    }

    if (recoverInputPath) {
      out << "|||";
      OutputInput(out, edges[0]);
    }

    out << std::endl;
  }

  out << std::flush;
}
|
|
|
|
|
2010-03-16 14:34:50 +03:00
|
|
|
void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) {
|
|
|
|
for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
|
|
|
|
out << translationId;
|
|
|
|
out << " ||| ";
|
|
|
|
const vector<Word> mbrHypo = si->GetWords();
|
|
|
|
for (size_t i = 0 ; i < mbrHypo.size() ; i++)
|
|
|
|
{
|
|
|
|
const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
|
|
|
|
if (i>0) out << " ";
|
|
|
|
out << *factor;
|
|
|
|
}
|
|
|
|
out << " ||| ";
|
|
|
|
out << "map: " << si->GetMapScore();
|
|
|
|
out << " w: " << mbrHypo.size();
|
|
|
|
const vector<float>& ngramScores = si->GetNgramScores();
|
|
|
|
for (size_t i = 0; i < ngramScores.size(); ++i) {
|
|
|
|
out << " " << ngramScores[i];
|
|
|
|
}
|
|
|
|
out << " ||| ";
|
|
|
|
out << si->GetScore();
|
|
|
|
|
|
|
|
out << endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-08-31 16:55:33 +04:00
|
|
|
/**
 * Write an n-best list to the wrapper's n-best stream.
 *
 * The n-best stream is only created by Initialization() when the n-best
 * size is greater than zero, so this must not be called when n-best output
 * is disabled; the assertion makes that precondition explicit instead of
 * dereferencing a NULL pointer.
 *
 * \param nBestList      paths to write
 * \param translationId  id written at the start of every line
 */
void IOWrapper::OutputNBestList(const TrellisPathList &nBestList, long translationId) {
  assert(m_nBestStream != NULL);
  OutputNBest(*m_nBestStream, nBestList,m_outputFactorOrder, translationId);
}
|
2009-08-07 20:47:54 +04:00
|
|
|
|
2010-03-16 14:34:50 +03:00
|
|
|
/**
 * Write lattice MBR solutions to the wrapper's n-best stream.
 *
 * The n-best stream is only created by Initialization() when the n-best
 * size is greater than zero, so this must not be called when n-best output
 * is disabled; the assertion makes that precondition explicit instead of
 * dereferencing a NULL pointer.
 *
 * \param solutions      solutions to write
 * \param translationId  id written at the start of every line
 */
void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solutions,long translationId) {
  assert(m_nBestStream != NULL);
  OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
}
|
|
|
|
|
2009-08-07 20:47:54 +04:00
|
|
|
/**
 * Replace \p source with the next input item read through \p ioWrapper.
 *
 * The object \p source currently points to is deleted first. A new input
 * object matching \p inputType is then allocated and handed to
 * IOWrapper::GetInput(), which returns it filled in, or NULL when nothing
 * could be read.
 *
 * \p source is reset to NULL immediately after the delete so that it never
 * dangles: for an unrecognised input type no new object is assigned, and
 * without the reset the function would return true with a pointer to
 * freed memory.
 *
 * \param ioWrapper  wrapper to read from
 * \param inputType  kind of input object to create
 * \param source     in: previous input (may be NULL); out: new input or NULL
 * \return true when a new input item was read, false otherwise
 */
bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
{
  delete source;
  source = NULL;

  switch(inputType) {
    case SentenceInput:
      source = ioWrapper.GetInput(new Sentence(Input));
      break;
    case ConfusionNetworkInput:
      source = ioWrapper.GetInput(new ConfusionNet);
      break;
    case WordLatticeInput:
      source = ioWrapper.GetInput(new WordLattice);
      break;
    default:
      TRACE_ERR("Unknown input type: " << inputType << "\n");
  }

  return (source ? true : false);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Create the IOWrapper described by the configuration.
 *
 * When exactly one "input-file" parameter is given, input is read from
 * that file; otherwise the wrapper reads from standard input. In both
 * cases the n-best size and path are taken from \p staticData and the
 * translation id counter of the new wrapper is reset.
 *
 * \param staticData  configuration to read the settings from
 * \return a heap-allocated IOWrapper; the caller is responsible for
 *         deleting it
 */
IOWrapper *GetIODevice(const StaticData &staticData)
{
  const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
  FactorMask inputFactorUsed(inputFactorOrder);

  // io
  IOWrapper *ioWrapper;
  const bool readFromFile = (staticData.GetParam("input-file").size() == 1);
  if (readFromFile) {
    VERBOSE(2,"IO from File" << endl);
    string filePath = staticData.GetParam("input-file")[0];

    ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed,
                              staticData.GetNBestSize(),
                              staticData.GetNBestFilePath(),
                              filePath);
  } else {
    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
    ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed,
                              staticData.GetNBestSize(),
                              staticData.GetNBestFilePath());
  }
  ioWrapper->ResetTranslationId();

  // IFVERBOSE is a project macro guarding the statement that follows it
  IFVERBOSE(1)
    PrintUserTime("Created input-output object");

  return ioWrapper;
}
|