mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-17 14:17:13 +03:00
get ready to merge IOWrapper
This commit is contained in:
parent
e4489cbdde
commit
4403bd5b3c
@ -441,16 +441,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/IOWrapper.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>IOWrapperChart.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/IOWrapperChart.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>IOWrapperChart.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/IOWrapperChart.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Incremental.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -1,776 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#include <iostream>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include "IOWrapperChart.h"
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/Util.h"
|
||||
#include "moses/WordsRange.h"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/InputFileStream.h"
|
||||
#include "moses/Incremental.h"
|
||||
#include "moses/TranslationModel/PhraseDictionary.h"
|
||||
#include "moses/ChartTranslationOptions.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
#include "moses/FeatureVector.h"
|
||||
#include "moses/FF/StatefulFeatureFunction.h"
|
||||
#include "moses/FF/StatelessFeatureFunction.h"
|
||||
#include "moses/FF/TreeStructureFeature.h"
|
||||
#include "moses/PP/TreeStructurePhraseProperty.h"
|
||||
#include "moses/TreeInput.h"
|
||||
#include "moses/ConfusionNet.h"
|
||||
#include "moses/WordLattice.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/**
 * Set up the input stream and every optional output channel.
 *
 * \param inputFactorOrder  factor order used when reading input
 * \param outputFactorOrder factor order used when printing output
 * \param inputFactorUsed   mask of the input factors in use
 * \param nBestSize         n-best list size; 0 disables n-best output
 * \param nBestFilePath     n-best destination; "-" means stdout, which also
 *                          suppresses the single-best output
 * \param inputFilePath     input file; empty means read from stdin
 *
 * Throws (via UTIL_THROW_IF2) if the alignment or unknown-words file
 * cannot be opened.
 */
IOWrapperChart::IOWrapperChart(const std::vector<FactorType> &inputFactorOrder
                               , const std::vector<FactorType> &outputFactorOrder
                               , const FactorMask &inputFactorUsed
                               , size_t nBestSize
                               , const std::string &nBestFilePath
                               , const std::string &inputFilePath)
  :m_inputFactorOrder(inputFactorOrder)
  ,m_outputFactorOrder(outputFactorOrder)
  ,m_inputFactorUsed(inputFactorUsed)
  ,m_outputSearchGraphStream(NULL)
  ,m_detailedTranslationReportingStream(NULL)
  ,m_detailedTreeFragmentsTranslationReportingStream(NULL)
  ,m_alignmentInfoStream(NULL)
  ,m_unknownsStream(NULL)
  ,m_inputFilePath(inputFilePath)
  ,m_detailedTranslationCollector(NULL)
  ,m_detailTreeFragmentsOutputCollector(NULL)
  ,m_nBestOutputCollector(NULL)
  ,m_searchGraphOutputCollector(NULL)
  ,m_singleBestOutputCollector(NULL)
  ,m_alignmentInfoCollector(NULL)
  ,m_unknownsCollector(NULL)
{
  const StaticData &staticData = StaticData::Instance();

  // Input: a named file when a path was given, otherwise stdin.
  // Only the file stream is owned (see the destructor).
  if (!m_inputFilePath.empty()) {
    m_inputStream = new InputFileStream(inputFilePath);
  } else {
    m_inputStream = &std::cin;
  }

  // N-best list: "-" sends it to stdout, anything else is a file path.
  const bool nBestRequested = (nBestSize > 0);
  const bool nBestOnStdout = nBestRequested && (nBestFilePath == "-");
  if (nBestRequested) {
    if (nBestOnStdout) {
      m_nBestOutputCollector = new Moses::OutputCollector(&std::cout);
    } else {
      m_nBestOutputCollector = new Moses::OutputCollector(new std::ofstream(nBestFilePath.c_str()));
      m_nBestOutputCollector->HoldOutputStream();
    }
  }

  // Single-best output goes to stdout unless the n-best list already does.
  if (!nBestOnStdout) {
    m_singleBestOutputCollector = new Moses::OutputCollector(&std::cout);
  }

  // search graph output
  if (staticData.GetOutputSearchGraph()) {
    const string graphPath = staticData.GetParam("output-search-graph")[0];
    std::ofstream *graphFile = new std::ofstream;
    m_outputSearchGraphStream = graphFile;
    graphFile->open(graphPath.c_str());
    m_searchGraphOutputCollector = new Moses::OutputCollector(m_outputSearchGraphStream);
  }

  // detailed translation reporting
  if (staticData.IsDetailedTranslationReportingEnabled()) {
    const std::string &reportPath = staticData.GetDetailedTranslationReportingFilePath();
    m_detailedTranslationReportingStream = new std::ofstream(reportPath.c_str());
    m_detailedTranslationCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
  }

  // detailed translation reporting including tree fragments
  if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
    const std::string &fragmentsPath = staticData.GetDetailedTreeFragmentsTranslationReportingFilePath();
    m_detailedTreeFragmentsTranslationReportingStream = new std::ofstream(fragmentsPath.c_str());
    m_detailTreeFragmentsOutputCollector = new Moses::OutputCollector(m_detailedTreeFragmentsTranslationReportingStream);
  }

  // word alignment output
  if (!staticData.GetAlignmentOutputFile().empty()) {
    m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
    m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
    UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
                   "File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
  }

  // unknown words output
  if (!staticData.GetOutputUnknownsFile().empty()) {
    m_unknownsStream = new std::ofstream(staticData.GetOutputUnknownsFile().c_str());
    m_unknownsCollector = new Moses::OutputCollector(m_unknownsStream);
    UTIL_THROW_IF2(!m_unknownsStream->good(),
                   "File for unknowns words could not be opened: " <<
                   staticData.GetOutputUnknownsFile());
  }
}
|
||||
|
||||
/**
 * Release the streams and output collectors created by the constructor.
 * Deleting a NULL pointer is a no-op, so channels that were never enabled
 * need no special handling.
 */
IOWrapperChart::~IOWrapperChart()
{
  // The input stream is only owned when it was opened from a file;
  // otherwise it points at std::cin and must not be deleted.
  const bool ownsInputStream = !m_inputFilePath.empty();
  if (ownsInputStream) {
    delete m_inputStream;
  }

  // Deletion order is kept exactly as it always was.
  delete m_outputSearchGraphStream;
  delete m_detailedTranslationReportingStream;
  delete m_detailedTreeFragmentsTranslationReportingStream;
  delete m_detailTreeFragmentsOutputCollector;
  delete m_alignmentInfoStream;
  delete m_unknownsStream;

  delete m_detailedTranslationCollector;
  delete m_nBestOutputCollector;
  delete m_searchGraphOutputCollector;
  delete m_singleBestOutputCollector;
  delete m_alignmentInfoCollector;
  delete m_unknownsCollector;
}
|
||||
|
||||
/**
 * Fill \p inputType from the wrapper's input stream.
 *
 * \param inputType freshly allocated input object; ownership is taken here
 * \return \p inputType when Read() succeeds; otherwise the object is deleted
 *         and NULL is returned
 */
InputType*IOWrapperChart::GetInput(InputType* inputType)
{
  if (!inputType->Read(*m_inputStream, m_inputFactorOrder)) {
    // Nothing could be read: dispose of the unused object.
    delete inputType;
    return NULL;
  }
  return inputType;
}
|
||||
|
||||
/**
 * Read the next input of the requested kind.
 *
 * Any object previously held in \p source is deleted first.
 *
 * \param ioWrapper wrapper whose input stream is read
 * \param inputType kind of input object to create
 * \param source    in/out: receives the newly read input, or NULL when
 *                  nothing could be read or the input type is unknown
 * \return true iff \p source now points at a valid input object
 */
bool IOWrapperChart::ReadInput(IOWrapperChart &ioWrapper, InputTypeEnum inputType, InputType*& source)
{
  delete source;
  // Reset immediately: without this the unknown-type branch below would
  // leave the caller holding a dangling pointer and report success.
  source = NULL;

  switch(inputType) {
  case SentenceInput:
    source = ioWrapper.GetInput(new Sentence);
    break;
  case ConfusionNetworkInput:
    source = ioWrapper.GetInput(new ConfusionNet);
    break;
  case WordLatticeInput:
    source = ioWrapper.GetInput(new WordLattice);
    break;
  case TreeInputType:
    source = ioWrapper.GetInput(new TreeInput);
    break;
  default:
    TRACE_ERR("Unknown input type: " << inputType << "\n");
    break;
  }
  return source != NULL;
}
|
||||
|
||||
/***
|
||||
* print surface factor only for the given phrase
|
||||
*/
|
||||
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
|
||||
{
|
||||
UTIL_THROW_IF2(outputFactorOrder.size() == 0,
|
||||
"Cannot be empty phrase");
|
||||
if (reportAllFactors == true) {
|
||||
out << phrase;
|
||||
} else {
|
||||
size_t size = phrase.GetSize();
|
||||
for (size_t pos = 0 ; pos < size ; pos++) {
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
|
||||
out << *factor;
|
||||
UTIL_THROW_IF2(factor == NULL,
|
||||
"Empty factor 0 at position " << pos);
|
||||
|
||||
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
||||
UTIL_THROW_IF2(factor == NULL,
|
||||
"Empty factor " << i << " at position " << pos);
|
||||
|
||||
out << "|" << *factor;
|
||||
}
|
||||
out << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OutputSurface(std::ostream &out, const ChartHypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
|
||||
,bool reportSegmentation, bool reportAllFactors)
|
||||
{
|
||||
if ( hypo != NULL) {
|
||||
//OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
|
||||
|
||||
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
||||
|
||||
vector<const ChartHypothesis*>::const_iterator iter;
|
||||
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
||||
const ChartHypothesis *prevHypo = *iter;
|
||||
|
||||
OutputSurface(out, prevHypo, outputFactorOrder, reportSegmentation, reportAllFactors);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IOWrapperChart::Backtrack(const ChartHypothesis *hypo)
|
||||
{
|
||||
const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
||||
|
||||
vector<const ChartHypothesis*>::const_iterator iter;
|
||||
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
||||
const ChartHypothesis *prevHypo = *iter;
|
||||
|
||||
VERBOSE(3,prevHypo->GetId() << " <= ");
|
||||
Backtrack(prevHypo);
|
||||
}
|
||||
}
|
||||
/*
|
||||
void IOWrapperChart::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, long )
|
||||
{
|
||||
for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
|
||||
const Factor *factor = mbrBestHypo[i];
|
||||
UTIL_THROW_IF(factor == NULL, util::Exception,
|
||||
"No factor at position " << i );
|
||||
|
||||
cout << *factor << " ";
|
||||
}
|
||||
}
|
||||
*/
|
||||
/*
|
||||
void OutputInput(std::vector<const Phrase*>& map, const ChartHypothesis* hypo)
|
||||
{
|
||||
if (hypo->GetPrevHypos())
|
||||
{
|
||||
OutputInput(map, hypo->GetPrevHypos());
|
||||
map[hypo->GetCurrSourceWordsRange().GetStartPos()] = hypo->GetSourcePhrase();
|
||||
}
|
||||
}
|
||||
|
||||
void OutputInput(std::ostream& os, const ChartHypothesis* hypo)
|
||||
{
|
||||
size_t len = StaticData::Instance().GetInput()->GetSize();
|
||||
std::vector<const Phrase*> inp_phrases(len, 0);
|
||||
OutputInput(inp_phrases, hypo);
|
||||
for (size_t i=0; i<len; ++i)
|
||||
if (inp_phrases[i]) os << *inp_phrases[i];
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void IOWrapperChart::OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
|
||||
{
|
||||
ReconstructApplicationContext(*hypo, sentence, applicationContext);
|
||||
out << "Trans Opt " << translationId
|
||||
<< " " << hypo->GetCurrSourceRange()
|
||||
<< ": ";
|
||||
WriteApplicationContext(out, applicationContext);
|
||||
out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
|
||||
<< "->" << hypo->GetCurrTargetPhrase()
|
||||
<< " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown();
|
||||
}
|
||||
|
||||
/**
 * Incremental-search variant: write one "Trans Opt" line (no trailing
 * newline) for the rule recorded in \p applied.
 *
 * The target phrase is recovered from the note attached to \p applied.
 * Unlike the ChartHypothesis variant, no score breakdown is printed.
 * \p applied must not be NULL.
 */
void IOWrapperChart::OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId)
{
  ReconstructApplicationContext(applied, sentence, applicationContext);
  const TargetPhrase *notePhrase = static_cast<const TargetPhrase*>(applied->GetNote().vp);
  const TargetPhrase &phrase = *notePhrase;

  out << "Trans Opt " << translationId;
  out << " " << applied->GetRange();
  out << ": ";
  WriteApplicationContext(out, applicationContext);

  out << ": " << phrase.GetTargetLHS();
  out << "->" << phrase;
  out << " " << applied->GetScore(); // << hypo->GetScoreBreakdown() TODO: missing in incremental search hypothesis
}
|
||||
|
||||
|
||||
void IOWrapperChart::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
|
||||
{
|
||||
if (hypo != NULL) {
|
||||
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
|
||||
out << std::endl;
|
||||
}
|
||||
|
||||
// recursive
|
||||
const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
||||
std::vector<const ChartHypothesis*>::const_iterator iter;
|
||||
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
||||
const ChartHypothesis *prevHypo = *iter;
|
||||
OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Incremental-search variant: print the translation option recorded in
 * \p applied, one per line, followed recursively by those of its children.
 *
 * A NULL \p applied prints nothing. It used to be dereferenced after the
 * NULL check when fetching the children.
 */
void IOWrapperChart::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Sentence &sentence, long translationId)
{
  if (applied == NULL) {
    return;
  }

  OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
  out << std::endl;

  // recursive: Children() is indexed as an array of GetArity() entries
  const search::Applied *child = applied->Children();
  for (size_t i = 0; i < applied->GetArity(); i++) {
    OutputTranslationOptions(out, applicationContext, child++, sentence, translationId);
  }
}
|
||||
|
||||
void IOWrapperChart::OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
|
||||
{
|
||||
|
||||
if (hypo != NULL) {
|
||||
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
|
||||
|
||||
const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
|
||||
|
||||
out << " ||| ";
|
||||
if (const PhraseProperty *property = currTarPhr.GetProperty("Tree")) {
|
||||
out << " " << property->GetValueString();
|
||||
} else {
|
||||
out << " " << "noTreeInfo";
|
||||
}
|
||||
out << std::endl;
|
||||
}
|
||||
|
||||
// recursive
|
||||
const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
|
||||
std::vector<const ChartHypothesis*>::const_iterator iter;
|
||||
for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
|
||||
const ChartHypothesis *prevHypo = *iter;
|
||||
OutputTreeFragmentsTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void IOWrapperChart::OutputDetailedTranslationReport(
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId)
|
||||
{
|
||||
if (hypo == NULL) {
|
||||
return;
|
||||
}
|
||||
std::ostringstream out;
|
||||
ApplicationContext applicationContext;
|
||||
|
||||
OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
|
||||
UTIL_THROW_IF2(m_detailedTranslationCollector == NULL,
|
||||
"No ouput file for detailed reports specified");
|
||||
m_detailedTranslationCollector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
|
||||
void IOWrapperChart::OutputDetailedTreeFragmentsTranslationReport(
|
||||
const ChartHypothesis *hypo,
|
||||
const Sentence &sentence,
|
||||
long translationId)
|
||||
{
|
||||
if (hypo == NULL) {
|
||||
return;
|
||||
}
|
||||
std::ostringstream out;
|
||||
ApplicationContext applicationContext;
|
||||
|
||||
OutputTreeFragmentsTranslationOptions(out, applicationContext, hypo, sentence, translationId);
|
||||
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
|
||||
"No output file for tree fragments specified");
|
||||
|
||||
//Tree of full sentence
|
||||
const StatefulFeatureFunction* treeStructure = StaticData::Instance().GetTreeStructure();
|
||||
if (treeStructure != NULL) {
|
||||
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ ) {
|
||||
if (sff[i] == treeStructure) {
|
||||
const TreeState* tree = dynamic_cast<const TreeState*>(hypo->GetFFState(i));
|
||||
out << "Full Tree " << translationId << ": " << tree->GetTree()->GetString() << "\n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
|
||||
|
||||
}
|
||||
|
||||
void IOWrapperChart::OutputDetailedTreeFragmentsTranslationReport(
|
||||
const search::Applied *applied,
|
||||
const Sentence &sentence,
|
||||
long translationId)
|
||||
{
|
||||
if (applied == NULL) {
|
||||
return;
|
||||
}
|
||||
std::ostringstream out;
|
||||
ApplicationContext applicationContext;
|
||||
|
||||
OutputTreeFragmentsTranslationOptions(out, applicationContext, applied, sentence, translationId);
|
||||
UTIL_THROW_IF2(m_detailTreeFragmentsOutputCollector == NULL,
|
||||
"No output file for tree fragments specified");
|
||||
|
||||
//Tree of full sentence
|
||||
//TODO: incremental search doesn't support stateful features
|
||||
|
||||
m_detailTreeFragmentsOutputCollector->Write(translationId, out.str());
|
||||
|
||||
}
|
||||
|
||||
//DIMw
|
||||
void IOWrapperChart::OutputDetailedAllTranslationReport(
|
||||
const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList,
|
||||
const ChartManager &manager,
|
||||
const Sentence &sentence,
|
||||
long translationId)
|
||||
{
|
||||
std::ostringstream out;
|
||||
ApplicationContext applicationContext;
|
||||
|
||||
const ChartCellCollection& cells = manager.GetChartCellCollection();
|
||||
size_t size = manager.GetSource().GetSize();
|
||||
for (size_t width = 1; width <= size; ++width) {
|
||||
for (size_t startPos = 0; startPos <= size-width; ++startPos) {
|
||||
size_t endPos = startPos + width - 1;
|
||||
WordsRange range(startPos, endPos);
|
||||
const ChartCell& cell = cells.Get(range);
|
||||
const HypoList* hyps = cell.GetAllSortedHypotheses();
|
||||
out << "Chart Cell [" << startPos << ".." << endPos << "]" << endl;
|
||||
HypoList::const_iterator iter;
|
||||
size_t c = 1;
|
||||
for (iter = hyps->begin(); iter != hyps->end(); ++iter) {
|
||||
out << "----------------Item " << c++ << " ---------------------"
|
||||
<< endl;
|
||||
OutputTranslationOptions(out, applicationContext, *iter,
|
||||
sentence, translationId);
|
||||
}
|
||||
}
|
||||
}
|
||||
UTIL_THROW_IF2(m_detailAllOutputCollector == NULL,
|
||||
"No output file for details specified");
|
||||
m_detailAllOutputCollector->Write(translationId, out.str());
|
||||
}
|
||||
|
||||
|
||||
void IOWrapperChart::OutputAllFeatureScores(const ScoreComponentCollection &features, std::ostream &out)
|
||||
{
|
||||
std::string lastName = "";
|
||||
const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
|
||||
for( size_t i=0; i<sff.size(); i++ ) {
|
||||
const StatefulFeatureFunction *ff = sff[i];
|
||||
if (ff->GetScoreProducerDescription() != "BleuScoreFeature"
|
||||
&& ff->IsTuneable()) {
|
||||
OutputFeatureScores( out, features, ff, lastName );
|
||||
}
|
||||
}
|
||||
const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
|
||||
for( size_t i=0; i<slf.size(); i++ ) {
|
||||
const StatelessFeatureFunction *ff = slf[i];
|
||||
if (ff->IsTuneable()) {
|
||||
OutputFeatureScores( out, features, ff, lastName );
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void IOWrapperChart::OutputFeatureScores( std::ostream& out, const ScoreComponentCollection &features, const FeatureFunction *ff, std::string &lastName )
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
bool labeledOutput = staticData.IsLabeledNBestList();
|
||||
|
||||
// regular features (not sparse)
|
||||
if (ff->GetNumScoreComponents() != 0) {
|
||||
if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
|
||||
lastName = ff->GetScoreProducerDescription();
|
||||
out << " " << lastName << "=";
|
||||
}
|
||||
vector<float> scores = features.GetScoresForProducer( ff );
|
||||
for (size_t j = 0; j<scores.size(); ++j) {
|
||||
out << " " << scores[j];
|
||||
}
|
||||
}
|
||||
|
||||
// sparse features
|
||||
const FVector scores = features.GetVectorForProducer( ff );
|
||||
for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
|
||||
out << " " << i->first << "= " << i->second;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Format an n-best list and hand it to the n-best collector.
 *
 * Each derivation becomes one line:
 *   id ||| surface ||| feature scores ||| total score
 * optionally followed by " ||| " plus the word alignments and by
 * " ||| " plus the output tree, depending on the StaticData settings.
 * The sentence boundary words at both ends of every output phrase are
 * stripped before printing.
 *
 * Requires an n-best collector to be configured. The assertion used to sit
 * at the end of the function, after the collector had been dereferenced.
 * Throws (via UTIL_THROW_IF2) if an output phrase has fewer than 2 words.
 */
void IOWrapperChart::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
                                     long translationId)
{
  assert(m_nBestOutputCollector);

  std::ostringstream out;

  if (m_nBestOutputCollector->OutputIsCout()) {
    // Set precision only if we're writing the n-best list to cout.  This is to
    // preserve existing behaviour, but should probably be done either way.
    IOWrapperChart::FixPrecision(out);
  }

  bool includeWordAlignment =
    StaticData::Instance().PrintAlignmentInfoInNbest();

  bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();

  for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
       p != nBestList.end(); ++p) {
    const ChartKBestExtractor::Derivation &derivation = **p;

    // get the derivation's target-side yield
    Phrase outputPhrase = ChartKBestExtractor::GetOutputPhrase(derivation);

    // delete <s> and </s>
    UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
                   "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
    outputPhrase.RemoveWord(0);
    outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);

    // print the translation ID, surface factors, and scores
    out << translationId << " ||| ";
    OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
    out << " ||| ";
    OutputAllFeatureScores(derivation.scoreBreakdown, out);
    out << " ||| " << derivation.score;

    // optionally, print word alignments
    if (includeWordAlignment) {
      out << " ||| ";
      Alignments align;
      OutputAlignmentNBest(align, derivation, 0);
      for (Alignments::const_iterator q = align.begin(); q != align.end();
           ++q) {
        out << q->first << "-" << q->second << " ";
      }
    }

    // optionally, print tree
    if (PrintNBestTrees) {
      TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
      out << " ||| " << tree->GetString();
    }

    out << std::endl;
  }

  m_nBestOutputCollector->Write(translationId, out.str());
}
|
||||
|
||||
|
||||
/**
 * Switch \p stream to fixed-point notation with \p size digits after the
 * decimal point.
 */
void IOWrapperChart::FixPrecision(std::ostream &stream, size_t size)
{
  // setf() only adds the fixed flag; other format flags stay untouched
  stream.setf(std::ios_base::fixed);
  stream.precision(size);
}
|
||||
|
||||
/**
 * Turn rule-relative offsets into absolute positions, in place.
 *
 * A running position starts at \p shift. An entry equal to 0 is replaced by
 * the running position, which then advances by one. A non-zero entry is
 * left unchanged and advances the running position by its own value.
 */
template <class T>
void ShiftOffsets(std::vector<T> &offsets, T shift)
{
  T nextPos = shift;
  for (typename std::vector<T>::iterator entry = offsets.begin(); entry != offsets.end(); ++entry) {
    if (*entry != 0) {
      // non-zero entry: skip over that many positions
      nextPos += *entry;
      continue;
    }
    *entry = nextPos;
    ++nextPos;
  }
}
|
||||
|
||||
/**
 * Compute a source-side size for \p hypo: start from the number of words
 * covered by its source range and, for every predecessor hypothesis,
 * subtract (words covered by that predecessor - 1).
 */
size_t CalcSourceSize(const Moses::ChartHypothesis *hypo)
{
  size_t symbolCount = hypo->GetCurrSourceRange().GetNumWordsCovered();

  const std::vector<const ChartHypothesis*> &children = hypo->GetPrevHypos();
  std::vector<const ChartHypothesis*>::const_iterator child;
  for (child = children.begin(); child != children.end(); ++child) {
    const size_t childWords = (*child)->GetCurrSourceRange().GetNumWordsCovered();
    symbolCount -= (childWords - 1);
  }
  return symbolCount;
}
|
||||
|
||||
/**
 * Collect the word alignments of \p derivation, including those of all its
 * sub-derivations, as absolute (source position, target position) pairs.
 *
 * \param retAlign    receives the alignment points; each point may be
 *                    inserted only once
 * \param derivation  derivation to process
 * \param startTarget absolute target position at which this derivation's
 *                    output starts
 * \return number of target words produced by this derivation
 *
 * Throws (via UTIL_THROW_IF2) on inconsistent alignment information:
 * mismatched non-terminal counts, out-of-range indices or duplicate
 * alignment points.
 */
size_t IOWrapperChart::OutputAlignmentNBest(
  Alignments &retAlign,
  const Moses::ChartKBestExtractor::Derivation &derivation,
  size_t startTarget)
{
  const ChartHypothesis &hypo = derivation.edge.head->hypothesis;

  size_t totalTargetSize = 0;
  size_t startSource = hypo.GetCurrSourceRange().GetStartPos();

  const TargetPhrase &tp = hypo.GetCurrTargetPhrase();

  size_t thisSourceSize = CalcSourceSize(&hypo);

  // position of each terminal word in translation rule, irrespective of alignment
  // if non-term, number is undefined
  vector<size_t> sourceOffsets(thisSourceSize, 0);
  vector<size_t> targetOffsets(tp.GetSize(), 0);

  const AlignmentInfo &aiNonTerm = hypo.GetCurrTargetPhrase().GetAlignNonTerm();
  vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
  const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();

  UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
                 "Error");

  for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
    if (tp.GetWord(targetPos).IsNonTerminal()) {
      UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
      size_t sourceInd = targetPos2SourceInd[targetPos];
      // sourceInd indexes both sourceInd2pos and derivation.subderivations,
      // which were checked above to have equal size
      UTIL_THROW_IF2(sourceInd >= sourceInd2pos.size(),
                     "Non-terminal source index out of range: " << sourceInd);
      size_t sourcePos = sourceInd2pos[sourceInd];
      UTIL_THROW_IF2(sourcePos >= sourceOffsets.size(),
                     "Non-terminal source position out of range: " << sourcePos);

      const Moses::ChartKBestExtractor::Derivation &subderivation =
        *derivation.subderivations[sourceInd];

      // calc source size
      size_t sourceSize = subderivation.edge.head->hypothesis.GetCurrSourceRange().GetNumWordsCovered();
      sourceOffsets[sourcePos] = sourceSize;

      // calc target size.
      // Recursively look thru child hypos
      size_t currStartTarget = startTarget + totalTargetSize;
      size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
                          currStartTarget);
      targetOffsets[targetPos] = targetSize;

      totalTargetSize += targetSize;
    } else {
      ++totalTargetSize;
    }
  }

  // convert position within translation rule to absolute position within
  // source sentence / output sentence
  ShiftOffsets(sourceOffsets, startSource);
  ShiftOffsets(targetOffsets, startTarget);

  // get alignments from this hypo
  const AlignmentInfo &aiTerm = hypo.GetCurrTargetPhrase().GetAlignTerm();

  // add to output arg, offsetting by source & target
  AlignmentInfo::const_iterator iter;
  for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
    const std::pair<size_t,size_t> &align = *iter;
    size_t relSource = align.first;
    size_t relTarget = align.second;
    UTIL_THROW_IF2(relSource >= sourceOffsets.size(),
                   "Aligned source position out of range: " << relSource);
    UTIL_THROW_IF2(relTarget >= targetOffsets.size(),
                   "Aligned target position out of range: " << relTarget);
    size_t absSource = sourceOffsets[relSource];
    size_t absTarget = targetOffsets[relTarget];

    pair<size_t, size_t> alignPoint(absSource, absTarget);
    pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
    UTIL_THROW_IF2(!ret.second, "Error");
  }

  return totalTargetSize;
}
|
||||
|
||||
/**
 * Write the word alignments of the derivation rooted at \p hypo as one line
 * of "source-target " pairs to the alignment collector. A NULL \p hypo
 * produces an empty line.
 *
 * Throws (via UTIL_THROW_IF2) when no alignment collector was configured,
 * matching the checks done by the other report writers in this class.
 */
void IOWrapperChart::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
{
  UTIL_THROW_IF2(m_alignmentInfoCollector == NULL,
                 "No output file for alignment info specified");

  ostringstream out;

  if (hypo) {
    Alignments retAlign;
    OutputAlignment(retAlign, hypo, 0);

    // output alignments
    Alignments::const_iterator iter;
    for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
      const pair<size_t, size_t> &alignPoint = *iter;
      out << alignPoint.first << "-" << alignPoint.second << " ";
    }
  }
  out << endl;

  m_alignmentInfoCollector->Write(translationId, out.str());
}
|
||||
|
||||
/**
 * Write all unknown phrases of a sentence, concatenated on a single line,
 * to the unknown-words collector under \p translationId.
 *
 * Throws (via UTIL_THROW_IF2) when no unknown-words collector was
 * configured, matching the checks done by the other report writers in
 * this class.
 */
void IOWrapperChart::OutputUnknowns(const std::vector<Moses::Phrase*> &unknowns,
                                    long translationId)
{
  UTIL_THROW_IF2(m_unknownsCollector == NULL,
                 "No output file for unknown words specified");

  std::ostringstream out;
  for (std::size_t i = 0; i < unknowns.size(); ++i) {
    out << *(unknowns[i]);
  }
  out << std::endl;
  m_unknownsCollector->Write(translationId, out.str());
}
|
||||
|
||||
|
||||
/**
 * Add every alignment point of \p ai to a source-to-target table:
 * retAlignmentsS2T[source] receives the aligned target position.
 *
 * Throws (via UTIL_THROW_IF2) if a source position lies outside the table
 * or if an alignment point is already present.
 */
void IOWrapperChart::OutputAlignment(vector< set<size_t> > &retAlignmentsS2T, const AlignmentInfo &ai)
{
  typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
  AlignVec sortedPoints = ai.GetSortedAlignments();

  for (size_t pointInd = 0; pointInd < sortedPoints.size(); ++pointInd) {
    const std::pair<size_t,size_t> &alignPoint = *sortedPoints[pointInd];
    const size_t sourcePos = alignPoint.first;
    const size_t targetPos = alignPoint.second;

    UTIL_THROW_IF2(sourcePos >= retAlignmentsS2T.size(), "Error");
    const bool inserted = retAlignmentsS2T[sourcePos].insert(targetPos).second;
    UTIL_THROW_IF2(!inserted, "Error");
  }
}
|
||||
|
||||
/**
 * Create an IOWrapperChart from the decoder configuration.
 *
 * When exactly one "input-file" parameter is given, input is read from that
 * file; otherwise the wrapper is built without an input path. The caller
 * owns the returned object.
 */
IOWrapperChart *IOWrapperChart::GetIOWrapper(const StaticData &staticData)
{
  const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
  FactorMask inputFactorUsed(inputFactorOrder);

  // io
  IOWrapperChart *ioWrapper = NULL;
  const bool readFromFile = (staticData.GetParam("input-file").size() == 1);
  if (readFromFile) {
    VERBOSE(2,"IO from File" << endl);
    string filePath = staticData.GetParam("input-file")[0];

    ioWrapper = new IOWrapperChart(inputFactorOrder, outputFactorOrder, inputFactorUsed
                                   , staticData.GetNBestSize()
                                   , staticData.GetNBestFilePath()
                                   , filePath);
  } else {
    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
    ioWrapper = new IOWrapperChart(inputFactorOrder, outputFactorOrder, inputFactorUsed
                                   , staticData.GetNBestSize()
                                   , staticData.GetNBestFilePath());
  }

  IFVERBOSE(1)
  PrintUserTime("Created input-output object");

  return ioWrapper;
}
|
||||
|
||||
}
|
||||
|
@ -1,145 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "moses/TypeDef.h"
|
||||
#include "moses/Sentence.h"
|
||||
#include "moses/FactorTypeSet.h"
|
||||
#include "moses/ChartKBestExtractor.h"
|
||||
#include "moses/OutputCollector.h"
|
||||
#include "moses/ChartHypothesis.h"
|
||||
#include "search/applied.hh"
|
||||
#include "moses/ChartManager.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
class FactorCollection;
|
||||
class ScoreComponentCollection;
|
||||
|
||||
/** Helper class that holds misc variables to write data out to command line.
|
||||
*/
|
||||
class IOWrapperChart
|
||||
{
|
||||
protected:
|
||||
typedef std::vector<std::pair<Moses::Word, Moses::WordsRange> > ApplicationContext;
|
||||
|
||||
const std::vector<Moses::FactorType> &m_inputFactorOrder;
|
||||
const std::vector<Moses::FactorType> &m_outputFactorOrder;
|
||||
const Moses::FactorMask &m_inputFactorUsed;
|
||||
std::ostream *m_outputSearchGraphStream;
|
||||
std::ostream *m_detailedTranslationReportingStream;
|
||||
std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
|
||||
//DIMw
|
||||
std::ostream *m_detailedAllTranslationReportingStream;
|
||||
std::ostream *m_alignmentInfoStream;
|
||||
std::ostream *m_unknownsStream;
|
||||
std::string m_inputFilePath;
|
||||
std::istream *m_inputStream;
|
||||
Moses::OutputCollector *m_detailedTranslationCollector;
|
||||
Moses::OutputCollector *m_detailTreeFragmentsOutputCollector;
|
||||
//DIMw
|
||||
Moses::OutputCollector *m_detailAllOutputCollector;
|
||||
Moses::OutputCollector *m_nBestOutputCollector;
|
||||
Moses::OutputCollector *m_searchGraphOutputCollector;
|
||||
Moses::OutputCollector *m_singleBestOutputCollector;
|
||||
Moses::OutputCollector *m_alignmentInfoCollector;
|
||||
Moses::OutputCollector *m_unknownsCollector;
|
||||
|
||||
typedef std::set< std::pair<size_t, size_t> > Alignments;
|
||||
std::size_t OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartKBestExtractor::Derivation &derivation, std::size_t startTarget);
|
||||
size_t OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget);
|
||||
void OutputAlignment(std::vector< std::set<size_t> > &retAlignmentsS2T, const Moses::AlignmentInfo &ai);
|
||||
void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputTranslationOption(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputTreeFragmentsTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
|
||||
void ReconstructApplicationContext(const Moses::ChartHypothesis &hypo,
|
||||
const Moses::Sentence &sentence,
|
||||
ApplicationContext &context);
|
||||
void ReconstructApplicationContext(const search::Applied *applied,
|
||||
const Moses::Sentence &sentence,
|
||||
ApplicationContext &context);
|
||||
void WriteApplicationContext(std::ostream &out,
|
||||
const ApplicationContext &context);
|
||||
|
||||
void OutputAllFeatureScores(const Moses::ScoreComponentCollection &features
|
||||
, std::ostream &out);
|
||||
void OutputFeatureScores( std::ostream& out
|
||||
, const Moses::ScoreComponentCollection &features
|
||||
, const Moses::FeatureFunction *ff
|
||||
, std::string &lastName );
|
||||
|
||||
public:
|
||||
static IOWrapperChart *GetIOWrapper(const Moses::StaticData &staticData);
|
||||
|
||||
IOWrapperChart(const std::vector<Moses::FactorType> &inputFactorOrder
|
||||
, const std::vector<Moses::FactorType> &outputFactorOrder
|
||||
, const Moses::FactorMask &inputFactorUsed
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath
|
||||
, const std::string &inputFilePath="");
|
||||
~IOWrapperChart();
|
||||
|
||||
Moses::InputType* GetInput(Moses::InputType *inputType);
|
||||
bool ReadInput(IOWrapperChart &ioWrapper, InputTypeEnum inputType, InputType*& source);
|
||||
|
||||
void OutputBestHypo(const Moses::ChartHypothesis *hypo, long translationId);
|
||||
void OutputBestHypo(search::Applied applied, long translationId);
|
||||
void OutputBestNone(long translationId);
|
||||
void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
|
||||
void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
|
||||
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedTreeFragmentsTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
|
||||
void OutputDetailedAllTranslationReport(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, const Moses::ChartManager &manager, const Moses::Sentence &sentence, long translationId);
|
||||
void Backtrack(const Moses::ChartHypothesis *hypo);
|
||||
|
||||
Moses::OutputCollector *GetSearchGraphOutputCollector() {
|
||||
return m_searchGraphOutputCollector;
|
||||
}
|
||||
|
||||
void OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo);
|
||||
void OutputUnknowns(const std::vector<Moses::Phrase*> &, long);
|
||||
|
||||
static void FixPrecision(std::ostream &, size_t size=3);
|
||||
};
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user