2006-07-04 22:04:38 +04:00
|
|
|
// $Id$
|
|
|
|
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based language decoder
|
|
|
|
Copyright (c) 2006 University of Edinburgh
|
|
|
|
All rights reserved.
|
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without modification,
|
|
|
|
are permitted provided that the following conditions are met:
|
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright notice,
|
|
|
|
this list of conditions and the following disclaimer in the documentation
|
|
|
|
and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of the University of Edinburgh nor the names of its contributors
|
|
|
|
may be used to endorse or promote products derived from this software
|
|
|
|
without specific prior written permission.
|
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
|
|
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
|
|
|
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
|
|
|
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
// example file on how to use moses library
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include "TypeDef.h"
|
|
|
|
#include "Util.h"
|
|
|
|
#include "IOCommandLine.h"
|
|
|
|
#include "Hypothesis.h"
|
2006-07-25 03:48:18 +04:00
|
|
|
#include "WordsRange.h"
|
2006-07-04 22:04:38 +04:00
|
|
|
#include "LatticePathList.h"
|
2006-07-28 05:15:09 +04:00
|
|
|
#include "StaticData.h"
|
2006-07-29 05:11:45 +04:00
|
|
|
#include "DummyScoreProducers.h"
|
2006-07-04 22:04:38 +04:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
/**
 * Construct the command-line I/O handler.
 *
 * Stores the factor configuration and, when an n-best list is requested
 * (nBestSize > 0), opens the n-best output file.
 *
 * \param inputFactorOrder  factor types of the input, in order
 * \param outputFactorOrder factor types to print for the output, in order
 * \param inputFactorUsed   mask of the input factors in use
 * \param factorCollection  factor collection handed on when reading input
 * \param nBestSize         the n-best file is opened only if this is > 0
 * \param nBestFilePath     path of the n-best file to open for writing
 */
IOCommandLine::IOCommandLine(
				const vector<FactorType>	&inputFactorOrder
				, const vector<FactorType>	&outputFactorOrder
				, const FactorMask		&inputFactorUsed
				, FactorCollection		&factorCollection
				, size_t			nBestSize
				, const string			&nBestFilePath)
:m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
,m_factorCollection(factorCollection)
{
	if (nBestSize > 0)
	{
		m_nBestFile.open(nBestFilePath.c_str());
		if (!m_nBestFile.is_open())
		{
			// Do not abort: decoding can still proceed, but make the failure
			// visible instead of silently dropping every n-best line.
			TRACE_ERR("WARNING: could not open n-best file '" << nBestFilePath << "' for writing" << endl);
		}
	}
}
|
|
|
|
|
2006-07-21 21:43:42 +04:00
|
|
|
/**
 * Read input from standard input.
 *
 * Forwards to InputOutput::GetInput with std::cin as the source stream,
 * together with this object's input factor order and factor collection.
 *
 * \param in passed through unchanged to InputOutput::GetInput
 * \return whatever InputOutput::GetInput returns
 */
InputType*IOCommandLine::GetInput(InputType* in)
{
	std::istream &source = std::cin;
	return InputOutput::GetInput(in, source, m_inputFactorOrder, m_factorCollection);
}
|
|
|
|
|
2006-08-12 01:04:38 +04:00
|
|
|
/***
|
|
|
|
* print surface factor only for the given phrase
|
|
|
|
*/
|
2006-08-02 00:02:39 +04:00
|
|
|
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
|
2006-07-04 22:04:38 +04:00
|
|
|
{
|
2006-08-02 00:12:58 +04:00
|
|
|
assert(outputFactorOrder.size() > 0);
|
2006-08-21 18:24:09 +04:00
|
|
|
if (reportAllFactors == true)
|
2006-08-02 00:02:39 +04:00
|
|
|
{
|
|
|
|
out << phrase;
|
2006-08-21 18:24:09 +04:00
|
|
|
}
|
2006-08-02 00:02:39 +04:00
|
|
|
else
|
|
|
|
{
|
|
|
|
size_t size = phrase.GetSize();
|
|
|
|
for (size_t pos = 0 ; pos < size ; pos++)
|
|
|
|
{
|
2006-08-02 00:12:58 +04:00
|
|
|
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
|
|
|
|
out << *factor;
|
|
|
|
|
|
|
|
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++)
|
2006-08-02 00:02:39 +04:00
|
|
|
{
|
|
|
|
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
2006-08-02 00:12:58 +04:00
|
|
|
out << "|" << *factor;
|
2006-08-02 00:02:39 +04:00
|
|
|
}
|
2006-08-02 00:12:58 +04:00
|
|
|
out << " ";
|
2006-08-02 00:02:39 +04:00
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-02 00:02:39 +04:00
|
|
|
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
|
2006-08-30 23:51:07 +04:00
|
|
|
,bool reportSegmentation, bool reportAllFactors)
|
2006-07-04 22:04:38 +04:00
|
|
|
{
|
|
|
|
if ( hypo != NULL)
|
|
|
|
{
|
2006-08-30 23:51:07 +04:00
|
|
|
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
|
2006-08-12 01:04:38 +04:00
|
|
|
OutputSurface(out, hypo->GetTargetPhrase(), outputFactorOrder, reportAllFactors);
|
2006-07-25 03:48:18 +04:00
|
|
|
|
2006-08-30 23:51:07 +04:00
|
|
|
if (reportSegmentation == true
|
2006-08-21 18:24:09 +04:00
|
|
|
&& hypo->GetTargetPhrase().GetSize() > 0) {
|
|
|
|
out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
|
|
|
|
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
|
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-07-15 01:51:05 +04:00
|
|
|
void IOCommandLine::Backtrack(const Hypothesis *hypo){
|
|
|
|
|
2006-07-17 19:05:00 +04:00
|
|
|
if (hypo->GetPrevHypo() != NULL) {
|
2006-08-30 23:51:07 +04:00
|
|
|
VERBOSE(3,hypo->m_id << " <= ");
|
2006-07-15 01:51:05 +04:00
|
|
|
Backtrack(hypo->GetPrevHypo());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-08-30 23:51:07 +04:00
|
|
|
void IOCommandLine::SetOutput(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
|
2006-07-04 22:04:38 +04:00
|
|
|
{
|
|
|
|
if (hypo != NULL)
|
|
|
|
{
|
2006-08-30 21:03:27 +04:00
|
|
|
VERBOSE(2,"BEST TRANSLATION: " << *hypo << endl);
|
2006-08-30 23:51:07 +04:00
|
|
|
VERBOSE(3,"Best path: ");
|
2006-07-17 19:05:00 +04:00
|
|
|
Backtrack(hypo);
|
2006-08-30 23:51:07 +04:00
|
|
|
VERBOSE(3,"0" << std::endl);
|
2006-07-15 01:51:05 +04:00
|
|
|
|
2006-08-30 23:51:07 +04:00
|
|
|
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
else
|
2006-07-14 22:13:50 +04:00
|
|
|
{
|
2006-08-30 21:03:27 +04:00
|
|
|
TRACE_ERR("NO BEST TRANSLATION" << endl);
|
2006-07-14 22:13:50 +04:00
|
|
|
}
|
|
|
|
|
2006-07-04 22:04:38 +04:00
|
|
|
cout << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
void IOCommandLine::SetNBest(const LatticePathList &nBestList, long translationId)
|
|
|
|
{
|
2006-08-13 23:08:39 +04:00
|
|
|
bool labeledOutput = StaticData::Instance()->IsLabeledNBestList();
|
|
|
|
|
2006-07-04 22:04:38 +04:00
|
|
|
LatticePathList::const_iterator iter;
|
|
|
|
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
|
|
|
|
{
|
|
|
|
const LatticePath &path = **iter;
|
2006-07-29 02:11:20 +04:00
|
|
|
const std::vector<const Hypothesis *> &edges = path.GetEdges();
|
2006-07-04 22:04:38 +04:00
|
|
|
|
2006-08-04 08:45:48 +04:00
|
|
|
// print the surface factor of the translation
|
2006-07-04 22:04:38 +04:00
|
|
|
m_nBestFile << translationId << " ||| ";
|
|
|
|
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
|
|
|
|
{
|
2006-07-29 02:11:20 +04:00
|
|
|
const Hypothesis &edge = *edges[currEdge];
|
2006-08-02 00:02:39 +04:00
|
|
|
OutputSurface(m_nBestFile, edge.GetTargetPhrase(), m_outputFactorOrder, false); // false for not reporting all factors
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
|
|
|
m_nBestFile << " ||| ";
|
|
|
|
|
2006-08-04 08:45:48 +04:00
|
|
|
// print the scores in a hardwired order
|
|
|
|
// before each model type, the corresponding command-line-like name must be emitted
|
|
|
|
// MERT script relies on this
|
2006-07-29 05:11:45 +04:00
|
|
|
|
|
|
|
// basic distortion
|
2006-08-13 23:08:39 +04:00
|
|
|
if (labeledOutput)
|
|
|
|
m_nBestFile << "d: ";
|
2006-07-29 05:11:45 +04:00
|
|
|
m_nBestFile << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetDistortionScoreProducer()) << " ";
|
2006-07-04 22:04:38 +04:00
|
|
|
|
2006-08-13 23:51:17 +04:00
|
|
|
// reordering
|
2006-08-13 22:11:52 +04:00
|
|
|
vector<LexicalReordering*> rms = StaticData::Instance()->GetReorderModels();
|
|
|
|
if(rms.size() > 0)
|
|
|
|
{
|
|
|
|
vector<LexicalReordering*>::iterator iter;
|
|
|
|
for(iter = rms.begin(); iter != rms.end(); ++iter)
|
|
|
|
{
|
2006-08-13 23:51:17 +04:00
|
|
|
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
|
|
|
for (size_t j = 0; j<scores.size(); ++j)
|
|
|
|
{
|
|
|
|
m_nBestFile << scores[j] << " ";
|
|
|
|
}
|
2006-08-13 22:11:52 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-07-04 22:04:38 +04:00
|
|
|
// lm
|
2006-07-28 05:15:09 +04:00
|
|
|
const LMList& lml = StaticData::Instance()->GetAllLM();
|
2006-08-04 08:45:48 +04:00
|
|
|
if (lml.size() > 0) {
|
2006-08-13 23:08:39 +04:00
|
|
|
if (labeledOutput)
|
|
|
|
m_nBestFile << "lm: ";
|
2006-08-04 08:45:48 +04:00
|
|
|
LMList::const_iterator lmi = lml.begin();
|
|
|
|
for (; lmi != lml.end(); ++lmi) {
|
|
|
|
m_nBestFile << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// translation components
|
2006-07-28 05:15:09 +04:00
|
|
|
vector<PhraseDictionaryBase*> pds = StaticData::Instance()->GetPhraseDictionaries();
|
2006-08-04 08:45:48 +04:00
|
|
|
if (pds.size() > 0) {
|
2006-08-13 23:08:39 +04:00
|
|
|
if (labeledOutput)
|
|
|
|
m_nBestFile << "tm: ";
|
2006-08-13 07:03:18 +04:00
|
|
|
vector<PhraseDictionaryBase*>::iterator iter;
|
|
|
|
for (iter = pds.begin(); iter != pds.end(); ++iter) {
|
|
|
|
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
2006-08-04 08:45:48 +04:00
|
|
|
for (size_t j = 0; j<scores.size(); ++j)
|
|
|
|
m_nBestFile << scores[j] << " ";
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
|
2006-08-04 08:45:48 +04:00
|
|
|
// word penalty
|
2006-08-13 23:08:39 +04:00
|
|
|
if (labeledOutput)
|
|
|
|
m_nBestFile << "w: ";
|
2006-07-29 05:11:45 +04:00
|
|
|
m_nBestFile << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance()->GetWordPenaltyProducer()) << " ";
|
2006-07-04 22:04:38 +04:00
|
|
|
|
|
|
|
// generation
|
2006-07-28 05:15:09 +04:00
|
|
|
vector<GenerationDictionary*> gds = StaticData::Instance()->GetGenerationDictionaries();
|
2006-08-04 08:45:48 +04:00
|
|
|
if (gds.size() > 0) {
|
2006-08-13 23:08:39 +04:00
|
|
|
if (labeledOutput)
|
|
|
|
m_nBestFile << "g: ";
|
2006-08-13 07:03:18 +04:00
|
|
|
vector<GenerationDictionary*>::iterator iter;
|
|
|
|
for (iter = gds.begin(); iter != gds.end(); ++iter) {
|
|
|
|
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
2006-08-04 08:45:48 +04:00
|
|
|
for (size_t j = 0; j<scores.size(); j++) {
|
|
|
|
m_nBestFile << scores[j] << " ";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2006-07-04 22:04:38 +04:00
|
|
|
|
|
|
|
// total
|
2006-07-29 05:11:45 +04:00
|
|
|
m_nBestFile << "||| " << path.GetTotalScore() << endl;
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|
2006-07-29 02:57:31 +04:00
|
|
|
|
|
|
|
m_nBestFile<<std::flush;
|
2006-07-04 22:04:38 +04:00
|
|
|
}
|