svn properties

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1585 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
hieuhoang1972 2008-03-04 11:10:16 +00:00
parent 226dd90d0c
commit 16f69dda75
12 changed files with 1205 additions and 1243 deletions

View File

@ -1,23 +1,23 @@
This is the code to put moses on the web. It's buggy and a bit complicated to run.
1st compile the c++ executable. i use eclipse, the makefile doesn't work with this, it will create an exe called moses-cgi.
i've also included a linux precompiled version which u should use if u can. it may run on your system if u have the same library versions as me... (my computer is fedora 6)
to run the system:
1. make a subdirectory in the directory which can be seen by the apache server. cd into this directory
2. make 2 named pipes called 'input' and 'output'
3. copy moses-cgi into this directory, or softlink it into there
4. run the exe like
./moses-cgi -f moses.ini < input > output
5. copy moses.php into the same directory, this is the html page people should be using to run the demo
6. make sure that apache can execute moses.php and can access 'input' & 'output'
told u it's complicated !
if u want to see what it looks like, go to
http://groups.inf.ed.ac.uk/hoang/demo/en-de/moses.php
u're welcome to change the moses.php. please keep the link back to factored-translation.com
email me (hieuhoang@gmail.com) if u have any queries, i'm sure u will...
This is the code to put moses on the web. It's buggy and a bit complicated to run.
1st compile the c++ executable. i use eclipse, the makefile doesn't work with this, it will create an exe called moses-cgi.
i've also included a linux precompiled version which u should use if u can. it may run on your system if u have the same library versions as me... (my computer is fedora 6)
to run the system:
1. make a subdirectory in the directory which can be seen by the apache server. cd into this directory
2. make 2 named pipes called 'input' and 'output'
3. copy moses-cgi into this directory, or softlink it into there
4. run the exe like
./moses-cgi -f moses.ini < input > output
5. copy moses.php into the same directory, this is the html page people should be using to run the demo
6. make sure that apache can execute moses.php and can access 'input' & 'output'
told u it's complicated !
if u want to see what it looks like, go to
http://groups.inf.ed.ac.uk/hoang/demo/en-de/moses.php
u're welcome to change the moses.php. please keep the link back to factored-translation.com
email me (hieuhoang@gmail.com) if u have any queries, i'm sure u will...

View File

@ -1,49 +1,49 @@
<html>
<head><title>Moses demo</title></head>
<body>
<A HREF="../">back &lt;&lt;--</A><BR><BR>
<B>Moses demo</B><BR><BR>
<?php
// Demo front end: on POST, send the submitted text to the decoder through the
// 'input' pipe and read one translated line back from the 'output' pipe.
// The variables are initialised up front so a plain GET renders an empty form
// without touching undefined variables.
$input      = "";
$inputLower = "";
$output     = "";
if ($_SERVER['REQUEST_METHOD'] == 'POST')
{
	$input = isset($_REQUEST['txtInput']) ? $_REQUEST['txtInput'] : "";
	// Exactly one line is written and exactly one line is read back per request.
	// Collapse embedded newlines/tabs so a multi-line textarea cannot leave extra
	// lines queued in the pipes and shift the answers of later requests.
	$inputLower = strtolower(trim(preg_replace('/\s+/', ' ', $input)));
	$inputFile = fopen('input', 'a') or die("can't open input file");
	$outputFile = fopen('output', 'r') or die("can't open output file");
	fwrite($inputFile, $inputLower ."\n");
	$line = fgets($outputFile);
	// fgets() returns false on EOF/error; show an empty translation instead.
	$output = ($line === false) ? "" : $line;
	fclose($inputFile);
	fclose($outputFile);
}
?>
<BR>
<form action="moses.php" method="POST">
<textarea name="txtInput" rows="5" cols="50"><?php echo htmlspecialchars($input, ENT_QUOTES); ?></textarea>
<BR>
<input type="submit" name="txt_submit" value="Submit">
</form><br><br>
<?php
if ($_SERVER['REQUEST_METHOD'] == 'POST')
{
	// Both strings originate outside this script, so escape them before
	// embedding them in the page.
	echo "Input sentence is: ".htmlspecialchars($inputLower, ENT_QUOTES)."<BR>";
	echo "Translated is: " .htmlspecialchars($output, ENT_QUOTES) ."<BR>";
}
?>
<H6>
Copyright 2007 <A HREF="http://www.factored-translation.com/">Factored Translation</A><BR>
Licensed under the <A HREF="http://www.gnu.org/licenses/lgpl.html">LGPL</A><BR>
</H6>
</body>
</html>
<html>
<head><title>Moses demo</title></head>
<body>
<A HREF="../">back &lt;&lt;--</A><BR><BR>
<B>Moses demo</B><BR><BR>
<?php
// Demo front end: on POST, send the submitted text to the decoder through the
// 'input' pipe and read one translated line back from the 'output' pipe.
// The variables are initialised up front so a plain GET renders an empty form
// without touching undefined variables.
$input      = "";
$inputLower = "";
$output     = "";
if ($_SERVER['REQUEST_METHOD'] == 'POST')
{
	$input = isset($_REQUEST['txtInput']) ? $_REQUEST['txtInput'] : "";
	// Exactly one line is written and exactly one line is read back per request.
	// Collapse embedded newlines/tabs so a multi-line textarea cannot leave extra
	// lines queued in the pipes and shift the answers of later requests.
	$inputLower = strtolower(trim(preg_replace('/\s+/', ' ', $input)));
	$inputFile = fopen('input', 'a') or die("can't open input file");
	$outputFile = fopen('output', 'r') or die("can't open output file");
	fwrite($inputFile, $inputLower ."\n");
	$line = fgets($outputFile);
	// fgets() returns false on EOF/error; show an empty translation instead.
	$output = ($line === false) ? "" : $line;
	fclose($inputFile);
	fclose($outputFile);
}
?>
<BR>
<form action="moses.php" method="POST">
<textarea name="txtInput" rows="5" cols="50"><?php echo htmlspecialchars($input, ENT_QUOTES); ?></textarea>
<BR>
<input type="submit" name="txt_submit" value="Submit">
</form><br><br>
<?php
if ($_SERVER['REQUEST_METHOD'] == 'POST')
{
	// Both strings originate outside this script, so escape them before
	// embedding them in the page.
	echo "Input sentence is: ".htmlspecialchars($inputLower, ENT_QUOTES)."<BR>";
	echo "Translated is: " .htmlspecialchars($output, ENT_QUOTES) ."<BR>";
}
?>
<H6>
Copyright 2007 <A HREF="http://www.factored-translation.com/">Factored Translation</A><BR>
Licensed under the <A HREF="http://www.gnu.org/licenses/lgpl.html">LGPL</A><BR>
</H6>
</body>
</html>

View File

@ -1,204 +1,204 @@
// $Id: FactorCollection.cpp 1218 2007-02-16 18:08:37Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Tokenizer.h"
#include "Util.h"
using namespace std;
// Set to true by the first Tokenizer constructor call so the shared tables
// below are populated only once.
bool Tokenizer::m_initialized = false;
// Abbreviations ending in '.' that SentenceSeparator() keeps as a single token.
std::set<std::string> Tokenizer::m_prefixes;
// Single-character strings that SentenceSeparator() splits off the end of a token.
std::set<std::string> Tokenizer::m_punctuation;
// Single-character strings that QuotesFirst()/QuotesLast() split off a token.
std::set<std::string> Tokenizer::m_quotes;
/**
 * Split sentence-final punctuation off a single token.
 *
 * Appends to newTokens either the token unchanged, or the token without its
 * last character followed by that character and a "\n" marker token.
 * A token ending in '.' that is listed in m_prefixes is kept intact.
 * An empty token appends nothing.
 *
 * @param newTokens output vector the resulting tokens are appended to
 * @param token     one input token
 */
void Tokenizer::SentenceSeparator(vector<string> &newTokens, const string &token)
{
	// substr(size()-1, ...) on an empty string would throw std::out_of_range.
	if (token.empty())
		return;

	const string lastChar = token.substr(token.size() - 1, 1);
	const string word     = token.substr(0, token.size() - 1);
	const bool endsWithPunctuation = m_punctuation.find(lastChar) != m_punctuation.end();

	if (token.size() == 1 && endsWithPunctuation)
	{ // the token is nothing but a punctuation mark
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else if (lastChar == ".")
	{
		if (m_prefixes.find(token) != m_prefixes.end())
		{ // known abbreviation: keep the dot attached
			newTokens.push_back(token);
		}
		else
		{ // a full stop: new sentence
			newTokens.push_back(word);
			newTokens.push_back(lastChar);
			newTokens.push_back("\n");
		}
	}
	else if (endsWithPunctuation)
	{
		newTokens.push_back(word);
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else
	{ // just a normal word
		newTokens.push_back(token);
	}
}
/**
 * Split a leading quote character off a token.
 *
 * If the first character of token is in m_quotes, it is appended to newTokens
 * as its own token, followed by the remainder when that is non-empty.
 * Otherwise the token is appended unchanged. An empty token appends nothing.
 *
 * @param newTokens output vector the resulting tokens are appended to
 * @param token     one input token
 */
void Tokenizer::QuotesFirst(vector<string> &newTokens, const string &token)
{
	// substr(1, ...) on an empty string would throw std::out_of_range.
	if (token.empty())
		return;

	const string firstChar = token.substr(0, 1);
	if (m_quotes.find(firstChar) == m_quotes.end())
	{
		newTokens.push_back(token);
		return;
	}

	newTokens.push_back(firstChar);
	const string rest = token.substr(1);
	if (!rest.empty())
		newTokens.push_back(rest);
}
/**
 * Split a trailing quote character off a token.
 *
 * If the last character of token is in m_quotes, the preceding part (when
 * non-empty) and then the quote are appended to newTokens as separate tokens.
 * Otherwise the token is appended unchanged. An empty token appends nothing.
 *
 * @param newTokens output vector the resulting tokens are appended to
 * @param token     one input token
 */
void Tokenizer::QuotesLast(vector<string> &newTokens, const string &token)
{
	// substr(size()-1, ...) on an empty string would throw std::out_of_range.
	if (token.empty())
		return;

	const string lastChar = token.substr(token.size() - 1, 1);
	if (m_quotes.find(lastChar) == m_quotes.end())
	{
		newTokens.push_back(token);
		return;
	}

	const string head = token.substr(0, token.size() - 1);
	if (!head.empty())
		newTokens.push_back(head);
	newTokens.push_back(lastChar);
}
/**
 * Tokenize a line of text.
 *
 * The input is split on spaces, tabs and newlines with the global
 * ::Tokenize(), then every token goes through three passes in order:
 * SentenceSeparator(), QuotesFirst() and QuotesLast(). The resulting tokens
 * are joined with single spaces.
 *
 * @param input raw text
 * @return the tokens joined by " "
 */
string Tokenizer::Tokenize(const string &input)
{
	typedef vector<string>::const_iterator TokenIter;

	const vector<string> rawTokens = ::Tokenize(input, " \t\n");

	// pass 1: sentence-final punctuation
	vector<string> afterSentenceSplit;
	for (TokenIter it = rawTokens.begin(); it != rawTokens.end(); ++it)
		SentenceSeparator(afterSentenceSplit, *it);

	// pass 2: leading quotes
	vector<string> afterLeadingQuotes;
	for (TokenIter it = afterSentenceSplit.begin(); it != afterSentenceSplit.end(); ++it)
		QuotesFirst(afterLeadingQuotes, *it);

	// pass 3: trailing quotes
	vector<string> finalTokens;
	for (TokenIter it = afterLeadingQuotes.begin(); it != afterLeadingQuotes.end(); ++it)
		QuotesLast(finalTokens, *it);

	return Join(" ", finalTokens);
}
/**
 * Construct a tokenizer for the given language.
 *
 * The first construction fills the static prefix, punctuation and quote
 * tables; later constructions only store the language string.
 *
 * @param language stored in m_language
 */
Tokenizer::Tokenizer(const std::string &language)
	: m_language(language)
{
	if (m_initialized)
		return;
	m_initialized = true;

	// Abbreviations whose trailing dot must not be read as a sentence end.
	const char *const prefixTable[] = {
		"adj.", "adm.", "adv.", "asst.", "ave.", "bldg.", "brig.", "bros.",
		"capt.", "cmdr.", "col.", "comdr.", "con.", "corp.", "cpl.", "dr.",
		"ens.", "gen.", "gov.", "hon.", "hosp.", "insp.", "lt.", "maj.",
		"messrs.", "mlle.", "mme.", "mr.", "mrs.", "ms.", "msgr.", "op.",
		"ord.", "pfc.", "ph.", "prof.", "pvt.", "rep.", "reps.", "res.",
		"rev.", "rt.", "sen.", "sens.", "sgt.", "sr.", "st.", "supt.",
		"surg.", "v.", "vs."
	};
	const char *const punctuationTable[] = { ":", ".", "!", "?", ";" };
	// NOTE(review): the last two literals look like mis-encoded characters
	// (possibly typographic quotes) -- confirm against the original file encoding.
	const char *const quoteTable[] = { "\"", "'", "<EFBFBD>", "<EFBFBD>" };

	m_prefixes.insert(prefixTable,
	                  prefixTable + sizeof(prefixTable) / sizeof(prefixTable[0]));
	m_punctuation.insert(punctuationTable,
	                     punctuationTable + sizeof(punctuationTable) / sizeof(punctuationTable[0]));
	m_quotes.insert(quoteTable,
	                quoteTable + sizeof(quoteTable) / sizeof(quoteTable[0]));
}
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "Tokenizer.h"
#include "Util.h"
using namespace std;
// Set to true by the first Tokenizer constructor call so the shared tables
// below are populated only once.
bool Tokenizer::m_initialized = false;
// Abbreviations ending in '.' that SentenceSeparator() keeps as a single token.
std::set<std::string> Tokenizer::m_prefixes;
// Single-character strings that SentenceSeparator() splits off the end of a token.
std::set<std::string> Tokenizer::m_punctuation;
// Single-character strings that QuotesFirst()/QuotesLast() split off a token.
std::set<std::string> Tokenizer::m_quotes;
/**
 * Split sentence-final punctuation off a single token.
 *
 * Appends to newTokens either the token unchanged, or the token without its
 * last character followed by that character and a "\n" marker token.
 * A token ending in '.' that is listed in m_prefixes is kept intact.
 * An empty token appends nothing.
 *
 * @param newTokens output vector the resulting tokens are appended to
 * @param token     one input token
 */
void Tokenizer::SentenceSeparator(vector<string> &newTokens, const string &token)
{
	// substr(size()-1, ...) on an empty string would throw std::out_of_range.
	if (token.empty())
		return;

	const string lastChar = token.substr(token.size() - 1, 1);
	const string word     = token.substr(0, token.size() - 1);
	const bool endsWithPunctuation = m_punctuation.find(lastChar) != m_punctuation.end();

	if (token.size() == 1 && endsWithPunctuation)
	{ // the token is nothing but a punctuation mark
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else if (lastChar == ".")
	{
		if (m_prefixes.find(token) != m_prefixes.end())
		{ // known abbreviation: keep the dot attached
			newTokens.push_back(token);
		}
		else
		{ // a full stop: new sentence
			newTokens.push_back(word);
			newTokens.push_back(lastChar);
			newTokens.push_back("\n");
		}
	}
	else if (endsWithPunctuation)
	{
		newTokens.push_back(word);
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else
	{ // just a normal word
		newTokens.push_back(token);
	}
}
/**
 * Split a leading quote character off a token.
 *
 * If the first character of token is in m_quotes, it is appended to newTokens
 * as its own token, followed by the remainder when that is non-empty.
 * Otherwise the token is appended unchanged. An empty token appends nothing.
 *
 * @param newTokens output vector the resulting tokens are appended to
 * @param token     one input token
 */
void Tokenizer::QuotesFirst(vector<string> &newTokens, const string &token)
{
	// substr(1, ...) on an empty string would throw std::out_of_range.
	if (token.empty())
		return;

	const string firstChar = token.substr(0, 1);
	if (m_quotes.find(firstChar) == m_quotes.end())
	{
		newTokens.push_back(token);
		return;
	}

	newTokens.push_back(firstChar);
	const string rest = token.substr(1);
	if (!rest.empty())
		newTokens.push_back(rest);
}
/**
 * Split a trailing quote character off a token.
 *
 * If the last character of token is in m_quotes, the preceding part (when
 * non-empty) and then the quote are appended to newTokens as separate tokens.
 * Otherwise the token is appended unchanged. An empty token appends nothing.
 *
 * @param newTokens output vector the resulting tokens are appended to
 * @param token     one input token
 */
void Tokenizer::QuotesLast(vector<string> &newTokens, const string &token)
{
	// substr(size()-1, ...) on an empty string would throw std::out_of_range.
	if (token.empty())
		return;

	const string lastChar = token.substr(token.size() - 1, 1);
	if (m_quotes.find(lastChar) == m_quotes.end())
	{
		newTokens.push_back(token);
		return;
	}

	const string head = token.substr(0, token.size() - 1);
	if (!head.empty())
		newTokens.push_back(head);
	newTokens.push_back(lastChar);
}
/**
 * Tokenize a line of text.
 *
 * The input is split on spaces, tabs and newlines with the global
 * ::Tokenize(), then every token goes through three passes in order:
 * SentenceSeparator(), QuotesFirst() and QuotesLast(). The resulting tokens
 * are joined with single spaces.
 *
 * @param input raw text
 * @return the tokens joined by " "
 */
string Tokenizer::Tokenize(const string &input)
{
	typedef vector<string>::const_iterator TokenIter;

	const vector<string> rawTokens = ::Tokenize(input, " \t\n");

	// pass 1: sentence-final punctuation
	vector<string> afterSentenceSplit;
	for (TokenIter it = rawTokens.begin(); it != rawTokens.end(); ++it)
		SentenceSeparator(afterSentenceSplit, *it);

	// pass 2: leading quotes
	vector<string> afterLeadingQuotes;
	for (TokenIter it = afterSentenceSplit.begin(); it != afterSentenceSplit.end(); ++it)
		QuotesFirst(afterLeadingQuotes, *it);

	// pass 3: trailing quotes
	vector<string> finalTokens;
	for (TokenIter it = afterLeadingQuotes.begin(); it != afterLeadingQuotes.end(); ++it)
		QuotesLast(finalTokens, *it);

	return Join(" ", finalTokens);
}
/**
 * Construct a tokenizer for the given language.
 *
 * The first construction fills the static prefix, punctuation and quote
 * tables; later constructions only store the language string.
 *
 * @param language stored in m_language
 */
Tokenizer::Tokenizer(const std::string &language)
	: m_language(language)
{
	if (m_initialized)
		return;
	m_initialized = true;

	// Abbreviations whose trailing dot must not be read as a sentence end.
	const char *const prefixTable[] = {
		"adj.", "adm.", "adv.", "asst.", "ave.", "bldg.", "brig.", "bros.",
		"capt.", "cmdr.", "col.", "comdr.", "con.", "corp.", "cpl.", "dr.",
		"ens.", "gen.", "gov.", "hon.", "hosp.", "insp.", "lt.", "maj.",
		"messrs.", "mlle.", "mme.", "mr.", "mrs.", "ms.", "msgr.", "op.",
		"ord.", "pfc.", "ph.", "prof.", "pvt.", "rep.", "reps.", "res.",
		"rev.", "rt.", "sen.", "sens.", "sgt.", "sr.", "st.", "supt.",
		"surg.", "v.", "vs."
	};
	const char *const punctuationTable[] = { ":", ".", "!", "?", ";" };
	// NOTE(review): the last two entries are empty strings here; they look like
	// characters lost to an encoding problem -- confirm against the original file.
	const char *const quoteTable[] = { "\"", "'", "", "" };

	m_prefixes.insert(prefixTable,
	                  prefixTable + sizeof(prefixTable) / sizeof(prefixTable[0]));
	m_punctuation.insert(punctuationTable,
	                     punctuationTable + sizeof(punctuationTable) / sizeof(punctuationTable[0]));
	m_quotes.insert(quoteTable,
	                quoteTable + sizeof(quoteTable) / sizeof(quoteTable[0]));
}

View File

@ -1,45 +1,45 @@
// $Id: FactorCollection.cpp 1218 2007-02-16 18:08:37Z hieuhoang1972 $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <vector>
#include <set>
// Rule-based tokenizer: splits text on whitespace, separates sentence-final
// punctuation and detaches leading/trailing quote characters.
// The lookup tables are static and shared by all instances.
class Tokenizer
{
protected:
// language string passed to the constructor
std::string m_language;
// true once the static tables below have been filled
static bool m_initialized;
// abbreviations ending in '.' that are kept as one token
static std::set<std::string> m_prefixes;
// characters split off the end of a token as sentence punctuation
static std::set<std::string> m_punctuation;
// characters split off the start/end of a token as quotes
static std::set<std::string> m_quotes;
public:
// Stores the language and, on first use, fills the static tables.
Tokenizer(const std::string &language);
// Runs all passes over input and returns the tokens joined by spaces.
std::string Tokenize(const std::string &input);
// Appends token to newTokens, splitting off trailing punctuation.
void SentenceSeparator(std::vector<std::string> &newTokens, const std::string &token);
// Appends token to newTokens, splitting off a leading quote character.
void QuotesFirst(std::vector<std::string> &newTokens, const std::string &token);
// Appends token to newTokens, splitting off a trailing quote character.
void QuotesLast(std::vector<std::string> &newTokens, const std::string &token);
};
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <vector>
#include <set>
// Rule-based tokenizer: splits text on whitespace, separates sentence-final
// punctuation and detaches leading/trailing quote characters.
// The lookup tables are static and shared by all instances.
class Tokenizer
{
protected:
// language string passed to the constructor
std::string m_language;
// true once the static tables below have been filled
static bool m_initialized;
// abbreviations ending in '.' that are kept as one token
static std::set<std::string> m_prefixes;
// characters split off the end of a token as sentence punctuation
static std::set<std::string> m_punctuation;
// characters split off the start/end of a token as quotes
static std::set<std::string> m_quotes;
public:
// Stores the language and, on first use, fills the static tables.
Tokenizer(const std::string &language);
// Runs all passes over input and returns the tokens joined by spaces.
std::string Tokenize(const std::string &input);
// Appends token to newTokens, splitting off trailing punctuation.
void SentenceSeparator(std::vector<std::string> &newTokens, const std::string &token);
// Appends token to newTokens, splitting off a leading quote character.
void QuotesFirst(std::vector<std::string> &newTokens, const std::string &token);
// Appends token to newTokens, splitting off a trailing quote character.
void QuotesLast(std::vector<std::string> &newTokens, const std::string &token);
};

View File

@ -1,382 +1,382 @@
// $Id: IOStream.cpp 110 2007-09-19 22:10:27Z hieu $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#include <iostream>
#include "TypeDef.h"
#include "Util.h"
#include "IOStream.h"
#include "Hypothesis.h"
#include "WordsRange.h"
#include "TrellisPathList.h"
#include "StaticData.h"
#include "DummyScoreProducers.h"
#include "InputFileStream.h"
using namespace std;
/**
 * Build an IOStream that reads its input from std::cin.
 *
 * When nBestSize is non-zero an n-best stream is set up: "-" as path selects
 * std::cout (and suppresses the single-best output), any other path is opened
 * as a file owned by this object.
 *
 * NOTE(review): m_translationId is not set in this constructor -- confirm it
 * is initialised elsewhere before GetInput(InputType*) increments it.
 */
IOStream::IOStream(
	  const vector<FactorType> &inputFactorOrder
	, const vector<FactorType> &outputFactorOrder
	, const FactorMask &inputFactorUsed
	, size_t nBestSize
	, const string &nBestFilePath)
	: m_inputFactorOrder(inputFactorOrder)
	, m_outputFactorOrder(outputFactorOrder)
	, m_inputFactorUsed(inputFactorUsed)
	, m_inputFile(NULL)
	, m_inputStream(&std::cin)
	, m_nBestStream(NULL)
{
	const bool wantNBest     = nBestSize > 0;
	const bool nBestToStdout = wantNBest && nBestFilePath == "-";

	// n-best on stdout and single-best on stdout would interleave
	m_surpressSingleBestOutput = nBestToStdout;

	if (nBestToStdout)
		m_nBestStream = &std::cout;
	else if (wantNBest)
		m_nBestStream = new std::ofstream(nBestFilePath.c_str());
}
/**
 * Build an IOStream that reads its input from the file at inputFilePath.
 *
 * The InputFileStream is allocated here and owned by this object. N-best
 * handling is the same as in the stdin constructor: "-" selects std::cout and
 * suppresses the single-best output, any other path is opened as a file.
 *
 * NOTE(review): m_translationId is not set in this constructor -- confirm it
 * is initialised elsewhere before GetInput(InputType*) increments it.
 */
IOStream::IOStream(const std::vector<FactorType> &inputFactorOrder
	, const std::vector<FactorType> &outputFactorOrder
	, const FactorMask &inputFactorUsed
	, size_t nBestSize
	, const std::string &nBestFilePath
	, const std::string &inputFilePath)
	: m_inputFactorOrder(inputFactorOrder)
	, m_outputFactorOrder(outputFactorOrder)
	, m_inputFactorUsed(inputFactorUsed)
	, m_inputFilePath(inputFilePath)
	, m_inputFile(new InputFileStream(inputFilePath))
	, m_nBestStream(NULL)
{
	const bool wantNBest     = nBestSize > 0;
	const bool nBestToStdout = wantNBest && nBestFilePath == "-";

	m_surpressSingleBestOutput = nBestToStdout;
	m_inputStream = m_inputFile;

	if (nBestToStdout)
		m_nBestStream = &std::cout;
	else if (wantNBest)
		m_nBestStream = new std::ofstream(nBestFilePath.c_str());
}
/**
 * Release the input file (if any) and the n-best file stream.
 * The n-best stream is only deleted when it is not std::cout, which the
 * constructors flag through m_surpressSingleBestOutput.
 */
IOStream::~IOStream()
{
	// deleting a null pointer is a no-op, so no explicit NULL test is needed
	delete m_inputFile;

	const bool nBestIsStdout = m_surpressSingleBestOutput;
	if (!nBestIsStdout)
	{ // n-best went to a file: destroying the stream closes it
		delete m_nBestStream;
	}
}
/**
 * Read the next line from the input stream.
 *
 * @return the line with Trim() applied, or "" when no line could be read.
 *
 * The stream state is tested for failure rather than for eof(): getline sets
 * eofbit when it reaches end-of-input while still delivering the characters
 * it read, so testing eof() would drop a final line that has no trailing
 * newline.
 */
string IOStream::GetInput()
{
	std::string line;
	if (!getline(*m_inputStream, line, '\n'))
		return "";
	return Trim(line);
}
/**
 * Fill inputType from the input stream.
 *
 * @param inputType object to read into; it is deleted here when reading fails
 * @return inputType with its translation id set, or NULL when Read() fails
 */
InputType*IOStream::GetInput(InputType* inputType)
{
	if (!inputType->Read(*m_inputStream, m_inputFactorOrder))
	{ // nothing more to read: dispose of the object on the caller's behalf
		delete inputType;
		return NULL;
	}

	inputType->SetTranslationId(m_translationId++);
	return inputType;
}
/***
* print surface factor only for the given phrase
*/
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
{
assert(outputFactorOrder.size() > 0);
if (reportAllFactors == true)
{
out << phrase;
}
else
{
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++)
{
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++)
{
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
out << "|" << *factor;
}
out << " ";
}
}
}
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
,bool reportSegmentation, bool reportAllFactors)
{
if ( hypo != NULL)
{
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
if (reportSegmentation == true
&& hypo->GetCurrTargetPhrase().GetSize() > 0) {
out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
}
}
}
/**
 * At verbosity 3, print the ids of hypo and its predecessors, each followed
 * by " <= ". The hypothesis without a predecessor is not printed.
 */
void IOStream::Backtrack(const Hypothesis *hypo)
{
	while (hypo->GetPrevHypo() != NULL)
	{
		VERBOSE(3,hypo->GetId() << " <= ");
		hypo = hypo->GetPrevHypo();
	}
}
/**
 * Write the best translation to cout, unless single-best output is
 * suppressed, and trace it at the configured verbosity.
 *
 * @param hypo                best hypothesis, or NULL when there is none
 *                            (an empty line is written in that case)
 * @param reportSegmentation  forwarded to OutputSurface
 * @param reportAllFactors    forwarded to OutputSurface
 */
void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
{
	if (hypo == NULL)
	{
		VERBOSE(1, "NO BEST TRANSLATION" << endl);
		if (!m_surpressSingleBestOutput)
		{
			cout << endl;
		}
		return;
	}

	VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
	VERBOSE(3,"Best path: ");
	Backtrack(hypo);
	VERBOSE(3,"0" << std::endl);

	if (m_surpressSingleBestOutput)
		return;

	OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
	cout << endl;
}
// Write every path of nBestList to the n-best stream, one line per path:
//   translationId ||| surface string ||| feature scores ||| total score [||| alignment]
// Score labels ("d: ", "lm: ", ...) are printed only when the labeled n-best
// option is on; the alignment field only when NBestIncludesAlignment() is on.
// The stream is dereferenced without a NULL check.
void IOStream::OutputNBestList(const TrellisPathList &nBestList, long translationId)
{
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
TrellisPathList::const_iterator iter;
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
{
const TrellisPath &path = **iter;
const std::vector<const Hypothesis *> &edges = path.GetEdges();
// print the surface factor of the translation
*m_nBestStream << translationId << " ||| ";
// edges are walked from the highest index down to 0
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
OutputSurface(*m_nBestStream, edge.GetCurrTargetPhrase(), m_outputFactorOrder, false); // false for not reporting all factors
}
*m_nBestStream << " ||| ";
// print the scores in a hardwired order
// before each model type, the corresponding command-line-like name must be emitted
// MERT script relies on this
// basic distortion
if (labeledOutput)
*m_nBestStream << "d: ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetDistortionScoreProducer()) << " ";
// reordering
// (these scores follow the distortion score without a label of their own)
vector<LexicalReordering*> rms = StaticData::Instance().GetReorderModels();
if(rms.size() > 0)
{
vector<LexicalReordering*>::iterator iter;
for(iter = rms.begin(); iter != rms.end(); ++iter)
{
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); ++j)
{
*m_nBestStream << scores[j] << " ";
}
}
}
// lm
const LMList& lml = StaticData::Instance().GetAllLM();
if (lml.size() > 0) {
if (labeledOutput)
*m_nBestStream << "lm: ";
LMList::const_iterator lmi = lml.begin();
for (; lmi != lml.end(); ++lmi) {
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
}
}
// translation components
if (StaticData::Instance().GetInputType()==0){
// translation components for text input
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "tm: ";
vector<PhraseDictionary*>::iterator iter;
for (iter = pds.begin(); iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); ++j)
*m_nBestStream << scores[j] << " ";
}
}
}
else{
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
vector<PhraseDictionary*>::iterator iter;
iter = pds.begin();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
size_t pd_numinputscore = (*iter)->GetNumInputScores();
// input scores of the first dictionary are printed under the "I: " label
if (pd_numinputscore){
if (labeledOutput)
*m_nBestStream << "I: ";
for (size_t j = 0; j < pd_numinputscore; ++j)
*m_nBestStream << scores[j] << " ";
}
// remaining scores of every dictionary; each dictionary's own input scores are skipped
for (iter = pds.begin() ; iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
size_t pd_numinputscore = (*iter)->GetNumInputScores();
if (iter == pds.begin() && labeledOutput)
*m_nBestStream << "tm: ";
for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
*m_nBestStream << scores[j] << " ";
}
}
}
// word penalty
if (labeledOutput)
*m_nBestStream << "w: ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";
// generation
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
if (gds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "g: ";
vector<GenerationDictionary*>::iterator iter;
for (iter = gds.begin(); iter != gds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); j++) {
*m_nBestStream << scores[j] << " ";
}
}
}
// total
*m_nBestStream << "||| " << path.GetTotalScore();
// optional alignment field: "srcStart[-srcEnd]=tgtStart[-tgtEnd]" per edge
if (includeAlignment) {
*m_nBestStream << " |||";
// starts at size()-2, so the edge at the highest index is skipped
// NOTE(review): presumably that edge is the empty initial hypothesis -- confirm
for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
WordsRange sourceRange = edge.GetCurrSourceWordsRange();
WordsRange targetRange = edge.GetCurrTargetWordsRange();
*m_nBestStream << " " << sourceRange.GetStartPos();
// the end position is printed only for ranges longer than one word
if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
*m_nBestStream << "-" << sourceRange.GetEndPos();
}
*m_nBestStream << "=" << targetRange.GetStartPos();
if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
*m_nBestStream << "-" << targetRange.GetEndPos();
}
}
}
*m_nBestStream << endl;
}
*m_nBestStream<<std::flush;
}
/**
 * Reset the error state of the input stream so reading can continue after a
 * failed read (for example after end-of-input).
 */
void IOStream::ClearInStream()
{
	// goodbit is what clear() uses when called without an argument
	m_inputStream->clear(std::ios_base::goodbit);
}
/**
 * Flush standard output so everything written so far becomes visible to the
 * reader immediately.
 */
void IOStream::FlushOutStream()
{
	std::cout << std::flush;
}
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#include <iostream>
#include "TypeDef.h"
#include "Util.h"
#include "IOStream.h"
#include "Hypothesis.h"
#include "WordsRange.h"
#include "TrellisPathList.h"
#include "StaticData.h"
#include "DummyScoreProducers.h"
#include "InputFileStream.h"
using namespace std;
/**
 * Build an IOStream that reads its input from std::cin.
 *
 * When nBestSize is non-zero an n-best stream is set up: "-" as path selects
 * std::cout (and suppresses the single-best output), any other path is opened
 * as a file owned by this object.
 *
 * NOTE(review): m_translationId is not set in this constructor -- confirm it
 * is initialised elsewhere before GetInput(InputType*) increments it.
 */
IOStream::IOStream(
	  const vector<FactorType> &inputFactorOrder
	, const vector<FactorType> &outputFactorOrder
	, const FactorMask &inputFactorUsed
	, size_t nBestSize
	, const string &nBestFilePath)
	: m_inputFactorOrder(inputFactorOrder)
	, m_outputFactorOrder(outputFactorOrder)
	, m_inputFactorUsed(inputFactorUsed)
	, m_inputFile(NULL)
	, m_inputStream(&std::cin)
	, m_nBestStream(NULL)
{
	const bool wantNBest     = nBestSize > 0;
	const bool nBestToStdout = wantNBest && nBestFilePath == "-";

	// n-best on stdout and single-best on stdout would interleave
	m_surpressSingleBestOutput = nBestToStdout;

	if (nBestToStdout)
		m_nBestStream = &std::cout;
	else if (wantNBest)
		m_nBestStream = new std::ofstream(nBestFilePath.c_str());
}
/**
 * Construct an IOStream that reads its input from the file at inputFilePath.
 *
 * The input file stream is allocated here and owned by this object.
 * When nBestSize > 0 an n-best stream is set up as well: a path of "-" sends
 * the n-best list to stdout and suppresses the single-best output; any other
 * path is opened as a file stream owned (and later deleted) by this object.
 *
 * The factor-order vectors and the factor mask are stored by reference, so
 * the caller must keep them alive for as long as this object exists.
 */
IOStream::IOStream(const std::vector<FactorType> &inputFactorOrder
    , const std::vector<FactorType> &outputFactorOrder
    , const FactorMask &inputFactorUsed
    , size_t nBestSize
    , const std::string &nBestFilePath
    , const std::string &inputFilePath)
    // initialisers are listed in member declaration order (see IOStream.h)
    :m_translationId(0) // was left uninitialised unless ResetTranslationId() was called
    ,m_inputFactorOrder(inputFactorOrder)
    ,m_outputFactorOrder(outputFactorOrder)
    ,m_inputFactorUsed(inputFactorUsed)
    ,m_nBestStream(NULL)
    ,m_inputFilePath(inputFilePath)
    ,m_inputStream(NULL)
    ,m_inputFile(new InputFileStream(inputFilePath))
    ,m_surpressSingleBestOutput(false)
{
    // m_inputStream is declared before m_inputFile, so it can only be pointed
    // at the file once the initialiser list has finished
    m_inputStream = m_inputFile;
    if (nBestSize > 0)
    {
        if (nBestFilePath == "-")
        {
            // n-best goes to stdout; the single-best output is switched off
            m_nBestStream = &std::cout;
            m_surpressSingleBestOutput = true;
        }
        else
        {
            std::ofstream *nBestFile = new std::ofstream;
            m_nBestStream = nBestFile;
            nBestFile->open(nBestFilePath.c_str());
            if (!nBestFile->is_open())
            {
                // keep going as before, but do not fail silently
                std::cerr << "WARNING: could not open n-best file " << nBestFilePath << std::endl;
            }
        }
    }
}
/**
 * Release the streams this object allocated: the input file (if any) and the
 * n-best file stream. An n-best stream that points at stdout is left alone.
 */
IOStream::~IOStream()
{
    // deleting a null pointer is a no-op, so no explicit check is needed
    delete m_inputFile;

    // the n-best stream was heap-allocated only when it is not the stdout alias
    const bool nBestIsOwnedFile = (m_nBestStream != NULL) && !m_surpressSingleBestOutput;
    if (nBestIsOwnedFile)
        delete m_nBestStream;
}
/**
 * Read one line from the input stream and return it trimmed.
 * Returns an empty string when nothing could be read.
 */
string IOStream::GetInput()
{
    std::string line;
    // Test for a failed read rather than for eof(): getline() sets eofbit even
    // when it did extract text from a final line that has no trailing newline,
    // and checking eof() would throw that text away.
    if (!getline(*m_inputStream, line, '\n'))
        return "";
    return Trim(line);
}
/**
 * Fill the given input object from the input stream.
 * On success the object gets the next translation id and is returned.
 * On failure the object is deleted here and NULL is returned, so the caller
 * must not use the pointer it passed in afterwards.
 */
InputType*IOStream::GetInput(InputType* inputType)
{
    if (!inputType->Read(*m_inputStream, m_inputFactorOrder))
    {
        delete inputType;
        return NULL;
    }
    inputType->SetTranslationId(m_translationId++);
    return inputType;
}
/***
* print surface factor only for the given phrase
*/
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
{
assert(outputFactorOrder.size() > 0);
if (reportAllFactors == true)
{
out << phrase;
}
else
{
size_t size = phrase.GetSize();
for (size_t pos = 0 ; pos < size ; pos++)
{
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
out << *factor;
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++)
{
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
out << "|" << *factor;
}
out << " ";
}
}
}
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
,bool reportSegmentation, bool reportAllFactors)
{
if ( hypo != NULL)
{
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
if (reportSegmentation == true
&& hypo->GetCurrTargetPhrase().GetSize() > 0) {
out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
}
}
}
void IOStream::Backtrack(const Hypothesis *hypo){
if (hypo->GetPrevHypo() != NULL) {
VERBOSE(3,hypo->GetId() << " <= ");
Backtrack(hypo->GetPrevHypo());
}
}
/**
 * Write the single-best translation to stdout.
 * When hypo is NULL an empty line is written instead. Nothing is written to
 * stdout when single-best output is suppressed (n-best list goes to stdout).
 * The translation id parameter is unused.
 */
void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
{
    if (hypo == NULL)
    {
        VERBOSE(1, "NO BEST TRANSLATION" << endl);
        if (!m_surpressSingleBestOutput)
        {
            cout << endl;
        }
        return;
    }

    VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
    VERBOSE(3,"Best path: ");
    Backtrack(hypo);
    VERBOSE(3,"0" << std::endl);

    if (m_surpressSingleBestOutput)
        return;

    OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
    cout << endl;
}
/**
 * Write every path of the n-best list to the n-best stream, one line per path:
 *   <translationId> ||| <surface> ||| <scores> ||| <total score> [||| <alignment>]
 * Scores are written in a fixed order: distortion, reordering models, language
 * models, translation models, word penalty, generation models. Group labels
 * ("d: ", "lm: ", "tm: ", ...) are written only for labeled n-best lists.
 * m_nBestStream is dereferenced without a check, so it must have been set up
 * by the constructor (i.e. the n-best size was > 0).
 */
void IOStream::OutputNBestList(const TrellisPathList &nBestList, long translationId)
{
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
TrellisPathList::const_iterator iter;
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
{
const TrellisPath &path = **iter;
const std::vector<const Hypothesis *> &edges = path.GetEdges();
// print the surface factor of the translation
*m_nBestStream << translationId << " ||| ";
// edges are walked from the last vector element down to the first
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
OutputSurface(*m_nBestStream, edge.GetCurrTargetPhrase(), m_outputFactorOrder, false); // false for not reporting all factors
}
*m_nBestStream << " ||| ";
// print the scores in a hardwired order
// before each model type, the corresponding command-line-like name must be emitted
// MERT script relies on this
// basic distortion
if (labeledOutput)
*m_nBestStream << "d: ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetDistortionScoreProducer()) << " ";
// reordering: these scores get no label of their own, they follow the distortion score
vector<LexicalReordering*> rms = StaticData::Instance().GetReorderModels();
if(rms.size() > 0)
{
// NOTE: this local 'iter' shadows the n-best path iterator of the outer loop
vector<LexicalReordering*>::iterator iter;
for(iter = rms.begin(); iter != rms.end(); ++iter)
{
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); ++j)
{
*m_nBestStream << scores[j] << " ";
}
}
}
// lm
const LMList& lml = StaticData::Instance().GetAllLM();
if (lml.size() > 0) {
if (labeledOutput)
*m_nBestStream << "lm: ";
LMList::const_iterator lmi = lml.begin();
for (; lmi != lml.end(); ++lmi) {
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
}
}
// translation components
if (StaticData::Instance().GetInputType()==0){
// translation components for text input
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "tm: ";
// NOTE: shadows the outer path iterator
vector<PhraseDictionary*>::iterator iter;
for (iter = pds.begin(); iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); ++j)
*m_nBestStream << scores[j] << " ";
}
}
}
else{
// translation components for Confusion Network input
// first translation component has GetNumInputScores() scores from the input Confusion Network
// at the beginning of the vector
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
if (pds.size() > 0) {
// NOTE: shadows the outer path iterator
vector<PhraseDictionary*>::iterator iter;
iter = pds.begin();
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
size_t pd_numinputscore = (*iter)->GetNumInputScores();
// the input scores of the first dictionary are written first, under their own "I: " label
if (pd_numinputscore){
if (labeledOutput)
*m_nBestStream << "I: ";
for (size_t j = 0; j < pd_numinputscore; ++j)
*m_nBestStream << scores[j] << " ";
}
// then, for every dictionary, the scores that come after its input scores
for (iter = pds.begin() ; iter != pds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
size_t pd_numinputscore = (*iter)->GetNumInputScores();
// the "tm: " label is written once, before the first dictionary
if (iter == pds.begin() && labeledOutput)
*m_nBestStream << "tm: ";
for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
*m_nBestStream << scores[j] << " ";
}
}
}
// word penalty
if (labeledOutput)
*m_nBestStream << "w: ";
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";
// generation
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
if (gds.size() > 0) {
if (labeledOutput)
*m_nBestStream << "g: ";
// NOTE: shadows the outer path iterator
vector<GenerationDictionary*>::iterator iter;
for (iter = gds.begin(); iter != gds.end(); ++iter) {
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
for (size_t j = 0; j<scores.size(); j++) {
*m_nBestStream << scores[j] << " ";
}
}
}
// total
*m_nBestStream << "||| " << path.GetTotalScore();
if (includeAlignment) {
*m_nBestStream << " |||";
// starts at size() - 2, i.e. the last element of 'edges' is skipped here
// (presumably the initial empty hypothesis -- TODO confirm)
for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
{
const Hypothesis &edge = *edges[currEdge];
WordsRange sourceRange = edge.GetCurrSourceWordsRange();
WordsRange targetRange = edge.GetCurrTargetWordsRange();
// written as " src[-srcEnd]=tgt[-tgtEnd]"; the end position is omitted for one-word ranges
*m_nBestStream << " " << sourceRange.GetStartPos();
if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
*m_nBestStream << "-" << sourceRange.GetEndPos();
}
*m_nBestStream << "=" << targetRange.GetStartPos();
if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
*m_nBestStream << "-" << targetRange.GetEndPos();
}
}
}
*m_nBestStream << endl;
}
*m_nBestStream<<std::flush;
}
/** Reset the error state flags of the input stream so reading can be retried. */
void IOStream::ClearInStream()
{
    m_inputStream->clear(std::ios::goodbit);
}
/** Flush standard output. */
void IOStream::FlushOutStream()
{
    std::cout << std::flush;
}

View File

@ -1,87 +1,87 @@
// $Id: IOStream.h 110 2007-09-19 22:10:27Z hieu $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#pragma once
#include <fstream>
#include <vector>
#include "TypeDef.h"
#include "Sentence.h"
class FactorMask;
class FactorCollection;
class Hypothesis;
class TrellisPathList;
class InputFileStream;
/**
 * Input/output helper for the decoder: reads source input from stdin or a
 * file and writes the single-best translation and the n-best list.
 *
 * The object owns the input file stream and, when the n-best list goes to a
 * file, the n-best stream. It must therefore not be copied.
 */
class IOStream
{
protected:
    long m_translationId; // id given to the next input read by GetInput(InputType*)
    // The three members below are references: the referenced objects must
    // outlive this IOStream.
    const std::vector<FactorType> &m_inputFactorOrder;
    const std::vector<FactorType> &m_outputFactorOrder;
    const FactorMask &m_inputFactorUsed;
    std::ostream *m_nBestStream; // NULL, &std::cout, or an owned file stream
    std::string m_inputFilePath;
    std::istream *m_inputStream; // &std::cin or an alias of m_inputFile (not owned separately)
    InputFileStream *m_inputFile; // owned; NULL when reading from stdin
    bool m_surpressSingleBestOutput; // true when the n-best list is written to stdout

private:
    // Not copyable: a copy would delete the owned streams a second time.
    // Declared but intentionally not defined.
    IOStream(const IOStream&);
    IOStream& operator=(const IOStream&);

public:
    // read from stdin
    IOStream(const std::vector<FactorType> &inputFactorOrder
        , const std::vector<FactorType> &outputFactorOrder
        , const FactorMask &inputFactorUsed
        , size_t nBestSize
        , const std::string &nBestFilePath);
    // read from the file at inputFilePath
    IOStream(const std::vector<FactorType> &inputFactorOrder
        , const std::vector<FactorType> &outputFactorOrder
        , const FactorMask &inputFactorUsed
        , size_t nBestSize
        , const std::string &nBestFilePath
        , const std::string &inputFilePath);
    ~IOStream();
    // fills inputType from the input stream; deletes it and returns NULL on failure
    InputType* GetInput(InputType *inputType);
    // reads one trimmed line; empty string when nothing could be read
    std::string GetInput();
    void OutputBestHypo(const Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
    void OutputNBestList(const TrellisPathList &nBestList, long translationId);
    void Backtrack(const Hypothesis *hypo);
    void ResetTranslationId() { m_translationId = 0; }
    void ClearInStream();
    void FlushOutStream();
};
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#pragma once
#include <fstream>
#include <vector>
#include "TypeDef.h"
#include "Sentence.h"
class FactorMask;
class FactorCollection;
class Hypothesis;
class TrellisPathList;
class InputFileStream;
/**
 * Input/output helper for the decoder: reads source input from stdin or a
 * file and writes the single-best translation and the n-best list.
 *
 * The object owns the input file stream and, when the n-best list goes to a
 * file, the n-best stream. It must therefore not be copied.
 */
class IOStream
{
protected:
    long m_translationId; // id given to the next input read by GetInput(InputType*)
    // The three members below are references: the referenced objects must
    // outlive this IOStream.
    const std::vector<FactorType> &m_inputFactorOrder;
    const std::vector<FactorType> &m_outputFactorOrder;
    const FactorMask &m_inputFactorUsed;
    std::ostream *m_nBestStream; // NULL, &std::cout, or an owned file stream
    std::string m_inputFilePath;
    std::istream *m_inputStream; // &std::cin or an alias of m_inputFile (not owned separately)
    InputFileStream *m_inputFile; // owned; NULL when reading from stdin
    bool m_surpressSingleBestOutput; // true when the n-best list is written to stdout

private:
    // Not copyable: a copy would delete the owned streams a second time.
    // Declared but intentionally not defined.
    IOStream(const IOStream&);
    IOStream& operator=(const IOStream&);

public:
    // read from stdin
    IOStream(const std::vector<FactorType> &inputFactorOrder
        , const std::vector<FactorType> &outputFactorOrder
        , const FactorMask &inputFactorUsed
        , size_t nBestSize
        , const std::string &nBestFilePath);
    // read from the file at inputFilePath
    IOStream(const std::vector<FactorType> &inputFactorOrder
        , const std::vector<FactorType> &outputFactorOrder
        , const FactorMask &inputFactorUsed
        , size_t nBestSize
        , const std::string &nBestFilePath
        , const std::string &inputFilePath);
    ~IOStream();
    // fills inputType from the input stream; deletes it and returns NULL on failure
    InputType* GetInput(InputType *inputType);
    // reads one trimmed line; empty string when nothing could be read
    std::string GetInput();
    void OutputBestHypo(const Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
    void OutputNBestList(const TrellisPathList &nBestList, long translationId);
    void Backtrack(const Hypothesis *hypo);
    void ResetTranslationId() { m_translationId = 0; }
    void ClearInStream();
    void FlushOutStream();
};

View File

@ -1,230 +1,230 @@
// $Id: Main.cpp 110 2007-09-19 22:10:27Z hieu $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#ifdef WIN32
// Include Visual Leak Detector
#include <vld.h>
#include <windows.h>
#else
#define Sleep(millisec) usleep(millisec * 1000)
#endif
#include <signal.h>
#include <fstream>
#include "Main.h"
#include "TrellisPathList.h"
#include "FactorCollection.h"
#include "Manager.h"
#include "Phrase.h"
#include "Util.h"
#include "TrellisPathList.h"
#include "Timer.h"
#include "IOStream.h"
#include "Sentence.h"
#include "ConfusionNet.h"
#include "TranslationAnalysis.h"
#include "Tokenizer.h"
#if HAVE_CONFIG_H
#include "config.h"
#else
// those not using autoconf have to build MySQL support for now
# define USE_MYSQL 1
#endif
using namespace std;
#undef max
/**
 * Decoder driver: loads parameters and models, then loops waiting for input.
 * Each input line is tokenized and split into sentences; every sentence is
 * decoded and its best hypothesis (plus, optionally, an n-best list and a
 * detailed analysis) is written out.
 */
int main(int argc, char* argv[])
{
    //signal(SIGPIPE, SIG_IGN); // ensures the server doesn't exit once the script stops reading from the output pipe
    IFVERBOSE(1)
    {
        TRACE_ERR("command: ");
        for(int i=0;i<argc;++i) TRACE_ERR(argv[i]<<" ");
        TRACE_ERR(endl);
    }
    cout.setf(std::ios::fixed);
    cout.precision(3);
    cerr.setf(std::ios::fixed);
    cerr.precision(3);

    // load data structures
    Parameter *parameter = new Parameter();
    if (!parameter->LoadParam(argc, argv))
    {
        parameter->Explain();
        delete parameter;
        return EXIT_FAILURE;
    }
    const StaticData &staticData = StaticData::Instance();
    if (!StaticData::LoadDataStatic(parameter))
        return EXIT_FAILURE;

    // set up read/writing class
    IOStream *ioStream = GetIODevice(staticData);

    // check on weights
    vector<float> weights = staticData.GetAllWeights();
    IFVERBOSE(2) {
        TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager());
        TRACE_ERR("The global weight vector looks like this:");
        for (size_t j=0; j<weights.size(); j++) { TRACE_ERR(" " << weights[j]); }
        TRACE_ERR("\n");
    }
    // every score must have a weight! check that here:
    if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) {
        TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl);
        delete ioStream; // do not leak the I/O object on this early exit
        return EXIT_FAILURE;
    }
    if (ioStream == NULL)
        return EXIT_FAILURE;

    // read each sentence & decode
    size_t lineCount = 0;
    while(true)
    {
        // wait for input: poll once a second, clearing the stream state between attempts
        string input;
        while ((input = ioStream->GetInput()).size() == 0)
        {
            ioStream->ClearInStream();
            Sleep(1000);
        }

        // tokenize input
        Tokenizer tokenizer("en");
        string inputTokenized = tokenizer.Tokenize(input);
        vector<string> sentences = Tokenize(inputTokenized, "\n");
        std::vector<FactorType> factorOrder;
        factorOrder.push_back(0);

        vector<string>::iterator iterSentences;
        for (iterSentences = sentences.begin() ; iterSentences != sentences.end() ; ++iterSentences)
        {
            IFVERBOSE(1)
                ResetUserTime();
            const string &sentence = *iterSentences;
            Sentence sourceSentence(Input);
            sourceSentence.CreateFromString(factorOrder, sentence, "|");

            // Count outside the logging macro: VERBOSE presumably evaluates its
            // argument only at a high enough verbosity, which would make the
            // counter depend on the log level.
            ++lineCount;
            VERBOSE(2,"\nTRANSLATING(" << lineCount << "): " << sentence);

            Manager manager(sourceSentence);
            manager.ProcessSentence();

            // The id used to be read from a temporary heap Sentence that was
            // deleted before the n-best branch read it again (use after free).
            // Read it once from the sentence being translated instead.
            // NOTE(review): assumes CreateFromString() leaves the id at the
            // value a freshly constructed Sentence has -- confirm.
            const long translationId = sourceSentence.GetTranslationId();

            ioStream->OutputBestHypo(manager.GetBestHypothesis(), translationId,
                                     staticData.GetReportSegmentation(),
                                     staticData.GetReportAllFactors()
                                     );
            IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); }

            // n-best
            size_t nBestSize = staticData.GetNBestSize();
            if (nBestSize > 0)
            {
                VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
                TrellisPathList nBestList;
                manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
                ioStream->OutputNBestList(nBestList, translationId);
                //RemoveAllInColl(nBestList);
                IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
            }

            if (staticData.IsDetailedTranslationReportingEnabled())
            {
                TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
            }
            IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); }
            manager.CalcDecoderStatistics();
            ioStream->FlushOutStream();
        }
    } // while(true)

    // The loop above has no exit, so the clean-up below is not reached as the
    // code stands; it is kept for the case where an exit condition is added.
    delete ioStream;
    IFVERBOSE(1)
        PrintUserTime("End.");
#ifdef HACK_EXIT
    //This avoids that destructors are called (it can take a long time)
    exit(EXIT_SUCCESS);
#else
    return EXIT_SUCCESS;
#endif
}
/**
 * Create the IOStream object used by main().
 * Input comes from the file named by the "input-file" parameter when exactly
 * one value is given, otherwise from stdin. The translation id counter of the
 * new object is reset to 0. The caller owns the returned object.
 */
IOStream *GetIODevice(const StaticData &staticData)
{
    const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
                                ,&outputFactorOrder = staticData.GetOutputFactorOrder();

    // IOStream stores a reference to the mask, so the mask has to outlive this
    // call. A plain local would leave the returned object holding a dangling
    // reference; a function-local static lives until program exit.
    static const FactorMask inputFactorUsed(inputFactorOrder);

    IOStream *ioStream = NULL;

    // io
    if (staticData.GetParam("input-file").size() == 1)
    {
        VERBOSE(2,"IO from File" << endl);
        string filePath = staticData.GetParam("input-file")[0];
        ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                                , staticData.GetNBestSize()
                                , staticData.GetNBestFilePath()
                                , filePath);
    }
    else
    {
        VERBOSE(1,"IO from STDOUT/STDIN" << endl);
        ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                                , staticData.GetNBestSize()
                                , staticData.GetNBestFilePath());
    }
    ioStream->ResetTranslationId();

    IFVERBOSE(1)
        PrintUserTime("Created input-output object");
    return ioStream;
}
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#ifdef WIN32
// Include Visual Leak Detector
#include <vld.h>
#include <windows.h>
#else
#define Sleep(millisec) usleep(millisec * 1000)
#endif
#include <signal.h>
#include <fstream>
#include "Main.h"
#include "TrellisPathList.h"
#include "FactorCollection.h"
#include "Manager.h"
#include "Phrase.h"
#include "Util.h"
#include "TrellisPathList.h"
#include "Timer.h"
#include "IOStream.h"
#include "Sentence.h"
#include "ConfusionNet.h"
#include "TranslationAnalysis.h"
#include "Tokenizer.h"
#if HAVE_CONFIG_H
#include "config.h"
#else
// those not using autoconf have to build MySQL support for now
# define USE_MYSQL 1
#endif
using namespace std;
#undef max
/**
 * Decoder driver: loads parameters and models, then loops waiting for input.
 * Each input line is tokenized and split into sentences; every sentence is
 * decoded and its best hypothesis (plus, optionally, an n-best list and a
 * detailed analysis) is written out.
 */
int main(int argc, char* argv[])
{
    //signal(SIGPIPE, SIG_IGN); // ensures the server doesn't exit once the script stops reading from the output pipe
    IFVERBOSE(1)
    {
        TRACE_ERR("command: ");
        for(int i=0;i<argc;++i) TRACE_ERR(argv[i]<<" ");
        TRACE_ERR(endl);
    }
    cout.setf(std::ios::fixed);
    cout.precision(3);
    cerr.setf(std::ios::fixed);
    cerr.precision(3);

    // load data structures
    Parameter *parameter = new Parameter();
    if (!parameter->LoadParam(argc, argv))
    {
        parameter->Explain();
        delete parameter;
        return EXIT_FAILURE;
    }
    const StaticData &staticData = StaticData::Instance();
    if (!StaticData::LoadDataStatic(parameter))
        return EXIT_FAILURE;

    // set up read/writing class
    IOStream *ioStream = GetIODevice(staticData);

    // check on weights
    vector<float> weights = staticData.GetAllWeights();
    IFVERBOSE(2) {
        TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager());
        TRACE_ERR("The global weight vector looks like this:");
        for (size_t j=0; j<weights.size(); j++) { TRACE_ERR(" " << weights[j]); }
        TRACE_ERR("\n");
    }
    // every score must have a weight! check that here:
    if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) {
        TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl);
        delete ioStream; // do not leak the I/O object on this early exit
        return EXIT_FAILURE;
    }
    if (ioStream == NULL)
        return EXIT_FAILURE;

    // read each sentence & decode
    size_t lineCount = 0;
    while(true)
    {
        // wait for input: poll once a second, clearing the stream state between attempts
        string input;
        while ((input = ioStream->GetInput()).size() == 0)
        {
            ioStream->ClearInStream();
            Sleep(1000);
        }

        // tokenize input
        Tokenizer tokenizer("en");
        string inputTokenized = tokenizer.Tokenize(input);
        vector<string> sentences = Tokenize(inputTokenized, "\n");
        std::vector<FactorType> factorOrder;
        factorOrder.push_back(0);

        vector<string>::iterator iterSentences;
        for (iterSentences = sentences.begin() ; iterSentences != sentences.end() ; ++iterSentences)
        {
            IFVERBOSE(1)
                ResetUserTime();
            const string &sentence = *iterSentences;
            Sentence sourceSentence(Input);
            sourceSentence.CreateFromString(factorOrder, sentence, "|");

            // Count outside the logging macro: VERBOSE presumably evaluates its
            // argument only at a high enough verbosity, which would make the
            // counter depend on the log level.
            ++lineCount;
            VERBOSE(2,"\nTRANSLATING(" << lineCount << "): " << sentence);

            Manager manager(sourceSentence);
            manager.ProcessSentence();

            // The id used to be read from a temporary heap Sentence that was
            // deleted before the n-best branch read it again (use after free).
            // Read it once from the sentence being translated instead.
            // NOTE(review): assumes CreateFromString() leaves the id at the
            // value a freshly constructed Sentence has -- confirm.
            const long translationId = sourceSentence.GetTranslationId();

            ioStream->OutputBestHypo(manager.GetBestHypothesis(), translationId,
                                     staticData.GetReportSegmentation(),
                                     staticData.GetReportAllFactors()
                                     );
            IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); }

            // n-best
            size_t nBestSize = staticData.GetNBestSize();
            if (nBestSize > 0)
            {
                VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
                TrellisPathList nBestList;
                manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
                ioStream->OutputNBestList(nBestList, translationId);
                //RemoveAllInColl(nBestList);
                IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
            }

            if (staticData.IsDetailedTranslationReportingEnabled())
            {
                TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
            }
            IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); }
            manager.CalcDecoderStatistics();
            ioStream->FlushOutStream();
        }
    } // while(true)

    // The loop above has no exit, so the clean-up below is not reached as the
    // code stands; it is kept for the case where an exit condition is added.
    delete ioStream;
    IFVERBOSE(1)
        PrintUserTime("End.");
#ifdef HACK_EXIT
    //This avoids that destructors are called (it can take a long time)
    exit(EXIT_SUCCESS);
#else
    return EXIT_SUCCESS;
#endif
}
/**
 * Create the IOStream object used by main().
 * Input comes from the file named by the "input-file" parameter when exactly
 * one value is given, otherwise from stdin. The translation id counter of the
 * new object is reset to 0. The caller owns the returned object.
 */
IOStream *GetIODevice(const StaticData &staticData)
{
    const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
                                ,&outputFactorOrder = staticData.GetOutputFactorOrder();

    // IOStream stores a reference to the mask, so the mask has to outlive this
    // call. A plain local would leave the returned object holding a dangling
    // reference; a function-local static lives until program exit.
    static const FactorMask inputFactorUsed(inputFactorOrder);

    IOStream *ioStream = NULL;

    // io
    if (staticData.GetParam("input-file").size() == 1)
    {
        VERBOSE(2,"IO from File" << endl);
        string filePath = staticData.GetParam("input-file")[0];
        ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                                , staticData.GetNBestSize()
                                , staticData.GetNBestFilePath()
                                , filePath);
    }
    else
    {
        VERBOSE(1,"IO from STDOUT/STDIN" << endl);
        ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                                , staticData.GetNBestSize()
                                , staticData.GetNBestFilePath());
    }
    ioStream->ResetTranslationId();

    IFVERBOSE(1)
        PrintUserTime("Created input-output object");
    return ioStream;
}

View File

@ -1,42 +1,42 @@
// $Id: Main.h 110 2007-09-19 22:10:27Z hieu $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#pragma once
#include "StaticData.h"
class IOStream;
int main(int argc, char* argv[]);
IOStream *GetIODevice(const StaticData &staticData);
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (c) 2006 University of Edinburgh
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the University of Edinburgh nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
// example file on how to use moses library
#pragma once
#include "StaticData.h"
// Forward declaration: this header only refers to IOStream through a pointer,
// so the full class definition is not needed here.
class IOStream;
// Program entry point; receives the command-line argument count and vector.
int main(int argc, char* argv[]);
// Returns a pointer to an IOStream for the given StaticData.
// NOTE(review): the definition is not visible from this header; presumably the
// device is selected from the settings in staticData, and ownership of the
// returned pointer is not documented here - confirm against the definition.
IOStream *GetIODevice(const StaticData &staticData);

View File

@ -1,7 +1,7 @@
# Automake input for the moses command-line decoder.
# Name of the program to build (an Automake bin_ primary, i.e. an installed binary).
bin_PROGRAMS = moses
# Source files compiled into the moses program.
moses_SOURCES = Main.cpp IOStream.cpp TranslationAnalysis.cpp
# Flags passed to every compile: extra warnings, -ffor-scope, the two large-file
# defines, the USE_HYPO_POOL define, and the include path of the moses library
# sources. NOTE(review): what USE_HYPO_POOL switches on is decided inside the
# library sources, not here.
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src
# Link the program against libmoses, searched for in moses/src.
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses
# Make the program depend on the static library so that it is relinked when
# libmoses.a changes.
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
# Automake input for the moses command-line decoder.
# Name of the program to build (an Automake bin_ primary, i.e. an installed binary).
bin_PROGRAMS = moses
# Source files compiled into the moses program.
moses_SOURCES = Main.cpp IOStream.cpp TranslationAnalysis.cpp
# Flags passed to every compile: extra warnings, -ffor-scope, the two large-file
# defines, the USE_HYPO_POOL define, and the include path of the moses library
# sources. NOTE(review): what USE_HYPO_POOL switches on is decided inside the
# library sources, not here.
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src
# Link the program against libmoses, searched for in moses/src.
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses
# Make the program depend on the static library so that it is relinked when
# libmoses.a changes.
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a

View File

@ -1,112 +1,112 @@
// $Id: TranslationAnalysis.cpp 110 2007-09-19 22:10:27Z hieu $
#include <iostream>
#include <sstream>
#include <algorithm>
#include "StaticData.h"
#include "Hypothesis.h"
#include "TranslationAnalysis.h"
namespace TranslationAnalysis {
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
{
os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
std::vector<const Hypothesis*> translationPath;
while (hypo) {
translationPath.push_back(hypo);
hypo = hypo->GetPrevHypo();
}
std::reverse(translationPath.begin(), translationPath.end());
std::vector<std::string> droppedWords;
std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
++tpi; // skip initial translation state
std::vector<std::string> sourceMap;
std::vector<std::string> targetMap;
std::vector<unsigned int> lmAcc(0);
size_t lmCalls = 0;
bool doLMStats = ((*tpi)->GetLMStats() != 0);
if (doLMStats)
lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
for (; tpi != translationPath.end(); ++tpi) {
std::ostringstream sms;
std::ostringstream tms;
std::string target = (*tpi)->GetTargetPhraseStringRep();
std::string source = (*tpi)->GetSourcePhraseStringRep();
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
// language model backoff stats,
if (doLMStats) {
std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
std::vector<unsigned int>::iterator acc = lmAcc.begin();
for (; i != lmstats.end(); ++i, ++acc) {
std::vector<unsigned int>::iterator j = i->begin();
lmCalls += i->size();
for (; j != i->end(); ++j) {
(*acc) += *j;
}
}
}
bool epsilon = false;
if (target == "") {
target="<EPSILON>";
epsilon = true;
droppedWords.push_back(source);
}
os << " SOURCE: " << swr << " " << source << std::endl
<< " TRANSLATED AS: " << target << std::endl;
size_t twr_i = twr.GetStartPos();
size_t swr_i = swr.GetStartPos();
if (!epsilon) { sms << twr_i; }
if (epsilon) { tms << "del(" << swr_i << ")"; } else { tms << swr_i; }
swr_i++; twr_i++;
for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
sms << '-' << twr_i;
}
for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
tms << '-' << swr_i;
}
if (!epsilon) targetMap.push_back(sms.str());
sourceMap.push_back(tms.str());
}
std::vector<std::string>::iterator si = sourceMap.begin();
std::vector<std::string>::iterator ti = targetMap.begin();
os << std::endl << "SOURCE/TARGET SPANS:";
os << std::endl << " SOURCE:";
for (; si != sourceMap.end(); ++si) {
os << " " << *si;
}
os << std::endl << " TARGET:";
for (; ti != targetMap.end(); ++ti) {
os << " " << *ti;
}
os << std::endl << std::endl;
if (doLMStats && lmCalls > 0) {
std::vector<unsigned int>::iterator acc = lmAcc.begin();
const LMList& lmlist = StaticData::Instance().GetAllLM();
LMList::const_iterator i = lmlist.begin();
for (; acc != lmAcc.end(); ++acc, ++i) {
char buf[256];
sprintf(buf, "%.4f", (double)(*acc)/(double)lmCalls);
os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
}
}
if (droppedWords.size() > 0) {
std::vector<std::string>::iterator dwi = droppedWords.begin();
os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
for (; dwi != droppedWords.end(); ++dwi) {
os << "\tdropped=" << *dwi << std::endl;
}
}
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
StaticData::Instance().GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
os << std::endl;
}
}
// $Id$
#include <iostream>
#include <sstream>
#include <algorithm>
#include "StaticData.h"
#include "Hypothesis.h"
#include "TranslationAnalysis.h"
namespace TranslationAnalysis {
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
{
os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
std::vector<const Hypothesis*> translationPath;
while (hypo) {
translationPath.push_back(hypo);
hypo = hypo->GetPrevHypo();
}
std::reverse(translationPath.begin(), translationPath.end());
std::vector<std::string> droppedWords;
std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
++tpi; // skip initial translation state
std::vector<std::string> sourceMap;
std::vector<std::string> targetMap;
std::vector<unsigned int> lmAcc(0);
size_t lmCalls = 0;
bool doLMStats = ((*tpi)->GetLMStats() != 0);
if (doLMStats)
lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
for (; tpi != translationPath.end(); ++tpi) {
std::ostringstream sms;
std::ostringstream tms;
std::string target = (*tpi)->GetTargetPhraseStringRep();
std::string source = (*tpi)->GetSourcePhraseStringRep();
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
// language model backoff stats,
if (doLMStats) {
std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
std::vector<unsigned int>::iterator acc = lmAcc.begin();
for (; i != lmstats.end(); ++i, ++acc) {
std::vector<unsigned int>::iterator j = i->begin();
lmCalls += i->size();
for (; j != i->end(); ++j) {
(*acc) += *j;
}
}
}
bool epsilon = false;
if (target == "") {
target="<EPSILON>";
epsilon = true;
droppedWords.push_back(source);
}
os << " SOURCE: " << swr << " " << source << std::endl
<< " TRANSLATED AS: " << target << std::endl;
size_t twr_i = twr.GetStartPos();
size_t swr_i = swr.GetStartPos();
if (!epsilon) { sms << twr_i; }
if (epsilon) { tms << "del(" << swr_i << ")"; } else { tms << swr_i; }
swr_i++; twr_i++;
for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
sms << '-' << twr_i;
}
for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
tms << '-' << swr_i;
}
if (!epsilon) targetMap.push_back(sms.str());
sourceMap.push_back(tms.str());
}
std::vector<std::string>::iterator si = sourceMap.begin();
std::vector<std::string>::iterator ti = targetMap.begin();
os << std::endl << "SOURCE/TARGET SPANS:";
os << std::endl << " SOURCE:";
for (; si != sourceMap.end(); ++si) {
os << " " << *si;
}
os << std::endl << " TARGET:";
for (; ti != targetMap.end(); ++ti) {
os << " " << *ti;
}
os << std::endl << std::endl;
if (doLMStats && lmCalls > 0) {
std::vector<unsigned int>::iterator acc = lmAcc.begin();
const LMList& lmlist = StaticData::Instance().GetAllLM();
LMList::const_iterator i = lmlist.begin();
for (; acc != lmAcc.end(); ++acc, ++i) {
char buf[256];
sprintf(buf, "%.4f", (double)(*acc)/(double)lmCalls);
os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
}
}
if (droppedWords.size() > 0) {
std::vector<std::string>::iterator dwi = droppedWords.begin();
os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
for (; dwi != droppedWords.end(); ++dwi) {
os << "\tdropped=" << *dwi << std::endl;
}
}
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
StaticData::Instance().GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
os << std::endl;
}
}

View File

@ -1,25 +1,25 @@
// $Id: TranslationAnalysis.h 110 2007-09-19 22:10:27Z hieu $
/*
* also see moses/SentenceStats
*/
#ifndef _TRANSLATION_ANALYSIS_H_
#define _TRANSLATION_ANALYSIS_H_
#include <iostream>
class Hypothesis;
namespace TranslationAnalysis
{
/***
 * print details about the translation represented in hypothesis to
 * os. Included information: phrase alignment, words dropped, scores
 *
 * The implementation follows GetPrevHypo() back from hypo to recover the
 * sequence of translation steps, and also reports the average n-gram length
 * per language model when the hypotheses carry LM statistics.
 *
 * @param os   output stream the report is written to
 * @param hypo last hypothesis of the translation to report on
 */
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo);
}
#endif
// $Id$
/*
* also see moses/SentenceStats
*/
#ifndef _TRANSLATION_ANALYSIS_H_
#define _TRANSLATION_ANALYSIS_H_
#include <iostream>
class Hypothesis;
namespace TranslationAnalysis
{
/***
 * print details about the translation represented in hypothesis to
 * os. Included information: phrase alignment, words dropped, scores
 *
 * The implementation follows GetPrevHypo() back from hypo to recover the
 * sequence of translation steps, and also reports the average n-gram length
 * per language model when the hypotheses carry LM statistics.
 *
 * @param os   output stream the report is written to
 * @param hypo last hypothesis of the translation to report on
 */
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo);
}
#endif

View File

@ -1,38 +0,0 @@
<html>
<head><title>Moses demo</title></head>
<body>
<B>
Moses demo<BR><BR>
<?php
	// Demo front end for a running moses-cgi process: on POST the submitted
	// text is written as one line to the 'input' pipe and one line of
	// translation is read back from the 'output' pipe.
	$strInput = "";
	$strOutput= "";
	if ($_SERVER['REQUEST_METHOD'] == 'POST')
	{
		// The field may be absent from a hand-crafted request.
		$strInput = isset($_REQUEST['txt']) ? $_REQUEST['txt'] : "";
		// Escape before echoing: the text comes straight from the browser.
		echo "Input is: ".htmlspecialchars($strInput, ENT_QUOTES)."<BR>";

		// Exactly one line is read back below, so exactly one line must be
		// sent; embedded line breaks from the textarea would otherwise leave
		// extra output lines in the pipe for the next request.
		$strLine = str_replace(array("\r\n", "\r", "\n"), " ", $strInput);

		$inputFile = fopen('input', 'a') or die("can't open input file");
		$outputFile = fopen('output', 'r') or die("can't open output file");
		fwrite($inputFile, $strLine."\n");
		$strOutput = fgets($outputFile);
		if ($strOutput === false)
		{
			// fgets() returns false on error/EOF; show an empty result instead.
			$strOutput = "";
		}
		fclose($inputFile);
		fclose($outputFile);
	}
?>
Output is: <?=htmlspecialchars($strOutput, ENT_QUOTES)?><BR>
<BR>
<form action="moses.php" method="POST">
<textarea name="txt" rows="5" cols="50"><?=htmlspecialchars($strInput, ENT_QUOTES)?></textarea>
<BR>
<input type="submit" name="txt_submit" value="Submit">
</form><br><br>
</B>
</body>
</html>