mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 19:37:58 +03:00
svn properties
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1585 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
226dd90d0c
commit
16f69dda75
@ -1,23 +1,23 @@
|
||||
This is the code to put moses on the web. It's buggy and a bit complicated to run.
|
||||
|
||||
First, compile the C++ executable. I use Eclipse (the makefile doesn't work for this); the build creates an executable called moses-cgi.
|
||||
|
||||
I've also included a precompiled Linux version, which you should use if you can. It may run on your system if you have the same library versions as I do (my computer runs Fedora 6).
|
||||
|
||||
|
||||
to run the system:
|
||||
1. make a subdirectory in the directory which can be seen by the apache server. cd into this directory
|
||||
2. make 2 named pipes called 'input' and 'output'
|
||||
3. copy moses-cgi into this directory, or softlink it into there
|
||||
4. run the exe like
|
||||
./moses-cgi -f moses.ini < input > output
|
||||
5. copy moses.php into the same directory, this is the html page people should be using to run the demo
|
||||
6. make sure that apache can execute moses.php and can access 'input' & 'output'
|
||||
told u it's complicated !
|
||||
|
||||
if u want to see what it looks like, go to
|
||||
http://groups.inf.ed.ac.uk/hoang/demo/en-de/moses.php
|
||||
u're welcome to change the moses.php. please keep the link back to factored-translation.com
|
||||
|
||||
Email me (hieuhoang@gmail.com) if you have any queries, I'm sure you will...
|
||||
This is the code to put moses on the web. It's buggy and a bit complicated to run.
|
||||
|
||||
First, compile the C++ executable. I use Eclipse (the makefile doesn't work for this); the build creates an executable called moses-cgi.
|
||||
|
||||
I've also included a precompiled Linux version, which you should use if you can. It may run on your system if you have the same library versions as I do (my computer runs Fedora 6).
|
||||
|
||||
|
||||
to run the system:
|
||||
1. make a subdirectory in the directory which can be seen by the apache server. cd into this directory
|
||||
2. make 2 named pipes called 'input' and 'output'
|
||||
3. copy moses-cgi into this directory, or softlink it into there
|
||||
4. run the exe like
|
||||
./moses-cgi -f moses.ini < input > output
|
||||
5. copy moses.php into the same directory, this is the html page people should be using to run the demo
|
||||
6. make sure that apache can execute moses.php and can access 'input' & 'output'
|
||||
told u it's complicated !
|
||||
|
||||
if u want to see what it looks like, go to
|
||||
http://groups.inf.ed.ac.uk/hoang/demo/en-de/moses.php
|
||||
u're welcome to change the moses.php. please keep the link back to factored-translation.com
|
||||
|
||||
Email me (hieuhoang@gmail.com) if you have any queries, I'm sure you will...
|
||||
|
@ -1,49 +1,49 @@
|
||||
<html>
<head><title>Moses demo</title></head>
<body>
<A HREF="../">back &lt;&lt;--</A><BR><BR>

<B>Moses demo</B><BR><BR>
<?php
// Web front end for a running moses-cgi process.
// The sentence is appended to the named pipe 'input' and one line of
// translation is read back from the named pipe 'output'.

// Defined up front so the form below also renders cleanly on a plain GET.
$input      = "";	// text exactly as submitted, shown again in the textarea
$inputLower = "";	// normalised, lower-cased sentence sent to the decoder
$output     = "";	// line read back from the decoder
$isPost     = ($_SERVER['REQUEST_METHOD'] == 'POST');

if ($isPost)
{
	$input = isset($_REQUEST['txtInput']) ? $_REQUEST['txtInput'] : "";

	// Exactly one line is read back per request, so exactly one line may be
	// written: collapse embedded line breaks (and other whitespace runs) to
	// single spaces, otherwise later requests would read stale output.
	$inputLower = strtolower(trim(preg_replace('/\s+/', ' ', $input)));

	$inputFile = fopen('input', 'a') or die("can't open input file");
	$outputFile = fopen('output', 'r') or die("can't open output file");

	fwrite($inputFile, $inputLower ."\n");

	// fgets() returns false when nothing could be read
	$line = fgets($outputFile);
	$output = ($line === false) ? "" : $line;

	fclose($inputFile);
	fclose($outputFile);
}
?>

<BR>
<form action="moses.php" method="POST">
<textarea name="txtInput" rows="5" cols="50"><?php echo htmlspecialchars($input, ENT_QUOTES); ?></textarea>
<BR>
<input type="submit" name="txt_submit" value="Submit">
</form><br><br>

<?php
if ($isPost)
{
	// Both values come from outside this script; escape them before
	// embedding them in the page.
	echo "Input sentence is: ".htmlspecialchars($inputLower, ENT_QUOTES)."<BR>";
	echo "Translated is: " .htmlspecialchars($output, ENT_QUOTES) ."<BR>";
}
?>

<H6>
Copyright 2007 <A HREF="http://www.factored-translation.com/">Factored Translation</A><BR>
Licensed under the <A HREF="http://www.gnu.org/licenses/lgpl.html">LGPL</A><BR>
</H6>
</body>
</html>
|
||||
<html>
<head><title>Moses demo</title></head>
<body>
<A HREF="../">back &lt;&lt;--</A><BR><BR>

<B>Moses demo</B><BR><BR>
<?php
// Web front end for a running moses-cgi process.
// The sentence is appended to the named pipe 'input' and one line of
// translation is read back from the named pipe 'output'.

// Defined up front so the form below also renders cleanly on a plain GET.
$input      = "";	// text exactly as submitted, shown again in the textarea
$inputLower = "";	// normalised, lower-cased sentence sent to the decoder
$output     = "";	// line read back from the decoder
$isPost     = ($_SERVER['REQUEST_METHOD'] == 'POST');

if ($isPost)
{
	$input = isset($_REQUEST['txtInput']) ? $_REQUEST['txtInput'] : "";

	// Exactly one line is read back per request, so exactly one line may be
	// written: collapse embedded line breaks (and other whitespace runs) to
	// single spaces, otherwise later requests would read stale output.
	$inputLower = strtolower(trim(preg_replace('/\s+/', ' ', $input)));

	$inputFile = fopen('input', 'a') or die("can't open input file");
	$outputFile = fopen('output', 'r') or die("can't open output file");

	fwrite($inputFile, $inputLower ."\n");

	// fgets() returns false when nothing could be read
	$line = fgets($outputFile);
	$output = ($line === false) ? "" : $line;

	fclose($inputFile);
	fclose($outputFile);
}
?>

<BR>
<form action="moses.php" method="POST">
<textarea name="txtInput" rows="5" cols="50"><?php echo htmlspecialchars($input, ENT_QUOTES); ?></textarea>
<BR>
<input type="submit" name="txt_submit" value="Submit">
</form><br><br>

<?php
if ($isPost)
{
	// Both values come from outside this script; escape them before
	// embedding them in the page.
	echo "Input sentence is: ".htmlspecialchars($inputLower, ENT_QUOTES)."<BR>";
	echo "Translated is: " .htmlspecialchars($output, ENT_QUOTES) ."<BR>";
}
?>

<H6>
Copyright 2007 <A HREF="http://www.factored-translation.com/">Factored Translation</A><BR>
Licensed under the <A HREF="http://www.gnu.org/licenses/lgpl.html">LGPL</A><BR>
</H6>
</body>
</html>
|
||||
|
@ -1,204 +1,204 @@
|
||||
// $Id: FactorCollection.cpp 1218 2007-02-16 18:08:37Z hieuhoang1972 $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "Tokenizer.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool Tokenizer::m_initialized = false;
|
||||
std::set<std::string> Tokenizer::m_prefixes;
|
||||
std::set<std::string> Tokenizer::m_punctuation;
|
||||
std::set<std::string> Tokenizer::m_quotes;
|
||||
|
||||
/**
 * Split sentence-final punctuation off one whitespace-delimited token and
 * append the resulting pieces to newTokens:
 *  - a lone punctuation mark                        -> mark, "\n"
 *  - token ending in "." that is listed in
 *    m_prefixes (an abbreviation)                   -> token unchanged
 *  - any other token ending in "." or a mark from
 *    m_punctuation                                  -> word, mark, "\n"
 *  - everything else                                -> token unchanged
 * The "\n" piece marks the end of a sentence.
 *
 * \param newTokens vector the pieces are appended to
 * \param token     token to examine; an empty token appends nothing
 */
void Tokenizer::SentenceSeparator(vector<string> &newTokens, const string &token)
{
	// token.size()-1 wraps around for an empty string and substr() would
	// throw std::out_of_range; an empty token has nothing to split.
	if (token.empty())
		return;

	const string lastChar = token.substr(token.size()-1, 1);
	const bool endsInPunctuation = m_punctuation.find(lastChar) != m_punctuation.end();

	if (token.size() == 1 && endsInPunctuation)
	{ // the token is nothing but the punctuation mark
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else if (lastChar == "." && m_prefixes.find(token) != m_prefixes.end())
	{ // found a prefix. add as is
		newTokens.push_back(token);
	}
	else if (lastChar == "." || endsInPunctuation)
	{ // end of sentence: detach the mark from the word in front of it
		newTokens.push_back(token.substr(0, token.size()-1));
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else
	{ // just a normal word
		newTokens.push_back(token);
	}
}
|
||||
|
||||
/**
 * Detach a leading quotation mark from a token.
 *
 * If the token starts with one of the marks in m_quotes, the mark and the
 * rest of the token (when not empty) are appended to newTokens as separate
 * pieces; otherwise the token is appended unchanged.
 *
 * The whole mark is compared, not just the first byte, so marks that are
 * stored as more than one byte (e.g. UTF-8 typographic quotes) are found
 * as well. For one-byte marks this is the same test as before.
 *
 * \param newTokens vector the pieces are appended to
 * \param token     token to examine; an empty token appends nothing
 */
void Tokenizer::QuotesFirst(vector<string> &newTokens, const string &token)
{
	// substr(1, ...) on an empty string throws std::out_of_range
	if (token.empty())
		return;

	for (set<string>::const_iterator iterQuote = m_quotes.begin() ; iterQuote != m_quotes.end() ; ++iterQuote)
	{
		const string &quote = *iterQuote;
		if (quote.empty())
			continue;

		// compare() only looks at the first quote.size() bytes of the token
		// (fewer if the token is shorter, in which case it cannot be equal)
		if (token.compare(0, quote.size(), quote) == 0)
		{
			newTokens.push_back(quote);
			if (token.size() > quote.size())
				newTokens.push_back(token.substr(quote.size()));
			return;
		}
	}

	// no leading quote
	newTokens.push_back(token);
}
|
||||
|
||||
/**
 * Detach a trailing quotation mark from a token.
 *
 * If the token ends with one of the marks in m_quotes, the text in front of
 * the mark (when not empty) and the mark are appended to newTokens as
 * separate pieces; otherwise the token is appended unchanged.
 *
 * The whole mark is compared, not just the last byte, so marks that are
 * stored as more than one byte (e.g. UTF-8 typographic quotes) are found
 * as well. For one-byte marks this is the same test as before.
 *
 * \param newTokens vector the pieces are appended to
 * \param token     token to examine; an empty token appends nothing
 */
void Tokenizer::QuotesLast(vector<string> &newTokens, const string &token)
{
	// token.size()-1 wraps around for an empty string and substr() would
	// throw std::out_of_range
	if (token.empty())
		return;

	for (set<string>::const_iterator iterQuote = m_quotes.begin() ; iterQuote != m_quotes.end() ; ++iterQuote)
	{
		const string &quote = *iterQuote;
		if (quote.empty() || quote.size() > token.size())
			continue;

		// number of bytes in front of a mark sitting at the very end
		const size_t wordLength = token.size() - quote.size();
		if (token.compare(wordLength, quote.size(), quote) == 0)
		{
			if (wordLength > 0)
				newTokens.push_back(token.substr(0, wordLength));
			newTokens.push_back(quote);
			return;
		}
	}

	// no trailing quote
	newTokens.push_back(token);
}
|
||||
|
||||
/**
 * Tokenize a piece of text.
 *
 * The input is first cut into raw tokens by the global ::Tokenize() helper
 * (called with space, tab and newline as delimiters). Three passes are then
 * run over the tokens, each feeding the next:
 *  1. SentenceSeparator - splits off sentence-final punctuation
 *  2. QuotesFirst       - splits off leading quotation marks
 *  3. QuotesLast        - splits off trailing quotation marks
 *
 * \param input text to tokenize
 * \return the final pieces joined by Join() with a single space between them
 */
string Tokenizer::Tokenize(const string &input)
{
	vector<string> oldTokens = ::Tokenize(input, " \t\n");
	vector<string> newTokens;
	vector<string>::const_iterator iterTokens;

	// pass 1: sentence boundaries
	for (iterTokens = oldTokens.begin() ; iterTokens != oldTokens.end() ; ++iterTokens)
		SentenceSeparator(newTokens, *iterTokens);

	// pass 2: leading quotes. swap() hands the previous result over as the
	// new input without copying every string
	oldTokens.swap(newTokens);
	newTokens.clear();
	for (iterTokens = oldTokens.begin() ; iterTokens != oldTokens.end() ; ++iterTokens)
		QuotesFirst(newTokens, *iterTokens);

	// pass 3: trailing quotes
	oldTokens.swap(newTokens);
	newTokens.clear();
	for (iterTokens = oldTokens.begin() ; iterTokens != oldTokens.end() ; ++iterTokens)
		QuotesLast(newTokens, *iterTokens);

	return Join(" ", newTokens);
}
|
||||
|
||||
|
||||
/**
 * Construct a tokenizer.
 *
 * The first instance ever constructed fills the static lookup tables
 * (m_prefixes, m_punctuation, m_quotes); later instances find
 * m_initialized set and leave the tables alone.
 *
 * \param language stored in m_language; the tables filled here are the
 *                 same whatever value is passed
 */
Tokenizer::Tokenizer(const std::string &language)
:m_language(language)
{
	if (!m_initialized)
	{
		m_initialized = true;

		// abbreviations whose trailing "." does not end a sentence
		const char *const prefixes[] = {
			"adj.", "adm.", "adv.", "asst.", "ave.", "bldg.", "brig.", "bros.",
			"capt.", "cmdr.", "col.", "comdr.", "con.", "corp.", "cpl.", "dr.",
			"ens.", "gen.", "gov.", "hon.", "hosp.", "insp.", "lt.", "maj.",
			"messrs.", "mlle.", "mme.", "mr.", "mrs.", "ms.", "msgr.", "op.",
			"ord.", "pfc.", "ph.", "prof.", "pvt.", "rep.", "reps.", "res.",
			"rev.", "rt.", "sen.", "sens.", "sgt.", "sr.", "st.", "supt.",
			"surg.", "v.", "vs."
		};

		// marks that end a sentence
		const char *const punctuation[] = { ":", ".", "!", "?", ";" };

		// quotation marks.
		// NOTE(review): the last two entries are identical and look like the
		// remains of two mis-encoded typographic quotes - confirm the
		// intended characters.
		const char *const quotes[] = { "\"", "'", "<EFBFBD>", "<EFBFBD>" };

		m_prefixes.insert(prefixes, prefixes + sizeof(prefixes) / sizeof(prefixes[0]));
		m_punctuation.insert(punctuation, punctuation + sizeof(punctuation) / sizeof(punctuation[0]));
		m_quotes.insert(quotes, quotes + sizeof(quotes) / sizeof(quotes[0]));
	}
}
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "Tokenizer.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool Tokenizer::m_initialized = false;
|
||||
std::set<std::string> Tokenizer::m_prefixes;
|
||||
std::set<std::string> Tokenizer::m_punctuation;
|
||||
std::set<std::string> Tokenizer::m_quotes;
|
||||
|
||||
/**
 * Split sentence-final punctuation off one whitespace-delimited token and
 * append the resulting pieces to newTokens:
 *  - a lone punctuation mark                        -> mark, "\n"
 *  - token ending in "." that is listed in
 *    m_prefixes (an abbreviation)                   -> token unchanged
 *  - any other token ending in "." or a mark from
 *    m_punctuation                                  -> word, mark, "\n"
 *  - everything else                                -> token unchanged
 * The "\n" piece marks the end of a sentence.
 *
 * \param newTokens vector the pieces are appended to
 * \param token     token to examine; an empty token appends nothing
 */
void Tokenizer::SentenceSeparator(vector<string> &newTokens, const string &token)
{
	// token.size()-1 wraps around for an empty string and substr() would
	// throw std::out_of_range; an empty token has nothing to split.
	if (token.empty())
		return;

	const string lastChar = token.substr(token.size()-1, 1);
	const bool endsInPunctuation = m_punctuation.find(lastChar) != m_punctuation.end();

	if (token.size() == 1 && endsInPunctuation)
	{ // the token is nothing but the punctuation mark
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else if (lastChar == "." && m_prefixes.find(token) != m_prefixes.end())
	{ // found a prefix. add as is
		newTokens.push_back(token);
	}
	else if (lastChar == "." || endsInPunctuation)
	{ // end of sentence: detach the mark from the word in front of it
		newTokens.push_back(token.substr(0, token.size()-1));
		newTokens.push_back(lastChar);
		newTokens.push_back("\n");
	}
	else
	{ // just a normal word
		newTokens.push_back(token);
	}
}
|
||||
|
||||
/**
 * Detach a leading quotation mark from a token.
 *
 * If the token starts with one of the marks in m_quotes, the mark and the
 * rest of the token (when not empty) are appended to newTokens as separate
 * pieces; otherwise the token is appended unchanged.
 *
 * The whole mark is compared, not just the first byte, so marks that are
 * stored as more than one byte (e.g. UTF-8 typographic quotes) are found
 * as well. For one-byte marks this is the same test as before.
 *
 * \param newTokens vector the pieces are appended to
 * \param token     token to examine; an empty token appends nothing
 */
void Tokenizer::QuotesFirst(vector<string> &newTokens, const string &token)
{
	// substr(1, ...) on an empty string throws std::out_of_range
	if (token.empty())
		return;

	for (set<string>::const_iterator iterQuote = m_quotes.begin() ; iterQuote != m_quotes.end() ; ++iterQuote)
	{
		const string &quote = *iterQuote;
		if (quote.empty())
			continue;

		// compare() only looks at the first quote.size() bytes of the token
		// (fewer if the token is shorter, in which case it cannot be equal)
		if (token.compare(0, quote.size(), quote) == 0)
		{
			newTokens.push_back(quote);
			if (token.size() > quote.size())
				newTokens.push_back(token.substr(quote.size()));
			return;
		}
	}

	// no leading quote
	newTokens.push_back(token);
}
|
||||
|
||||
/**
 * Detach a trailing quotation mark from a token.
 *
 * If the token ends with one of the marks in m_quotes, the text in front of
 * the mark (when not empty) and the mark are appended to newTokens as
 * separate pieces; otherwise the token is appended unchanged.
 *
 * The whole mark is compared, not just the last byte, so marks that are
 * stored as more than one byte (e.g. UTF-8 typographic quotes) are found
 * as well. For one-byte marks this is the same test as before.
 *
 * \param newTokens vector the pieces are appended to
 * \param token     token to examine; an empty token appends nothing
 */
void Tokenizer::QuotesLast(vector<string> &newTokens, const string &token)
{
	// token.size()-1 wraps around for an empty string and substr() would
	// throw std::out_of_range
	if (token.empty())
		return;

	for (set<string>::const_iterator iterQuote = m_quotes.begin() ; iterQuote != m_quotes.end() ; ++iterQuote)
	{
		const string &quote = *iterQuote;
		if (quote.empty() || quote.size() > token.size())
			continue;

		// number of bytes in front of a mark sitting at the very end
		const size_t wordLength = token.size() - quote.size();
		if (token.compare(wordLength, quote.size(), quote) == 0)
		{
			if (wordLength > 0)
				newTokens.push_back(token.substr(0, wordLength));
			newTokens.push_back(quote);
			return;
		}
	}

	// no trailing quote
	newTokens.push_back(token);
}
|
||||
|
||||
/**
 * Tokenize a piece of text.
 *
 * The input is first cut into raw tokens by the global ::Tokenize() helper
 * (called with space, tab and newline as delimiters). Three passes are then
 * run over the tokens, each feeding the next:
 *  1. SentenceSeparator - splits off sentence-final punctuation
 *  2. QuotesFirst       - splits off leading quotation marks
 *  3. QuotesLast        - splits off trailing quotation marks
 *
 * \param input text to tokenize
 * \return the final pieces joined by Join() with a single space between them
 */
string Tokenizer::Tokenize(const string &input)
{
	vector<string> oldTokens = ::Tokenize(input, " \t\n");
	vector<string> newTokens;
	vector<string>::const_iterator iterTokens;

	// pass 1: sentence boundaries
	for (iterTokens = oldTokens.begin() ; iterTokens != oldTokens.end() ; ++iterTokens)
		SentenceSeparator(newTokens, *iterTokens);

	// pass 2: leading quotes. swap() hands the previous result over as the
	// new input without copying every string
	oldTokens.swap(newTokens);
	newTokens.clear();
	for (iterTokens = oldTokens.begin() ; iterTokens != oldTokens.end() ; ++iterTokens)
		QuotesFirst(newTokens, *iterTokens);

	// pass 3: trailing quotes
	oldTokens.swap(newTokens);
	newTokens.clear();
	for (iterTokens = oldTokens.begin() ; iterTokens != oldTokens.end() ; ++iterTokens)
		QuotesLast(newTokens, *iterTokens);

	return Join(" ", newTokens);
}
|
||||
|
||||
|
||||
/**
 * Construct a tokenizer.
 *
 * The first instance ever constructed fills the static lookup tables
 * (m_prefixes, m_punctuation, m_quotes); later instances find
 * m_initialized set and leave the tables alone.
 *
 * \param language stored in m_language; the tables filled here are the
 *                 same whatever value is passed
 */
Tokenizer::Tokenizer(const std::string &language)
:m_language(language)
{
	if (!m_initialized)
	{
		m_initialized = true;

		// abbreviations whose trailing "." does not end a sentence
		const char *const prefixes[] = {
			"adj.", "adm.", "adv.", "asst.", "ave.", "bldg.", "brig.", "bros.",
			"capt.", "cmdr.", "col.", "comdr.", "con.", "corp.", "cpl.", "dr.",
			"ens.", "gen.", "gov.", "hon.", "hosp.", "insp.", "lt.", "maj.",
			"messrs.", "mlle.", "mme.", "mr.", "mrs.", "ms.", "msgr.", "op.",
			"ord.", "pfc.", "ph.", "prof.", "pvt.", "rep.", "reps.", "res.",
			"rev.", "rt.", "sen.", "sens.", "sgt.", "sr.", "st.", "supt.",
			"surg.", "v.", "vs."
		};

		// marks that end a sentence
		const char *const punctuation[] = { ":", ".", "!", "?", ";" };

		// plain and typographic quotation marks
		const char *const quotes[] = { "\"", "'", "“", "„" };

		m_prefixes.insert(prefixes, prefixes + sizeof(prefixes) / sizeof(prefixes[0]));
		m_punctuation.insert(punctuation, punctuation + sizeof(punctuation) / sizeof(punctuation[0]));
		m_quotes.insert(quotes, quotes + sizeof(quotes) / sizeof(quotes[0]));
	}
}
|
||||
|
@ -1,45 +1,45 @@
|
||||
// $Id: FactorCollection.cpp 1218 2007-02-16 18:08:37Z hieuhoang1972 $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
/**
 * Simple rule-based tokenizer that works on whitespace-delimited tokens.
 * The lookup tables are static, i.e. shared by all instances.
 */
class Tokenizer
{
public:
	/** \param language kept in m_language */
	Tokenizer(const std::string &language);

	/** Tokenize input and return the result as a single string. */
	std::string Tokenize(const std::string &input);

	// Per-token steps. Each one examines token and appends the piece(s)
	// it produces to newTokens.
	void SentenceSeparator(std::vector<std::string> &newTokens, const std::string &token);
	void QuotesFirst(std::vector<std::string> &newTokens, const std::string &token);
	void QuotesLast(std::vector<std::string> &newTokens, const std::string &token);

protected:
	// shared lookup tables and the flag guarding their one-time set-up
	static bool m_initialized;
	static std::set<std::string> m_prefixes;
	static std::set<std::string> m_punctuation;
	static std::set<std::string> m_quotes;

	// per-instance state
	std::string m_language;
};
|
||||
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
|
||||
/**
 * Simple rule-based tokenizer that works on whitespace-delimited tokens.
 * The lookup tables are static, i.e. shared by all instances.
 */
class Tokenizer
{
public:
	/** \param language kept in m_language */
	Tokenizer(const std::string &language);

	/** Tokenize input and return the result as a single string. */
	std::string Tokenize(const std::string &input);

	// Per-token steps. Each one examines token and appends the piece(s)
	// it produces to newTokens.
	void SentenceSeparator(std::vector<std::string> &newTokens, const std::string &token);
	void QuotesFirst(std::vector<std::string> &newTokens, const std::string &token);
	void QuotesLast(std::vector<std::string> &newTokens, const std::string &token);

protected:
	// shared lookup tables and the flag guarding their one-time set-up
	static bool m_initialized;
	static std::set<std::string> m_prefixes;
	static std::set<std::string> m_punctuation;
	static std::set<std::string> m_quotes;

	// per-instance state
	std::string m_language;
};
|
||||
|
||||
|
@ -1,382 +1,382 @@
|
||||
// $Id: IOStream.cpp 110 2007-09-19 22:10:27Z hieu $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#include <iostream>
|
||||
#include "TypeDef.h"
|
||||
#include "Util.h"
|
||||
#include "IOStream.h"
|
||||
#include "Hypothesis.h"
|
||||
#include "WordsRange.h"
|
||||
#include "TrellisPathList.h"
|
||||
#include "StaticData.h"
|
||||
#include "DummyScoreProducers.h"
|
||||
#include "InputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Create an IOStream that reads its input from standard input.
 *
 * \param inputFactorOrder  copied into m_inputFactorOrder
 * \param outputFactorOrder copied into m_outputFactorOrder
 * \param inputFactorUsed   copied into m_inputFactorUsed
 * \param nBestSize         an n-best stream is only set up when this is > 0
 * \param nBestFilePath     where the n-best list goes; "-" selects stdout,
 *                          which also suppresses the single-best output
 */
IOStream::IOStream(
	const vector<FactorType> &inputFactorOrder
	, const vector<FactorType> &outputFactorOrder
	, const FactorMask &inputFactorUsed
	, size_t nBestSize
	, const string &nBestFilePath)
:m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
,m_inputFile(NULL)
,m_inputStream(&std::cin)
,m_nBestStream(NULL)
{
	const bool nBestToStdout = (nBestSize > 0 && nBestFilePath == "-");

	// stdout cannot carry both the n-best list and the single-best output
	m_surpressSingleBestOutput = nBestToStdout;

	if (nBestToStdout)
		m_nBestStream = &std::cout;
	else if (nBestSize > 0)
		m_nBestStream = new std::ofstream(nBestFilePath.c_str());	// owned, released in the destructor
}
|
||||
|
||||
/**
 * Create an IOStream that reads its input from a file.
 *
 * \param inputFactorOrder  copied into m_inputFactorOrder
 * \param outputFactorOrder copied into m_outputFactorOrder
 * \param inputFactorUsed   copied into m_inputFactorUsed
 * \param nBestSize         an n-best stream is only set up when this is > 0
 * \param nBestFilePath     where the n-best list goes; "-" selects stdout,
 *                          which also suppresses the single-best output
 * \param inputFilePath     file opened through InputFileStream and used as
 *                          the input stream
 */
IOStream::IOStream(const std::vector<FactorType> &inputFactorOrder
	, const std::vector<FactorType> &outputFactorOrder
	, const FactorMask &inputFactorUsed
	, size_t nBestSize
	, const std::string &nBestFilePath
	, const std::string &inputFilePath)
:m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
,m_inputFilePath(inputFilePath)
,m_inputFile(new InputFileStream(inputFilePath))
,m_nBestStream(NULL)
{
	// all reads go through the file opened above
	m_inputStream = m_inputFile;

	const bool nBestToStdout = (nBestSize > 0 && nBestFilePath == "-");

	// stdout cannot carry both the n-best list and the single-best output
	m_surpressSingleBestOutput = nBestToStdout;

	if (nBestToStdout)
		m_nBestStream = &std::cout;
	else if (nBestSize > 0)
		m_nBestStream = new std::ofstream(nBestFilePath.c_str());	// owned, released in the destructor
}
|
||||
|
||||
/**
 * Release the input file and, when the n-best list went to a file, the
 * n-best stream.
 */
IOStream::~IOStream()
{
	// deleting a null pointer does nothing, so no NULL tests are needed
	delete m_inputFile;

	// In the constructors of this file the flag is set exactly when the
	// n-best stream is std::cout, which must not be deleted. Otherwise the
	// stream is either NULL or a file stream created with new.
	if (!m_surpressSingleBestOutput)
		delete m_nBestStream;
}
|
||||
|
||||
/**
 * Read the next line from the input stream.
 *
 * \return the line after passing it through Trim(), or an empty string
 *         when no line could be read
 */
string IOStream::GetInput()
{
	std::string line;

	// Test the stream itself rather than eof(): getline() also sets eofbit
	// when it successfully reads a last line that has no trailing newline,
	// and that line must not be thrown away.
	if (!getline(*m_inputStream, line, '\n'))
		return "";

	return Trim(line);
}
|
||||
|
||||
/**
 * Fill inputType from the input stream.
 *
 * \param inputType object to fill; it is deleted here when reading fails
 * \return inputType, tagged with the next translation id, when
 *         inputType->Read() succeeds; NULL otherwise
 */
InputType*IOStream::GetInput(InputType* inputType)
{
	if (!inputType->Read(*m_inputStream, m_inputFactorOrder))
	{
		// nothing could be read: dispose of the object for the caller
		delete inputType;
		return NULL;
	}

	inputType->SetTranslationId(m_translationId++);
	return inputType;
}
|
||||
|
||||
/***
|
||||
* print surface factor only for the given phrase
|
||||
*/
|
||||
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
|
||||
{
|
||||
assert(outputFactorOrder.size() > 0);
|
||||
if (reportAllFactors == true)
|
||||
{
|
||||
out << phrase;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t size = phrase.GetSize();
|
||||
for (size_t pos = 0 ; pos < size ; pos++)
|
||||
{
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
|
||||
out << *factor;
|
||||
|
||||
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++)
|
||||
{
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
||||
out << "|" << *factor;
|
||||
}
|
||||
out << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
|
||||
,bool reportSegmentation, bool reportAllFactors)
|
||||
{
|
||||
if ( hypo != NULL)
|
||||
{
|
||||
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
|
||||
OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
|
||||
|
||||
if (reportSegmentation == true
|
||||
&& hypo->GetCurrTargetPhrase().GetSize() > 0) {
|
||||
out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
|
||||
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IOStream::Backtrack(const Hypothesis *hypo){
|
||||
|
||||
if (hypo->GetPrevHypo() != NULL) {
|
||||
VERBOSE(3,hypo->GetId() << " <= ");
|
||||
Backtrack(hypo->GetPrevHypo());
|
||||
}
|
||||
}
|
||||
|
||||
void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
|
||||
{
|
||||
if (hypo != NULL)
|
||||
{
|
||||
VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
|
||||
VERBOSE(3,"Best path: ");
|
||||
Backtrack(hypo);
|
||||
VERBOSE(3,"0" << std::endl);
|
||||
|
||||
if (!m_surpressSingleBestOutput)
|
||||
{
|
||||
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
VERBOSE(1, "NO BEST TRANSLATION" << endl);
|
||||
if (!m_surpressSingleBestOutput)
|
||||
{
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IOStream::OutputNBestList(const TrellisPathList &nBestList, long translationId)
|
||||
{
|
||||
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
|
||||
bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
|
||||
|
||||
TrellisPathList::const_iterator iter;
|
||||
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
|
||||
{
|
||||
const TrellisPath &path = **iter;
|
||||
const std::vector<const Hypothesis *> &edges = path.GetEdges();
|
||||
|
||||
// print the surface factor of the translation
|
||||
*m_nBestStream << translationId << " ||| ";
|
||||
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
|
||||
{
|
||||
const Hypothesis &edge = *edges[currEdge];
|
||||
OutputSurface(*m_nBestStream, edge.GetCurrTargetPhrase(), m_outputFactorOrder, false); // false for not reporting all factors
|
||||
}
|
||||
*m_nBestStream << " ||| ";
|
||||
|
||||
// print the scores in a hardwired order
|
||||
// before each model type, the corresponding command-line-like name must be emitted
|
||||
// MERT script relies on this
|
||||
|
||||
// basic distortion
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "d: ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetDistortionScoreProducer()) << " ";
|
||||
|
||||
// reordering
|
||||
vector<LexicalReordering*> rms = StaticData::Instance().GetReorderModels();
|
||||
if(rms.size() > 0)
|
||||
{
|
||||
vector<LexicalReordering*>::iterator iter;
|
||||
for(iter = rms.begin(); iter != rms.end(); ++iter)
|
||||
{
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
for (size_t j = 0; j<scores.size(); ++j)
|
||||
{
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// lm
|
||||
const LMList& lml = StaticData::Instance().GetAllLM();
|
||||
if (lml.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "lm: ";
|
||||
LMList::const_iterator lmi = lml.begin();
|
||||
for (; lmi != lml.end(); ++lmi) {
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
|
||||
}
|
||||
}
|
||||
|
||||
// translation components
|
||||
if (StaticData::Instance().GetInputType()==0){
|
||||
// translation components for text input
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||
if (pds.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "tm: ";
|
||||
vector<PhraseDictionary*>::iterator iter;
|
||||
for (iter = pds.begin(); iter != pds.end(); ++iter) {
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
for (size_t j = 0; j<scores.size(); ++j)
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
// translation components for Confusion Network input
|
||||
// first translation component has GetNumInputScores() scores from the input Confusion Network
|
||||
// at the beginning of the vector
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||
if (pds.size() > 0) {
|
||||
vector<PhraseDictionary*>::iterator iter;
|
||||
|
||||
iter = pds.begin();
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
|
||||
size_t pd_numinputscore = (*iter)->GetNumInputScores();
|
||||
|
||||
if (pd_numinputscore){
|
||||
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "I: ";
|
||||
|
||||
for (size_t j = 0; j < pd_numinputscore; ++j)
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
|
||||
|
||||
for (iter = pds.begin() ; iter != pds.end(); ++iter) {
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
|
||||
size_t pd_numinputscore = (*iter)->GetNumInputScores();
|
||||
|
||||
if (iter == pds.begin() && labeledOutput)
|
||||
*m_nBestStream << "tm: ";
|
||||
for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// word penalty
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "w: ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";
|
||||
|
||||
// generation
|
||||
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
|
||||
if (gds.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "g: ";
|
||||
vector<GenerationDictionary*>::iterator iter;
|
||||
for (iter = gds.begin(); iter != gds.end(); ++iter) {
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
for (size_t j = 0; j<scores.size(); j++) {
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// total
|
||||
*m_nBestStream << "||| " << path.GetTotalScore();
|
||||
if (includeAlignment) {
|
||||
*m_nBestStream << " |||";
|
||||
for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
|
||||
{
|
||||
const Hypothesis &edge = *edges[currEdge];
|
||||
WordsRange sourceRange = edge.GetCurrSourceWordsRange();
|
||||
WordsRange targetRange = edge.GetCurrTargetWordsRange();
|
||||
*m_nBestStream << " " << sourceRange.GetStartPos();
|
||||
if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
|
||||
*m_nBestStream << "-" << sourceRange.GetEndPos();
|
||||
}
|
||||
*m_nBestStream << "=" << targetRange.GetStartPos();
|
||||
if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
|
||||
*m_nBestStream << "-" << targetRange.GetEndPos();
|
||||
}
|
||||
}
|
||||
}
|
||||
*m_nBestStream << endl;
|
||||
}
|
||||
|
||||
*m_nBestStream<<std::flush;
|
||||
}
|
||||
|
||||
void IOStream::ClearInStream()
|
||||
{
|
||||
m_inputStream->clear();
|
||||
}
|
||||
|
||||
void IOStream::FlushOutStream()
|
||||
{
|
||||
cout.flush();
|
||||
}
|
||||
|
||||
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#include <iostream>
|
||||
#include "TypeDef.h"
|
||||
#include "Util.h"
|
||||
#include "IOStream.h"
|
||||
#include "Hypothesis.h"
|
||||
#include "WordsRange.h"
|
||||
#include "TrellisPathList.h"
|
||||
#include "StaticData.h"
|
||||
#include "DummyScoreProducers.h"
|
||||
#include "InputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/***
 * Construct an IOStream that reads its input from standard input.
 *
 * \param inputFactorOrder  factor order used when reading input (kept by reference)
 * \param outputFactorOrder factor order used when printing output (kept by reference)
 * \param inputFactorUsed   factor mask (kept by reference)
 * \param nBestSize         n-best output is only set up when this is > 0
 * \param nBestFilePath     "-" sends the n-best list to stdout and suppresses
 *                          the single-best output; anything else is opened as a file
 *
 * Members are initialised in declaration order, and m_translationId now
 * starts at 0 instead of being left indeterminate until ResetTranslationId()
 * is called.
 */
IOStream::IOStream(
        const vector<FactorType>  &inputFactorOrder
      , const vector<FactorType>  &outputFactorOrder
      , const FactorMask          &inputFactorUsed
      , size_t                    nBestSize
      , const string              &nBestFilePath)
:m_translationId(0)
,m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
,m_nBestStream(NULL)
,m_inputStream(&std::cin)
,m_inputFile(NULL)
,m_surpressSingleBestOutput(false)
{
  if (nBestSize > 0)
  {
    if (nBestFilePath == "-")
    {
      // n-best list shares stdout, so the 1-best line must not be printed there too
      m_nBestStream = &std::cout;
      m_surpressSingleBestOutput = true;
    }
    else
    {
      // heap-allocated file stream; released by the destructor
      std::ofstream *nBestFile = new std::ofstream;
      m_nBestStream = nBestFile;
      nBestFile->open(nBestFilePath.c_str());
    }
  }
}
|
||||
|
||||
/***
 * Construct an IOStream that reads its input from a file.
 *
 * \param inputFactorOrder  factor order used when reading input (kept by reference)
 * \param outputFactorOrder factor order used when printing output (kept by reference)
 * \param inputFactorUsed   factor mask (kept by reference)
 * \param nBestSize         n-best output is only set up when this is > 0
 * \param nBestFilePath     "-" sends the n-best list to stdout and suppresses
 *                          the single-best output; anything else is opened as a file
 * \param inputFilePath     file to read input from (opened through InputFileStream)
 *
 * Members are initialised in declaration order, and m_translationId now
 * starts at 0 instead of being left indeterminate until ResetTranslationId()
 * is called.
 */
IOStream::IOStream(const std::vector<FactorType>  &inputFactorOrder
                 , const std::vector<FactorType>  &outputFactorOrder
                 , const FactorMask               &inputFactorUsed
                 , size_t                         nBestSize
                 , const std::string              &nBestFilePath
                 , const std::string              &inputFilePath)
:m_translationId(0)
,m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
,m_nBestStream(NULL)
,m_inputFilePath(inputFilePath)
,m_inputStream(NULL)
,m_inputFile(new InputFileStream(inputFilePath))
,m_surpressSingleBestOutput(false)
{
  // m_inputStream is declared before m_inputFile, so it cannot be pointed at
  // the file in the initialiser list; do it here once m_inputFile exists
  m_inputStream = m_inputFile;

  if (nBestSize > 0)
  {
    if (nBestFilePath == "-")
    {
      // n-best list shares stdout, so the 1-best line must not be printed there too
      m_nBestStream = &std::cout;
      m_surpressSingleBestOutput = true;
    }
    else
    {
      // heap-allocated file stream; released by the destructor
      std::ofstream *nBestFile = new std::ofstream;
      m_nBestStream = nBestFile;
      nBestFile->open(nBestFilePath.c_str());
    }
  }
}
|
||||
|
||||
/***
 * Release the streams this object allocated: the input file (if any) and the
 * n-best file stream.  When the n-best list goes to stdout
 * (m_surpressSingleBestOutput is set) the stream is std::cout and is left alone.
 */
IOStream::~IOStream()
{
  // deleting a NULL pointer is a no-op, so no explicit NULL tests are needed
  delete m_inputFile;

  const bool nBestGoesToStdout = m_surpressSingleBestOutput;
  if (!nBestGoesToStdout)
  { // outputting n-best to file, rather than stdout. need to close file and delete obj
    delete m_nBestStream;
  }
}
|
||||
|
||||
/***
 * Read one line of raw text from the input stream.
 *
 * \return the line passed through Trim(), or an empty string when nothing
 *         could be read
 *
 * The stream is tested for extraction failure rather than for eof(): getline
 * sets eofbit (but not failbit) when the last line of the input is not
 * terminated by '\n', and testing eof() would silently throw that line away.
 */
string IOStream::GetInput()
{
  std::string line;
  if (!getline(*m_inputStream, line, '\n'))
    return "";
  return Trim(line);
}
|
||||
|
||||
/***
 * Fill the given input object from the input stream.
 *
 * \param inputType object to read into
 * \return inputType, stamped with the next translation id, when Read()
 *         succeeds; NULL otherwise -- in which case inputType has been
 *         DELETED and must not be used by the caller
 */
InputType*IOStream::GetInput(InputType* inputType)
{
  if (!inputType->Read(*m_inputStream, m_inputFactorOrder))
  {
    delete inputType;
    return NULL;
  }

  inputType->SetTranslationId(m_translationId++);
  return inputType;
}
|
||||
|
||||
/***
|
||||
* print surface factor only for the given phrase
|
||||
*/
|
||||
void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
|
||||
{
|
||||
assert(outputFactorOrder.size() > 0);
|
||||
if (reportAllFactors == true)
|
||||
{
|
||||
out << phrase;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t size = phrase.GetSize();
|
||||
for (size_t pos = 0 ; pos < size ; pos++)
|
||||
{
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
|
||||
out << *factor;
|
||||
|
||||
for (size_t i = 1 ; i < outputFactorOrder.size() ; i++)
|
||||
{
|
||||
const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
|
||||
out << "|" << *factor;
|
||||
}
|
||||
out << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OutputSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
|
||||
,bool reportSegmentation, bool reportAllFactors)
|
||||
{
|
||||
if ( hypo != NULL)
|
||||
{
|
||||
OutputSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
|
||||
OutputSurface(out, hypo->GetCurrTargetPhrase(), outputFactorOrder, reportAllFactors);
|
||||
|
||||
if (reportSegmentation == true
|
||||
&& hypo->GetCurrTargetPhrase().GetSize() > 0) {
|
||||
out << "|" << hypo->GetCurrSourceWordsRange().GetStartPos()
|
||||
<< "-" << hypo->GetCurrSourceWordsRange().GetEndPos() << "| ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IOStream::Backtrack(const Hypothesis *hypo){
|
||||
|
||||
if (hypo->GetPrevHypo() != NULL) {
|
||||
VERBOSE(3,hypo->GetId() << " <= ");
|
||||
Backtrack(hypo->GetPrevHypo());
|
||||
}
|
||||
}
|
||||
|
||||
void IOStream::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
|
||||
{
|
||||
if (hypo != NULL)
|
||||
{
|
||||
VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
|
||||
VERBOSE(3,"Best path: ");
|
||||
Backtrack(hypo);
|
||||
VERBOSE(3,"0" << std::endl);
|
||||
|
||||
if (!m_surpressSingleBestOutput)
|
||||
{
|
||||
OutputSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
VERBOSE(1, "NO BEST TRANSLATION" << endl);
|
||||
if (!m_surpressSingleBestOutput)
|
||||
{
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void IOStream::OutputNBestList(const TrellisPathList &nBestList, long translationId)
|
||||
{
|
||||
bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
|
||||
bool includeAlignment = StaticData::Instance().NBestIncludesAlignment();
|
||||
|
||||
TrellisPathList::const_iterator iter;
|
||||
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter)
|
||||
{
|
||||
const TrellisPath &path = **iter;
|
||||
const std::vector<const Hypothesis *> &edges = path.GetEdges();
|
||||
|
||||
// print the surface factor of the translation
|
||||
*m_nBestStream << translationId << " ||| ";
|
||||
for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--)
|
||||
{
|
||||
const Hypothesis &edge = *edges[currEdge];
|
||||
OutputSurface(*m_nBestStream, edge.GetCurrTargetPhrase(), m_outputFactorOrder, false); // false for not reporting all factors
|
||||
}
|
||||
*m_nBestStream << " ||| ";
|
||||
|
||||
// print the scores in a hardwired order
|
||||
// before each model type, the corresponding command-line-like name must be emitted
|
||||
// MERT script relies on this
|
||||
|
||||
// basic distortion
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "d: ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetDistortionScoreProducer()) << " ";
|
||||
|
||||
// reordering
|
||||
vector<LexicalReordering*> rms = StaticData::Instance().GetReorderModels();
|
||||
if(rms.size() > 0)
|
||||
{
|
||||
vector<LexicalReordering*>::iterator iter;
|
||||
for(iter = rms.begin(); iter != rms.end(); ++iter)
|
||||
{
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
for (size_t j = 0; j<scores.size(); ++j)
|
||||
{
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// lm
|
||||
const LMList& lml = StaticData::Instance().GetAllLM();
|
||||
if (lml.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "lm: ";
|
||||
LMList::const_iterator lmi = lml.begin();
|
||||
for (; lmi != lml.end(); ++lmi) {
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(*lmi) << " ";
|
||||
}
|
||||
}
|
||||
|
||||
// translation components
|
||||
if (StaticData::Instance().GetInputType()==0){
|
||||
// translation components for text input
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||
if (pds.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "tm: ";
|
||||
vector<PhraseDictionary*>::iterator iter;
|
||||
for (iter = pds.begin(); iter != pds.end(); ++iter) {
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
for (size_t j = 0; j<scores.size(); ++j)
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
else{
|
||||
// translation components for Confusion Network input
|
||||
// first translation component has GetNumInputScores() scores from the input Confusion Network
|
||||
// at the beginning of the vector
|
||||
vector<PhraseDictionary*> pds = StaticData::Instance().GetPhraseDictionaries();
|
||||
if (pds.size() > 0) {
|
||||
vector<PhraseDictionary*>::iterator iter;
|
||||
|
||||
iter = pds.begin();
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
|
||||
size_t pd_numinputscore = (*iter)->GetNumInputScores();
|
||||
|
||||
if (pd_numinputscore){
|
||||
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "I: ";
|
||||
|
||||
for (size_t j = 0; j < pd_numinputscore; ++j)
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
|
||||
|
||||
for (iter = pds.begin() ; iter != pds.end(); ++iter) {
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
|
||||
size_t pd_numinputscore = (*iter)->GetNumInputScores();
|
||||
|
||||
if (iter == pds.begin() && labeledOutput)
|
||||
*m_nBestStream << "tm: ";
|
||||
for (size_t j = pd_numinputscore; j < scores.size() ; ++j)
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// word penalty
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "w: ";
|
||||
*m_nBestStream << path.GetScoreBreakdown().GetScoreForProducer(StaticData::Instance().GetWordPenaltyProducer()) << " ";
|
||||
|
||||
// generation
|
||||
vector<GenerationDictionary*> gds = StaticData::Instance().GetGenerationDictionaries();
|
||||
if (gds.size() > 0) {
|
||||
if (labeledOutput)
|
||||
*m_nBestStream << "g: ";
|
||||
vector<GenerationDictionary*>::iterator iter;
|
||||
for (iter = gds.begin(); iter != gds.end(); ++iter) {
|
||||
vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer(*iter);
|
||||
for (size_t j = 0; j<scores.size(); j++) {
|
||||
*m_nBestStream << scores[j] << " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// total
|
||||
*m_nBestStream << "||| " << path.GetTotalScore();
|
||||
if (includeAlignment) {
|
||||
*m_nBestStream << " |||";
|
||||
for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--)
|
||||
{
|
||||
const Hypothesis &edge = *edges[currEdge];
|
||||
WordsRange sourceRange = edge.GetCurrSourceWordsRange();
|
||||
WordsRange targetRange = edge.GetCurrTargetWordsRange();
|
||||
*m_nBestStream << " " << sourceRange.GetStartPos();
|
||||
if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
|
||||
*m_nBestStream << "-" << sourceRange.GetEndPos();
|
||||
}
|
||||
*m_nBestStream << "=" << targetRange.GetStartPos();
|
||||
if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
|
||||
*m_nBestStream << "-" << targetRange.GetEndPos();
|
||||
}
|
||||
}
|
||||
}
|
||||
*m_nBestStream << endl;
|
||||
}
|
||||
|
||||
*m_nBestStream<<std::flush;
|
||||
}
|
||||
|
||||
void IOStream::ClearInStream()
|
||||
{
|
||||
m_inputStream->clear();
|
||||
}
|
||||
|
||||
void IOStream::FlushOutStream()
|
||||
{
|
||||
cout.flush();
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,87 +1,87 @@
|
||||
// $Id: IOStream.h 110 2007-09-19 22:10:27Z hieu $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "TypeDef.h"
|
||||
#include "Sentence.h"
|
||||
|
||||
class FactorMask;
|
||||
class FactorCollection;
|
||||
class Hypothesis;
|
||||
class TrellisPathList;
|
||||
class InputFileStream;
|
||||
|
||||
class IOStream
|
||||
{
|
||||
protected:
|
||||
long m_translationId;
|
||||
|
||||
const std::vector<FactorType> &m_inputFactorOrder;
|
||||
const std::vector<FactorType> &m_outputFactorOrder;
|
||||
const FactorMask &m_inputFactorUsed;
|
||||
std::ostream *m_nBestStream;
|
||||
std::string m_inputFilePath;
|
||||
std::istream *m_inputStream;
|
||||
InputFileStream *m_inputFile;
|
||||
bool m_surpressSingleBestOutput;
|
||||
|
||||
public:
|
||||
IOStream(const std::vector<FactorType> &inputFactorOrder
|
||||
, const std::vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath);
|
||||
|
||||
IOStream(const std::vector<FactorType> &inputFactorOrder
|
||||
, const std::vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath
|
||||
, const std::string &inputFilePath);
|
||||
~IOStream();
|
||||
|
||||
InputType* GetInput(InputType *inputType);
|
||||
std::string GetInput();
|
||||
void OutputBestHypo(const Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
|
||||
void OutputNBestList(const TrellisPathList &nBestList, long translationId);
|
||||
void Backtrack(const Hypothesis *hypo);
|
||||
|
||||
void ResetTranslationId() { m_translationId = 0; }
|
||||
void ClearInStream();
|
||||
void FlushOutStream();
|
||||
|
||||
};
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "TypeDef.h"
|
||||
#include "Sentence.h"
|
||||
|
||||
class FactorMask;
|
||||
class FactorCollection;
|
||||
class Hypothesis;
|
||||
class TrellisPathList;
|
||||
class InputFileStream;
|
||||
|
||||
class IOStream
|
||||
{
|
||||
protected:
|
||||
long m_translationId;
|
||||
|
||||
const std::vector<FactorType> &m_inputFactorOrder;
|
||||
const std::vector<FactorType> &m_outputFactorOrder;
|
||||
const FactorMask &m_inputFactorUsed;
|
||||
std::ostream *m_nBestStream;
|
||||
std::string m_inputFilePath;
|
||||
std::istream *m_inputStream;
|
||||
InputFileStream *m_inputFile;
|
||||
bool m_surpressSingleBestOutput;
|
||||
|
||||
public:
|
||||
IOStream(const std::vector<FactorType> &inputFactorOrder
|
||||
, const std::vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath);
|
||||
|
||||
IOStream(const std::vector<FactorType> &inputFactorOrder
|
||||
, const std::vector<FactorType> &outputFactorOrder
|
||||
, const FactorMask &inputFactorUsed
|
||||
, size_t nBestSize
|
||||
, const std::string &nBestFilePath
|
||||
, const std::string &inputFilePath);
|
||||
~IOStream();
|
||||
|
||||
InputType* GetInput(InputType *inputType);
|
||||
std::string GetInput();
|
||||
void OutputBestHypo(const Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
|
||||
void OutputNBestList(const TrellisPathList &nBestList, long translationId);
|
||||
void Backtrack(const Hypothesis *hypo);
|
||||
|
||||
void ResetTranslationId() { m_translationId = 0; }
|
||||
void ClearInStream();
|
||||
void FlushOutStream();
|
||||
|
||||
};
|
||||
|
@ -1,230 +1,230 @@
|
||||
// $Id: Main.cpp 110 2007-09-19 22:10:27Z hieu $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#ifdef WIN32
|
||||
// Include Visual Leak Detector
|
||||
#include <vld.h>
|
||||
#include <windows.h>
|
||||
|
||||
#else
|
||||
#define Sleep(millisec) usleep(millisec * 1000)
|
||||
#endif
|
||||
|
||||
#include <signal.h>
|
||||
#include <fstream>
|
||||
#include "Main.h"
|
||||
#include "TrellisPathList.h"
|
||||
#include "FactorCollection.h"
|
||||
#include "Manager.h"
|
||||
#include "Phrase.h"
|
||||
#include "Util.h"
|
||||
#include "TrellisPathList.h"
|
||||
#include "Timer.h"
|
||||
#include "IOStream.h"
|
||||
#include "Sentence.h"
|
||||
#include "ConfusionNet.h"
|
||||
#include "TranslationAnalysis.h"
|
||||
#include "Tokenizer.h"
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#else
|
||||
// those not using autoconf have to build MySQL support for now
|
||||
# define USE_MYSQL 1
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
#undef max
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
//signal(SIGPIPE, SIG_IGN); // ensures the server doesn't exit once the script stops reading from the output pipe
|
||||
|
||||
IFVERBOSE(1)
|
||||
{
|
||||
TRACE_ERR("command: ");
|
||||
for(int i=0;i<argc;++i) TRACE_ERR(argv[i]<<" ");
|
||||
TRACE_ERR(endl);
|
||||
}
|
||||
|
||||
cout.setf(std::ios::fixed);
|
||||
cout.precision(3);
|
||||
cerr.setf(std::ios::fixed);
|
||||
cerr.precision(3);
|
||||
|
||||
// load data structures
|
||||
Parameter *parameter = new Parameter();
|
||||
if (!parameter->LoadParam(argc, argv))
|
||||
{
|
||||
parameter->Explain();
|
||||
delete parameter;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (!StaticData::LoadDataStatic(parameter))
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// set up read/writing class
|
||||
IOStream *ioStream = GetIODevice(staticData);
|
||||
|
||||
// check on weights
|
||||
vector<float> weights = staticData.GetAllWeights();
|
||||
IFVERBOSE(2) {
|
||||
TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager());
|
||||
TRACE_ERR("The global weight vector looks like this:");
|
||||
for (size_t j=0; j<weights.size(); j++) { TRACE_ERR(" " << weights[j]); }
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
// every score must have a weight! check that here:
|
||||
if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) {
|
||||
TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (ioStream == NULL)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// read each sentence & decode
|
||||
size_t lineCount = 0;
|
||||
while(true)
|
||||
{
|
||||
// wait for input
|
||||
string input;
|
||||
while ((input = ioStream->GetInput()).size() == 0)
|
||||
{
|
||||
ioStream->ClearInStream();
|
||||
Sleep(1000);
|
||||
}
|
||||
// tokenize input
|
||||
Tokenizer tokenizer("en");
|
||||
string inputTokenized = tokenizer.Tokenize(input);
|
||||
vector<string> sentences = Tokenize(inputTokenized, "\n");
|
||||
|
||||
std::vector<FactorType> factorOrder;
|
||||
factorOrder.push_back(0);
|
||||
|
||||
vector<string>::iterator iterSentences;
|
||||
for (iterSentences = sentences.begin() ; iterSentences != sentences.end() ; ++iterSentences)
|
||||
{
|
||||
IFVERBOSE(1)
|
||||
ResetUserTime();
|
||||
|
||||
const string &sentence = *iterSentences;
|
||||
|
||||
Sentence sourceSentence(Input);
|
||||
sourceSentence.CreateFromString(factorOrder, sentence, "|");
|
||||
|
||||
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << sentence);
|
||||
|
||||
Manager manager(sourceSentence);
|
||||
manager.ProcessSentence();
|
||||
InputType *source = new Sentence(Input);
|
||||
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
|
||||
staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors()
|
||||
);
|
||||
IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); }
|
||||
delete source;
|
||||
|
||||
// n-best
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
if (nBestSize > 0)
|
||||
{
|
||||
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
|
||||
TrellisPathList nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
|
||||
ioStream->OutputNBestList(nBestList, source->GetTranslationId());
|
||||
//RemoveAllInColl(nBestList);
|
||||
|
||||
IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
|
||||
}
|
||||
|
||||
if (staticData.IsDetailedTranslationReportingEnabled())
|
||||
{
|
||||
TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
|
||||
}
|
||||
|
||||
IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); }
|
||||
manager.CalcDecoderStatistics();
|
||||
ioStream->FlushOutStream();
|
||||
}
|
||||
} // while(true)
|
||||
|
||||
delete ioStream;
|
||||
|
||||
IFVERBOSE(1)
|
||||
PrintUserTime("End.");
|
||||
|
||||
#ifdef HACK_EXIT
|
||||
//This avoids that detructors are called (it can take a long time)
|
||||
exit(EXIT_SUCCESS);
|
||||
#else
|
||||
return EXIT_SUCCESS;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Build the IOStream object the decoder uses for reading input and writing output.
 *
 * When the "input-file" parameter holds exactly one value, that value is passed
 * to the IOStream constructor as the input file path; otherwise the IOStream is
 * constructed without a path (logged as STDOUT/STDIN mode).
 *
 * @param staticData  source of the factor orders, n-best settings and parameters
 * @return heap-allocated IOStream with its translation id reset; the caller
 *         is responsible for deleting it
 */
IOStream *GetIODevice(const StaticData &staticData)
{
  const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
  FactorMask inputFactorUsed(inputFactorOrder);

  IOStream *ioStream = NULL;

  // choose between file-based and stdin/stdout-based io
  const bool readFromFile = (staticData.GetParam("input-file").size() == 1);
  if (!readFromFile)
  {
    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
    ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                            , staticData.GetNBestSize()
                            , staticData.GetNBestFilePath());
  }
  else
  {
    VERBOSE(2,"IO from File" << endl);
    string filePath = staticData.GetParam("input-file")[0];

    ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                            , staticData.GetNBestSize()
                            , staticData.GetNBestFilePath()
                            , filePath);
  }

  ioStream->ResetTranslationId();

  IFVERBOSE(1)
    PrintUserTime("Created input-output object");

  return ioStream;
}
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#ifdef WIN32
|
||||
// Include Visual Leak Detector
|
||||
#include <vld.h>
|
||||
#include <windows.h>
|
||||
|
||||
#else
|
||||
#define Sleep(millisec) usleep(millisec * 1000)
|
||||
#endif
|
||||
|
||||
#include <signal.h>
|
||||
#include <fstream>
|
||||
#include "Main.h"
|
||||
#include "TrellisPathList.h"
|
||||
#include "FactorCollection.h"
|
||||
#include "Manager.h"
|
||||
#include "Phrase.h"
|
||||
#include "Util.h"
|
||||
#include "TrellisPathList.h"
|
||||
#include "Timer.h"
|
||||
#include "IOStream.h"
|
||||
#include "Sentence.h"
|
||||
#include "ConfusionNet.h"
|
||||
#include "TranslationAnalysis.h"
|
||||
#include "Tokenizer.h"
|
||||
|
||||
#if HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#else
|
||||
// those not using autoconf have to build MySQL support for now
|
||||
# define USE_MYSQL 1
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
#undef max
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
//signal(SIGPIPE, SIG_IGN); // ensures the server doesn't exit once the script stops reading from the output pipe
|
||||
|
||||
IFVERBOSE(1)
|
||||
{
|
||||
TRACE_ERR("command: ");
|
||||
for(int i=0;i<argc;++i) TRACE_ERR(argv[i]<<" ");
|
||||
TRACE_ERR(endl);
|
||||
}
|
||||
|
||||
cout.setf(std::ios::fixed);
|
||||
cout.precision(3);
|
||||
cerr.setf(std::ios::fixed);
|
||||
cerr.precision(3);
|
||||
|
||||
// load data structures
|
||||
Parameter *parameter = new Parameter();
|
||||
if (!parameter->LoadParam(argc, argv))
|
||||
{
|
||||
parameter->Explain();
|
||||
delete parameter;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (!StaticData::LoadDataStatic(parameter))
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// set up read/writing class
|
||||
IOStream *ioStream = GetIODevice(staticData);
|
||||
|
||||
// check on weights
|
||||
vector<float> weights = staticData.GetAllWeights();
|
||||
IFVERBOSE(2) {
|
||||
TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager());
|
||||
TRACE_ERR("The global weight vector looks like this:");
|
||||
for (size_t j=0; j<weights.size(); j++) { TRACE_ERR(" " << weights[j]); }
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
// every score must have a weight! check that here:
|
||||
if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) {
|
||||
TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (ioStream == NULL)
|
||||
return EXIT_FAILURE;
|
||||
|
||||
// read each sentence & decode
|
||||
size_t lineCount = 0;
|
||||
while(true)
|
||||
{
|
||||
// wait for input
|
||||
string input;
|
||||
while ((input = ioStream->GetInput()).size() == 0)
|
||||
{
|
||||
ioStream->ClearInStream();
|
||||
Sleep(1000);
|
||||
}
|
||||
// tokenize input
|
||||
Tokenizer tokenizer("en");
|
||||
string inputTokenized = tokenizer.Tokenize(input);
|
||||
vector<string> sentences = Tokenize(inputTokenized, "\n");
|
||||
|
||||
std::vector<FactorType> factorOrder;
|
||||
factorOrder.push_back(0);
|
||||
|
||||
vector<string>::iterator iterSentences;
|
||||
for (iterSentences = sentences.begin() ; iterSentences != sentences.end() ; ++iterSentences)
|
||||
{
|
||||
IFVERBOSE(1)
|
||||
ResetUserTime();
|
||||
|
||||
const string &sentence = *iterSentences;
|
||||
|
||||
Sentence sourceSentence(Input);
|
||||
sourceSentence.CreateFromString(factorOrder, sentence, "|");
|
||||
|
||||
VERBOSE(2,"\nTRANSLATING(" << ++lineCount << "): " << sentence);
|
||||
|
||||
Manager manager(sourceSentence);
|
||||
manager.ProcessSentence();
|
||||
InputType *source = new Sentence(Input);
|
||||
ioStream->OutputBestHypo(manager.GetBestHypothesis(), source->GetTranslationId(),
|
||||
staticData.GetReportSegmentation(),
|
||||
staticData.GetReportAllFactors()
|
||||
);
|
||||
IFVERBOSE(2) { PrintUserTime("Best Hypothesis Generation Time:"); }
|
||||
delete source;
|
||||
|
||||
// n-best
|
||||
size_t nBestSize = staticData.GetNBestSize();
|
||||
if (nBestSize > 0)
|
||||
{
|
||||
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
|
||||
TrellisPathList nBestList;
|
||||
manager.CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
|
||||
ioStream->OutputNBestList(nBestList, source->GetTranslationId());
|
||||
//RemoveAllInColl(nBestList);
|
||||
|
||||
IFVERBOSE(2) { PrintUserTime("N-Best Hypotheses Generation Time:"); }
|
||||
}
|
||||
|
||||
if (staticData.IsDetailedTranslationReportingEnabled())
|
||||
{
|
||||
TranslationAnalysis::PrintTranslationAnalysis(std::cerr, manager.GetBestHypothesis());
|
||||
}
|
||||
|
||||
IFVERBOSE(2) { PrintUserTime("Sentence Decoding Time:"); }
|
||||
manager.CalcDecoderStatistics();
|
||||
ioStream->FlushOutStream();
|
||||
}
|
||||
} // while(true)
|
||||
|
||||
delete ioStream;
|
||||
|
||||
IFVERBOSE(1)
|
||||
PrintUserTime("End.");
|
||||
|
||||
#ifdef HACK_EXIT
|
||||
//This avoids that detructors are called (it can take a long time)
|
||||
exit(EXIT_SUCCESS);
|
||||
#else
|
||||
return EXIT_SUCCESS;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Build the IOStream object the decoder uses for reading input and writing output.
 *
 * When the "input-file" parameter holds exactly one value, that value is passed
 * to the IOStream constructor as the input file path; otherwise the IOStream is
 * constructed without a path (logged as STDOUT/STDIN mode).
 *
 * @param staticData  source of the factor orders, n-best settings and parameters
 * @return heap-allocated IOStream with its translation id reset; the caller
 *         is responsible for deleting it
 */
IOStream *GetIODevice(const StaticData &staticData)
{
  const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder();
  const std::vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
  FactorMask inputFactorUsed(inputFactorOrder);

  IOStream *ioStream = NULL;

  // choose between file-based and stdin/stdout-based io
  const bool readFromFile = (staticData.GetParam("input-file").size() == 1);
  if (!readFromFile)
  {
    VERBOSE(1,"IO from STDOUT/STDIN" << endl);
    ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                            , staticData.GetNBestSize()
                            , staticData.GetNBestFilePath());
  }
  else
  {
    VERBOSE(2,"IO from File" << endl);
    string filePath = staticData.GetParam("input-file")[0];

    ioStream = new IOStream(inputFactorOrder, outputFactorOrder, inputFactorUsed
                            , staticData.GetNBestSize()
                            , staticData.GetNBestFilePath()
                            , filePath);
  }

  ioStream->ResetTranslationId();

  IFVERBOSE(1)
    PrintUserTime("Created input-output object");

  return ioStream;
}
|
||||
|
@ -1,42 +1,42 @@
|
||||
// $Id: Main.h 110 2007-09-19 22:10:27Z hieu $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "StaticData.h"
|
||||
|
||||
class IOStream;
|
||||
|
||||
int main(int argc, char* argv[]);
|
||||
// Builds the IOStream used for decoder input/output from the settings held in
// staticData. The returned object is heap-allocated; the caller must delete it.
IOStream *GetIODevice(const StaticData &staticData);
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (c) 2006 University of Edinburgh
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* Neither the name of the University of Edinburgh nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
|
||||
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
||||
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
// example file on how to use moses library
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "StaticData.h"
|
||||
|
||||
class IOStream;
|
||||
|
||||
int main(int argc, char* argv[]);
|
||||
// Builds the IOStream used for decoder input/output from the settings held in
// staticData. The returned object is heap-allocated; the caller must delete it.
IOStream *GetIODevice(const StaticData &staticData);
|
||||
|
@ -1,7 +1,7 @@
|
||||
bin_PROGRAMS = moses
|
||||
moses_SOURCES = Main.cpp IOStream.cpp TranslationAnalysis.cpp
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src
|
||||
|
||||
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses
|
||||
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
||||
|
||||
bin_PROGRAMS = moses
|
||||
moses_SOURCES = Main.cpp IOStream.cpp TranslationAnalysis.cpp
|
||||
AM_CPPFLAGS = -W -Wall -ffor-scope -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -DUSE_HYPO_POOL -I$(top_srcdir)/moses/src
|
||||
|
||||
moses_LDADD = -L$(top_srcdir)/moses/src -lmoses
|
||||
moses_DEPENDENCIES = $(top_srcdir)/moses/src/libmoses.a
|
||||
|
||||
|
@ -1,112 +1,112 @@
|
||||
// $Id: TranslationAnalysis.cpp 110 2007-09-19 22:10:27Z hieu $
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "StaticData.h"
|
||||
#include "Hypothesis.h"
|
||||
#include "TranslationAnalysis.h"
|
||||
|
||||
namespace TranslationAnalysis {
|
||||
|
||||
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
|
||||
{
|
||||
os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
|
||||
std::vector<const Hypothesis*> translationPath;
|
||||
while (hypo) {
|
||||
translationPath.push_back(hypo);
|
||||
hypo = hypo->GetPrevHypo();
|
||||
}
|
||||
std::reverse(translationPath.begin(), translationPath.end());
|
||||
|
||||
std::vector<std::string> droppedWords;
|
||||
std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
|
||||
++tpi; // skip initial translation state
|
||||
std::vector<std::string> sourceMap;
|
||||
std::vector<std::string> targetMap;
|
||||
std::vector<unsigned int> lmAcc(0);
|
||||
size_t lmCalls = 0;
|
||||
bool doLMStats = ((*tpi)->GetLMStats() != 0);
|
||||
if (doLMStats)
|
||||
lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
|
||||
for (; tpi != translationPath.end(); ++tpi) {
|
||||
std::ostringstream sms;
|
||||
std::ostringstream tms;
|
||||
std::string target = (*tpi)->GetTargetPhraseStringRep();
|
||||
std::string source = (*tpi)->GetSourcePhraseStringRep();
|
||||
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
|
||||
WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
|
||||
|
||||
// language model backoff stats,
|
||||
if (doLMStats) {
|
||||
std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
|
||||
std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
|
||||
std::vector<unsigned int>::iterator acc = lmAcc.begin();
|
||||
|
||||
for (; i != lmstats.end(); ++i, ++acc) {
|
||||
std::vector<unsigned int>::iterator j = i->begin();
|
||||
lmCalls += i->size();
|
||||
for (; j != i->end(); ++j) {
|
||||
(*acc) += *j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool epsilon = false;
|
||||
if (target == "") {
|
||||
target="<EPSILON>";
|
||||
epsilon = true;
|
||||
droppedWords.push_back(source);
|
||||
}
|
||||
os << " SOURCE: " << swr << " " << source << std::endl
|
||||
<< " TRANSLATED AS: " << target << std::endl;
|
||||
size_t twr_i = twr.GetStartPos();
|
||||
size_t swr_i = swr.GetStartPos();
|
||||
if (!epsilon) { sms << twr_i; }
|
||||
if (epsilon) { tms << "del(" << swr_i << ")"; } else { tms << swr_i; }
|
||||
swr_i++; twr_i++;
|
||||
for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
|
||||
sms << '-' << twr_i;
|
||||
}
|
||||
for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
|
||||
tms << '-' << swr_i;
|
||||
}
|
||||
if (!epsilon) targetMap.push_back(sms.str());
|
||||
sourceMap.push_back(tms.str());
|
||||
}
|
||||
std::vector<std::string>::iterator si = sourceMap.begin();
|
||||
std::vector<std::string>::iterator ti = targetMap.begin();
|
||||
os << std::endl << "SOURCE/TARGET SPANS:";
|
||||
os << std::endl << " SOURCE:";
|
||||
for (; si != sourceMap.end(); ++si) {
|
||||
os << " " << *si;
|
||||
}
|
||||
os << std::endl << " TARGET:";
|
||||
for (; ti != targetMap.end(); ++ti) {
|
||||
os << " " << *ti;
|
||||
}
|
||||
os << std::endl << std::endl;
|
||||
if (doLMStats && lmCalls > 0) {
|
||||
std::vector<unsigned int>::iterator acc = lmAcc.begin();
|
||||
const LMList& lmlist = StaticData::Instance().GetAllLM();
|
||||
LMList::const_iterator i = lmlist.begin();
|
||||
for (; acc != lmAcc.end(); ++acc, ++i) {
|
||||
char buf[256];
|
||||
sprintf(buf, "%.4f", (double)(*acc)/(double)lmCalls);
|
||||
os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (droppedWords.size() > 0) {
|
||||
std::vector<std::string>::iterator dwi = droppedWords.begin();
|
||||
os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
|
||||
for (; dwi != droppedWords.end(); ++dwi) {
|
||||
os << "\tdropped=" << *dwi << std::endl;
|
||||
}
|
||||
}
|
||||
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
|
||||
StaticData::Instance().GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
// $Id$
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "StaticData.h"
|
||||
#include "Hypothesis.h"
|
||||
#include "TranslationAnalysis.h"
|
||||
|
||||
namespace TranslationAnalysis {
|
||||
|
||||
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
|
||||
{
|
||||
os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
|
||||
std::vector<const Hypothesis*> translationPath;
|
||||
while (hypo) {
|
||||
translationPath.push_back(hypo);
|
||||
hypo = hypo->GetPrevHypo();
|
||||
}
|
||||
std::reverse(translationPath.begin(), translationPath.end());
|
||||
|
||||
std::vector<std::string> droppedWords;
|
||||
std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
|
||||
++tpi; // skip initial translation state
|
||||
std::vector<std::string> sourceMap;
|
||||
std::vector<std::string> targetMap;
|
||||
std::vector<unsigned int> lmAcc(0);
|
||||
size_t lmCalls = 0;
|
||||
bool doLMStats = ((*tpi)->GetLMStats() != 0);
|
||||
if (doLMStats)
|
||||
lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
|
||||
for (; tpi != translationPath.end(); ++tpi) {
|
||||
std::ostringstream sms;
|
||||
std::ostringstream tms;
|
||||
std::string target = (*tpi)->GetTargetPhraseStringRep();
|
||||
std::string source = (*tpi)->GetSourcePhraseStringRep();
|
||||
WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
|
||||
WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
|
||||
|
||||
// language model backoff stats,
|
||||
if (doLMStats) {
|
||||
std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
|
||||
std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
|
||||
std::vector<unsigned int>::iterator acc = lmAcc.begin();
|
||||
|
||||
for (; i != lmstats.end(); ++i, ++acc) {
|
||||
std::vector<unsigned int>::iterator j = i->begin();
|
||||
lmCalls += i->size();
|
||||
for (; j != i->end(); ++j) {
|
||||
(*acc) += *j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool epsilon = false;
|
||||
if (target == "") {
|
||||
target="<EPSILON>";
|
||||
epsilon = true;
|
||||
droppedWords.push_back(source);
|
||||
}
|
||||
os << " SOURCE: " << swr << " " << source << std::endl
|
||||
<< " TRANSLATED AS: " << target << std::endl;
|
||||
size_t twr_i = twr.GetStartPos();
|
||||
size_t swr_i = swr.GetStartPos();
|
||||
if (!epsilon) { sms << twr_i; }
|
||||
if (epsilon) { tms << "del(" << swr_i << ")"; } else { tms << swr_i; }
|
||||
swr_i++; twr_i++;
|
||||
for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
|
||||
sms << '-' << twr_i;
|
||||
}
|
||||
for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
|
||||
tms << '-' << swr_i;
|
||||
}
|
||||
if (!epsilon) targetMap.push_back(sms.str());
|
||||
sourceMap.push_back(tms.str());
|
||||
}
|
||||
std::vector<std::string>::iterator si = sourceMap.begin();
|
||||
std::vector<std::string>::iterator ti = targetMap.begin();
|
||||
os << std::endl << "SOURCE/TARGET SPANS:";
|
||||
os << std::endl << " SOURCE:";
|
||||
for (; si != sourceMap.end(); ++si) {
|
||||
os << " " << *si;
|
||||
}
|
||||
os << std::endl << " TARGET:";
|
||||
for (; ti != targetMap.end(); ++ti) {
|
||||
os << " " << *ti;
|
||||
}
|
||||
os << std::endl << std::endl;
|
||||
if (doLMStats && lmCalls > 0) {
|
||||
std::vector<unsigned int>::iterator acc = lmAcc.begin();
|
||||
const LMList& lmlist = StaticData::Instance().GetAllLM();
|
||||
LMList::const_iterator i = lmlist.begin();
|
||||
for (; acc != lmAcc.end(); ++acc, ++i) {
|
||||
char buf[256];
|
||||
sprintf(buf, "%.4f", (double)(*acc)/(double)lmCalls);
|
||||
os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (droppedWords.size() > 0) {
|
||||
std::vector<std::string>::iterator dwi = droppedWords.begin();
|
||||
os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
|
||||
for (; dwi != droppedWords.end(); ++dwi) {
|
||||
os << "\tdropped=" << *dwi << std::endl;
|
||||
}
|
||||
}
|
||||
os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED):" << std::endl;
|
||||
StaticData::Instance().GetScoreIndexManager().Debug_PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,25 +1,25 @@
|
||||
// $Id: TranslationAnalysis.h 110 2007-09-19 22:10:27Z hieu $
|
||||
|
||||
/*
|
||||
* also see moses/SentenceStats
|
||||
*/
|
||||
|
||||
#ifndef _TRANSLATION_ANALYSIS_H_
|
||||
#define _TRANSLATION_ANALYSIS_H_
|
||||
|
||||
#include <iostream>
|
||||
|
||||
class Hypothesis;
|
||||
|
||||
namespace TranslationAnalysis
|
||||
{
|
||||
|
||||
/***
|
||||
* print details about the translation represented in hypothesis to
|
||||
* os. Included information: phrase alignment, words dropped, scores
|
||||
*/
|
||||
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
// $Id$
|
||||
|
||||
/*
|
||||
* also see moses/SentenceStats
|
||||
*/
|
||||
|
||||
#ifndef _TRANSLATION_ANALYSIS_H_
|
||||
#define _TRANSLATION_ANALYSIS_H_
|
||||
|
||||
#include <iostream>
|
||||
|
||||
class Hypothesis;
|
||||
|
||||
namespace TranslationAnalysis
|
||||
{
|
||||
|
||||
/***
|
||||
* print details about the translation represented in hypothesis to
|
||||
* os. Included information: phrase alignment, words dropped, scores
|
||||
*/
|
||||
void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,38 +0,0 @@
|
||||
<html>
<head><title>Moses demo</title></head>
<body>
<B>
Moses demo<BR><BR>
<?php
// Demo front end: on POST, the submitted text is appended as one line to the
// file 'input', one line is read back from the file 'output', and both are
// shown on the page. Everything echoed into the page is HTML-escaped because
// it originates from the request or from the decoder output.

$strInput = "";
$strOutput= "";

if ($_SERVER['REQUEST_METHOD'] == 'POST')
{
  // the field may be missing from a hand-crafted request
  $strInput = isset($_REQUEST['txt']) ? $_REQUEST['txt'] : "";
  echo "Input is: ".htmlspecialchars($strInput, ENT_QUOTES)."<BR>";

  $inputFile = fopen('input', 'a') or die("can't open input file");
  $outputFile = fopen('output', 'r') or die("can't open output file");

  fwrite($inputFile, $strInput."\n");

  // fgets() returns false when nothing could be read; show that as empty output
  $strOutput = fgets($outputFile);
  if ($strOutput === false)
  {
    $strOutput = "";
  }

  fclose($inputFile);
  fclose($outputFile);
}
?>

Output is: <?php echo htmlspecialchars($strOutput, ENT_QUOTES); ?><BR>
<BR>
<form action="moses.php" method="POST">
<textarea name="txt" rows="5" cols="50"><?php echo htmlspecialchars($strInput, ENT_QUOTES); ?></textarea>
<BR>
<input type="submit" name="txt_submit" value="Submit">
</form><br><br>


</body>
</html>
|
Loading…
Reference in New Issue
Block a user