mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-01 08:21:47 +03:00
alpha version
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1768 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
bab63339f1
commit
79e3b6a74e
5
reranking/src/data/README
Normal file
5
reranking/src/data/README
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
sample usage:
|
||||
|
||||
../src/nbest -input-file nbest.small -output-file nbest.1best 1 -sort -weights weights
|
||||
|
7
reranking/src/data/nbest.small
Normal file
7
reranking/src/data/nbest.small
Normal file
@ -0,0 +1,7 @@
|
||||
0 ||| Once a major milestone in the Balkans ||| d: 0 -0.608213 0 0 -0.512647 0 0 lm: -35.7187 tm: -3.97053 -17.5137 -3.24082 -15.8638 2.99969 w: -7 ||| -3.92049
|
||||
0 ||| Once a crucial period in the Balkans ||| d: 0 -0.944329 0 0 -1.06468 0 0 lm: -37.5341 tm: -4.27619 -19.441 -3.81074 -14.767 3.99959 w: -7 ||| -4.00353
|
||||
1 ||| Since the world is focused on Iraq , North Korea and a possible crisis with Iran on nuclear weapons , Kosovo is somewhat unnoticed . ||| d: -6 -5.80589 -0.65383 -1.29291 -6.19413 -0.0861354 -0.993748 lm: -112.868 tm: -42.7841 -61.6487 -16.5351 -23.8061 21.9977 w: -25 ||| -13.0796
|
||||
2 ||| The public will soon turn its attention back to that province during a decision regarding his fate . ||| d: -8 -4.61691 0 -3.62979 -4.85916 0 -4.43407 lm: -81.3478 tm: -46.0407 -63.79 -23.7663 -25.175 14.9984 w: -18 ||| -12.1226
|
||||
2 ||| The public will soon be able to turn its attention back into this province during a decision on his fate . ||| d: -8 -5.53064 0 -3.51999 -3.26708 0 -4.44003 lm: -84.7939 tm: -36.2621 -66.32 -21.0804 -33.9136 13.9985 w: -21 ||| -12.1227
|
||||
2 ||| The public will soon turn his attention to them at a decision on his destiny . ||| d: -8 -5.3448 0 -2.65118 -4.35949 0 -3.95447 lm: -67.451 tm: -54.851 -89.0503 -17.9389 -22.9488 12.9986 w: -16 ||| -12.1234
|
||||
2 ||| The public will soon turn his attention to them at a decision on his destiny . ||| d: -8 -5.3448 0 -2.65118 -4.35949 0 -3.95447 lm: -67.451 tm: -54.851 -89.0503 -17.9389 -22.9488 12.9986 w: -16 ||| -12.1234
|
11
reranking/src/data/weights
Normal file
11
reranking/src/data/weights
Normal file
@ -0,0 +1,11 @@
|
||||
0
|
||||
1 2 3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
12 13
|
58
reranking/src/src/Hypo.cpp
Normal file
58
reranking/src/src/Hypo.cpp
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* nbest: tool to process moses n-best lists
|
||||
*
|
||||
* File: Hypo.cpp
|
||||
* basic functions to process one hypothesis
|
||||
*
|
||||
* Created by Holger Schwenk, University of Le Mans, 05/16/2008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "Hypo.h"
|
||||
#include <iostream>
|
||||
|
||||
//const char* NBEST_DELIM = "|||";
|
||||
|
||||
/*
 * Default constructor: creates a hypothesis with ID 0, an empty translation,
 * no feature scores and a global score of 0.
 * The numeric members are initialised explicitly so that a default-constructed
 * object never carries indeterminate values into Write() or operator<.
 */
Hypo::Hypo()
  : id(0), s(0)
{
  //cerr << "Hypo: constructor called" << endl;
}
|
||||
|
||||
// Destructor: intentionally empty, no member needs manual clean-up.
Hypo::~Hypo() {}
|
||||
|
||||
/*
 * Write this hypothesis as one n-best line:
 *   <id> ||| <translation> ||| <score> <score> ... ||| <global score>
 * Only the numeric feature values held in 'f' are written.
 */
void Hypo::Write(ofstream &outf)
{
  // sentence ID and translation, each followed by the padded delimiter
  outf << id << NBEST_DELIM2;
  outf << trg << NBEST_DELIM2;

  // feature scores, every value followed by a single blank
  for (size_t k = 0; k < f.size(); ++k) {
    outf << f[k] << " ";
  }

  // closing delimiter and global score; endl also flushes the stream
  outf << NBEST_DELIM << " " << s << endl;
}
|
||||
|
||||
/*
 * Recompute the global score as the dot product of the weights and the
 * feature scores, store it in 's' and return it.
 *
 * Side effect: if 'w' holds fewer values than this hypothesis has features,
 * the weight vector is enlarged in place (new entries are zero) and a note
 * is printed to stderr.
 */
float Hypo::CalcGlobal(Weights &w)
{
  // sizes are kept as size_t to avoid signed/unsigned comparisons
  const size_t nb_feat = f.size();
  const size_t nb_weights = w.val.size();

  if (nb_weights < nb_feat) {
    cerr << " - NOTE: padding weight vector with " << nb_feat - nb_weights << " zeros" << endl;
    w.val.resize(nb_feat);
  }

  float score = 0;
  for (size_t i = 0; i < nb_feat; ++i) {
    score += w.val[i] * f[i];
  }

  s = score;
  return s;
}
|
||||
|
||||
// this is actually a "greater than" since we want to sort in descending order
|
||||
bool Hypo::operator< (const Hypo &h2) const {
|
||||
return (this->s > h2.s);
|
||||
}
|
||||
|
43
reranking/src/src/Hypo.h
Normal file
43
reranking/src/src/Hypo.h
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* nbest: tool to process moses n-best lists
|
||||
*
|
||||
* File: Hypo.h
|
||||
* basic functions to process one hypothesis
|
||||
*
|
||||
* Created by Holger Schwenk, University of Le Mans, 05/16/2008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _HYPO_H_
|
||||
#define _HYPO_H_
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Tools.h"
|
||||
|
||||
#define NBEST_DELIM "|||"
|
||||
#define NBEST_DELIM2 " ||| "
|
||||
|
||||
class Hypo {
|
||||
int id;
|
||||
string trg; // translation
|
||||
vector<float> f; // feature function scores
|
||||
float s; // global score
|
||||
// segmentation
|
||||
public:
|
||||
Hypo();
|
||||
Hypo(int p_id,string &p_trg, vector<float> &p_f, float p_s) : id(p_id),trg(p_trg),f(p_f),s(p_s) {};
|
||||
~Hypo();
|
||||
float CalcGlobal(Weights&);
|
||||
void Write(ofstream&);
|
||||
bool operator< (const Hypo&) const;
|
||||
// bool CompareLikelihoods (const Hypo&, const Hypo&) const;
|
||||
};
|
||||
|
||||
#endif
|
92
reranking/src/src/Main.cpp
Normal file
92
reranking/src/src/Main.cpp
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* nbest: tool to process moses n-best lists
|
||||
*
|
||||
* File: Main.cpp
|
||||
* command line interface
|
||||
*
|
||||
* Created by Holger Schwenk, University of Le Mans, 05/16/2008
|
||||
*
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "ParameterNBest.h"
|
||||
#include "NBest.h"
|
||||
#include "Tools.h"
|
||||
|
||||
#include "Util.h" // from Moses
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main (int argc, char *argv[]) {
|
||||
// parse parameters
|
||||
ParameterNBest *parameter = new ParameterNBest();
|
||||
if (!parameter->LoadParam(argc, argv))
|
||||
{
|
||||
parameter->Explain();
|
||||
delete parameter;
|
||||
return 1;
|
||||
}
|
||||
|
||||
// read input
|
||||
ifstream inpf;
|
||||
PARAM_VEC p=parameter->GetParam("input-file");
|
||||
if (p.size()<1 || p.size()>2) Error("The option -input-file requires one or two arguments");
|
||||
int in_n=p.size()>1 ? Scan<int>(p[1]) : 0;
|
||||
cout << "NBest version 0.1, written by Holger.Schwenk@lium.univ-lemans.fr" << endl
|
||||
<< " - reading input from file '" << p[0] << "'";
|
||||
if (in_n>0) cout << " (limited to the first " << in_n << " hypothesis)";
|
||||
cout << endl;
|
||||
inpf.open(p[0].c_str());
|
||||
if (inpf.fail()) { perror ("ERROR"); exit(1); }
|
||||
|
||||
// open output
|
||||
ofstream outf;
|
||||
p=parameter->GetParam("output-file");
|
||||
if (p.size()<1 || p.size()>2) Error("The option -output-file requires one or two arguments");
|
||||
int out_n=p.size()>1 ? Scan<int>(p[1]) : 0;
|
||||
cout << " - writing output to file '" << p[0] << "'";
|
||||
if (out_n>0) cout << " (limited to the first " << out_n << " hypothesis)";
|
||||
cout << endl;
|
||||
outf.open(p[0].c_str());
|
||||
if (outf.fail()) { perror ("ERROR"); exit(1); }
|
||||
|
||||
// eventually read weights
|
||||
Weights w;
|
||||
int do_calc=false;
|
||||
if (parameter->isParamSpecified("weights")) {
|
||||
p=parameter->GetParam("weights");
|
||||
if (p.size()<1) Error("The option -weights requires one argument");
|
||||
cout << " - reading weights from file '" << p[0] << "'";
|
||||
int n=w.Read(p[0].c_str());
|
||||
cout << " (found " << n << " values)" << endl;
|
||||
do_calc=true;
|
||||
cout << " - recalculating global scores" << endl;
|
||||
}
|
||||
|
||||
// shall we sort ?
|
||||
bool do_sort = parameter->isParamSpecified("sort");
|
||||
if (do_sort) cout << " - sorting global scores" << endl;
|
||||
|
||||
// main loop
|
||||
int nb_sent=0, nb_nbest=0;
|
||||
while (!inpf.eof()) {
|
||||
NBest nbest(inpf, in_n);
|
||||
|
||||
if (do_calc) nbest.CalcGlobal(w);
|
||||
if (do_sort) nbest.Sort();
|
||||
nbest.Write(outf, out_n);
|
||||
|
||||
nb_sent++;
|
||||
nb_nbest+=nbest.NbNBest();
|
||||
}
|
||||
inpf.close();
|
||||
outf.close();
|
||||
|
||||
// display final statistics
|
||||
cout << " - processed " << nb_nbest << " n-best hypotheses in " << nb_sent << " sentences"
|
||||
<< " (average " << (float) nb_nbest/nb_sent << ")" << endl;
|
||||
|
||||
return 0;
|
||||
}
|
20
reranking/src/src/Makefile
Normal file
20
reranking/src/src/Makefile
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
# where to find include files and libraries from Moses
MOSES_INC=../../moses/src
LIB_DIR=../../moses/src/
# NOTE: the two assignments below override the relative paths above;
# adapt or remove them to match the local Moses installation.
MOSES_INC=/opt/mt/moses/moses/src
LIB_DIR=/opt/mt/moses/moses/src/

LIBS=-lmoses -lz
OBJS=Main.o NBest.o Hypo.o Tools.o ParameterNBest.o

CFLAGS=-I$(MOSES_INC)

.PHONY: nbest-tool clean

# historical name of the default target, kept as an alias
nbest-tool: nbest

# the target is named after the file it creates so that make can tell
# when the binary is already up to date
nbest: $(OBJS)
	$(CXX) -o $@ $(OBJS) -L$(LIB_DIR) $(LIBS)

%.o: %.cpp
	$(CXX) $(CFLAGS) -o $@ -c $<

clean:
	-rm -f $(OBJS) nbest
|
125
reranking/src/src/NBest.cpp
Normal file
125
reranking/src/src/NBest.cpp
Normal file
@ -0,0 +1,125 @@
|
||||
/*
|
||||
* nbest: tool to process moses n-best lists
|
||||
*
|
||||
* File: NBest.cpp
|
||||
* basic functions on n-best lists
|
||||
*
|
||||
* Created by Holger Schwenk, University of Le Mans, 05/16/2008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "NBest.h"
|
||||
|
||||
#include "Util.h" // from Moses
|
||||
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
|
||||
//NBest::NBest() {
|
||||
//cerr << "NBEST: constructor called" << endl;
|
||||
//}
|
||||
|
||||
|
||||
/*
 * Read one line of the n-best file and append it to this list.
 *
 * inpf  input stream positioned inside an n-best file
 * n     maximal number of hypotheses to keep (0 or less: keep all); further
 *       lines of the same sentence are read but not parsed or stored
 *
 * Returns true when the line belonged to the current sentence, false at the
 * end of the input or when a line with a different sentence ID was read.
 * In the latter case the line stays buffered and is consumed by the next
 * call, i.e. by the next NBest object that is constructed.
 *
 * The line buffer and the previous ID are function-local statics, so this
 * state is shared by all NBest objects.
 * Empty lines are skipped. A line with fewer than four "|||"-separated
 * blocks, or with more than four (segmentation), is reported through Error().
 */
bool NBest::ParseLine(ifstream &inpf, const int n) {
  static string line; // used internally to buffer an input line
  static int prev_id=-1; // used to detect a change of the n-best ID

  // Fetch a new line unless one is still buffered. The result of getline()
  // is tested instead of eof(), so a last line without a trailing newline
  // is no longer discarded.
  while (line.empty()) {
    if (!getline(inpf,line)) return false;
  }

  // split line into blocks
  //cerr << "PARSE line: " << line << endl;
  const string delim(NBEST_DELIM);
  vector<string> blocks;
  string::size_type pos=0, epos;
  while ((epos=line.find(delim,pos))!=string::npos) {
    blocks.push_back(line.substr(pos,epos-pos));
    pos=epos+delim.size();
  }
  blocks.push_back(line.substr(pos));

  if (blocks.size()<4) {
    cerr << line << endl;
    Error("can't parse the above line");
  }

  // parse ID
  const int id=Scan<int>(blocks[0]);
  if (prev_id>=0 && id!=prev_id) {prev_id=id; return false;} // new nbest list has started
  prev_id=id;

  if (n>0 && nbest.size() >= static_cast<size_t>(n)) {
    line.clear();
    return true; // skip parsing of unused hypos
  }

  // parse feature function scores: whitespace separated tokens, tokens
  // containing ':' are feature names and are ignored. Tokenising with a
  // stream also picks up a last value that is not followed by a blank.
  vector<float> f;
  istringstream feats(blocks[2]);
  string feat;
  while (feats >> feat) {
    if (feat.find(':')==string::npos) f.push_back(Scan<float>(feat));
  }

  // eventually parse segmentation
  if (blocks.size()>4) {
    Error("parsing segmentation not yet supported");
  }

  nbest.push_back(Hypo(id, blocks[1], f, Scan<float>(blocks[3])));

  line.clear(); // force read of new line

  return true;
}
|
||||
|
||||
|
||||
// Build one n-best list: lines are consumed from 'inpf' until ParseLine()
// reports that the list is complete (it returned false). 'n' is passed on
// unchanged as the limit on the number of stored hypotheses.
NBest::NBest(ifstream &inpf, const int n) {
  for (;;) {
    if (!ParseLine(inpf, n)) break;
  }
}
|
||||
|
||||
|
||||
// Destructor: intentionally empty, no member needs manual clean-up.
NBest::~NBest() {}
|
||||
|
||||
// Write the first 'n' hypotheses of the list to 'outf', in their current
// order. All hypotheses are written when 'n' is smaller than 1 or larger
// than the size of the list.
void NBest::Write(ofstream &outf, int n)
{
  const int available = nbest.size();
  const int limit = (n >= 1 && n <= available) ? n : available;

  for (vector<Hypo>::iterator h = nbest.begin(); h != nbest.begin() + limit; ++h) {
    h->Write(outf);
  }
}
|
||||
|
||||
|
||||
/*
 * Recompute the global score of every hypothesis of the list with the
 * weights 'w' (which may be padded with zeros by Hypo::CalcGlobal).
 *
 * Returns the largest recomputed score, or 0 for an empty list. The
 * function used to end without a return statement although it is declared
 * to return a float.
 */
float NBest::CalcGlobal(Weights &w)
{
  //cerr << "NBEST: calc global of size " << nbest.size() << endl;
  float best = 0;
  bool first = true;
  for (vector<Hypo>::iterator i = nbest.begin(); i != nbest.end(); ++i) {
    const float score = i->CalcGlobal(w);
    if (first || score > best) {
      best = score;
      first = false;
    }
  }
  return best;
}
|
||||
|
||||
|
||||
// Sort the hypotheses with Hypo::operator<, i.e. by decreasing global
// score. std::sort is not a stable sort, so hypotheses with equal scores
// may change their relative order.
void NBest::Sort() {
  std::sort(nbest.begin(), nbest.end());
}
|
||||
|
41
reranking/src/src/NBest.h
Normal file
41
reranking/src/src/NBest.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* nbest: tool to process moses n-best lists
|
||||
*
|
||||
* File: NBest.h
|
||||
* basic functions on n-best lists
|
||||
*
|
||||
* Created by Holger Schwenk, University of Le Mans, 05/16/2008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _NBEST_H_
|
||||
#define _NBEST_H_
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "Tools.h"
|
||||
#include "Hypo.h"
|
||||
|
||||
class NBest {
|
||||
int id;
|
||||
string src;
|
||||
vector<Hypo> nbest;
|
||||
bool ParseLine(ifstream &inpf, const int n);
|
||||
public:
|
||||
NBest(ifstream&, const int=0);
|
||||
~NBest();
|
||||
int NbNBest() {return nbest.size(); };
|
||||
float CalcGlobal(Weights&);
|
||||
void Sort(); // largest values first
|
||||
void Write(ofstream&, int=0);
|
||||
};
|
||||
|
||||
// Redundant declaration: Tools.h, included above, already provides an
// inline definition of Error().
void Error(char *msg);
|
||||
|
||||
#endif
|
355
reranking/src/src/ParameterNBest.cpp
Normal file
355
reranking/src/src/ParameterNBest.cpp
Normal file
@ -0,0 +1,355 @@
|
||||
// $Id: $
|
||||
|
||||
/***********************************************************************
|
||||
nbest - tool to process Moses n-best list
|
||||
Copyright (C) 2008 Holger Schwenk, University of Le Mans, France
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "ParameterNBest.h"
|
||||
#include "Tools.h"
|
||||
|
||||
#include "Util.h" // from Moses
|
||||
#include "InputFileStream.h"
|
||||
#include "UserMessage.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** define allowed parameters */
|
||||
ParameterNBest::ParameterNBest()
|
||||
{
|
||||
AddParam("input-file", "i", "file name of the input n-best list");
|
||||
AddParam("output-file", "o", "file name of the output n-best list");
|
||||
AddParam("recalc", "r", "recalc global scores");
|
||||
AddParam("weights", "w", "coefficients of the feature functions");
|
||||
AddParam("sort", "s", "sort n-best list according to the global scores");
|
||||
AddParam("lexical", "l", "report number of lexically different hypothesis");
|
||||
}
|
||||
|
||||
/** nothing to release explicitly */
ParameterNBest::~ParameterNBest() {}
|
||||
|
||||
/** initialize a parameter, sub of constructor */
|
||||
void ParameterNBest::AddParam(const string ¶mName, const string &description)
|
||||
{
|
||||
m_valid[paramName] = true;
|
||||
m_description[paramName] = description;
|
||||
}
|
||||
|
||||
/** initialize a parameter (including abbreviation), sub of constructor */
|
||||
void ParameterNBest::AddParam(const string ¶mName, const string &abbrevName, const string &description)
|
||||
{
|
||||
m_valid[paramName] = true;
|
||||
m_valid[abbrevName] = true;
|
||||
m_abbreviation[paramName] = abbrevName;
|
||||
m_description[paramName] = description;
|
||||
}
|
||||
|
||||
/** print descriptions of all parameters */
|
||||
void ParameterNBest::Explain() {
|
||||
cerr << "Usage:" << endl;
|
||||
for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
|
||||
{
|
||||
const string paramName = iterParam->first;
|
||||
const string paramDescription = iterParam->second;
|
||||
cerr << "\t-" << paramName;
|
||||
PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName );
|
||||
if ( iterAbbr != m_abbreviation.end() )
|
||||
cerr << " (" << iterAbbr->second << ")";
|
||||
cerr << ": " << paramDescription << endl;
|
||||
}
|
||||
}
|
||||
|
||||
/** check whether an item on the command line is a switch or a value
|
||||
* \param token token on the command line to checked **/
|
||||
|
||||
bool ParameterNBest::isOption(const char* token) {
|
||||
if (! token) return false;
|
||||
std::string tokenString(token);
|
||||
size_t length = tokenString.size();
|
||||
if (length > 0 && tokenString.substr(0,1) != "-") return false;
|
||||
if (length > 1 && tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** load all parameters from the configuration file and the command line switches */
|
||||
bool ParameterNBest::LoadParam(const string &filePath)
|
||||
{
|
||||
const char *argv[] = {"executable", "-f", filePath.c_str() };
|
||||
return LoadParam(3, (char**) argv);
|
||||
}
|
||||
|
||||
/** load all parameters from the configuration file and the command line switches */
|
||||
bool ParameterNBest::LoadParam(int argc, char* argv[])
|
||||
{
|
||||
// config file (-f) arg mandatory
|
||||
string configPath;
|
||||
/*
|
||||
if ( (configPath = FindParam("-f", argc, argv)) == ""
|
||||
&& (configPath = FindParam("-config", argc, argv)) == "")
|
||||
{
|
||||
PrintCredit();
|
||||
|
||||
UserMessage::Add("No configuration file was specified. Use -config or -f");
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!ReadConfigFile(configPath))
|
||||
{
|
||||
UserMessage::Add("Could not read "+configPath);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// overwrite parameters with values from switches
|
||||
for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++)
|
||||
{
|
||||
const string paramName = iterParam->first;
|
||||
OverwriteParam("-" + paramName, paramName, argc, argv);
|
||||
}
|
||||
|
||||
// ... also shortcuts
|
||||
for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++)
|
||||
{
|
||||
const string paramName = iterParam->first;
|
||||
const string paramShortName = iterParam->second;
|
||||
OverwriteParam("-" + paramShortName, paramName, argc, argv);
|
||||
}
|
||||
|
||||
// logging of parameters that were set in either config or switch
|
||||
int verbose = 1;
|
||||
if (m_setting.find("verbose") != m_setting.end() &&
|
||||
m_setting["verbose"].size() > 0)
|
||||
verbose = Scan<int>(m_setting["verbose"][0]);
|
||||
if (verbose >= 1) { // only if verbose
|
||||
TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
|
||||
for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {
|
||||
TRACE_ERR( "\t" << iterParam->first << ": ");
|
||||
for ( size_t i = 0; i < iterParam->second.size(); i++ )
|
||||
TRACE_ERR( iterParam->second[i] << " ");
|
||||
TRACE_ERR( endl);
|
||||
}
|
||||
}
|
||||
|
||||
// check for illegal parameters
|
||||
bool noErrorFlag = true;
|
||||
for (int i = 0 ; i < argc ; i++)
|
||||
{
|
||||
if (isOption(argv[i]))
|
||||
{
|
||||
string paramSwitch = (string) argv[i];
|
||||
string paramName = paramSwitch.substr(1);
|
||||
if (m_valid.find(paramName) == m_valid.end())
|
||||
{
|
||||
UserMessage::Add("illegal switch: " + paramSwitch);
|
||||
noErrorFlag = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check if parameters make sense
|
||||
return Validate() && noErrorFlag;
|
||||
}
|
||||
|
||||
/** check that parameter settings make sense */
|
||||
bool ParameterNBest::Validate()
|
||||
{
|
||||
bool noErrorFlag = true;
|
||||
|
||||
// required parameters
|
||||
if (m_setting["input-file"].size() == 0) {
|
||||
UserMessage::Add("No input-file");
|
||||
noErrorFlag = false;
|
||||
}
|
||||
|
||||
if (m_setting["output-file"].size() == 0) {
|
||||
UserMessage::Add("No output-file");
|
||||
noErrorFlag = false;
|
||||
}
|
||||
|
||||
if (m_setting["recalc"].size() > 0 && m_setting["weights"].size()==0) {
|
||||
UserMessage::Add("you need to spezify weight when recalculating global scores");
|
||||
noErrorFlag = false;
|
||||
}
|
||||
|
||||
|
||||
return noErrorFlag;
|
||||
}
|
||||
|
||||
/** check whether a file exists */
|
||||
bool ParameterNBest::FilesExist(const string ¶mName, size_t tokenizeIndex,std::vector<std::string> const& extensions)
|
||||
{
|
||||
typedef std::vector<std::string> StringVec;
|
||||
StringVec::const_iterator iter;
|
||||
|
||||
PARAM_MAP::const_iterator iterParam = m_setting.find(paramName);
|
||||
if (iterParam == m_setting.end())
|
||||
{ // no param. therefore nothing to check
|
||||
return true;
|
||||
}
|
||||
const StringVec &pathVec = (*iterParam).second;
|
||||
for (iter = pathVec.begin() ; iter != pathVec.end() ; ++iter)
|
||||
{
|
||||
StringVec vec = Tokenize(*iter);
|
||||
if (tokenizeIndex >= vec.size())
|
||||
{
|
||||
stringstream errorMsg("");
|
||||
errorMsg << "Expected at least " << (tokenizeIndex+1) << " tokens per emtry in '"
|
||||
<< paramName << "', but only found "
|
||||
<< vec.size();
|
||||
UserMessage::Add(errorMsg.str());
|
||||
return false;
|
||||
}
|
||||
const string &pathStr = vec[tokenizeIndex];
|
||||
|
||||
bool fileFound=0;
|
||||
for(size_t i=0;i<extensions.size() && !fileFound;++i)
|
||||
{
|
||||
fileFound|=FileExists(pathStr + extensions[i]);
|
||||
}
|
||||
if(!fileFound)
|
||||
{
|
||||
stringstream errorMsg("");
|
||||
errorMsg << "File " << pathStr << " does not exist";
|
||||
UserMessage::Add(errorMsg.str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** look for a switch in arg, update parameter */
|
||||
// TODO arg parsing like this does not belong in the library, it belongs
|
||||
// in moses-cmd
|
||||
string ParameterNBest::FindParam(const string ¶mSwitch, int argc, char* argv[])
|
||||
{
|
||||
for (int i = 0 ; i < argc ; i++)
|
||||
{
|
||||
if (string(argv[i]) == paramSwitch)
|
||||
{
|
||||
if (i+1 < argc)
|
||||
{
|
||||
return argv[i+1];
|
||||
} else {
|
||||
stringstream errorMsg("");
|
||||
errorMsg << "Option " << paramSwitch << " requires a parameter!";
|
||||
UserMessage::Add(errorMsg.str());
|
||||
// TODO return some sort of error, not the empty string
|
||||
}
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
/** update parameter settings with command line switches
|
||||
* \param paramSwitch (potentially short) name of switch
|
||||
* \param paramName full name of parameter
|
||||
* \param argc number of arguments on command line
|
||||
* \param argv values of paramters on command line */
|
||||
void ParameterNBest::OverwriteParam(const string ¶mSwitch, const string ¶mName, int argc, char* argv[])
|
||||
{
|
||||
int startPos = -1;
|
||||
for (int i = 0 ; i < argc ; i++)
|
||||
{
|
||||
if (string(argv[i]) == paramSwitch)
|
||||
{
|
||||
startPos = i+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (startPos < 0)
|
||||
return;
|
||||
|
||||
int index = 0;
|
||||
m_setting[paramName]; // defines the parameter, important for boolean switches
|
||||
while (startPos < argc && (!isOption(argv[startPos])))
|
||||
{
|
||||
if (m_setting[paramName].size() > (size_t)index)
|
||||
m_setting[paramName][index] = argv[startPos];
|
||||
else
|
||||
m_setting[paramName].push_back(argv[startPos]);
|
||||
index++;
|
||||
startPos++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** read parameters from a configuration file */
|
||||
bool ParameterNBest::ReadConfigFile( string filePath )
|
||||
{
|
||||
InputFileStream inFile(filePath);
|
||||
string line, paramName;
|
||||
while(getline(inFile, line))
|
||||
{
|
||||
// comments
|
||||
size_t comPos = line.find_first_of("#");
|
||||
if (comPos != string::npos)
|
||||
line = line.substr(0, comPos);
|
||||
// trim leading and trailing spaces/tabs
|
||||
line = Trim(line);
|
||||
|
||||
if (line[0]=='[')
|
||||
{ // new parameter
|
||||
for (size_t currPos = 0 ; currPos < line.size() ; currPos++)
|
||||
{
|
||||
if (line[currPos] == ']')
|
||||
{
|
||||
paramName = line.substr(1, currPos - 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (line != "")
|
||||
{ // add value to parameter
|
||||
m_setting[paramName].push_back(line);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void ParameterNBest::PrintCredit()
|
||||
{
|
||||
cerr << "NBest - A tool to process Moses n-best lists" << endl
|
||||
<< "Copyright (C) 2008 Holger Schwenk" << endl << endl
|
||||
|
||||
<< "This library is free software; you can redistribute it and/or" << endl
|
||||
<< "modify it under the terms of the GNU Lesser General Public" << endl
|
||||
<< "License as published by the Free Software Foundation; either" << endl
|
||||
<< "version 2.1 of the License, or (at your option) any later version." << endl << endl
|
||||
|
||||
<< "This library is distributed in the hope that it will be useful," << endl
|
||||
<< "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
|
||||
<< "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU" << endl
|
||||
<< "Lesser General Public License for more details." << endl << endl
|
||||
|
||||
<< "You should have received a copy of the GNU Lesser General Public" << endl
|
||||
<< "License along with this library; if not, write to the Free Software" << endl
|
||||
<< "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA" << endl << endl
|
||||
<< "***********************************************************************" << endl << endl
|
||||
<< "Built on " << __DATE__ << endl << endl
|
||||
|
||||
<< "Written by Holger Schwenk, Holger.Schwenk@lium.univ-lemans.fr" << endl << endl;
|
||||
}
|
||||
|
76
reranking/src/src/ParameterNBest.h
Normal file
76
reranking/src/src/ParameterNBest.h
Normal file
@ -0,0 +1,76 @@
|
||||
// $Id: $
|
||||
|
||||
/***********************************************************************
|
||||
nbest - tool to process Moses n-best list
|
||||
Copyright (C) 2008 Holger Schwenk, University of Le Mans, France
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "TypeDef.h"
|
||||
|
||||
typedef std::vector<std::string> PARAM_VEC;
|
||||
typedef std::map<std::string, PARAM_VEC > PARAM_MAP;
|
||||
typedef std::map<std::string, bool> PARAM_BOOL;
|
||||
typedef std::map<std::string, std::string > PARAM_STRING;
|
||||
|
||||
/** Handles parameter values set in config file or on command line.
|
||||
* Process raw parameter data (names and values as strings) for StaticData
|
||||
* to parse; to get useful values, see StaticData. */
|
||||
class ParameterNBest
|
||||
{
|
||||
protected:
|
||||
PARAM_MAP m_setting;
|
||||
PARAM_BOOL m_valid;
|
||||
PARAM_STRING m_abbreviation;
|
||||
PARAM_STRING m_description;
|
||||
|
||||
std::string FindParam(const std::string ¶mSwitch, int argc, char* argv[]);
|
||||
void OverwriteParam(const std::string ¶mSwitch, const std::string ¶mName, int argc, char* argv[]);
|
||||
bool ReadConfigFile( std::string filePath );
|
||||
bool FilesExist(const std::string ¶mName, size_t tokenizeIndex,std::vector<std::string> const& fileExtension=std::vector<std::string>(1,""));
|
||||
bool isOption(const char* token);
|
||||
bool Validate();
|
||||
|
||||
void AddParam(const std::string ¶mName, const std::string &description);
|
||||
void AddParam(const std::string ¶mName, const std::string &abbrevName, const std::string &description);
|
||||
|
||||
void PrintCredit();
|
||||
|
||||
public:
|
||||
ParameterNBest();
|
||||
~ParameterNBest();
|
||||
bool LoadParam(int argc, char* argv[]);
|
||||
bool LoadParam(const std::string &filePath);
|
||||
void Explain();
|
||||
|
||||
/** return a vector of strings holding the whitespace-delimited values on the ini-file line corresponding to the given parameter name */
|
||||
const PARAM_VEC &GetParam(const std::string ¶mName)
|
||||
{
|
||||
return m_setting[paramName];
|
||||
}
|
||||
/** check if parameter is defined (either in moses.ini or as switch) */
|
||||
bool isParamSpecified(const std::string ¶mName)
|
||||
{
|
||||
return m_setting.find( paramName ) != m_setting.end();
|
||||
}
|
||||
|
||||
};
|
||||
|
27
reranking/src/src/Tools.cpp
Normal file
27
reranking/src/src/Tools.cpp
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* nbest: tool to process moses n-best lists
|
||||
*
|
||||
* File: Tools.cpp
|
||||
* basic utility functions
|
||||
*
|
||||
* Created by Holger Schwenk, University of Le Mans, 05/16/2008
|
||||
*
|
||||
*/
|
||||
|
||||
#include "Tools.h"
|
||||
|
||||
int Weights::Read(const char *fname) {
|
||||
ifstream inpf;
|
||||
|
||||
inpf.open(fname);
|
||||
if (inpf.fail()) {
|
||||
perror ("ERROR"); exit(1);
|
||||
}
|
||||
|
||||
float f;
|
||||
while (inpf >> f) val.push_back(f);
|
||||
|
||||
inpf.close();
|
||||
return val.size();
|
||||
}
|
||||
|
71
reranking/src/src/Tools.h
Normal file
71
reranking/src/src/Tools.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* nbest: tool to process moses n-best lists
|
||||
*
|
||||
 * File: Tools.h
|
||||
* basic utility functions
|
||||
*
|
||||
* Created by Holger Schwenk, University of Le Mans, 05/16/2008
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _TOOLS_H_
|
||||
#define _TOOLS_H_
|
||||
|
||||
using namespace std;
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
// Coefficients of the feature functions. The values are private; class
// Hypo is a friend and accesses them directly.
class Weights {
  std::vector<float> val;   // one weight per feature function score
public:
  Weights() {}
  ~Weights() {}
  // append the values found in the named file, return the number of weights
  int Read(const char *);
  friend class Hypo;
};
|
||||
|
||||
//******************************************************
|
||||
|
||||
/*
|
||||
template<typename T>
|
||||
inline T Scan(const std::string &input)
|
||||
{
|
||||
std::stringstream stream(input);
|
||||
T ret;
|
||||
stream >> ret;
|
||||
return ret;
|
||||
}
|
||||
*/
|
||||
|
||||
//******************************************************
|
||||
|
||||
// Print "ERROR: <msg>" to stderr and terminate the program with exit
// status 1. The message is taken as a pointer to const so that string
// literals can be passed without a deprecated conversion.
inline void Error (const char *msg) {
  std::cerr << "ERROR: " << msg << std::endl;
  exit(1);
}
|
||||
|
||||
//******************************************************
|
||||
// From Moses code:
|
||||
|
||||
|
||||
/*
|
||||
* Outputting debugging/verbose information to stderr.
|
||||
* Use TRACE_ENABLE flag to redirect tracing output into oblivion
|
||||
* so that you can output your own ad-hoc debugging info.
|
||||
 * However, if you use stderr directly, please delete calls to it once
|
||||
* you finished debugging so that it won't clutter up.
|
||||
* Also use TRACE_ENABLE to turn off output of any debugging info
|
||||
* when compiling for a gui front-end so that running gui won't generate
|
||||
* output on command line
|
||||
* */
|
||||
#ifdef TRACE_ENABLE
|
||||
#define TRACE_ERR(str) std::cerr << str
|
||||
#else
|
||||
#define TRACE_ERR(str) {}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -1,2 +0,0 @@
|
||||
|
||||
just to make sure how it works
|
Loading…
Reference in New Issue
Block a user