Bug fix for the TER calculation

This commit is contained in:
Christophe SERVAN 2014-08-29 14:46:56 +02:00
parent 049a9a9ea7
commit be9b3cb1c6
23 changed files with 3024 additions and 2297 deletions

View File

@ -1,17 +1,37 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "alignmentStruct.h"
using namespace std;
namespace TERCpp
{
string alignmentStruct::toString()
{
stringstream s;
string alignmentStruct::toString()
{
stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl;
s << "Nothing to be printed" <<endl;
return s.str();
}
s << "Nothing to be printed" <<endl;
return s.str();
}
// alignmentStruct::alignmentStruct()
// {
@ -79,7 +99,7 @@ string alignmentStruct::toString()
// return s.str();
// }
/* The distance of the shift. */
/* The distance of the shift. */
// int alignmentStruct::distance()
// {
// if (moveto < start)

View File

@ -1,5 +1,26 @@
#ifndef MERT_TER_ALIGNMENT_STRUCT_H_
#define MERT_TER_ALIGNMENT_STRUCT_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef MERT_TER_ALIGNMENTSTRUCT_H_
#define MERT_TER_ALIGNMENTSTRUCT_H_
#include <vector>
#include <stdio.h>
@ -7,15 +28,16 @@
#include <sstream>
#include "tools.h"
using namespace std;
using namespace Tools;
namespace TERCpp
{
class alignmentStruct
{
private:
public:
class alignmentStruct
{
private:
public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -31,15 +53,14 @@ public:
// int end;
// int moveto;
// int newloc;
vector<string> nwords; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
string toString();
};
vector<string> nwords; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
string toString();
};
}
#endif // MERT_TER_ALIGNMENT_STRUCT_H_
#endif

View File

@ -1,5 +1,26 @@
#ifndef MERT_TER_BEST_SHIFT_STRUCT_H_
#define MERT_TER_BEST_SHIFT_STRUCT_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef __BESTSHIFTSTRUCT_H_
#define __BESTSHIFTSTRUCT_H_
#include <vector>
#include <stdio.h>
@ -15,10 +36,10 @@ using namespace Tools;
namespace TERCpp
{
class bestShiftStruct
{
private:
public:
class bestShiftStruct
{
private:
public:
// alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -34,17 +55,16 @@ public:
// int end;
// int moveto;
// int newloc;
terShift m_best_shift;
terAlignment m_best_align;
bool m_empty;
terShift m_best_shift;
terAlignment m_best_align;
bool m_empty;
// vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
// double cost;
};
};
}
#endif // MERT_TER_BEST_SHIFT_STRUCT_H_
#endif

View File

@ -1,3 +1,23 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "hashMap.h"
// The following class defines a hash function for strings
@ -8,142 +28,156 @@ using namespace std;
namespace HashMapSpace
{
// hashMap::hashMap();
/* hashMap::~hashMap()
/* hashMap::~hashMap()
{
// vector<stringHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMap::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMap::trouve ( long searchKey )
{
// vector<stringHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMap::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMap::trouve ( long searchKey )
{
long foundKey;
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
}
}
return 0;
}
int hashMap::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
int hashMap::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
}
}
return 0;
}
/**
* long hashMap::hashValue ( string key )
* @param key
* @return
*/
long hashMap::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length());
/**
* long hashMap::hashValue ( string key )
* @param key
* @return
*/
long hashMap::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
}
/**
* void hashMap::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMap::addHasher ( string key, string value )
{
if ( trouve ( hashValue ( key ) ) ==0 ) {
}
/**
* void hashMap::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMap::addHasher ( string key, string value )
{
if ( trouve ( hashValue ( key ) ) ==0 )
{
// cerr << "ICI1" <<endl;
stringHasher H ( hashValue ( key ),key,value );
stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
m_hasher.push_back ( H );
}
}
stringHasher hashMap::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
m_hasher.push_back ( H );
}
}
}
return defaut;
}
string hashMap::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
stringHasher hashMap::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return ( *l_hasher );
}
}
return defaut;
}
string hashMap::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
return ( *l_hasher ).getValue();
}
}
return "";
}
}
return "";
}
string hashMap::searchValue ( string value )
{
string hashMap::searchValue ( string value )
{
// long searchKey=hashValue ( key );
// long foundKey;
string foundValue;
string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundValue= ( *l_hasher ).getValue();
if ( foundValue.compare ( value ) == 0 ) {
return ( *l_hasher ).getKey();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundValue= ( *l_hasher ).getValue();
if ( foundValue.compare ( value ) == 0 )
{
return ( *l_hasher ).getKey();
}
}
return "";
}
}
return "";
}
void hashMap::setValue ( string key , string value )
{
long searchKey=hashValue ( key );
long foundKey;
void hashMap::setValue ( string key , string value )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
( *l_hasher ).setValue ( value );
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
}
}
}
/**
*
*/
void hashMap::printHash()
{
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}
/**
*
*/
void hashMap::printHash()
{
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}

View File

@ -1,10 +1,29 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
/*
* Generic hashmap manipulation functions
*/
#ifndef MERT_TER_HASHMAP_H_
#define MERT_TER_HASHMAP_H_
#ifndef __HASHMAP_H_
#define __HASHMAP_H_
#include <boost/functional/hash.hpp>
#include "stringHasher.h"
#include <vector>
#include <string>
@ -16,28 +35,30 @@ using namespace std;
namespace HashMapSpace
{
class hashMap
{
private:
vector<stringHasher> m_hasher;
class hashMap
{
private:
vector<stringHasher> m_hasher;
public:
public:
// ~hashMap();
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, string value );
stringHasher getHasher ( string key );
string getValue ( string key );
string searchValue ( string key );
void setValue ( string key , string value );
void printHash();
vector<stringHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, string value );
stringHasher getHasher ( string key );
string getValue ( string key );
string searchValue ( string key );
void setValue ( string key , string value );
void printHash();
vector<stringHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
}
#endif // MERT_TER_HASHMAP_H_
#endif

View File

@ -1,3 +1,23 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "hashMapInfos.h"
// The following class defines a hash function for strings
@ -8,108 +28,117 @@ using namespace std;
namespace HashMapSpace
{
// hashMapInfos::hashMap();
/* hashMapInfos::~hashMap()
/* hashMapInfos::~hashMap()
{
// vector<infosHasher>::const_iterator del = m_hasher.begin();
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapInfos::trouve ( long searchKey )
{
// vector<infosHasher>::const_iterator del = m_hasher.begin();
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapInfos::trouve ( long searchKey )
{
long foundKey;
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
}
}
return 0;
}
int hashMapInfos::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
int hashMapInfos::trouve ( string key )
{
long searchKey=hashValue ( key );
long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
}
}
return 0;
}
/**
* long hashMapInfos::hashValue ( string key )
* @param key
* @return
*/
long hashMapInfos::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length());
/**
* long hashMapInfos::hashValue ( string key )
* @param key
* @return
*/
long hashMapInfos::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher;
// return hasher ( key );
}
/**
* void hashMapInfos::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMapInfos::addHasher ( string key, vector<int> value )
{
if ( trouve ( hashValue ( key ) ) ==0 ) {
}
/**
* void hashMapInfos::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMapInfos::addHasher ( string key, vector<int> value )
{
if ( trouve ( hashValue ( key ) ) ==0 )
{
// cerr << "ICI1" <<endl;
infosHasher H ( hashValue ( key ),key,value );
infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
m_hasher.push_back ( H );
}
}
// Convenience alias: forwards key and value unchanged to addHasher().
void hashMapInfos::addValue ( string key, vector<int> value )
{
addHasher ( key, value );
}
infosHasher hashMapInfos::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
m_hasher.push_back ( H );
}
}
}
vector<int> temp;
infosHasher defaut(0,"",temp);
return defaut;
}
vector<int> hashMapInfos::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
vector<int> retour;
// Convenience alias: forwards key and value unchanged to addHasher().
void hashMapInfos::addValue ( string key, vector<int> value )
{
addHasher ( key, value );
}
infosHasher hashMapInfos::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return ( *l_hasher );
}
}
vector<int> temp;
infosHasher defaut(0,"",temp);
return defaut;
}
vector<int> hashMapInfos::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
return ( *l_hasher ).getValue();
}
}
return retour;
}
}
return retour;
}
// string hashMapInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
@ -129,30 +158,42 @@ vector<int> hashMapInfos::getValue ( string key )
// }
//
void hashMapInfos::setValue ( string key , vector<int> value )
{
long searchKey=hashValue ( key );
long foundKey;
void hashMapInfos::setValue ( string key , vector<int> value )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
( *l_hasher ).setValue ( value );
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
}
/**
 * Build a textual dump of the table.
 * @return the toString() output of every stored entry, concatenated in
 *         storage order with nothing inserted between entries
 */
string hashMapInfos::toString ()
{
    stringstream buffer;
    vector<infosHasher>::iterator entry = m_hasher.begin();
    while ( entry != m_hasher.end() )
    {
        buffer << entry->toString();
        ++entry;
    }
    return buffer.str();
}
}
}
/**
*
*/
void hashMapInfos::printHash()
{
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
/**
 * Walk every stored entry.
 * The only statement in the loop body is commented out, so this
 * function currently produces no output.
 */
void hashMapInfos::printHash()
{
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
// Disabled output statement, kept for reference:
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}
}
}

View File

@ -1,9 +1,29 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
/*
* Generic hashmap manipulation functions
*/
#ifndef MERT_TER_HASHMAP_INFOS_H_
#define MERT_TER_HASHMAP_INFOS_H_
#ifndef __HASHMAPINFOS_H_
#define __HASHMAPINFOS_H_
#include <boost/functional/hash.hpp>
#include "infosHasher.h"
#include <vector>
#include <string>
@ -14,29 +34,32 @@ using namespace std;
namespace HashMapSpace
{
class hashMapInfos
{
private:
vector<infosHasher> m_hasher;
class hashMapInfos
{
private:
vector<infosHasher> m_hasher;
public:
public:
// ~hashMap();
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<int> value );
void addValue ( string key, vector<int> value );
infosHasher getHasher ( string key );
vector<int> getValue ( string key );
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<int> value );
void addValue ( string key, vector<int> value );
infosHasher getHasher ( string key );
vector<int> getValue ( string key );
// string searchValue ( string key );
void setValue ( string key , vector<int> value );
void printHash();
vector<infosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
void setValue ( string key , vector<int> value );
void printHash();
string toString();
vector<infosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
}
#endif // MERT_TER_HASHMAP_INFOS_H_
#endif

View File

@ -1,3 +1,23 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "hashMapStringInfos.h"
// The following class defines a hash function for strings
@ -7,157 +27,179 @@ using namespace std;
namespace HashMapSpace
{
// hashMapStringInfos::hashMap();
/* hashMapStringInfos::~hashMap()
{
// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapStringInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapStringInfos::trouve ( long searchKey )
{
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
// hashMapStringInfos::hashMap();
/* hashMapStringInfos::~hashMap()
{
// vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapStringInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapStringInfos::trouve ( long searchKey )
{
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
}
}
return 0;
}
int hashMapStringInfos::trouve ( string key )
{
long searchKey = hashValue ( key );
long foundKey;;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return 1;
/**
 * Tell whether an entry for the given key is stored.
 * The key is hashed with hashValue() and the lookup is done on that hash
 * by the trouve(long) overload; entries are matched on the hash only, the
 * key text itself is not compared.
 * @param key key to look for
 * @return 1 if an entry with the same hash is stored, 0 otherwise
 */
int hashMapStringInfos::trouve ( string key )
{
    // Reuse the hash-based overload instead of repeating the same scan here.
    return trouve ( hashValue ( key ) );
}
}
return 0;
}
/**
* long hashMapStringInfos::hashValue ( string key )
* @param key
* @return
*/
long hashMapStringInfos::hashValue ( string key )
{
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> > ( loc );
return coll.hash ( key.data(), key.data() + key.length() );
/**
 * Hash a key using the collate<char> facet of a default-constructed locale.
 * @param key string to hash
 * @return value of collate<char>::hash over the characters of key
 */
long hashMapStringInfos::hashValue ( string key )
{
    // A default-constructed locale is a copy of the current global locale
    // (the "C" locale unless the program has replaced it).
    locale currentLocale;
    const char* first = key.data();
    const char* last = first + key.length();
    const collate<char>& collator = use_facet<collate<char> > ( currentLocale );
    return collator.hash ( first, last );
}
/**
 * Store a new (key, value) entry unless one with the same hash already exists.
 * When an entry with the same hash is already stored, nothing is added and
 * the existing entry is left untouched.
 * @param key key of the entry
 * @param value list of strings to associate with the key
 */
void hashMapStringInfos::addHasher ( string key, vector<string> value )
{
    // Hash once and reuse the result for both the lookup and the new entry.
    const long hashedKey = hashValue ( key );
    if ( trouve ( hashedKey ) == 0 )
    {
        stringInfosHasher entry ( hashedKey, key, value );
        m_hasher.push_back ( entry );
    }
}
// Convenience alias: forwards key and value unchanged to addHasher().
void hashMapStringInfos::addValue ( string key, vector<string> value )
{
addHasher ( key, value );
}
/**
 * Fetch a copy of the entry stored for a key.
 * Entries are matched on the hash of the key as computed by hashValue().
 * @param key key to look up
 * @return copy of the first entry whose hash key matches; when none does,
 *         a placeholder entry built from hash 0, an empty key and an empty
 *         value list
 */
stringInfosHasher hashMapStringInfos::getHasher ( string key )
{
    const long wanted = hashValue ( key );
    vector<stringInfosHasher>::iterator entry = m_hasher.begin();
    while ( entry != m_hasher.end() )
    {
        const long candidate = entry->getHashKey();
        if ( candidate == wanted )
        {
            return *entry;
        }
        ++entry;
    }
    // Nothing matched: return the placeholder entry.
    vector<string> noValues;
    stringInfosHasher placeholder ( 0, "", noValues );
    return placeholder;
}
vector<string> hashMapStringInfos::getValue ( string key )
{
long searchKey = hashValue ( key );
long foundKey;
vector<string> retour;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
/**
 * Appends a new entry (hash of key, key, value) to the map, but only when
 * trouve() returns 0 for the hash of the key; otherwise the map is left
 * unchanged.
 * @param key text key of the entry
 * @param value list of strings stored for the key
 */
void hashMapStringInfos::addHasher ( string key, vector<string> value )
{
    // Hash once and reuse the result for both the lookup and the new entry.
    const long hashedKey = hashValue ( key );
    if ( trouve ( hashedKey ) == 0 )
    {
        m_hasher.push_back ( stringInfosHasher ( hashedKey, key, value ) );
    }
}
}
return retour;
}
// string hashMapStringInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
// // long foundKey;
// vector<int> foundValue;
//
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
// {
// foundValue= ( *l_hasher ).getValue();
// /* if ( foundValue.compare ( value ) == 0 )
// {
// return ( *l_hasher ).getKey();
// }*/
// }
// return "";
// }
//
void hashMapStringInfos::setValue ( string key , vector<string> value )
{
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
void hashMapStringInfos::addValue ( string key, vector<string> value )
{
addHasher ( key, value );
}
}
}
/**
 * Looks up the entry stored for a key.
 * Entries are matched on the hash of the key only: the first entry whose
 * stored hash equals hashValue(key) is returned.
 * @param key text key to look up
 * @return copy of the matching entry, or an entry built from
 *         (0, "", empty vector) when no hash matches
 */
stringInfosHasher hashMapStringInfos::getHasher ( string key )
{
    const long wanted = hashValue ( key );
    vector<stringInfosHasher>::iterator entry = m_hasher.begin();
    while ( entry != m_hasher.end() )
    {
        if ( entry->getHashKey() == wanted )
        {
            return *entry;
        }
        ++entry;
    }
    return stringInfosHasher ( 0, "", vector<string>() );
}
vector<string> hashMapStringInfos::getValue ( string key )
{
long searchKey = hashValue ( key );
long foundKey;
vector<string> retour;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
}
}
return retour;
}
// string hashMapStringInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
// // long foundKey;
// vector<int> foundValue;
//
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
// {
// foundValue= ( *l_hasher ).getValue();
// /* if ( foundValue.compare ( value ) == 0 )
// {
// return ( *l_hasher ).getKey();
// }*/
// }
// return "";
// }
//
void hashMapStringInfos::setValue ( string key , vector<string> value )
{
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
}
/**
*
*/
void hashMapStringInfos::printHash()
{
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
{
return m_hasher;
}
string hashMapStringInfos::toString ()
{
stringstream to_return;
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
return to_return.str();
}
/**
*
*/
void hashMapStringInfos::printHash()
{
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
{
return m_hasher;
}

View File

@ -1,9 +1,29 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
/*
* Generic hashmap manipulation functions
*/
#ifndef MERT_TER_HASHMAP_STRING_INFOS_H_
#define MERT_TER_HASHMAP_STRING_INFOS_H_
#ifndef __HASHMAPSTRINGINFOS_H_
#define __HASHMAPSTRINGINFOS_H_
#include <boost/functional/hash.hpp>
#include "stringInfosHasher.h"
#include <vector>
#include <string>
@ -14,29 +34,32 @@ using namespace std;
namespace HashMapSpace
{
class hashMapStringInfos
{
private:
vector<stringInfosHasher> m_hasher;
class hashMapStringInfos
{
private:
vector<stringInfosHasher> m_hasher;
public:
public:
// ~hashMap();
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<string> value );
void addValue ( string key, vector<string> value );
stringInfosHasher getHasher ( string key );
vector<string> getValue ( string key );
long hashValue ( string key );
int trouve ( long searchKey );
int trouve ( string key );
void addHasher ( string key, vector<string> value );
void addValue ( string key, vector<string> value );
stringInfosHasher getHasher ( string key );
vector<string> getValue ( string key );
// string searchValue ( string key );
void setValue ( string key , vector<string> value );
void printHash();
vector<stringInfosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
void setValue ( string key , vector<string> value );
void printHash();
string toString();
vector<stringInfosHasher> getHashMap();
string printStringHash();
string printStringHash2();
string printStringHashForLexicon();
};
}
#endif // MERT_TER_HASHMAP_STRING_INFOS_H_
#endif

View File

@ -1,34 +1,61 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "infosHasher.h"
// The following class defines a hash function for strings
using namespace std;
using namespace Tools;
namespace HashMapSpace
{
/**
 * Builds an entry from its three parts.
 * @param cle numeric hash stored as the entry's hash key
 * @param cleTxt text key of the entry
 * @param valueVecInt list of integers stored as the entry's value
 */
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
    : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
/**
 * Builds an entry from its three parts.
 * @param cle numeric hash stored as the entry's hash key
 * @param cleTxt text key of the entry
 * @param valueVecInt list of integers stored as the entry's value
 */
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
    : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
// infosHasher::~infosHasher(){};*/
long infosHasher::getHashKey()
{
return m_hashKey;
}
string infosHasher::getKey()
{
return m_key;
}
vector<int> infosHasher::getValue()
{
return m_value;
}
void infosHasher::setValue ( vector<int> value )
{
m_value=value;
}
long infosHasher::getHashKey()
{
return m_hashKey;
}
string infosHasher::getKey()
{
return m_key;
}
vector<int> infosHasher::getValue()
{
return m_value;
}
void infosHasher::setValue ( vector<int> value )
{
m_value=value;
}
/**
 * Formats this entry as one line of text.
 * @return hash key, text key and vectorToString(m_value, "\t"), separated
 *         by tabs and terminated by an end-of-line
 */
string infosHasher::toString()
{
    stringstream line;
    line << m_hashKey;
    line << "\t" << m_key;
    line << "\t" << vectorToString(m_value,"\t");
    line << endl;
    return line.str();
}
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;

View File

@ -1,31 +1,54 @@
#ifndef MERT_TER_INFO_SHASHER_H_
#define MERT_TER_INFO_SHASHER_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef __INFOSHASHER_H_
#define __INFOSHASHER_H_
#include <string>
// #include <ext/hash_map>
#include <stdio.h>
#include <iostream>
#include <sstream>
#include <vector>
#include "tools.h"
using namespace std;
namespace HashMapSpace
{
class infosHasher
{
private:
long m_hashKey;
string m_key;
vector<int> m_value;
class infosHasher
{
private:
long m_hashKey;
string m_key;
vector<int> m_value;
public:
infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
long getHashKey();
string getKey();
vector<int> getValue();
void setValue ( vector<int> value );
public:
infosHasher ( long cle, string cleTxt, vector<int> valueVecInt );
long getHashKey();
string getKey();
vector<int> getValue();
void setValue ( vector<int> value );
string toString();
};
};
}
#endif // MERT_TER_INFO_SHASHER_H_
#endif

View File

@ -1,3 +1,23 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "stringHasher.h"
// The following class defines a hash function for strings
@ -6,29 +26,29 @@ using namespace std;
namespace HashMapSpace
{
/**
 * Builds an entry from its three parts.
 * @param cle numeric hash stored as the entry's hash key
 * @param cleTxt text key of the entry
 * @param valueTxt text stored as the entry's value
 */
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
    : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueTxt )
{
}
/**
 * Builds an entry from its three parts.
 * @param cle numeric hash stored as the entry's hash key
 * @param cleTxt text key of the entry
 * @param valueTxt text stored as the entry's value
 */
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
    : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueTxt )
{
}
// stringHasher::~stringHasher(){};*/
long stringHasher::getHashKey()
{
return m_hashKey;
}
string stringHasher::getKey()
{
return m_key;
}
string stringHasher::getValue()
{
return m_value;
}
void stringHasher::setValue ( string value )
{
m_value=value;
}
long stringHasher::getHashKey()
{
return m_hashKey;
}
string stringHasher::getKey()
{
return m_key;
}
string stringHasher::getValue()
{
return m_value;
}
void stringHasher::setValue ( string value )
{
m_value=value;
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -1,28 +1,50 @@
#ifndef MERT_TER_STRING_HASHER_H_
#define MERT_TER_STRING_HASHER_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef __STRINGHASHER_H_
#define __STRINGHASHER_H_
#include <string>
//#include <ext/hash_map>
#include <iostream>
using namespace std;
namespace HashMapSpace
{
class stringHasher
{
private:
long m_hashKey;
string m_key;
string m_value;
class stringHasher
{
private:
long m_hashKey;
string m_key;
string m_value;
public:
stringHasher ( long cle, string cleTxt, string valueTxt );
long getHashKey();
string getKey();
string getValue();
void setValue ( string value );
};
public:
stringHasher ( long cle, string cleTxt, string valueTxt );
long getHashKey();
string getKey();
string getValue();
void setValue ( string value );
};
}
#endif // MERT_TER_STRING_HASHER_H_
#endif

View File

@ -1,34 +1,61 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "stringInfosHasher.h"
// The following class defines a hash function for strings
using namespace std;
using namespace Tools;
namespace HashMapSpace
{
/**
 * Builds an entry from its three parts.
 * @param cle numeric hash stored as the entry's hash key
 * @param cleTxt text key of the entry
 * @param valueVecInt list of strings stored as the entry's value
 */
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
    : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
/**
 * Builds an entry from its three parts.
 * @param cle numeric hash stored as the entry's hash key
 * @param cleTxt text key of the entry
 * @param valueVecInt list of strings stored as the entry's value
 */
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
    : m_hashKey ( cle ), m_key ( cleTxt ), m_value ( valueVecInt )
{
}
// stringInfosHasher::~stringInfosHasher(){};*/
long stringInfosHasher::getHashKey()
{
return m_hashKey;
}
string stringInfosHasher::getKey()
{
return m_key;
}
vector<string> stringInfosHasher::getValue()
{
return m_value;
}
void stringInfosHasher::setValue ( vector<string> value )
{
m_value=value;
}
long stringInfosHasher::getHashKey()
{
return m_hashKey;
}
string stringInfosHasher::getKey()
{
return m_key;
}
vector<string> stringInfosHasher::getValue()
{
return m_value;
}
void stringInfosHasher::setValue ( vector<string> value )
{
m_value=value;
}
/**
 * Formats this entry as one line of text.
 * @return hash key, text key and vectorToString(m_value, "\t"), separated
 *         by tabs and terminated by an end-of-line
 */
string stringInfosHasher::toString()
{
    stringstream line;
    line << m_hashKey;
    line << "\t" << m_key;
    line << "\t" << vectorToString(m_value,"\t");
    line << endl;
    return line.str();
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -1,28 +1,52 @@
#ifndef MERT_TER_STRING_INFOS_HASHER_H_
#define MERT_TER_STRING_INFOS_HASHER_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef __STRINGINFOSHASHER_H_
#define __STRINGINFOSHASHER_H_
#include <string>
// #include <ext/hash_map>
#include <iostream>
#include <vector>
#include "tools.h"
using namespace std;
namespace HashMapSpace
{
class stringInfosHasher
{
private:
long m_hashKey;
string m_key;
vector<string> m_value;
class stringInfosHasher
{
private:
long m_hashKey;
string m_key;
vector<string> m_value;
public:
stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
long getHashKey();
string getKey();
vector<string> getValue();
void setValue ( vector<string> value );
string toString();
};
public:
stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt );
long getHashKey();
string getKey();
vector<string> getValue();
void setValue ( vector<string> value );
};
}
#endif // MERT_TER_STRING_INFOS_HASHER_H_
#endif

View File

@ -1,131 +1,214 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "terAlignment.h"
using namespace std;
namespace TERCpp
{
terAlignment::terAlignment()
{
terAlignment::terAlignment()
{
// vector<string> ref;
// vector<string> hyp;
// vector<string> aftershift;
// TERshift[] allshifts = null;
// TERshift[] allshifts = null;
numEdits=0;
numWords=0;
bestRef="";
numEdits=0;
numWords=0;
bestRef="";
numIns=0;
numDel=0;
numSub=0;
numSft=0;
numWsf=0;
}
string terAlignment::toString()
{
stringstream s;
s.str ( "" );
s << "Original Ref: " << join ( " ", ref ) << endl;
s << "Original Hyp: " << join ( " ", hyp ) <<endl;
s << "Hyp After Shift: " << join ( " ", aftershift );
s << endl;
numIns=0;
numDel=0;
numSub=0;
numSft=0;
numWsf=0;
}
string terAlignment::toString()
{
stringstream s;
s.str ( "" );
s << "Original Ref: \t" << join ( " ", ref ) << endl;
s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift );
s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
if ( ( int ) sizeof ( alignment ) >0 ) {
s << "Alignment: (";
if ( ( int ) sizeof ( alignment ) >0 )
{
s << "Alignment: (";
// s += "\nAlignment: (";
for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) {
s << alignment[i];
for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
{
s << alignment[i];
// s+=alignment[i];
}
}
// s += ")";
s << ")";
}
s << endl;
if ( ( int ) allshifts.size() == 0 ) {
s << ")";
}
s << endl;
if ( ( int ) allshifts.size() == 0 )
{
// s += "\nNumShifts: 0";
s << "NumShifts: 0";
} else {
s << "NumShifts: 0";
}
else
{
// s += "\nNumShifts: " + (int)allshifts.size();
s << "NumShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ ) {
s << endl << " " ;
s << ( ( terShift ) allshifts[i] ).toString();
s << "NumShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ )
{
s << endl << " " ;
s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i];
}
}
s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
}
}
s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
return s.str();
return s.str();
}
string terAlignment::join ( string delim, vector<string> arr )
{
if ( ( int ) arr.size() == 0 ) return "";
}
string terAlignment::join ( string delim, vector<string> arr )
{
if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String("");
stringstream s;
s.str ( "" );
for ( int i = 0; i < ( int ) arr.size(); i++ ) {
if ( i == 0 ) {
s << arr.at ( i );
} else {
s << delim << arr.at ( i );
}
}
return s.str();
stringstream s;
s.str ( "" );
for ( int i = 0; i < ( int ) arr.size(); i++ )
{
if ( i == 0 )
{
s << arr.at ( i );
}
else
{
s << delim << arr.at ( i );
}
}
return s.str();
// return "";
}
double terAlignment::score()
{
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
return 1.0;
}
if ( numWords <= 0.0 ) {
return 0.0;
}
return ( double ) numEdits / numWords;
}
/**
 * Edit rate relative to averageWords.
 * @return numEdits / averageWords; when averageWords is not positive, 1.0
 *         if there is at least one edit and 0.0 otherwise
 */
double terAlignment::scoreAv()
{
    const bool noWords = ( averageWords <= 0.0 );
    if ( !noWords )
    {
        return static_cast<double> ( numEdits ) / averageWords;
    }
    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
}
void terAlignment::scoreDetails()
{
numIns = numDel = numSub = numWsf = numSft = 0;
if((int)allshifts.size()>0) {
for(int i = 0; i < (int)allshifts.size(); ++i) {
numWsf += allshifts[i].size();
}
numSft = allshifts.size();
}
if((int)alignment.size()>0 ) {
for(int i = 0; i < (int)alignment.size(); ++i) {
switch (alignment[i]) {
case 'S':
case 'T':
numSub++;
break;
case 'D':
numDel++;
break;
case 'I':
numIns++;
break;
}
double terAlignment::score()
{
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
{
return 1.0;
}
if ( numWords <= 0.0 )
{
return 0.0;
}
return ( double ) numEdits / numWords;
}
/**
 * Edit rate relative to averageWords.
 * @return numEdits / averageWords; when averageWords is not positive, 1.0
 *         if there is at least one edit and 0.0 otherwise
 */
double terAlignment::scoreAv()
{
    const bool noWords = ( averageWords <= 0.0 );
    if ( !noWords )
    {
        return static_cast<double> ( numEdits ) / averageWords;
    }
    return ( numEdits > 0.0 ) ? 1.0 : 0.0;
}
/**
 * Recomputes the per-operation counters from allshifts and alignment:
 * numSft is the number of shifts, numWsf the sum of their size(), and
 * numSub / numDel / numIns count the alignment codes 'S' or 'T', 'D'
 * and 'I' respectively. Other alignment codes are not counted.
 */
void terAlignment::scoreDetails()
{
    numIns = 0;
    numDel = 0;
    numSub = 0;
    numWsf = 0;

    // Shifts: how many there are and how many words they move in total.
    const int shiftCount = ( int ) allshifts.size();
    numSft = shiftCount;
    for ( int s = 0; s < shiftCount; ++s )
    {
        numWsf += allshifts[s].size();
    }

    // Alignment codes: substitutions, deletions and insertions.
    const int opCount = ( int ) alignment.size();
    for ( int a = 0; a < opCount; ++a )
    {
        const char op = alignment[a];
        if ( op == 'S' || op == 'T' )
        {
            numSub++;
        }
        else if ( op == 'D' )
        {
            numDel++;
        }
        else if ( op == 'I' )
        {
            numIns++;
        }
    }
    // A consistency check comparing numEdits with
    // numSft + numDel + numIns + numSub is left disabled here.
}
// if(numEdits != numSft + numDel + numIns + numSub)
// System.out.println("** Error, unmatch edit erros " + numEdits +
// " vs " + (numSft + numDel + numIns + numSub));
/**
 * Formats the alignment codes as text.
 * @return the characters of alignment, in order, separated by single
 *         spaces; an empty string when alignment is empty
 */
string terAlignment::printAlignments()
{
    stringstream to_return;
    for ( int i = 0; i < ( int ) alignment.size(); ++i )
    {
        // The separator goes before every code except the first one.
        if ( i != 0 )
        {
            to_return << " ";
        }
        to_return << alignment[i];
    }
    return to_return.str();
}
/**
 * Formats the list of shifts as text.
 * @return "NbrShifts: " followed by the number of shifts, then, for each
 *         shift, a tab and the shift's toString()
 */
string terAlignment::printAllShifts()
{
    stringstream to_return;
    const int shiftCount = ( int ) allshifts.size();
    // With no shift this yields "NbrShifts: 0" and the loop does not run.
    to_return << "NbrShifts: " << shiftCount;
    for ( int i = 0; i < shiftCount; i++ )
    {
        // Call toString() on the stored element; no temporary copy needed.
        to_return << "\t" << allshifts[i].toString();
    }
    return to_return.str();
}
}

View File

@ -1,5 +1,26 @@
#ifndef MERT_TER_TER_ALIGNMENT_H_
#define MERT_TER_TER_ALIGNMENT_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef MERT_TER_TERALIGNMENT_H_
#define MERT_TER_TERALIGNMENT_H_
#include <vector>
#include <stdio.h>
@ -13,39 +34,41 @@ using namespace std;
namespace TERCpp
{
class terAlignment
{
private:
public:
class terAlignment
{
private:
public:
terAlignment();
string toString();
void scoreDetails();
terAlignment();
string toString();
void scoreDetails();
vector<string> ref;
vector<string> hyp;
vector<string> aftershift;
vector<string> ref;
vector<string> hyp;
vector<string> aftershift;
vector<terShift> allshifts;
vector<int> hyp_int;
vector<int> aftershift_int;
vector<terShift> allshifts;
double numEdits;
double numWords;
double averageWords;
vector<char> alignment;
string bestRef;
double numEdits;
double numWords;
double averageWords;
vector<char> alignment;
string bestRef;
int numIns;
int numDel;
int numSub;
int numSft;
int numWsf;
int numIns;
int numDel;
int numSub;
int numSft;
int numWsf;
string join ( string delim, vector<string> arr );
double score();
double scoreAv();
};
string join ( string delim, vector<string> arr );
double score();
double scoreAv();
string printAlignments();
string printAllShifts();
};
}
#endif // MERT_TER_TER_ALIGNMENT_H__
#endif

View File

@ -1,3 +1,23 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "terShift.h"
using namespace std;
@ -22,32 +42,32 @@ namespace TERCpp
// numSft=0;
// numWsf=0;
// }
/** Builds a shift with all four positions set to 0 and a cost of 1.0. */
terShift::terShift ()
{
    start = end = moveto = newloc = 0;
    cost = 1.0;
}
/**
 * Builds a shift from its four positions; the cost is set to 1.0.
 * @param _start value stored in start
 * @param _end value stored in end
 * @param _moveto value stored in moveto
 * @param _newloc value stored in newloc
 */
terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
{
    this->start = _start;
    this->end = _end;
    this->moveto = _moveto;
    this->newloc = _newloc;
    this->cost = 1.0;
}
/** Builds a shift with all four positions set to 0 and a cost of 1.0. */
terShift::terShift ()
{
    start = end = moveto = newloc = 0;
    cost = 1.0;
}
/**
 * Builds a shift from its four positions; the cost is set to 1.0.
 * @param _start value stored in start
 * @param _end value stored in end
 * @param _moveto value stored in moveto
 * @param _newloc value stored in newloc
 */
terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
{
    this->start = _start;
    this->end = _end;
    this->moveto = _moveto;
    this->newloc = _newloc;
    this->cost = 1.0;
}
/**
 * Builds a shift from its four positions and the shifted words; the cost
 * is set to 1.0.
 * @param _start value stored in start
 * @param _end value stored in end
 * @param _moveto value stored in moveto
 * @param _newloc value stored in newloc
 * @param _shifted words stored in shifted
 */
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
{
    this->start = _start;
    this->end = _end;
    this->moveto = _moveto;
    this->newloc = _newloc;
    this->shifted = _shifted;
    this->cost = 1.0;
}
/**
 * Builds a shift from its four positions and the shifted words; the cost
 * is set to 1.0.
 * @param _start value stored in start
 * @param _end value stored in end
 * @param _moveto value stored in moveto
 * @param _newloc value stored in newloc
 * @param _shifted words stored in shifted
 */
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
{
    this->start = _start;
    this->end = _end;
    this->moveto = _moveto;
    this->newloc = _newloc;
    this->shifted = _shifted;
    this->cost = 1.0;
}
// string terShift::vectorToString(vector<string> vec)
// {
// string retour("");
@ -58,38 +78,44 @@ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<stri
// return retour;
// }
/**
 * Formats the shift as text.
 * @return "[start, end, moveto/newloc]", followed by " (" +
 *         vectorToString(shifted) + ")" when shifted is not empty
 */
string terShift::toString()
{
    stringstream out;
    out << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
    if ( !shifted.empty() )
    {
        out << " (" << vectorToString ( shifted ) << ")";
    }
    return out.str();
}
/**
 * Formats the shift as text.
 * @return "[start, end, moveto/newloc]", followed by " (" +
 *         vectorToString(shifted) + ")" when shifted is not empty
 */
string terShift::toString()
{
    stringstream out;
    out << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
    if ( !shifted.empty() )
    {
        out << " (" << vectorToString ( shifted ) << ")";
    }
    return out.str();
}
/**
 * The distance of the shift.
 * @return start - moveto when moveto is before start, moveto - end when
 *         moveto is after end, and moveto - start otherwise
 */
int terShift::distance()
{
    if ( moveto < start ) return start - moveto;
    if ( moveto > end ) return moveto - end;
    return moveto - start;
}
/**
 * The distance of the shift.
 * @return start - moveto when moveto is before start, moveto - end when
 *         moveto is after end, and moveto - start otherwise
 */
int terShift::distance()
{
    if ( moveto < start ) return start - moveto;
    if ( moveto > end ) return moveto - end;
    return moveto - start;
}
/** @return true when moveto is smaller than start */
bool terShift::leftShift()
{
    const bool movesLeft = ( start > moveto );
    return movesLeft;
}
/** @return true when moveto is smaller than start */
bool terShift::leftShift()
{
    const bool movesLeft = ( start > moveto );
    return movesLeft;
}
/** @return number of positions from start to end inclusive: end - start + 1 */
int terShift::size()
{
    const int span = end - start;
    return span + 1;
}
/** @return number of positions from start to end inclusive: end - start + 1 */
int terShift::size()
{
    const int span = end - start;
    return span + 1;
}
// terShift terShift::operator=(terShift t)
// {
//

View File

@ -1,5 +1,26 @@
#ifndef MERT_TER_TER_SHIFT_H_
#define MERT_TER_TER_SHIFT_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef MERT_TER_TERSHIFT_H_
#define MERT_TER_TERSHIFT_H_
#include <vector>
#include <stdio.h>
@ -7,38 +28,38 @@
#include <sstream>
#include "tools.h"
using namespace std;
using namespace Tools;
namespace TERCpp
{
/* terShift: one shift of the word span start..end (both ends counted, see
   size()) together with the shifted words and a cached cost. moveto is the
   index compared against start/end by distance() and leftShift().
   NOTE(review): this span comes from a rendered diff, so the declarations
   appear twice (pre-change lines first, post-change lines second). */
class terShift
{
private:
public:
class terShift
{
private:
public:
// Pre-change declarations.
// Sets every position to 0 and cost to 1.0.
terShift();
// Stores the four positions; cost is set to 1.0.
terShift ( int _start, int _end, int _moveto, int _newloc );
// Same as above and also stores the shifted words.
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
// "[start, end, moveto/newloc]" plus " (words)" when shifted is non-empty.
string toString();
// start - moveto, moveto - end, or moveto - start, depending on where moveto lies.
int distance() ;
// True when moveto < start.
bool leftShift();
// end - start + 1.
int size();
// Post-change declarations (same interface).
// Sets every position to 0 and cost to 1.0.
terShift();
// Stores the four positions; cost is set to 1.0.
terShift ( int _start, int _end, int _moveto, int _newloc );
// Same as above and also stores the shifted words.
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
// "[start, end, moveto/newloc]" plus " (words)" when shifted is non-empty.
string toString();
// start - moveto, moveto - end, or moveto - start, depending on where moveto lies.
int distance() ;
// True when moveto < start.
bool leftShift();
// end - start + 1.
int size();
// terShift operator=(terShift t);
// string vectorToString(vector<string> vec);
// First and last index of the shifted span (both counted by size()).
int start;
int end;
// Index compared with start/end in distance() and leftShift().
int moveto;
// Printed after moveto by toString(); presumably the final location — TODO confirm.
int newloc;
vector<string> shifted; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<string> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
};
// First and last index of the shifted span (both counted by size()).
int start;
int end;
// Index compared with start/end in distance() and leftShift().
int moveto;
// Printed after moveto by toString(); presumably the final location — TODO confirm.
int newloc;
vector<string> shifted; // The words we shifted
vector<char> alignment ; // for pra_more output
vector<string> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to
// calculate it multiple times.
double cost;
};
}
#endif // MERT_TER_TER_SHIFT_H_
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,25 @@
#ifndef MERT_TER_TER_CALC_H_
#define MERT_TER_TER_CALC_H_
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
// Include guard. The name deliberately avoids a leading underscore followed
// by an uppercase letter and any double underscore: such identifiers are
// reserved for the implementation.
#ifndef TERCPP_TERCALC_H_
#define TERCPP_TERCALC_H_
#include <vector>
#include <stdio.h>
@ -21,62 +41,63 @@ namespace TERCpp
{
// typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
// Shorthand for a list of shifts.
typedef vector<terShift> vecTerShift;
/**
@author
*/
/* terCalc: declares the state (edit costs, limits, counters, work
   matrices) and the methods of the TER computation. The method bodies are
   not part of this view, so the comments below describe declarations only.
   NOTE(review): this span comes from a rendered diff; pre-change and
   post-change member lists follow one another. */
class terCalc
{
private :
// Shorthand for a list of shifts.
typedef vector<terShift> vecTerShift;
/**
@author
*/
class terCalc
{
private :
// Alignment vector holding each word's hash and its evaluation (0=ok, 1=sub, 2=ins, 3=del)
WERalignment l_WERalignment;
// Hash map holding the hash values of each word
hashMap bagOfWords;
int MAX_SHIFT_SIZE;
/* Variables for some internal counting. */
int NUM_SEGMENTS_SCORED;
int NUM_SHIFTS_CONSIDERED;
int NUM_BEAM_SEARCH_CALLS;
int MAX_SHIFT_DIST;
bool PRINT_DEBUG;
WERalignment l_WERalignment;
// Hash map holding the hash values of each word
hashMap bagOfWords;
int TAILLE_PERMUT_MAX;
// Internal counters
int NBR_SEGS_EVALUATED;
int NBR_PERMUTS_CONSID;
int NBR_BS_APPELS;
int DIST_MAX_PERMUT;
bool PRINT_DEBUG;
/* These are resized by the MIN_EDIT_DIST code if they aren't big enough */
// NOTE(review): fixed 1000 x 1000 member arrays; with an 8-byte double, S alone
// is about 8 MB per terCalc object — confirm instances are not placed on the stack.
double S[1000][1000];
char P[1000][1000];
vector<vecInt> refSpans;
vector<vecInt> hypSpans;
int BEAM_WIDTH;
// Used in minDistEdit; they are not resized
// NOTE(review): fixed 1000 x 1000 member arrays; with an 8-byte double, S alone
// is about 8 MB per terCalc object — confirm instances are not placed on the stack.
double S[1000][1000];
char P[1000][1000];
vector<vecInt> refSpans;
vector<vecInt> hypSpans;
int TAILLE_BEAM;
public:
// Per-operation edit costs and the value used as "infinity" (pre-change names).
int shift_cost;
int insert_cost;
int delete_cost;
int substitute_cost;
int match_cost;
double INF;
terCalc();
public:
// Per-operation edit costs and the value used as "infinity" (post-change names).
int shift_cost;
int insert_cost;
int delete_cost;
int substitute_cost;
int match_cost;
double infinite;
terCalc();
// ~terCalc();
// size_t* hashVec ( vector<string> s );
void setDebugMode ( bool b );
int WERCalculation ( size_t * ref, size_t * hyp );
int WERCalculation ( vector<string> ref, vector<string> hyp );
int WERCalculation ( vector<int> ref, vector<int> hyp );
void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp );
// Note the argument order: hypothesis first, reference second.
terAlignment WERCalculation ( vector<string> hyp, vector<string> ref );
// string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end);
// Pre-change method names.
hashMapInfos BuildWordMatches ( vector<string> hyp, vector<string> ref );
terAlignment MinEditDist ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
bool spanIntersection ( vecInt refSpan, vecInt hypSpan );
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
terAlignment TER ( vector<string> hyp, vector<string> ref );
terAlignment TER ( vector<int> hyp, vector<int> ref );
bestShiftStruct CalcBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
void FindAlignErr ( terAlignment align, bool* herr, bool* rerr, int* ralign );
vector<vecTerShift> GatherAllPossShifts ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
alignmentStruct PerformShift ( vector<string> words, terShift s );
alignmentStruct PerformShift ( vector<string> words, int start, int end, int newloc );
};
// Post-change method names; the signatures line up one-to-one with the
// pre-change list above (presumably plain renames — TODO confirm in terCalc.cpp).
hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref );
terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans );
bool trouverIntersection ( vecInt refSpan, vecInt hypSpan );
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength );
terAlignment TER ( vector<string> hyp, vector<string> ref );
terAlignment TER ( vector<int> hyp, vector<int> ref );
bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align );
void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign );
vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign );
alignmentStruct permuter ( vector<string> words, terShift s );
alignmentStruct permuter ( vector<string> words, int start, int end, int newloc );
};
}
#endif // MERT_TER_TER_CALC_H_
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,38 +1,66 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#ifndef MERT_TER_TOOLS_H_
#define MERT_TER_TOOLS_H_
#include <vector>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <sstream>
#include <boost/xpressive/xpressive.hpp>
using namespace std;
namespace Tools
{
// Shorthand aliases for standard vectors (pre-change list).
typedef vector<double> vecDouble;
typedef vector<char> vecChar;
typedef vector<int> vecInt;
typedef vector<float> vecFloat;
typedef vector<string> vecString;
// An alignment element is a vector of strings; a WERalignment is a vector of those.
typedef vector<string> alignmentElement;
typedef vector<alignmentElement> WERalignment;
// Shorthand aliases for standard vectors (post-change list, adds vecSize_t).
typedef vector<double> vecDouble;
typedef vector<char> vecChar;
typedef vector<int> vecInt;
typedef vector<float> vecFloat;
typedef vector<size_t> vecSize_t;
typedef vector<string> vecString;
// An alignment element is a vector of strings; a WERalignment is a vector of those.
typedef vector<string> alignmentElement;
typedef vector<alignmentElement> WERalignment;
// param: plain bundle of option flags, file names and a debug level.
// The meaning of the individual flags is not visible from this header.
// NOTE(review): this span comes from a rendered diff; the pre-change opening
// of the struct is followed directly by the post-change one.
struct param {
bool debugMode;
string referenceFile; // presumably the path to the reference file (comment previously said "resources") — TODO confirm
string hypothesisFile; // presumably the path to the hypothesis file (comment previously said "configuration files") — TODO confirm
string outputFileExtension;
string outputFileName;
bool noPunct;
bool caseOn;
bool normalize;
bool tercomLike;
bool sgmlInputs;
bool noTxtIds;
struct param
{
bool debugMode;
string referenceFile; // presumably the path to the reference file (comment previously said "resources") — TODO confirm
string hypothesisFile; // presumably the path to the hypothesis file (comment previously said "configuration files") — TODO confirm
string outputFileExtension;
string outputFileName;
bool noPunct;
bool caseOn;
bool normalize;
bool tercomLike;
bool sgmlInputs;
bool noTxtIds;
bool printAlignments;
bool WER;
int debugLevel;
};
// param = { false, "","","","" };
@ -40,26 +68,35 @@ struct param {
// private:
// public:
// Free helper functions of namespace Tools (pre-change list). Their
// definitions are not visible here, so the notes below are assumptions.
// vectorToString: presumably joins the elements into one string; the extra
// string argument looks like a separator — TODO confirm in tools.cpp.
string vectorToString ( vector<string> vec );
string vectorToString ( vector<string> vec, string s );
// subVector: presumably returns the elements between start and end; whether
// end is included is not visible here — TODO confirm.
vector<string> subVector ( vector<string> vec, int start, int end );
vector<int> subVector ( vector<int> vec, int start, int end );
vector<float> subVector ( vector<float> vec, int start, int end );
vector<string> copyVector ( vector<string> vec );
vector<int> copyVector ( vector<int> vec );
vector<float> copyVector ( vector<float> vec );
// stringToVector*: presumably split s on tok — TODO confirm.
vector<string> stringToVector ( string s, string tok );
vector<int> stringToVectorInt ( string s, string tok );
vector<float> stringToVectorFloat ( string s, string tok );
// String clean-up helpers; each takes and returns a string by value.
string lowerCase(string str);
string removePunct(string str);
string tokenizePunct(string str);
string removePunctTercom(string str);
string normalizeStd(string str);
string printParams(param p);
// Post-change list: adds char/int/bool overloads, raw-array overloads and join().
// vectorToString: presumably joins the elements into one string; the extra
// string argument looks like a separator — TODO confirm in tools.cpp.
string vectorToString ( vector<string> vec );
string vectorToString ( vector<char> vec );
string vectorToString ( vector<int> vec );
string vectorToString ( vector<string> vec, string s );
string vectorToString ( vector<char> vec, string s );
string vectorToString ( vector<int> vec, string s );
string vectorToString ( vector<bool> vec, string s );
// Raw-array overloads; taille ("size" in French) is presumably the number of
// elements in vec — TODO confirm.
string vectorToString ( char* vec, string s, int taille );
string vectorToString ( int* vec, string s , int taille );
string vectorToString ( bool* vec, string s , int taille );
// subVector: presumably returns the elements between start and end; whether
// end is included is not visible here — TODO confirm.
vector<string> subVector ( vector<string> vec, int start, int end );
vector<int> subVector ( vector<int> vec, int start, int end );
vector<float> subVector ( vector<float> vec, int start, int end );
vector<string> copyVector ( vector<string> vec );
vector<int> copyVector ( vector<int> vec );
vector<float> copyVector ( vector<float> vec );
// stringToVector*: presumably split s on tok — TODO confirm.
vector<string> stringToVector ( string s, string tok );
vector<string> stringToVector ( char s, string tok );
vector<string> stringToVector ( int s, string tok );
vector<int> stringToVectorInt ( string s, string tok );
vector<float> stringToVectorFloat ( string s, string tok );
// String clean-up helpers; each takes and returns a string by value.
string lowerCase(string str);
string removePunct(string str);
string tokenizePunct(string str);
string removePunctTercom(string str);
string normalizeStd(string str);
string printParams(param p);
// Note the argument order: delimiter first, then the strings.
string join ( string delim, vector<string> arr );
// };
// Takes and returns a param by value.
param copyParam(param p);
param copyParam(param p);
}
#endif // MERT_TER_TOOLS_H_
#endif