Memory leak correction in TER algorithm

This commit is contained in:
Christophe SERVAN 2015-02-16 19:02:46 +01:00
parent f6884c55a1
commit 87a4f19546
25 changed files with 3117 additions and 2177 deletions

View File

@ -46,6 +46,7 @@ TER/infosHasher.cpp
TER/stringInfosHasher.cpp TER/stringInfosHasher.cpp
TER/tercalc.cpp TER/tercalc.cpp
TER/tools.cpp TER/tools.cpp
TER/bestShiftStruct.cpp
TerScorer.cpp TerScorer.cpp
CderScorer.cpp CderScorer.cpp
MeteorScorer.cpp MeteorScorer.cpp

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -23,15 +23,24 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std; using namespace std;
namespace TERCpp namespace TERCpp
{ {
string alignmentStruct::toString() string alignmentStruct::toString()
{ {
stringstream s; stringstream s;
// s << "nword : " << vectorToString(nwords)<<endl; // s << "nword : " << vectorToString(nwords)<<endl;
// s << "alignment" << vectorToString(alignment)<<endl; // s << "alignment" << vectorToString(alignment)<<endl;
// s << "afterShift" << vectorToString(alignment)<<endl; // s << "afterShift" << vectorToString(alignment)<<endl;
s << "Nothing to be printed" <<endl; s << "Nothing to be printed" <<endl;
return s.str(); return s.str();
} }
/**
 * void alignmentStruct::set ( alignmentStruct l_alignmentStruct )
 * Overwrites every data member of this object with the corresponding
 * member of the argument.
 * @param l_alignmentStruct object whose nwords, alignment, aftershift and cost are copied
 */
void alignmentStruct::set(alignmentStruct l_alignmentStruct)
{
    // The four assignments are independent of one another.
    this->cost       = l_alignmentStruct.cost;
    this->aftershift = l_alignmentStruct.aftershift;   // for pra_more output
    this->alignment  = l_alignmentStruct.alignment;    // for pra_more output
    this->nwords     = l_alignmentStruct.nwords;       // the words we shifted
}
// alignmentStruct::alignmentStruct() // alignmentStruct::alignmentStruct()
// { // {
@ -99,7 +108,7 @@ string alignmentStruct::toString()
// return s.str(); // return s.str();
// } // }
/* The distance of the shift. */ /* The distance of the shift. */
// int alignmentStruct::distance() // int alignmentStruct::distance()
// { // {
// if (moveto < start) // if (moveto < start)

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef MERT_TER_ALIGNMENTSTRUCT_H_ #ifndef __TERCPPALIGNMENTSTRUCT_H__
#define MERT_TER_ALIGNMENTSTRUCT_H_ #define __TERCPPALIGNMENTSTRUCT_H__
#include <vector> #include <vector>
@ -34,10 +34,10 @@ using namespace Tools;
namespace TERCpp namespace TERCpp
{ {
class alignmentStruct class alignmentStruct
{ {
private: private:
public: public:
// alignmentStruct(); // alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc); // alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -53,14 +53,15 @@ public:
// int end; // int end;
// int moveto; // int moveto;
// int newloc; // int newloc;
vector<string> nwords; // The words we shifted vector<string> nwords; // The words we shifted
vector<char> alignment ; // for pra_more output vector<char> alignment ; // for pra_more output
vector<vecInt> aftershift; // for pra_more output vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to // This is used to store the cost of a shift, so we don't have to
// calculate it multiple times. // calculate it multiple times.
double cost; double cost;
string toString(); string toString();
}; void set(alignmentStruct l_alignmentStruct);
};
} }
#endif #endif

View File

@ -0,0 +1,66 @@
/*********************************
tercpp: an open-source Translation Edit Rate (TER) scorer tool for Machine Translation.
Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version.
This program and library are distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/
#include "bestShiftStruct.h"
using namespace std;
namespace TERCpp
{
/**
 * bestShiftStruct::bestShiftStruct()
 * Allocates a default-constructed terShift, a default-constructed
 * terAlignment and a bool flag initialised to false.
 */
bestShiftStruct::bestShiftStruct()
    : m_best_shift(new terShift()),
      m_best_align(new terAlignment()),
      m_empty(new bool(false))
{
}
/**
 * bestShiftStruct::~bestShiftStruct()
 * Releases the heap objects held through m_best_align, m_best_shift and
 * m_empty.
 *
 * NOTE(review): no copy constructor or assignment operator is visible for
 * this class, so a member-wise copy would share these pointers and delete
 * them twice -- confirm that instances are never copied.
 */
bestShiftStruct::~bestShiftStruct()
{
    delete m_best_align;
    delete m_best_shift;
    // m_empty is allocated with new as well; it was previously never freed.
    delete m_empty;
}
/**
 * void bestShiftStruct::setEmpty ( bool b )
 * Stores b as the current value of the "empty" flag.
 * @param b new value of the flag
 */
void bestShiftStruct::setEmpty(bool b)
{
    if (!m_empty)
    {
        // No storage yet: allocate it once.
        m_empty = new bool(b);
        return;
    }
    // Overwrite in place; allocating a fresh bool on every call leaked the
    // previously allocated one.
    *m_empty = b;
}
/**
 * void bestShiftStruct::setBestShift ( terShift * l_terShift )
 * Forwards l_terShift to the set() method of the terShift held in
 * m_best_shift; the pointer itself is not stored here.
 * @param l_terShift shift handed to terShift::set
 */
void bestShiftStruct::setBestShift(terShift * l_terShift)
{
    (*m_best_shift).set(l_terShift);
}
/**
 * void bestShiftStruct::setBestAlign ( terAlignment * l_terAlignment )
 * Forwards l_terAlignment to the set() method of the terAlignment held in
 * m_best_align; the pointer itself is not stored here.
 * @param l_terAlignment alignment handed to terAlignment::set
 */
void bestShiftStruct::setBestAlign(terAlignment * l_terAlignment)
{
    (*m_best_align).set(l_terAlignment);
}
/**
 * string bestShiftStruct::toString()
 * Builds a printable form of this object: the toString() output of the
 * best shift on one line, then that of the best alignment on the next.
 * @return the concatenated text
 */
string bestShiftStruct::toString()
{
    stringstream s;
    s << m_best_shift->toString() << endl;
    s << m_best_align->toString() << endl;
//   s << (*m_empty) << endl;
    // The function previously ended without a return statement, which is
    // undefined behaviour for a non-void function and lost the built text.
    return s.str();
}
/**
 * bool bestShiftStruct::getEmpty()
 * @return the bool currently pointed to by m_empty
 */
bool bestShiftStruct::getEmpty()
{
    const bool l_flag = *m_empty;
    return l_flag;
}
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef __BESTSHIFTSTRUCT_H_ #ifndef __BESTSHIFTSTRUCT_H__
#define __BESTSHIFTSTRUCT_H_ #define __BESTSHIFTSTRUCT_H__
#include <vector> #include <vector>
@ -36,10 +36,10 @@ using namespace Tools;
namespace TERCpp namespace TERCpp
{ {
class bestShiftStruct class bestShiftStruct
{ {
private: private:
public: public:
// alignmentStruct(); // alignmentStruct();
// alignmentStruct (int _start, int _end, int _moveto, int _newloc); // alignmentStruct (int _start, int _end, int _moveto, int _newloc);
@ -55,16 +55,36 @@ public:
// int end; // int end;
// int moveto; // int moveto;
// int newloc; // int newloc;
terShift m_best_shift; terShift * m_best_shift;
terAlignment m_best_align; terAlignment * m_best_align;
bool m_empty; bool * m_empty;
bestShiftStruct();
~bestShiftStruct();
inline void set(bestShiftStruct l_bestShiftStruct)
{
m_best_shift->set(l_bestShiftStruct.m_best_shift);
m_best_align->set(l_bestShiftStruct.m_best_align);
setEmpty(l_bestShiftStruct.getEmpty());
}
/**
 * Copies the best shift, the best alignment and the "empty" flag of the
 * object pointed to by l_bestShiftStruct into this object.
 * The pointer is dereferenced without a null check.
 * @param l_bestShiftStruct object to copy from
 */
inline void set(bestShiftStruct * l_bestShiftStruct)
{
    bestShiftStruct & l_source = *l_bestShiftStruct;
    m_best_shift->set(l_source.m_best_shift);
    m_best_align->set(l_source.m_best_align);
    const bool l_isEmpty = l_source.getEmpty();
    setEmpty(l_isEmpty);
}
void setEmpty(bool b);
void setBestShift(terShift * l_terShift);
void setBestAlign(terAlignment * l_terAlignment);
string toString();
bool getEmpty();
// vector<string> nwords; // The words we shifted // vector<string> nwords; // The words we shifted
// char* alignment ; // for pra_more output // char* alignment ; // for pra_more output
// vector<vecInt> aftershift; // for pra_more output // vector<vecInt> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to // This is used to store the cost of a shift, so we don't have to
// calculate it multiple times. // calculate it multiple times.
// double cost; // double cost;
}; };
} }
#endif #endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -28,142 +28,156 @@ using namespace std;
namespace HashMapSpace namespace HashMapSpace
{ {
// hashMap::hashMap(); // hashMap::hashMap();
/* hashMap::~hashMap() /* hashMap::~hashMap()
{
// vector<stringHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMap::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMap::trouve ( long searchKey )
{ {
// vector<stringHasher>::const_iterator del = m_hasher.begin(); long foundKey;
for ( vector<stringHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMap::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMap::trouve ( long searchKey )
{
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
return 1; if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
} }
} int hashMap::trouve ( string key )
return 0; {
} long searchKey=hashValue ( key );
int hashMap::trouve ( string key ) long foundKey;;
{
long searchKey=hashValue ( key );
long foundKey;;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
return 1; if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
} }
} /**
return 0; * long hashMap::hashValue ( string key )
} * @param key
/** * @return
* long hashMap::hashValue ( string key ) */
* @param key long hashMap::hashValue ( string key )
* @return {
*/ locale loc; // the "C" locale
long hashMap::hashValue ( string key ) const collate<char>& coll = use_facet<collate<char> >(loc);
{ return coll.hash(key.data(),key.data()+key.length());
locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher; // boost::hash<string> hasher;
// return hasher ( key ); // return hasher ( key );
} }
/** /**
* void hashMap::addHasher ( string key, string value ) * void hashMap::addHasher ( string key, string value )
* @param key * @param key
* @param value * @param value
*/ */
void hashMap::addHasher ( string key, string value ) void hashMap::addHasher ( string key, string value )
{ {
if ( trouve ( hashValue ( key ) ) ==0 ) { if ( trouve ( hashValue ( key ) ) ==0 )
{
// cerr << "ICI1" <<endl; // cerr << "ICI1" <<endl;
stringHasher H ( hashValue ( key ),key,value ); stringHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl; // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl; // cerr << "ICI2" <<endl;
m_hasher.push_back ( H ); m_hasher.push_back ( H );
} }
}
stringHasher hashMap::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
stringHasher defaut(0,"","");
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
} }
} stringHasher hashMap::getHasher ( string key )
return defaut; {
} long searchKey=hashValue ( key );
string hashMap::getValue ( string key ) long foundKey;
{ stringHasher defaut(0,"","");
long searchKey=hashValue ( key );
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return ( *l_hasher );
}
}
return defaut;
}
string hashMap::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl; // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue(); return ( *l_hasher ).getValue();
}
}
return "";
} }
} string hashMap::searchValue ( string value )
return ""; {
}
string hashMap::searchValue ( string value )
{
// long searchKey=hashValue ( key ); // long searchKey=hashValue ( key );
// long foundKey; // long foundKey;
string foundValue; string foundValue;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundValue= ( *l_hasher ).getValue(); {
if ( foundValue.compare ( value ) == 0 ) { foundValue= ( *l_hasher ).getValue();
return ( *l_hasher ).getKey(); if ( foundValue.compare ( value ) == 0 )
{
return ( *l_hasher ).getKey();
}
}
return "";
} }
}
return "";
}
void hashMap::setValue ( string key , string value ) void hashMap::setValue ( string key , string value )
{ {
long searchKey=hashValue ( key ); long searchKey=hashValue ( key );
long foundKey; long foundKey;
// vector<stringHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<stringHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
( *l_hasher ).setValue ( value ); if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue(); // return ( *l_hasher ).getValue();
}
}
} }
}
}
/** /**
* *
*/ */
void hashMap::printHash() void hashMap::printHash()
{ {
for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<stringHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; {
} cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
} }
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* /*
* Generic hashmap manipulation functions * Generic hashmap manipulation functions
*/ */
#ifndef __HASHMAP_H_ #ifndef __HASHMAP_H__
#define __HASHMAP_H_ #define __HASHMAP_H__
#include <boost/functional/hash.hpp> #include <boost/functional/hash.hpp>
#include "stringHasher.h" #include "stringHasher.h"
#include <vector> #include <vector>
@ -35,27 +35,27 @@ using namespace std;
namespace HashMapSpace namespace HashMapSpace
{ {
class hashMap class hashMap
{ {
private: private:
vector<stringHasher> m_hasher; vector<stringHasher> m_hasher;
public: public:
// ~hashMap(); // ~hashMap();
long hashValue ( string key ); long hashValue ( string key );
int trouve ( long searchKey ); int trouve ( long searchKey );
int trouve ( string key ); int trouve ( string key );
void addHasher ( string key, string value ); void addHasher ( string key, string value );
stringHasher getHasher ( string key ); stringHasher getHasher ( string key );
string getValue ( string key ); string getValue ( string key );
string searchValue ( string key ); string searchValue ( string key );
void setValue ( string key , string value ); void setValue ( string key , string value );
void printHash(); void printHash();
vector<stringHasher> getHashMap(); vector<stringHasher> getHashMap();
string printStringHash(); string printStringHash();
string printStringHash2(); string printStringHash2();
string printStringHashForLexicon(); string printStringHashForLexicon();
}; };
} }

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -28,108 +28,117 @@ using namespace std;
namespace HashMapSpace namespace HashMapSpace
{ {
// hashMapInfos::hashMap(); // hashMapInfos::hashMap();
/* hashMapInfos::~hashMap() /* hashMapInfos::~hashMap()
{
// vector<infosHasher>::const_iterator del = m_hasher.begin();
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapInfos::trouve ( long searchKey )
{ {
// vector<infosHasher>::const_iterator del = m_hasher.begin(); long foundKey;
for ( vector<infosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{
delete(*del);
}
}*/
/**
* int hashMapInfos::trouve ( long searchKey )
* @param searchKey
* @return
*/
int hashMapInfos::trouve ( long searchKey )
{
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
return 1; if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
} }
} int hashMapInfos::trouve ( string key )
return 0; {
} long searchKey=hashValue ( key );
int hashMapInfos::trouve ( string key ) long foundKey;;
{
long searchKey=hashValue ( key );
long foundKey;;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
return 1; if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
} }
}
return 0;
}
/** /**
* long hashMapInfos::hashValue ( string key ) * long hashMapInfos::hashValue ( string key )
* @param key * @param key
* @return * @return
*/ */
long hashMapInfos::hashValue ( string key ) long hashMapInfos::hashValue ( string key )
{ {
locale loc; // the "C" locale locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> >(loc); const collate<char>& coll = use_facet<collate<char> >(loc);
return coll.hash(key.data(),key.data()+key.length()); return coll.hash(key.data(),key.data()+key.length());
// boost::hash<string> hasher; // boost::hash<string> hasher;
// return hasher ( key ); // return hasher ( key );
} }
/** /**
* void hashMapInfos::addHasher ( string key, string value ) * void hashMapInfos::addHasher ( string key, string value )
* @param key * @param key
* @param value * @param value
*/ */
void hashMapInfos::addHasher ( string key, vector<int> value ) void hashMapInfos::addHasher ( string key, vector<int> value )
{ {
if ( trouve ( hashValue ( key ) ) ==0 ) { if ( trouve ( hashValue ( key ) ) ==0 )
{
// cerr << "ICI1" <<endl; // cerr << "ICI1" <<endl;
infosHasher H ( hashValue ( key ),key,value ); infosHasher H ( hashValue ( key ),key,value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl; // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl; // cerr << "ICI2" <<endl;
m_hasher.push_back ( H ); m_hasher.push_back ( H );
} }
}
void hashMapInfos::addValue ( string key, vector<int> value )
{
addHasher ( key, value );
}
infosHasher hashMapInfos::getHasher ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) {
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
} }
} void hashMapInfos::addValue ( string key, vector<int> value )
vector<int> temp; {
infosHasher defaut(0,"",temp); addHasher ( key, value );
return defaut; }
} infosHasher hashMapInfos::getHasher ( string key )
vector<int> hashMapInfos::getValue ( string key ) {
{ long searchKey=hashValue ( key );
long searchKey=hashValue ( key ); long foundKey;
long foundKey;
vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return ( *l_hasher );
}
}
vector<int> temp;
infosHasher defaut(0,"",temp);
return defaut;
}
vector<int> hashMapInfos::getValue ( string key )
{
long searchKey=hashValue ( key );
long foundKey;
vector<int> retour;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
foundKey= ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl; // cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue(); return ( *l_hasher ).getValue();
}
}
return retour;
} }
}
return retour;
}
// string hashMapInfos::searchValue ( string value ) // string hashMapInfos::searchValue ( string value )
// { // {
// // long searchKey=hashValue ( key ); // // long searchKey=hashValue ( key );
@ -149,38 +158,42 @@ vector<int> hashMapInfos::getValue ( string key )
// } // }
// //
void hashMapInfos::setValue ( string key , vector<int> value ) void hashMapInfos::setValue ( string key , vector<int> value )
{ {
long searchKey=hashValue ( key ); long searchKey=hashValue ( key );
long foundKey; long foundKey;
// vector<infosHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<infosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
foundKey= ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey= ( *l_hasher ).getHashKey();
( *l_hasher ).setValue ( value ); if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue(); // return ( *l_hasher ).getValue();
}
}
}
string hashMapInfos::toString ()
{
stringstream to_return;
for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
return to_return.str();
} }
}
}
string hashMapInfos::toString ()
{
stringstream to_return;
for ( vector<infosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
return to_return.str();
}
/** /**
* *
*/ */
void hashMapInfos::printHash() void hashMapInfos::printHash()
{ {
for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ ) { for ( vector<infosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
{
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
} }
} }

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* /*
* Generic hashmap manipulation functions * Generic hashmap manipulation functions
*/ */
#ifndef __HASHMAPINFOS_H_ #ifndef __HASHMAPINFOS_H__
#define __HASHMAPINFOS_H_ #define __HASHMAPINFOS_H__
#include <boost/functional/hash.hpp> #include <boost/functional/hash.hpp>
#include "infosHasher.h" #include "infosHasher.h"
#include <vector> #include <vector>
@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace namespace HashMapSpace
{ {
class hashMapInfos class hashMapInfos
{ {
private: private:
vector<infosHasher> m_hasher; vector<infosHasher> m_hasher;
public: public:
// ~hashMap(); // ~hashMap();
long hashValue ( string key ); long hashValue ( string key );
int trouve ( long searchKey ); int trouve ( long searchKey );
int trouve ( string key ); int trouve ( string key );
void addHasher ( string key, vector<int> value ); void addHasher ( string key, vector<int> value );
void addValue ( string key, vector<int> value ); void addValue ( string key, vector<int> value );
infosHasher getHasher ( string key ); infosHasher getHasher ( string key );
vector<int> getValue ( string key ); vector<int> getValue ( string key );
// string searchValue ( string key ); // string searchValue ( string key );
void setValue ( string key , vector<int> value ); void setValue ( string key , vector<int> value );
void printHash(); void printHash();
string toString(); string toString();
vector<infosHasher> getHashMap(); vector<infosHasher> getHashMap();
string printStringHash(); string printStringHash();
string printStringHash2(); string printStringHash2();
string printStringHashForLexicon(); string printStringHashForLexicon();
}; };
} }

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -27,166 +27,179 @@ using namespace std;
namespace HashMapSpace namespace HashMapSpace
{ {
// hashMapStringInfos::hashMap(); // hashMapStringInfos::hashMap();
/* hashMapStringInfos::~hashMap() /* hashMapStringInfos::~hashMap()
{ {
// vector<stringInfosHasher>::const_iterator del = m_hasher.begin(); // vector<stringInfosHasher>::const_iterator del = m_hasher.begin();
for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ ) for ( vector<stringInfosHasher>::const_iterator del=m_hasher.begin(); del != m_hasher.end(); del++ )
{ {
delete(*del); delete(*del);
} }
}*/ }*/
/** /**
* int hashMapStringInfos::trouve ( long searchKey ) * int hashMapStringInfos::trouve ( long searchKey )
* @param searchKey * @param searchKey
* @return * @return
*/ */
int hashMapStringInfos::trouve ( long searchKey ) int hashMapStringInfos::trouve ( long searchKey )
{ {
long foundKey; long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
foundKey = ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey = ( *l_hasher ).getHashKey();
return 1; if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
} }
}
return 0;
}
int hashMapStringInfos::trouve ( string key ) int hashMapStringInfos::trouve ( string key )
{ {
long searchKey = hashValue ( key ); long searchKey = hashValue ( key );
long foundKey;; long foundKey;;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
foundKey = ( *l_hasher ).getHashKey(); {
if ( searchKey == foundKey ) { foundKey = ( *l_hasher ).getHashKey();
return 1; if ( searchKey == foundKey )
{
return 1;
}
}
return 0;
} }
}
return 0;
}
/** /**
* long hashMapStringInfos::hashValue ( string key ) * long hashMapStringInfos::hashValue ( string key )
* @param key * @param key
* @return * @return
*/ */
long hashMapStringInfos::hashValue ( string key ) long hashMapStringInfos::hashValue ( string key )
{ {
locale loc; // the "C" locale locale loc; // the "C" locale
const collate<char>& coll = use_facet<collate<char> > ( loc ); const collate<char>& coll = use_facet<collate<char> > ( loc );
return coll.hash ( key.data(), key.data() + key.length() ); return coll.hash ( key.data(), key.data() + key.length() );
// boost::hash<string> hasher; // boost::hash<string> hasher;
// return hasher ( key ); // return hasher ( key );
}
/**
* void hashMapStringInfos::addHasher ( string key, string value )
* @param key
* @param value
*/
void hashMapStringInfos::addHasher ( string key, vector<string> value )
{
if ( trouve ( hashValue ( key ) ) == 0 ) {
// cerr << "ICI1" <<endl;
stringInfosHasher H ( hashValue ( key ), key, value );
// cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
// cerr << "ICI2" <<endl;
m_hasher.push_back ( H );
}
}
void hashMapStringInfos::addValue ( string key, vector<string> value )
{
addHasher ( key, value );
}
stringInfosHasher hashMapStringInfos::getHasher ( string key )
{
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
return ( *l_hasher );
} }
} /**
vector<string> tmp; * void hashMapStringInfos::addHasher ( string key, string value )
stringInfosHasher defaut ( 0, "", tmp ); * @param key
return defaut; * @param value
} */
vector<string> hashMapStringInfos::getValue ( string key ) void hashMapStringInfos::addHasher ( string key, vector<string> value )
{ {
long searchKey = hashValue ( key ); if ( trouve ( hashValue ( key ) ) == 0 )
long foundKey; {
vector<string> retour; // cerr << "ICI1" <<endl;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin(); stringInfosHasher H ( hashValue ( key ), key, value );
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { // cerr <<" "<< hashValue ( key )<<" "<< key<<" "<<value <<endl;
foundKey = ( *l_hasher ).getHashKey(); // cerr << "ICI2" <<endl;
if ( searchKey == foundKey ) {
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl; m_hasher.push_back ( H );
return ( *l_hasher ).getValue(); }
} }
} void hashMapStringInfos::addValue ( string key, vector<string> value )
return retour; {
} addHasher ( key, value );
// string hashMapStringInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
// // long foundKey;
// vector<int> foundValue;
//
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
// {
// foundValue= ( *l_hasher ).getValue();
// /* if ( foundValue.compare ( value ) == 0 )
// {
// return ( *l_hasher ).getKey();
// }*/
// }
// return "";
// }
//
void hashMapStringInfos::setValue ( string key , vector<string> value )
{
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) {
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey ) {
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
} }
} stringInfosHasher hashMapStringInfos::getHasher ( string key )
} {
long searchKey = hashValue ( key );
long foundKey;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
return ( *l_hasher );
}
}
vector<string> tmp;
stringInfosHasher defaut ( 0, "", tmp );
return defaut;
}
vector<string> hashMapStringInfos::getValue ( string key )
{
long searchKey = hashValue ( key );
long foundKey;
vector<string> retour;
// vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
foundKey = ( *l_hasher ).getHashKey();
if ( searchKey == foundKey )
{
// cerr <<"value found : " << key<<"|"<< ( *l_hasher ).getValue()<<endl;
return ( *l_hasher ).getValue();
}
}
return retour;
}
// string hashMapStringInfos::searchValue ( string value )
// {
// // long searchKey=hashValue ( key );
// // long foundKey;
// vector<int> foundValue;
//
// // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
// for ( vector<stringInfosHasher>:: iterator l_hasher=m_hasher.begin() ; l_hasher!=m_hasher.end() ; l_hasher++ )
// {
// foundValue= ( *l_hasher ).getValue();
// /* if ( foundValue.compare ( value ) == 0 )
// {
// return ( *l_hasher ).getKey();
// }*/
// }
// return "";
// }
//
string hashMapStringInfos::toString () void hashMapStringInfos::setValue ( string key , vector<string> value )
{ {
stringstream to_return; long searchKey = hashValue ( key );
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { long foundKey;
to_return << (*l_hasher).toString(); // vector<stringInfosHasher>::const_iterator l_hasher=m_hasher.begin();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
} {
return to_return.str(); foundKey = ( *l_hasher ).getHashKey();
} if ( searchKey == foundKey )
{
( *l_hasher ).setValue ( value );
// return ( *l_hasher ).getValue();
}
}
}
/** string hashMapStringInfos::toString ()
* {
*/ stringstream to_return;
void hashMapStringInfos::printHash() for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{ {
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ ) { to_return << (*l_hasher).toString();
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl; // cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
} }
} return to_return.str();
vector< stringInfosHasher > hashMapStringInfos::getHashMap() }
{
return m_hasher; /**
} *
*/
void hashMapStringInfos::printHash()
{
for ( vector<stringInfosHasher>:: iterator l_hasher = m_hasher.begin() ; l_hasher != m_hasher.end() ; l_hasher++ )
{
// cout << ( *l_hasher ).getHashKey() <<" | "<< ( *l_hasher ).getKey() << " | " << ( *l_hasher ).getValue() << endl;
}
}
vector< stringInfosHasher > hashMapStringInfos::getHashMap()
{
return m_hasher;
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -21,8 +21,8 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/* /*
* Generic hashmap manipulation functions * Generic hashmap manipulation functions
*/ */
#ifndef __HASHMAPSTRINGINFOS_H_ #ifndef __HASHMAPSTRINGINFOS_H__
#define __HASHMAPSTRINGINFOS_H_ #define __HASHMAPSTRINGINFOS_H__
#include <boost/functional/hash.hpp> #include <boost/functional/hash.hpp>
#include "stringInfosHasher.h" #include "stringInfosHasher.h"
#include <vector> #include <vector>
@ -34,29 +34,29 @@ using namespace std;
namespace HashMapSpace namespace HashMapSpace
{ {
class hashMapStringInfos class hashMapStringInfos
{ {
private: private:
vector<stringInfosHasher> m_hasher; vector<stringInfosHasher> m_hasher;
public: public:
// ~hashMap(); // ~hashMap();
long hashValue ( string key ); long hashValue ( string key );
int trouve ( long searchKey ); int trouve ( long searchKey );
int trouve ( string key ); int trouve ( string key );
void addHasher ( string key, vector<string> value ); void addHasher ( string key, vector<string> value );
void addValue ( string key, vector<string> value ); void addValue ( string key, vector<string> value );
stringInfosHasher getHasher ( string key ); stringInfosHasher getHasher ( string key );
vector<string> getValue ( string key ); vector<string> getValue ( string key );
// string searchValue ( string key ); // string searchValue ( string key );
void setValue ( string key , vector<string> value ); void setValue ( string key , vector<string> value );
void printHash(); void printHash();
string toString(); string toString();
vector<stringInfosHasher> getHashMap(); vector<stringInfosHasher> getHashMap();
string printStringHash(); string printStringHash();
string printStringHash2(); string printStringHash2();
string printStringHashForLexicon(); string printStringHashForLexicon();
}; };
} }

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace namespace HashMapSpace
{ {
/**
 * Builds an entry from its three parts.
 * @param cle hash value of the key
 * @param cleTxt textual key
 * @param valueVecInt payload attached to the key
 */
infosHasher::infosHasher (long cle,string cleTxt, vector<int> valueVecInt )
    : m_hashKey ( cle ),
      m_key ( cleTxt ),
      m_value ( valueVecInt )
{
}
// infosHasher::~infosHasher(){};*/ // infosHasher::~infosHasher(){};*/
long infosHasher::getHashKey() long infosHasher::getHashKey()
{ {
return m_hashKey; return m_hashKey;
} }
/**
 * @return a copy of the textual key held by this entry
 */
string infosHasher::getKey()
{
    return this->m_key;
}
/**
 * @return a copy of the payload held by this entry
 */
vector<int> infosHasher::getValue()
{
    return this->m_value;
}
/**
 * Replaces the payload of this entry; the key and its hash are untouched.
 * @param value new payload
 */
void infosHasher::setValue ( vector<int> value )
{
    this->m_value = value;
}
/**
 * Formats the entry as one line: hash, key and the payload rendered by
 * vectorToString() with a tab separator, the three fields being separated
 * by tabs and followed by a line break.
 * @return the formatted line
 */
string infosHasher::toString()
{
    stringstream l_line;
    l_line << m_hashKey << "\t" << m_key << "\t";
    l_line << vectorToString(m_value,"\t") << endl;
    return l_line.str();
}
// typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S; // typedef stdext::hash_map<std::string,string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef __INFOSHASHER_H_ #ifndef __INFOSHASHER_H__
#define __INFOSHASHER_H_ #define __INFOSHASHER_H__
#include <string> #include <string>
// #include <ext/hash_map> // #include <ext/hash_map>
#include <stdio.h> #include <stdio.h>
@ -31,23 +31,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std; using namespace std;
namespace HashMapSpace
{
    /**
     * One table entry: a textual key, the hash value supplied for it and a
     * vector<int> payload.
     */
    class infosHasher
    {
    private:
        long m_hashKey;             // hash value supplied at construction
        std::string m_key;          // textual key
        std::vector<int> m_value;   // payload attached to the key

    public:
        infosHasher ( long cle, std::string cleTxt, std::vector<int> valueVecInt );

        // Read access to the three parts of the entry.
        long getHashKey();
        std::string getKey();
        std::vector<int> getValue();

        // Replaces the payload.
        void setValue ( std::vector<int> value );

        // Textual rendering of the entry.
        std::string toString();
    };
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -26,29 +26,29 @@ using namespace std;
namespace HashMapSpace namespace HashMapSpace
{ {
/**
 * Builds an entry from its three parts.
 * @param cle hash value of the key
 * @param cleTxt textual key
 * @param valueTxt text attached to the key
 */
stringHasher::stringHasher ( long cle, string cleTxt, string valueTxt )
    : m_hashKey ( cle ),
      m_key ( cleTxt ),
      m_value ( valueTxt )
{
}
// stringHasher::~stringHasher(){};*/ // stringHasher::~stringHasher(){};*/
long stringHasher::getHashKey() long stringHasher::getHashKey()
{ {
return m_hashKey; return m_hashKey;
} }
/**
 * @return a copy of the textual key held by this entry
 */
string stringHasher::getKey()
{
    return this->m_key;
}
/**
 * @return a copy of the text attached to the key
 */
string stringHasher::getValue()
{
    return this->m_value;
}
/**
 * Replaces the text attached to the key; the key and its hash are untouched.
 * @param value new text
 */
void stringHasher::setValue ( string value )
{
    this->m_value = value;
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S; // typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef __STRINGHASHER_H_ #ifndef __STRINGHASHER_H__
#define __STRINGHASHER_H_ #define __STRINGHASHER_H__
#include <string> #include <string>
//#include <ext/hash_map> //#include <ext/hash_map>
#include <iostream> #include <iostream>
@ -28,22 +28,22 @@ using namespace std;
namespace HashMapSpace
{
    /**
     * One table entry: a textual key, the hash value supplied for it and a
     * single string attached to it.
     */
    class stringHasher
    {
    private:
        long m_hashKey;        // hash value supplied at construction
        std::string m_key;     // textual key
        std::string m_value;   // text attached to the key

    public:
        stringHasher ( long cle, std::string cleTxt, std::string valueTxt );

        // Read access to the three parts of the entry.
        long getHashKey();
        std::string getKey();
        std::string getValue();

        // Replaces the attached text.
        void setValue ( std::string value );
    };
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -27,35 +27,35 @@ using namespace Tools;
namespace HashMapSpace namespace HashMapSpace
{ {
/**
 * Builds an entry from its three parts.
 * @param cle hash value of the key
 * @param cleTxt textual key
 * @param valueVecInt payload attached to the key (a vector of strings, despite the name)
 */
stringInfosHasher::stringInfosHasher ( long cle, string cleTxt, vector<string> valueVecInt )
    : m_hashKey ( cle ),
      m_key ( cleTxt ),
      m_value ( valueVecInt )
{
}
// stringInfosHasher::~stringInfosHasher(){};*/ // stringInfosHasher::~stringInfosHasher(){};*/
long stringInfosHasher::getHashKey() long stringInfosHasher::getHashKey()
{ {
return m_hashKey; return m_hashKey;
} }
/**
 * @return a copy of the textual key held by this entry
 */
string stringInfosHasher::getKey()
{
    return this->m_key;
}
/**
 * @return a copy of the payload held by this entry
 */
vector<string> stringInfosHasher::getValue()
{
    return this->m_value;
}
/**
 * Replaces the payload of this entry; the key and its hash are untouched.
 * @param value new payload
 */
void stringInfosHasher::setValue ( vector<string> value )
{
    this->m_value = value;
}
/**
 * Formats the entry as one line: hash, key and the payload rendered by
 * vectorToString() with a tab separator, the three fields being separated
 * by tabs and followed by a line break.
 * @return the formatted line
 */
string stringInfosHasher::toString()
{
    stringstream l_line;
    l_line << m_hashKey << "\t" << m_key << "\t";
    l_line << vectorToString(m_value,"\t") << endl;
    return l_line.str();
}
// typedef stdext::hash_map<string, string, stringhasher> HASH_S_S; // typedef stdext::hash_map<string, string, stringhasher> HASH_S_S;

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef __STRINGINFOSHASHER_H_ #ifndef __STRINGINFOSHASHER_H__
#define __STRINGINFOSHASHER_H_ #define __STRINGINFOSHASHER_H__
#include <string> #include <string>
// #include <ext/hash_map> // #include <ext/hash_map>
#include <iostream> #include <iostream>
@ -29,23 +29,23 @@ Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
using namespace std; using namespace std;
namespace HashMapSpace
{
    /**
     * One table entry: a textual key, the hash value supplied for it and a
     * vector<string> payload.
     */
    class stringInfosHasher
    {
    private:
        long m_hashKey;                     // hash value supplied at construction
        std::string m_key;                  // textual key
        std::vector<std::string> m_value;   // payload attached to the key

    public:
        stringInfosHasher ( long cle, std::string cleTxt, std::vector<std::string> valueVecInt );

        // Read access to the three parts of the entry.
        long getHashKey();
        std::string getKey();
        std::vector<std::string> getValue();

        // Replaces the payload.
        void setValue ( std::vector<std::string> value );

        // Textual rendering of the entry.
        std::string toString();
    };
}

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -24,163 +24,244 @@ using namespace std;
namespace TERCpp namespace TERCpp
{ {
terAlignment::terAlignment() terAlignment::terAlignment()
{ {
// vector<string> ref; // vector<string> ref;
// vector<string> hyp; // vector<string> hyp;
// vector<string> aftershift; // vector<string> aftershift;
// TERshift[] allshifts = null; // TERshift[] allshifts = null;
numEdits=0; numEdits=0;
numWords=0; numWords=0;
bestRef=""; // bestRef="";
numIns=0; numIns=0;
numDel=0; numDel=0;
numSub=0; numSub=0;
numSft=0; numSft=0;
numWsf=0; numWsf=0;
} averageWords=0;
string terAlignment::toString()
{ }
stringstream s; void terAlignment::set(terAlignment& l_terAlignment)
s.str ( "" ); {
s << "Original Ref: \t" << join ( " ", ref ) << endl; numEdits=l_terAlignment.numEdits;
s << "Original Hyp: \t" << join ( " ", hyp ) <<endl; numWords=l_terAlignment.numWords;
s << "Hyp After Shift:\t" << join ( " ", aftershift ); bestRef=l_terAlignment.bestRef;
numIns=l_terAlignment.numIns;
numDel=l_terAlignment.numDel;
numSub=l_terAlignment.numSub;
numSft=l_terAlignment.numSft;
numWsf=l_terAlignment.numWsf;
averageWords=l_terAlignment.averageWords;
ref=l_terAlignment.ref;
hyp=l_terAlignment.hyp;
aftershift=l_terAlignment.aftershift;
// allshifts=l_terAlignment.allshifts;
hyp_int=l_terAlignment.hyp_int;
aftershift_int=l_terAlignment.aftershift_int;
alignment=l_terAlignment.alignment;
allshifts=(*(new vector<terShift>((int)l_terAlignment.allshifts.size())));
for (int l_i=0; l_i< (int)l_terAlignment.allshifts.size(); l_i++)
{
allshifts.at(l_i).set(l_terAlignment.allshifts.at(l_i));
}
}
void terAlignment::set(terAlignment* l_terAlignment)
{
numEdits=l_terAlignment->numEdits;
numWords=l_terAlignment->numWords;
bestRef=l_terAlignment->bestRef;
numIns=l_terAlignment->numIns;
numDel=l_terAlignment->numDel;
numSub=l_terAlignment->numSub;
numSft=l_terAlignment->numSft;
numWsf=l_terAlignment->numWsf;
averageWords=l_terAlignment->averageWords;
ref=l_terAlignment->ref;
hyp=l_terAlignment->hyp;
aftershift=l_terAlignment->aftershift;
// allshifts=l_terAlignment->allshifts;
hyp_int=l_terAlignment->hyp_int;
aftershift_int=l_terAlignment->aftershift_int;
alignment=l_terAlignment->alignment;
allshifts=(*(new vector<terShift>((int)l_terAlignment->allshifts.size())));
for (int l_i=0; l_i< (int)l_terAlignment->allshifts.size(); l_i++)
{
allshifts.at(l_i).set(l_terAlignment->allshifts.at(l_i));
}
}
string terAlignment::toString()
{
stringstream s;
s.str ( "" );
s << "Original Ref: \t" << join ( " ", ref ) << endl;
s << "Original Hyp: \t" << join ( " ", hyp ) <<endl;
s << "Hyp After Shift:\t" << join ( " ", aftershift );
// s << "Hyp After Shift: " << join ( " ", aftershift ); // s << "Hyp After Shift: " << join ( " ", aftershift );
s << endl; s << endl;
// string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift); // string s = "Original Ref: " + join(" ", ref) + "\nOriginal Hyp: " + join(" ", hyp) + "\nHyp After Shift: " + join(" ", aftershift);
if ( ( int ) sizeof ( alignment ) >0 ) { if ( ( int ) sizeof ( alignment ) >0 )
s << "Alignment: ("; {
s << "Alignment: (";
// s += "\nAlignment: ("; // s += "\nAlignment: (";
for ( int i = 0; i < ( int ) ( alignment.size() ); i++ ) { for ( int i = 0; i < ( int ) ( alignment.size() ); i++ )
s << alignment[i]; {
s << alignment[i];
// s+=alignment[i]; // s+=alignment[i];
} }
// s += ")"; // s += ")";
s << ")"; s << ")";
} }
s << endl; s << endl;
if ( ( int ) allshifts.size() == 0 ) { if ( ( int ) allshifts.size() == 0 )
{
// s += "\nNumShifts: 0"; // s += "\nNumShifts: 0";
s << "NumShifts: 0"; s << "NumShifts: 0";
} else { }
else
{
// s += "\nNumShifts: " + (int)allshifts.size(); // s += "\nNumShifts: " + (int)allshifts.size();
s << "NumShifts: "<< ( int ) allshifts.size(); s << "NumShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { for ( int i = 0; i < ( int ) allshifts.size(); i++ )
s << endl << " " ; {
s << ( ( terShift ) allshifts[i] ).toString(); s << endl << " " ;
s << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i]; // s += "\n " + allshifts[i];
} }
} }
s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")"; s << endl << "Score: " << scoreAv() << " (" << numEdits << "/" << averageWords << ")";
// s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")"; // s += "\nScore: " + score() + " (" + numEdits + "/" + numWords + ")";
return s.str(); return s.str();
} }
string terAlignment::join ( string delim, vector<string> arr ) string terAlignment::join ( string delim, vector<string> arr )
{ {
if ( ( int ) arr.size() == 0 ) return ""; if ( ( int ) arr.size() == 0 ) return "";
// if ((int)delim.compare("") == 0) delim = new String(""); // if ((int)delim.compare("") == 0) delim = new String("");
// String s = new String(""); // String s = new String("");
stringstream s; stringstream s;
s.str ( "" ); s.str ( "" );
for ( int i = 0; i < ( int ) arr.size(); i++ ) { for ( int i = 0; i < ( int ) arr.size(); i++ )
if ( i == 0 ) { {
s << arr.at ( i ); if ( i == 0 )
} else { {
s << delim << arr.at ( i ); s << arr.at ( i );
} }
} else
return s.str(); {
s << delim << arr.at ( i );
}
}
return s.str();
// return ""; // return "";
}
double terAlignment::score()
{
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
return 1.0;
}
if ( numWords <= 0.0 ) {
return 0.0;
}
return ( double ) numEdits / numWords;
}
double terAlignment::scoreAv()
{
if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) ) {
return 1.0;
}
if ( averageWords <= 0.0 ) {
return 0.0;
}
return ( double ) numEdits / averageWords;
}
void terAlignment::scoreDetails()
{
numIns = numDel = numSub = numWsf = numSft = 0;
if((int)allshifts.size()>0) {
for(int i = 0; i < (int)allshifts.size(); ++i) {
numWsf += allshifts[i].size();
} }
numSft = allshifts.size(); double terAlignment::score()
} {
if ( ( numWords <= 0.0 ) && ( numEdits > 0.0 ) )
if((int)alignment.size()>0 ) { {
for(int i = 0; i < (int)alignment.size(); ++i) { return 1.0;
switch (alignment[i]) { }
case 'S': if ( numWords <= 0.0 )
case 'T': {
numSub++; return 0.0;
break; }
case 'D': return ( double ) numEdits / numWords;
numDel++;
break;
case 'I':
numIns++;
break;
}
} }
} double terAlignment::scoreAv()
// if(numEdits != numSft + numDel + numIns + numSub) {
// System.out.println("** Error, unmatch edit erros " + numEdits + if ( ( averageWords <= 0.0 ) && ( numEdits > 0.0 ) )
// " vs " + (numSft + numDel + numIns + numSub)); {
} return 1.0;
string terAlignment::printAlignments() }
{ if ( averageWords <= 0.0 )
stringstream to_return; {
for(int i = 0; i < (int)alignment.size(); ++i) { return 0.0;
char alignInfo=alignment.at(i); }
if (alignInfo == 'A' ) { return ( double ) numEdits / averageWords;
alignInfo='A';
} }
if (i==0) { void terAlignment::scoreDetails()
to_return << alignInfo; {
} else { numIns = numDel = numSub = numWsf = numSft = 0;
to_return << " " << alignInfo; if((int)allshifts.size()>0)
} {
for(int i = 0; i < (int)allshifts.size(); ++i)
{
numWsf += allshifts[i].size();
}
numSft = allshifts.size();
}
if((int)alignment.size()>0 )
{
for(int i = 0; i < (int)alignment.size(); ++i)
{
switch (alignment[i])
{
case 'S':
case 'T':
numSub++;
break;
case 'D':
numDel++;
break;
case 'I':
numIns++;
break;
}
}
}
// if(numEdits != numSft + numDel + numIns + numSub)
// System.out.println("** Error, unmatch edit erros " + numEdits +
// " vs " + (numSft + numDel + numIns + numSub));
}
string terAlignment::printAlignments()
{
stringstream to_return;
for(int i = 0; i < (int)alignment.size(); ++i)
{
char alignInfo=alignment.at(i);
if (alignInfo == 'A' )
{
alignInfo='A';
}
if (i==0)
{
to_return << alignInfo;
}
else
{
to_return << " " << alignInfo;
}
}
return to_return.str();
} }
return to_return.str();
}
string terAlignment::printAllShifts() string terAlignment::printAllShifts()
{ {
stringstream to_return; stringstream to_return;
if ( ( int ) allshifts.size() == 0 ) { if ( ( int ) allshifts.size() == 0 )
{
// s += "\nNumShifts: 0"; // s += "\nNumShifts: 0";
to_return << "NbrShifts: 0"; to_return << "NbrShifts: 0";
} else { }
else
{
// s += "\nNumShifts: " + (int)allshifts.size(); // s += "\nNumShifts: " + (int)allshifts.size();
to_return << "NbrShifts: "<< ( int ) allshifts.size(); to_return << "NbrShifts: "<< ( int ) allshifts.size();
for ( int i = 0; i < ( int ) allshifts.size(); i++ ) { for ( int i = 0; i < ( int ) allshifts.size(); i++ )
to_return << "\t" ; {
to_return << ( ( terShift ) allshifts[i] ).toString(); to_return << "\t" ;
to_return << ( ( terShift ) allshifts[i] ).toString();
// s += "\n " + allshifts[i]; // s += "\n " + allshifts[i];
} }
} }
return to_return.str(); return to_return.str();
} }
} }

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef MERT_TER_TERALIGNMENT_H_ #ifndef __TERCPPTERALIGNMENT_H__
#define MERT_TER_TERALIGNMENT_H_ #define __TERCPPTERALIGNMENT_H__
#include <vector> #include <vector>
@ -34,41 +34,44 @@ using namespace std;
namespace TERCpp namespace TERCpp
{ {
class terAlignment class terAlignment
{ {
private: private:
public: public:
terAlignment(); vector<string> ref;
string toString(); vector<string> hyp;
void scoreDetails(); vector<string> aftershift;
vector<terShift> allshifts;
vector<int> hyp_int;
vector<int> aftershift_int;
vector<string> ref; double numEdits;
vector<string> hyp; double numWords;
vector<string> aftershift; double averageWords;
vector<terShift> allshifts; vector<char> alignment;
vector<int> hyp_int; string bestRef;
vector<int> aftershift_int;
double numEdits; int numIns;
double numWords; int numDel;
double averageWords; int numSub;
vector<char> alignment; int numSft;
string bestRef; int numWsf;
int numIns;
int numDel;
int numSub;
int numSft;
int numWsf;
string join ( string delim, vector<string> arr ); terAlignment();
double score(); string toString();
double scoreAv(); void scoreDetails();
string printAlignments();
string printAllShifts();
}; string join ( string delim, vector<string> arr );
double score();
double scoreAv();
string printAlignments();
string printAllShifts();
void set(terAlignment& l_terAlignment);
void set(terAlignment* l_terAlignment);
};
} }
#endif #endif

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -42,32 +42,70 @@ namespace TERCpp
// numSft=0; // numSft=0;
// numWsf=0; // numWsf=0;
// } // }
terShift::terShift () terShift::terShift ()
{ {
start = 0; start = 0;
end = 0; end = 0;
moveto = 0; moveto = 0;
newloc = 0; newloc = 0;
cost=1.0; cost=1.0;
} shifted.clear();
terShift::terShift ( int _start, int _end, int _moveto, int _newloc ) alignment.clear();
{ aftershift.clear();
start = _start; }
end = _end; terShift::terShift ( int _start, int _end, int _moveto, int _newloc )
moveto = _moveto; {
newloc = _newloc; start = _start;
cost=1.0; end = _end;
} moveto = _moveto;
newloc = _newloc;
cost=1.0;
}
terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ) terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted )
{ {
start = _start; start = _start;
end = _end; end = _end;
moveto = _moveto; moveto = _moveto;
newloc = _newloc; newloc = _newloc;
shifted = _shifted; shifted = _shifted;
cost=1.0; cost=1.0;
} }
// Copies the shift description held by l_terShift into this object:
// start, end, moveto, newloc and the shifted words.
// alignment and aftershift are not copied, and neither is cost.
// NOTE(review): cost keeps its previous value here - confirm this is intended.
void terShift::set(terShift l_terShift)
{
    // The pointer overload performs exactly these assignments, in the same
    // order, so the by-value argument is simply forwarded to it.
    set ( &l_terShift );
}
// Copies the shift description pointed to by l_terShift into this object:
// start, end, moveto, newloc and the shifted words.
// The pointer is dereferenced without a check, so it must not be null.
// NOTE(review): cost is not copied and keeps its previous value - confirm
// this is intended.
void terShift::set(terShift *l_terShift)
{
    const terShift &source = *l_terShift;

    start   = source.start;
    end     = source.end;
    moveto  = source.moveto;
    newloc  = source.newloc;
    shifted = source.shifted;
    // Copies of the pra_more output vectors are disabled:
//     alignment=l_terShift->alignment;
//     aftershift=l_terShift->aftershift;
}
// Resets this shift: the four positions go back to 0, cost goes back to 1.0
// and the three vectors (shifted, alignment, aftershift) are emptied.
void terShift::erase()
{
    start = end = moveto = newloc = 0;
    cost = 1.0;

    shifted.clear();
    alignment.clear();
    aftershift.clear();
}
// string terShift::vectorToString(vector<string> vec) // string terShift::vectorToString(vector<string> vec)
// { // {
// string retour(""); // string retour("");
@ -78,38 +116,54 @@ terShift::terShift ( int _start, int _end, int _moveto, int _newloc, vector<stri
// return retour; // return retour;
// } // }
string terShift::toString() string terShift::toString()
{ {
stringstream s; stringstream s;
s.str ( "" ); s.str ( "" );
s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]"; s << "[" << start << ", " << end << ", " << moveto << "/" << newloc << "]";
if ( ( int ) shifted.size() > 0 ) { if ( ( int ) shifted.size() > 0 )
s << " (" << vectorToString ( shifted ) << ")"; {
} s << " (" << vectorToString ( shifted ) << ")";
return s.str(); }
} // s<< endl;
// if ( ( int ) shifted.size() > 0 )
// {
// s << " (" << vectorToString ( alignment ) << ")";
// }
// s<< endl;
// if ( ( int ) shifted.size() > 0 )
// {
// s << " (" << vectorToString ( aftershift ) << ")";
// }
return s.str();
}
/* The distance of the shift. */ /* The distance of the shift. */
int terShift::distance() int terShift::distance()
{ {
if ( moveto < start ) { if ( moveto < start )
return start - moveto; {
} else if ( moveto > end ) { return start - moveto;
return moveto - end; }
} else { else if ( moveto > end )
return moveto - start; {
} return moveto - end;
} }
else
{
return moveto - start;
}
}
bool terShift::leftShift() bool terShift::leftShift()
{ {
return ( moveto < start ); return ( moveto < start );
} }
int terShift::size() int terShift::size()
{ {
return ( end - start ) + 1; return ( end - start ) + 1;
} }
// terShift terShift::operator=(terShift t) // terShift terShift::operator=(terShift t)
// { // {
// //

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef MERT_TER_TERSHIFT_H_ #ifndef __TERCPPTERSHIFT_H__
#define MERT_TER_TERSHIFT_H_ #define __TERCPPTERSHIFT_H__
#include <vector> #include <vector>
@ -34,32 +34,35 @@ using namespace Tools;
namespace TERCpp namespace TERCpp
{ {
class terShift class terShift
{ {
private: private:
public: public:
terShift(); terShift();
terShift ( int _start, int _end, int _moveto, int _newloc ); terShift ( int _start, int _end, int _moveto, int _newloc );
terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted ); terShift ( int _start, int _end, int _moveto, int _newloc, vector<string> _shifted );
string toString(); string toString();
int distance() ; int distance() ;
bool leftShift(); bool leftShift();
int size(); int size();
// terShift operator=(terShift t); // terShift operator=(terShift t);
// string vectorToString(vector<string> vec); // string vectorToString(vector<string> vec);
int start; int start;
int end; int end;
int moveto; int moveto;
int newloc; int newloc;
vector<string> shifted; // The words we shifted vector<string> shifted; // The words we shifted
vector<char> alignment ; // for pra_more output vector<char> alignment ; // for pra_more output
vector<string> aftershift; // for pra_more output vector<string> aftershift; // for pra_more output
// This is used to store the cost of a shift, so we don't have to // This is used to store the cost of a shift, so we don't have to
// calculate it multiple times. // calculate it multiple times.
double cost; double cost;
}; void set(terShift l_terShift);
void set(terShift *l_terShift);
void erase();
};
} }
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef _TERCPPTERCALC_H__ #ifndef _TERCPPTERCALC_H___
#define _TERCPPTERCALC_H__ #define _TERCPPTERCALC_H___
#include <vector> #include <vector>
#include <stdio.h> #include <stdio.h>
@ -41,62 +41,70 @@ namespace TERCpp
{ {
// typedef size_t WERelement[2]; // typedef size_t WERelement[2];
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del) // Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
typedef vector<terShift> vecTerShift; typedef vector<terShift> vecTerShift;
/** /**
@author @author
*/ */
class terCalc class terCalc
{ {
private : private :
// Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del) // Vecteur d'alignement contenant le hash du mot et son evaluation (0=ok, 1=sub, 2=ins, 3=del)
WERalignment l_WERalignment; WERalignment l_WERalignment;
// HashMap contenant les valeurs de hash de chaque mot // HashMap contenant les valeurs de hash de chaque mot
hashMap bagOfWords; hashMap bagOfWords;
int TAILLE_PERMUT_MAX; int TAILLE_PERMUT_MAX;
// Increments internes int NBR_PERMUT_MAX;
int NBR_SEGS_EVALUATED; // Increments internes
int NBR_PERMUTS_CONSID; int NBR_SEGS_EVALUATED;
int NBR_BS_APPELS; int NBR_PERMUTS_CONSID;
int DIST_MAX_PERMUT; int NBR_BS_APPELS;
bool PRINT_DEBUG; int DIST_MAX_PERMUT;
int CALL_TER_ALIGN;
int CALL_CALC_PERMUT;
int CALL_FIND_BSHIFT;
int MAX_LENGTH_SENTENCE;
bool PRINT_DEBUG;
// Utilisés dans minDistEdit et ils ne sont pas réajustés // Utilisés dans minDistEdit et ils ne sont pas réajustés
double S[1000][1000]; vector < vector < double > > * S;
char P[1000][1000]; vector < vector < char > > * P;
vector<vecInt> refSpans; vector<vecInt> refSpans;
vector<vecInt> hypSpans; vector<vecInt> hypSpans;
int TAILLE_BEAM; int TAILLE_BEAM;
public: public:
int shift_cost; int shift_cost;
int insert_cost; int insert_cost;
int delete_cost; int delete_cost;
int substitute_cost; int substitute_cost;
int match_cost; int match_cost;
double infinite; double infinite;
terCalc(); terCalc();
// ~terCalc(); ~terCalc();
// size_t* hashVec ( vector<string> s ); // size_t* hashVec ( vector<string> s );
void setDebugMode ( bool b ); void setDebugMode ( bool b );
// int WERCalculation ( size_t * ref, size_t * hyp ); // int WERCalculation ( size_t * ref, size_t * hyp );
// int WERCalculation ( vector<string> ref, vector<string> hyp ); // int WERCalculation ( vector<string> ref, vector<string> hyp );
// int WERCalculation ( vector<int> ref, vector<int> hyp ); // int WERCalculation ( vector<int> ref, vector<int> hyp );
terAlignment WERCalculation ( vector<string> hyp, vector<string> ref ); terAlignment WERCalculation ( vector< string >& hyp, vector< string >& ref );
// string vectorToString(vector<string> vec); // string vectorToString(vector<string> vec);
// vector<string> subVector(vector<string> vec, int start, int end); // vector<string> subVector(vector<string> vec, int start, int end);
hashMapInfos createConcordMots ( vector<string> hyp, vector<string> ref ); hashMapInfos createConcordMots ( vector<string>& hyp, vector<string>& ref );
terAlignment minimizeDistanceEdition ( vector<string> hyp, vector<string> ref, vector<vecInt> curHypSpans ); terAlignment minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans );
bool trouverIntersection ( vecInt refSpan, vecInt hypSpan ); void minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans , terAlignment* l_terAlign);
terAlignment TER ( vector<string> hyp, vector<string> ref , float avRefLength ); // terAlignment minimizeDistanceEdition ( vector<string>& hyp, vector<string>& ref, vector<vecInt>& curHypSpans );
terAlignment TER ( vector<string> hyp, vector<string> ref ); bool trouverIntersection ( vecInt& refSpan, vecInt& hypSpan );
terAlignment TER ( vector<int> hyp, vector<int> ref ); terAlignment TER ( vector<string>& hyp, vector<string>& ref , float avRefLength );
bestShiftStruct findBestShift ( vector<string> cur, vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment cur_align ); terAlignment TER ( vector<string>& hyp, vector<string>& ref );
void calculateTerAlignment ( terAlignment align, bool* herr, bool* rerr, int* ralign ); terAlignment TER ( vector<int>& hyp, vector<int>& ref );
vector<vecTerShift> calculerPermutations ( vector<string> hyp, vector<string> ref, hashMapInfos rloc, terAlignment align, bool* herr, bool* rerr, int* ralign ); bestShiftStruct * findBestShift ( vector< string >& cur, vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& med_align );
alignmentStruct permuter ( vector<string> words, terShift s ); void calculateTerAlignment ( terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign );
alignmentStruct permuter ( vector<string> words, int start, int end, int newloc ); vector<vecTerShift> * calculerPermutations ( vector< string >& hyp, vector< string >& ref, hashMapInfos& rloc, TERCpp::terAlignment& align, vector<bool>* herr, vector<bool>* rerr, vector<int>* ralign );
}; alignmentStruct permuter ( vector<string>& words, terShift& s );
alignmentStruct permuter ( vector<string>& words, terShift* s );
alignmentStruct permuter ( vector<string>& words, int start, int end, int newloc );
};
} }

File diff suppressed because it is too large Load Diff

View File

@ -5,7 +5,7 @@ Copyright 2010-2013, Christophe Servan, LIUM, University of Le Mans, France
Contact: christophe.servan@lium.univ-lemans.fr Contact: christophe.servan@lium.univ-lemans.fr
The tercpp tool and library are free software: you can redistribute it and/or modify it The tercpp tool and library are free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the licence, or the Free Software Foundation, either version 3 of the licence, or
(at your option) any later version. (at your option) any later version.
@ -18,8 +18,8 @@ You should have received a copy of the GNU Lesser General Public License
along with this library; if not, write to the Free Software Foundation, along with this library; if not, write to the Free Software Foundation,
Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
**********************************/ **********************************/
#ifndef MERT_TER_TOOLS_H_ #ifndef __TERCPPTOOLS_H__
#define MERT_TER_TOOLS_H_ #define __TERCPPTOOLS_H__
#include <vector> #include <vector>
@ -35,31 +35,34 @@ using namespace std;
namespace Tools namespace Tools
{ {
typedef vector<double> vecDouble; typedef vector<double> vecDouble;
typedef vector<char> vecChar; typedef vector<char> vecChar;
typedef vector<int> vecInt; typedef vector<int> vecInt;
typedef vector<float> vecFloat; typedef vector<float> vecFloat;
typedef vector<size_t> vecSize_t; typedef vector<size_t> vecSize_t;
typedef vector<string> vecString; typedef vector<string> vecString;
typedef vector<string> alignmentElement; typedef vector<string> alignmentElement;
typedef vector<alignmentElement> WERalignment; typedef vector<alignmentElement> WERalignment;
struct param { struct param
bool debugMode; {
string referenceFile; // path to the resources bool debugMode;
string hypothesisFile; // path to the configuration files string referenceFile; // path to the resources
string outputFileExtension; string hypothesisFile; // path to the configuration files
string outputFileName; string outputFileExtension;
bool noPunct; string outputFileName;
bool caseOn; bool noPunct;
bool normalize; bool caseOn;
bool tercomLike; bool normalize;
bool sgmlInputs; bool tercomLike;
bool noTxtIds; bool sgmlInputs;
bool printAlignments; bool verbose;
bool WER; bool count_verbose;
int debugLevel; bool noTxtIds;
bool printAlignments;
bool WER;
int debugLevel;
}; };
// param = { false, "","","","" }; // param = { false, "","","","" };
@ -67,35 +70,38 @@ struct param {
// private: // private:
// public: // public:
string vectorToString ( vector<string> vec ); string vectorToString ( vector<string> vec );
string vectorToString ( vector<char> vec ); string vectorToString ( vector<char> vec );
string vectorToString ( vector<int> vec ); string vectorToString ( vector<int> vec );
string vectorToString ( vector<string> vec, string s ); string vectorToString ( vector<string> vec, string s );
string vectorToString ( vector<char> vec, string s ); string vectorToString ( vector<char> vec, string s );
string vectorToString ( vector<int> vec, string s ); string vectorToString ( vector<int> vec, string s );
string vectorToString ( vector<bool> vec, string s ); string vectorToString ( vector<bool> vec, string s );
string vectorToString ( char* vec, string s, int taille ); string vectorToString ( char* vec, string s, int taille );
string vectorToString ( int* vec, string s , int taille ); string vectorToString ( int* vec, string s , int taille );
string vectorToString ( bool* vec, string s , int taille ); string vectorToString ( bool* vec, string s , int taille );
vector<string> subVector ( vector<string> vec, int start, int end ); string vectorToString ( vector<char>* vec, string s, int taille );
vector<int> subVector ( vector<int> vec, int start, int end ); string vectorToString ( vector<int>* vec, string s , int taille );
vector<float> subVector ( vector<float> vec, int start, int end ); string vectorToString ( vector<bool>* vec, string s , int taille );
vector<string> copyVector ( vector<string> vec ); vector<string> subVector ( vector<string> vec, int start, int end );
vector<int> copyVector ( vector<int> vec ); vector<int> subVector ( vector<int> vec, int start, int end );
vector<float> copyVector ( vector<float> vec ); vector<float> subVector ( vector<float> vec, int start, int end );
vector<string> stringToVector ( string s, string tok ); vector<string> copyVector ( vector<string> vec );
vector<string> stringToVector ( char s, string tok ); vector<int> copyVector ( vector<int> vec );
vector<string> stringToVector ( int s, string tok ); vector<float> copyVector ( vector<float> vec );
vector<int> stringToVectorInt ( string s, string tok ); vector<string> stringToVector ( string s, string tok );
vector<float> stringToVectorFloat ( string s, string tok ); vector<string> stringToVector ( char s, string tok );
string lowerCase(string str); vector<string> stringToVector ( int s, string tok );
string removePunct(string str); vector<int> stringToVectorInt ( string s, string tok );
string tokenizePunct(string str); vector<float> stringToVectorFloat ( string s, string tok );
string removePunctTercom(string str); string lowerCase(string str);
string normalizeStd(string str); string removePunct(string str);
string printParams(param p); string tokenizePunct(string str);
string join ( string delim, vector<string> arr ); string removePunctTercom(string str);
string normalizeStd(string str);
string printParams(param p);
string join ( string delim, vector<string> arr );
// }; // };
param copyParam(param p); param copyParam(param p);
} }
#endif #endif