2008-05-14 11:57:45 +04:00
|
|
|
/*
|
|
|
|
* Util.h
|
2012-02-20 03:29:53 +04:00
|
|
|
* mert - Minimum Error Rate Training
|
2008-05-14 11:57:45 +04:00
|
|
|
*
|
|
|
|
* Created by Nicola Bertoldi on 13/05/08.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2012-02-20 04:46:08 +04:00
|
|
|
#ifndef MERT_UTIL_H_
|
|
|
|
#define MERT_UTIL_H_
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2012-04-12 04:16:27 +04:00
|
|
|
#include <cmath>
|
2011-11-14 14:52:21 +04:00
|
|
|
#include <cstdlib>
|
2008-05-20 18:15:30 +04:00
|
|
|
#include <stdexcept>
|
2008-05-14 11:57:45 +04:00
|
|
|
#include <limits>
|
|
|
|
#include <vector>
|
|
|
|
#include <map>
|
|
|
|
#include <iostream>
|
|
|
|
#include <sstream>
|
|
|
|
#include <string>
|
2008-10-17 01:14:38 +04:00
|
|
|
#include <cstring>
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2008-05-27 20:50:52 +04:00
|
|
|
#include "Types.h"
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2012-06-30 23:23:45 +04:00
|
|
|
namespace MosesTuning
|
|
|
|
{
|
|
|
|
|
2008-05-14 11:57:45 +04:00
|
|
|
// Diagnostic tracing macro.
// When TRACE_ENABLE is defined, TRACE_ERR(str) streams 'str' to std::cerr, so
// 'str' may itself be a chain of << operands. Otherwise the macro expands to
// an empty block and 'str' is never evaluated.
#ifndef TRACE_ENABLE
#define TRACE_ERR(str) { }
#else
#define TRACE_ERR(str) { std::cerr << str; }
#endif
|
|
|
|
|
2013-11-15 14:55:38 +04:00
|
|
|
// Selection helpers wrapping std::nth_element.
// GCC 4.8.1 and 4.8.2 are singled out because of an nth_element() bug: on
// exactly those compilers the macros expand to a full std::sort over
// [begin, end) and 'middle' is ignored. Every other compiler gets
// std::nth_element.
// NOTE(review): this header does not include <algorithm> itself -- confirm it
// is available wherever these macros are expanded.
#if !(__GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2))
#define NTH_ELEMENT3(begin, middle, end) std::nth_element(begin, middle, end)
#define NTH_ELEMENT4(begin, middle, end, orderer) std::nth_element(begin, middle, end, orderer)
#else
// gcc nth_element() bug
#define NTH_ELEMENT3(begin, middle, end) std::sort(begin, end)
#define NTH_ELEMENT4(begin, middle, end, orderer) std::sort(begin, end, orderer)
#endif
|
|
|
|
|
2011-11-13 16:13:44 +04:00
|
|
|
// Default delimiter: a single space. It is the default value of the
// 'delimiter' argument of getNextPound().
const char kDefaultDelimiterSymbol[] = " ";
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2008-05-16 11:09:15 +04:00
|
|
|
// Declaration only; the definition is outside this header.
// Presumably returns the current verbosity level -- confirm against the definition.
int verboselevel();
|
|
|
|
// Declaration only; the definition is outside this header.
// Presumably stores 'v' as the verbosity level; the meaning of the returned
// int is not visible here -- confirm against the definition.
int setverboselevel(int v);
|
|
|
|
|
2012-04-12 04:16:27 +04:00
|
|
|
|
|
|
|
// Default tolerance used by IsAlmostEqual().
const float kEPS = 0.0001f;

/**
 * Checks whether 'actual' lies strictly within 'round' of 'expected'.
 *
 * The absolute difference is formed by subtracting the smaller operand from
 * the larger one instead of calling std::abs(expected - actual). For signed
 * and floating-point T the result is the same; for unsigned T it avoids both
 * the wrap-around of the subtraction and the ambiguous std::abs overload.
 *
 * On a mismatch a "Fail: ..." diagnostic is written to std::cerr.
 *
 * Returns true iff |expected - actual| < round.
 */
template <typename T>
bool IsAlmostEqual(T expected, T actual, float round=kEPS)
{
  if ((expected > actual ? expected - actual : actual - expected) < round) {
    return true;
  }
  std::cerr << "Fail: expected = " << expected
            << " (actual = " << actual << ")" << std::endl;
  return false;
}
|
|
|
|
|
2011-11-13 16:13:44 +04:00
|
|
|
/**
 * Searches 'str' for the first occurrence of 'delimiter' and splits it there.
 *
 * 'substr' receives a copy of the part of 'str' that runs from its start up
 * to the first occurrence of the delimiter. 'str' itself is reassigned to the
 * substring that starts at the position of that first occurrence.
 * NOTE(review): whether the delimiter is kept at the front of 'str', and what
 * 'str' and 'substr' hold when the delimiter is absent, is not visible from
 * this declaration -- confirm against the definition.
 *
 * 'delimiter' defaults to kDefaultDelimiterSymbol.
 *
 * It returns the position of the first occurrence in the queried string.
 * If the delimiter is not found, std::string::npos is returned.
 */
|
|
|
|
size_t getNextPound(std::string &str, std::string &substr,
|
|
|
|
const std::string &delimiter = kDefaultDelimiterSymbol);
|
|
|
|
|
2011-08-20 19:25:19 +04:00
|
|
|
// Declaration only; the definition is outside this header.
// Presumably splits 's' at each 'delim' and stores the pieces in 'elems' --
// confirm against the definition (e.g. whether 'elems' is cleared first).
void split(const std::string &s, char delim, std::vector<std::string> &elems);
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2012-02-20 03:20:44 +04:00
|
|
|
/**
 * Split the string 'str' with the specified delimiter 'delim' into tokens.
 * The resulting tokens are set to 'res'.
 *
 * ex. "a,b,c" => {"a", "b", "c"}.
 *
 * NOTE(review): whether 'res' is cleared before tokens are stored, and how
 * empty tokens are handled, is not visible from this declaration -- confirm
 * against the definition.
 */
|
2011-11-11 17:00:30 +04:00
|
|
|
void Tokenize(const char *str, const char delim, std::vector<std::string> *res);
|
|
|
|
|
2008-05-14 11:57:45 +04:00
|
|
|
/**
 * Parses 'input' as a value of type T using operator>> on a string stream.
 *
 * The result is value-initialized before extraction. When extraction never
 * starts (for example 'input' is empty or contains only whitespace) the
 * stream leaves the variable untouched, so the function returns T() rather
 * than an indeterminate value.
 */
template<typename T>
inline T Scan(const std::string &input)
{
  std::stringstream stream(input);
  T ret = T();
  stream >> ret;
  return ret;
}
|
2008-05-14 11:57:45 +04:00
|
|
|
|
2012-04-04 17:04:51 +04:00
|
|
|
/**
 * Returns true iff "str" ends with "suffix".
 * e.g., Given str = "abc:" and suffix = ":", this function returns true.
 *
 * The whole suffix is compared against the tail of "str": a suffix longer
 * than "str" never matches, and an empty suffix always matches.
 * "suffix" must be a non-null, NUL-terminated string.
 */
inline bool EndsWith(const std::string& str, const char* suffix)
{
  const std::size_t suffix_len = std::strlen(suffix);
  if (suffix_len > str.size()) {
    return false;
  }
  // Compare exactly the last suffix_len characters of str with suffix.
  return str.compare(str.size() - suffix_len, suffix_len, suffix, suffix_len) == 0;
}
|
|
|
|
|
2008-05-27 20:50:52 +04:00
|
|
|
/**
 * Renders 'x' as text by streaming it into a std::ostringstream.
 * Throws std::runtime_error if the stream is in a failed state after the
 * insertion.
 */
template<typename T>
inline std::string stringify(T x)
{
  std::ostringstream buffer;
  const bool failed = !(buffer << x);
  if (failed) {
    throw std::runtime_error("stringify(template<typename T>)");
  }
  return buffer.str();
}
|
|
|
|
|
2011-11-12 12:30:33 +04:00
|
|
|
// Parses the C string 'str' with std::atoi and returns the resulting int
// converted to ScoreStatsType.
inline ScoreStatsType ConvertCharToScoreStatsType(const char *str)
{
  const int parsed = std::atoi(str);
  return parsed;
}
|
|
|
|
|
|
|
|
// std::string convenience wrapper: forwards the character data of 'str' to
// ConvertCharToScoreStatsType().
inline ScoreStatsType ConvertStringToScoreStatsType(const std::string& str)
{
  const char *const text = str.c_str();
  return ConvertCharToScoreStatsType(text);
}
|
|
|
|
|
|
|
|
// Parses the C string 'str' with std::atof and returns the resulting double
// cast to FeatureStatsType.
inline FeatureStatsType ConvertCharToFeatureStatsType(const char *str)
{
  const double parsed = std::atof(str);
  return static_cast<FeatureStatsType>(parsed);
}
|
|
|
|
|
|
|
|
// std::string convenience wrapper: forwards the character data of 'str' to
// ConvertCharToFeatureStatsType().
inline FeatureStatsType ConvertStringToFeatureStatsType(const std::string &str)
{
  const char *const text = str.c_str();
  return ConvertCharToFeatureStatsType(text);
}
|
|
|
|
|
2012-06-22 21:19:16 +04:00
|
|
|
/**
 * Returns a copy of 'Src' with every leading and trailing character that
 * appears in 'c' removed. By default 'c' holds space, carriage return and
 * line feed (tabs are not in the default set).
 * Characters from 'c' in the interior of the string are kept.
 * An empty string is returned when 'Src' is empty or consists solely of
 * characters from 'c'.
 */
inline std::string trimStr(const std::string& Src, const std::string& c = " \r\n")
{
  const std::string::size_type first = Src.find_first_not_of(c);
  if (first == std::string::npos) {
    // Empty input, or nothing but trimmable characters.
    return std::string();
  }
  // At least one kept character exists, so this search cannot fail and
  // last >= first.
  const std::string::size_type last = Src.find_last_not_of(c);
  return Src.substr(first, last - first + 1);
}
|
|
|
|
|
2009-01-07 16:30:06 +03:00
|
|
|
// Utilities to measure decoding time
|
|
|
|
// Declaration only; the definition is outside this header.
// Presumably restarts the user-time measurement -- confirm against the definition.
void ResetUserTime();
|
|
|
|
// Declaration only; the definition is outside this header.
// Presumably reports the measured user time together with 'message'; the
// output destination is not visible here -- confirm against the definition.
void PrintUserTime(const std::string &message);
|
|
|
|
// Declaration only; the definition is outside this header.
// Presumably returns the measured user time; the unit and reference point are
// not visible here -- confirm against the definition.
double GetUserTime();
|
|
|
|
|
2012-06-30 23:23:45 +04:00
|
|
|
}
|
|
|
|
|
2012-02-20 04:46:08 +04:00
|
|
|
#endif // MERT_UTIL_H_
|