2008-06-11 14:52:57 +04:00
// $Id$
/***********************************************************************
Moses - factored phrase - based language decoder
Copyright ( C ) 2006 University of Edinburgh
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 2.1 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , write to the Free Software
Foundation , Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2010-06-05 17:10:26 +04:00
# include <ctime>
2008-06-11 14:52:57 +04:00
# include <iostream>
# include <iterator>
# include <fstream>
# include <sstream>
# include <algorithm>
# include "Parameter.h"
# include "Util.h"
# include "InputFileStream.h"
# include "UserMessage.h"
using namespace std ;
2008-10-09 03:51:26 +04:00
namespace Moses
{
2008-06-11 14:52:57 +04:00
/** define allowed parameters */
2011-02-24 16:14:42 +03:00
Parameter : : Parameter ( )
2008-06-11 14:52:57 +04:00
{
2011-02-24 16:14:42 +03:00
AddParam ( " beam-threshold " , " b " , " threshold for threshold pruning " ) ;
AddParam ( " config " , " f " , " location of the configuration file " ) ;
AddParam ( " continue-partial-translation " , " cpt " , " start from nonempty hypothesis " ) ;
2011-11-22 18:40:33 +04:00
AddParam ( " decoding-graph-backoff " , " dpb " , " only use subsequent decoding paths for unknown spans of given length " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " drop-unknown " , " du " , " drop unknown words instead of copying them " ) ;
2010-03-07 10:57:48 +03:00
AddParam ( " disable-discarding " , " dd " , " disable hypothesis discarding " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " factor-delimiter " , " fd " , " specify a different factor delimiter than the default " ) ;
AddParam ( " generation-file " , " location and properties of the generation table " ) ;
AddParam ( " global-lexical-file " , " gl " , " discriminatively trained global lexical translation model file " ) ;
AddParam ( " input-factors " , " list of factors in the input " ) ;
AddParam ( " input-file " , " i " , " location of the input file to be translated " ) ;
AddParam ( " inputtype " , " text (0), confusion network (1), word lattice (2) (default = 0) " ) ;
AddParam ( " labeled-n-best-list " , " print out labels for each weight type in n-best list. default is true " ) ;
AddParam ( " include-alignment-in-n-best " , " include word alignment in the n-best list. default is false " ) ;
AddParam ( " lmodel-file " , " location and properties of the language models " ) ;
AddParam ( " lmodel-dub " , " dictionary upper bounds of language models " ) ;
2011-09-09 22:03:00 +04:00
AddParam ( " lmodel-oov-feature " , " add language model oov feature, one per model " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " mapping " , " description of decoding steps " ) ;
AddParam ( " max-partial-trans-opt " , " maximum number of partial translation options per input span (during mapping steps) " ) ;
AddParam ( " max-trans-opt-per-coverage " , " maximum number of translation options per input span (after applying mapping steps) " ) ;
AddParam ( " max-phrase-length " , " maximum phrase length (default 20) " ) ;
AddParam ( " n-best-list " , " file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT " ) ;
2011-10-04 19:46:24 +04:00
AddParam ( " lattice-samples " , " generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " n-best-factor " , " factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0 " ) ;
2010-03-07 10:57:48 +03:00
AddParam ( " print-all-derivations " , " to print all derivations in search graph " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " output-factors " , " list of factors in the output " ) ;
AddParam ( " phrase-drop-allowed " , " da " , " if present, allow dropping of source words " ) ; //da = drop any (word); see -du for comparison
AddParam ( " report-all-factors " , " report all factors in output, not just first " ) ;
AddParam ( " report-all-factors-in-n-best " , " Report all factors in n-best-lists. Default is false " ) ;
AddParam ( " report-segmentation " , " t " , " report phrase segmentation in the output " ) ;
2011-05-13 23:28:23 +04:00
# ifdef HAVE_SYNLM
AddParam ( " slmodel-file " , " location of the syntactic language model file(s) " ) ;
AddParam ( " weight-slm " , " slm " , " weight(s) for syntactic language model " ) ;
AddParam ( " slmodel-factor " , " factor to use with syntactic language model " ) ;
AddParam ( " slmodel-beam " , " beam width to use with syntactic language model's parser " ) ;
# endif
2011-02-24 16:14:42 +03:00
AddParam ( " stack " , " s " , " maximum stack size for histogram pruning " ) ;
AddParam ( " stack-diversity " , " sd " , " minimum number of hypothesis of each coverage in stack (default 0) " ) ;
AddParam ( " threads " , " th " , " number of threads to use in decoding (defaults to single-threaded) " ) ;
AddParam ( " translation-details " , " T " , " for each best hypothesis, report translation details to the given file " ) ;
AddParam ( " ttable-file " , " location and properties of the translation tables " ) ;
AddParam ( " ttable-limit " , " ttl " , " maximum number of translation table entries per input phrase " ) ;
AddParam ( " translation-option-threshold " , " tot " , " threshold for translation options relative to best for input phrase " ) ;
AddParam ( " early-discarding-threshold " , " edt " , " threshold for constructing hypotheses based on estimate cost " ) ;
AddParam ( " verbose " , " v " , " verbosity level of the logging " ) ;
AddParam ( " weight-d " , " d " , " weight(s) for distortion (reordering components) " ) ;
2010-08-10 17:12:00 +04:00
AddParam ( " weight-lr " , " lr " , " weight(s) for lexicalized reordering, if not included in weight-d " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " weight-generation " , " g " , " weight(s) for generation components " ) ;
AddParam ( " weight-i " , " I " , " weight(s) for word insertion - used for parameters from confusion network and lattice input links " ) ;
AddParam ( " weight-l " , " lm " , " weight(s) for language models " ) ;
AddParam ( " weight-lex " , " lex " , " weight for global lexical model " ) ;
AddParam ( " weight-t " , " tm " , " weights for translation model components " ) ;
AddParam ( " weight-w " , " w " , " weight for word penalty " ) ;
AddParam ( " weight-u " , " u " , " weight for unknown word penalty " ) ;
AddParam ( " weight-e " , " e " , " weight for word deletion " ) ;
AddParam ( " output-factors " , " list if factors in the output " ) ;
AddParam ( " cache-path " , " ? " ) ;
AddParam ( " distortion-limit " , " dl " , " distortion (reordering) limit in maximum number of words (0 = monotone, -1 = unlimited) " ) ;
AddParam ( " monotone-at-punctuation " , " mp " , " do not reorder over punctuation " ) ;
AddParam ( " distortion-file " , " source factors (0 if table independent of source), target factors, location of the factorized/lexicalized reordering tables " ) ;
AddParam ( " distortion " , " configurations for each factorized/lexicalized reordering model. " ) ;
AddParam ( " xml-input " , " xi " , " allows markup of input with desired translations and probabilities. values can be 'pass-through' (default), 'inclusive', 'exclusive', 'ignore' " ) ;
2011-11-16 16:53:10 +04:00
AddParam ( " xml-brackets " , " xb " , " specify strings to be used as xml tags opening and closing, e.g. \" {{ }} \" (default \" < > \" ). Avoid square brackets because of configuration file format. Valid only with text input mode " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " minimum-bayes-risk " , " mbr " , " use miminum Bayes risk to determine best translation " ) ;
2010-02-03 13:23:32 +03:00
AddParam ( " lminimum-bayes-risk " , " lmbr " , " use lattice miminum Bayes risk to determine best translation " ) ;
2010-04-07 19:47:58 +04:00
AddParam ( " consensus-decoding " , " con " , " use consensus decoding (De Nero et. al. 2009) " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " mbr-size " , " number of translation candidates considered in MBR decoding (default 200) " ) ;
AddParam ( " mbr-scale " , " scaling factor to convert log linear score probability in MBR decoding (default 1.0) " ) ;
2010-02-03 13:23:32 +03:00
AddParam ( " lmbr-thetas " , " theta(s) for lattice mbr calculation " ) ;
AddParam ( " lmbr-pruning-factor " , " average number of nodes/word wanted in pruned lattice " ) ;
2010-02-03 22:46:35 +03:00
AddParam ( " lmbr-p " , " unigram precision value for lattice mbr " ) ;
AddParam ( " lmbr-r " , " ngram precision decay value for lattice mbr " ) ;
2010-03-14 23:23:17 +03:00
AddParam ( " lmbr-map-weight " , " weight given to map solution when doing lattice MBR (default 0) " ) ;
2010-02-03 14:20:20 +03:00
AddParam ( " lattice-hypo-set " , " to use lattice as hypo set during lattice MBR " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " clean-lm-cache " , " clean language model caches after N translations (default N=1) " ) ;
AddParam ( " use-persistent-cache " , " cache translation options across sentences (default true) " ) ;
AddParam ( " persistent-cache-size " , " maximum size of cache for translation options (default 10,000 input phrases) " ) ;
AddParam ( " recover-input-path " , " r " , " (conf net/word lattice only) - recover input path corresponding to the best translation " ) ;
AddParam ( " output-word-graph " , " owg " , " Output stack info as word graph. Takes filename, 0=only hypos in stack, 1=stack + nbest hypos " ) ;
AddParam ( " time-out " , " seconds after which is interrupted (-1=no time-out, default is -1) " ) ;
AddParam ( " output-search-graph " , " osg " , " Output connected hypotheses of search into specified filename " ) ;
AddParam ( " output-search-graph-extended " , " osgx " , " Output connected hypotheses of search into specified filename, in extended format " ) ;
2011-09-16 15:58:53 +04:00
AddParam ( " unpruned-search-graph " , " usg " , " When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses " ) ;
2008-09-24 20:48:23 +04:00
# ifdef HAVE_PROTOBUF
2011-02-24 16:14:42 +03:00
AddParam ( " output-search-graph-pb " , " pb " , " Write phrase lattice to protocol buffer objects in the specified path. " ) ;
2008-09-24 20:48:23 +04:00
# endif
2011-02-24 16:14:42 +03:00
AddParam ( " cube-pruning-pop-limit " , " cbp " , " How many hypotheses should be popped for each stack. (default = 1000) " ) ;
AddParam ( " cube-pruning-diversity " , " cbd " , " How many hypotheses should be created for each coverage. (default = 0) " ) ;
2011-06-27 19:13:15 +04:00
AddParam ( " cube-pruning-lazy-scoring " , " cbls " , " Don't fully score a hypothesis until it is popped " ) ;
2012-01-26 15:38:40 +04:00
AddParam ( " parsing-algorithm " , " Which parsing algorithm to use. 0=CYK+, 1=scope-3. (default = 0) " ) ;
2012-07-02 18:57:54 +04:00
AddParam ( " search-algorithm " , " Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing, 4=stack with batched lm requests (default = 0) " ) ;
2011-02-24 16:14:42 +03:00
AddParam ( " constraint " , " Location of the file with target sentences to produce constraining the search " ) ;
AddParam ( " use-alignment-info " , " Use word-to-word alignment: actually it is only used to output the word-to-word alignment. Word-to-word alignments are taken from the phrase table if any. Default is false. " ) ;
AddParam ( " print-alignment-info " , " Output word-to-word alignment into the log file. Word-to-word alignments are takne from the phrase table if any. Default is false " ) ;
AddParam ( " print-alignment-info-in-n-best " , " Include word-to-word alignment in the n-best list. Word-to-word alignments are takne from the phrase table if any. Default is false " ) ;
AddParam ( " link-param-count " , " Number of parameters on word links when using confusion networks or lattices (default = 1) " ) ;
AddParam ( " description " , " Source language, target language, description " ) ;
AddParam ( " max-chart-span " , " maximum num. of source word chart rules can consume (default 10) " ) ;
AddParam ( " non-terminals " , " list of non-term symbols, space separated " ) ;
AddParam ( " rule-limit " , " a little like table limit. But for chart decoding rules. Default is DEFAULT_MAX_TRANS_OPT_SIZE " ) ;
AddParam ( " source-label-overlap " , " What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0 " ) ;
AddParam ( " output-hypo-score " , " Output the hypo score to stdout with the output string. For search error analysis. Default is false " ) ;
AddParam ( " unknown-lhs " , " file containing target lhs of unknown words. 1 per line: LHS prob " ) ;
AddParam ( " translation-systems " , " specify multiple translation systems, each consisting of an id, followed by a set of models ids, eg '0 T1 R1 L0' " ) ;
AddParam ( " show-weights " , " print feature weights and exit " ) ;
AddParam ( " alignment-output-file " , " print output word alignments into given file " ) ;
2011-08-26 06:37:52 +04:00
AddParam ( " sort-word-alignment " , " Sort word alignments for more consistent display. 0=no sort (default), 1=target order " ) ;
2011-11-13 21:14:40 +04:00
AddParam ( " start-translation-id " , " Id of 1st input. Default = 0 " ) ;
2012-08-03 14:04:39 +04:00
// Compact phrase table and reordering table.
AddParam ( " minlexr-memory " , " Load lexical reordering table in minlexr format into memory " ) ;
2012-08-03 18:38:45 +04:00
AddParam ( " minphr-memory " , " Load phrase table in minphr format into memory " ) ;
2008-06-11 14:52:57 +04:00
}
/** Destructor; the body is empty, so no explicit cleanup is performed here. */
Parameter::~Parameter() {}
/** initialize a parameter, sub of constructor */
void Parameter : : AddParam ( const string & paramName , const string & description )
{
2011-02-24 16:14:42 +03:00
m_valid [ paramName ] = true ;
m_description [ paramName ] = description ;
2008-06-11 14:52:57 +04:00
}
/** initialize a parameter (including abbreviation), sub of constructor */
void Parameter : : AddParam ( const string & paramName , const string & abbrevName , const string & description )
{
2011-02-24 16:14:42 +03:00
m_valid [ paramName ] = true ;
m_valid [ abbrevName ] = true ;
m_abbreviation [ paramName ] = abbrevName ;
m_description [ paramName ] = description ;
2008-06-11 14:52:57 +04:00
}
/** print descriptions of all parameters */
2011-02-24 16:14:42 +03:00
void Parameter : : Explain ( )
{
cerr < < " Usage: " < < endl ;
for ( PARAM_STRING : : const_iterator iterParam = m_description . begin ( ) ; iterParam ! = m_description . end ( ) ; iterParam + + ) {
const string paramName = iterParam - > first ;
const string paramDescription = iterParam - > second ;
cerr < < " \t - " < < paramName ;
PARAM_STRING : : const_iterator iterAbbr = m_abbreviation . find ( paramName ) ;
if ( iterAbbr ! = m_abbreviation . end ( ) )
cerr < < " ( " < < iterAbbr - > second < < " ) " ;
cerr < < " : " < < paramDescription < < endl ;
}
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
/** check whether an item on the command line is a switch or a value
2008-06-11 14:52:57 +04:00
* \ param token token on the command line to checked * */
2011-02-24 16:14:42 +03:00
bool Parameter : : isOption ( const char * token )
{
2008-06-11 14:52:57 +04:00
if ( ! token ) return false ;
std : : string tokenString ( token ) ;
size_t length = tokenString . size ( ) ;
if ( length > 0 & & tokenString . substr ( 0 , 1 ) ! = " - " ) return false ;
if ( length > 1 & & tokenString . substr ( 1 , 1 ) . find_first_not_of ( " 0123456789 " ) = = 0 ) return true ;
return false ;
}
/** load all parameters from the configuration file and the command line switches */
bool Parameter : : LoadParam ( const string & filePath )
{
2011-02-24 16:14:42 +03:00
const char * argv [ ] = { " executable " , " -f " , filePath . c_str ( ) } ;
return LoadParam ( 3 , ( char * * ) argv ) ;
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
2008-06-11 14:52:57 +04:00
/** load all parameters from the configuration file and the command line switches */
2011-02-24 16:14:42 +03:00
bool Parameter : : LoadParam ( int argc , char * argv [ ] )
2008-06-11 14:52:57 +04:00
{
2011-02-24 16:14:42 +03:00
// config file (-f) arg mandatory
string configPath ;
if ( ( configPath = FindParam ( " -f " , argc , argv ) ) = = " "
& & ( configPath = FindParam ( " -config " , argc , argv ) ) = = " " ) {
PrintCredit ( ) ;
Explain ( ) ;
UserMessage : : Add ( " No configuration file was specified. Use -config or -f " ) ;
return false ;
} else {
if ( ! ReadConfigFile ( configPath ) ) {
UserMessage : : Add ( " Could not read " + configPath ) ;
return false ;
}
}
// overwrite parameters with values from switches
for ( PARAM_STRING : : const_iterator iterParam = m_description . begin ( ) ; iterParam ! = m_description . end ( ) ; iterParam + + ) {
const string paramName = iterParam - > first ;
OverwriteParam ( " - " + paramName , paramName , argc , argv ) ;
}
// ... also shortcuts
for ( PARAM_STRING : : const_iterator iterParam = m_abbreviation . begin ( ) ; iterParam ! = m_abbreviation . end ( ) ; iterParam + + ) {
const string paramName = iterParam - > first ;
const string paramShortName = iterParam - > second ;
OverwriteParam ( " - " + paramShortName , paramName , argc , argv ) ;
}
// logging of parameters that were set in either config or switch
int verbose = 1 ;
if ( m_setting . find ( " verbose " ) ! = m_setting . end ( ) & &
m_setting [ " verbose " ] . size ( ) > 0 )
verbose = Scan < int > ( m_setting [ " verbose " ] [ 0 ] ) ;
if ( verbose > = 1 ) { // only if verbose
TRACE_ERR ( " Defined parameters (per moses.ini or switch): " < < endl ) ;
for ( PARAM_MAP : : const_iterator iterParam = m_setting . begin ( ) ; iterParam ! = m_setting . end ( ) ; iterParam + + ) {
TRACE_ERR ( " \t " < < iterParam - > first < < " : " ) ;
for ( size_t i = 0 ; i < iterParam - > second . size ( ) ; i + + )
TRACE_ERR ( iterParam - > second [ i ] < < " " ) ;
TRACE_ERR ( endl ) ;
}
}
// check for illegal parameters
bool noErrorFlag = true ;
for ( int i = 0 ; i < argc ; i + + ) {
if ( isOption ( argv [ i ] ) ) {
string paramSwitch = ( string ) argv [ i ] ;
string paramName = paramSwitch . substr ( 1 ) ;
if ( m_valid . find ( paramName ) = = m_valid . end ( ) ) {
UserMessage : : Add ( " illegal switch: " + paramSwitch ) ;
noErrorFlag = false ;
}
}
}
2008-06-11 14:52:57 +04:00
// check if parameters make sense
2011-02-24 16:14:42 +03:00
return Validate ( ) & & noErrorFlag ;
2008-06-11 14:52:57 +04:00
}
/** check that parameter settings make sense */
2011-02-24 16:14:42 +03:00
bool Parameter : : Validate ( )
2008-06-11 14:52:57 +04:00
{
2011-02-24 16:14:42 +03:00
bool noErrorFlag = true ;
2008-06-11 14:52:57 +04:00
2011-06-11 08:59:55 +04:00
PARAM_MAP : : const_iterator iterParams ;
for ( iterParams = m_setting . begin ( ) ; iterParams ! = m_setting . end ( ) ; + + iterParams ) {
const std : : string & key = iterParams - > first ;
if ( m_valid . find ( key ) = = m_valid . end ( ) )
{
UserMessage : : Add ( " Unknown parameter " + key ) ;
noErrorFlag = false ;
}
}
2008-06-11 14:52:57 +04:00
// required parameters
2011-02-24 16:14:42 +03:00
if ( m_setting [ " ttable-file " ] . size ( ) = = 0 ) {
UserMessage : : Add ( " No phrase translation table (ttable-file) " ) ;
noErrorFlag = false ;
}
if ( m_setting [ " lmodel-dub " ] . size ( ) > 0 ) {
if ( m_setting [ " lmodel-file " ] . size ( ) ! = m_setting [ " lmodel-dub " ] . size ( ) ) {
stringstream errorMsg ( " " ) ;
errorMsg < < " Config and parameters specify "
< < static_cast < int > ( m_setting [ " lmodel-file " ] . size ( ) )
< < " language model files (lmodel-file), but "
< < static_cast < int > ( m_setting [ " lmodel-dub " ] . size ( ) )
< < " LM upperbounds (lmodel-dub) "
< < endl ;
UserMessage : : Add ( errorMsg . str ( ) ) ;
noErrorFlag = false ;
}
}
2011-09-12 22:04:56 +04:00
if ( m_setting [ " lmodel-file " ] . size ( ) * ( m_setting . find ( " lmodel-oov-feature " ) ! = m_setting . end ( ) ? 2 : 1 )
2011-09-09 22:03:00 +04:00
! = m_setting [ " weight-l " ] . size ( ) ) {
2011-02-24 16:14:42 +03:00
stringstream errorMsg ( " " ) ;
errorMsg < < " Config and parameters specify "
< < static_cast < int > ( m_setting [ " lmodel-file " ] . size ( ) )
< < " language model files (lmodel-file), but "
< < static_cast < int > ( m_setting [ " weight-l " ] . size ( ) )
< < " weights (weight-l) " ;
2008-06-11 14:52:57 +04:00
errorMsg < < endl < < " You might be giving '-lmodel-file TYPE FACTOR ORDER FILENAME' but you should be giving these four as a single argument, i.e. '-lmodel-file \" TYPE FACTOR ORDER FILENAME \" ' " ;
2011-09-09 22:03:00 +04:00
errorMsg < < endl < < " You should also remember that each language model requires 2 weights, if and only if lmodel-oov-feature is on. " ;
2011-02-24 16:14:42 +03:00
UserMessage : : Add ( errorMsg . str ( ) ) ;
noErrorFlag = false ;
}
2008-06-11 14:52:57 +04:00
// do files exist?
2010-06-07 21:19:51 +04:00
2011-02-24 16:14:42 +03:00
// input file
if ( noErrorFlag & & m_setting [ " input-file " ] . size ( ) = = 1 ) {
noErrorFlag = FileExists ( m_setting [ " input-file " ] [ 0 ] ) ;
}
// generation tables
if ( noErrorFlag ) {
std : : vector < std : : string > ext ;
//raw tables in either un compressed or compressed form
ext . push_back ( " " ) ;
ext . push_back ( " .gz " ) ;
noErrorFlag = FilesExist ( " generation-file " , 3 , ext ) ;
}
// distortion
if ( noErrorFlag ) {
std : : vector < std : : string > ext ;
//raw tables in either un compressed or compressed form
ext . push_back ( " " ) ;
ext . push_back ( " .gz " ) ;
//prefix tree format
ext . push_back ( " .binlexr.idx " ) ;
2012-08-10 15:22:09 +04:00
//prefix tree format
ext . push_back ( " .minlexr " ) ;
2011-02-24 16:14:42 +03:00
noErrorFlag = FilesExist ( " distortion-file " , 3 , ext ) ;
}
return noErrorFlag ;
2008-06-11 14:52:57 +04:00
}
/** check whether a file exists */
2010-05-13 18:12:05 +04:00
bool Parameter : : FilesExist ( const string & paramName , int fieldNo , std : : vector < std : : string > const & extensions )
2008-06-11 14:52:57 +04:00
{
2011-02-24 16:14:42 +03:00
typedef std : : vector < std : : string > StringVec ;
StringVec : : const_iterator iter ;
PARAM_MAP : : const_iterator iterParam = m_setting . find ( paramName ) ;
if ( iterParam = = m_setting . end ( ) ) {
// no param. therefore nothing to check
return true ;
}
const StringVec & pathVec = ( * iterParam ) . second ;
for ( iter = pathVec . begin ( ) ; iter ! = pathVec . end ( ) ; + + iter ) {
StringVec vec = Tokenize ( * iter ) ;
size_t tokenizeIndex ;
if ( fieldNo = = - 1 )
tokenizeIndex = vec . size ( ) - 1 ;
else
tokenizeIndex = static_cast < size_t > ( fieldNo ) ;
if ( tokenizeIndex > = vec . size ( ) ) {
stringstream errorMsg ( " " ) ;
errorMsg < < " Expected at least " < < ( tokenizeIndex + 1 ) < < " tokens per entry in ' "
< < paramName < < " ', but only found "
< < vec . size ( ) ;
UserMessage : : Add ( errorMsg . str ( ) ) ;
return false ;
}
const string & pathStr = vec [ tokenizeIndex ] ;
bool fileFound = 0 ;
for ( size_t i = 0 ; i < extensions . size ( ) & & ! fileFound ; + + i ) {
fileFound | = FileExists ( pathStr + extensions [ i ] ) ;
}
if ( ! fileFound ) {
stringstream errorMsg ( " " ) ;
errorMsg < < " File " < < pathStr < < " does not exist " ;
UserMessage : : Add ( errorMsg . str ( ) ) ;
return false ;
}
}
return true ;
2008-06-11 14:52:57 +04:00
}
/** Look for a switch among the arguments and return the argument after it.
 *  \param paramSwitch switch text to look for
 *  \param argc        number of arguments
 *  \param argv        arguments
 *  \return the argument following the first occurrence that has one; the
 *          blank literal if the switch is absent or is the last argument
 *          (in the latter case a UserMessage is added as well)
 */
// TODO arg parsing like this does not belong in the library, it belongs
// in moses-cmd
string Parameter::FindParam(const string &paramSwitch, int argc, char *argv[])
{
  for (int pos = 0; pos < argc; ++pos) {
    if (paramSwitch != argv[pos]) {
      continue;
    }

    const int valuePos = pos + 1;
    if (valuePos < argc) {
      return argv[valuePos];
    }

    // the switch is the last argument, so its value is missing
    stringstream errorMsg(" ");
    errorMsg << " Option " << paramSwitch << " requires a parameter! ";
    UserMessage::Add(errorMsg.str());
    // TODO return some sort of error, not the empty string
  }
  return " ";
}
/** update parameter settings with command line switches
* \ param paramSwitch ( potentially short ) name of switch
* \ param paramName full name of parameter
* \ param argc number of arguments on command line
* \ param argv values of paramters on command line */
void Parameter : : OverwriteParam ( const string & paramSwitch , const string & paramName , int argc , char * argv [ ] )
{
2011-02-24 16:14:42 +03:00
int startPos = - 1 ;
for ( int i = 0 ; i < argc ; i + + ) {
if ( string ( argv [ i ] ) = = paramSwitch ) {
startPos = i + 1 ;
break ;
}
}
if ( startPos < 0 )
return ;
int index = 0 ;
m_setting [ paramName ] ; // defines the parameter, important for boolean switches
while ( startPos < argc & & ( ! isOption ( argv [ startPos ] ) ) ) {
if ( m_setting [ paramName ] . size ( ) > ( size_t ) index )
m_setting [ paramName ] [ index ] = argv [ startPos ] ;
else
m_setting [ paramName ] . push_back ( argv [ startPos ] ) ;
index + + ;
startPos + + ;
}
2008-06-11 14:52:57 +04:00
}
/** read parameters from a configuration file */
2011-02-24 16:14:42 +03:00
bool Parameter : : ReadConfigFile ( const string & filePath )
2008-06-11 14:52:57 +04:00
{
2011-02-24 16:14:42 +03:00
InputFileStream inFile ( filePath ) ;
string line , paramName ;
while ( getline ( inFile , line ) ) {
// comments
size_t comPos = line . find_first_of ( " # " ) ;
if ( comPos ! = string : : npos )
line = line . substr ( 0 , comPos ) ;
// trim leading and trailing spaces/tabs
line = Trim ( line ) ;
2011-10-06 13:16:21 +04:00
if ( line . size ( ) = = 0 ) {
2011-10-06 19:06:59 +04:00
// blank line. do nothing.
2011-10-06 13:16:21 +04:00
}
else if ( line [ 0 ] = = ' [ ' ) {
2011-02-24 16:14:42 +03:00
// new parameter
for ( size_t currPos = 0 ; currPos < line . size ( ) ; currPos + + ) {
if ( line [ currPos ] = = ' ] ' ) {
paramName = line . substr ( 1 , currPos - 1 ) ;
break ;
}
}
2011-10-06 13:16:21 +04:00
} else {
2011-02-24 16:14:42 +03:00
// add value to parameter
m_setting [ paramName ] . push_back ( line ) ;
}
}
return true ;
2008-06-11 14:52:57 +04:00
}
2011-02-24 16:14:42 +03:00
/** One entry of the credits listing: a person plus three free-text fields.
 *  Every instance draws a sort key from rand() on construction, and
 *  operator< compares that key only (an earlier ordering by responsibility
 *  and name is no longer used).
 */
struct Credit {
  std::string name;
  std::string contact;
  std::string currentPursuits;
  std::string areaResponsibility;
  int sortId; // value in [0, 1000) taken from rand()

  Credit(std::string name, std::string contact, std::string currentPursuits, std::string areaResponsibility)
    : name(name)
    , contact(contact)
    , currentPursuits(currentPursuits)
    , areaResponsibility(areaResponsibility)
    , sortId(rand() % 1000)
  {
  }

  /** Order by sort key. */
  bool operator<(const Credit &other) const
  {
    return sortId < other.sortId;
  }
};
std : : ostream & operator < < ( std : : ostream & os , const Credit & credit )
{
2011-02-24 16:14:42 +03:00
os < < credit . name ;
if ( credit . contact ! = " " )
os < < " \t contact: " < < credit . contact ;
if ( credit . currentPursuits ! = " " )
os < < " " < < credit . currentPursuits ;
if ( credit . areaResponsibility ! = " " )
os < < " I'll answer question on: " < < credit . areaResponsibility ;
return os ;
2008-06-11 14:52:57 +04:00
}
void Parameter : : PrintCredit ( )
{
2011-02-24 16:14:42 +03:00
vector < Credit > everyone ;
srand ( time ( NULL ) ) ;
everyone . push_back ( Credit ( " Nicola Bertoldi "
, " 911 "
, " "
, " scripts & other stuff " ) ) ;
everyone . push_back ( Credit ( " Ondrej Bojar "
, " "
, " czech this out! "
, " " ) ) ;
everyone . push_back ( Credit ( " Chris Callison-Burch "
, " anytime, anywhere "
, " international playboy "
, " " ) ) ;
everyone . push_back ( Credit ( " Alexandra Constantin "
, " "
, " eu sunt varza "
, " " ) ) ;
everyone . push_back ( Credit ( " Brooke Cowan "
, " brooke@csail.mit.edu "
, " if you're going to san francisco, be sure to wear a flower in your hair "
, " " ) ) ;
everyone . push_back ( Credit ( " Chris Dyer "
, " can't. i'll be out driving my mustang "
, " driving my mustang "
, " " ) ) ;
everyone . push_back ( Credit ( " Marcello Federico "
, " federico at itc at it "
, " Researcher at ITC-irst, Trento, Italy "
, " IRST language model " ) ) ;
everyone . push_back ( Credit ( " Evan Herbst "
, " Small college in upstate New York "
, " "
, " " ) ) ;
everyone . push_back ( Credit ( " Philipp Koehn "
, " only between 2 and 4am "
, " "
, " Nothing fazes this dude " ) ) ;
everyone . push_back ( Credit ( " Christine Moran "
, " weird building at MIT "
, " "
, " " ) ) ;
everyone . push_back ( Credit ( " Wade Shen "
, " via morse code "
, " buying another laptop "
, " " ) ) ;
everyone . push_back ( Credit ( " Richard Zens "
, " richard at aachen dot de "
, " "
, " ambiguous source input, confusion networks, confusing source code " ) ) ;
everyone . push_back ( Credit ( " Hieu Hoang " , " http://www.hoang.co.uk/hieu/ "
, " phd student at Edinburgh Uni. Original Moses developer "
, " general queries/ flames on Moses. " ) ) ;
sort ( everyone . begin ( ) , everyone . end ( ) ) ;
cerr < < " Moses - A beam search decoder for phrase-based statistical machine translation models " < < endl
< < " Copyright (C) 2006 University of Edinburgh " < < endl < < endl
< < " This library is free software; you can redistribute it and/or " < < endl
< < " modify it under the terms of the GNU Lesser General Public " < < endl
< < " License as published by the Free Software Foundation; either " < < endl
< < " version 2.1 of the License, or (at your option) any later version. " < < endl < < endl
< < " This library is distributed in the hope that it will be useful, " < < endl
< < " but WITHOUT ANY WARRANTY; without even the implied warranty of " < < endl
< < " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU " < < endl
< < " Lesser General Public License for more details. " < < endl < < endl
< < " You should have received a copy of the GNU Lesser General Public " < < endl
< < " License along with this library; if not, write to the Free Software " < < endl
< < " Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA " < < endl < < endl
< < " *********************************************************************** " < < endl < < endl
< < " Built on " < < __DATE__ < < " at " __TIME__ < < endl < < endl
< < " WHO'S FAULT IS THIS GODDAM SOFTWARE: " < < endl ;
ostream_iterator < Credit > out ( cerr , " \n " ) ;
copy ( everyone . begin ( ) , everyone . end ( ) , out ) ;
cerr < < endl < < endl ;
2008-06-11 14:52:57 +04:00
}
2008-10-09 03:51:26 +04:00
}