2012-06-28 18:58:59 +04:00
# include "lm/value_build.hh"
# include "lm/model.hh"
# include "lm/read_arpa.hh"
2015-04-30 08:05:11 +03:00
namespace lm {
2012-06-28 18:58:59 +04:00
namespace ngram {
/**
 * Loads the lower-order models named in config.rest_lower_files.
 *
 * config.rest_lower_files must contain exactly order - 1 paths.  Entry 0 is
 * read as a unigram ARPA file by a custom loader that fills unigrams_;
 * entry i - 1 (for 2 <= i < order) is loaded as a Model and must report
 * Order() == i.  The loaded models are owned by this object and are deleted
 * either by the destructor or, if construction fails part-way, by the
 * catch block below.
 *
 * @param config Configuration; the copy used for the lower-order models has
 *               write_mmap cleared and rest_lower_files emptied.
 * @param order  Order of the model being built; must be at least 2.
 * @param vocab  Vocabulary used to map unigram words to WordIndex values.
 *               NOTE(review): nothing here verifies that the lower-order
 *               files share this vocabulary (see the TODO at the end).
 *
 * @throws ConfigException     if order < 2 or the number of files is not
 *                             order - 1.
 * @throws FormatLoadException if the unigram file is not order 1, declares
 *                             zero unigrams, contains a word whose index
 *                             does not fit the declared count, or a
 *                             lower-order file has an unexpected order.
 */
template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
  // With order == 1 the size check below would accept an empty file list and
  // the unigram loader would then read rest_lower_files[0] out of bounds.
  UTIL_THROW_IF(order < 2, ConfigException, "Lower-order rest costs require a model of order at least 2, not " << order);
  UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, " This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes. ");

  // The lower-order models are loaded with a modified copy of the config:
  // no mmap output and no rest-cost files of their own.
  Config for_lower = config;
  for_lower.write_mmap = NULL;
  for_lower.rest_lower_files.clear();

  // Unigram models aren't supported, so this is a custom loader.
  // TODO: optimize the unigram loading?
  {
    util::FilePiece uni(config.rest_lower_files[0].c_str());
    std::vector<uint64_t> number;
    ReadARPACounts(uni, number);
    UTIL_THROW_IF(number.size() != 1, FormatLoadException, " Expected the unigram model to have order 1, not " << number.size());
    // unigrams_[0] is written unconditionally below, so the table must be non-empty.
    UTIL_THROW_IF(number[0] == 0, FormatLoadException, "The unigram model " << config.rest_lower_files[0] << " declares zero unigrams.");
    ReadNGramHeader(uni, 1);
    unigrams_.resize(number[0]);
    // Default for index 0; overwritten if the file has an entry that maps to index 0.
    unigrams_[0] = config.unknown_missing_logprob;
    PositiveProbWarn warn;
    for (uint64_t i = 0; i < number[0]; ++i) {
      WordIndex w;
      Prob entry;
      ReadNGram(uni, 1, vocab, &w, entry, warn);
      // w comes from the caller's vocabulary, which may be larger than the
      // count declared by this file; refuse to write past the table.
      UTIL_THROW_IF(w >= unigrams_.size(), FormatLoadException, "Word index " << w << " in unigram model " << config.rest_lower_files[0] << " is out of range for " << unigrams_.size() << " declared unigrams; the vocabulary does not match the main model.");
      unigrams_[w] = entry.prob;
    }
  }

  // Reserve first so push_back below cannot reallocate and throw after the
  // Model has been allocated, which would leak it.
  models_.reserve(order - 2);
  try {
    for (unsigned int i = 2; i < order; ++i) {
      models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower));
      UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, " Lower order file " << config.rest_lower_files[i - 1] << " should have order " << i);
    }
  } catch (...) {
    // The destructor does not run when a constructor throws, so free the
    // models loaded so far before propagating the error.
    for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
      delete *i;
    }
    models_.clear();
    throw;
  }

  // TODO: force/check same vocab.
}
// Frees every lower-order model held in models_, in the order they are stored.
template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
  typedef typename std::vector<const Model*>::const_iterator ModelIter;
  ModelIter cursor = models_.begin();
  const ModelIter stop = models_.end();
  while (cursor != stop) {
    delete *cursor;
    ++cursor;
  }
}
// Explicit instantiation: generates the LowerRestBuild member definitions in
// this file for ProbingModel, the only Model type instantiated here.
template class LowerRestBuild < ProbingModel > ;
} // namespace ngram
} // namespace lm