diff --git a/biconcor/Vocabulary.cpp b/biconcor/Vocabulary.cpp index 9d52ee44e..f0f07c97d 100644 --- a/biconcor/Vocabulary.cpp +++ b/biconcor/Vocabulary.cpp @@ -1,101 +1,101 @@ -// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $ -#include "Vocabulary.h" -#include - -namespace -{ - -const int MAX_LENGTH = 10000; - -} // namespace - -using namespace std; - -// as in beamdecoder/tables.cpp -vector Vocabulary::Tokenize( const char input[] ) -{ - vector< WORD_ID > token; - bool betweenWords = true; - int start=0; - int i=0; - for(; input[i] != '\0'; i++) { - bool isSpace = (input[i] == ' ' || input[i] == '\t'); - - if (!isSpace && betweenWords) { - start = i; - betweenWords = false; - } else if (isSpace && !betweenWords) { - token.push_back( StoreIfNew ( string( input+start, i-start ) ) ); - betweenWords = true; - } - } - if (!betweenWords) - token.push_back( StoreIfNew ( string( input+start, i-start ) ) ); - return token; -} - -WORD_ID Vocabulary::StoreIfNew( const WORD& word ) -{ - map::iterator i = lookup.find( word ); - - if( i != lookup.end() ) - return i->second; - - WORD_ID id = vocab.size(); - vocab.push_back( word ); - lookup[ word ] = id; - return id; -} - -WORD_ID Vocabulary::GetWordID( const WORD &word ) const -{ - map::const_iterator i = lookup.find( word ); - if( i == lookup.end() ) - return 0; - WORD_ID w= (WORD_ID) i->second; - return w; -} - -void Vocabulary::Save(const string& fileName ) const -{ - ofstream vcbFile; - vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc); - - if (!vcbFile) { - cerr << "Failed to open " << vcbFile << endl; - exit(1); - } - - vector< WORD >::const_iterator i; - for(i = vocab.begin(); i != vocab.end(); i++) { - const string &word = *i; - vcbFile << word << endl; - } - vcbFile.close(); -} - -void Vocabulary::Load(const string& fileName ) -{ - ifstream vcbFile; - char line[MAX_LENGTH]; - vcbFile.open(fileName.c_str()); - - if (!vcbFile) { - cerr << "no such file or directory: " << vcbFile << endl; - exit(1); - } - - cerr << "loading from " << fileName << endl; - istream *fileP = &vcbFile; - int count = 0; - while(!fileP->eof()) { - SAFE_GETLINE((*fileP), line, MAX_LENGTH, '\n'); - if (fileP->eof()) break; - int length = 0; - for(; line[length] != '\0'; length++); - StoreIfNew( string( line, length ) ); - count++; - } - vcbFile.close(); - cerr << count << " word read, vocabulary size " << vocab.size() << endl; -} +// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $ +#include "Vocabulary.h" +#include + +namespace +{ + +const int MAX_LENGTH = 10000; + +} // namespace + +using namespace std; + +// as in beamdecoder/tables.cpp +vector Vocabulary::Tokenize( const char input[] ) +{ + vector< WORD_ID > token; + bool betweenWords = true; + int start=0; + int i=0; + for(; input[i] != '\0'; i++) { + bool isSpace = (input[i] == ' ' || input[i] == '\t'); + + if (!isSpace && betweenWords) { + start = i; + betweenWords = false; + } else if (isSpace && !betweenWords) { + token.push_back( StoreIfNew ( string( input+start, i-start ) ) ); + betweenWords = true; + } + } + if (!betweenWords) + token.push_back( StoreIfNew ( string( input+start, i-start ) ) ); + return token; +} + +WORD_ID Vocabulary::StoreIfNew( const WORD& word ) +{ + map::iterator i = lookup.find( word ); + + if( i != lookup.end() ) + return i->second; + + WORD_ID id = vocab.size(); + vocab.push_back( word ); + lookup[ word ] = id; + return id; +} + +WORD_ID Vocabulary::GetWordID( const WORD &word ) const +{ + map::const_iterator i = lookup.find( word ); + if( i == lookup.end() ) + return 0; + WORD_ID w= (WORD_ID) i->second; + return w; +} + +void Vocabulary::Save(const string& fileName ) const +{ + ofstream vcbFile; + vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc); + + if (!vcbFile) { + cerr << "Failed to open " << vcbFile << endl; + exit(1); + } + + vector< WORD >::const_iterator i; + for(i = vocab.begin(); i != vocab.end(); i++) { + const string &word = *i; + vcbFile << word << endl; + } + vcbFile.close(); +} + +void Vocabulary::Load(const string& fileName ) +{ + ifstream vcbFile; + char line[MAX_LENGTH]; + vcbFile.open(fileName.c_str()); + + if (!vcbFile) { + cerr << "no such file or directory: " << vcbFile << endl; + exit(1); + } + + cerr << "loading from " << fileName << endl; + istream *fileP = &vcbFile; + int count = 0; + while(!fileP->eof()) { + SAFE_GETLINE((*fileP), line, MAX_LENGTH, '\n'); + if (fileP->eof()) break; + int length = 0; + for(; line[length] != '\0'; length++); + StoreIfNew( string( line, length ) ); + count++; + } + vcbFile.close(); + cerr << count << " word read, vocabulary size " << vocab.size() << endl; +} diff --git a/contrib/c++tokenizer/tokenizer.cpp b/contrib/c++tokenizer/tokenizer.cpp index 035ba2e97..6d3dd7046 100644 --- a/contrib/c++tokenizer/tokenizer.cpp +++ b/contrib/c++tokenizer/tokenizer.cpp @@ -46,7 +46,7 @@ RE2 curr_en_x("^[Nn]?[\'][\\p{L}]"); // english contraction suffixes conjoin to RE2 pre_en_x(".*[\\p{L}\\p{N}]+$"); // valid english contraction prefixes RE2 curr_fr_x(".*[\\p{L}\\p{N}]+[\']"); // french/italian contraction prefixes conjoin to the right RE2 post_fr_x("^[\\p{L}\\p{N}]*"); // valid french/italian contraction suffixes -// anything rarely used will just be given as a string and compiled on demand by RE2 +// anything rarely used will just be given as a string and compiled on demand by RE2 const char * SPC_BYTE = " "; @@ -85,8 +85,8 @@ const char *ESCAPE_MOSES[] = { "'", // ' 6 (27) """, // " 7 (22) }; - -const std::set + +const std::set ESCAPE_SET = { std::string(ESCAPE_MOSES[0]), std::string(ESCAPE_MOSES[1]), @@ -98,7 +98,7 @@ ESCAPE_SET = { std::string(ESCAPE_MOSES[7]), }; -const std::map +const std::map ENTITY_MAP = { { std::wstring(L"""), L'"' }, { std::wstring(L"&"), L'&' }, @@ -355,7 +355,7 @@ ENTITY_MAP = { { std::wstring(L"♦"), L'\u2666' } }; -inline gunichar +inline gunichar get_entity(gunichar *ptr, size_t len) { // try hex, decimal entity first gunichar ech(0); @@ -380,16 +380,16 @@ get_entity(gunichar *ptr, size_t len) { ech = 0; } } - if (ech) + if (ech) return ech; - std::map::const_iterator it = + std::map::const_iterator it = ENTITY_MAP.find(std::wstring((wchar_t *)(ptr),len)); return it != ENTITY_MAP.end() ? it->second : gunichar(0); } -inline gunichar +inline gunichar get_entity(char *ptr, size_t len) { glong ulen = 0; gunichar *gtmp = g_utf8_to_ucs4_fast((const gchar *)ptr, len, &ulen); @@ -399,7 +399,7 @@ get_entity(char *ptr, size_t len) { } -inline std::string +inline std::string trim(const std::string& in) { std::size_t start = 0; @@ -413,7 +413,7 @@ trim(const std::string& in) } -inline std::vector +inline std::vector split(const std::string& in) { std::vector outv; @@ -476,7 +476,7 @@ Tokenizer::Tokenizer(const Parameters& _) // // dtor deletes dynamically allocated per-language RE2 compiled expressions // -Tokenizer::~Tokenizer() +Tokenizer::~Tokenizer() { for (auto& ptr : prot_pat_vec) { if (ptr == &numprefixed_x || ptr == &quasinumeric_x) @@ -491,7 +491,7 @@ Tokenizer::~Tokenizer() // others into nbpre_gen_set // std::pair -Tokenizer::load_prefixes(std::ifstream& ifs) +Tokenizer::load_prefixes(std::ifstream& ifs) { RE2 numonly("(.*)[\\s]+(\\#NUMERIC_ONLY\\#)"); std::string line; @@ -547,7 +547,7 @@ Tokenizer::init(const char *cfg_dir_optional) { try { std::pair counts = load_prefixes(cfg); if (verbose_p) { - std::cerr << "loaded " << counts.first << " non-numeric, " + std::cerr << "loaded " << counts.first << " non-numeric, " << counts.second << " numeric prefixes from " << nbpre_path << std::endl; } @@ -570,7 +570,7 @@ Tokenizer::init(const char *cfg_dir_optional) { std::string protpat_path(cfg_dir); protpat_path.append("/protected_pattern.").append(lang_iso); // default to generic version - if (::access(protpat_path.c_str(),R_OK)) + if (::access(protpat_path.c_str(),R_OK)) protpat_path = protpat_path.substr(0,protpat_path.size()-lang_iso.size()-1); prot_pat_vec.push_back(&numprefixed_x); @@ -596,7 +596,7 @@ Tokenizer::init(const char *cfg_dir_optional) { throw std::runtime_error(ess.str()); } if (verbose_p) { - std::cerr << "loaded " << npat << " protected patterns from " + std::cerr << "loaded " << npat << " protected patterns from " << protpat_path << std::endl; } } else if (verbose_p) { @@ -612,7 +612,7 @@ Tokenizer::reset() { // // apply ctor-selected tokenization to a string, in-place, no newlines allowed, -// assumes protections are applied already, some invariants are in place, +// assumes protections are applied already, some invariants are in place, // e.g. that successive chars <= ' ' have been normalized to a single ' ' // void @@ -633,7 +633,7 @@ Tokenizer::protected_tokenize(std::string& text) { } if (pos < textpc.size() && textpc[pos] != ' ') words.push_back(textpc.substr(pos,textpc.size()-pos)); - + // regurgitate words with look-ahead handling for tokens with final mumble std::string outs; std::size_t nwords(words.size()); @@ -659,7 +659,7 @@ Tokenizer::protected_tokenize(std::string& text) { // lower-case look-ahead does not break sentence_break_p = false; } - } + } outs.append(words[ii].data(),len); if (sentence_break_p) @@ -671,15 +671,15 @@ Tokenizer::protected_tokenize(std::string& text) { } -bool +bool Tokenizer::unescape(std::string& word) { std::ostringstream oss; std::size_t was = 0; // last processed std::size_t pos = 0; // last unprocessed std::size_t len = 0; // processed length bool hit = false; - for (std::size_t endp=0; - (pos = word.find('&',was)) != std::string::npos && (endp = word.find(';',pos)) != std::string::npos; + for (std::size_t endp=0; + (pos = word.find('&',was)) != std::string::npos && (endp = word.find(';',pos)) != std::string::npos; was = endp == std::string::npos ? pos : 1+endp) { len = endp - pos + 1; glong ulen(0); @@ -703,7 +703,7 @@ Tokenizer::unescape(std::string& word) { } g_free(gtmp); } - if (was < word.size()) + if (was < word.size()) oss << word.substr(was); if (hit) word = oss.str(); @@ -727,7 +727,7 @@ Tokenizer::escape(std::string& text) { if (mod_p) outs.append(pp,pt-pp+1); } else { - if (mod_p) + if (mod_p) outs.append(pp,mk-pp); pt = --mk; } @@ -751,7 +751,7 @@ Tokenizer::escape(std::string& text) { } else if (*pt > ']') { if (*pt =='|') { // 7c sequence_p = ESCAPE_MOSES[0]; - } + } } else if (*pt > 'Z') { if (*pt == '<') { // 3e sequence_p = ESCAPE_MOSES[4]; @@ -761,11 +761,11 @@ Tokenizer::escape(std::string& text) { sequence_p = ESCAPE_MOSES[1]; } else if (*pt == ']') { // 5d sequence_p = ESCAPE_MOSES[2]; - } + } } if (sequence_p) { - if (pt > pp) + if (pt > pp) outs.append(pp,pt-pp); outs.append(sequence_p); mod_p = true; @@ -774,7 +774,7 @@ Tokenizer::escape(std::string& text) { ++pt; } } - + if (mod_p) { if (pp < pt) { outs.append(pp,pt-pp); @@ -795,13 +795,13 @@ Tokenizer::penn_tokenize(const std::string& buf) std::string text(buf); std::string outs; - if (skip_alltags_p) + if (skip_alltags_p) RE2::GlobalReplace(&text,genl_tags_x,SPC_BYTE); // directed quote patches size_t len = text.size(); - if (len > 2 && text.substr(0,2) == "``") - text.replace(0,2,"`` ",3); + if (len > 2 && text.substr(0,2) == "``") + text.replace(0,2,"`` ",3); else if (text[0] == '"') text.replace(0,1,"`` ",3); else if (text[0] == '`' || text[0] == '\'') @@ -811,9 +811,9 @@ Tokenizer::penn_tokenize(const std::string& buf) RE2::GlobalReplace(&text,x1_v_gg,one_gg); RE2::GlobalReplace(&text,x1_v_g,"\\1 ` \\2"); RE2::GlobalReplace(&text,x1_v_q,"\\1 ` "); - + // protect ellipsis - for (size_t pos = text.find("..."); pos != std::string::npos; pos = text.find("...",pos+11)) + for (size_t pos = text.find("..."); pos != std::string::npos; pos = text.find("...",pos+11)) text.replace(pos,3,"MANYELIPSIS",11); // numeric commas @@ -826,13 +826,13 @@ Tokenizer::penn_tokenize(const std::string& buf) // isolable slash RE2::GlobalReplace(&text,slash_x,special_refs); - + // isolate final period RE2::GlobalReplace(&text,final_x,"\\1 \\2\\3"); - + // isolate q.m., e.m. RE2::GlobalReplace(&text,qx_x,isolate_ref); - + // isolate braces RE2::GlobalReplace(&text,braces_x,isolate_ref); @@ -866,7 +866,7 @@ Tokenizer::penn_tokenize(const std::string& buf) } std::string ntext(SPC_BYTE); ntext.append(text); - + // convert double quote to paired single-quotes RE2::GlobalReplace(&ntext,"\""," '' "); @@ -894,7 +894,7 @@ Tokenizer::penn_tokenize(const std::string& buf) RE2::GlobalReplace(&ntext," ([Ww])anna "," \\1an na "); protected_tokenize(ntext); - + // restore ellipsis RE2::GlobalReplace(&ntext,"MANYELIPSIS","..."); @@ -919,7 +919,7 @@ Tokenizer::quik_tokenize(const std::string& buf) int num = 0; // this is the main moses-compatible tokenizer - + // push all the prefixes matching protected patterns std::vector prot_stack; std::string match; @@ -942,7 +942,7 @@ Tokenizer::quik_tokenize(const std::string& buf) } } } - + const char *pt(text.c_str()); const char *ep(pt + text.size()); while (pt < ep && *pt >= 0 && *pt <= ' ') @@ -990,8 +990,8 @@ Tokenizer::quik_tokenize(const std::string& buf) if (!since_start) { if (std::isalpha(char(*ucs4))) alpha_prefix++; - } else if (alpha_prefix == since_start - && char(*ucs4) == ':' + } else if (alpha_prefix == since_start + && char(*ucs4) == ':' && next_type != G_UNICODE_SPACE_SEPARATOR) { in_url_p = true; } @@ -1018,7 +1018,7 @@ Tokenizer::quik_tokenize(const std::string& buf) // fallthough case G_UNICODE_UPPERCASE_LETTER: case G_UNICODE_LOWERCASE_LETTER: - if (downcase_p && curr_type == G_UNICODE_UPPERCASE_LETTER) + if (downcase_p && curr_type == G_UNICODE_UPPERCASE_LETTER) curr_uch = g_unichar_tolower(*ucs4); break; case G_UNICODE_SPACING_MARK: @@ -1082,8 +1082,8 @@ Tokenizer::quik_tokenize(const std::string& buf) substitute_p = L"@-@"; post_break_p = pre_break_p = true; } else if ( ( curr_uch > gunichar(L'\u002D') && curr_uch < gunichar(L'\u2010') ) || - ( curr_uch > gunichar(L'\u2011') - && curr_uch != gunichar(L'\u30A0') + ( curr_uch > gunichar(L'\u2011') + && curr_uch != gunichar(L'\u30A0') && curr_uch < gunichar(L'\uFE63') ) ) { // dash, not a hyphen post_break_p = pre_break_p = true; @@ -1151,7 +1151,7 @@ Tokenizer::quik_tokenize(const std::string& buf) default: post_break_p = pre_break_p = prev_uch != curr_uch; break; - } + } } } break; @@ -1159,8 +1159,8 @@ Tokenizer::quik_tokenize(const std::string& buf) switch (curr_uch) { case gunichar(L':'): case gunichar(L'/'): - if (refined_p && !in_url_p - && prev_type == G_UNICODE_DECIMAL_NUMBER + if (refined_p && !in_url_p + && prev_type == G_UNICODE_DECIMAL_NUMBER && next_type == G_UNICODE_DECIMAL_NUMBER) { break; } @@ -1178,7 +1178,7 @@ Tokenizer::quik_tokenize(const std::string& buf) break; case gunichar(L'&'): if (unescape_p) { - if (next_type == G_UNICODE_LOWERCASE_LETTER || next_type == G_UNICODE_UPPERCASE_LETTER + if (next_type == G_UNICODE_LOWERCASE_LETTER || next_type == G_UNICODE_UPPERCASE_LETTER || next_type == G_UNICODE_DECIMAL_NUMBER || next_uch == gunichar(L'#')) { gunichar *eptr = nxt4; GUnicodeType eptr_type(G_UNICODE_UNASSIGNED); @@ -1223,16 +1223,16 @@ Tokenizer::quik_tokenize(const std::string& buf) next_type = nxt4 < lim4 ? g_unichar_type(next_uch) : G_UNICODE_UNASSIGNED; goto retry; } - + } post_break_p = pre_break_p = !in_url_p || next_type != G_UNICODE_SPACE_SEPARATOR; - if (escape_p) + if (escape_p) substitute_p = L"&"; break; case gunichar(L'\''): if (english_p) { if (!in_url_p) { - bool next_letter_p = next_type == G_UNICODE_LOWERCASE_LETTER + bool next_letter_p = next_type == G_UNICODE_LOWERCASE_LETTER || next_type == G_UNICODE_UPPERCASE_LETTER; pre_break_p = true; if (next_letter_p && refined_p) { @@ -1241,9 +1241,9 @@ Tokenizer::quik_tokenize(const std::string& buf) *(uptr - 1) = gunichar(L' '); *(uptr++) = prev_uch; pre_break_p = false; - } + } } - post_break_p = since_start == 0 + post_break_p = since_start == 0 || (!next_letter_p && next_type != G_UNICODE_DECIMAL_NUMBER); } } else if (latin_p) { @@ -1252,12 +1252,12 @@ Tokenizer::quik_tokenize(const std::string& buf) } else { post_break_p = pre_break_p = !in_url_p; } - if (escape_p) + if (escape_p) substitute_p = L"'"; break; case gunichar(L'"'): post_break_p = pre_break_p = true; - if (escape_p) + if (escape_p) substitute_p = L"""; break; case gunichar(L','): @@ -1303,7 +1303,7 @@ Tokenizer::quik_tokenize(const std::string& buf) } } // terminal isolated letter does not break - } else if (class_follows_p(nxt4,lim4,G_UNICODE_LOWERCASE_LETTER) || + } else if (class_follows_p(nxt4,lim4,G_UNICODE_LOWERCASE_LETTER) || g_unichar_type(*nxt4) == G_UNICODE_DASH_PUNCTUATION) { // lower-case look-ahead does not break } else { @@ -1315,7 +1315,7 @@ Tokenizer::quik_tokenize(const std::string& buf) pre_break_p = true; break; } - } + } break; } } else { @@ -1346,11 +1346,11 @@ Tokenizer::quik_tokenize(const std::string& buf) case gunichar(L')'): break; case gunichar(L'['): - if (escape_p) + if (escape_p) substitute_p = L"["; break; case gunichar(L']'): - if (escape_p) + if (escape_p) substitute_p = L"]"; break; default: @@ -1377,7 +1377,7 @@ Tokenizer::quik_tokenize(const std::string& buf) if (english_p) { if (!in_url_p) { pre_break_p = true; - post_break_p = since_start == 0 || + post_break_p = since_start == 0 || (next_type != G_UNICODE_LOWERCASE_LETTER && next_type != G_UNICODE_UPPERCASE_LETTER && next_type != G_UNICODE_DECIMAL_NUMBER); } } else if (latin_p) { @@ -1386,23 +1386,23 @@ Tokenizer::quik_tokenize(const std::string& buf) } else { post_break_p = pre_break_p = !in_url_p; } - if (escape_p) + if (escape_p) substitute_p = L"'"; - else + else curr_uch = gunichar(L'\''); break; case gunichar(L'|'): - if (escape_p) + if (escape_p) substitute_p = L"|"; post_break_p = pre_break_p = true; break; case gunichar(L'<'): - if (escape_p) + if (escape_p) substitute_p = L"<"; post_break_p = pre_break_p = true; break; case gunichar(L'>'): - if (escape_p) + if (escape_p) substitute_p = L">"; post_break_p = pre_break_p = true; break; @@ -1414,7 +1414,7 @@ Tokenizer::quik_tokenize(const std::string& buf) case gunichar(L'='): case gunichar(L'~'): in_num_p = false; - post_break_p = pre_break_p = !in_url_p; + post_break_p = pre_break_p = !in_url_p; break; case gunichar(L'+'): post_break_p = pre_break_p = !in_url_p; @@ -1444,12 +1444,12 @@ Tokenizer::quik_tokenize(const std::string& buf) curr_uch = gunichar(L' '); } else if (curr_uch < gunichar(L' ')) { curr_uch = gunichar(L' '); - } else if (curr_uch == gunichar(L'\u0092') && + } else if (curr_uch == gunichar(L'\u0092') && (next_type == G_UNICODE_LOWERCASE_LETTER || next_type == G_UNICODE_UPPERCASE_LETTER)) { // observed corpus corruption case if (english_p) { pre_break_p = true; - post_break_p = since_start == 0 || + post_break_p = since_start == 0 || (next_type != G_UNICODE_LOWERCASE_LETTER && next_type != G_UNICODE_UPPERCASE_LETTER && next_type != G_UNICODE_DECIMAL_NUMBER); } else if (latin_p) { post_break_p = true; @@ -1457,9 +1457,9 @@ Tokenizer::quik_tokenize(const std::string& buf) } else { post_break_p = pre_break_p = true; } - if (escape_p) + if (escape_p) substitute_p = L"'"; - else + else curr_uch = gunichar(L'\''); } else { post_break_p = pre_break_p = true; @@ -1491,7 +1491,7 @@ Tokenizer::quik_tokenize(const std::string& buf) in_url_p = in_num_p = false; break; } - + if (pre_break_p || curr_uch == gunichar(L' ') || (bad_length && curr_type != G_UNICODE_UNASSIGNED)) { if (since_start) { // non-empty token emitted previously, so pre-break must emit token separator @@ -1501,8 +1501,8 @@ Tokenizer::quik_tokenize(const std::string& buf) if (curr_uch == gunichar(L' ')) // suppress emission below, fall-through to substitute logic curr_uch = 0; - } - + } + if (substitute_p) { for (gunichar *sptr = (gunichar *)substitute_p; *sptr; ++sptr) { *uptr++ = *sptr; @@ -1521,7 +1521,7 @@ Tokenizer::quik_tokenize(const std::string& buf) glong nbytes = 0; gchar *utf8 = g_ucs4_to_utf8(ubuf,uptr-ubuf,0,&nbytes,0); // g_free - if (utf8[nbytes-1] == ' ') + if (utf8[nbytes-1] == ' ') --nbytes; text.assign((const char *)utf8,(const char *)(utf8 + nbytes)); g_free(utf8); @@ -1552,7 +1552,7 @@ Tokenizer::quik_tokenize(const std::string& buf) } -std::size_t +std::size_t Tokenizer::tokenize(std::istream& is, std::ostream& os) { std::size_t line_no = 0; @@ -1561,10 +1561,10 @@ Tokenizer::tokenize(std::istream& is, std::ostream& os) std::vector< std::vector< std::string > > results(nthreads); std::vector< boost::thread > workers(nthreads); bool done_p = !(is.good() && os.good()); - + for (std::size_t tranche = 0; !done_p; ++tranche) { - + // for loop starting threads for chunks of input for (std::size_t ithread = 0; ithread < nthreads; ++ithread) { @@ -1589,19 +1589,19 @@ Tokenizer::tokenize(std::istream& is, std::ostream& os) results[ithread].resize(line_pos); break; } - lines[ithread][line_pos].clear(); - } else if (skip_xml_p && - (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) { - lines[ithread][line_pos].clear(); + lines[ithread][line_pos].clear(); + } else if (skip_xml_p && + (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) { + lines[ithread][line_pos].clear(); } else { - lines[ithread][line_pos] = - std::string(SPC_BYTE).append(istr).append(SPC_BYTE); + lines[ithread][line_pos] = + std::string(SPC_BYTE).append(istr).append(SPC_BYTE); } - } + } if (line_pos) { - workers[ithread] = - boost::thread(VectorTokenizerCallable(this,lines[ithread],results[ithread])); + workers[ithread] = + boost::thread(VectorTokenizerCallable(this,lines[ithread],results[ithread])); } } // end for loop starting threads @@ -1616,22 +1616,22 @@ Tokenizer::tokenize(std::istream& is, std::ostream& os) if (nlin != nres) { std::ostringstream emsg; - emsg << "Tranche " << tranche - << " worker " << ithread << "/" << nthreads + emsg << "Tranche " << tranche + << " worker " << ithread << "/" << nthreads << " |lines|==" << nlin << " != |results|==" << nres; throw std::runtime_error(emsg.str()); } - for (std::size_t ires = 0; ires < nres; ++ires) + for (std::size_t ires = 0; ires < nres; ++ires) os << results[ithread][ires] << std::endl; } // end loop over joined results - + if (verbose_p) { std::cerr << line_no << ' '; std::cerr.flush(); } - + } // end loop over chunks return line_no; @@ -1642,18 +1642,18 @@ std::string Tokenizer::detokenize(const std::string& buf) { std::vector words = split(trim(buf)); - + std::size_t squotes = 0; std::size_t dquotes = 0; std::string prepends(""); std::ostringstream oss; - + std::size_t nwords = words.size(); std::size_t iword = 0; - if (unescape_p) - for (auto &word: words) + if (unescape_p) + for (auto &word: words) unescape(word); for (auto &word: words) { @@ -1665,13 +1665,13 @@ Tokenizer::detokenize(const std::string& buf) } else if (RE2::FullMatch(word,left_x)) { oss << word; prepends = SPC_BYTE; - } else if (english_p && iword - && RE2::FullMatch(word,curr_en_x) + } else if (english_p && iword + && RE2::FullMatch(word,curr_en_x) && RE2::FullMatch(words[iword-1],pre_en_x)) { oss << word; prepends = SPC_BYTE; - } else if (latin_p && iword < nwords - 2 - && RE2::FullMatch(word,curr_fr_x) + } else if (latin_p && iword < nwords - 2 + && RE2::FullMatch(word,curr_fr_x) && RE2::FullMatch(words[iword+1],post_fr_x)) { oss << prepends << word; prepends.clear(); @@ -1679,7 +1679,7 @@ Tokenizer::detokenize(const std::string& buf) if ((word.at(0) == '\'' && ((squotes % 2) == 0 )) || (word.at(0) == '"' && ((dquotes % 2) == 0))) { if (english_p && iword - && word.at(0) == '\'' + && word.at(0) == '\'' && std::tolower(words[iword-1].at(words[iword-1].size()-1)) == 's') { oss << word; prepends = SPC_BYTE; @@ -1698,7 +1698,7 @@ Tokenizer::detokenize(const std::string& buf) prepends = SPC_BYTE; if (word.at(0) == '\'') squotes++; - else if (word.at(0) == '"') + else if (word.at(0) == '"') dquotes++; } } else { @@ -1707,8 +1707,8 @@ Tokenizer::detokenize(const std::string& buf) } iword++; } - - + + std::string text(oss.str()); RE2::GlobalReplace(&text," +",SPC_BYTE); RE2::GlobalReplace(&text,"\n ","\n"); @@ -1718,14 +1718,14 @@ Tokenizer::detokenize(const std::string& buf) std::size_t -Tokenizer::detokenize(std::istream& is, std::ostream& os) +Tokenizer::detokenize(std::istream& is, std::ostream& os) { size_t line_no = 0; while (is.good() && os.good()) { std::string istr; std::getline(is,istr); line_no ++; - if (istr.empty()) + if (istr.empty()) continue; if (skip_xml_p && (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) { os << istr << std::endl; @@ -1749,7 +1749,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { return parts; } gunichar *uout = (gunichar *)g_malloc0(2*ncp*sizeof(gunichar)); - + const wchar_t GENL_HYPH = L'\u2010'; const wchar_t IDEO_STOP = L'\u3002'; const wchar_t KANA_MDOT = L'\u30FB'; @@ -1786,7 +1786,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { std::vector breaks; std::set suppress; - + for (; icp <= ncp; ++icp) { currwc = wchar_t(ucs4[icp]); curr_type = g_unichar_type(currwc); @@ -1798,7 +1798,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { case G_UNICODE_OTHER_NUMBER: curr_class = numba; curr_word_p = true; - break; + break; case G_UNICODE_LOWERCASE_LETTER: case G_UNICODE_MODIFIER_LETTER: case G_UNICODE_OTHER_LETTER: @@ -1822,7 +1822,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { } else if (currwc >= SMAL_HYPH) { curr_word_p = true; } else { - curr_word_p = (currwc >= WAVE_DASH) && (currwc <= KANA_DHYP); + curr_word_p = (currwc >= WAVE_DASH) && (currwc <= KANA_DHYP); } break; case G_UNICODE_CLOSE_PUNCTUATION: @@ -1860,7 +1860,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { curr_word_p = false; break; } - + // # condition for prefix test // $words[$i] =~ /([\p{IsAlnum}\.\-]*)([\'\"\)\]\%\p{IsPf}]*)(\.+)$/ // $words[$i+1] =~ /^([ ]*[\'\"\(\[\¿\¡\p{IsPi}]*[ ]*[\p{IsUpper}0-9])/ @@ -1875,7 +1875,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { } else if (curr_word_p) { if (!fini_word) { init_word = ocp; - } + } fini_word = ocp+1; dotslen = finilen = 0; } else if (curr_class >= quote && curr_class <= pfpct && curr_class != pinit) { @@ -1893,7 +1893,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { } else { init_word = fini_word = 0; } - + if (check_abbr_p) { // not a valid word character or post-word punctuation character: check word std::wstring k((wchar_t *)uout+init_word,fini_word-init_word); @@ -1986,7 +1986,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { } init_word = fini_word = 0; } - + if (seqpos >= SEQ_LIM) { seqpos = 0; } @@ -2015,7 +2015,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { continue; } } - + if (!seqpos) { if (curr_class != blank) { uout[ocp++] = gunichar(currwc); @@ -2024,7 +2024,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { } continue; } - + if (curr_class == blank) { if (prev_class != blank) { seq[seqpos] = blank; @@ -2034,7 +2034,7 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { } if (icp < ncp) continue; - } + } if (curr_class >= quote && curr_class <= pfini) { if (prev_class < quote || prev_class > pfini) { @@ -2158,8 +2158,8 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { if (uout[chkpos] == L'\n' || uout[chkpos] == L' ') { endpos = chkpos; continue; - } - if (g_unichar_isgraph(uout[chkpos])) + } + if (g_unichar_isgraph(uout[chkpos])) break; endpos = chkpos; } @@ -2171,17 +2171,17 @@ Tokenizer::splitter(const std::string &istr, bool *continuation_ptr) { if (continuation_ptr) *continuation_ptr = endpos > iop; iop = nextpos; - } - + } + g_free(uout); g_free(ucs4); - + return parts; } std::pair -Tokenizer::splitter(std::istream& is, std::ostream& os) +Tokenizer::splitter(std::istream& is, std::ostream& os) { std::pair counts = { 0, 0 }; bool continuation_p = false; @@ -2197,7 +2197,7 @@ Tokenizer::splitter(std::istream& is, std::ostream& os) if (istr.empty() && (is.eof() ||!para_marks_p)) continue; - if (skip_xml_p && (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) + if (skip_xml_p && (RE2::FullMatch(istr,tag_line_x) || RE2::FullMatch(istr,white_line_x))) continue; std::vector sentences(splitter(istr,&continuation_p)); @@ -2221,13 +2221,13 @@ Tokenizer::splitter(std::istream& is, std::ostream& os) os << " "; pending_gap = false; } - - for (std::size_t ii = 0; ii < nsents-1; ++ii) + + for (std::size_t ii = 0; ii < nsents-1; ++ii) os << sentences[ii] << std::endl; - + os << sentences[nsents-1]; - if (continuation_p) + if (continuation_p) pending_gap = !split_breaks_p; if (!pending_gap) os << std::endl; diff --git a/contrib/c++tokenizer/tokenizer.h b/contrib/c++tokenizer/tokenizer.h index cc1de2770..978f20197 100644 --- a/contrib/c++tokenizer/tokenizer.h +++ b/contrib/c++tokenizer/tokenizer.h @@ -26,7 +26,7 @@ class Tokenizer { private: - typedef enum { + typedef enum { empty = 0, blank, upper, // upper case @@ -56,7 +56,7 @@ private: // non-breaking prefixes (other) ucs4 std::set nbpre_gen_ucs4; - // compiled protected patterns + // compiled protected patterns std::vector prot_pat_vec; protected: @@ -96,10 +96,10 @@ protected: Tokenizer *tokenizer; std::vector& in; std::vector& out; - - VectorTokenizerCallable(Tokenizer *_tokenizer, - std::vector& _in, - std::vector& _out) + + VectorTokenizerCallable(Tokenizer *_tokenizer, + std::vector& _in, + std::vector& _out) : tokenizer(_tokenizer) , in(_in) , out(_out) { @@ -107,10 +107,10 @@ protected: void operator()() { out.resize(in.size()); - for (std::size_t ii = 0; ii < in.size(); ++ii) + for (std::size_t ii = 0; ii < in.size(); ++ii) if (in[ii].empty()) out[ii] = in[ii]; - else if (tokenizer->penn_p) + else if (tokenizer->penn_p) out[ii] = tokenizer->penn_tokenize(in[ii]); else out[ii] = tokenizer->quik_tokenize(in[ii]); diff --git a/contrib/c++tokenizer/tokenizer_main.cpp b/contrib/c++tokenizer/tokenizer_main.cpp index 7adb599e7..358a68cc3 100644 --- a/contrib/c++tokenizer/tokenizer_main.cpp +++ b/contrib/c++tokenizer/tokenizer_main.cpp @@ -10,8 +10,8 @@ using namespace TOKENIZER_NAMESPACE ; #endif -void -usage(const char *path) +void +usage(const char *path) { std::cerr << "Usage: " << path << "[-{v|x|p|a|e|s|u|n|N]* [LL] [-{c|o} PATH]* INFILE*" << std::endl; std::cerr << " -a -- aggressive hyphenization" << std::endl; @@ -89,7 +89,7 @@ copy_words(Tokenizer& tize, std::istream& ifs, std::ostream& ofs) { int nlines = 0; std::string line; while (ifs.good() && std::getline(ifs,line)) { - if (line.empty()) + if (line.empty()) continue; std::vector tokens(tize.tokens(line)); int count = 0; @@ -127,7 +127,7 @@ copy_words(Tokenizer& tize, std::istream& ifs, std::ostream& ofs) { } -int main(int ac, char **av) +int main(int ac, char **av) { int rc = 0; Parameters params; @@ -140,7 +140,7 @@ int main(int ac, char **av) if (!detokenize_p) params.split_p = std::strstr(av[0],"splitter") != 0; - while (++av,--ac) { + while (++av,--ac) { if (**av == '-') { switch (av[0][1]) { case 'a': @@ -244,7 +244,7 @@ int main(int ac, char **av) if (comma) { *comma++ = 0; params.chunksize = std::strtoul(comma,0,0); - } + } params.nthreads = std::strtoul(*av,0,0); } else { params.args.push_back(std::string(*av)); @@ -275,7 +275,7 @@ int main(int ac, char **av) cfg_mos_str.append("/moses"); if (!::access(cfg_mos_str.c_str(),X_OK)) { params.cfg_path = strdup(cfg_mos_str.c_str()); - } else if (!::access(cfg_shr_str.c_str(),X_OK)) { + } else if (!::access(cfg_shr_str.c_str(),X_OK)) { params.cfg_path = strdup(cfg_shr_str.c_str()); } else if (!::access(cfg_dir_str.c_str(),X_OK)) { params.cfg_path = strdup(cfg_dir_str.c_str()); @@ -287,7 +287,7 @@ int main(int ac, char **av) if (params.verbose_p) { std::cerr << "config path: " << params.cfg_path << std::endl; } - } + } std::unique_ptr pofs = 0; if (!params.out_path.empty()) { @@ -345,7 +345,7 @@ int main(int ac, char **av) if (plines.second) { std::cerr << "%%% " << plines.second << " sentences." << std::endl; } - } + } return rc; } diff --git a/contrib/eppex/ISS.h b/contrib/eppex/ISS.h index 7921fcbf8..9c4e1fc22 100644 --- a/contrib/eppex/ISS.h +++ b/contrib/eppex/ISS.h @@ -1,236 +1,236 @@ -/** - * ISS (Indexed Strings Storage) - memory efficient storage for permanent strings. - * - * Implementation note: use #define USE_HASHSET to switch between implementation - * using __gnu_cxx::hash_set and implementation using std::set. - * - * (C) Ceslav Przywara, UFAL MFF UK, 2011 - * - * $Id$ - */ - -#ifndef _ISS_H -#define _ISS_H - -#include -#include -#include - -// Use hashset instead of std::set for string-to-number indexing? -#ifdef USE_HASHSET -#include -#else -#include -#endif - -#include - -#ifdef USE_HASHSET -// Forward declaration of comparator functor. -template -class StringsEqualComparator; - -template -class Hasher; -#else -// Forward declaration of comparator functor. -template -class StringsLessComparator; -#endif - -/** - */ -template -class IndexedStringsStorage { - -public: - - typedef IndType index_type; - -#ifdef USE_HASHSET - typedef StringsEqualComparator equality_comparator_t; - - typedef Hasher hasher_t; - - /** @typedef Hash set used as lookup table (string -> numeric index). */ - typedef __gnu_cxx::hash_set index_t; -#else - typedef StringsLessComparator less_comparator_t; - - /** @typedef Set used as lookup table (string -> numeric index). */ - typedef std::set index_t; -#endif - /** @typedef Container of pointers to stored C-strings. Acts as - * conversion table: numeric index -> string. - */ - typedef std::vector table_t; - -private: - - /** @var memory pool used to store C-strings */ - boost::pool<> _storage; - - /** @var index-to-string conversion table */ - table_t _table; - - /** @var index lookup table */ - index_t _index; - -public: - /** Default constructor. - */ - IndexedStringsStorage(void); - - /** @return True, if the indices are exhausted (new strings cannot be stored). - */ - inline bool is_full(void) const { return _table.size() == std::numeric_limits::max(); } - - /** Retrieves pointer to C-string instance represented by given index. - * Note: No range checks are performed! - * @param index Index of C-string to retrieve. - * @return Pointer to stored C-string instance. - */ - inline const char* get(IndType index) const { return _table[index]; } - - /** Stores the string and returns its numeric index. - * @param str Pointer to C-string to store. - * @return Index of stored copy of str. - * @throw std::bad_alloc When insertion of new string would cause - * overflow of indices datatype. - */ - IndType put(const char* str); - - /** @return Number of unique strings stored so far. - */ - inline table_t::size_type size(void) const { return _table.size(); } -}; - - -/** Functor designed for less than comparison of C-strings stored within StringStore. - * @param IndType Type of numerical indices of strings within given StringStore. - */ -#ifdef USE_HASHSET -template -class StringsEqualComparator: public std::binary_function { -#else -template -class StringsLessComparator: public std::binary_function { -#endif - /** @var conversion table: index -> string (necessary for indices comparison) */ - const typename IndexedStringsStorage::table_t& _table; -public: -#ifdef USE_HASHSET - StringsEqualComparator(const typename IndexedStringsStorage::table_t& table): _table(table) {} -#else - StringsLessComparator(const typename IndexedStringsStorage::table_t& table): _table(table) {} -#endif - - /** Comparison of two pointers to C-strings. - * @param lhs Pointer to 1st C-string. - * @param rhs Pointer to 2nd C-string. - * @return True, if 1st argument is equal/less than 2nd argument. - */ - inline bool operator()(IndType lhs, IndType rhs) const { -#ifdef USE_HASHSET - return strcmp(_table[lhs], _table[rhs]) == 0; -#else - return strcmp(_table[lhs], _table[rhs]) < 0; -#endif - } -}; - -#ifdef USE_HASHSET -/** Functor... TODO. - */ -template -class Hasher: public std::unary_function { - - __gnu_cxx::hash _hash; - - /** @var conversion table: index -> string (necessary for indices comparison) */ - const typename IndexedStringsStorage::table_t& _table; - -public: - /** */ - Hasher(const typename IndexedStringsStorage::table_t& table): _hash(), _table(table) {} - - /** Hashing function. - * @param index - * @return Counted hash. - */ - inline size_t operator()(const IndType index) const { - return _hash(_table[index]); - } -}; -#endif - -template -#ifdef USE_HASHSET -IndexedStringsStorage::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(100, hasher_t(_table), equality_comparator_t(_table)) {} -#else -IndexedStringsStorage::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(less_comparator_t(_table)) {} -#endif - -template -IndType IndexedStringsStorage::put(const char* str) { - - if ( this->is_full() ) { - // What a pity, not a single index left to spend. - throw std::bad_alloc(); - } - - // To use the index for lookup we first have to store passed string - // in conversion table (cause during lookup we compare the strings indirectly - // by using their indices). - // Note: thread unsafe! TODO: Redesing. - IndType index = static_cast(_table.size()); - _table.push_back(str); - -#ifdef USE_HASHSET - // - typename index_t::iterator iIndex = _index.find(index); -#else - // A lower_bound() search enables us to use "found" iterator as a hint for - // eventual insertion. - typename index_t::iterator iIndex = _index.lower_bound(index); -#endif - - if ( (iIndex != _index.end()) -#ifndef USE_HASHSET - // In case of lower_bound() search we have to also compare found item - // with passed string. - && (strcmp(_table[*iIndex], str) == 0) -#endif - ) { - // String is already present in storage! - // Pop back temporary stored pointer... - _table.pop_back(); - // ...and return numeric index to already stored copy of `str`. - return static_cast(*iIndex); - } - - // String not found within storage. - - // Allocate memory required for string storage... - char* mem = static_cast(_storage.ordered_malloc(strlen(str) + 1)); - // ...and fill it with copy of passed string. - strcpy(mem, str); - - // Overwrite temporary stored pointer to `str` with pointer to freshly - // saved copy. - _table[index] = mem; - -#ifdef USE_HASHSET - // Insert the index into lookup table. - _index.insert(index); -#else - // Insert the index into lookup table (use previously retrieved iterator - // as a hint). - _index.insert(iIndex, index); -#endif - - // Finally. - return index; -} - -#endif +/** + * ISS (Indexed Strings Storage) - memory efficient storage for permanent strings. + * + * Implementation note: use #define USE_HASHSET to switch between implementation + * using __gnu_cxx::hash_set and implementation using std::set. + * + * (C) Ceslav Przywara, UFAL MFF UK, 2011 + * + * $Id$ + */ + +#ifndef _ISS_H +#define _ISS_H + +#include +#include +#include + +// Use hashset instead of std::set for string-to-number indexing? +#ifdef USE_HASHSET +#include +#else +#include +#endif + +#include + +#ifdef USE_HASHSET +// Forward declaration of comparator functor. +template +class StringsEqualComparator; + +template +class Hasher; +#else +// Forward declaration of comparator functor. +template +class StringsLessComparator; +#endif + +/** + */ +template +class IndexedStringsStorage { + +public: + + typedef IndType index_type; + +#ifdef USE_HASHSET + typedef StringsEqualComparator equality_comparator_t; + + typedef Hasher hasher_t; + + /** @typedef Hash set used as lookup table (string -> numeric index). */ + typedef __gnu_cxx::hash_set index_t; +#else + typedef StringsLessComparator less_comparator_t; + + /** @typedef Set used as lookup table (string -> numeric index). */ + typedef std::set index_t; +#endif + /** @typedef Container of pointers to stored C-strings. Acts as + * conversion table: numeric index -> string. + */ + typedef std::vector table_t; + +private: + + /** @var memory pool used to store C-strings */ + boost::pool<> _storage; + + /** @var index-to-string conversion table */ + table_t _table; + + /** @var index lookup table */ + index_t _index; + +public: + /** Default constructor. + */ + IndexedStringsStorage(void); + + /** @return True, if the indices are exhausted (new strings cannot be stored). + */ + inline bool is_full(void) const { return _table.size() == std::numeric_limits::max(); } + + /** Retrieves pointer to C-string instance represented by given index. + * Note: No range checks are performed! + * @param index Index of C-string to retrieve. + * @return Pointer to stored C-string instance. + */ + inline const char* get(IndType index) const { return _table[index]; } + + /** Stores the string and returns its numeric index. + * @param str Pointer to C-string to store. + * @return Index of stored copy of str. + * @throw std::bad_alloc When insertion of new string would cause + * overflow of indices datatype. + */ + IndType put(const char* str); + + /** @return Number of unique strings stored so far. + */ + inline table_t::size_type size(void) const { return _table.size(); } +}; + + +/** Functor designed for less than comparison of C-strings stored within StringStore. + * @param IndType Type of numerical indices of strings within given StringStore. + */ +#ifdef USE_HASHSET +template +class StringsEqualComparator: public std::binary_function { +#else +template +class StringsLessComparator: public std::binary_function { +#endif + /** @var conversion table: index -> string (necessary for indices comparison) */ + const typename IndexedStringsStorage::table_t& _table; +public: +#ifdef USE_HASHSET + StringsEqualComparator(const typename IndexedStringsStorage::table_t& table): _table(table) {} +#else + StringsLessComparator(const typename IndexedStringsStorage::table_t& table): _table(table) {} +#endif + + /** Comparison of two pointers to C-strings. + * @param lhs Pointer to 1st C-string. + * @param rhs Pointer to 2nd C-string. + * @return True, if 1st argument is equal/less than 2nd argument. + */ + inline bool operator()(IndType lhs, IndType rhs) const { +#ifdef USE_HASHSET + return strcmp(_table[lhs], _table[rhs]) == 0; +#else + return strcmp(_table[lhs], _table[rhs]) < 0; +#endif + } +}; + +#ifdef USE_HASHSET +/** Functor... TODO. + */ +template +class Hasher: public std::unary_function { + + __gnu_cxx::hash _hash; + + /** @var conversion table: index -> string (necessary for indices comparison) */ + const typename IndexedStringsStorage::table_t& _table; + +public: + /** */ + Hasher(const typename IndexedStringsStorage::table_t& table): _hash(), _table(table) {} + + /** Hashing function. + * @param index + * @return Counted hash. + */ + inline size_t operator()(const IndType index) const { + return _hash(_table[index]); + } +}; +#endif + +template +#ifdef USE_HASHSET +IndexedStringsStorage::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(100, hasher_t(_table), equality_comparator_t(_table)) {} +#else +IndexedStringsStorage::IndexedStringsStorage(void): _storage(sizeof(char)), _table(), _index(less_comparator_t(_table)) {} +#endif + +template +IndType IndexedStringsStorage::put(const char* str) { + + if ( this->is_full() ) { + // What a pity, not a single index left to spend. + throw std::bad_alloc(); + } + + // To use the index for lookup we first have to store passed string + // in conversion table (cause during lookup we compare the strings indirectly + // by using their indices). + // Note: thread unsafe! TODO: Redesing. + IndType index = static_cast(_table.size()); + _table.push_back(str); + +#ifdef USE_HASHSET + // + typename index_t::iterator iIndex = _index.find(index); +#else + // A lower_bound() search enables us to use "found" iterator as a hint for + // eventual insertion. + typename index_t::iterator iIndex = _index.lower_bound(index); +#endif + + if ( (iIndex != _index.end()) +#ifndef USE_HASHSET + // In case of lower_bound() search we have to also compare found item + // with passed string. + && (strcmp(_table[*iIndex], str) == 0) +#endif + ) { + // String is already present in storage! + // Pop back temporary stored pointer... + _table.pop_back(); + // ...and return numeric index to already stored copy of `str`. + return static_cast(*iIndex); + } + + // String not found within storage. + + // Allocate memory required for string storage... + char* mem = static_cast(_storage.ordered_malloc(strlen(str) + 1)); + // ...and fill it with copy of passed string. + strcpy(mem, str); + + // Overwrite temporary stored pointer to `str` with pointer to freshly + // saved copy. + _table[index] = mem; + +#ifdef USE_HASHSET + // Insert the index into lookup table. + _index.insert(index); +#else + // Insert the index into lookup table (use previously retrieved iterator + // as a hint). + _index.insert(iIndex, index); +#endif + + // Finally. + return index; +} + +#endif diff --git a/contrib/eppex/LossyCounter.h b/contrib/eppex/LossyCounter.h index 2796c8090..a3cf3339f 100644 --- a/contrib/eppex/LossyCounter.h +++ b/contrib/eppex/LossyCounter.h @@ -83,7 +83,7 @@ public: const counter_t bucketWidth; // ceil(1/error) private: - + /** @var Current epoch bucket ID (b-current) */ counter_t _bucketId; @@ -182,7 +182,7 @@ class LossyCounterIterator: public std::iterator self_type; - + typedef typename LossyCounter::storage_t::const_iterator const_iterator; protected: @@ -288,7 +288,7 @@ protected: template void LossyCounter::add(const T& item) { - + typename storage_t::iterator iter = _storage.find(item); if ( iter == _storage.end() ) { @@ -330,7 +330,7 @@ void LossyCounter::prune(void) { //////////////////////////////////////////////////////////////////////////////// template -LossyCounterIterator LossyCounterIterator::operator++(void) { +LossyCounterIterator LossyCounterIterator::operator++(void) { this->forward(); return *this; } diff --git a/contrib/eppex/eppex.cpp b/contrib/eppex/eppex.cpp index d382890d2..76490d9d2 100644 --- a/contrib/eppex/eppex.cpp +++ b/contrib/eppex/eppex.cpp @@ -92,7 +92,7 @@ int main(int argc, char* argv[]) { // Init lossy counters. std::string lossyCountersParams; int paramIdx = 5; - + while ( (argc > paramIdx) && (*argv[paramIdx] != '-') ) { std::string param = std::string(argv[paramIdx]); if ( !parse_lossy_counting_params(param) ) { @@ -113,7 +113,7 @@ int main(int argc, char* argv[]) { usage(argv[0]); } } - + if ( (argc > paramIdx) && (strcmp(argv[paramIdx], "--compact") == 0) ) { compactOutputFlag = true; ++paramIdx; @@ -154,7 +154,7 @@ int main(int argc, char* argv[]) { readInput(eFile, fFile, aFile); std::cerr << std::endl; // Leave the progress bar end on previous line. - + // close input files eFile.close(); fFile.close(); diff --git a/contrib/eppex/phrase-extract.cpp b/contrib/eppex/phrase-extract.cpp index 5dff43b78..46337a8b7 100644 --- a/contrib/eppex/phrase-extract.cpp +++ b/contrib/eppex/phrase-extract.cpp @@ -32,14 +32,14 @@ typedef std::vector output_vector_t; class PhraseComp { /** @var If true, sort by target phrase first. */ bool _inverted; - + bool compareAlignments(const indexed_phrases_pair_t& a, const indexed_phrases_pair_t& b); int comparePhrases(const indexed_phrases_pair_t::phrase_t& a, const indexed_phrases_pair_t::phrase_t& b); - + public: PhraseComp(bool inverted): _inverted(inverted) {} - + bool operator()(const output_pair_t& a, const output_pair_t& b); }; @@ -448,9 +448,9 @@ void extract(SentenceAlignment &sentence) { ((phraseModel)? getOrientString(phrasePrevOrient, phraseType) + " " + getOrientString(phraseNextOrient, phraseType) : "") + " | " + ((hierModel)? getOrientString(hierPrevOrient, hierType) + " " + getOrientString(hierNextOrient, hierType) : ""); } - + addPhrase(sentence, startE, endE, startF, endF, orientationInfo); - + } // end of for loop through inbound phrases } // end if buildExtraStructure @@ -567,7 +567,7 @@ bool PhraseComp::operator()(const output_pair_t& a, const output_pair_t& b) { else { return cmp < 0; } - + } @@ -607,7 +607,7 @@ bool PhraseComp::compareAlignments(const indexed_phrases_pair_t& a, const indexe return cmp < 0; } } - + // Note: LC_ALL=C GNU sort treats shorter item as lesser than longer one. return (cmp == 0) ? (aSize < bSize) : (cmp < 0); @@ -685,7 +685,7 @@ void processSortedOutput(OutputProcessor& processor) { void processUnsortedOutput(OutputProcessor& processor) { - + LossyCountersVector::value_type current = NULL, prev = NULL; for ( size_t i = 1; i < lossyCounters.size(); ++i ) { // Intentionally skip 0. @@ -759,7 +759,7 @@ void printStats(void) { if ( (current == NULL) || ((current != prev) && (prev != NULL)) ) { // Time to print. to = i-1; - + // Increment overall stats. outputMass += prev->outputMass; outputSize += prev->outputSize; @@ -787,7 +787,7 @@ void printStats(void) { from = i; } - + prev = current; } diff --git a/contrib/other-builds/cmake/boost.example/main.cpp b/contrib/other-builds/cmake/boost.example/main.cpp index 7b95fb2a9..b77388e46 100644 --- a/contrib/other-builds/cmake/boost.example/main.cpp +++ b/contrib/other-builds/cmake/boost.example/main.cpp @@ -10,15 +10,15 @@ int main(int argc, char* argv[]) using namespace boost::locale; using namespace std; - + generator gen; locale loc=gen(""); - + cout.imbue(loc); - + cout << "Hello, World" << endl; - + cout << "This is how we show currency in this locale " << as::currency << 103.34 << endl; - + return 0; } diff --git a/contrib/python/moses/dictree.cpp b/contrib/python/moses/dictree.cpp index 207d7c3f7..d9008f6e3 100644 --- a/contrib/python/moses/dictree.cpp +++ b/contrib/python/moses/dictree.cpp @@ -557,7 +557,7 @@ static const char *__pyx_f[] = { * ctypedef vector[const_str_pointer] Tokens * ctypedef float FValue # <<<<<<<<<<<<<< * ctypedef vector[FValue] Scores - * + * */ typedef float __pyx_t_5moses_8cdictree_FValue; @@ -582,7 +582,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_8_genexpr; /* "cdictree.pxd":4 * from libcpp.vector cimport vector - * + * * ctypedef string* str_pointer # <<<<<<<<<<<<<< * ctypedef string* const_str_pointer "const str_pointer" * ctypedef vector[const_str_pointer] Tokens @@ -590,7 +590,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_8_genexpr; typedef std::string *__pyx_t_5moses_8cdictree_str_pointer; /* "cdictree.pxd":5 - * + * * ctypedef string* str_pointer * ctypedef string* const_str_pointer "const str_pointer" # <<<<<<<<<<<<<< * ctypedef vector[const_str_pointer] Tokens @@ -611,7 +611,7 @@ typedef std::vector __pyx_t_5moses_8cdictree_Tokens; * ctypedef vector[const_str_pointer] Tokens * ctypedef float FValue * ctypedef vector[FValue] Scores # <<<<<<<<<<<<<< - * + * * cdef extern from 'PhraseDictionaryTree.h' namespace 'Moses': */ typedef std::vector<__pyx_t_5moses_8cdictree_FValue> __pyx_t_5moses_8cdictree_Scores; @@ -619,7 +619,7 @@ struct __pyx_opt_args_5moses_7dictree_20PhraseDictionaryTree_getTargetProduction /* "moses/dictree.pyx":252 * and os.path.isfile(stem + ".binphr.tgtvoc") - * + * * cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None): # <<<<<<<<<<<<<< * """Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string) to a TargetProduction (python object).""" * cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())] @@ -632,7 +632,7 @@ struct __pyx_opt_args_5moses_7dictree_20PhraseDictionaryTree_getTargetProduction /* "moses/dictree.pyx":23 * raise TypeError('Cannot convert %s to string' % type(data)) - * + * * cdef class Production(object): # <<<<<<<<<<<<<< * """ * General class that represents a context-free production or a flat contiguous phrase. @@ -646,7 +646,7 @@ struct __pyx_obj_5moses_7dictree_Production { /* "moses/dictree.pyx":104 * return x >= y - * + * * cdef class Alignment(list): # <<<<<<<<<<<<<< * """ * This represents a list of alignment points (pairs of integers). @@ -658,7 +658,7 @@ struct __pyx_obj_5moses_7dictree_Alignment { /* "moses/dictree.pyx":125 * return ' '.join('%d-%d' % (s, t) for s, t in self) - * + * * cdef class FValues(list): # <<<<<<<<<<<<<< * """ * This represents a list of feature values (floats). @@ -670,7 +670,7 @@ struct __pyx_obj_5moses_7dictree_FValues { /* "moses/dictree.pyx":137 * return ' '.join(str(x) for x in self) - * + * * cdef class TargetProduction(Production): # <<<<<<<<<<<<<< * """This class specializes production making it the target side of a translation rule. * On top of lhs and rhs it comes with alignment information a tuple of real-valued features. @@ -684,9 +684,9 @@ struct __pyx_obj_5moses_7dictree_TargetProduction { /* "moses/dictree.pyx":175 * return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment))) - * + * * cdef class QueryResult(list): # <<<<<<<<<<<<<< - * + * * cdef readonly Production source */ struct __pyx_obj_5moses_7dictree_QueryResult { @@ -696,10 +696,10 @@ struct __pyx_obj_5moses_7dictree_QueryResult { /* "moses/dictree.pyx":184 - * - * + * + * * cdef class DictionaryTree(object): # <<<<<<<<<<<<<< - * + * * @classmethod */ struct __pyx_obj_5moses_7dictree_DictionaryTree { @@ -709,7 +709,7 @@ struct __pyx_obj_5moses_7dictree_DictionaryTree { /* "moses/dictree.pyx":202 * raise NotImplementedError - * + * * cdef class PhraseDictionaryTree(DictionaryTree): # <<<<<<<<<<<<<< * """This class encapsulates a Moses::PhraseDictionaryTree for operations over * binary phrase tables.""" @@ -728,9 +728,9 @@ struct __pyx_obj_5moses_7dictree_PhraseDictionaryTree { /* "moses/dictree.pyx":290 * return results - * + * * cdef class OnDiskWrapper(DictionaryTree): # <<<<<<<<<<<<<< - * + * * cdef condiskpt.OnDiskWrapper *wrapper */ struct __pyx_obj_5moses_7dictree_OnDiskWrapper { @@ -745,7 +745,7 @@ struct __pyx_obj_5moses_7dictree_OnDiskWrapper { /* "moses/dictree.pyx":50 * return IndexError, 'Index %s out of range' % str(key) - * + * * def __iter__(self): # <<<<<<<<<<<<<< * for x in self.rhs: * yield x @@ -761,10 +761,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct____iter__ { /* "moses/dictree.pyx":122 * ValueError, 'Cannot figure out pairs from: %s' % type(alignment) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join('%d-%d' % (s, t) for s, t in self) - * + * */ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_1___str__ { PyObject_HEAD @@ -773,10 +773,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_1___str__ { /* "moses/dictree.pyx":123 - * + * * def __str__(self): * return ' '.join('%d-%d' % (s, t) for s, t in self) # <<<<<<<<<<<<<< - * + * * cdef class FValues(list): */ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_2_genexpr { @@ -792,10 +792,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_2_genexpr { /* "moses/dictree.pyx":134 * super(FValues, self).__init__(values) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join(str(x) for x in self) - * + * */ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_3___str__ { PyObject_HEAD @@ -804,10 +804,10 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_3___str__ { /* "moses/dictree.pyx":135 - * + * * def __str__(self): * return ' '.join(str(x) for x in self) # <<<<<<<<<<<<<< - * + * * cdef class TargetProduction(Production): */ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_4_genexpr { @@ -822,7 +822,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_4_genexpr { /* "moses/dictree.pyx":258 * return TargetProduction(words, scores, wa) - * + * * def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -838,7 +838,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_5_query { * cdef vector[string]* wa = NULL * cdef Production source = Production(f.c_str() for f in fphrase) # <<<<<<<<<<<<<< * cdef QueryResult results = QueryResult(source) - * + * */ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_6_genexpr { PyObject_HEAD @@ -850,7 +850,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_6_genexpr { /* "moses/dictree.pyx":316 * return Production(tokens[:-1], tokens[-1]) - * + * * def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -881,7 +881,7 @@ struct __pyx_obj_5moses_7dictree___pyx_scope_struct_8_genexpr { /* "moses/dictree.pyx":202 * raise NotImplementedError - * + * * cdef class PhraseDictionaryTree(DictionaryTree): # <<<<<<<<<<<<<< * """This class encapsulates a Moses::PhraseDictionaryTree for operations over * binary phrase tables.""" @@ -895,9 +895,9 @@ static struct __pyx_vtabstruct_5moses_7dictree_PhraseDictionaryTree *__pyx_vtabp /* "moses/dictree.pyx":290 * return results - * + * * cdef class OnDiskWrapper(DictionaryTree): # <<<<<<<<<<<<<< - * + * * cdef condiskpt.OnDiskWrapper *wrapper */ @@ -1608,7 +1608,7 @@ static PyObject *__pyx_codeobj__13; static PyObject *__pyx_codeobj__15; /* "moses/dictree.pyx":156 - * + * * @staticmethod * def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<< * """Returns the sign of key(y) - key(x). @@ -1662,7 +1662,7 @@ static PyObject *__pyx_lambda_funcdef_5moses_7dictree_lambda1(CYTHON_UNUSED PyOb /* "moses/dictree.pyx":258 * return TargetProduction(words, scores, wa) - * + * * def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -1832,7 +1832,7 @@ static PyObject *__pyx_lambda_funcdef_5moses_7dictree_lambda3(CYTHON_UNUSED PyOb /* "moses/dictree.pyx":12 * from math import log - * + * * cpdef int fsign(float x): # <<<<<<<<<<<<<< * """Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing""" * return 1 if x >= 0 else -1 @@ -1849,7 +1849,7 @@ static int __pyx_f_5moses_7dictree_fsign(float __pyx_v_x, CYTHON_UNUSED int __py * cpdef int fsign(float x): * """Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing""" * return 1 if x >= 0 else -1 # <<<<<<<<<<<<<< - * + * * cdef bytes as_str(data): */ if (((__pyx_v_x >= 0.0) != 0)) { @@ -1862,7 +1862,7 @@ static int __pyx_f_5moses_7dictree_fsign(float __pyx_v_x, CYTHON_UNUSED int __py /* "moses/dictree.pyx":12 * from math import log - * + * * cpdef int fsign(float x): # <<<<<<<<<<<<<< * """Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing""" * return 1 if x >= 0 else -1 @@ -1929,7 +1929,7 @@ static PyObject *__pyx_pf_5moses_7dictree_fsign(CYTHON_UNUSED PyObject *__pyx_se /* "moses/dictree.pyx":16 * return 1 if x >= 0 else -1 - * + * * cdef bytes as_str(data): # <<<<<<<<<<<<<< * if isinstance(data, bytes): * return data @@ -1948,13 +1948,13 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) { __Pyx_RefNannySetupContext("as_str", 0); /* "moses/dictree.pyx":17 - * + * * cdef bytes as_str(data): * if isinstance(data, bytes): # <<<<<<<<<<<<<< * return data * elif isinstance(data, unicode): */ - __pyx_t_1 = PyBytes_Check(__pyx_v_data); + __pyx_t_1 = PyBytes_Check(__pyx_v_data); __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { @@ -1979,7 +1979,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) { * return data.encode('UTF-8') * raise TypeError('Cannot convert %s to string' % type(data)) */ - __pyx_t_2 = PyUnicode_Check(__pyx_v_data); + __pyx_t_2 = PyUnicode_Check(__pyx_v_data); __pyx_t_1 = (__pyx_t_2 != 0); if (__pyx_t_1) { @@ -1988,7 +1988,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) { * elif isinstance(data, unicode): * return data.encode('UTF-8') # <<<<<<<<<<<<<< * raise TypeError('Cannot convert %s to string' % type(data)) - * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_data, __pyx_n_s_encode); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -2006,7 +2006,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) { * elif isinstance(data, unicode): * return data.encode('UTF-8') * raise TypeError('Cannot convert %s to string' % type(data)) # <<<<<<<<<<<<<< - * + * * cdef class Production(object): */ __pyx_t_4 = __Pyx_PyString_Format(__pyx_kp_s_Cannot_convert_s_to_string, ((PyObject *)Py_TYPE(__pyx_v_data))); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -2025,7 +2025,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) { /* "moses/dictree.pyx":16 * return 1 if x >= 0 else -1 - * + * * cdef bytes as_str(data): # <<<<<<<<<<<<<< * if isinstance(data, bytes): * return data @@ -2045,7 +2045,7 @@ static PyObject *__pyx_f_5moses_7dictree_as_str(PyObject *__pyx_v_data) { /* "moses/dictree.pyx":33 * cdef readonly tuple rhs - * + * * def __init__(self, rhs, lhs = None): # <<<<<<<<<<<<<< * """ * :rhs right-hand side of the production (or the flat contiguous phrase) - sequence of strings @@ -2134,7 +2134,7 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose * """ * self.rhs = tuple(rhs) # <<<<<<<<<<<<<< * self.lhs = lhs - * + * */ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -2154,7 +2154,7 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose * """ * self.rhs = tuple(rhs) * self.lhs = lhs # <<<<<<<<<<<<<< - * + * * def __len__(self): */ if (!(likely(PyBytes_CheckExact(__pyx_v_lhs))||((__pyx_v_lhs) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "bytes", Py_TYPE(__pyx_v_lhs)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -2168,7 +2168,7 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose /* "moses/dictree.pyx":33 * cdef readonly tuple rhs - * + * * def __init__(self, rhs, lhs = None): # <<<<<<<<<<<<<< * """ * :rhs right-hand side of the production (or the flat contiguous phrase) - sequence of strings @@ -2189,10 +2189,10 @@ static int __pyx_pf_5moses_7dictree_10Production___init__(struct __pyx_obj_5mose /* "moses/dictree.pyx":41 * self.lhs = lhs - * + * * def __len__(self): # <<<<<<<<<<<<<< * return len(self.rhs) - * + * */ /* Python wrapper */ @@ -2219,10 +2219,10 @@ static Py_ssize_t __pyx_pf_5moses_7dictree_10Production_2__len__(struct __pyx_ob __Pyx_RefNannySetupContext("__len__", 0); /* "moses/dictree.pyx":42 - * + * * def __len__(self): * return len(self.rhs) # <<<<<<<<<<<<<< - * + * * def __getitem__(self, key): */ __pyx_t_1 = __pyx_v_self->rhs; @@ -2238,10 +2238,10 @@ static Py_ssize_t __pyx_pf_5moses_7dictree_10Production_2__len__(struct __pyx_ob /* "moses/dictree.pyx":41 * self.lhs = lhs - * + * * def __len__(self): # <<<<<<<<<<<<<< * return len(self.rhs) - * + * */ /* function exit code */ @@ -2256,7 +2256,7 @@ static Py_ssize_t __pyx_pf_5moses_7dictree_10Production_2__len__(struct __pyx_ob /* "moses/dictree.pyx":44 * return len(self.rhs) - * + * * def __getitem__(self, key): # <<<<<<<<<<<<<< * if 0 <= key < len(self.rhs): * return self.rhs[key] @@ -2288,7 +2288,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_4__getitem__(struct __pyx __Pyx_RefNannySetupContext("__getitem__", 0); /* "moses/dictree.pyx":45 - * + * * def __getitem__(self, key): * if 0 <= key < len(self.rhs): # <<<<<<<<<<<<<< * return self.rhs[key] @@ -2338,7 +2338,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_4__getitem__(struct __pyx * return self.rhs[key] * else: * return IndexError, 'Index %s out of range' % str(key) # <<<<<<<<<<<<<< - * + * * def __iter__(self): */ __Pyx_XDECREF(__pyx_r); @@ -2368,7 +2368,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_4__getitem__(struct __pyx /* "moses/dictree.pyx":44 * return len(self.rhs) - * + * * def __getitem__(self, key): # <<<<<<<<<<<<<< * if 0 <= key < len(self.rhs): * return self.rhs[key] @@ -2389,7 +2389,7 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato /* "moses/dictree.pyx":50 * return IndexError, 'Index %s out of range' % str(key) - * + * * def __iter__(self): # <<<<<<<<<<<<<< * for x in self.rhs: * yield x @@ -2468,11 +2468,11 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato if (unlikely(!__pyx_sent_value)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "moses/dictree.pyx":51 - * + * * def __iter__(self): * for x in self.rhs: # <<<<<<<<<<<<<< * yield x - * + * */ if (unlikely(__pyx_cur_scope->__pyx_v_self->rhs == Py_None)) { PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); @@ -2495,7 +2495,7 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato * def __iter__(self): * for x in self.rhs: * yield x # <<<<<<<<<<<<<< - * + * * def __contains__(self, item): */ __Pyx_INCREF(__pyx_cur_scope->__pyx_v_x); @@ -2519,7 +2519,7 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato /* "moses/dictree.pyx":50 * return IndexError, 'Index %s out of range' % str(key) - * + * * def __iter__(self): # <<<<<<<<<<<<<< * for x in self.rhs: * yield x @@ -2542,10 +2542,10 @@ static PyObject *__pyx_gb_5moses_7dictree_10Production_8generator(__pyx_Generato /* "moses/dictree.pyx":54 * yield x - * + * * def __contains__(self, item): # <<<<<<<<<<<<<< * return item in self.rhs - * + * */ /* Python wrapper */ @@ -2571,10 +2571,10 @@ static int __pyx_pf_5moses_7dictree_10Production_9__contains__(struct __pyx_obj_ __Pyx_RefNannySetupContext("__contains__", 0); /* "moses/dictree.pyx":55 - * + * * def __contains__(self, item): * return item in self.rhs # <<<<<<<<<<<<<< - * + * * def __reversed__(self): */ __pyx_t_1 = (__Pyx_PySequence_Contains(__pyx_v_item, __pyx_v_self->rhs, Py_EQ)); if (unlikely(__pyx_t_1 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -2583,10 +2583,10 @@ static int __pyx_pf_5moses_7dictree_10Production_9__contains__(struct __pyx_obj_ /* "moses/dictree.pyx":54 * yield x - * + * * def __contains__(self, item): # <<<<<<<<<<<<<< * return item in self.rhs - * + * */ /* function exit code */ @@ -2600,10 +2600,10 @@ static int __pyx_pf_5moses_7dictree_10Production_9__contains__(struct __pyx_obj_ /* "moses/dictree.pyx":57 * return item in self.rhs - * + * * def __reversed__(self): # <<<<<<<<<<<<<< * return reversed(self.rhs) - * + * */ /* Python wrapper */ @@ -2630,10 +2630,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_11__reversed__(struct __p __Pyx_RefNannySetupContext("__reversed__", 0); /* "moses/dictree.pyx":58 - * + * * def __reversed__(self): * return reversed(self.rhs) # <<<<<<<<<<<<<< - * + * * def __hash__(self): */ __Pyx_XDECREF(__pyx_r); @@ -2651,10 +2651,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_11__reversed__(struct __p /* "moses/dictree.pyx":57 * return item in self.rhs - * + * * def __reversed__(self): # <<<<<<<<<<<<<< * return reversed(self.rhs) - * + * */ /* function exit code */ @@ -2671,10 +2671,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_11__reversed__(struct __p /* "moses/dictree.pyx":60 * return reversed(self.rhs) - * + * * def __hash__(self): # <<<<<<<<<<<<<< * return hash(self.rhs) - * + * */ /* Python wrapper */ @@ -2701,10 +2701,10 @@ static Py_hash_t __pyx_pf_5moses_7dictree_10Production_13__hash__(struct __pyx_o __Pyx_RefNannySetupContext("__hash__", 0); /* "moses/dictree.pyx":61 - * + * * def __hash__(self): * return hash(self.rhs) # <<<<<<<<<<<<<< - * + * * def __str__(self): */ __pyx_t_1 = __pyx_v_self->rhs; @@ -2716,10 +2716,10 @@ static Py_hash_t __pyx_pf_5moses_7dictree_10Production_13__hash__(struct __pyx_o /* "moses/dictree.pyx":60 * return reversed(self.rhs) - * + * * def __hash__(self): # <<<<<<<<<<<<<< * return hash(self.rhs) - * + * */ /* function exit code */ @@ -2735,7 +2735,7 @@ static Py_hash_t __pyx_pf_5moses_7dictree_10Production_13__hash__(struct __pyx_o /* "moses/dictree.pyx":63 * return hash(self.rhs) - * + * * def __str__(self): # <<<<<<<<<<<<<< * if self.lhs: * return '%s -> %s' % (self.lhs, ' '.join(self.rhs)) @@ -2766,7 +2766,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob __Pyx_RefNannySetupContext("__str__", 0); /* "moses/dictree.pyx":64 - * + * * def __str__(self): * if self.lhs: # <<<<<<<<<<<<<< * return '%s -> %s' % (self.lhs, ' '.join(self.rhs)) @@ -2809,7 +2809,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob * return '%s -> %s' % (self.lhs, ' '.join(self.rhs)) * else: * return ' '.join(self.rhs) # <<<<<<<<<<<<<< - * + * * def __repr__(self): */ __Pyx_XDECREF(__pyx_r); @@ -2825,7 +2825,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob /* "moses/dictree.pyx":63 * return hash(self.rhs) - * + * * def __str__(self): # <<<<<<<<<<<<<< * if self.lhs: * return '%s -> %s' % (self.lhs, ' '.join(self.rhs)) @@ -2845,10 +2845,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_15__str__(struct __pyx_ob /* "moses/dictree.pyx":69 * return ' '.join(self.rhs) - * + * * def __repr__(self): # <<<<<<<<<<<<<< * return repr(self.as_tuple()) - * + * */ /* Python wrapper */ @@ -2875,10 +2875,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_17__repr__(struct __pyx_o __Pyx_RefNannySetupContext("__repr__", 0); /* "moses/dictree.pyx":70 - * + * * def __repr__(self): * return repr(self.as_tuple()) # <<<<<<<<<<<<<< - * + * * def as_tuple(self, lhs_first = False): */ __Pyx_XDECREF(__pyx_r); @@ -2896,10 +2896,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_17__repr__(struct __pyx_o /* "moses/dictree.pyx":69 * return ' '.join(self.rhs) - * + * * def __repr__(self): # <<<<<<<<<<<<<< * return repr(self.as_tuple()) - * + * */ /* function exit code */ @@ -2916,7 +2916,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_17__repr__(struct __pyx_o /* "moses/dictree.pyx":72 * return repr(self.as_tuple()) - * + * * def as_tuple(self, lhs_first = False): # <<<<<<<<<<<<<< * """ * Returns a tuple (lhs) + rhs or rhs + (lhs) depending on the flag 'lhs_first'. @@ -3066,7 +3066,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_19as_tuple(struct __pyx_o * return self.rhs + tuple([self.lhs]) * else: * return self.rhs # <<<<<<<<<<<<<< - * + * * def __richcmp__(self, other, op): */ __Pyx_XDECREF(__pyx_r); @@ -3077,7 +3077,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_19as_tuple(struct __pyx_o /* "moses/dictree.pyx":72 * return repr(self.as_tuple()) - * + * * def as_tuple(self, lhs_first = False): # <<<<<<<<<<<<<< * """ * Returns a tuple (lhs) + rhs or rhs + (lhs) depending on the flag 'lhs_first'. @@ -3097,7 +3097,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_19as_tuple(struct __pyx_o /* "moses/dictree.pyx":84 * return self.rhs - * + * * def __richcmp__(self, other, op): # <<<<<<<<<<<<<< * """ * The comparison uses 'as_tuple()', therefore in the CFG case, the lhs will be part of the production and it will be placed in the end @@ -3307,7 +3307,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_ * return x > y * elif op == 5: # <<<<<<<<<<<<<< * return x >= y - * + * */ __pyx_t_1 = PyObject_RichCompare(__pyx_v_op, __pyx_int_5, Py_EQ); __Pyx_XGOTREF(__pyx_t_1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3318,7 +3318,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_ * return x > y * elif op == 5: * return x >= y # <<<<<<<<<<<<<< - * + * * cdef class Alignment(list): */ __Pyx_XDECREF(__pyx_r); @@ -3330,7 +3330,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_ /* "moses/dictree.pyx":84 * return self.rhs - * + * * def __richcmp__(self, other, op): # <<<<<<<<<<<<<< * """ * The comparison uses 'as_tuple()', therefore in the CFG case, the lhs will be part of the production and it will be placed in the end @@ -3354,10 +3354,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_21__richcmp__(PyObject *_ /* "moses/dictree.pyx":30 * """ - * + * * cdef readonly bytes lhs # <<<<<<<<<<<<<< * cdef readonly tuple rhs - * + * */ /* Python wrapper */ @@ -3390,10 +3390,10 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_3lhs___get__(struct __pyx } /* "moses/dictree.pyx":31 - * + * * cdef readonly bytes lhs * cdef readonly tuple rhs # <<<<<<<<<<<<<< - * + * * def __init__(self, rhs, lhs = None): */ @@ -3428,7 +3428,7 @@ static PyObject *__pyx_pf_5moses_7dictree_10Production_3rhs___get__(struct __pyx /* "moses/dictree.pyx":110 * """ - * + * * def __init__(self, alignment): # <<<<<<<<<<<<<< * if type(alignment) is str: * pairs = [] @@ -3511,7 +3511,7 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_ __Pyx_RefNannySetupContext("__init__", 0); /* "moses/dictree.pyx":111 - * + * * def __init__(self, alignment): * if type(alignment) is str: # <<<<<<<<<<<<<< * pairs = [] @@ -3610,11 +3610,11 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_ } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { - __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0); - __pyx_t_8 = PyTuple_GET_ITEM(sequence, 1); + __pyx_t_4 = PyTuple_GET_ITEM(sequence, 0); + __pyx_t_8 = PyTuple_GET_ITEM(sequence, 1); } else { - __pyx_t_4 = PyList_GET_ITEM(sequence, 0); - __pyx_t_8 = PyList_GET_ITEM(sequence, 1); + __pyx_t_4 = PyList_GET_ITEM(sequence, 0); + __pyx_t_8 = PyList_GET_ITEM(sequence, 1); } __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(__pyx_t_8); @@ -3772,7 +3772,7 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_ * super(Alignment, self).__init__(alignment) * else: * ValueError, 'Cannot figure out pairs from: %s' % type(alignment) # <<<<<<<<<<<<<< - * + * * def __str__(self): */ __pyx_t_3 = __Pyx_PyString_Format(__pyx_kp_s_Cannot_figure_out_pairs_from_s, ((PyObject *)Py_TYPE(__pyx_v_alignment))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3791,7 +3791,7 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_ /* "moses/dictree.pyx":110 * """ - * + * * def __init__(self, alignment): # <<<<<<<<<<<<<< * if type(alignment) is str: * pairs = [] @@ -3819,10 +3819,10 @@ static int __pyx_pf_5moses_7dictree_9Alignment___init__(struct __pyx_obj_5moses_ /* "moses/dictree.pyx":122 * ValueError, 'Cannot figure out pairs from: %s' % type(alignment) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join('%d-%d' % (s, t) for s, t in self) - * + * */ /* Python wrapper */ @@ -3840,10 +3840,10 @@ static PyObject *__pyx_pw_5moses_7dictree_9Alignment_3__str__(PyObject *__pyx_v_ static PyObject *__pyx_gb_5moses_7dictree_9Alignment_7__str___2generator1(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ /* "moses/dictree.pyx":123 - * + * * def __str__(self): * return ' '.join('%d-%d' % (s, t) for s, t in self) # <<<<<<<<<<<<<< - * + * * cdef class FValues(list): */ @@ -3960,11 +3960,11 @@ static PyObject *__pyx_gb_5moses_7dictree_9Alignment_7__str___2generator1(__pyx_ } #if CYTHON_COMPILING_IN_CPYTHON if (likely(PyTuple_CheckExact(sequence))) { - __pyx_t_5 = PyTuple_GET_ITEM(sequence, 0); - __pyx_t_6 = PyTuple_GET_ITEM(sequence, 1); + __pyx_t_5 = PyTuple_GET_ITEM(sequence, 0); + __pyx_t_6 = PyTuple_GET_ITEM(sequence, 1); } else { - __pyx_t_5 = PyList_GET_ITEM(sequence, 0); - __pyx_t_6 = PyList_GET_ITEM(sequence, 1); + __pyx_t_5 = PyList_GET_ITEM(sequence, 0); + __pyx_t_6 = PyList_GET_ITEM(sequence, 1); } __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(__pyx_t_6); @@ -4056,10 +4056,10 @@ static PyObject *__pyx_gb_5moses_7dictree_9Alignment_7__str___2generator1(__pyx_ /* "moses/dictree.pyx":122 * ValueError, 'Cannot figure out pairs from: %s' % type(alignment) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join('%d-%d' % (s, t) for s, t in self) - * + * */ static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5moses_7dictree_Alignment *__pyx_v_self) { @@ -4083,10 +4083,10 @@ static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5 __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); /* "moses/dictree.pyx":123 - * + * * def __str__(self): * return ' '.join('%d-%d' % (s, t) for s, t in self) # <<<<<<<<<<<<<< - * + * * cdef class FValues(list): */ __Pyx_XDECREF(__pyx_r); @@ -4101,10 +4101,10 @@ static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5 /* "moses/dictree.pyx":122 * ValueError, 'Cannot figure out pairs from: %s' % type(alignment) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join('%d-%d' % (s, t) for s, t in self) - * + * */ /* function exit code */ @@ -4122,10 +4122,10 @@ static PyObject *__pyx_pf_5moses_7dictree_9Alignment_2__str__(struct __pyx_obj_5 /* "moses/dictree.pyx":131 * """ - * + * * def __init__(self, values): # <<<<<<<<<<<<<< * super(FValues, self).__init__(values) - * + * */ /* Python wrapper */ @@ -4192,10 +4192,10 @@ static int __pyx_pf_5moses_7dictree_7FValues___init__(struct __pyx_obj_5moses_7d __Pyx_RefNannySetupContext("__init__", 0); /* "moses/dictree.pyx":132 - * + * * def __init__(self, values): * super(FValues, self).__init__(values) # <<<<<<<<<<<<<< - * + * * def __str__(self): */ __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 132; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4225,10 +4225,10 @@ static int __pyx_pf_5moses_7dictree_7FValues___init__(struct __pyx_obj_5moses_7d /* "moses/dictree.pyx":131 * """ - * + * * def __init__(self, values): # <<<<<<<<<<<<<< * super(FValues, self).__init__(values) - * + * */ /* function exit code */ @@ -4247,10 +4247,10 @@ static int __pyx_pf_5moses_7dictree_7FValues___init__(struct __pyx_obj_5moses_7d /* "moses/dictree.pyx":134 * super(FValues, self).__init__(values) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join(str(x) for x in self) - * + * */ /* Python wrapper */ @@ -4268,10 +4268,10 @@ static PyObject *__pyx_pw_5moses_7dictree_7FValues_3__str__(PyObject *__pyx_v_se static PyObject *__pyx_gb_5moses_7dictree_7FValues_7__str___2generator2(__pyx_GeneratorObject *__pyx_generator, PyObject *__pyx_sent_value); /* proto */ /* "moses/dictree.pyx":135 - * + * * def __str__(self): * return ' '.join(str(x) for x in self) # <<<<<<<<<<<<<< - * + * * cdef class TargetProduction(Production): */ @@ -4422,10 +4422,10 @@ static PyObject *__pyx_gb_5moses_7dictree_7FValues_7__str___2generator2(__pyx_Ge /* "moses/dictree.pyx":134 * super(FValues, self).__init__(values) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join(str(x) for x in self) - * + * */ static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5moses_7dictree_FValues *__pyx_v_self) { @@ -4449,10 +4449,10 @@ static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5mo __Pyx_GIVEREF((PyObject *)__pyx_cur_scope->__pyx_v_self); /* "moses/dictree.pyx":135 - * + * * def __str__(self): * return ' '.join(str(x) for x in self) # <<<<<<<<<<<<<< - * + * * cdef class TargetProduction(Production): */ __Pyx_XDECREF(__pyx_r); @@ -4467,10 +4467,10 @@ static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5mo /* "moses/dictree.pyx":134 * super(FValues, self).__init__(values) - * + * * def __str__(self): # <<<<<<<<<<<<<< * return ' '.join(str(x) for x in self) - * + * */ /* function exit code */ @@ -4488,7 +4488,7 @@ static PyObject *__pyx_pf_5moses_7dictree_7FValues_2__str__(struct __pyx_obj_5mo /* "moses/dictree.pyx":144 * cdef readonly FValues scores - * + * * def __init__(self, rhs, scores, alignment = [], lhs = None): # <<<<<<<<<<<<<< * """ * :rhs right-hand side tokens (sequence of terminals and nonterminals) @@ -4632,7 +4632,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj * super(TargetProduction, self).__init__(rhs, lhs) * self.scores = FValues(scores) # <<<<<<<<<<<<<< * self.alignment = Alignment(alignment) - * + * */ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 152; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); @@ -4652,7 +4652,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj * super(TargetProduction, self).__init__(rhs, lhs) * self.scores = FValues(scores) * self.alignment = Alignment(alignment) # <<<<<<<<<<<<<< - * + * * @staticmethod */ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4671,7 +4671,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj /* "moses/dictree.pyx":144 * cdef readonly FValues scores - * + * * def __init__(self, rhs, scores, alignment = [], lhs = None): # <<<<<<<<<<<<<< * """ * :rhs right-hand side tokens (sequence of terminals and nonterminals) @@ -4692,7 +4692,7 @@ static int __pyx_pf_5moses_7dictree_16TargetProduction___init__(struct __pyx_obj } /* "moses/dictree.pyx":156 - * + * * @staticmethod * def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<< * """Returns the sign of key(y) - key(x). @@ -4790,7 +4790,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_2desc(PyObject *__p * Can only be used if scores is not an empty vector as * keys defaults to scores[0]""" * return fsign(key(y) - key(x)) # <<<<<<<<<<<<<< - * + * * def __str__(self): */ __Pyx_XDECREF(__pyx_r); @@ -4823,7 +4823,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_2desc(PyObject *__p goto __pyx_L0; /* "moses/dictree.pyx":156 - * + * * @staticmethod * def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<< * """Returns the sign of key(y) - key(x). @@ -4845,7 +4845,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_2desc(PyObject *__p /* "moses/dictree.pyx":162 * return fsign(key(y) - key(x)) - * + * * def __str__(self): # <<<<<<<<<<<<<< * """Returns a string such as: ||| [||| word-alignment info]""" * if self.lhs: @@ -4955,7 +4955,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p * return ' ||| '.join((' '.join(chain(self.rhs, lhs)), * str(self.scores), # <<<<<<<<<<<<<< * str(self.alignment))) - * + * */ __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 169; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); @@ -4970,7 +4970,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p * return ' ||| '.join((' '.join(chain(self.rhs, lhs)), * str(self.scores), * str(self.alignment))) # <<<<<<<<<<<<<< - * + * * def __repr__(self): */ __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 170; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5009,7 +5009,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p /* "moses/dictree.pyx":162 * return fsign(key(y) - key(x)) - * + * * def __str__(self): # <<<<<<<<<<<<<< * """Returns a string such as: ||| [||| word-alignment info]""" * if self.lhs: @@ -5032,10 +5032,10 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_4__str__(struct __p /* "moses/dictree.pyx":172 * str(self.alignment))) - * + * * def __repr__(self): # <<<<<<<<<<<<<< * return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment))) - * + * */ /* Python wrapper */ @@ -5065,10 +5065,10 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6__repr__(struct __ __Pyx_RefNannySetupContext("__repr__", 0); /* "moses/dictree.pyx":173 - * + * * def __repr__(self): * return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment))) # <<<<<<<<<<<<<< - * + * * cdef class QueryResult(list): */ __Pyx_XDECREF(__pyx_r); @@ -5115,10 +5115,10 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6__repr__(struct __ /* "moses/dictree.pyx":172 * str(self.alignment))) - * + * * def __repr__(self): # <<<<<<<<<<<<<< * return repr((repr(self.rhs), repr(self.lhs), repr(self.scores), repr(self.alignment))) - * + * */ /* function exit code */ @@ -5141,7 +5141,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6__repr__(struct __ * """ * cdef readonly Alignment alignment # <<<<<<<<<<<<<< * cdef readonly FValues scores - * + * */ /* Python wrapper */ @@ -5177,7 +5177,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_9alignment___get__( * """ * cdef readonly Alignment alignment * cdef readonly FValues scores # <<<<<<<<<<<<<< - * + * * def __init__(self, rhs, scores, alignment = [], lhs = None): */ @@ -5212,7 +5212,7 @@ static PyObject *__pyx_pf_5moses_7dictree_16TargetProduction_6scores___get__(str /* "moses/dictree.pyx":179 * cdef readonly Production source - * + * * def __init__(self, source, targets = []): # <<<<<<<<<<<<<< * super(QueryResult, self).__init__(targets) * self.source = source @@ -5294,11 +5294,11 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos __Pyx_RefNannySetupContext("__init__", 0); /* "moses/dictree.pyx":180 - * + * * def __init__(self, source, targets = []): * super(QueryResult, self).__init__(targets) # <<<<<<<<<<<<<< * self.source = source - * + * */ __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 180; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -5329,8 +5329,8 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos * def __init__(self, source, targets = []): * super(QueryResult, self).__init__(targets) * self.source = source # <<<<<<<<<<<<<< - * - * + * + * */ if (!(likely(((__pyx_v_source) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_source, __pyx_ptype_5moses_7dictree_Production))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_3 = __pyx_v_source; @@ -5343,7 +5343,7 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos /* "moses/dictree.pyx":179 * cdef readonly Production source - * + * * def __init__(self, source, targets = []): # <<<<<<<<<<<<<< * super(QueryResult, self).__init__(targets) * self.source = source @@ -5365,9 +5365,9 @@ static int __pyx_pf_5moses_7dictree_11QueryResult___init__(struct __pyx_obj_5mos /* "moses/dictree.pyx":177 * cdef class QueryResult(list): - * + * * cdef readonly Production source # <<<<<<<<<<<<<< - * + * * def __init__(self, source, targets = []): */ @@ -5401,7 +5401,7 @@ static PyObject *__pyx_pf_5moses_7dictree_11QueryResult_6source___get__(struct _ } /* "moses/dictree.pyx":187 - * + * * @classmethod * def canLoad(cls, path, bint wa = False): # <<<<<<<<<<<<<< * """Whether or not the path represents a valid table for that class.""" @@ -5488,14 +5488,14 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_canLoad(CYTHON_UNUSED * def canLoad(cls, path, bint wa = False): * """Whether or not the path represents a valid table for that class.""" * raise NotImplementedError # <<<<<<<<<<<<<< - * + * * def query(self, line, converter = None, cmp = None, key = None): */ __Pyx_Raise(__pyx_builtin_NotImplementedError, 0, 0, 0); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "moses/dictree.pyx":187 - * + * * @classmethod * def canLoad(cls, path, bint wa = False): # <<<<<<<<<<<<<< * """Whether or not the path represents a valid table for that class.""" @@ -5513,7 +5513,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_canLoad(CYTHON_UNUSED /* "moses/dictree.pyx":191 * raise NotImplementedError - * + * * def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -5616,7 +5616,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_2query(CYTHON_UNUSED * :return QueryResult * """ * raise NotImplementedError # <<<<<<<<<<<<<< - * + * * cdef class PhraseDictionaryTree(DictionaryTree): */ __Pyx_Raise(__pyx_builtin_NotImplementedError, 0, 0, 0); @@ -5624,7 +5624,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_2query(CYTHON_UNUSED /* "moses/dictree.pyx":191 * raise NotImplementedError - * + * * def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -5641,7 +5641,7 @@ static PyObject *__pyx_pf_5moses_7dictree_14DictionaryTree_2query(CYTHON_UNUSED /* "moses/dictree.pyx":213 * cdef readonly unsigned tableLimit - * + * * def __cinit__(self, bytes path, unsigned tableLimit = 20, unsigned nscores = 5, bint wa = False, delimiters = ' \t'): # <<<<<<<<<<<<<< * """ * :path stem of the table, e.g europarl.fr-en is the stem for europar.fr-en.binphr.* @@ -5771,7 +5771,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py /* "moses/dictree.pyx":221 * """ - * + * * if not PhraseDictionaryTree.canLoad(path, wa): # <<<<<<<<<<<<<< * raise ValueError, "'%s' doesn't seem a valid binary table." % path * self.path = path @@ -5798,7 +5798,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py if (__pyx_t_5) { /* "moses/dictree.pyx":222 - * + * * if not PhraseDictionaryTree.canLoad(path, wa): * raise ValueError, "'%s' doesn't seem a valid binary table." % path # <<<<<<<<<<<<<< * self.path = path @@ -5881,7 +5881,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py * self.tree = new cdictree.PhraseDictionaryTree() * self.tree.NeedAlignmentInfo(wa) # <<<<<<<<<<<<<< * self.tree.Read(path) - * + * */ __pyx_v_self->tree->NeedAlignmentInfo(__pyx_v_wa); @@ -5889,7 +5889,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py * self.tree = new cdictree.PhraseDictionaryTree() * self.tree.NeedAlignmentInfo(wa) * self.tree.Read(path) # <<<<<<<<<<<<<< - * + * * def __dealloc__(self): */ __pyx_t_6 = __pyx_convert_string_from_py_(__pyx_v_path); if (unlikely(PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -5897,7 +5897,7 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py /* "moses/dictree.pyx":213 * cdef readonly unsigned tableLimit - * + * * def __cinit__(self, bytes path, unsigned tableLimit = 20, unsigned nscores = 5, bint wa = False, delimiters = ' \t'): # <<<<<<<<<<<<<< * """ * :path stem of the table, e.g europarl.fr-en is the stem for europar.fr-en.binphr.* @@ -5919,10 +5919,10 @@ static int __pyx_pf_5moses_7dictree_20PhraseDictionaryTree___cinit__(struct __py /* "moses/dictree.pyx":232 * self.tree.Read(path) - * + * * def __dealloc__(self): # <<<<<<<<<<<<<< * del self.tree - * + * */ /* Python wrapper */ @@ -5941,20 +5941,20 @@ static void __pyx_pf_5moses_7dictree_20PhraseDictionaryTree_2__dealloc__(struct __Pyx_RefNannySetupContext("__dealloc__", 0); /* "moses/dictree.pyx":233 - * + * * def __dealloc__(self): * del self.tree # <<<<<<<<<<<<<< - * + * * @classmethod */ delete __pyx_v_self->tree; /* "moses/dictree.pyx":232 * self.tree.Read(path) - * + * * def __dealloc__(self): # <<<<<<<<<<<<<< * del self.tree - * + * */ /* function exit code */ @@ -5962,7 +5962,7 @@ static void __pyx_pf_5moses_7dictree_20PhraseDictionaryTree_2__dealloc__(struct } /* "moses/dictree.pyx":236 - * + * * @classmethod * def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<< * """This sanity check was added to the constructor, but you can access it from outside this class @@ -6368,7 +6368,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON * and os.path.isfile(stem + ".binphr.srcvoc") \ * and os.path.isfile(stem + ".binphr.tgtdata") \ # <<<<<<<<<<<<<< * and os.path.isfile(stem + ".binphr.tgtvoc") - * + * */ __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); @@ -6397,7 +6397,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON * and os.path.isfile(stem + ".binphr.srcvoc") \ * and os.path.isfile(stem + ".binphr.tgtdata") \ * and os.path.isfile(stem + ".binphr.tgtvoc") # <<<<<<<<<<<<<< - * + * * cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None): */ __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6449,7 +6449,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON } /* "moses/dictree.pyx":236 - * + * * @classmethod * def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<< * """This sanity check was added to the constructor, but you can access it from outside this class @@ -6475,7 +6475,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_4canLoad(CYTHON /* "moses/dictree.pyx":252 * and os.path.isfile(stem + ".binphr.tgtvoc") - * + * * cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None): # <<<<<<<<<<<<<< * """Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string) to a TargetProduction (python object).""" * cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())] @@ -6538,7 +6538,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre * cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())] * cdef list scores = [score for score in cand.scores] if converter is None else [converter(score) for score in cand.scores] # <<<<<<<<<<<<<< * return TargetProduction(words, scores, wa) - * + * */ __pyx_t_5 = (__pyx_v_converter == Py_None); if ((__pyx_t_5 != 0)) { @@ -6592,7 +6592,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre * cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())] * cdef list scores = [score for score in cand.scores] if converter is None else [converter(score) for score in cand.scores] * return TargetProduction(words, scores, wa) # <<<<<<<<<<<<<< - * + * * def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): */ __Pyx_XDECREF(((PyObject *)__pyx_r)); @@ -6616,7 +6616,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre /* "moses/dictree.pyx":252 * and os.path.isfile(stem + ".binphr.tgtvoc") - * + * * cdef TargetProduction getTargetProduction(self, cdictree.StringTgtCand& cand, wa = None, converter = None): # <<<<<<<<<<<<<< * """Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string) to a TargetProduction (python object).""" * cdef list words = [cand.tokens[i].c_str() for i in xrange(cand.tokens.size())] @@ -6641,7 +6641,7 @@ static struct __pyx_obj_5moses_7dictree_TargetProduction *__pyx_f_5moses_7dictre /* "moses/dictree.pyx":258 * return TargetProduction(words, scores, wa) - * + * * def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -6738,7 +6738,7 @@ static PyObject *__pyx_gb_5moses_7dictree_20PhraseDictionaryTree_5query_2generat * cdef vector[string]* wa = NULL * cdef Production source = Production(f.c_str() for f in fphrase) # <<<<<<<<<<<<<< * cdef QueryResult results = QueryResult(source) - * + * */ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_5query_genexpr(PyObject *__pyx_self) { @@ -6836,7 +6836,7 @@ static PyObject *__pyx_gb_5moses_7dictree_20PhraseDictionaryTree_5query_2generat /* "moses/dictree.pyx":258 * return TargetProduction(words, scores, wa) - * + * * def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -6930,7 +6930,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _ * cdef vector[string]* wa = NULL * cdef Production source = Production(f.c_str() for f in fphrase) # <<<<<<<<<<<<<< * cdef QueryResult results = QueryResult(source) - * + * */ __pyx_t_1 = __pyx_pf_5moses_7dictree_20PhraseDictionaryTree_5query_genexpr(((PyObject*)__pyx_cur_scope)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -6949,7 +6949,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _ * cdef vector[string]* wa = NULL * cdef Production source = Production(f.c_str() for f in fphrase) * cdef QueryResult results = QueryResult(source) # <<<<<<<<<<<<<< - * + * * if not self.wa: */ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6965,7 +6965,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _ /* "moses/dictree.pyx":274 * cdef QueryResult results = QueryResult(source) - * + * * if not self.wa: # <<<<<<<<<<<<<< * self.tree.GetTargetCandidates(fphrase, rv[0]) * results.extend([self.getTargetProduction(candidate, None, converter) for candidate in rv[0]]) @@ -6974,7 +6974,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _ if (__pyx_t_6) { /* "moses/dictree.pyx":275 - * + * * if not self.wa: * self.tree.GetTargetCandidates(fphrase, rv[0]) # <<<<<<<<<<<<<< * results.extend([self.getTargetProduction(candidate, None, converter) for candidate in rv[0]]) @@ -7175,7 +7175,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _ * return QueryResult(source, results[0:self.tableLimit]) * else: * return results # <<<<<<<<<<<<<< - * + * * cdef class OnDiskWrapper(DictionaryTree): */ __Pyx_XDECREF(__pyx_r); @@ -7186,7 +7186,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _ /* "moses/dictree.pyx":258 * return TargetProduction(words, scores, wa) - * + * * def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -7211,7 +7211,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_6query(struct _ } /* "moses/dictree.pyx":207 - * + * * cdef cdictree.PhraseDictionaryTree* tree * cdef readonly bytes path # <<<<<<<<<<<<<< * cdef readonly unsigned nscores @@ -7346,7 +7346,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_2wa___get__(str * cdef readonly bint wa * cdef readonly bytes delimiters # <<<<<<<<<<<<<< * cdef readonly unsigned tableLimit - * + * */ /* Python wrapper */ @@ -7382,7 +7382,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_10delimiters___ * cdef readonly bint wa * cdef readonly bytes delimiters * cdef readonly unsigned tableLimit # <<<<<<<<<<<<<< - * + * * def __cinit__(self, bytes path, unsigned tableLimit = 20, unsigned nscores = 5, bint wa = False, delimiters = ' \t'): */ @@ -7427,7 +7427,7 @@ static PyObject *__pyx_pf_5moses_7dictree_20PhraseDictionaryTree_10tableLimit___ /* "moses/dictree.pyx":297 * cdef readonly unsigned tableLimit - * + * * def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'): # <<<<<<<<<<<<<< * self.delimiters = delimiters * self.tableLimit = tableLimit @@ -7527,7 +7527,7 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5 __Pyx_RefNannySetupContext("__cinit__", 0); /* "moses/dictree.pyx":298 - * + * * def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'): * self.delimiters = delimiters # <<<<<<<<<<<<<< * self.tableLimit = tableLimit @@ -7565,7 +7565,7 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5 * self.wrapper = new condiskpt.OnDiskWrapper() * self.wrapper.BeginLoad(string(path)) # <<<<<<<<<<<<<< * self.finder = new condiskpt.OnDiskQuery(self.wrapper[0]) - * + * */ __pyx_t_2 = __Pyx_PyObject_AsString(__pyx_v_path); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} try { @@ -7580,14 +7580,14 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5 * self.wrapper = new condiskpt.OnDiskWrapper() * self.wrapper.BeginLoad(string(path)) * self.finder = new condiskpt.OnDiskQuery(self.wrapper[0]) # <<<<<<<<<<<<<< - * + * * @classmethod */ __pyx_v_self->finder = new OnDiskPt::OnDiskQuery((__pyx_v_self->wrapper[0])); /* "moses/dictree.pyx":297 * cdef readonly unsigned tableLimit - * + * * def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'): # <<<<<<<<<<<<<< * self.delimiters = delimiters * self.tableLimit = tableLimit @@ -7606,7 +7606,7 @@ static int __pyx_pf_5moses_7dictree_13OnDiskWrapper___cinit__(struct __pyx_obj_5 } /* "moses/dictree.pyx":305 - * + * * @classmethod * def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<< * return os.path.isfile(stem + "/Misc.dat") \ @@ -7808,7 +7808,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED * and os.path.isfile(stem + "/TargetColl.dat") \ * and os.path.isfile(stem + "/TargetInd.dat") \ # <<<<<<<<<<<<<< * and os.path.isfile(stem + "/Vocab.dat") - * + * */ __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); @@ -7837,7 +7837,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED * and os.path.isfile(stem + "/TargetColl.dat") \ * and os.path.isfile(stem + "/TargetInd.dat") \ * and os.path.isfile(stem + "/Vocab.dat") # <<<<<<<<<<<<<< - * + * * cdef Production getSourceProduction(self, vector[string] ftokens): */ __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_os); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -7888,7 +7888,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED goto __pyx_L0; /* "moses/dictree.pyx":305 - * + * * @classmethod * def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<< * return os.path.isfile(stem + "/Misc.dat") \ @@ -7914,7 +7914,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_2canLoad(CYTHON_UNUSED /* "moses/dictree.pyx":312 * and os.path.isfile(stem + "/Vocab.dat") - * + * * cdef Production getSourceProduction(self, vector[string] ftokens): # <<<<<<<<<<<<<< * cdef list tokens = [f.c_str() for f in ftokens] * return Production(tokens[:-1], tokens[-1]) @@ -7936,11 +7936,11 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On __Pyx_RefNannySetupContext("getSourceProduction", 0); /* "moses/dictree.pyx":313 - * + * * cdef Production getSourceProduction(self, vector[string] ftokens): * cdef list tokens = [f.c_str() for f in ftokens] # <<<<<<<<<<<<<< * return Production(tokens[:-1], tokens[-1]) - * + * */ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -7962,7 +7962,7 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On * cdef Production getSourceProduction(self, vector[string] ftokens): * cdef list tokens = [f.c_str() for f in ftokens] * return Production(tokens[:-1], tokens[-1]) # <<<<<<<<<<<<<< - * + * * def query(self, line, converter = None, cmp = None, key = None): */ __Pyx_XDECREF(((PyObject *)__pyx_r)); @@ -7987,7 +7987,7 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On /* "moses/dictree.pyx":312 * and os.path.isfile(stem + "/Vocab.dat") - * + * * cdef Production getSourceProduction(self, vector[string] ftokens): # <<<<<<<<<<<<<< * cdef list tokens = [f.c_str() for f in ftokens] * return Production(tokens[:-1], tokens[-1]) @@ -8009,7 +8009,7 @@ static struct __pyx_obj_5moses_7dictree_Production *__pyx_f_5moses_7dictree_13On /* "moses/dictree.pyx":316 * return Production(tokens[:-1], tokens[-1]) - * + * * def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -8231,7 +8231,7 @@ static PyObject *__pyx_gb_5moses_7dictree_13OnDiskWrapper_5query_2generator4(__p /* "moses/dictree.pyx":316 * return Production(tokens[:-1], tokens[-1]) - * + * * def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -8549,7 +8549,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob * if cmp: * results.sort(cmp=cmp, key=key) # <<<<<<<<<<<<<< * return results - * + * */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_results), __pyx_n_s_sort); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -8570,7 +8570,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob * if cmp: * results.sort(cmp=cmp, key=key) * return results # <<<<<<<<<<<<<< - * + * * def load(path, nscores, limit): */ __Pyx_XDECREF(__pyx_r); @@ -8580,7 +8580,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob /* "moses/dictree.pyx":316 * return Production(tokens[:-1], tokens[-1]) - * + * * def query(self, line, converter = None, cmp = None, key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -8611,7 +8611,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_4query(struct __pyx_ob * cdef condiskpt.OnDiskQuery *finder * cdef readonly bytes delimiters # <<<<<<<<<<<<<< * cdef readonly unsigned tableLimit - * + * */ /* Python wrapper */ @@ -8647,7 +8647,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_10delimiters___get__(s * cdef condiskpt.OnDiskQuery *finder * cdef readonly bytes delimiters * cdef readonly unsigned tableLimit # <<<<<<<<<<<<<< - * + * * def __cinit__(self, bytes path, unsigned tableLimit = 20, delimiters = ' \t'): */ @@ -8692,7 +8692,7 @@ static PyObject *__pyx_pf_5moses_7dictree_13OnDiskWrapper_10tableLimit___get__(s /* "moses/dictree.pyx":349 * return results - * + * * def load(path, nscores, limit): # <<<<<<<<<<<<<< * """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree.""" * if PhraseDictionaryTree.canLoad(path, False): @@ -8953,7 +8953,7 @@ static PyObject *__pyx_pf_5moses_7dictree_2load(CYTHON_UNUSED PyObject *__pyx_se /* "moses/dictree.pyx":349 * return results - * + * * def load(path, nscores, limit): # <<<<<<<<<<<<<< * """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree.""" * if PhraseDictionaryTree.canLoad(path, False): @@ -8973,7 +8973,7 @@ static PyObject *__pyx_pf_5moses_7dictree_2load(CYTHON_UNUSED PyObject *__pyx_se } /* "string.from_py":13 - * + * * @cname("__pyx_convert_string_from_py_") * cdef string __pyx_convert_string_from_py_(object o) except *: # <<<<<<<<<<<<<< * cdef Py_ssize_t length @@ -8996,7 +8996,7 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) { * cdef Py_ssize_t length * cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length) # <<<<<<<<<<<<<< * return string(data, length) - * + * */ __pyx_t_1 = __Pyx_PyObject_AsStringAndSize(__pyx_v_o, (&__pyx_v_length)); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_data = __pyx_t_1; @@ -9005,14 +9005,14 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) { * cdef Py_ssize_t length * cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length) * return string(data, length) # <<<<<<<<<<<<<< - * - * + * + * */ __pyx_r = std::string(__pyx_v_data, __pyx_v_length); goto __pyx_L0; /* "string.from_py":13 - * + * * @cname("__pyx_convert_string_from_py_") * cdef string __pyx_convert_string_from_py_(object o) except *: # <<<<<<<<<<<<<< * cdef Py_ssize_t length @@ -9028,11 +9028,11 @@ static std::string __pyx_convert_string_from_py_(PyObject *__pyx_v_o) { } /* "vector.to_py":63 - * + * * @cname("__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue") * cdef object __pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(vector[X]& v): # <<<<<<<<<<<<<< * return [X_to_py(v[i]) for i in range(v.size())] - * + * */ static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(const std::vector<__pyx_t_5moses_8cdictree_FValue> &__pyx_v_v) { @@ -9052,8 +9052,8 @@ static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(cons * @cname("__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue") * cdef object __pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(vector[X]& v): * return [X_to_py(v[i]) for i in range(v.size())] # <<<<<<<<<<<<<< - * - * + * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[2]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -9071,11 +9071,11 @@ static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(cons goto __pyx_L0; /* "vector.to_py":63 - * + * * @cname("__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue") * cdef object __pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(vector[X]& v): # <<<<<<<<<<<<<< * return [X_to_py(v[i]) for i in range(v.size())] - * + * */ /* function exit code */ @@ -9091,11 +9091,11 @@ static PyObject *__pyx_convert_vector_to_py___pyx_t_5moses_8cdictree_FValue(cons } /* "pair.to_py":180 - * + * * @cname("__pyx_convert_pair_to_py_int____int") * cdef object __pyx_convert_pair_to_py_int____int(pair[X,Y]& p): # <<<<<<<<<<<<<< * return X_to_py(p.first), Y_to_py(p.second) - * + * */ static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair &__pyx_v_p) { @@ -9113,8 +9113,8 @@ static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair &_ * @cname("__pyx_convert_pair_to_py_int____int") * cdef object __pyx_convert_pair_to_py_int____int(pair[X,Y]& p): * return X_to_py(p.first), Y_to_py(p.second) # <<<<<<<<<<<<<< - * - * + * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_p.first); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[3]; __pyx_lineno = 181; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -9134,11 +9134,11 @@ static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair &_ goto __pyx_L0; /* "pair.to_py":180 - * + * * @cname("__pyx_convert_pair_to_py_int____int") * cdef object __pyx_convert_pair_to_py_int____int(pair[X,Y]& p): # <<<<<<<<<<<<<< * return X_to_py(p.first), Y_to_py(p.second) - * + * */ /* function exit code */ @@ -9155,11 +9155,11 @@ static PyObject *__pyx_convert_pair_to_py_int____int(const std::pair &_ } /* "vector.to_py":63 - * + * * @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair") * cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v): # <<<<<<<<<<<<<< * return [X_to_py(v[i]) for i in range(v.size())] - * + * */ static PyObject *__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(const std::vector &__pyx_v_v) { @@ -9179,8 +9179,8 @@ static PyObject *__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(const std: * @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair") * cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v): * return [X_to_py(v[i]) for i in range(v.size())] # <<<<<<<<<<<<<< - * - * + * + * */ __Pyx_XDECREF(__pyx_r); __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[4]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -9198,11 +9198,11 @@ static PyObject *__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(const std: goto __pyx_L0; /* "vector.to_py":63 - * + * * @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair") * cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v): # <<<<<<<<<<<<<< * return [X_to_py(v[i]) for i in range(v.size())] - * + * */ /* function exit code */ @@ -11255,7 +11255,7 @@ static int __Pyx_InitCachedConstants(void) { * elif isinstance(data, unicode): * return data.encode('UTF-8') # <<<<<<<<<<<<<< * raise TypeError('Cannot convert %s to string' % type(data)) - * + * */ __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_UTF_8); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); @@ -11273,7 +11273,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_tuple__4); /* "moses/dictree.pyx":156 - * + * * @staticmethod * def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<< * """Returns the sign of key(y) - key(x). @@ -11286,7 +11286,7 @@ static int __Pyx_InitCachedConstants(void) { /* "moses/dictree.pyx":349 * return results - * + * * def load(path, nscores, limit): # <<<<<<<<<<<<<< * """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree.""" * if PhraseDictionaryTree.canLoad(path, False): @@ -11539,7 +11539,7 @@ PyMODINIT_FUNC PyInit_dictree(void) * cimport cdictree * cimport condiskpt * from math import log # <<<<<<<<<<<<<< - * + * * cpdef int fsign(float x): */ __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 10; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -11558,7 +11558,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":144 * cdef readonly FValues scores - * + * * def __init__(self, rhs, scores, alignment = [], lhs = None): # <<<<<<<<<<<<<< * """ * :rhs right-hand side tokens (sequence of terminals and nonterminals) @@ -11570,7 +11570,7 @@ PyMODINIT_FUNC PyInit_dictree(void) __pyx_t_1 = 0; /* "moses/dictree.pyx":156 - * + * * @staticmethod * def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<< * """Returns the sign of key(y) - key(x). @@ -11584,7 +11584,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":155 * self.alignment = Alignment(alignment) - * + * * @staticmethod # <<<<<<<<<<<<<< * def desc(x, y, key = lambda r: r.scores[0]): * """Returns the sign of key(y) - key(x). @@ -11604,7 +11604,7 @@ PyMODINIT_FUNC PyInit_dictree(void) PyType_Modified(__pyx_ptype_5moses_7dictree_TargetProduction); /* "moses/dictree.pyx":156 - * + * * @staticmethod * def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<< * """Returns the sign of key(y) - key(x). @@ -11615,7 +11615,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":155 * self.alignment = Alignment(alignment) - * + * * @staticmethod # <<<<<<<<<<<<<< * def desc(x, y, key = lambda r: r.scores[0]): * """Returns the sign of key(y) - key(x). @@ -11634,7 +11634,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":179 * cdef readonly Production source - * + * * def __init__(self, source, targets = []): # <<<<<<<<<<<<<< * super(QueryResult, self).__init__(targets) * self.source = source @@ -11646,7 +11646,7 @@ PyMODINIT_FUNC PyInit_dictree(void) __pyx_t_1 = 0; /* "moses/dictree.pyx":187 - * + * * @classmethod * def canLoad(cls, path, bint wa = False): # <<<<<<<<<<<<<< * """Whether or not the path represents a valid table for that class.""" @@ -11657,7 +11657,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":186 * cdef class DictionaryTree(object): - * + * * @classmethod # <<<<<<<<<<<<<< * def canLoad(cls, path, bint wa = False): * """Whether or not the path represents a valid table for that class.""" @@ -11670,7 +11670,7 @@ PyMODINIT_FUNC PyInit_dictree(void) PyType_Modified(__pyx_ptype_5moses_7dictree_DictionaryTree); /* "moses/dictree.pyx":236 - * + * * @classmethod * def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<< * """This sanity check was added to the constructor, but you can access it from outside this class @@ -11681,7 +11681,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":235 * del self.tree - * + * * @classmethod # <<<<<<<<<<<<<< * def canLoad(cls, stem, bint wa = False): * """This sanity check was added to the constructor, but you can access it from outside this class @@ -11695,7 +11695,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":258 * return TargetProduction(words, scores, wa) - * + * * def query(self, line, converter = lambda x: log(x), cmp = lambda x, y: fsign(y.scores[2] - x.scores[2]), key = None): # <<<<<<<<<<<<<< * """ * Returns a list of target productions that translate a given source production @@ -11712,7 +11712,7 @@ PyMODINIT_FUNC PyInit_dictree(void) __pyx_t_1 = 0; /* "moses/dictree.pyx":305 - * + * * @classmethod * def canLoad(cls, stem, bint wa = False): # <<<<<<<<<<<<<< * return os.path.isfile(stem + "/Misc.dat") \ @@ -11723,7 +11723,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":304 * self.finder = new condiskpt.OnDiskQuery(self.wrapper[0]) - * + * * @classmethod # <<<<<<<<<<<<<< * def canLoad(cls, stem, bint wa = False): * return os.path.isfile(stem + "/Misc.dat") \ @@ -11737,7 +11737,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":349 * return results - * + * * def load(path, nscores, limit): # <<<<<<<<<<<<<< * """Finds out the correct implementation depending on the content of 'path' and returns the appropriate dictionary tree.""" * if PhraseDictionaryTree.canLoad(path, False): @@ -11749,7 +11749,7 @@ PyMODINIT_FUNC PyInit_dictree(void) /* "moses/dictree.pyx":1 * # This module wraps phrase/rule tables # <<<<<<<<<<<<<< - * + * * from libcpp.string cimport string */ __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -11758,11 +11758,11 @@ PyMODINIT_FUNC PyInit_dictree(void) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "vector.to_py":63 - * + * * @cname("__pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair") * cdef object __pyx_convert_vector_to_py_OnDiskPt_3a__3a_AlignPair(vector[X]& v): # <<<<<<<<<<<<<< * return [X_to_py(v[i]) for i in range(v.size())] - * + * */ goto __pyx_L0; __pyx_L1_error:; diff --git a/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp b/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp index cfc15d592..989c727a2 100755 --- a/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp +++ b/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp @@ -1,231 +1,231 @@ -// XGetopt.cpp Version 1.2 -// -// Author: Hans Dietrich -// hdietrich2@hotmail.com -// -// Description: -// XGetopt.cpp implements getopt(), a function to parse command lines. -// -// History -// Version 1.2 - 2003 May 17 -// - Added Unicode support -// -// Version 1.1 - 2002 March 10 -// - Added example to XGetopt.cpp module header -// -// This software is released into the public domain. -// You are free to use it in any way you like. -// -// This software is provided "as is" with no expressed -// or implied warranty. I accept no liability for any -// damage or loss of business that this software may cause. -// -/////////////////////////////////////////////////////////////////////////////// - - -/////////////////////////////////////////////////////////////////////////////// -// if you are using precompiled headers then include this line: -/////////////////////////////////////////////////////////////////////////////// - - -/////////////////////////////////////////////////////////////////////////////// -// if you are not using precompiled headers then include these lines: -//#include -//#include -//#include -/////////////////////////////////////////////////////////////////////////////// - - -#include -#include -#include -#include "WIN32_functions.h" - - -/////////////////////////////////////////////////////////////////////////////// -// -// X G e t o p t . c p p -// -// -// NAME -// getopt -- parse command line options -// -// SYNOPSIS -// int getopt(int argc, char *argv[], char *optstring) -// -// extern char *optarg; -// extern int optind; -// -// DESCRIPTION -// The getopt() function parses the command line arguments. Its -// arguments argc and argv are the argument count and array as -// passed into the application on program invocation. In the case -// of Visual C++ programs, argc and argv are available via the -// variables __argc and __argv (double underscores), respectively. -// getopt returns the next option letter in argv that matches a -// letter in optstring. (Note: Unicode programs should use -// __targv instead of __argv. Also, all character and string -// literals should be enclosed in ( ) ). -// -// optstring is a string of recognized option letters; if a letter -// is followed by a colon, the option is expected to have an argument -// that may or may not be separated from it by white space. optarg -// is set to point to the start of the option argument on return from -// getopt. -// -// Option letters may be combined, e.g., "-ab" is equivalent to -// "-a -b". Option letters are case sensitive. -// -// getopt places in the external variable optind the argv index -// of the next argument to be processed. optind is initialized -// to 0 before the first call to getopt. -// -// When all options have been processed (i.e., up to the first -// non-option argument), getopt returns EOF, optarg will point -// to the argument, and optind will be set to the argv index of -// the argument. If there are no non-option arguments, optarg -// will be set to NULL. -// -// The special option "--" may be used to delimit the end of the -// options; EOF will be returned, and "--" (and everything after it) -// will be skipped. -// -// RETURN VALUE -// For option letters contained in the string optstring, getopt -// will return the option letter. getopt returns a question mark (?) -// when it encounters an option letter not included in optstring. -// EOF is returned when processing is finished. -// -// BUGS -// 1) Long options are not supported. -// 2) The GNU double-colon extension is not supported. -// 3) The environment variable POSIXLY_CORRECT is not supported. -// 4) The + syntax is not supported. -// 5) The automatic permutation of arguments is not supported. -// 6) This implementation of getopt() returns EOF if an error is -// encountered, instead of -1 as the latest standard requires. -// -// EXAMPLE -// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[]) -// { -// int c; -// -// while ((c = getopt(argc, argv, ("aBn:"))) != EOF) -// { -// switch (c) -// { -// case ('a'): -// TRACE(("option a\n")); -// // -// // set some flag here -// // -// break; -// -// case ('B'): -// TRACE( ("option B\n")); -// // -// // set some other flag here -// // -// break; -// -// case ('n'): -// TRACE(("option n: value=%d\n"), atoi(optarg)); -// // -// // do something with value here -// // -// break; -// -// case ('?'): -// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]); -// return FALSE; -// break; -// -// default: -// TRACE(("WARNING: no handler for option %c\n"), c); -// return FALSE; -// break; -// } -// } -// // -// // check for non-option args here -// // -// return TRUE; -// } -// -/////////////////////////////////////////////////////////////////////////////// - -char *optarg; // global argument pointer -int optind = 0; // global argv index - -int getopt(int argc, char *argv[], char *optstring) -{ - static char *next = NULL; - if (optind == 0) - next = NULL; - - optarg = NULL; - - if (next == NULL || *next =='\0') { - if (optind == 0) - optind++; - - if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) { - optarg = NULL; - if (optind < argc) - optarg = argv[optind]; - return EOF; - } - - if (strcmp(argv[optind], "--") == 0) { - optind++; - optarg = NULL; - if (optind < argc) - optarg = argv[optind]; - return EOF; - } - - next = argv[optind]; - next++; // skip past - - optind++; - } - - char c = *next++; - char *cp = strchr(optstring, c); - - if (cp == NULL || c == (':')) - return ('?'); - - cp++; - if (*cp == (':')) { - if (*next != ('\0')) { - optarg = next; - next = NULL; - } else if (optind < argc) { - optarg = argv[optind]; - optind++; - } else { - return ('?'); - } - } - - return c; -} - -// for an overview, see -// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1. -double lgamma(int x) -{ - // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum)); - if (x <= 2) { - return 0.0; - } - static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5}; - double tmp=(double)x+5.5; - tmp -= (((double)x)+0.5)*log(tmp); - double y=(double)x; - double sum = 1.000000000190015; - for (size_t j=0; j<6; ++j) { - sum += coefs[j]/++y; - } - return -tmp+log(2.5066282746310005*sum/(double)x); -} +// XGetopt.cpp Version 1.2 +// +// Author: Hans Dietrich +// hdietrich2@hotmail.com +// +// Description: +// XGetopt.cpp implements getopt(), a function to parse command lines. +// +// History +// Version 1.2 - 2003 May 17 +// - Added Unicode support +// +// Version 1.1 - 2002 March 10 +// - Added example to XGetopt.cpp module header +// +// This software is released into the public domain. +// You are free to use it in any way you like. +// +// This software is provided "as is" with no expressed +// or implied warranty. I accept no liability for any +// damage or loss of business that this software may cause. +// +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// if you are using precompiled headers then include this line: +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// if you are not using precompiled headers then include these lines: +//#include +//#include +//#include +/////////////////////////////////////////////////////////////////////////////// + + +#include +#include +#include +#include "WIN32_functions.h" + + +/////////////////////////////////////////////////////////////////////////////// +// +// X G e t o p t . c p p +// +// +// NAME +// getopt -- parse command line options +// +// SYNOPSIS +// int getopt(int argc, char *argv[], char *optstring) +// +// extern char *optarg; +// extern int optind; +// +// DESCRIPTION +// The getopt() function parses the command line arguments. Its +// arguments argc and argv are the argument count and array as +// passed into the application on program invocation. In the case +// of Visual C++ programs, argc and argv are available via the +// variables __argc and __argv (double underscores), respectively. +// getopt returns the next option letter in argv that matches a +// letter in optstring. (Note: Unicode programs should use +// __targv instead of __argv. Also, all character and string +// literals should be enclosed in ( ) ). +// +// optstring is a string of recognized option letters; if a letter +// is followed by a colon, the option is expected to have an argument +// that may or may not be separated from it by white space. optarg +// is set to point to the start of the option argument on return from +// getopt. +// +// Option letters may be combined, e.g., "-ab" is equivalent to +// "-a -b". Option letters are case sensitive. +// +// getopt places in the external variable optind the argv index +// of the next argument to be processed. optind is initialized +// to 0 before the first call to getopt. +// +// When all options have been processed (i.e., up to the first +// non-option argument), getopt returns EOF, optarg will point +// to the argument, and optind will be set to the argv index of +// the argument. If there are no non-option arguments, optarg +// will be set to NULL. +// +// The special option "--" may be used to delimit the end of the +// options; EOF will be returned, and "--" (and everything after it) +// will be skipped. +// +// RETURN VALUE +// For option letters contained in the string optstring, getopt +// will return the option letter. getopt returns a question mark (?) +// when it encounters an option letter not included in optstring. +// EOF is returned when processing is finished. +// +// BUGS +// 1) Long options are not supported. +// 2) The GNU double-colon extension is not supported. +// 3) The environment variable POSIXLY_CORRECT is not supported. +// 4) The + syntax is not supported. +// 5) The automatic permutation of arguments is not supported. +// 6) This implementation of getopt() returns EOF if an error is +// encountered, instead of -1 as the latest standard requires. +// +// EXAMPLE +// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[]) +// { +// int c; +// +// while ((c = getopt(argc, argv, ("aBn:"))) != EOF) +// { +// switch (c) +// { +// case ('a'): +// TRACE(("option a\n")); +// // +// // set some flag here +// // +// break; +// +// case ('B'): +// TRACE( ("option B\n")); +// // +// // set some other flag here +// // +// break; +// +// case ('n'): +// TRACE(("option n: value=%d\n"), atoi(optarg)); +// // +// // do something with value here +// // +// break; +// +// case ('?'): +// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]); +// return FALSE; +// break; +// +// default: +// TRACE(("WARNING: no handler for option %c\n"), c); +// return FALSE; +// break; +// } +// } +// // +// // check for non-option args here +// // +// return TRUE; +// } +// +/////////////////////////////////////////////////////////////////////////////// + +char *optarg; // global argument pointer +int optind = 0; // global argv index + +int getopt(int argc, char *argv[], char *optstring) +{ + static char *next = NULL; + if (optind == 0) + next = NULL; + + optarg = NULL; + + if (next == NULL || *next =='\0') { + if (optind == 0) + optind++; + + if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) { + optarg = NULL; + if (optind < argc) + optarg = argv[optind]; + return EOF; + } + + if (strcmp(argv[optind], "--") == 0) { + optind++; + optarg = NULL; + if (optind < argc) + optarg = argv[optind]; + return EOF; + } + + next = argv[optind]; + next++; // skip past - + optind++; + } + + char c = *next++; + char *cp = strchr(optstring, c); + + if (cp == NULL || c == (':')) + return ('?'); + + cp++; + if (*cp == (':')) { + if (*next != ('\0')) { + optarg = next; + next = NULL; + } else if (optind < argc) { + optarg = argv[optind]; + optind++; + } else { + return ('?'); + } + } + + return c; +} + +// for an overview, see +// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1. +double lgamma(int x) +{ + // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum)); + if (x <= 2) { + return 0.0; + } + static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5}; + double tmp=(double)x+5.5; + tmp -= (((double)x)+0.5)*log(tmp); + double y=(double)x; + double sum = 1.000000000190015; + for (size_t j=0; j<6; ++j) { + sum += coefs[j]/++y; + } + return -tmp+log(2.5066282746310005*sum/(double)x); +} diff --git a/contrib/relent-filter/sigtest-filter/WIN32_functions.h b/contrib/relent-filter/sigtest-filter/WIN32_functions.h index 6a719392e..ad644018b 100755 --- a/contrib/relent-filter/sigtest-filter/WIN32_functions.h +++ b/contrib/relent-filter/sigtest-filter/WIN32_functions.h @@ -1,24 +1,24 @@ -// XGetopt.h Version 1.2 -// -// Author: Hans Dietrich -// hdietrich2@hotmail.com -// -// This software is released into the public domain. -// You are free to use it in any way you like. -// -// This software is provided "as is" with no expressed -// or implied warranty. I accept no liability for any -// damage or loss of business that this software may cause. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef XGETOPT_H -#define XGETOPT_H - -extern int optind, opterr; -extern char *optarg; - -int getopt(int argc, char *argv[], char *optstring); -double lgamma(int x); - -#endif //XGETOPT_H +// XGetopt.h Version 1.2 +// +// Author: Hans Dietrich +// hdietrich2@hotmail.com +// +// This software is released into the public domain. +// You are free to use it in any way you like. +// +// This software is provided "as is" with no expressed +// or implied warranty. I accept no liability for any +// damage or loss of business that this software may cause. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef XGETOPT_H +#define XGETOPT_H + +extern int optind, opterr; +extern char *optarg; + +int getopt(int argc, char *argv[], char *optstring); +double lgamma(int x); + +#endif //XGETOPT_H diff --git a/contrib/relent-filter/sigtest-filter/filter-pt.cpp b/contrib/relent-filter/sigtest-filter/filter-pt.cpp index 4a51953ea..e2408900d 100755 --- a/contrib/relent-filter/sigtest-filter/filter-pt.cpp +++ b/contrib/relent-filter/sigtest-filter/filter-pt.cpp @@ -1,5 +1,5 @@ -#include +#include #include #include #include diff --git a/contrib/relent-filter/src/IOWrapper.cpp b/contrib/relent-filter/src/IOWrapper.cpp index 053735c96..7ad7697ce 100755 --- a/contrib/relent-filter/src/IOWrapper.cpp +++ b/contrib/relent-filter/src/IOWrapper.cpp @@ -234,13 +234,13 @@ void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, { typedef std::vector< const std::pair* > AlignVec; AlignVec alignments = ai.GetSortedAlignments(); - + AlignVec::const_iterator it; for (it = alignments.begin(); it != alignments.end(); ++it) { const std::pair &alignment = **it; out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " "; } - + } void OutputAlignment(ostream &out, const vector &edges) @@ -251,7 +251,7 @@ void OutputAlignment(ostream &out, const vector &edges) const Hypothesis &edge = *edges[currEdge]; const TargetPhrase &tp = edge.GetCurrTargetPhrase(); size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos(); - + OutputAlignment(out, tp.GetAlignmentInfo(), sourceOffset, targetOffset); targetOffset += tp.GetSize(); @@ -263,7 +263,7 @@ void OutputAlignment(OutputCollector* collector, size_t lineNo , const vectorWrite(lineNo,out.str()); } @@ -477,7 +477,7 @@ void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, con const int sourceOffset = sourceRange.GetStartPos(); const int targetOffset = targetRange.GetStartPos(); const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo(); - + OutputAlignment(out, ai, sourceOffset, targetOffset); } diff --git a/contrib/relent-filter/src/Main.cpp b/contrib/relent-filter/src/Main.cpp index 3c7911248..6a2bf4b01 100755 --- a/contrib/relent-filter/src/Main.cpp +++ b/contrib/relent-filter/src/Main.cpp @@ -168,18 +168,18 @@ static void ShowWeights() int main(int argc, char** argv) { try { - + // echo command line, if verbose IFVERBOSE(1) { TRACE_ERR("command: "); for(int i=0; iExplain(); exit(1); } - - + + // initialize all "global" variables, which are stored in StaticData // note: this also loads models such as the language model, etc. if (!StaticData::LoadDataStatic(params, argv[0])) { exit(1); } - + // setting "-show-weights" -> just dump out weights and exit if (params->isParamSpecified("show-weights")) { ShowWeights(); exit(0); } - + // shorthand for accessing information in StaticData const StaticData& staticData = StaticData::Instance(); - - + + //initialise random numbers rand_init(); - + // set up read/writing class IOWrapper* ioWrapper = GetIOWrapper(staticData); if (!ioWrapper) { cerr << "Error; Failed to create IO object" << endl; exit(1); } - + // check on weights vector weights = staticData.GetAllWeights(); IFVERBOSE(2) { @@ -233,7 +233,7 @@ int main(int argc, char** argv) // setting lexicalized reordering setup PhraseBasedReorderingState::m_useFirstBackwardScore = false; - + auto_ptr outputCollector; outputCollector.reset(new OutputCollector()); @@ -241,7 +241,7 @@ int main(int argc, char** argv) #ifdef WITH_THREADS ThreadPool pool(staticData.ThreadCount()); #endif - + // main loop over set of input sentences InputType* source = NULL; size_t lineCount = 0; @@ -259,11 +259,11 @@ int main(int argc, char** argv) task->Run(); delete task; #endif - + source = NULL; //make sure it doesn't get deleted ++lineCount; } - + // we are done, finishing up #ifdef WITH_THREADS pool.Stop(true); //flush remaining jobs diff --git a/contrib/relent-filter/src/RelativeEntropyCalc.cpp b/contrib/relent-filter/src/RelativeEntropyCalc.cpp index 212eedf87..9ba334fca 100755 --- a/contrib/relent-filter/src/RelativeEntropyCalc.cpp +++ b/contrib/relent-filter/src/RelativeEntropyCalc.cpp @@ -70,7 +70,7 @@ namespace MosesCmd if (neg_log_div > 100){ return 100; } - return neg_log_div; + return neg_log_div; } void RelativeEntropyCalc::ConcatOutputPhraseRecursive(Phrase& phrase, const Hypothesis *hypo){ diff --git a/contrib/relent-filter/src/TranslationAnalysis.cpp b/contrib/relent-filter/src/TranslationAnalysis.cpp index 89da48301..eb5f36293 100755 --- a/contrib/relent-filter/src/TranslationAnalysis.cpp +++ b/contrib/relent-filter/src/TranslationAnalysis.cpp @@ -57,7 +57,7 @@ void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, } } } - + bool epsilon = false; if (target == "") { target=""; diff --git a/contrib/rephraser/paraphrase.cpp b/contrib/rephraser/paraphrase.cpp index ad9dbc891..0556d6ccd 100644 --- a/contrib/rephraser/paraphrase.cpp +++ b/contrib/rephraser/paraphrase.cpp @@ -60,12 +60,12 @@ static void add(const string& e, const vector scores, static void finalise(Probs& p_e_given_f, Probs& p_f_given_e) { //cerr << "Sizes: p(e|f): " << p_e_given_f.size() << " p(f|e): " << p_f_given_e.size() << endl; - for (Probs::const_iterator e1_iter = p_f_given_e.begin() ; + for (Probs::const_iterator e1_iter = p_f_given_e.begin() ; e1_iter != p_f_given_e.end(); ++e1_iter) { for (Probs::const_iterator e2_iter = p_e_given_f.begin() ; e2_iter != p_e_given_f.end(); ++e2_iter) { - if (e1_iter->second == e2_iter->second) continue; + if (e1_iter->second == e2_iter->second) continue; cout << e1_iter->second << " ||| " << e2_iter->second << " ||| " << e1_iter->first * e2_iter->first << " ||| " << endl; } diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp index edf7daa13..337962aa6 100644 --- a/contrib/server/mosesserver.cpp +++ b/contrib/server/mosesserver.cpp @@ -3,10 +3,10 @@ // The separate moses server executable is being phased out. // Since there were problems with the migration into the main // executable, this separate program is still included in the -// distribution for legacy reasons. Contributors are encouraged -// to add their contributions to moses/server rather than +// distribution for legacy reasons. Contributors are encouraged +// to add their contributions to moses/server rather than // contrib/server. This recommendation does not apply to wrapper -// scripts. +// scripts. // The future is this: /** main function of the command line version of the decoder **/ @@ -83,7 +83,7 @@ public: pdsa->add(source_,target_,alignment_); #else const PhraseDictionary* pdf = PhraseDictionary::GetColl()[0]; - PhraseDictionaryDynSuffixArray* + PhraseDictionaryDynSuffixArray* pdsa = (PhraseDictionaryDynSuffixArray*) pdf; cerr << "Inserting into address " << pdsa << endl; pdsa->insertSnt(source_, target_, alignment_); @@ -146,7 +146,7 @@ public: } } */ - + void breakOutParams(const params_t& params) { params_t::const_iterator si = params.find("source"); if(si == params.end()) @@ -236,7 +236,7 @@ public: class TranslationTask : public virtual Moses::TranslationTask { protected: TranslationTask(xmlrpc_c::paramList const& paramList, - boost::condition_variable& cond, boost::mutex& mut) + boost::condition_variable& cond, boost::mutex& mut) : m_paramList(paramList), m_cond(cond), m_mut(mut), @@ -244,7 +244,7 @@ protected: {} public: - static boost::shared_ptr + static boost::shared_ptr create(xmlrpc_c::paramList const& paramList, boost::condition_variable& cond, boost::mutex& mut) { @@ -252,15 +252,15 @@ public: ret->m_self = ret; return ret; } - + virtual bool DeleteAfterExecution() {return false;} bool IsDone() const {return m_done;} const map& GetRetData() { return m_retData;} - virtual void - Run() + virtual void + Run() { using namespace xmlrpc_c; const params_t params = m_paramList.getStruct(0); @@ -292,25 +292,25 @@ public: vector multiModelWeights; si = params.find("lambda"); - if (si != params.end()) + if (si != params.end()) { value_array multiModelArray = value_array(si->second); vector multiModelValueVector(multiModelArray.vectorValueValue()); - for (size_t i=0;i < multiModelValueVector.size();i++) + for (size_t i=0;i < multiModelValueVector.size();i++) { multiModelWeights.push_back(value_double(multiModelValueVector[i])); } } si = params.find("model_name"); - if (si != params.end() && multiModelWeights.size() > 0) + if (si != params.end() && multiModelWeights.size() > 0) { const string model_name = value_string(si->second); - PhraseDictionaryMultiModel* pdmm + PhraseDictionaryMultiModel* pdmm = (PhraseDictionaryMultiModel*) FindPhraseDictionary(model_name); pdmm->SetTemporaryMultiModelWeightsVector(multiModelWeights); } - + const StaticData &staticData = StaticData::Instance(); //Make sure alternative paths are retained, if necessary @@ -321,7 +321,7 @@ public: stringstream out, graphInfo, transCollOpts; - if (staticData.IsSyntax()) + if (staticData.IsSyntax()) { boost::shared_ptr tinput(new TreeInput); const vector& IFO = staticData.GetInputFactorOrder(); @@ -338,8 +338,8 @@ public: manager.OutputSearchGraphMoses(sgstream); m_retData["sg"] = value_string(sgstream.str()); } - } - else + } + else { // size_t lineNumber = 0; // TODO: Include sentence request number here? boost::shared_ptr sentence(new Sentence(0,source)); @@ -351,30 +351,30 @@ public: vector alignInfo; outputHypo(out,hypo,addAlignInfo,alignInfo,reportAllFactors); if (addAlignInfo) m_retData["align"] = value_array(alignInfo); - if (addWordAlignInfo) + if (addWordAlignInfo) { stringstream wordAlignment; hypo->OutputAlignment(wordAlignment); vector alignments; string alignmentPair; - while (wordAlignment >> alignmentPair) + while (wordAlignment >> alignmentPair) { int pos = alignmentPair.find('-'); map wordAlignInfo; - wordAlignInfo["source-word"] + wordAlignInfo["source-word"] = value_int(atoi(alignmentPair.substr(0, pos).c_str())); - wordAlignInfo["target-word"] + wordAlignInfo["target-word"] = value_int(atoi(alignmentPair.substr(pos + 1).c_str())); alignments.push_back(value_struct(wordAlignInfo)); } m_retData["word-align"] = value_array(alignments); } - + if (addGraphInfo) insertGraphInfo(manager,m_retData); if (addTopts) insertTranslationOptions(manager,m_retData); - if (nbest_size > 0) + if (nbest_size > 0) { - outputNBest(manager, m_retData, nbest_size, nbest_distinct, + outputNBest(manager, m_retData, nbest_size, nbest_distinct, reportAllFactors, addAlignInfo, addScoreBreakdown); } (const_cast(staticData)).SetOutputSearchGraph(false); @@ -389,11 +389,11 @@ public: } - void outputHypo(ostream& out, const Hypothesis* hypo, - bool addAlignmentInfo, vector& alignInfo, + void outputHypo(ostream& out, const Hypothesis* hypo, + bool addAlignmentInfo, vector& alignInfo, bool reportAllFactors = false) { if (hypo->GetPrevHypo() != NULL) { - outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, + outputHypo(out,hypo->GetPrevHypo(),addAlignmentInfo, alignInfo, reportAllFactors); Phrase p = hypo->GetCurrTargetPhrase(); if(reportAllFactors) { @@ -547,14 +547,14 @@ public: retData.insert(pair("nbest", xmlrpc_c::value_array(nBestXml))); } - void - insertTranslationOptions(Manager& manager, map& retData) + void + insertTranslationOptions(Manager& manager, map& retData) { const TranslationOptionCollection* toptsColl = manager.getSntTranslationOptions(); vector toptsXml; size_t const stop = toptsColl->GetSource().GetSize(); TranslationOptionList const* tol; - for (size_t s = 0 ; s < stop ; ++s) + for (size_t s = 0 ; s < stop ; ++s) { for (size_t e = s; (tol = toptsColl->GetTranslationOptionList(s,e)) != NULL; ++e) { @@ -569,11 +569,11 @@ public: toptXml["start"] = xmlrpc_c::value_int(s); toptXml["end"] = xmlrpc_c::value_int(e); vector scoresXml; - const std::valarray &scores + const std::valarray &scores = topt->GetScoreBreakdown().getCoreFeatures(); - for (size_t j = 0; j < scores.size(); ++j) + for (size_t j = 0; j < scores.size(); ++j) scoresXml.push_back(xmlrpc_c::value_double(scores[j])); - + toptXml["scores"] = xmlrpc_c::value_array(scoresXml); toptsXml.push_back(xmlrpc_c::value_struct(toptXml)); } @@ -581,7 +581,7 @@ public: } retData.insert(pair("topt", xmlrpc_c::value_array(toptsXml))); } - + private: xmlrpc_c::paramList const& m_paramList; map m_retData; @@ -619,8 +619,8 @@ private: Moses::ThreadPool m_threadPool; }; -static -void +static +void PrintFeatureWeight(ostream& out, const FeatureFunction* ff) { out << ff->GetScoreProducerDescription() << "="; @@ -632,16 +632,16 @@ PrintFeatureWeight(ostream& out, const FeatureFunction* ff) out << endl; } -static -void +static +void ShowWeights(ostream& out) { // adapted from moses-cmd/Main.cpp std::ios::fmtflags old_flags = out.setf(std::ios::fixed); size_t old_precision = out.precision(6); - const vector& + const vector& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions(); - const vector& + const vector& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions(); for (size_t i = 0; i < sff.size(); ++i) { @@ -662,7 +662,7 @@ ShowWeights(ostream& out) out << ff->GetScoreProducerDescription() << " UNTUNEABLE" << endl; } } - if (! (old_flags & std::ios::fixed)) + if (! (old_flags & std::ios::fixed)) out.unsetf(std::ios::fixed); out.precision(old_precision); } @@ -754,7 +754,7 @@ int main(int argc, char** argv) .allowOrigin("*") ); */ - + XVERBOSE(1,"Listening on port " << port << endl); if (isSerial) { while(1) myAbyssServer.runOnce(); diff --git a/contrib/sigtest-filter/WIN32_functions.cpp b/contrib/sigtest-filter/WIN32_functions.cpp index cfc15d592..989c727a2 100644 --- a/contrib/sigtest-filter/WIN32_functions.cpp +++ b/contrib/sigtest-filter/WIN32_functions.cpp @@ -1,231 +1,231 @@ -// XGetopt.cpp Version 1.2 -// -// Author: Hans Dietrich -// hdietrich2@hotmail.com -// -// Description: -// XGetopt.cpp implements getopt(), a function to parse command lines. -// -// History -// Version 1.2 - 2003 May 17 -// - Added Unicode support -// -// Version 1.1 - 2002 March 10 -// - Added example to XGetopt.cpp module header -// -// This software is released into the public domain. -// You are free to use it in any way you like. -// -// This software is provided "as is" with no expressed -// or implied warranty. I accept no liability for any -// damage or loss of business that this software may cause. -// -/////////////////////////////////////////////////////////////////////////////// - - -/////////////////////////////////////////////////////////////////////////////// -// if you are using precompiled headers then include this line: -/////////////////////////////////////////////////////////////////////////////// - - -/////////////////////////////////////////////////////////////////////////////// -// if you are not using precompiled headers then include these lines: -//#include -//#include -//#include -/////////////////////////////////////////////////////////////////////////////// - - -#include -#include -#include -#include "WIN32_functions.h" - - -/////////////////////////////////////////////////////////////////////////////// -// -// X G e t o p t . c p p -// -// -// NAME -// getopt -- parse command line options -// -// SYNOPSIS -// int getopt(int argc, char *argv[], char *optstring) -// -// extern char *optarg; -// extern int optind; -// -// DESCRIPTION -// The getopt() function parses the command line arguments. Its -// arguments argc and argv are the argument count and array as -// passed into the application on program invocation. In the case -// of Visual C++ programs, argc and argv are available via the -// variables __argc and __argv (double underscores), respectively. -// getopt returns the next option letter in argv that matches a -// letter in optstring. (Note: Unicode programs should use -// __targv instead of __argv. Also, all character and string -// literals should be enclosed in ( ) ). -// -// optstring is a string of recognized option letters; if a letter -// is followed by a colon, the option is expected to have an argument -// that may or may not be separated from it by white space. optarg -// is set to point to the start of the option argument on return from -// getopt. -// -// Option letters may be combined, e.g., "-ab" is equivalent to -// "-a -b". Option letters are case sensitive. -// -// getopt places in the external variable optind the argv index -// of the next argument to be processed. optind is initialized -// to 0 before the first call to getopt. -// -// When all options have been processed (i.e., up to the first -// non-option argument), getopt returns EOF, optarg will point -// to the argument, and optind will be set to the argv index of -// the argument. If there are no non-option arguments, optarg -// will be set to NULL. -// -// The special option "--" may be used to delimit the end of the -// options; EOF will be returned, and "--" (and everything after it) -// will be skipped. -// -// RETURN VALUE -// For option letters contained in the string optstring, getopt -// will return the option letter. getopt returns a question mark (?) -// when it encounters an option letter not included in optstring. -// EOF is returned when processing is finished. -// -// BUGS -// 1) Long options are not supported. -// 2) The GNU double-colon extension is not supported. -// 3) The environment variable POSIXLY_CORRECT is not supported. -// 4) The + syntax is not supported. -// 5) The automatic permutation of arguments is not supported. -// 6) This implementation of getopt() returns EOF if an error is -// encountered, instead of -1 as the latest standard requires. -// -// EXAMPLE -// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[]) -// { -// int c; -// -// while ((c = getopt(argc, argv, ("aBn:"))) != EOF) -// { -// switch (c) -// { -// case ('a'): -// TRACE(("option a\n")); -// // -// // set some flag here -// // -// break; -// -// case ('B'): -// TRACE( ("option B\n")); -// // -// // set some other flag here -// // -// break; -// -// case ('n'): -// TRACE(("option n: value=%d\n"), atoi(optarg)); -// // -// // do something with value here -// // -// break; -// -// case ('?'): -// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]); -// return FALSE; -// break; -// -// default: -// TRACE(("WARNING: no handler for option %c\n"), c); -// return FALSE; -// break; -// } -// } -// // -// // check for non-option args here -// // -// return TRUE; -// } -// -/////////////////////////////////////////////////////////////////////////////// - -char *optarg; // global argument pointer -int optind = 0; // global argv index - -int getopt(int argc, char *argv[], char *optstring) -{ - static char *next = NULL; - if (optind == 0) - next = NULL; - - optarg = NULL; - - if (next == NULL || *next =='\0') { - if (optind == 0) - optind++; - - if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) { - optarg = NULL; - if (optind < argc) - optarg = argv[optind]; - return EOF; - } - - if (strcmp(argv[optind], "--") == 0) { - optind++; - optarg = NULL; - if (optind < argc) - optarg = argv[optind]; - return EOF; - } - - next = argv[optind]; - next++; // skip past - - optind++; - } - - char c = *next++; - char *cp = strchr(optstring, c); - - if (cp == NULL || c == (':')) - return ('?'); - - cp++; - if (*cp == (':')) { - if (*next != ('\0')) { - optarg = next; - next = NULL; - } else if (optind < argc) { - optarg = argv[optind]; - optind++; - } else { - return ('?'); - } - } - - return c; -} - -// for an overview, see -// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1. -double lgamma(int x) -{ - // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum)); - if (x <= 2) { - return 0.0; - } - static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5}; - double tmp=(double)x+5.5; - tmp -= (((double)x)+0.5)*log(tmp); - double y=(double)x; - double sum = 1.000000000190015; - for (size_t j=0; j<6; ++j) { - sum += coefs[j]/++y; - } - return -tmp+log(2.5066282746310005*sum/(double)x); -} +// XGetopt.cpp Version 1.2 +// +// Author: Hans Dietrich +// hdietrich2@hotmail.com +// +// Description: +// XGetopt.cpp implements getopt(), a function to parse command lines. +// +// History +// Version 1.2 - 2003 May 17 +// - Added Unicode support +// +// Version 1.1 - 2002 March 10 +// - Added example to XGetopt.cpp module header +// +// This software is released into the public domain. +// You are free to use it in any way you like. +// +// This software is provided "as is" with no expressed +// or implied warranty. I accept no liability for any +// damage or loss of business that this software may cause. +// +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// if you are using precompiled headers then include this line: +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// if you are not using precompiled headers then include these lines: +//#include +//#include +//#include +/////////////////////////////////////////////////////////////////////////////// + + +#include +#include +#include +#include "WIN32_functions.h" + + +/////////////////////////////////////////////////////////////////////////////// +// +// X G e t o p t . c p p +// +// +// NAME +// getopt -- parse command line options +// +// SYNOPSIS +// int getopt(int argc, char *argv[], char *optstring) +// +// extern char *optarg; +// extern int optind; +// +// DESCRIPTION +// The getopt() function parses the command line arguments. Its +// arguments argc and argv are the argument count and array as +// passed into the application on program invocation. In the case +// of Visual C++ programs, argc and argv are available via the +// variables __argc and __argv (double underscores), respectively. +// getopt returns the next option letter in argv that matches a +// letter in optstring. (Note: Unicode programs should use +// __targv instead of __argv. Also, all character and string +// literals should be enclosed in ( ) ). +// +// optstring is a string of recognized option letters; if a letter +// is followed by a colon, the option is expected to have an argument +// that may or may not be separated from it by white space. optarg +// is set to point to the start of the option argument on return from +// getopt. +// +// Option letters may be combined, e.g., "-ab" is equivalent to +// "-a -b". Option letters are case sensitive. +// +// getopt places in the external variable optind the argv index +// of the next argument to be processed. optind is initialized +// to 0 before the first call to getopt. +// +// When all options have been processed (i.e., up to the first +// non-option argument), getopt returns EOF, optarg will point +// to the argument, and optind will be set to the argv index of +// the argument. If there are no non-option arguments, optarg +// will be set to NULL. +// +// The special option "--" may be used to delimit the end of the +// options; EOF will be returned, and "--" (and everything after it) +// will be skipped. +// +// RETURN VALUE +// For option letters contained in the string optstring, getopt +// will return the option letter. getopt returns a question mark (?) +// when it encounters an option letter not included in optstring. +// EOF is returned when processing is finished. +// +// BUGS +// 1) Long options are not supported. +// 2) The GNU double-colon extension is not supported. +// 3) The environment variable POSIXLY_CORRECT is not supported. +// 4) The + syntax is not supported. +// 5) The automatic permutation of arguments is not supported. +// 6) This implementation of getopt() returns EOF if an error is +// encountered, instead of -1 as the latest standard requires. +// +// EXAMPLE +// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[]) +// { +// int c; +// +// while ((c = getopt(argc, argv, ("aBn:"))) != EOF) +// { +// switch (c) +// { +// case ('a'): +// TRACE(("option a\n")); +// // +// // set some flag here +// // +// break; +// +// case ('B'): +// TRACE( ("option B\n")); +// // +// // set some other flag here +// // +// break; +// +// case ('n'): +// TRACE(("option n: value=%d\n"), atoi(optarg)); +// // +// // do something with value here +// // +// break; +// +// case ('?'): +// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]); +// return FALSE; +// break; +// +// default: +// TRACE(("WARNING: no handler for option %c\n"), c); +// return FALSE; +// break; +// } +// } +// // +// // check for non-option args here +// // +// return TRUE; +// } +// +/////////////////////////////////////////////////////////////////////////////// + +char *optarg; // global argument pointer +int optind = 0; // global argv index + +int getopt(int argc, char *argv[], char *optstring) +{ + static char *next = NULL; + if (optind == 0) + next = NULL; + + optarg = NULL; + + if (next == NULL || *next =='\0') { + if (optind == 0) + optind++; + + if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) { + optarg = NULL; + if (optind < argc) + optarg = argv[optind]; + return EOF; + } + + if (strcmp(argv[optind], "--") == 0) { + optind++; + optarg = NULL; + if (optind < argc) + optarg = argv[optind]; + return EOF; + } + + next = argv[optind]; + next++; // skip past - + optind++; + } + + char c = *next++; + char *cp = strchr(optstring, c); + + if (cp == NULL || c == (':')) + return ('?'); + + cp++; + if (*cp == (':')) { + if (*next != ('\0')) { + optarg = next; + next = NULL; + } else if (optind < argc) { + optarg = argv[optind]; + optind++; + } else { + return ('?'); + } + } + + return c; +} + +// for an overview, see +// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1. +double lgamma(int x) +{ + // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum)); + if (x <= 2) { + return 0.0; + } + static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5}; + double tmp=(double)x+5.5; + tmp -= (((double)x)+0.5)*log(tmp); + double y=(double)x; + double sum = 1.000000000190015; + for (size_t j=0; j<6; ++j) { + sum += coefs[j]/++y; + } + return -tmp+log(2.5066282746310005*sum/(double)x); +} diff --git a/contrib/sigtest-filter/WIN32_functions.h b/contrib/sigtest-filter/WIN32_functions.h index 6a719392e..ad644018b 100644 --- a/contrib/sigtest-filter/WIN32_functions.h +++ b/contrib/sigtest-filter/WIN32_functions.h @@ -1,24 +1,24 @@ -// XGetopt.h Version 1.2 -// -// Author: Hans Dietrich -// hdietrich2@hotmail.com -// -// This software is released into the public domain. -// You are free to use it in any way you like. -// -// This software is provided "as is" with no expressed -// or implied warranty. I accept no liability for any -// damage or loss of business that this software may cause. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef XGETOPT_H -#define XGETOPT_H - -extern int optind, opterr; -extern char *optarg; - -int getopt(int argc, char *argv[], char *optstring); -double lgamma(int x); - -#endif //XGETOPT_H +// XGetopt.h Version 1.2 +// +// Author: Hans Dietrich +// hdietrich2@hotmail.com +// +// This software is released into the public domain. +// You are free to use it in any way you like. +// +// This software is provided "as is" with no expressed +// or implied warranty. I accept no liability for any +// damage or loss of business that this software may cause. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef XGETOPT_H +#define XGETOPT_H + +extern int optind, opterr; +extern char *optarg; + +int getopt(int argc, char *argv[], char *optstring); +double lgamma(int x); + +#endif //XGETOPT_H diff --git a/contrib/sigtest-filter/filter-pt.cpp b/contrib/sigtest-filter/filter-pt.cpp index bd0b9ae36..50418d502 100644 --- a/contrib/sigtest-filter/filter-pt.cpp +++ b/contrib/sigtest-filter/filter-pt.cpp @@ -1,5 +1,5 @@ -#include +#include #include #include #include @@ -14,7 +14,7 @@ #include #include -#include +#include #include #ifdef WIN32 @@ -58,9 +58,9 @@ typedef boost::shared_ptr > SentIdSet; class Cache { typedef std::pair ClockedSet; typedef boost::unordered_map ClockedMap; - + public: - + SentIdSet get(const std::string& phrase) { boost::shared_lock lock(m_mutex); if(m_cont.count(phrase)) { @@ -70,27 +70,27 @@ class Cache { } return SentIdSet( new SentIdSet::element_type() ); } - + void put(const std::string& phrase, const SentIdSet set) { boost::unique_lock lock(m_mutex); m_cont[phrase] = std::make_pair(set, clock()); } - + static void set_max_cache(size_t max_cache) { s_max_cache = max_cache; } - + void prune() { if(s_max_cache > 0) { boost::upgrade_lock lock(m_mutex); if(m_cont.size() > s_max_cache) { std::vector clocks; - for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++) + for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++) clocks.push_back(it->second.second); - + std::sort(clocks.begin(), clocks.end()); clock_t out = clocks[m_cont.size() - s_max_cache]; - + boost::upgrade_to_unique_lock uniq_lock(lock); for(ClockedMap::iterator it = m_cont.begin(); it != m_cont.end(); it++) if(it->second.second < out) @@ -98,7 +98,7 @@ class Cache { } } } - + private: ClockedMap m_cont; boost::shared_mutex m_mutex; @@ -282,12 +282,12 @@ void lookup_phrase(SentIdSet& ids, const std::string& phrase, i != locations.end(); ++i) { ids->push_back(i->sentIdInCorpus); } - + std::sort(ids->begin(), ids->end()); SentIdSet::element_type::iterator it = std::unique(ids->begin(), ids->end()); ids->resize(it - ids->begin()); - + if(ids->size() >= MINIMUM_SIZE_TO_KEEP) cache.put(phrase, ids); } @@ -295,8 +295,8 @@ void lookup_phrase(SentIdSet& ids, const std::string& phrase, void lookup_multiple_phrases(SentIdSet& ids, vector & phrases, C_SuffixArraySearchApplicationBase & my_sa, - const std::string & rule, Cache& cache) -{ + const std::string & rule, Cache& cache) +{ if (phrases.size() == 1) { lookup_phrase(ids, phrases.front(), my_sa, cache); @@ -372,32 +372,32 @@ void compute_cooc_stats_and_filter(std::vector& options, delete *i; options.erase(options.begin() + pfe_filter_limit,options.end()); } - + if (pef_filter_only) return; - + if (options.empty()) return; - + SentIdSet fset( new SentIdSet::element_type() ); find_occurrences(fset, options.front()->f_phrase, f_sa, f_cache); size_t cf = fset->size(); - + for (std::vector::iterator i = options.begin(); i != options.end(); ++i) { const std::string& e_phrase = (*i)->e_phrase; SentIdSet eset( new SentIdSet::element_type() ); find_occurrences(eset, e_phrase, e_sa, e_cache); size_t ce = eset->size(); - + SentIdSet efset( new SentIdSet::element_type() ); ordered_set_intersect(efset, fset, eset); size_t cef = efset->size(); - + double nlp = -log(fisher_exact(cef, cf, ce)); (*i)->set_cooc_stats(cef, cf, ce, nlp); } - + std::vector::iterator new_end = std::remove_if(options.begin(), options.end(), NlogSigThresholder(sig_filter_limit)); @@ -406,7 +406,7 @@ void compute_cooc_stats_and_filter(std::vector& options, } void filter(std::istream* in, std::ostream* out, int pfe_index) { - + std::vector lines; std::string prev = ""; std::vector options; @@ -415,23 +415,23 @@ void filter(std::istream* in, std::ostream* out, int pfe_index) { boost::mutex::scoped_lock lock(in_mutex); if(in->eof()) break; - + lines.clear(); std::string line; while(getline(*in, line) && lines.size() < 500000) lines.push_back(line); } - + std::stringstream out_temp; for(std::vector::iterator it = lines.begin(); it != lines.end(); it++) { size_t tmp_lines = ++pt_lines; if(tmp_lines % 10000 == 0) { boost::mutex::scoped_lock lock(err_mutex); std::cerr << "."; - + if(tmp_lines % 500000 == 0) std::cerr << "[n:" << tmp_lines << "]\n"; - + if(tmp_lines % 10000000 == 0) { float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines; float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines; @@ -446,30 +446,30 @@ void filter(std::istream* in, std::ostream* out, int pfe_index) { << "------------------------------------------------------\n"; } } - + if(pt_lines % 10000 == 0) { f_cache.prune(); e_cache.prune(); } - + if(it->length() > 0) { PTEntry* pp = new PTEntry(it->c_str(), pfe_index); if (prev != pp->f_phrase) { prev = pp->f_phrase; - + if (!options.empty()) { // always true after first line compute_cooc_stats_and_filter(options, f_cache, e_cache); } - + for (std::vector::iterator i = options.begin(); i != options.end(); ++i) { out_temp << **i << '\n'; delete *i; } - + options.clear(); options.push_back(pp); - + } else { options.push_back(pp); } @@ -479,7 +479,7 @@ void filter(std::istream* in, std::ostream* out, int pfe_index) { *out << out_temp.str() << std::flush; } compute_cooc_stats_and_filter(options, f_cache, e_cache); - + boost::mutex::scoped_lock lock(out_mutex); for (std::vector::iterator i = options.begin(); i != options.end(); ++i) { @@ -512,11 +512,11 @@ int main(int argc, char * argv[]) pfe_filter_limit = atoi(optarg); std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl; break; - case 't': + case 't': threads = atoi(optarg); std::cerr << "Using threads: " << threads << std::endl; break; - case 'm': + case 'm': max_cache = atoi(optarg); std::cerr << "Using max phrases in caches: " << max_cache << std::endl; break; @@ -548,13 +548,13 @@ int main(int argc, char * argv[]) usage(); } } - + if (sig_filter_limit == 0.0) pef_filter_only = true; //----------------------------------------------------------------------------- if (optind != argc || ((!efile || !ffile) && !pef_filter_only)) { usage(); } - + //load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false) if (!pef_filter_only) { e_sa.loadData_forSearch(efile, false, false); @@ -582,15 +582,15 @@ int main(int argc, char * argv[]) Cache::set_max_cache(max_cache); std::ios_base::sync_with_stdio(false); - + boost::thread_group threadGroup; - for(int i = 0; i < threads; i++) + for(int i = 0; i < threads; i++) threadGroup.add_thread(new boost::thread(filter, &std::cin, &std::cout, pfe_index)); threadGroup.join_all(); float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines; float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines; - + std::cerr << "\n\n------------------------------------------------------\n" << " unfiltered phrases pairs: " << pt_lines << "\n" << "\n" @@ -599,5 +599,5 @@ int main(int argc, char * argv[]) << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n" << "\n" << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n" - << "------------------------------------------------------\n"; + << "------------------------------------------------------\n"; } diff --git a/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h b/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h index 914e85e92..342f10777 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-archetypeset.h @@ -65,7 +65,7 @@ class Numbered : public T { friend String& operator<< ( String& str, const Numbered& rv ) { return str<*> operator>> ( StringInput ps, Numbered& rv ) { return pair*>(ps,&rv); } friend StringInput operator>> ( pair*> delimbuff, const char* psPostDelim ) { - return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>psPostDelim + return ( (SD3[0]=='\0') ? delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>psPostDelim : delimbuff.first>>SD1>>delimbuff.second->i>>SD2>>delimbuff.second->setT()>>SD3>>psPostDelim ); } }; @@ -106,7 +106,7 @@ template pair ArchetypeSet::getDistanceOfNearest ( const V& v ) const { //const Scored > > sipvDummy ( DBL_MAX ); //MinHeap > > > hsiv ( MapType::size()+1, sipvDummy ); - MinHeap > > >& hsiv = + MinHeap > > >& hsiv = const_cast > > >&> ( hsivCalc ); hsiv.clear(); @@ -120,7 +120,7 @@ pair ArchetypeSet::getDistanceOfNearest ( const typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() ); hsiv.set(iNext).setScore() = d; //hsiv.set(iNext).setScore() = v.getMarginalDistance ( hsiv.getMin().first, iUpper->second.second ); - ////int j = + ////int j = hsiv.fixDecr(iNext); ////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="< ArchetypeSet::getDistanceOfNearest ( const typename V::ElementType d = v.getMarginalDistance ( ++hsiv.setMin().first, hsiv.getMin().second.getRef() ); hsiv.setMin().setScore() += d; ////cerr<<" matching ln"<<&hsiv.getMin().second.getRef()<<" i="< ArchetypeSet::getDistanceOfNearest ( const hsiv.set(iNext).second = SafePtr ( iUpper->second ); typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() ); hsiv.set(iNext).setScore() = d; - ////int j = + ////int j = hsiv.fixDecr(iNext); ////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="< ArchetypeSet::getDistanceOfNearest ( const hsiv.set(iNext).second = SafePtr ( iLower->second ); typename V::ElementType d = v.getMarginalDistance ( hsiv.get(iNext).first, hsiv.get(iNext).second.getRef() ); hsiv.set(iNext).setScore() = d; - ////int j = + ////int j = hsiv.fixDecr(iNext); ////cerr<<" adding ln"<<&hsiv.get(j).second.getRef()<<" marg-dist="< #include -using namespace std; +using namespace std; //////////////////////////////////////////////////////////////////////////////// diff --git a/contrib/synlm/hhmm/rvtl/include/nl-beam.h b/contrib/synlm/hhmm/rvtl/include/nl-beam.h index 398babe21..817e96206 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-beam.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-beam.h @@ -101,8 +101,8 @@ class Beam { void write(FILE *pf){ /* for (typename BeamMap::const_iterator i = mkid.begin(); i != mkid.end(); i++){ i->first.write(pf); - fprintf(pf, " %d ", i->second.first); -// i->second.second.write(pf); + fprintf(pf, " %d ", i->second.first); +// i->second.second.write(pf); fprintf(pf, "\n"); } */ diff --git a/contrib/synlm/hhmm/rvtl/include/nl-cpt.h b/contrib/synlm/hhmm/rvtl/include/nl-cpt.h index a7c1a916c..dbfb947e3 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-cpt.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-cpt.h @@ -394,7 +394,7 @@ class SimpleMap : public map { private: typedef map OrigMap; static const Y yDummy; - + public: // Constructor / destructor methods... SimpleMap ( ) : OrigMap() { } @@ -899,7 +899,7 @@ class GenericHidVarCPTModel : public SimpleHash& getDistrib ( const K& k ) const { return HKYP::get(k); } - + P& setProb ( const Y& y, const K& k ) { pair& yp = HKYP::set(k).add(); yp.first = y; diff --git a/contrib/synlm/hhmm/rvtl/include/nl-crf.h b/contrib/synlm/hhmm/rvtl/include/nl-crf.h index 44744ad03..a9b233b23 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-crf.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-crf.h @@ -36,7 +36,7 @@ // //////////////////////////////////////////////////////////////////////////////// -template +template class CRF3DModeledRV : public Y { private: @@ -90,7 +90,7 @@ template SafeArray5D,int,int,int,int,float> //////////////////////////////////////////////////////////////////////////////// -template +template Prob CRF3DModeledRV::getProb( const X1& x1, const X2& x2 ) const { SafeArray2D aaCnds ( cardOff, cardSh ) ; @@ -131,7 +131,7 @@ Prob CRF3DModeledRV::getProb( const X1& x1, const X2& x2 ) const { for ( int configRghtValSite=0; configRghtValSite<(1<::getProb( const X1& x1, const X2& x2 ) const { //////////////////////////////////////////////////////////////////////////////// -template +template bool CRF3DModeledRV::readModelFields ( char* aps[], int numFields ) { if ( 7==numFields ) setPotential ( X1(string(aps[1])), // globals @@ -172,7 +172,7 @@ bool CRF3DModeledRV::readModelFields ( char* aps[], int numFields ) { //////////////////////////////////////////////////////////////////////////////// -template +template void CRF3DModeledRV::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl, const X1& x1, const X2& x2, bool bObsVal ) const { fprintf ( pf, "%04d> %s ", frame, psMdl ); @@ -199,7 +199,7 @@ void CRF3DModeledRV::writeObservCliqueConfigs ( FILE* pf, int frame, co // //////////////////////////////////////////////////////////////////////////////// -template +template class CRF4DModeledRV : public Y { private: @@ -247,13 +247,13 @@ template int CRF4DModeledRV::c template int CRF4DModeledRV::cardCnd = 0; template int CRF4DModeledRV::bitsVal = 0; template int CRF4DModeledRV::bitsValSite = 0; -template SafeArray5D,int,int,int,int,float> +template SafeArray5D,int,int,int,int,float> CRF4DModeledRV::aaaaaPotentials; /* template SafeArray3D CRF4DModeledRV::aaaCnds; */ //////////////////////////////////////////////////////////////////////////////// -template +template Prob CRF4DModeledRV::getProb( const X1& x1, const X2& x2, const X3& x3 ) const { SafeArray2D aaCnds ( cardOff, cardSh ) ; @@ -294,7 +294,7 @@ Prob CRF4DModeledRV::getProb( const X1& x1, const X2& x2, const X3& for ( int configRghtValSite=0; configRghtValSite<(1<::getProb( const X1& x1, const X2& x2, const X3& //////////////////////////////////////////////////////////////////////////////// -template +template bool CRF4DModeledRV::readModelFields ( char* aps[], int numFields ) { if ( 7==numFields ) setPotential ( X1(string(aps[1])), // globals @@ -335,9 +335,9 @@ bool CRF4DModeledRV::readModelFields ( char* aps[], int numFields ) //////////////////////////////////////////////////////////////////////////////// -template +template void CRF4DModeledRV::writeObservCliqueConfigs ( FILE* pf, int frame, const char* psMdl, - const X1& x1, const X2& x2, + const X1& x1, const X2& x2, const X3& x3, bool bObsVal ) const { fprintf ( pf, "%04d> %s ", frame, psMdl ); // For each shape (feature slope)... diff --git a/contrib/synlm/hhmm/rvtl/include/nl-denot.h b/contrib/synlm/hhmm/rvtl/include/nl-denot.h index 0b50663a1..be92168b8 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-denot.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-denot.h @@ -80,7 +80,7 @@ void VecE::read ( char* ps, const ReaderContext& rc ) { */ char* psT; int i=0; for ( char* psU=strtok_r(ps,",",&psT); - psU && i::set(i) = psU; } @@ -166,7 +166,7 @@ void VecV::read ( char* ps, VecVReaderContext& rc ) { // Chop into individual coinds strings... char* psT; int i=0; for ( char* psU=strtok_r(ps,",",&psT); - psU && i { static const int NUM_ENTS; // Constructor / destructor methods... JointVecV ( ) { } - JointVecV ( const V1& a1, const V2& a2 ) { + JointVecV ( const V1& a1, const V2& a2 ) { ////fprintf(stderr,"iJoin "); a1.V1::write(stderr); fprintf(stderr," "); a2.V2::write(stderr); fprintf(stderr,"\n"); for (int i=0; i, public Tree >* ptr = this; - while ( !ptr->isTerm() ) { + while ( !ptr->isTerm() ) { double sumsqr=0.0; for(A a;a >::getWt(); @@ -112,7 +112,7 @@ class ContDTree2DModel : public Generic2DModel, public Tree +template bool ContDTree2DModel::readFields ( char* aps[], int numFields ) { if ( /*aps[0]==sId &&*/ (3==numFields || 4==numFields) ) { //fprintf(stderr,"%s,%d\n",aps[3],numFields); @@ -171,7 +171,7 @@ class ContDTree3DModel : public Generic3DModel { }; //////////////////// -template +template bool ContDTree3DModel::readFields ( char* aps[], int numFields ) { if ( /*aps[0]==sId &&*/ (4==numFields || 5==numFields) ) { //fprintf(stderr,"%s,%d\n",aps[3],numFields); @@ -212,7 +212,7 @@ bool ContDTree3DModel::readFields ( char* aps[], int numFields ) { //////////////////////////////////////////////////////////////////////////////// template -class TrainableContDTree2DModel : public ContDTree2DModel { +class TrainableContDTree2DModel : public ContDTree2DModel { private: List > lxy; public: @@ -225,7 +225,7 @@ class TrainableContDTree2DModel : public ContDTree2DModel { void train ( List >&, const double ) ; void train ( const double d ) { train(lxy,d); } ////// Input / output methods... - bool readData ( char* vs[], int numFields ) { + bool readData ( char* vs[], int numFields ) { if ( 3==numFields ) lxy.add() = Joint2DRV ( X(vs[1]), Y(vs[2]) ); else return false; return true; @@ -312,7 +312,7 @@ void TrainableContDTree2DModel::train ( List >& lxy, cons // if ( double(rand())/double(RAND_MAX) < prRarest/modelY.getProb(pxy->getSub2()) ) { dCtr++; - double gamma = dTot/(dTot+dCtr); // 1.0/(double(epoch)+dCtr/dTot); // 1.0/double(epoch); // 1.0/(double(epoch)+dCtr/(dTot*prRarest*2.0)); // + double gamma = dTot/(dTot+dCtr); // 1.0/(double(epoch)+dCtr/dTot); // 1.0/double(epoch); // 1.0/(double(epoch)+dCtr/(dTot*prRarest*2.0)); // // Weight deltas for next epoch... Wt wDelta = 0.0; @@ -333,7 +333,7 @@ void TrainableContDTree2DModel::train ( List >& lxy, cons P prY = 1.0 / ( 1.0 + exp(-wtdavg) ); // Calc deltas for each feature/attribute/dimension... - double dEachWt = 1.0/dTot; // 1.0/dTot * modelY.getProb ( Y(1-pxy->getSub2().toInt()) ); // 1.0/(dTot*prRarest*2.0); // + double dEachWt = 1.0/dTot; // 1.0/dTot * modelY.getProb ( Y(1-pxy->getSub2().toInt()) ); // 1.0/(dTot*prRarest*2.0); // wDelta += dEachWt * -1 * ( prY - P(double(pxy->getSub2().toInt())) ); for ( A a; agetSub1().get(a.toInt()) * ( prY - P(double(pxy->getSub2().toInt())) ); @@ -439,7 +439,7 @@ void TrainableContDTree2DModel::train ( List >& lxy, cons //////////////////////////////////////////////////////////////////////////////// template -class TrainableContDTree3DModel : public ContDTree3DModel { +class TrainableContDTree3DModel : public ContDTree3DModel { private: @@ -455,7 +455,7 @@ class TrainableContDTree3DModel : public ContDTree3DModel { TrainableContDTree2DModel& setTree(const X1& x1) { return static_cast&>(ContDTree3DModel::setTree(x1)); } ////// Add training data to per-subphone lists... - bool readData ( char* vs[], int numFields ) { + bool readData ( char* vs[], int numFields ) { if ( 4==numFields ) { mqlxy[X1(vs[1])].add() = Joint2DRV ( X2(vs[2]), Y(vs[3]) ); ////mqlxy[X1(vs[1])].getLast()->write(stderr); fprintf(stderr,"\n"); diff --git a/contrib/synlm/hhmm/rvtl/include/nl-dtree.h b/contrib/synlm/hhmm/rvtl/include/nl-dtree.h index 2396f395c..93a0e4d42 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-dtree.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-dtree.h @@ -129,8 +129,8 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode > { friend StringInput operator>> ( pair*> si_m, const char* psD ) { if (StringInput(NULL)==si_m.first) return si_m.first; Y y; String xs; StringInput si,si2; si=si_m.first; DTree2DModel* pm=si_m.second; - while((si2=si>>" ")!=NULL)si=si2; - si=si>>xs>>" "; + while((si2=si>>" ")!=NULL)si=si2; + si=si>>xs>>" "; while((si2=si>>" ")!=NULL)si=si2; // Find appropriate node, creating nodes as necessary... for(int i=1; i > { if ( si!=NULL && si[0]==':' ) { si=si>>": "; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" ")!=NULL)si=si2; si=si>>y>>" "; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" ")!=NULL)si=si2; si=si>>"= "; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" ")!=NULL)si=si2; // Specify attribute number (at nonterminal) or probability in distribution (at terminal)... return (si!=NULL) ? si>>pm->setProb(y)>>psD : si; } else if ( si!=NULL && si[0]=='=' ) { si=si>>"= "; //cerr<<" in after equals "<<((si==NULL) ? "yes" : "no") << endl; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" ")!=NULL)si=si2; //m.setA() = atoi(si.c_str()); int aVar = 0; - si=si>>aVar>>psD; - pm->setA()=aVar; + si=si>>aVar>>psD; + pm->setA()=aVar; ////cerr<<" at end "<<((si==NULL) ? "yes" : "no") << endl; ////cerr<<" m.getA() is "<< m.getA().toInt() << endl; return si; @@ -169,15 +169,15 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode > { si=si_m.first; sRt = si.c_str(); if (sRt.find(':')!=string::npos) { - while((si2=si>>" [")!=NULL)si=si2; - si=si>>xs>>"] "; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" [")!=NULL)si=si2; + si=si>>xs>>"] "; + while((si2=si>>" ")!=NULL)si=si2; si=si>>": "; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" ")!=NULL)si=si2; si=si>>y>>" "; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" ")!=NULL)si=si2; si=si>>"= "; - + // For DTree, must find the node labeled by X //Tree >* ptr = m; //assert(ptr); @@ -189,15 +189,15 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode > { // Specify attribute number (at nonterminal) or probability in distribution (at terminal)... return (si!=NULL) ? si>>m.setProb(y)>>psD : si; } else { - while((si2=si>>" [")!=NULL)si=si2; + while((si2=si>>" [")!=NULL)si=si2; si=si>>xs>>"] "; //cerr<<" in bracket "<<((si==NULL) ? "yes" : "no") << endl; - while((si2=si>>" ")!=NULL)si=si2; + while((si2=si>>" ")!=NULL)si=si2; si=si>>"= "; //cerr<<" in after equals "<<((si==NULL) ? "yes" : "no") << endl; //m.setA() = atoi(si.c_str()); int aVar = 0; - si=si>>aVar>>psD; - m.setA()=aVar; + si=si>>aVar>>psD; + m.setA()=aVar; //cerr<<" at end "<<((si==NULL) ? "yes" : "no") << endl; //cerr<<" m.getA() is "<< m.getA().toInt() << endl; return si; @@ -209,7 +209,7 @@ class DTree2DModel : public Tree < typename X::ElementType, DecisNode > { }; //////////////////// -template +template bool DTree2DModel::readFields ( Array& aps ) { if ( /*aps[0]==sId &&*/ (3==aps.size() || 4==aps.size()) ) { //fprintf(stderr,"%s,%d\n",aps[3],numFields); @@ -269,7 +269,7 @@ class DTree3DModel { }; //////////////////// -template +template bool DTree3DModel::readFields ( char* aps[], int numFields ) { if ( /*aps[0]==sId &&*/ (4==numFields || 5==numFields) ) { //fprintf(stderr,"%s,%d\n",aps[3],numFields); @@ -307,7 +307,7 @@ bool DTree3DModel::readFields ( char* aps[], int numFields ) { //////////////////////////////////////////////////////////////////////////////// template -class TrainableDTree2DModel : public DTree2DModel { +class TrainableDTree2DModel : public DTree2DModel { private: // Type members... typedef typename X::ElementType B; @@ -485,7 +485,7 @@ void TrainableDTree2DModel::train ( List >& lxy, const De //////////////////////////////////////////////////////////////////////////////// template -class TrainableDTree3DModel : public DTree3DModel { +class TrainableDTree3DModel : public DTree3DModel { private: diff --git a/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h b/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h index dbb9d9d9d..5e8b4d6d0 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-fixedmatrix.h @@ -34,7 +34,7 @@ class Matrix : public SafeArray2D,Id,T> { Matrix ( ) : SafeArray2D,Id,T>( ) { }//{ xSize=0; ySize=0; } Matrix (int x, int y) : SafeArray2D,Id,T>(x,y) { }//{ xSize=x; ySize=y; } Matrix (int x, int y, const T& t) : SafeArray2D,Id,T>(x,y,t) { }//{ xSize=x; ySize=y; } - Matrix (const Matrix& a) : SafeArray2D,Id,T>(a.xSize(),a.ySize()) { //xSize=a.xSize; ySize=a.ySize; + Matrix (const Matrix& a) : SafeArray2D,Id,T>(a.xSize(),a.ySize()) { //xSize=a.xSize; ySize=a.ySize; for(int i=0;iset(i,j)=a.get(i,j); } // Specification methods... //Matrix& operator= ( const Matrix& sat ) @@ -195,34 +195,34 @@ class Matrix : public SafeArray2D,Id,T> { } return false; } - bool operator== ( const Matrix& a ) const { + bool operator== ( const Matrix& a ) const { if (xSize()!=a.xSize() || ySize()!=a.ySize()) return false; - for (int i=0;iget(Id(i),Id(j))!=a.get(Id(i),Id(j))) return false; return true; } // Input/output methods... - friend ostream& operator<< ( ostream& os, const Matrix& a ) { + friend ostream& operator<< ( ostream& os, const Matrix& a ) { os<<"\n "; for (int i=0;i(i),Id(j)); - } + } os<<(i==a.xSize()-1?"\n":"\n "); } - return os; + return os; } - friend String& operator<< ( String& str, const Matrix& a ) { + friend String& operator<< ( String& str, const Matrix& a ) { str<<"\n "; for (int i=0;i(i),Id(j)); - } + } str<<";"; } - return str; + return str; } string getString( ) const; @@ -234,7 +234,7 @@ string Matrix::getString() const { for (int j=0;jget(Id(i),Id(j)); - } + } str += ";"; } return str; diff --git a/contrib/synlm/hhmm/rvtl/include/nl-gauss.h b/contrib/synlm/hhmm/rvtl/include/nl-gauss.h index a2213086f..f5cc45159 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-gauss.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-gauss.h @@ -43,7 +43,7 @@ static const PDFVal VARIANCE_THRESHOLD = 0.01; //0.0001; //0 // //////////////////////////////////////////////////////////////////////////////// -template +template class DiagGauss1DModel : public Generic1DModel { private: // Member variables... @@ -53,7 +53,7 @@ class DiagGauss1DModel : public Generic1DModel { SimpleHash,PDFVal> aMeans; SimpleHash,PDFVal> aVariances; PDFVal prInvRootNormVariances; - PDFVal prProduct; + PDFVal prProduct; SimpleHash,PDFVal> algprNegHalfInvVariances; public: // Constructor / destructor methods... @@ -78,7 +78,7 @@ class DiagGauss1DModel : public Generic1DModel { }; //////////////////////////////////////// -template +template inline void DiagGauss1DModel::precomputeVarianceTerms ( ) { // Inverse square root of norm of variances... setInvRootNormVar() = 1.0; @@ -92,7 +92,7 @@ inline void DiagGauss1DModel::precomputeVarianceTerms ( ) { } //////////////////////////////////////// -template +template inline PDFVal DiagGauss1DModel::getProb ( const Y& y ) const { // fprintf(stderr,"--------------------\n"); // y.write(stderr); @@ -109,7 +109,7 @@ inline PDFVal DiagGauss1DModel::getProb ( const Y& y ) const { } //////////////////////////////////////// -template +template bool DiagGauss1DModel::readFields ( char* as[], int numFields ) { if ( 0==strcmp(as[1],"m") && numFields>2 ) { char* psT; @@ -126,12 +126,12 @@ bool DiagGauss1DModel::readFields ( char* as[], int numFields ) { } //////////////////////////////////////// -template +template void DiagGauss1DModel::writeFields ( FILE* pf, const string& sPref ) const { fprintf(pf,"%s m = ",sPref.c_str()); for(int i=0; i::writeFields ( FILE* pf, const string& sPref ) const { //////////////////////////////////////////////////////////////////////////////// /* -template +template class DiagGauss2DModel : public Generic2DModel { private: // Member variables... @@ -177,7 +177,7 @@ class DiagGauss2DModel : public Generic2DModel { //////////////////////////////////////////////////////////////////////////////// -template +template class DiagGauss3DModel : public Generic3DModel { private: // Member variables... @@ -220,7 +220,7 @@ class DiagGauss3DModel : public Generic3DModel { // //////////////////////////////////////////////////////////////////////////////// -template +template class TrainableDiagGauss1DModel : public DiagGauss1DModel { public: TrainableDiagGauss1DModel ( ) : DiagGauss1DModel() { } diff --git a/contrib/synlm/hhmm/rvtl/include/nl-hash.h b/contrib/synlm/hhmm/rvtl/include/nl-hash.h index 809284db9..b4d228b9c 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-hash.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-hash.h @@ -54,7 +54,7 @@ class SimpleHash : public hash_map,SimpleHashEqual > /*pu // tr1::unordered_map,SimpleHashEqual > mxy; static const Y yDummy; //static Y yNonconstDummy; - + public: // typedef typename OrigHash::const_iterator const_iterator; // typedef typename OrigHash::iterator iterator; diff --git a/contrib/synlm/hhmm/rvtl/include/nl-hmm.h b/contrib/synlm/hhmm/rvtl/include/nl-hmm.h index 2f6cd0104..c4414c4b7 100644 --- a/contrib/synlm/hhmm/rvtl/include/nl-hmm.h +++ b/contrib/synlm/hhmm/rvtl/include/nl-hmm.h @@ -209,7 +209,7 @@ template void HMM::debugPrint() const{ for (int frame=0, numFrames=aatnTrellis.getxSize(); frame 0) { @@ -306,7 +306,7 @@ void HMM::updateRanked ( const typename MX::RandVarType& x, bool b1 ) // Add best transition (top of queue)... //mx.getProb(o,my.setTrellDat(ashpiQueue.getTop().first,ashpiQueue.getTop().second)); if ( ashpiQueue.getSize() > 0 ) { - S s; my.setTrellDat(s,ashpiQueue.getTop().second); + S s; my.setTrellDat(s,ashpiQueue.getTop().second); bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,my.setBackDat(ashpiQueue.getTop().second)), ashpiQueue.getTop().third ); ////cerr<::updateSerial ( const typename MX::RandVarType& x ) { // Incorporate into trellis... btn.tryAdd ( s, IB(i,my.setBackDat(y)), lgprFull ); //if(OUTPUT_VERYNOISY) - // fprintf ( stderr," (S_t-1:[e^%0.6f] * Y:e^%0.6f * X:e^%0.6f = S_t:[e^%0.6f])\n", + // fprintf ( stderr," (S_t-1:[e^%0.6f] * Y:e^%0.6f * X:e^%0.6f = S_t:[e^%0.6f])\n", // float(aatnTrellis.get(frameLast-1,i).getLogProb().toInt())/100.0, // float(lgprY.toInt())/100.0, // float(lgprX.toInt())/100.0, @@ -389,7 +389,7 @@ void HMM::updateSerial ( const typename MX::RandVarType& x ) { } // for(int i=0;i "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n"); +// fprintf(stderr,"> "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n"); // } btn.sort(atnSorted); @@ -429,8 +429,8 @@ void HMM::each ( const typename MX::RandVarType& x, Beam& tnsbPrev = aatnTrellis.get(frameLast-1,i); // If prob still not below beam minimum... if ( tnsbPrev.getLogProb() > btn.getMin().getScore() ) { - //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnsbPrev.getId().write(stderr); fprintf(stderr,"\n"); } - + //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnsbPrev.getId().write(stderr); fprintf(stderr,"\n"); } + // For each possible transition... const S& sPrev = tnsbPrev.getId(); typename MY::IterVal y; @@ -447,7 +447,7 @@ void HMM::each ( const typename MX::RandVarType& x, Beam "<::each ( const typename MX::RandVarType& x, Beam HMM::getMLS(const S& sLast) const { //// sprintf(tmp,"HYPOTH %04d> ", fr-1); //// string tString(tmp); //// tString += - string tString = + string tString = //// aatnTrellis.get(fr,iBest).getId().getString() + " " + aatnTrellis.get(fr,iBest).getBackData().getString() //// + "\n" @@ -737,7 +737,7 @@ template void HMM::writeCurr ( ostream& os, int f=-1 ) const { if ( -1==f ) f=frameLast; if ( 0<=f && f<=frameLast ) - for ( int i=0; i::writeCurrSum ( FILE* pf, int f=-1 ) const { if ( 0<=f && f<=frameLast ) { LogProb sum = 0.0; LogProb logtop = 0.0; - for ( int i=0; i::gatherElementsInBeam( SafeArray1D,pair > result->init(BEAM_WIDTH); if ( -1==f ) f=frameLast; if ( 0<=f && f<=frameLast ) { - for ( int i=0; iset(i).first = aatnTrellis.get(f,i).getId(); result->set(i).second = aatnTrellis.get(f,i).getLogProb(); } @@ -836,7 +836,7 @@ void HMM::writeCurrEntropy ( FILE* pf, int f=-1 ) const { if ( 0<=f && f<=frameLast ) { LogProb logh = 0.0; LogProb logtop = 0.0; - for ( int i=0; i::writeCurrDepths ( FILE* pf, int f=-1 ) const { Array depths = Array(); Array logprobs = Array(); double avgdepth = 0.0; - for ( int i=0; i::getBeamUsed ( int f=-1 ) const { if ( -1==f ) f=frameLast; int ctr=0; if ( 0<=f && f<=frameLast ) - for ( int i=0; i::updateRanked ( const typename MO::RandVarType& o ) { // Add best transition (top of queue)... //mo.getProb(o,mh.setTrellDat(axhpiQueue.getTop().first,axhpiQueue.getTop().second)); if ( axhpiQueue.getSize() > 0 ) { - X x; mh.setTrellDat(x,axhpiQueue.getTop().second); + X x; mh.setTrellDat(x,axhpiQueue.getTop().second); bFull |= btn.tryAdd ( x, IB(axhpiQueue.getTop().first,mh.setBackDat(axhpiQueue.getTop().second)), axhpiQueue.getTop().third ); //cerr<::updateSerial ( const typename MO::RandVarType& o ) { // Incorporate into trellis... btn.tryAdd ( x, IB(i,mh.setBackDat(h)), lgprFull ); //if(OUTPUT_VERYNOISY) - // fprintf ( stderr," (X_t-1:[e^%0.6f] * H:e^%0.6f * O:e^%0.6f = X_t:[e^%0.6f])\n", + // fprintf ( stderr," (X_t-1:[e^%0.6f] * H:e^%0.6f * O:e^%0.6f = X_t:[e^%0.6f])\n", // float(aatnTrellis.get(frameLast-1,i).getLogProb().toInt())/100.0, // float(lgprH.toInt())/100.0, // float(lgprO.toInt())/100.0, @@ -351,7 +351,7 @@ void HMM::updateSerial ( const typename MO::RandVarType& o ) { } // for(int i=0;i "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n"); +// fprintf(stderr,"> "); btn.get(i)->first.write(stderr); fprintf(stderr,"\n"); // } btn.sort(atnSorted); @@ -390,8 +390,8 @@ void HMM::each ( const typename MO::RandVarType& o, Beam& tnxbPrev = aatnTrellis.get(frameLast-1,i); // If prob still not below beam minimum... if ( tnxbPrev.getLogProb() > btn.getMin().getScore() ) { - //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnxbPrev.getId().write(stderr); fprintf(stderr,"\n"); } - + //if (OUTPUT_VERYNOISY) { fprintf(stderr,"FROM: "); tnxbPrev.getId().write(stderr); fprintf(stderr,"\n"); } + // For each possible transition... const X& xPrev = tnxbPrev.getId(); typename MH::IterVal h; @@ -408,7 +408,7 @@ void HMM::each ( const typename MO::RandVarType& o, Beam "<::each ( const typename MO::RandVarType& o, Beam HMM::getMLS(const X& xLast) const { //// sprintf(tmp,"HYPOTH %04d> ", fr-1); //// string tString(tmp); //// tString += - string tString = + string tString = //// aatnTrellis.get(fr,iBest).getId().getString() + " " + aatnTrellis.get(fr,iBest).getBackData().getString() //// + "\n" @@ -697,7 +697,7 @@ template void HMM::writeCurr ( FILE* pf, int f=-1 ) const { if ( -1==f ) f=frameLast; if ( 0<=f && f<=frameLast ) - for ( int i=0; i::writeCurrSum ( FILE* pf, int f=-1 ) const { if ( 0<=f && f<=frameLast ) { LogProb sum = 0.0; LogProb logtop = 0.0; - for ( int i=0; i::writeCurrEntropy ( FILE* pf, int f=-1 ) const { if ( 0<=f && f<=frameLast ) { LogProb logh = 0.0; LogProb logtop = 0.0; - for ( int i=0; i::writeCurrDepths ( FILE* pf, int f=-1 ) const { Array depths = Array(); Array logprobs = Array(); double avgdepth = 0.0; - for ( int i=0; i::getBeamUsed ( int f=-1 ) const { if ( -1==f ) f=frameLast; int ctr=0; if ( 0<=f && f<=frameLast ) - for ( int i=0; i& HMMLoop::update ( const typename MX::RandVarTyp //modX.getProb(o,modY.setTrellDat(ashpiQueue.getTop().first,ashpiQueue.getTop().second)); if ( ashpiQueue.getSize() > 0 ) { S s ( ashpiQueue.getTop().second ); - ////S s; modY.setTrellDat(s,ashpiQueue.getTop().second); + ////S s; modY.setTrellDat(s,ashpiQueue.getTop().second); bFull |= btn.tryAdd ( s, IB(ashpiQueue.getTop().first,B(ashpiQueue.getTop().second)), ashpiQueue.getTop().third ); ////cerr< operator- ( ElementType d ) const { Vector vO; for(uint i=0;i operator* ( ElementType d, const Vector& v ) { Vector vO; for(uint i=0;i operator/ ( ElementType d, const Vector& v ) { Vector vO; for(uint i=0;i operator+ ( ElementType d, const Vector& v ) { Vector vO; for(uint i=0;i operator- ( ElementType d, const Vector& v ) { Vector vO; for(uint i=0;i operator+ ( ElementType d, const Vector& v ) { Vector vO; for(uint i=0;i operator- ( ElementType d, const Vector& v ) { Vector vO; for(uint i=0;i& operator*= ( ElementType d ) { for(uint i=0;i& operator/= ( ElementType d ) { for(uint i=0;i& operator+= ( ElementType d ) { for(uint i=0;i { // //////////////////////////////////////////////////////////////////////////////// -template