Merge branch 'master' of http://github.com/moses-smt/mosesdecoder
Commit 764780ea26
.gitignore:
@@ -21,6 +21,9 @@ mingw/MosesGUI/icons_rc.py
 mingw/MosesGUI/Ui_credits.py
 mingw/MosesGUI/Ui_mainWindow.py
 moses/TranslationModel/UG
+moses/server
+moses/parameters
+moses/thread_safe_container.h
 phrase-extract/pcfg-common
 phrase-extract/syntax-common
 randlm
@@ -32,3 +35,4 @@ srilm
 util
 xmlrpc-c
 .git
+util/ug_cache_with_timeout.h
Jamroot (10 lines changed):
@@ -108,7 +108,7 @@ external-lib z ;
 #lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
 #requirements += <library>dl ;
 
 #requirements += <cxxflags>-std=c++0x ;
 
 if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
   if [ option.get "full-tcmalloc" : : "yes" ] {
@@ -133,7 +133,9 @@ if [ option.get "filter-warnings" : : "yes" ] {
   requirements += <cxxflags>-Wno-unused-but-set-variable ;
   requirements += <cxxflags>-Wno-unused-result ;
   requirements += <cxxflags>-Wno-unused-variable ;
-  requirements += <cxxflags>-Wcomment ;
+  requirements += <cxxflags>-Wno-comment ;
   requirements += <cxxflags>-Wno-strict-aliasing ;
   requirements += <cxxflags>-Wno-overloaded-virtual ;
 }
 
 if [ option.get "debug-build" : : "yes" ] {
@@ -179,7 +181,7 @@ if [ option.get "with-icu" : : "yes" ]
   requirements += <library>icui18n/<link>shared ;
   requirements += <cxxflags>-fPIC ;
   requirements += <address-model>64 ;
-  requirements += <runtime-link>shared ;
+  # requirements += <runtime-link>shared ;
 }
 
 if [ option.get "with-probing-pt" : : "yes" ]
@@ -301,5 +303,5 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
 
 #local temp = [ _shell "bash source ./s.sh" ] ;
 local temp = [ _shell "mkdir -p $(TOP)/bin" ] ;
-local temp = [ _shell "rm $(TOP)/bin/moses_chart" ] ;
+local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ;
 local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ;
biconcor/SuffixArray.cpp:
@@ -21,6 +21,11 @@ SuffixArray::SuffixArray()
   m_wordInSentence(NULL),
   m_sentence(NULL),
   m_sentenceLength(NULL),
+  m_document(NULL),
+  m_documentName(NULL),
+  m_documentNameLength(0),
+  m_documentCount(0),
+  m_useDocument(false),
   m_vcb(),
   m_size(0),
   m_sentenceCount(0) { }
@@ -32,6 +37,8 @@ SuffixArray::~SuffixArray()
   free(m_wordInSentence);
   free(m_sentence);
   free(m_sentenceLength);
+  free(m_document);
+  free(m_documentName);
 }
 
 void SuffixArray::Create(const string& fileName )
@@ -46,22 +53,32 @@ void SuffixArray::Create(const string& fileName )
   textFile.open(fileName.c_str());
 
   if (!textFile) {
-    cerr << "no such file or directory " << fileName << endl;
+    cerr << "Error: no such file or directory " << fileName << endl;
     exit(1);
   }
 
   // first pass through data: get size
   istream *fileP = &textFile;
   m_size = 0;
   m_sentenceCount = 0;
+  m_documentCount = 0;
   while(!fileP->eof()) {
     SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
     if (fileP->eof()) break;
+    if (m_useDocument && ProcessDocumentLine(line,0)) continue;
     vector< WORD_ID > words = m_vcb.Tokenize( line );
     m_size += words.size() + 1;
     m_sentenceCount++;
   }
   textFile.close();
   cerr << m_size << " words (incl. sentence boundaries)" << endl;
+  if (m_useDocument) {
+    cerr << m_documentCount << " documents" << endl;
+    if (m_documentCount == 0) {
+      cerr << "Error: no documents found, aborting." << endl;
+      exit(1);
+    }
+  }
 
   // allocate memory
   m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
@@ -69,21 +86,31 @@ void SuffixArray::Create(const string& fileName )
   m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
   m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
   m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
-
-  // fill the array
-  int wordIndex = 0;
-  int sentenceId = 0;
-  textFile.open(fileName.c_str());
-
-  if (!textFile) {
-    cerr << "no such file or directory " << fileName << endl;
-    exit(1);
-  }
+  CheckAllocation(m_array != NULL, "m_array");
+  CheckAllocation(m_index != NULL, "m_index");
+  CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
+  CheckAllocation(m_sentence != NULL, "m_sentence");
+  CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
+  if (m_useDocument) {
+    m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
+    m_documentName = (INDEX*) calloc( sizeof( char ), m_documentCount );
+    m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
+    CheckAllocation(m_document != NULL, "m_document");
+    CheckAllocation(m_documentName != NULL, "m_documentName");
+    CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
+  }
+
+  // second pass through data: fill the arrays
+  int wordIndex = 0;
+  int sentenceId = 0;
+  m_documentNameLength = 0; // re-use as counter
+  m_documentCount = 0; // re-use as counter
+  textFile.open(fileName.c_str());
   fileP = &textFile;
   while(!fileP->eof()) {
     SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
     if (fileP->eof()) break;
+    if (m_useDocument && ProcessDocumentLine(line,sentenceId)) continue;
     vector< WORD_ID > words = m_vcb.Tokenize( line );
     vector< WORD_ID >::const_iterator i;
 
@@ -105,7 +132,7 @@ void SuffixArray::Create(const string& fileName )
   m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
 
   if (m_buffer == NULL) {
-    cerr << "cannot allocate memory to m_buffer" << endl;
+    cerr << "Error: cannot allocate memory to m_buffer" << endl;
     exit(1);
   }
 
@@ -114,6 +141,45 @@ void SuffixArray::Create(const string& fileName )
   cerr << "done sorting" << endl;
 }
 
+// very specific code to deal with common crawl document ids
+bool SuffixArray::ProcessDocumentLine( const char *line, const size_t sentenceId )
+{
+  size_t i;
+  // first 32 characters are hex-hash
+  for(i=0; i<32; i++) {
+    if ((line[i] < '0' || line[i] > '9') && (line[i] < 'a' || line[i] > 'f')) {
+      return false;
+    }
+  }
+  if (line[i++] != ' ') return false;
+
+  // second token is float
+  for (; line[i] != ' ' && line[i] != 0; i++) {
+    if (line[i] != '.' && (line[i] < '0' || line[i] > '9')) {
+      return false;
+    }
+  }
+  i++;
+
+  // last token is url (=name)
+  size_t startName = i;
+  for (; line[i] != ' ' && line[i] != 0; i++) {}
+  if (line[i] == ' ') return false;
+  size_t endName = i+1; // include '\0'
+
+  // second pass: record name and sentence number
+  if (m_document != NULL) {
+    m_documentName[m_documentCount] = m_documentNameLength;
+    for(size_t i=startName; i<endName; i++) {
+      m_documentNameBuffer[m_documentNameLength + i-startName] = line[i];
+    }
+    m_document[m_documentCount] = sentenceId;
+  }
+  m_documentNameLength += endName-startName;
+  m_documentCount++;
+  return true;
+}
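Note on the new ProcessDocumentLine: it accepts common-crawl metadata lines of the form "32-character lowercase hex hash, float score, URL" and treats anything else as an ordinary corpus sentence. A minimal standalone sketch of the same format check (the function name and sample lines are invented for illustration; this is not the class method itself):

```cpp
#include <cstddef>
#include <iostream>

// Sketch of the three-field check used above: a 32-character lowercase
// hex hash, then a float score, then the document URL.
bool LooksLikeDocumentLine(const char *line) {
  std::size_t i;
  for (i = 0; i < 32; i++)   // token 1: 32-character lowercase hex hash
    if ((line[i] < '0' || line[i] > '9') && (line[i] < 'a' || line[i] > 'f'))
      return false;
  if (line[i++] != ' ') return false;
  for (; line[i] != ' ' && line[i] != 0; i++)   // token 2: float score
    if (line[i] != '.' && (line[i] < '0' || line[i] > '9'))
      return false;
  return line[i] == ' ';     // token 3, the document URL, must follow
}

int main() {
  const char *doc  = "0123456789abcdef0123456789abcdef 0.85 http://example.com/page";
  const char *text = "an ordinary corpus sentence";
  // prints "1" for the metadata line, "0" for the plain sentence
  std::cout << LooksLikeDocumentLine(doc) << LooksLikeDocumentLine(text) << std::endl;
  return 0;
}
```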
 
 // good ol' quick sort
 void SuffixArray::Sort(INDEX start, INDEX end)
 {
@@ -162,7 +228,6 @@ int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
 
 inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
 {
-  // cerr << "c(" << m_vcb.GetWord(a) << ":" << m_vcb.GetWord(b) << ")=" << m_vcb.GetWord(a).compare( m_vcb.GetWord(b) ) << endl;
   return m_vcb.GetWord(a).compare( m_vcb.GetWord(b) );
 }
 
@@ -272,13 +337,73 @@ void SuffixArray::List(INDEX start, INDEX end)
   }
 }
 
+void SuffixArray::PrintSentenceMatches( const std::vector< WORD > &phrase )
+{
+  cout << "QUERY\t";
+  for(size_t i=0; i<phrase.size(); i++) {
+    if (i>0) cout << " ";
+    cout << phrase[i];
+  }
+  cout << '\t';
+  INDEX start = 0;
+  INDEX end = m_size-1;
+  INDEX mid = FindFirst( phrase, start, end );
+  if (mid == m_size) { // no matches
+    cout << "0 matches" << endl;
+    return;
+  }
+
+  INDEX firstMatch = FindLast( phrase, mid, start, -1 );
+  INDEX lastMatch = FindLast( phrase, mid, end, 1 );
+
+  // loop through all matches
+  cout << (lastMatch-firstMatch+1) << " matches" << endl;
+  for(INDEX i=firstMatch; i<=lastMatch; i++) {
+    // get sentence information
+    INDEX pos = GetPosition( i );
+    INDEX start = pos - GetWordInSentence( pos );
+    char length = GetSentenceLength( GetSentence( pos ) );
+    // print document name
+    if (m_useDocument) {
+      INDEX sentence = GetSentence( pos );
+      INDEX document = GetDocument( sentence );
+      PrintDocumentName( document );
+      cout << '\t';
+    }
+    // print sentence
+    for(char i=0; i<length; i++) {
+      if (i>0) cout << " ";
+      cout << GetWord( start + i );
+    }
+    cout << endl;
+  }
+}
+
+SuffixArray::INDEX SuffixArray::GetDocument( INDEX sentence ) const
+{
+  // binary search
+  INDEX min = 0;
+  INDEX max = m_documentCount-1;
+  if (sentence >= m_document[max]) {
+    return max;
+  }
+  while(true) {
+    INDEX mid = (min + max) / 2;
+    if (sentence >= m_document[mid] && sentence < m_document[mid+1]) {
+      return mid;
+    }
+    if (sentence < m_document[mid]) {
+      max = mid-1;
+    } else {
+      min = mid+1;
+    }
+  }
+}
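Note on GetDocument: m_document[d] stores the id of the first sentence of document d, so document d owns the half-open range [m_document[d], m_document[d+1]) and the lookup is a binary search over those boundaries. The same lookup as a self-contained sketch (the function name and data are invented for illustration):

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

// firstSentence[d] holds the index of the first sentence of document d,
// i.e. what m_document stores in the class above.
std::size_t FindDocument(const std::vector<std::size_t> &firstSentence,
                         std::size_t sentence) {
  std::size_t lo = 0, hi = firstSentence.size() - 1;
  if (sentence >= firstSentence[hi]) return hi;  // belongs to the last document
  while (true) {
    std::size_t mid = (lo + hi) / 2;
    // document mid owns [firstSentence[mid], firstSentence[mid+1])
    if (sentence >= firstSentence[mid] && sentence < firstSentence[mid + 1])
      return mid;
    if (sentence < firstSentence[mid]) hi = mid - 1;
    else                               lo = mid + 1;
  }
}

int main() {
  // three documents starting at sentences 0, 40 and 100
  std::vector<std::size_t> firstSentence = {0, 40, 100};
  std::cout << FindDocument(firstSentence, 5)    // 0
            << FindDocument(firstSentence, 40)   // 1
            << FindDocument(firstSentence, 99)   // 1
            << FindDocument(firstSentence, 123)  // 2
            << std::endl;                        // prints 0112
  return 0;
}
```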
 
 void SuffixArray::Save(const string& fileName ) const
 {
   FILE *pFile = fopen ( fileName.c_str() , "w" );
-  if (pFile == NULL) {
-    cerr << "Cannot open " << fileName << endl;
-    exit(1);
-  }
+  if (pFile == NULL) Error("cannot open",fileName);
 
   fwrite( &m_size, sizeof(INDEX), 1, pFile );
   fwrite( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
@@ -288,6 +413,16 @@ void SuffixArray::Save(const string& fileName ) const
 
   fwrite( &m_sentenceCount, sizeof(INDEX), 1, pFile );
   fwrite( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
+
+  char useDocument = m_useDocument; // not sure if that is needed
+  fwrite( &useDocument, sizeof(char), 1, pFile );
+  if (m_useDocument) {
+    fwrite( &m_documentCount, sizeof(INDEX), 1, pFile );
+    fwrite( m_document, sizeof(INDEX), m_documentCount, pFile );
+    fwrite( m_documentName, sizeof(INDEX), m_documentCount, pFile );
+    fwrite( &m_documentNameLength, sizeof(INDEX), 1, pFile );
+    fwrite( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile );
+  }
   fclose( pFile );
 
   m_vcb.Save( fileName + ".src-vcb" );
@@ -296,56 +431,81 @@ void SuffixArray::Save(const string& fileName ) const
 void SuffixArray::Load(const string& fileName )
 {
   FILE *pFile = fopen ( fileName.c_str() , "r" );
-  if (pFile == NULL) {
-    cerr << "no such file or directory " << fileName << endl;
-    exit(1);
-  }
+  if (pFile == NULL) Error("no such file or directory", fileName);
 
   cerr << "loading from " << fileName << endl;
 
-  fread( &m_size, sizeof(INDEX), 1, pFile );
+  fread( &m_size, sizeof(INDEX), 1, pFile )
+  || Error("could not read m_size from", fileName);
   cerr << "words in corpus: " << m_size << endl;
 
   m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
   m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
   m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
   m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
+  CheckAllocation(m_array != NULL, "m_array");
+  CheckAllocation(m_index != NULL, "m_index");
+  CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
+  CheckAllocation(m_sentence != NULL, "m_sentence");
-
-  if (m_array == NULL) {
-    cerr << "Error: cannot allocate memory to m_array" << endl;
-    exit(1);
-  }
-
-  if (m_index == NULL) {
-    cerr << "Error: cannot allocate memory to m_index" << endl;
-    exit(1);
-  }
-
-  if (m_wordInSentence == NULL) {
-    cerr << "Error: cannot allocate memory to m_wordInSentence" << endl;
-    exit(1);
-  }
-
-  if (m_sentence == NULL) {
-    cerr << "Error: cannot allocate memory to m_sentence" << endl;
-    exit(1);
-  }
-
-  fread( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
-  fread( m_index, sizeof(INDEX), m_size, pFile ); // suffix array
-  fread( m_wordInSentence, sizeof(char), m_size, pFile); // word index
-  fread( m_sentence, sizeof(INDEX), m_size, pFile); // sentence index
+  fread( m_array, sizeof(WORD_ID), m_size, pFile ) // corpus
+  || Error("could not read m_array from", fileName);
+  fread( m_index, sizeof(INDEX), m_size, pFile ) // suffix array
+  || Error("could not read m_index from", fileName);
+  fread( m_wordInSentence, sizeof(char), m_size, pFile) // word index
+  || Error("could not read m_wordInSentence from", fileName);
+  fread( m_sentence, sizeof(INDEX), m_size, pFile ) // sentence index
+  || Error("could not read m_sentence from", fileName);
 
-  fread( &m_sentenceCount, sizeof(INDEX), 1, pFile );
+  fread( &m_sentenceCount, sizeof(INDEX), 1, pFile )
+  || Error("could not read m_sentenceCount from", fileName);
   cerr << "sentences in corpus: " << m_sentenceCount << endl;
   m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
-
-  if (m_sentenceLength == NULL) {
-    cerr << "Error: cannot allocate memory to m_sentenceLength" << endl;
-    exit(1);
-  }
-
-  fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
+  CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
+  fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile) // sentence length
+  || Error("could not read m_sentenceLength from", fileName);
+
+  if (m_useDocument) { // do not read it when you do not need it
+    char useDocument;
+    fread( &useDocument, sizeof(char), 1, pFile )
+    || Error("could not read m_useDocument from", fileName);
+    if (!useDocument) {
+      cerr << "Error: stored suffix array does not have a document index\n";
+      exit(1);
+    }
+    fread( &m_documentCount, sizeof(INDEX), 1, pFile )
+    || Error("could not read m_documentCount from", fileName);
+    m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
+    m_documentName = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
+    CheckAllocation(m_document != NULL, "m_document");
+    CheckAllocation(m_documentName != NULL, "m_documentName");
+    fread( m_document, sizeof(INDEX), m_documentCount, pFile )
+    || Error("could not read m_document from", fileName);
+    fread( m_documentName, sizeof(INDEX), m_documentCount, pFile )
+    || Error("could not read m_documentName from", fileName);
+    fread( &m_documentNameLength, sizeof(INDEX), 1, pFile )
+    || Error("could not read m_documentNameLength from", fileName);
+    m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
+    CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
+    fread( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile )
+    || Error("could not read m_document from", fileName);
+  }
+
   fclose( pFile );
 
   m_vcb.Load( fileName + ".src-vcb" );
 }
 
+void SuffixArray::CheckAllocation( bool check, const char *dataStructure ) const
+{
+  if (check) return;
+  cerr << "Error: could not allocate memory for " << dataStructure << endl;
+  exit(1);
+}
+
+bool SuffixArray::Error( const char *message, const string &fileName) const
+{
+  cerr << "Error: " << message << " " << fileName << endl;
+  exit(1);
+  return true; // yeah, i know.
+}
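Note on the `fread(...) || Error(...)` pattern introduced in Load: fread returns the number of complete items it read, so a short read yields 0, the left operand is false, and `||` evaluates the Error call, which prints and exits. Error is declared to return bool only so it can appear on the right of `||` (hence the "yeah, i know" comment). A minimal self-contained sketch of the idiom (file name and helper are invented):

```cpp
#include <cstdio>
#include <cstdlib>
#include <iostream>

// Hypothetical stand-in for SuffixArray::Error: prints and exits.
// Declared bool so it can sit on the right-hand side of ||.
bool Fail(const char *message) {
  std::cerr << "Error: " << message << std::endl;
  std::exit(1);
  return true; // never reached; only satisfies the return type
}

int main() {
  std::FILE *f = std::fopen("suffix-array.bin", "rb");
  if (f == NULL) Fail("cannot open suffix-array.bin");

  unsigned int size = 0;
  // fread returns the number of items read (0 or 1 here); a short
  // read makes the left side falsy, so || evaluates Fail().
  std::fread(&size, sizeof(size), 1, f) || Fail("could not read size");

  std::cout << "size = " << size << std::endl;
  std::fclose(f);
  return 0;
}
```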
biconcor/SuffixArray.h:
@@ -15,6 +15,12 @@ private:
   INDEX *m_sentence;
   char *m_sentenceLength;
   WORD_ID m_endOfSentence;
+  INDEX *m_document;
+  INDEX *m_documentName;
+  char *m_documentNameBuffer;
+  size_t m_documentNameLength;
+  size_t m_documentCount;
+  bool m_useDocument;
   Vocabulary m_vcb;
   INDEX m_size;
   INDEX m_sentenceCount;
@@ -28,6 +34,7 @@ public:
   ~SuffixArray();
 
   void Create(const std::string& fileName );
+  bool ProcessDocumentLine( const char* const, const size_t );
   void Sort(INDEX start, INDEX end);
   int CompareIndex( INDEX a, INDEX b ) const;
   inline int CompareWord( WORD_ID a, WORD_ID b ) const;
@@ -40,6 +47,7 @@ public:
   INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
   int Match( const std::vector< WORD > &phrase, INDEX index );
   void List( INDEX start, INDEX end );
+  void PrintSentenceMatches( const std::vector< WORD > &phrase );
   inline INDEX GetPosition( INDEX index ) const {
     return m_index[ index ];
   }
@@ -58,6 +66,17 @@ public:
   inline WORD GetWord( INDEX position ) const {
     return m_vcb.GetWord( m_array[position] );
   }
+  void UseDocument() {
+    m_useDocument = true;
+  }
+  INDEX GetDocument( INDEX sentence ) const;
+  void PrintDocumentName( INDEX document ) {
+    for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) {
+      std::cout << m_documentNameBuffer[ i ];
+    }
+  }
   void Save(const std::string& fileName ) const;
   void Load(const std::string& fileName );
+  void CheckAllocation(bool, const char *dataStructure) const;
+  bool Error( const char* message, const std::string& fileName) const;
 };
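Taken together, the new public interface is meant to be driven roughly as follows. A hypothetical sketch based on the biconcor.cpp changes further down (file names and the query are invented; WORD is the vocabulary's string type in biconcor):

```cpp
#include <string>
#include <vector>
#include "SuffixArray.h"  // biconcor's header, as modified above

int main() {
  // Build a document-aware suffix array and persist it.
  SuffixArray built;
  built.UseDocument();             // enable the document index
  built.Create("corpus.txt");      // two passes over the corpus
  built.Save("corpus.sa");         // also writes corpus.sa.src-vcb

  // Load it back and run a sentence query.
  SuffixArray loaded;
  loaded.UseDocument();            // must be set before Load reads the index
  loaded.Load("corpus.sa");
  std::vector<WORD> phrase;        // WORD is a std::string typedef here
  phrase.push_back("hello");
  phrase.push_back("world");
  // Emits: QUERY<TAB>hello world<TAB>N matches, then one line per match,
  // prefixed with the document name when the document index is enabled.
  loaded.PrintSentenceMatches(phrase);
  return 0;
}
```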
biconcor/Vocabulary.cpp:
@@ -62,7 +62,7 @@ void Vocabulary::Save(const string& fileName ) const
   vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc);
 
   if (!vcbFile) {
-    cerr << "Failed to open " << vcbFile << endl;
+    cerr << "Failed to open " << fileName << endl;
     exit(1);
   }
 
@@ -81,7 +81,7 @@ void Vocabulary::Load(const string& fileName )
   vcbFile.open(fileName.c_str());
 
   if (!vcbFile) {
-    cerr << "no such file or directory: " << vcbFile << endl;
+    cerr << "no such file or directory: " << fileName << endl;
     exit(1);
   }
 
biconcor/biconcor.cpp:
@@ -1,4 +1,5 @@
 #include "SuffixArray.h"
+#include "../util/tokenize.hh"
 #include <getopt.h>
 
 using namespace std;
@@ -13,10 +14,12 @@ int main(int argc, char* argv[])
   string query;
   string fileNameSuffix;
   string fileNameSource;
-  int loadFlag = false;
-  int saveFlag = false;
-  int createFlag = false;
-  int queryFlag = false;
+  bool loadFlag = false;
+  bool saveFlag = false;
+  bool createFlag = false;
+  bool queryFlag = false;
+  bool querySentenceFlag = false;
 
   int stdioFlag = false;  // receive requests from STDIN, respond to STDOUT
   string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create corpus]\n\t[--query string]\n\t[--stdio]\n";
   while(1) {
@@ -25,11 +28,14 @@ int main(int argc, char* argv[])
     {"save", required_argument, 0, 's'},
     {"create", required_argument, 0, 'c'},
     {"query", required_argument, 0, 'q'},
+    {"query-sentence", required_argument, 0, 'Q'},
     {"document", required_argument, 0, 'd'},
     {"stdio", no_argument, 0, 'i'},
+    {"stdio-sentence", no_argument, 0, 'I'},
     {0, 0, 0, 0}
   };
   int option_index = 0;
-  int c = getopt_long (argc, argv, "l:s:c:q:i", long_options, &option_index);
+  int c = getopt_long (argc, argv, "l:s:c:q:Q:iId", long_options, &option_index);
   if (c == -1) break;
   switch (c) {
   case 'l':
@@ -48,17 +54,25 @@ int main(int argc, char* argv[])
     query = string(optarg);
     queryFlag = true;
     break;
+  case 'Q':
+    query = string(optarg);
+    querySentenceFlag = true;
+    break;
   case 'i':
     stdioFlag = true;
     break;
+  case 'I':
+    stdioFlag = true;
+    querySentenceFlag = true;
+    break;
   case 'd':
     suffixArray.UseDocument();
     break;
   default:
     cerr << info;
     exit(1);
   }
   }
   if (stdioFlag) {
     queryFlag = true;
   }
 
   // check if parameter settings are legal
   if (saveFlag && !createFlag) {
@@ -74,7 +88,7 @@ int main(int argc, char* argv[])
     exit(1);
   }
 
-  // do your thing
+  // get suffix array
   if (createFlag) {
     cerr << "will create\n";
     cerr << "corpus is in " << fileNameSource << endl;
@@ -88,16 +102,26 @@ int main(int argc, char* argv[])
     cerr << "will load from " << fileNameSuffix << endl;
     suffixArray.Load( fileNameSuffix );
   }
 
+  // do something with it
   if (stdioFlag) {
     while(true) {
       string query;
       if (getline(cin, query, '\n').eof()) {
         return 0;
       }
-      cout << lookup( query ) << endl;
+      if (querySentenceFlag) {
+        vector< string > queryString = util::tokenize( query.c_str() );
+        suffixArray.PrintSentenceMatches( queryString );
+      } else {
+        cout << lookup( query ) << endl;
+      }
     }
   } else if (queryFlag) {
     cout << lookup( query ) << endl;
+  } else if (querySentenceFlag) {
+    vector< string > queryString = util::tokenize( query.c_str() );
+    suffixArray.PrintSentenceMatches( queryString );
   }
   return 0;
 }
@@ -105,32 +129,6 @@ int main(int argc, char* argv[])
 size_t lookup( string query )
 {
   cerr << "query is " << query << endl;
-  vector< string > queryString = tokenize( query.c_str() );
+  vector< string > queryString = util::tokenize( query.c_str() );
   return suffixArray.Count( queryString );
 }
-
-// Duplicate of definition in util/tokenize.hh.
-// TODO: Can we de-duplicate this? At the time of writing biconcor does not
-// use util at all.
-vector<string> tokenize(const char input[])
-{
-  vector< string > token;
-  bool betweenWords = true;
-  int start=0;
-  int i;
-  for(i = 0; input[i] != '\0'; i++) {
-    const bool isSpace = (input[i] == ' ' || input[i] == '\t');
-
-    if (!isSpace && betweenWords) {
-      start = i;
-      betweenWords = false;
-    } else if (isSpace && !betweenWords) {
-      token.push_back( string( input+start, i-start ) );
-      betweenWords = true;
-    }
-  }
-  if (!betweenWords)
-    token.push_back( string( input+start, i-start ) );
-  return token;
-}
Speed-test README.md:
@@ -28,14 +28,16 @@ TEST_DIR: /home/moses-speedtest/phrase_tables/tests
 TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
 BASEBRANCH: RELEASE-2.1.1
 MOSES_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-prof
+MOSES_GOOGLE_PROFILER_REPO: /home/moses-speedtest/moses-standard/mosesdecoder-variant-gperftools
 </pre>
 
 The _MOSES\_REPO\_PATH_ is the place where you have set up and built moses.
-The _DROP\_CACHES\_COMM_ is the command that would b eused to drop caches. It should run without needing root access.
+The _DROP\_CACHES\_COMM_ is the command that would be used to drop caches. It should run without needing root access.
 _TEST\_DIR_ is the directory where all the tests will reside.
 _TEST\_LOG\_DIR_ is the directory where the performance logs will be gathered. It should be created before running the testsuite for the first time.
 _BASEBRANCH_ is the branch against which all new tests will be compared. It should normally be set to be the latest Moses stable release.
 _MOSES\_PROFILER\_REPO_ is a path to a moses repository set up and built with profiling enabled. Optional if you want to produce profiling results.
+_MOSES\_GOOGLE\_PROFILER\_REPO_ is a path to a moses repository set up with full tcmalloc and profiler, as well as shared linking, for use with gperftools.
 
 ### Creating tests
 
 In order to create a test one should go into the TEST_DIR and create a new folder. That folder will be used for the name of the test.
@@ -45,7 +47,7 @@ An example such configuration file is **test\_config**
 <pre>
 Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
 LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/,
-Variants: vanilla, cached, ldpre, profile #Can't have cached without ldpre or vanilla
+Variants: vanilla, cached, ldpre, profile, google-profiler #Can't have cached without ldpre or vanilla
 </pre>
 
 The _Command:_ line specifies the executable (which is looked up in the /bin directory of the repo.) and any arguments necessary. Before running the test, the script cds to the current test directory so you can use relative paths.
@@ -61,11 +63,21 @@ The _Variants:_ line specifies what type of tests should we run. This particular
 If you want to produce profiler results together in some tests you need to specify the _MOSES\_PROFILER\_REPO_ in the config
 ```bash
 git clone https://github.com/moses-smt/mosesdecoder.git mosesdecoder-profile
-cd mosesdecoder
+cd mosesdecoder-profile
 ./bjam -j10 --with-cmph=/usr/include/ variant=profile
 ```
 
-Afterwards for testcases which contain the **profile** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR_ which contains the **gprof** output from every run.
+Afterwards for testcases which contain the **profile** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR_ which contains the **gprof** output from every run (files ending in **\_profile**).
+
+#### Produce google profiler results.
+If you want to produce profiler results together in some tests you need to specify the _MOSES\_GOOGLE\_PROFILER\_REPO_ in the config
+```bash
+git clone https://github.com/moses-smt/mosesdecoder.git mosesdecoder-google-profile
+cd mosesdecoder-google-profile
+./bjam link=shared -j10 --full-tcmalloc --with-cmph=/usr/include/
+```
+
+Afterwards for testcases which contain the **google-profiler** keyword in **Variants** you will see a directory inside _TEST\_LOG\_DIR_ which contains the **google-profiler** output from every run (files prefixed with **pprof**). To analyze the output you need to use [pprof](http://google-perftools.googlecode.com/svn/trunk/doc/cpuprofile.html).
 
 ### Running tests.
 Running the tests is done through the **runtests.py** script.
runtests.py (speed-test harness):
@@ -2,6 +2,7 @@
 import os
 import subprocess
 import time
+import shutil
 from argparse import ArgumentParser
 from testsuite_common import processLogLine
 
@@ -26,16 +27,21 @@ def parse_cmd():
     arguments = parser.parse_args()
     return arguments
 
-def repoinit(testconfig, profiler=True):
+def repoinit(testconfig, profiler=None):
     """Determines revision and sets up the repo. If given the profiler optional
     argument, wil init the profiler repo instead of the default one."""
     revision = ''
     #Update the repo
-    if profiler:
+    if profiler == "gnu-profiler":
         if testconfig.repo_prof is not None:
             os.chdir(testconfig.repo_prof)
         else:
             raise ValueError('Profiling repo is not defined')
+    elif profiler == "google-profiler":
+        if testconfig.repo_gprof is not None:
+            os.chdir(testconfig.repo_gprof)
+        else:
+            raise ValueError('Profiling repo is not defined')
     else:
         os.chdir(testconfig.repo)
     #Checkout specific branch, else maintain main branch
@@ -61,9 +67,10 @@ def repoinit(testconfig, profiler=None):
 
 class Configuration:
     """A simple class to hold all of the configuration constatns"""
-    def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev, repo_prof=None):
+    def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev, repo_prof=None, repo_gprof=None):
         self.repo = repo
         self.repo_prof = repo_prof
+        self.repo_gprof = repo_gprof
        self.drop_caches = drop_caches
         self.tests = tests
         self.testlogs = testlogs
@@ -88,16 +95,17 @@ class Configuration:
 
 class Test:
     """A simple class to contain all information about tests"""
-    def __init__(self, name, command, ldopts, permutations, prof_command=None):
+    def __init__(self, name, command, ldopts, permutations, prof_command=None, gprof_command=None):
         self.name = name
         self.command = command
         self.prof_command = prof_command
+        self.gprof_command = gprof_command
         self.ldopts = ldopts.replace(' ', '').split(',') #Not tested yet
         self.permutations = permutations
 
-def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None):
+def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None, moses_gprof_repo=None):
     """Parses the config file"""
-    command, ldopts, prof_command = '', '', None
+    command, ldopts, prof_command, gprof_command = '', '', None, None
     permutations = []
     fileopen = open(conffile, 'r')
     for line in fileopen:
@@ -108,8 +116,10 @@ def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None):
 
         if opt == 'Command:':
             command = args.replace('\n', '')
-            if moses_prof is not None:  # Get optional command for profiling
+            if moses_prof_repo is not None:  # Get optional command for profiling
                 prof_command = moses_prof_repo + '/bin/' + command
+            if moses_gprof_repo is not None: # Get optional command for google-perftools
+                gprof_command = moses_gprof_repo + '/bin/' + command
             command = moses_repo + '/bin/' + command
         elif opt == 'LDPRE:':
             ldopts = args.replace('\n', '')
@@ -118,14 +128,14 @@ def parse_configfile(conffile, testdir, moses_repo, moses_prof_repo=None):
         else:
             raise ValueError('Unrecognized option ' + opt)
     #We use the testdir as the name.
-    testcase = Test(testdir, command, ldopts, permutations, prof_command)
+    testcase = Test(testdir, command, ldopts, permutations, prof_command, gprof_command)
     fileopen.close()
     return testcase
 
 def parse_testconfig(conffile):
     """Parses the config file for the whole testsuite."""
     repo_path, drop_caches, tests_dir, testlog_dir = '', '', '', ''
-    basebranch, baserev, repo_prof_path = '', '', None
+    basebranch, baserev, repo_prof_path, repo_gprof_path = '', '', None, None
     fileopen = open(conffile, 'r')
     for line in fileopen:
         line = line.split('#')[0] # Discard comments
@@ -146,10 +156,12 @@ def parse_testconfig(conffile):
             baserev = args.replace('\n', '')
         elif opt == 'MOSES_PROFILER_REPO:': # Optional
             repo_prof_path = args.replace('\n', '')
+        elif opt == 'MOSES_GOOGLE_PROFILER_REPO:': # Optional
+            repo_gprof_path = args.replace('\n', '')
         else:
             raise ValueError('Unrecognized option ' + opt)
     config = Configuration(repo_path, drop_caches, tests_dir, testlog_dir,\
-                basebranch, baserev, repo_prof_path)
+                basebranch, baserev, repo_prof_path, repo_gprof_path)
     fileopen.close()
     return config
 
@@ -160,7 +172,9 @@ def get_config():
     config.additional_args(args.singletestdir, args.revision, args.branch)
     revision = repoinit(config)
     if config.repo_prof is not None:
-        repoinit(config, True)
+        repoinit(config, "gnu-profiler")
+    if config.repo_gprof is not None:
+        repoinit(config, "google-profiler")
     config.set_revision(revision)
     return config
 
@@ -212,16 +226,27 @@ def write_gprof(command, name, variant, config):
     executable_path = command.split(' ')[0] # Path to the moses binary
     gprof_command = 'gprof ' + executable_path + ' ' + gmon_path + ' > ' + outputfile
     subprocess.call([gprof_command], shell=True)
-    os.remove('gmon_path') # After we are done discard the gmon file
+    os.remove(gmon_path) # After we are done discard the gmon file
 
-def execute_test(command, path, name, variant, config, profile=False):
+def write_pprof(name, variant, config):
+    """Copies the google-perftools profiler output to the corresponding test directory"""
+    output_dir = config.testlogs + '/' + name
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    outputfile = output_dir + '/pprof_' + time.strftime("%d.%m.%Y_%H:%M:%S") + '_' + name + '_' + variant
+    shutil.move("/tmp/moses.prof", outputfile)
+
+
+def execute_test(command, path, name, variant, config, profile=None):
     """Executes a testcase given a whole command, path to the test file output,
     name of the test and variant tested. Config is the global configuration"""
     subprocess.Popen([command], stdout=None, stderr=subprocess.PIPE, shell=True).communicate()
-    if not profile:
+    if profile is None:
         write_log(path, name + '_' + variant, config)
-    else: # Basically produce a gmon output
+    elif profile == "gnu-profiler": # Basically produce a gmon output
         write_gprof(command, name, variant, config)
+    elif profile == "google-profiler":
+        write_pprof(name, variant, config)
 
 
 def execute_tests(testcase, cur_directory, config):
@@ -255,7 +280,7 @@ def execute_tests(testcase, cur_directory, config):
             subprocess.call([config.drop_caches], shell=True)
 
             #Create the command for executing moses:
-            whole_command = 'LD_PRELOAD ' + opt + time_command + testcase.command
+            whole_command = 'LD_PRELOAD=' + opt + time_command + testcase.command
             variant = 'ldpre_' + opt
 
             #test normal and cached
@@ -271,9 +296,9 @@ def execute_tests(testcase, cur_directory, config):
 
         if 'vanilla' in testcase.permutations:
             whole_command = testcase.prof_command
-            execute_test(whole_command, time_path, testcase.name, 'profile', config, True)
+            execute_test(whole_command, time_path, testcase.name, 'profile', config, "gnu-profiler")
             if 'cached' in testcase.permutations:
-                execute_test(whole_command, time_path, testcase.name, 'profile_cached', config, True)
+                execute_test(whole_command, time_path, testcase.name, 'profile_cached', config, "gnu-profiler")
 
         if 'ldpre' in testcase.permutations:
             for opt in testcase.ldopts:
@@ -282,13 +307,42 @@ def execute_tests(testcase, cur_directory, config):
                 subprocess.call([config.drop_caches], shell=True)
 
                 #Create the command for executing moses:
-                whole_command = 'LD_PRELOAD ' + opt + testcase.prof_command
+                whole_command = 'LD_PRELOAD=' + opt + " " + testcase.prof_command
                 variant = 'profile_ldpre_' + opt
 
                 #test normal and cached
-                execute_test(whole_command, time_path, testcase.name, variant, config, True)
+                execute_test(whole_command, time_path, testcase.name, variant, config, "gnu-profiler")
                 if 'cached' in testcase.permutations:
-                    execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, True)
+                    execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, "gnu-profiler")
+
+    #Google-perftools profiler
+    if 'google-profiler' in testcase.permutations:
+        subprocess.call(['sync'], shell=True) # Drop caches first
+        subprocess.call([config.drop_caches], shell=True)
+
+        #Create the command for executing moses
+        whole_command = "CPUPROFILE=/tmp/moses.prof " + testcase.gprof_command
+
+        #test normal and cached
+        execute_test(whole_command, time_path, testcase.name, 'vanilla', config, 'google-profiler')
+        if 'cached' in testcase.permutations:
+            execute_test(whole_command, time_path, testcase.name, 'vanilla_cached', config, 'google-profiler')
+
+        #Now perform LD_PRELOAD tests
+        if 'ldpre' in testcase.permutations:
+            for opt in testcase.ldopts:
+                #Clear caches
+                subprocess.call(['sync'], shell=True)
+                subprocess.call([config.drop_caches], shell=True)
+
+                #Create the command for executing moses:
+                whole_command = 'LD_PRELOAD=' + opt + " " + whole_command
+                variant = 'ldpre_' + opt
+
+                #test normal and cached
+                execute_test(whole_command, time_path, testcase.name, variant, config, 'google-profiler')
+                if 'cached' in testcase.permutations:
+                    execute_test(whole_command, time_path, testcase.name, variant + '_cached', config, 'google-profiler')
 
 
 # Go through all the test directories and executes tests
@@ -319,7 +373,7 @@ if __name__ == '__main__':
 
     for logfile in os.listdir(CONFIG.testlogs):
         logfile_name = CONFIG.testlogs + '/' + logfile
-        if not check_for_basever(logfile_name, CONFIG.basebranch):
+        if os.path.isfile(logfile_name) and not check_for_basever(logfile_name, CONFIG.basebranch):
            logfile = logfile.replace('_vanilla', '')
            logfile = logfile.replace('_cached', '')
            logfile = logfile.replace('_ldpre', '')
@@ -330,7 +384,7 @@ if __name__ == '__main__':
     #Create a new configuration for base version tests:
     BASECONFIG = Configuration(CONFIG.repo, CONFIG.drop_caches,\
                 CONFIG.tests, CONFIG.testlogs, CONFIG.basebranch,\
-                CONFIG.baserev, CONFIG.repo_prof)
+                CONFIG.baserev, CONFIG.repo_prof, CONFIG.repo_gprof)
     BASECONFIG.additional_args(None, CONFIG.baserev, CONFIG.basebranch)
     #Set up the repository and get its revision:
     REVISION = repoinit(BASECONFIG)
@@ -340,20 +394,28 @@ if __name__ == '__main__':
     subprocess.call(['./previous.sh'], shell=True)
     #If profiler configuration exists also init it
     if BASECONFIG.repo_prof is not None:
-        repoinit(BASECONFIG, True)
+        repoinit(BASECONFIG, "gnu-profiler")
         os.chdir(BASECONFIG.repo_prof)
         subprocess.call(['./previous.sh'], shell=True)
 
+    if BASECONFIG.repo_gprof is not None:
+        repoinit(BASECONFIG, "google-profiler")
+        os.chdir(BASECONFIG.repo_gprof)
+        subprocess.call(['./previous.sh'], shell=True)
+
     #Perform tests
     for directory in FIRSTTIME:
         cur_testcase = parse_configfile(BASECONFIG.tests + '/' + directory +\
-        '/config', directory, BASECONFIG.repo)
+        '/config', directory, BASECONFIG.repo, BASECONFIG.repo_prof, BASECONFIG.repo_gprof)
         execute_tests(cur_testcase, directory, BASECONFIG)
 
     #Reset back the repository to the normal configuration
     repoinit(CONFIG)
     if BASECONFIG.repo_prof is not None:
-        repoinit(CONFIG, True)
+        repoinit(CONFIG, "gnu-profiler")
+
+    if BASECONFIG.repo_gprof is not None:
+        repoinit(CONFIG, "google-profiler")
 
     #Builds moses
     os.chdir(CONFIG.repo)
@@ -362,12 +424,16 @@ if __name__ == '__main__':
         os.chdir(CONFIG.repo_prof)
         subprocess.call(['./previous.sh'], shell=True)
 
+    if CONFIG.repo_gprof is not None:
+        os.chdir(CONFIG.repo_gprof)
+        subprocess.call(['./previous.sh'], shell=True)
+
     if CONFIG.singletest:
         TESTCASE = parse_configfile(CONFIG.tests + '/' +\
-            CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo)
+            CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof)
         execute_tests(TESTCASE, CONFIG.singletest, CONFIG)
     else:
         for directory in ALL_DIR:
             cur_testcase = parse_configfile(CONFIG.tests + '/' + directory +\
-            '/config', directory, CONFIG.repo)
+            '/config', directory, CONFIG.repo, CONFIG.repo_prof, CONFIG.repo_gprof)
            execute_tests(cur_testcase, directory, CONFIG)
Eclipse .cproject (binary parser ordering):
@@ -11,12 +11,12 @@
       </externalSetting>
     </externalSettings>
     <extensions>
+      <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
       <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
      <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
-      <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
-      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
     </extensions>
   </storageModule>
   <storageModule moduleId="cdtBuildSystem" version="4.0.0">
@@ -72,13 +72,13 @@
   <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
     <externalSettings/>
     <extensions>
+      <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
      <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
-      <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
-      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
     </extensions>
   </storageModule>
   <storageModule moduleId="cdtBuildSystem" version="4.0.0">
OnDiskPt.project (CodeLite):
@@ -1,5 +1,22 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <CodeLite_Project Name="OnDiskPt" InternalType="Library">
+  <Plugins>
+    <Plugin Name="CMakePlugin">
+      <![CDATA[[{
+        "name": "Debug",
+        "enabled": false,
+        "buildDirectory": "build",
+        "sourceDirectory": "$(ProjectPath)",
+        "generator": "",
+        "buildType": "",
+        "arguments": [],
+        "parentProject": ""
+      }]]]>
+    </Plugin>
+    <Plugin Name="qmake">
+      <![CDATA[00010001N0005Debug000000000000]]>
+    </Plugin>
+  </Plugins>
   <Description/>
   <Dependencies/>
   <VirtualDirectory Name="src"/>
@@ -27,6 +44,8 @@
     <File Name="../../../OnDiskPt/Word.cpp"/>
     <File Name="../../../OnDiskPt/Word.h"/>
   </VirtualDirectory>
+  <Dependencies Name="Debug"/>
+  <Dependencies Name="Release"/>
   <Settings Type="Static Library">
     <GlobalSettings>
       <Compiler Options="" C_Options="" Assembler="">
@@ -40,9 +59,9 @@
     <Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
       <Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
         <IncludePath Value="."/>
-        <IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
-        <IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
-        <IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
+        <IncludePath Value="../../.."/>
+        <IncludePath Value="../../../phrase-extract"/>
+        <IncludePath Value="../../../boost/include"/>
         <Preprocessor Value="MAX_NUM_FACTORS=4"/>
       </Compiler>
       <Linker Options="" Required="yes"/>
@@ -72,7 +91,7 @@
       <CustomPostBuild/>
       <CustomPreBuild/>
     </AdditionalRules>
-    <Completion EnableCpp11="no">
+    <Completion EnableCpp11="no" EnableCpp14="no">
       <ClangCmpFlagsC/>
       <ClangCmpFlags/>
       <ClangPP/>
@@ -110,7 +129,7 @@
       <CustomPostBuild/>
       <CustomPreBuild/>
     </AdditionalRules>
-    <Completion EnableCpp11="no">
+    <Completion EnableCpp11="no" EnableCpp14="no">
       <ClangCmpFlagsC/>
       <ClangCmpFlags/>
       <ClangPP/>
@@ -118,6 +137,4 @@
     </Completion>
   </Configuration>
   </Settings>
-  <Dependencies Name="Debug"/>
-  <Dependencies Name="Release"/>
 </CodeLite_Project>
CodeLite workspace:
@@ -6,10 +6,11 @@
   <Project Name="lm" Path="lm/lm.project" Active="No"/>
   <Project Name="OnDiskPt" Path="OnDiskPt/OnDiskPt.project" Active="No"/>
   <Project Name="search" Path="search/search.project" Active="No"/>
-  <Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="Yes"/>
+  <Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="No"/>
   <Project Name="score" Path="score/score.project" Active="No"/>
   <Project Name="consolidate" Path="consolidate/consolidate.project" Active="No"/>
   <Project Name="moses" Path="moses/moses.project" Active="No"/>
+  <Project Name="pruneGeneration" Path="pruneGeneration/pruneGeneration.project" Active="Yes"/>
   <BuildMatrix>
     <WorkspaceConfiguration Name="Debug" Selected="yes">
       <Project Name="manual-label" ConfigName="Debug"/>
@@ -23,6 +24,7 @@
       <Project Name="score" ConfigName="Debug"/>
       <Project Name="consolidate" ConfigName="Debug"/>
       <Project Name="moses" ConfigName="Debug"/>
+      <Project Name="pruneGeneration" ConfigName="Debug"/>
     </WorkspaceConfiguration>
     <WorkspaceConfiguration Name="Release" Selected="yes">
       <Project Name="manual-label" ConfigName="Release"/>
@@ -36,6 +38,7 @@
       <Project Name="score" ConfigName="Release"/>
       <Project Name="consolidate" ConfigName="Release"/>
       <Project Name="moses" ConfigName="Release"/>
+      <Project Name="pruneGeneration" ConfigName="Release"/>
     </WorkspaceConfiguration>
   </BuildMatrix>
 </CodeLite_Workspace>
Eclipse .project (linked resources):
@@ -102,9 +102,14 @@
       <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
     </link>
     <link>
-      <name>SyntaxTree.cpp</name>
+      <name>SyntaxNodeCollection.cpp</name>
       <type>1</type>
-      <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
+      <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
+    </link>
+    <link>
+      <name>SyntaxNodeCollection.h</name>
+      <type>1</type>
+      <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
     </link>
     <link>
       <name>SyntaxTree.h</name>
extract-mixed-syntax.project (CodeLite):
@@ -1,5 +1,22 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <CodeLite_Project Name="extract-mixed-syntax" InternalType="Console">
+  <Plugins>
+    <Plugin Name="qmake">
+      <![CDATA[00010001N0005Debug000000000000]]>
+    </Plugin>
+    <Plugin Name="CMakePlugin">
+      <![CDATA[[{
+        "name": "Debug",
+        "enabled": false,
+        "buildDirectory": "build",
+        "sourceDirectory": "$(ProjectPath)",
+        "generator": "",
+        "buildType": "",
+        "arguments": [],
+        "parentProject": ""
+      }]]]>
+    </Plugin>
+  </Plugins>
   <Description/>
   <Dependencies/>
   <VirtualDirectory Name="src"/>
@@ -43,6 +60,10 @@
     <File Name="../../../phrase-extract/OutputFileStream.cpp"/>
     <File Name="../../../phrase-extract/OutputFileStream.h"/>
   </VirtualDirectory>
+  <Dependencies Name="Debug">
+    <Project Name="util"/>
+  </Dependencies>
+  <Dependencies Name="Release"/>
   <Settings Type="Executable">
     <GlobalSettings>
       <Compiler Options="" C_Options="" Assembler="">
@@ -56,13 +77,14 @@
     <Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
       <Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
         <IncludePath Value="."/>
-        <IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
-        <IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
-        <IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
+        <IncludePath Value="../../../"/>
+        <IncludePath Value="../../../phrase-extract"/>
+        <IncludePath Value="../../../boost/include"/>
       </Compiler>
       <Linker Options="" Required="yes">
-        <LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
-        <LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
+        <LibraryPath Value="../../../boost/lib64"/>
+        <LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
+        <LibraryPath Value="Debug"/>
         <Library Value="util"/>
         <Library Value="boost_iostreams"/>
         <Library Value="boost_program_options"/>
@@ -94,7 +116,7 @@
       <CustomPostBuild/>
       <CustomPreBuild/>
     </AdditionalRules>
-    <Completion EnableCpp11="no">
+    <Completion EnableCpp11="no" EnableCpp14="no">
       <ClangCmpFlagsC/>
       <ClangCmpFlags/>
       <ClangPP/>
@@ -133,7 +155,7 @@
      <CustomPostBuild/>
       <CustomPreBuild/>
     </AdditionalRules>
-    <Completion EnableCpp11="no">
+    <Completion EnableCpp11="no" EnableCpp14="no">
       <ClangCmpFlagsC/>
       <ClangCmpFlags/>
       <ClangPP/>
@@ -141,8 +163,4 @@
     </Completion>
   </Configuration>
   </Settings>
-  <Dependencies Name="Debug">
-    <Project Name="util"/>
-  </Dependencies>
-  <Dependencies Name="Release"/>
 </CodeLite_Project>
Eclipse .cproject (include paths):
@@ -26,6 +26,7 @@
   <option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
-    <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
+    <listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
+    <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
   </option>
   <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 </tool>
Eclipse .project (linked resources):
@@ -81,9 +81,14 @@
       <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
     </link>
     <link>
-      <name>SyntaxTree.cpp</name>
+      <name>SyntaxNodeCollection.cpp</name>
       <type>1</type>
-      <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
+      <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
+    </link>
+    <link>
+      <name>SyntaxNodeCollection.h</name>
+      <type>1</type>
+      <locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
     </link>
     <link>
       <name>SyntaxTree.h</name>
Eclipse .cproject (Debug/Release configurations):
@@ -5,16 +5,16 @@
   <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" moduleId="org.eclipse.cdt.core.settings" name="Debug">
     <externalSettings/>
     <extensions>
+      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
      <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
-      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
     </extensions>
   </storageModule>
   <storageModule moduleId="cdtBuildSystem" version="4.0.0">
-    <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+    <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
       <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2119725657." name="/" resourcePath="">
         <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1708444053" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
           <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.645190133" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@@ -25,6 +25,7 @@
   <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
   <option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+    <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
     <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
   </option>
   <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
 </tool>
@@ -61,16 +62,16 @@
   <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1230189043" moduleId="org.eclipse.cdt.core.settings" name="Release">
     <externalSettings/>
     <extensions>
+      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
       <extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
       <extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
-      <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
     </extensions>
   </storageModule>
   <storageModule moduleId="cdtBuildSystem" version="4.0.0">
-    <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+    <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
       <folderInfo id="cdt.managedbuild.config.gnu.exe.release.1230189043." name="/" resourcePath="">
         <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.280378247" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
           <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1881910636" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="extract" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src">
|
||||
@ -13,6 +30,8 @@
|
||||
<File Name="../../../phrase-extract/tables-core.cpp"/>
|
||||
<File Name="../../../phrase-extract/tables-core.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -26,11 +45,11 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../../"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<Library Value="boost_iostreams"/>
|
||||
<Library Value="z"/>
|
||||
</Linker>
|
||||
@ -60,7 +79,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -99,7 +118,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -107,6 +126,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -83,6 +83,16 @@
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>InternalTree.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>InternalTree.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bin</name>
|
||||
<type>2</type>
|
||||
|
@ -546,26 +546,11 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/interpolate.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/joint_order.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/joint_order.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/lmplz_main.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/lmplz_main.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/ngram.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/ngram_stream.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram_stream.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/pipeline.cc</name>
|
||||
<type>1</type>
|
||||
@ -576,21 +561,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/pipeline.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/print.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/print.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>builder/sort.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/sort.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>filter/Jamfile</name>
|
||||
<type>1</type>
|
||||
|
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="lm" InternalType="Library">
|
||||
<Plugins>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src"/>
|
||||
@ -27,6 +44,8 @@
|
||||
<File Name="../../../lm/virtual_interface.cc"/>
|
||||
<File Name="../../../lm/vocab.cc"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -40,9 +59,9 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
@ -72,7 +91,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -110,7 +129,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -118,6 +137,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -11,15 +11,15 @@
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.1932340583" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.debug.296711714" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.debug"/>
|
||||
@ -32,6 +32,9 @@
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.2072043013" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1183866856" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.1365367786" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug">
|
||||
@ -46,9 +49,6 @@
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013.195400614" name="MeteorScorer.cpp" rcbsApplicability="disable" resourcePath="MeteorScorer.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660">
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537"/>
|
||||
</fileInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="mert/PreProcessFilter.h|mert/PreProcessFilter.cpp|mert/UtilTest.cpp|mert/TimerTest.cpp|mert/SingletonTest.cpp|mert/PointTest.cpp|mert/OptimizerFactoryTest.cpp|mert/NgramTest.cpp|mert/FeatureDataTest.cpp|mert/DataTest.cpp|mert/ReferenceTest.cpp|mert/VocabularyTest.cpp|mert/extractor.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
@ -66,15 +66,15 @@
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.lib.release.3250316." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.1996805666" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.release.106685808" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.release"/>
|
||||
|
@ -46,20 +46,20 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
|
||||
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
|
||||
<Preprocessor Value="TRACE_ENABLE=1"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
|
||||
<Library Value="util"/>
|
||||
<Library Value="moses"/>
|
||||
<Library Value="search"/>
|
||||
|
@ -11,11 +11,11 @@
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
@ -79,12 +79,12 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
|
@ -60,6 +60,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/AlignmentInfoTest.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>AllOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>AllOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>BaseManager.cpp</name>
|
||||
<type>1</type>
|
||||
@ -70,6 +80,11 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>BeamSearchOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>BitmapContainer.cpp</name>
|
||||
<type>1</type>
|
||||
@ -80,6 +95,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/BitmapContainer.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>BookkeepingOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>BookkeepingOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>CMakeLists.txt</name>
|
||||
<type>1</type>
|
||||
@ -230,6 +255,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>CubePruningOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>CubePruningOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>DecodeGraph.cpp</name>
|
||||
<type>1</type>
|
||||
@ -460,6 +495,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/InputFileStream.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>InputOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>InputOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>InputPath.cpp</name>
|
||||
<type>1</type>
|
||||
@ -490,6 +535,16 @@
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LMBR_Options.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LMBR_Options.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LVoc.cpp</name>
|
||||
<type>1</type>
|
||||
@ -510,6 +565,21 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/LatticeMBR.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LookupOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>MBR_Options.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>MBR_Options.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Manager.cpp</name>
|
||||
<type>1</type>
|
||||
@ -535,6 +605,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/MosesTest.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>NBestOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>NBestOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>NonTerminal.cpp</name>
|
||||
<type>1</type>
|
||||
@ -550,6 +630,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/ObjectPool.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>OptionsBaseClass.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>OptionsBaseClass.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>OutputCollector.h</name>
|
||||
<type>1</type>
|
||||
@ -635,6 +725,26 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingConstraint.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ReorderingOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ReorderingOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ReportingOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ReportingOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>RuleCube.cpp</name>
|
||||
<type>1</type>
|
||||
@ -711,14 +821,14 @@
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormal.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SearchNormalBatch.cpp</name>
|
||||
<name>SearchOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SearchNormalBatch.h</name>
|
||||
<name>SearchOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Sentence.cpp</name>
|
||||
@ -740,6 +850,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/SentenceStats.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ServerOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ServerOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SquareMatrix.cpp</name>
|
||||
<type>1</type>
|
||||
@ -1065,6 +1185,11 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/mbr.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>rule.proto</name>
|
||||
<type>1</type>
|
||||
@ -1360,16 +1485,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SkeletonChangeInput.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SkeletonChangeInput.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SkeletonStatefulFF.cpp</name>
|
||||
<type>1</type>
|
||||
@ -2240,6 +2355,146 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/AllOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/AllOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/BeamSearchOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/BookkeepingOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/BookkeepingOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ContextParameters.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ContextParameters.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/CubePruningOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/CubePruningOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/InputOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/InputOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/LMBR_Options.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/LMBR_Options.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/LookupOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/MBR_Options.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/MBR_Options.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/NBestOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/NBestOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/OptionsBaseClass.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/OptionsBaseClass.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ReorderingOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ReorderingOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ReportingOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ReportingOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/SearchOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/SearchOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ServerOptions.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>parameters/ServerOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/LexicalReordering/LexicalReordering.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -775,6 +775,8 @@
|
||||
<File Name="../../../moses/WordsRange.h"/>
|
||||
<File Name="../../../moses/XmlOption.cpp"/>
|
||||
<File Name="../../../moses/XmlOption.h"/>
|
||||
<File Name="../../../moses/OutputFileStream.cpp"/>
|
||||
<File Name="../../../moses/OutputFileStream.h"/>
|
||||
</VirtualDirectory>
|
||||
<VirtualDirectory Name="PP">
|
||||
<File Name="../../../moses/PP/CountsPhraseProperty.cpp"/>
|
||||
@ -793,8 +795,6 @@
|
||||
<File Name="../../../moses/PP/SpanLengthPhraseProperty.h"/>
|
||||
<File Name="../../../moses/PP/TreeStructurePhraseProperty.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<VirtualDirectory Name="parameters">
|
||||
<File Name="../../../moses/parameters/ContextParameters.cpp"/>
|
||||
<File Name="../../../moses/parameters/ContextParameters.h"/>
|
||||
@ -814,7 +814,7 @@
|
||||
<ResourceCompiler Options=""/>
|
||||
</GlobalSettings>
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<Compiler Options="-g -std=c++0x" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="../../../"/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
@ -897,4 +897,6 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
125	contrib/other-builds/pruneGeneration/pruneGeneration.project	Normal file
@ -0,0 +1,125 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src">
|
||||
<File Name="../../../misc/pruneGeneration.cpp"/>
|
||||
<File Name="../../../misc/pruneGeneration.h"/>
|
||||
</VirtualDirectory>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
<IncludePath Value="."/>
|
||||
</Compiler>
|
||||
<Linker Options="">
|
||||
<LibraryPath Value="."/>
|
||||
</Linker>
|
||||
<ResourceCompiler Options=""/>
|
||||
</GlobalSettings>
|
||||
<Configuration Name="Debug" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
|
||||
<Library Value="boost_filesystem"/>
|
||||
<Library Value="boost_system"/>
|
||||
<Library Value="boost_iostreams"/>
|
||||
<Library Value="moses"/>
|
||||
<Library Value="z"/>
|
||||
<Library Value="bz2"/>
|
||||
</Linker>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
|
||||
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
|
||||
<![CDATA[]]>
|
||||
</Environment>
|
||||
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="yes">
|
||||
<DebuggerSearchPaths/>
|
||||
<PostConnectCommands/>
|
||||
<StartupCommands/>
|
||||
</Debugger>
|
||||
<PreBuild/>
|
||||
<PostBuild/>
|
||||
<CustomBuild Enabled="no">
|
||||
<RebuildCommand/>
|
||||
<CleanCommand/>
|
||||
<BuildCommand/>
|
||||
<PreprocessFileCommand/>
|
||||
<SingleFileCommand/>
|
||||
<MakefileGenerationCommand/>
|
||||
<ThirdPartyToolName>None</ThirdPartyToolName>
|
||||
<WorkingDirectory/>
|
||||
</CustomBuild>
|
||||
<AdditionalRules>
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
<SearchPaths/>
|
||||
</Completion>
|
||||
</Configuration>
|
||||
<Configuration Name="Release" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-O2;-Wall" C_Options="-O2;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<Preprocessor Value="NDEBUG"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Release" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
|
||||
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
|
||||
<![CDATA[]]>
|
||||
</Environment>
|
||||
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="no">
|
||||
<DebuggerSearchPaths/>
|
||||
<PostConnectCommands/>
|
||||
<StartupCommands/>
|
||||
</Debugger>
|
||||
<PreBuild/>
|
||||
<PostBuild/>
|
||||
<CustomBuild Enabled="no">
|
||||
<RebuildCommand/>
|
||||
<CleanCommand/>
|
||||
<BuildCommand/>
|
||||
<PreprocessFileCommand/>
|
||||
<SingleFileCommand/>
|
||||
<MakefileGenerationCommand/>
|
||||
<ThirdPartyToolName>None</ThirdPartyToolName>
|
||||
<WorkingDirectory/>
|
||||
</CustomBuild>
|
||||
<AdditionalRules>
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
<SearchPaths/>
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
</CodeLite_Project>
|
@ -59,7 +59,6 @@
|
||||
<listOptionValue builtIn="false" value="boost_program_options"/>
|
||||
<listOptionValue builtIn="false" value="pthread"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="dl"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
|
@ -19,6 +19,10 @@
|
||||
<File Name="../../../phrase-extract/tables-core.cpp"/>
|
||||
<File Name="../../../phrase-extract/tables-core.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug">
|
||||
<Project Name="util"/>
|
||||
</Dependencies>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -32,17 +36,17 @@
|
||||
<Configuration Name="Debug" CompilerType="clang( based on LLVM 3.5svn )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
|
||||
<Library Value="moses"/>
|
||||
<Library Value="search"/>
|
||||
<Library Value="OnDiskPt"/>
|
||||
@ -86,7 +90,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -125,7 +129,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -133,8 +137,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug">
|
||||
<Project Name="util"/>
|
||||
</Dependencies>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -10,6 +10,8 @@
|
||||
<File Name="../../../search/rule.cc"/>
|
||||
<File Name="../../../search/vertex.cc"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -23,9 +25,9 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
@ -55,7 +57,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -93,7 +95,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -101,6 +103,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -159,10 +159,10 @@
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/server"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/server"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
|
@ -62,6 +62,8 @@
|
||||
<File Name="../../../util/stream/sort_test.cc" ExcludeProjConfig="Debug"/>
|
||||
<File Name="../../../util/stream/stream_test.cc" ExcludeProjConfig="Debug"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -75,8 +77,8 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
@ -105,7 +107,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -143,7 +145,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -151,6 +153,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -13,7 +13,7 @@ with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
if $(with-xmlrpc-c) {
  echo While building mosesserver ... ;
  echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
  echo "!!! You are linking the XMLRPC-C library; Do NOT use v.1.25.29 !!!" ;
  echo "!!! You are linking the XMLRPC-C library; Must be v.1.32 (September 2012) or higher !!!" ;
  echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;

  build-moses-server = true ;
@ -38,13 +38,12 @@ int main(int argc, char** argv)
#include "moses/StaticData.h"
#include "moses/ThreadPool.h"
#include "moses/TranslationTask.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/FF/StatefulFeatureFunction.h"
#if PT_UG
#include "moses/TranslationModel/UG/mmsapt.h"
#endif
#include "moses/TreeInput.h"
#include "moses/LM/ORLM.h"
#include "moses/IOWrapper.h"

#include <boost/foreach.hpp>
@ -58,8 +57,8 @@ int main(int argc, char** argv)
#include <xmlrpc-c/server_abyss.hpp>

// using namespace Moses;
using Moses::TreeInput;
using namespace std;
using namespace Moses;

typedef std::map<std::string, xmlrpc_c::value> params_t;
@ -82,70 +81,16 @@ public:
    Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
    pdsa->add(source_,target_,alignment_);
#else
    const PhraseDictionary* pdf = PhraseDictionary::GetColl()[0];
    PhraseDictionaryDynSuffixArray*
      pdsa = (PhraseDictionaryDynSuffixArray*) pdf;
    cerr << "Inserting into address " << pdsa << endl;
    pdsa->insertSnt(source_, target_, alignment_);
    std::string msg;
    msg = "Server was compiled without a phrase table implementation that ";
    msg += "supports updates.";
    throw xmlrpc_c::fault(msg.c_str(), xmlrpc_c::fault::CODE_PARSE);
#endif
    if(add2ORLM_) {
      //updateORLM();
    }
    XVERBOSE(1,"Done inserting\n");
    //PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
    map<string, xmlrpc_c::value> retData;
    //*retvalP = xmlrpc_c::value_struct(retData);
#ifndef PT_UG
    pdf = 0;
#endif
    pdsa = 0;
    *retvalP = xmlrpc_c::value_string("Phrase table updated");
  }
  string source_, target_, alignment_;
  bool bounded_, add2ORLM_;
  /*
  void updateORLM() {
    // TODO(level101): this belongs in the language model, not in moseserver.cpp
    vector<string> vl;
    map<vector<string>, int> ngSet;
    LMList lms = StaticData::Instance().GetLMList(); // get LM
    LMList::const_iterator lmIter = lms.begin();
    LanguageModel *lm = *lmIter;
    LanguageModelORLM* orlm = static_cast<LanguageModelORLM*>(lm);
    if(orlm == 0) {
      cerr << "WARNING: Unable to add target sentence to ORLM\n";
      return;
    }
    // break out new ngrams from sentence
    const int ngOrder(orlm->GetNGramOrder());
    const std::string sBOS = orlm->GetSentenceStart()->GetString().as_string();
    const std::string sEOS = orlm->GetSentenceEnd()->GetString().as_string();
    Utils::splitToStr(target_, vl, " ");
    // insert BOS and EOS
    vl.insert(vl.begin(), sBOS);
    vl.insert(vl.end(), sEOS);
    for(int j=0; j < vl.size(); ++j) {
      int i = (j<ngOrder) ? 0 : j-ngOrder+1;
      for(int t=j; t >= i; --t) {
        vector<string> ngVec;
        for(int s=t; s<=j; ++s) {
          ngVec.push_back(vl[s]);
          //cerr << vl[s] << " ";
        }
        ngSet[ngVec]++;
        //cerr << endl;
      }
    }
    // insert into LM in order from 1grams up (for LM well-formedness)
    cerr << "Inserting " << ngSet.size() << " ngrams into ORLM...\n";
    for(int i=1; i <= ngOrder; ++i) {
      iterate(ngSet, it) {
        if(it->first.size() == i)
          orlm->UpdateORLM(it->first, it->second);
      }
    }
  }
  */
  bool bounded_;

  void breakOutParams(const params_t& params) {
    params_t::const_iterator si = params.find("source");
@ -165,8 +110,6 @@ public:
    XVERBOSE(1,"alignment = " << alignment_ << endl);
    si = params.find("bounded");
    bounded_ = (si != params.end());
    si = params.find("updateORLM");
    add2ORLM_ = (si != params.end());
  }
};
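With this change, a server built without a suffix-array phrase table (PT_UG) no longer casts the first phrase dictionary blindly; the updater method instead reports the limitation through XML-RPC's fault mechanism. A hypothetical client-side view of that behaviour (illustration only: the method name and struct keys follow the handler above, while the URL, port, and example strings are assumptions):

  // Illustration only -- not part of this patch. Shows how the
  // xmlrpc_c::fault thrown above would surface in a caller.
  #include <xmlrpc-c/girerr.hpp>
  #include <xmlrpc-c/client_simple.hpp>
  #include <iostream>
  #include <map>
  #include <string>

  int main()
  {
    try {
      // the "updater" handler reads "source", "target" and "alignment"
      // out of a struct parameter (see breakOutParams above)
      std::map<std::string, xmlrpc_c::value> args;
      args["source"]    = xmlrpc_c::value_string("ein haus");
      args["target"]    = xmlrpc_c::value_string("a house");
      args["alignment"] = xmlrpc_c::value_string("0-0 1-1");

      xmlrpc_c::paramList params;
      params.add(xmlrpc_c::value_struct(args));

      xmlrpc_c::clientSimple client;
      xmlrpc_c::value result;
      client.call("http://localhost:8080/RPC2", "updater", params, &result);
    } catch (girerr::error const& e) {
      // a server compiled without PT_UG answers with the fault thrown above
      std::cerr << "updater failed: " << e.what() << std::endl;
      return 1;
    }
    return 0;
  }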
@ -678,6 +621,14 @@ int main(int argc, char** argv)
  bool isSerial = false;
  size_t numThreads = 10; //for translation tasks

  //Abyss server configuration: initial values reflect hard-coded default
  //-> http://xmlrpc-c.sourceforge.net/doc/libxmlrpc_server_abyss.html#max_conn
  size_t maxConn = 15;
  size_t maxConnBacklog = 15;
  size_t keepaliveTimeout = 15;
  size_t keepaliveMaxConn = 30;
  size_t timeout = 15;

  for (int i = 0; i < argc; ++i) {
    if (!strcmp(argv[i],"--server-port")) {
      ++i;
@ -695,6 +646,46 @@ int main(int argc, char** argv)
      } else {
        logfile = argv[i];
      }
    } else if (!strcmp(argv[i],"--server-maxconn")) {
      ++i;
      if (i >= argc) {
        cerr << "Error: Missing argument to --server-maxconn" << endl;
        exit(1);
      } else {
        maxConn = atoi(argv[i]);
      }
    } else if (!strcmp(argv[i],"--server-maxconn-backlog")) {
      ++i;
      if (i >= argc) {
        cerr << "Error: Missing argument to --server-maxconn-backlog" << endl;
        exit(1);
      } else {
        maxConnBacklog = atoi(argv[i]);
      }
    } else if (!strcmp(argv[i],"--server-keepalive-timeout")) {
      ++i;
      if (i >= argc) {
        cerr << "Error: Missing argument to --server-keepalive-timeout" << endl;
        exit(1);
      } else {
        keepaliveTimeout = atoi(argv[i]);
      }
    } else if (!strcmp(argv[i],"--server-keepalive-maxconn")) {
      ++i;
      if (i >= argc) {
        cerr << "Error: Missing argument to --server-keepalive-maxconn" << endl;
        exit(1);
      } else {
        keepaliveMaxConn = atoi(argv[i]);
      }
    } else if (!strcmp(argv[i],"--server-timeout")) {
      ++i;
      if (i >= argc) {
        cerr << "Error: Missing argument to --server-timeout" << endl;
        exit(1);
      } else {
        timeout = atoi(argv[i]);
      }
    } else if (!strcmp(argv[i], "--threads")) {
      ++i;
      if (i>=argc) {
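Each of the new --server-* options is parsed with the same advance, bounds-check, atoi pattern seen above. Purely as an illustration (this helper is hypothetical and not part of the patch), the repetition could be factored into one function:

  // Hypothetical refactoring sketch -- not in the patch. Reads the numeric
  // value that follows argv[i], with the same checks as the blocks above.
  #include <cstdlib>
  #include <iostream>

  static size_t nextArgAsSize(int argc, char** argv, int& i, const char* flag)
  {
    ++i;                          // the value follows the flag itself
    if (i >= argc) {
      std::cerr << "Error: Missing argument to " << flag << std::endl;
      std::exit(1);
    }
    return static_cast<size_t>(std::atoi(argv[i]));   // same atoi semantics
  }

  // usage inside the loop above would then collapse to, e.g.:
  //   } else if (!strcmp(argv[i], "--server-maxconn")) {
  //     maxConn = nextArgAsSize(argc, argv, i, "--server-maxconn");
  //   }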
@ -740,20 +731,27 @@
  myRegistry.addMethod("updater", updater);
  myRegistry.addMethod("optimize", optimizer);

  /* CODE FOR old xmlrpc-c v. 1.32 or lower
  xmlrpc_c::serverAbyss myAbyssServer(
    myRegistry,
    port,              // TCP port on which to listen
    logfile
  );
  /* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
  */

  /* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04 */
  xmlrpc_c::serverAbyss myAbyssServer(
    xmlrpc_c::serverAbyss::constrOpt()
    .registryPtr(&myRegistry)
    .registryP(&myRegistry)
    .portNumber(port)  // TCP port on which to listen
    .logFileName(logfile)
    .allowOrigin("*")
    .maxConn((unsigned int)maxConn)
    .maxConnBacklog((unsigned int)maxConnBacklog)
    .keepaliveTimeout((unsigned int)keepaliveTimeout)
    .keepaliveMaxConn((unsigned int)keepaliveMaxConn)
    .timeout((unsigned int)timeout)
  );
  */

  XVERBOSE(1,"Listening on port " << port << endl);
  if (isSerial) {
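Taken together, the new knobs are all settable from the command line before the Abyss server is constructed. An invocation might look like this (illustration only: moses.ini is a placeholder configuration file, and the numeric values simply echo or double the defaults initialised above):

  mosesserver -f moses.ini --server-port 8080 --server-maxconn 30 --server-maxconn-backlog 30 --server-keepalive-timeout 15 --server-keepalive-maxconn 60 --server-timeout 15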
@ -2,7 +2,7 @@
# xmlrpc-c library (including the abyss server) that is needed for
# moses server functionality

if [ option.get "no-xmlrpc-c" ]
if [ option.get "no-xmlrpc-c" : : "yes" ]
{
  rule xmlrpc ( what ? ) { } # never return anything
}
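The extra pair of colons is the actual fix: option.get takes the option name, a value to return when the option is absent, and an implied value to return when the option is given without an argument, so the one-argument call above could never detect a bare --no-xmlrpc-c flag. A minimal sketch of the corrected idiom (illustration only, not part of the patch):

  # illustration only: detecting a bare --no-xmlrpc-c on the b2 command line
  local no-xmlrpc = [ option.get "no-xmlrpc-c" : : "yes" ] ;
  if $(no-xmlrpc) = "yes"
  {
      echo "xmlrpc-c support disabled by request" ;
  }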
@ -1,46 +1,139 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library


# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>


set(KENLM_MAX_ORDER 6)

add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})


# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
set(KENLM_SOURCE
	bhiksha.cc
	binary_format.cc
	config.cc
	lm_exception.cc
	model.cc
	quantize.cc
	read_arpa.cc
	search_hashed.cc
	search_trie.cc
	sizes.cc
	trie.cc
	trie_sort.cc
	value_build.cc
	virtual_interface.cc
	vocab.cc
)


# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm OBJECT ${KENLM_SOURCE})

# This directory has children that need to be processed
add_subdirectory(builder)
add_subdirectory(common)
add_subdirectory(filter)


# Explicitly list the executable files to be compiled
set(EXE_LIST
	query
	fragment
	build_binary
)

# Iterate through the executable list
foreach(exe ${EXE_LIST})

	# Compile the executable, linking against the requisite dependent object files
	add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)

	# Link the executable against boost
	target_link_libraries(${exe} ${Boost_LIBRARIES})

	# Group executables together
	set_target_properties(${exe} PROPERTIES FOLDER executables)

# End for loop
endforeach(exe)


# Install the executable files
install(TARGETS ${EXE_LIST} DESTINATION bin)
if(BUILD_TESTING)
|
||||
|
||||
# Explicitly list the Boost test files to be compiled
|
||||
set(KENLM_BOOST_TESTS_LIST
|
||||
left_test
|
||||
model_test
|
||||
partial_test
|
||||
)
|
||||
|
||||
# Iterate through the Boost tests list
|
||||
foreach(test ${KENLM_BOOST_TESTS_LIST})
|
||||
|
||||
# Compile the executable, linking against the requisite dependent object files
|
||||
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
|
||||
|
||||
# Require the following compile flag
|
||||
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
|
||||
|
||||
# Link the executable against boost
|
||||
target_link_libraries(${test} ${Boost_LIBRARIES})
|
||||
|
||||
# model_test requires an extra command line parameter
|
||||
if ("${test}" STREQUAL "model_test")
|
||||
set(test_params
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa
|
||||
)
|
||||
else()
|
||||
set(test_params
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
|
||||
)
|
||||
endif()
|
||||
|
||||
# Specify command arguments for how to run each unit test
|
||||
#
|
||||
# Assuming that foo was defined via add_executable(foo ...),
|
||||
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
|
||||
#
|
||||
add_test(NAME ${test}_test
|
||||
COMMAND $<TARGET_FILE:${test}> ${test_params})
|
||||
|
||||
# Group unit tests together
|
||||
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
|
||||
|
||||
# End for loop
|
||||
endforeach(test)
|
||||
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/blank.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/enumerate_vocab.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/facade.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/left.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/max_order.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model_type.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/ngram_query.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/partial.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/return.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/state.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/weights.hh")
|
||||
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/word_index.hh")
|
||||
|
||||
add_library(kenlm OBJECT ${SOURCE_KENLM})
|
87 lm/builder/CMakeLists.txt Normal file
@@ -0,0 +1,87 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library


# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>

# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_BUILDER_SOURCE
  ${CMAKE_CURRENT_SOURCE_DIR}/adjust_counts.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/corpus_count.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/initial_probabilities.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/output.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/pipeline.cc
)


# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_builder OBJECT ${KENLM_BUILDER_SOURCE})


# Compile the executable, linking against the requisite dependent object files
add_executable(lmplz lmplz_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)

# Link the executable against boost
target_link_libraries(lmplz ${Boost_LIBRARIES})

# Group executables together
set_target_properties(lmplz PROPERTIES FOLDER executables)

if(BUILD_TESTING)

  # Explicitly list the Boost test files to be compiled
  set(KENLM_BOOST_TESTS_LIST
    adjust_counts_test
    corpus_count_test
  )

  # Iterate through the Boost tests list
  foreach(test ${KENLM_BOOST_TESTS_LIST})

    # Compile the executable, linking against the requisite dependent object files
    add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)

    # Require the following compile flag
    set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK")

    # Link the executable against boost
    target_link_libraries(${test} ${Boost_LIBRARIES})

    # Specify command arguments for how to run each unit test
    #
    # Assuming that foo was defined via add_executable(foo ...),
    # the syntax $<TARGET_FILE:foo> gives the full path to the executable.
    #
    add_test(NAME ${test}_test
             COMMAND $<TARGET_FILE:${test}>)

    # Group unit tests together
    set_target_properties(${test} PROPERTIES FOLDER "unit_tests")

  # End for loop
  endforeach(test)

endif()
@@ -15,9 +15,6 @@
#include "util/stream/timer.hh"
#include "util/tokenize_piece.hh"

#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>

#include <functional>

#include <stdint.h>
@@ -43,12 +43,13 @@ BOOST_AUTO_TEST_CASE(Short) {
  util::scoped_fd vocab(util::MakeTemp("corpus_count_test_vocab"));

  util::stream::Chain chain(config);
  NGramStream<BuildingPayload> stream;
  uint64_t token_count;
  WordIndex type_count = 10;
  std::vector<bool> prune_words;
  CorpusCount counter(input_piece, vocab.get(), token_count, type_count, prune_words, "", chain.BlockSize() / chain.EntrySize(), SILENT);
  chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
  chain >> boost::ref(counter);
  NGramStream<BuildingPayload> stream(chain.Add());
  chain >> util::stream::kRecycle;

  const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};
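The test shows the new attachment idiom: a stream is no longer piped into a chain with operator>>, but constructed from an explicit chain.Add() position. A minimal C++ sketch of the pattern (counter, config, and the loop body are illustrative stand-ins from the test above):

  util::stream::Chain chain(config);
  chain >> boost::ref(counter);                      // producer fills blocks
  NGramStream<BuildingPayload> stream(chain.Add());  // reader claims a position
  chain >> util::stream::kRecycle;                   // recycler returns blocks
  for (; stream; ++stream) {
    // stream->begin()..stream->end() is the WordIndex span;
    // stream->Value() is the BuildingPayload.
  }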
@@ -1,54 +1,18 @@
#ifndef LM_BUILDER_PRINT_H
#define LM_BUILDER_PRINT_H
#ifndef LM_BUILDER_DEBUG_PRINT_H
#define LM_BUILDER_DEBUG_PRINT_H

#include "lm/common/ngram_stream.hh"
#include "lm/builder/output.hh"
#include "lm/builder/payload.hh"
#include "lm/common/ngram.hh"
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"

#include <boost/lexical_cast.hpp>

#include <ostream>
#include <cassert>

// Warning: print routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.

namespace lm { namespace builder {

class VocabReconstitute {
 public:
  // fd must be alive for life of this object; does not take ownership.
  explicit VocabReconstitute(int fd);

  const char *Lookup(WordIndex index) const {
    assert(index < map_.size() - 1);
    return map_[index];
  }

  StringPiece LookupPiece(WordIndex index) const {
    return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
  }

  std::size_t Size() const {
    // There's an extra entry to support StringPiece lengths.
    return map_.size() - 1;
  }

 private:
  util::scoped_memory memory_;
  std::vector<const char*> map_;
};

// Not defined, only specialized.
template <class T> void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload);
template <> inline void PrintPayload<uint64_t>(util::FakeOFStream &to, const BuildingPayload &payload) {
  // TODO slow
  to << payload.count;
}
template <> inline void PrintPayload<Uninterpolated>(util::FakeOFStream &to, const BuildingPayload &payload) {
@@ -101,19 +65,6 @@ template <class V> class Print {
  int to_;
};

class PrintARPA : public OutputHook {
 public:
  explicit PrintARPA(int fd, bool verbose_header)
    : OutputHook(PROB_SEQUENTIAL_HOOK), out_fd_(fd), verbose_header_(verbose_header) {}

  void Sink(util::stream::Chains &chains);

  void Run(const util::stream::ChainPositions &positions);

 private:
  util::scoped_fd out_fd_;
  bool verbose_header_;
};

}} // namespaces
#endif // LM_BUILDER_PRINT_H

#endif // LM_BUILDER_DEBUG_PRINT_H
@@ -1,4 +1,4 @@
#include "lm/builder/print.hh"
#include "lm/common/print.hh"
#include "lm/word_index.hh"
#include "util/file.hh"
#include "util/read_compressed.hh"
@@ -20,7 +20,7 @@ int main(int argc, char *argv[]) {
  }
  util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
  util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
  lm::builder::VocabReconstitute vocab(vocab_file.get());
  lm::VocabReconstitute vocab(vocab_file.get());
  unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
  std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
  while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {
@@ -5,6 +5,8 @@
#include <vector>
#include <stdint.h>

namespace lm { namespace builder {

// Some configuration info that is used to add
// comments to the beginning of an ARPA file
struct HeaderInfo {
@@ -21,4 +23,6 @@ struct HeaderInfo {
  // TODO: More info if multiple models were interpolated
};

}} // namespaces

#endif
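When --verbose_header is set, PrintHook (in output.cc below) renders these HeaderInfo fields as ARPA comments. With illustrative values, the file would begin:

  # Input file: corpus.txt
  # Token count: 123456789
  # Smoothing: Modified Kneser-Ney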
@@ -1,9 +1,9 @@
#include "lm/builder/initial_probabilities.hh"

#include "lm/builder/discount.hh"
#include "lm/builder/special.hh"
#include "lm/builder/hash_gamma.hh"
#include "lm/builder/payload.hh"
#include "lm/common/special.hh"
#include "lm/common/ngram_stream.hh"
#include "util/murmur_hash.hh"
#include "util/file.hh"
@@ -10,9 +10,8 @@
namespace util { namespace stream { class Chains; } }

namespace lm {
namespace builder {

class SpecialVocab;
namespace builder {

struct InitialProbabilitiesConfig {
  // These should be small buffers to keep the adder from getting too far ahead
@@ -1,16 +1,16 @@
#include "lm/builder/interpolate.hh"

#include "lm/builder/hash_gamma.hh"
#include "lm/builder/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/common/compare.hh"
#include "lm/common/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/lm_exception.hh"
#include "util/fixed_array.hh"
#include "util/murmur_hash.hh"

#include <cassert>
#include <cmath>
#include <iostream>

namespace lm { namespace builder {
namespace {
@@ -91,7 +91,8 @@ template <class Output> class Callback {
    }
  }

  void Enter(unsigned order_minus_1, NGram<BuildingPayload> &gram) {
  void Enter(unsigned order_minus_1, void *data) {
    NGram<BuildingPayload> gram(data, order_minus_1 + 1);
    BuildingPayload &pay = gram.Value();
    pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
    probs_[order_minus_1 + 1] = pay.complete.prob;
@@ -125,7 +126,7 @@ template <class Output> class Callback {
    output_.Gram(order_minus_1, out_backoff, pay.complete);
  }

  void Exit(unsigned, const NGram<BuildingPayload> &) const {}
  void Exit(unsigned, void *) const {}

 private:
  util::FixedArray<util::stream::Stream> backoffs_;
@@ -1,7 +1,7 @@
#ifndef LM_BUILDER_INTERPOLATE_H
#define LM_BUILDER_INTERPOLATE_H

#include "lm/builder/special.hh"
#include "lm/common/special.hh"
#include "lm/word_index.hh"
#include "util/stream/multi_stream.hh"

@@ -1,6 +1,6 @@
#include "lm/builder/output.hh"
#include "lm/builder/pipeline.hh"
#include "lm/builder/print.hh"
#include "lm/common/size_option.hh"
#include "lm/lm_exception.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
@@ -13,21 +13,6 @@
#include <vector>

namespace {
class SizeNotify {
 public:
  SizeNotify(std::size_t &out) : behind_(out) {}

  void operator()(const std::string &from) {
    behind_ = util::ParseSize(from);
  }

 private:
  std::size_t &behind_;
};

boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
  return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}

// Parse and validate pruning thresholds then return vector of threshold counts
// for each n-grams order.
@@ -106,17 +91,16 @@ int main(int argc, char *argv[]) {
    ("interpolate_unigrams", po::value<bool>(&pipeline.initial_probs.interpolate_unigrams)->default_value(true)->implicit_value(true), "Interpolate the unigrams (default) as opposed to giving lots of mass to <unk> like SRI. If you want SRI's behavior with a large <unk> and the old lmplz default, use --interpolate_unigrams 0.")
    ("skip_symbols", po::bool_switch(), "Treat <s>, </s>, and <unk> as whitespace instead of throwing an exception")
    ("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
    ("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
    ("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
    ("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
    ("memory,S", lm::SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
    ("minimum_block", lm::SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
    ("sort_block", lm::SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
    ("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
    ("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
    ("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write a file containing the unique vocabulary strings delimited by null bytes")
    ("vocab_pad", po::value<uint64_t>(&pipeline.vocab_size_for_unk)->default_value(0), "If the vocabulary is smaller than this value, pad with <unk> to reach this size. Requires --interpolate_unigrams")
    ("verbose_header", po::bool_switch(&verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.")
    ("text", po::value<std::string>(&text), "Read text from a file instead of stdin")
    ("arpa", po::value<std::string>(&arpa), "Write ARPA to a file instead of stdout")
    ("intermediate", po::value<std::string>(&intermediate), "Write ngrams to an intermediate file. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on. Implicitly makes --vocab_file be the provided name + .vocab.")
    ("intermediate", po::value<std::string>(&intermediate), "Write ngrams to intermediate files. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on.")
    ("renumber", po::bool_switch(&pipeline.renumber_vocabulary), "Renumber the vocabulary identifiers so that they are monotone with the hash of each string. This is consistent with the ordering used by the trie data structure.")
    ("collapse_values", po::bool_switch(&pipeline.output_q), "Collapse probability and backoff into a single value, q that yields the same sentence-level probabilities. See http://kheafield.com/professional/edinburgh/rest_paper.pdf for more details, including a proof.")
    ("prune", po::value<std::vector<std::string> >(&pruning)->multitoken(), "Prune n-grams with count less than or equal to the given threshold. Specify one value for each order i.e. 0 0 1 to prune singleton trigrams and above. The sequence of values must be non-decreasing and the last value applies to any remaining orders. Default is to not prune, which is equivalent to --prune 0.")
@@ -217,15 +201,10 @@ int main(int argc, char *argv[]) {
  bool writing_intermediate = vm.count("intermediate");
  if (writing_intermediate) {
    pipeline.renumber_vocabulary = true;
    if (!pipeline.vocab_file.empty()) {
      std::cerr << "--intermediate and --vocab_file are incompatible because --intermediate already makes a vocab file." << std::endl;
      return 1;
    }
    pipeline.vocab_file = intermediate + ".vocab";
  }
  lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate);
  lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate, pipeline.output_q);
  if (!writing_intermediate || vm.count("arpa")) {
    output.Add(new lm::builder::PrintARPA(out.release(), verbose_header));
    output.Add(new lm::builder::PrintHook(out.release(), verbose_header));
  }
  lm::builder::Pipeline(pipeline, in.release(), output);
} catch (const util::MallocException &e) {
@@ -1,6 +1,8 @@
#include "lm/builder/output.hh"

#include "lm/common/model_buffer.hh"
#include "lm/common/print.hh"
#include "util/fake_ofstream.hh"
#include "util/stream/multi_stream.hh"

#include <iostream>
@@ -9,23 +11,22 @@ namespace lm { namespace builder {

OutputHook::~OutputHook() {}

Output::Output(StringPiece file_base, bool keep_buffer)
  : file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer) {}
Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
  : buffer_(file_base, keep_buffer, output_q) {}

void Output::SinkProbs(util::stream::Chains &chains, bool output_q) {
void Output::SinkProbs(util::stream::Chains &chains) {
  Apply(PROB_PARALLEL_HOOK, chains);
  if (!keep_buffer_ && !Have(PROB_SEQUENTIAL_HOOK)) {
  if (!buffer_.Keep() && !Have(PROB_SEQUENTIAL_HOOK)) {
    chains >> util::stream::kRecycle;
    chains.Wait(true);
    return;
  }
  lm::common::ModelBuffer buf(file_base_, keep_buffer_, output_q);
  buf.Sink(chains);
  buffer_.Sink(chains, header_.counts_pruned);
  chains >> util::stream::kRecycle;
  chains.Wait(false);
  if (Have(PROB_SEQUENTIAL_HOOK)) {
    std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
    buf.Source(chains);
    buffer_.Source(chains);
    Apply(PROB_SEQUENTIAL_HOOK, chains);
    chains >> util::stream::kRecycle;
    chains.Wait(true);
@@ -34,8 +35,18 @@ void Output::SinkProbs(util::stream::Chains &chains) {

void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
  for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
    entry->Sink(chains);
    entry->Sink(header_, VocabFile(), chains);
  }
}

void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
  if (verbose_header_) {
    util::FakeOFStream out(file_.get(), 50);
    out << "# Input file: " << info.input_file << '\n';
    out << "# Token count: " << info.token_count << '\n';
    out << "# Smoothing: Modified Kneser-Ney" << '\n';
  }
  chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
}

}} // namespaces
@@ -2,6 +2,7 @@
#define LM_BUILDER_OUTPUT_H

#include "lm/builder/header_info.hh"
#include "lm/common/model_buffer.hh"
#include "util/file.hh"

#include <boost/ptr_container/ptr_vector.hpp>
@@ -20,69 +21,64 @@ enum HookType {
  NUMBER_OF_HOOKS // Keep this last so we know how many values there are.
};

class Output;

class OutputHook {
 public:
  explicit OutputHook(HookType hook_type) : type_(hook_type), master_(NULL) {}
  explicit OutputHook(HookType hook_type) : type_(hook_type) {}

  virtual ~OutputHook();

  virtual void Sink(util::stream::Chains &chains) = 0;
  virtual void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) = 0;

 protected:
  const HeaderInfo &GetHeader() const;
  int GetVocabFD() const;
  HookType Type() const { return type_; }

 private:
  friend class Output;
  const HookType type_;
  const Output *master_;
  HookType type_;
};

class Output : boost::noncopyable {
 public:
  Output(StringPiece file_base, bool keep_buffer);
  Output(StringPiece file_base, bool keep_buffer, bool output_q);

  // Takes ownership.
  void Add(OutputHook *hook) {
    hook->master_ = this;
    outputs_[hook->type_].push_back(hook);
    outputs_[hook->Type()].push_back(hook);
  }

  bool Have(HookType hook_type) const {
    return !outputs_[hook_type].empty();
  }

  void SetVocabFD(int to) { vocab_fd_ = to; }
  int GetVocabFD() const { return vocab_fd_; }
  int VocabFile() const { return buffer_.VocabFile(); }

  void SetHeader(const HeaderInfo &header) { header_ = header; }
  const HeaderInfo &GetHeader() const { return header_; }

  // This is called by the pipeline.
  void SinkProbs(util::stream::Chains &chains, bool output_q);
  void SinkProbs(util::stream::Chains &chains);

  unsigned int Steps() const { return Have(PROB_SEQUENTIAL_HOOK); }

 private:
  void Apply(HookType hook_type, util::stream::Chains &chains);

  boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
  int vocab_fd_;
  HeaderInfo header_;
  ModelBuffer buffer_;

  std::string file_base_;
  bool keep_buffer_;
  boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
  HeaderInfo header_;
};

inline const HeaderInfo &OutputHook::GetHeader() const {
  return master_->GetHeader();
}
class PrintHook : public OutputHook {
 public:
  // Takes ownership
  PrintHook(int write_fd, bool verbose_header)
    : OutputHook(PROB_SEQUENTIAL_HOOK), file_(write_fd), verbose_header_(verbose_header) {}

inline int OutputHook::GetVocabFD() const {
  return master_->GetVocabFD();
}
  void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains);

 private:
  util::scoped_fd file_;
  bool verbose_header_;
};

}} // namespaces

@@ -277,27 +277,27 @@ void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &maste
  }
  master >> Interpolate(std::max(master.Config().vocab_size_for_unk, counts[0] - 1 /* <s> is not included */), util::stream::ChainPositions(gamma_chains), config.prune_thresholds, config.prune_vocab, config.output_q, specials);
  gamma_chains >> util::stream::kRecycle;
  output.SinkProbs(master.MutableChains(), config.output_q);
  output.SinkProbs(master.MutableChains());
}

class VocabNumbering {
 public:
  VocabNumbering(StringPiece vocab_file, StringPiece temp_prefix, bool renumber)
    : vocab_file_(vocab_file.data(), vocab_file.size()),
      temp_prefix_(temp_prefix.data(), temp_prefix.size()),
  VocabNumbering(int final_vocab, StringPiece temp_prefix, bool renumber)
    : final_vocab_(final_vocab),
      renumber_(renumber),
      specials_(kBOS, kEOS) {
    InitFile(renumber || vocab_file.empty());
    if (renumber) {
      temporary_.reset(util::MakeTemp(temp_prefix));
    }
  }

  int File() const { return null_delimited_.get(); }
  int WriteOnTheFly() const { return renumber_ ? temporary_.get() : final_vocab_; }

  // Compute the vocabulary mapping and return the memory used.
  std::size_t ComputeMapping(WordIndex type_count) {
    if (!renumber_) return 0;
    util::scoped_fd previous(null_delimited_.release());
    InitFile(vocab_file_.empty());
    ngram::SortedVocabulary::ComputeRenumbering(type_count, previous.get(), null_delimited_.get(), vocab_mapping_);
    ngram::SortedVocabulary::ComputeRenumbering(type_count, temporary_.get(), final_vocab_, vocab_mapping_);
    temporary_.reset();
    return sizeof(WordIndex) * vocab_mapping_.size();
  }

@@ -312,15 +312,9 @@ class VocabNumbering {
  const SpecialVocab &Specials() const { return specials_; }

 private:
  void InitFile(bool temp) {
    null_delimited_.reset(temp ?
      util::MakeTemp(temp_prefix_) :
      util::CreateOrThrow(vocab_file_.c_str()));
  }

  std::string vocab_file_, temp_prefix_;

  util::scoped_fd null_delimited_;
  int final_vocab_;
  // Out of order vocab file created on the fly.
  util::scoped_fd temporary_;

  bool renumber_;

@@ -349,18 +343,17 @@ void Pipeline(PipelineConfig &config, int text_file, Output &output) {
  // master's destructor will wait for chains.  But they might be deadlocked if
  // this thread dies because e.g. it ran out of memory.
  try {
    VocabNumbering numbering(config.vocab_file, config.TempPrefix(), config.renumber_vocabulary);
    VocabNumbering numbering(output.VocabFile(), config.TempPrefix(), config.renumber_vocabulary);
    uint64_t token_count;
    WordIndex type_count;
    std::string text_file_name;
    std::vector<bool> prune_words;
    util::scoped_ptr<util::stream::Sort<SuffixOrder, CombineCounts> > sorted_counts(
      CountText(text_file, numbering.File(), master, token_count, type_count, text_file_name, prune_words));
      CountText(text_file, numbering.WriteOnTheFly(), master, token_count, type_count, text_file_name, prune_words));
    std::cerr << "Unigram tokens " << token_count << " types " << type_count << std::endl;

    // Create vocab mapping, which uses temporary memory, while nothing else is happening.
    std::size_t subtract_for_numbering = numbering.ComputeMapping(type_count);
    output.SetVocabFD(numbering.File());

    std::cerr << "=== 2/" << master.Steps() << " Calculating and sorting adjusted counts ===" << std::endl;
    master.InitForAdjust(*sorted_counts, type_count, subtract_for_numbering);
@@ -18,7 +18,6 @@ class Output;

struct PipelineConfig {
  std::size_t order;
  std::string vocab_file;
  util::stream::SortConfig sort;
  InitialProbabilitiesConfig initial_probs;
  util::stream::ChainConfig read_backoffs;
@@ -1,64 +0,0 @@
#include "lm/builder/print.hh"

#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
#include "util/stream/timer.hh"

#include <sstream>
#include <cstring>

namespace lm { namespace builder {

VocabReconstitute::VocabReconstitute(int fd) {
  uint64_t size = util::SizeOrThrow(fd);
  util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
  const char *const start = static_cast<const char*>(memory_.get());
  const char *i;
  for (i = start; i != start + size; i += strlen(i) + 1) {
    map_.push_back(i);
  }
  // Last one for LookupPiece.
  map_.push_back(i);
}

void PrintARPA::Sink(util::stream::Chains &chains) {
  chains >> boost::ref(*this);
}

void PrintARPA::Run(const util::stream::ChainPositions &positions) {
  VocabReconstitute vocab(GetVocabFD());
  util::FakeOFStream out(out_fd_.get());

  // Write header.
  if (verbose_header_) {
    out << "# Input file: " << GetHeader().input_file << '\n';
    out << "# Token count: " << GetHeader().token_count << '\n';
    out << "# Smoothing: Modified Kneser-Ney" << '\n';
  }
  out << "\\data\\\n";
  for (size_t i = 0; i < positions.size(); ++i) {
    out << "ngram " << (i+1) << '=' << GetHeader().counts_pruned[i] << '\n';
  }
  out << '\n';

  for (unsigned order = 1; order <= positions.size(); ++order) {
    out << "\\" << order << "-grams:" << '\n';
    for (NGramStream<BuildingPayload> stream(positions[order - 1]); stream; ++stream) {
      // Correcting for numerical precision issues.  Take that IRST.
      out << stream->Value().complete.prob << '\t' << vocab.Lookup(*stream->begin());
      for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
        out << ' ' << vocab.Lookup(*i);
      }
      if (order != positions.size())
        out << '\t' << stream->Value().complete.backoff;
      out << '\n';

    }
    out << '\n';
  }
  out << "\\end\\\n";
}

}} // namespaces
40 lm/common/CMakeLists.txt Normal file
@@ -0,0 +1,40 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library


# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>

# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_COMMON_SOURCE
  ${CMAKE_CURRENT_SOURCE_DIR}/model_buffer.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/print.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/renumber.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/size_option.cc
)


# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_common OBJECT ${KENLM_COMMON_SOURCE})
@@ -1,2 +1,2 @@
fakelib common : [ glob *.cc : *test.cc *main.cc ]
  ../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm ;
  ../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm /top//boost_program_options ;
@@ -1,8 +1,7 @@
#ifndef LM_BUILDER_JOINT_ORDER_H
#define LM_BUILDER_JOINT_ORDER_H
#ifndef LM_COMMON_JOINT_ORDER_H
#define LM_COMMON_JOINT_ORDER_H

#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/lm_exception.hh"

#ifdef DEBUG
@@ -12,15 +11,19 @@

#include <cstring>

namespace lm { namespace builder {
namespace lm {

template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) {
  // Allow matching to reference streams[-1].
  NGramStreams<BuildingPayload> streams_with_dummy;
  streams_with_dummy.InitWithDummy(positions);
  NGramStream<BuildingPayload> *streams = streams_with_dummy.begin() + 1;
  util::FixedArray<ProxyStream<NGramHeader> > streams_with_dummy(positions.size() + 1);
  // A bogus stream for [-1].
  streams_with_dummy.push_back();
  for (std::size_t i = 0; i < positions.size(); ++i) {
    streams_with_dummy.push_back(positions[i], NGramHeader(NULL, i + 1));
  }
  ProxyStream<NGramHeader> *streams = streams_with_dummy.begin() + 1;

  unsigned int order;
  std::size_t order;
  for (order = 0; order < positions.size() && streams[order]; ++order) {}
  assert(order); // should always have <unk>.

@@ -31,11 +34,11 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
    less_compare.push_back(i + 1);
#endif // DEBUG

  unsigned int current = 0;
  std::size_t current = 0;
  while (true) {
    // Does the context match the lower one?
    if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
      callback.Enter(current, *streams[current]);
      callback.Enter(current, streams[current].Get());
      // Transition to looking for extensions.
      if (++current < order) continue;
    }
@@ -51,7 +54,7 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
    while(true) {
      assert(current > 0);
      --current;
      callback.Exit(current, *streams[current]);
      callback.Exit(current, streams[current].Get());

      if (++streams[current]) break;

@@ -63,6 +66,6 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
  }
}

}} // namespaces
} // namespaces

#endif // LM_BUILDER_JOINT_ORDER_H
#endif // LM_COMMON_JOINT_ORDER_H
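JointOrder now hands callbacks a raw block pointer instead of a pre-built NGram. A minimal C++ callback skeleton under that contract (the class name and body are illustrative; interpolate.cc above performs the same rebuild):

  class ExampleCallback {
   public:
    void Enter(unsigned order_minus_1, void *data) {
      // Rebuild the typed view from the raw block, as interpolate.cc does.
      NGram<BuildingPayload> gram(data, order_minus_1 + 1);
      // ... inspect gram.begin()..gram.end() and gram.Value() ...
    }
    void Exit(unsigned /*order_minus_1*/, void * /*data*/) const {}
  };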
@@ -8,25 +8,30 @@

#include <boost/lexical_cast.hpp>

namespace lm { namespace common {
namespace lm {

namespace {
const char kMetadataHeader[] = "KenLM intermediate binary file";
} // namespace

ModelBuffer::ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q)
  : file_base_(file_base), keep_buffer_(keep_buffer), output_q_(output_q) {}

ModelBuffer::ModelBuffer(const std::string &file_base)
  : file_base_(file_base), keep_buffer_(false) {
ModelBuffer::ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q)
  : file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer), output_q_(output_q),
    vocab_file_(keep_buffer ? util::CreateOrThrow((file_base_ + ".vocab").c_str()) : util::MakeTemp(file_base_)) {}

ModelBuffer::ModelBuffer(StringPiece file_base)
  : file_base_(file_base.data(), file_base.size()), keep_buffer_(false) {
  const std::string full_name = file_base_ + ".kenlm_intermediate";
  util::FilePiece in(full_name.c_str());
  StringPiece token = in.ReadLine();
  UTIL_THROW_IF2(token != kMetadataHeader, "File " << full_name << " begins with \"" << token << "\" not " << kMetadataHeader);

  token = in.ReadDelimited();
  UTIL_THROW_IF2(token != "Order", "Expected Order, got \"" << token << "\" in " << full_name);
  unsigned long order = in.ReadULong();
  UTIL_THROW_IF2(token != "Counts", "Expected Counts, got \"" << token << "\" in " << full_name);
  char got;
  while ((got = in.get()) == ' ') {
    counts_.push_back(in.ReadULong());
  }
  UTIL_THROW_IF2(got != '\n', "Expected newline at end of counts.");

  token = in.ReadDelimited();
  UTIL_THROW_IF2(token != "Payload", "Expected Payload, got \"" << token << "\" in " << full_name);
@@ -39,16 +44,16 @@ ModelBuffer::ModelBuffer(const std::string &file_base)
    UTIL_THROW(util::Exception, "Unknown payload " << token);
  }

  files_.Init(order);
  for (unsigned long i = 0; i < order; ++i) {
  vocab_file_.reset(util::OpenReadOrThrow((file_base_ + ".vocab").c_str()));

  files_.Init(counts_.size());
  for (unsigned long i = 0; i < counts_.size(); ++i) {
    files_.push_back(util::OpenReadOrThrow((file_base_ + '.' + boost::lexical_cast<std::string>(i + 1)).c_str()));
  }
}

// virtual destructor
ModelBuffer::~ModelBuffer() {}

void ModelBuffer::Sink(util::stream::Chains &chains) {
void ModelBuffer::Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts) {
  counts_ = counts;
  // Open files.
  files_.Init(chains.size());
  for (std::size_t i = 0; i < chains.size(); ++i) {
@@ -64,19 +69,23 @@ void ModelBuffer::Sink(util::stream::Chains &chains) {
  if (keep_buffer_) {
    util::scoped_fd metadata(util::CreateOrThrow((file_base_ + ".kenlm_intermediate").c_str()));
    util::FakeOFStream meta(metadata.get(), 200);
    meta << kMetadataHeader << "\nOrder " << chains.size() << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
    meta << kMetadataHeader << "\nCounts";
    for (std::vector<uint64_t>::const_iterator i = counts_.begin(); i != counts_.end(); ++i) {
      meta << ' ' << *i;
    }
    meta << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
  }
}

void ModelBuffer::Source(util::stream::Chains &chains) {
  assert(chains.size() == files_.size());
  for (unsigned int i = 0; i < files_.size(); ++i) {
  assert(chains.size() <= files_.size());
  for (unsigned int i = 0; i < chains.size(); ++i) {
    chains[i] >> util::stream::PRead(files_[i].get());
  }
}

std::size_t ModelBuffer::Order() const {
  return files_.size();
void ModelBuffer::Source(std::size_t order_minus_1, util::stream::Chain &chain) {
  chain >> util::stream::PRead(files_[order_minus_1].get());
}

}} // namespaces
} // namespace
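For reference, the .kenlm_intermediate metadata file that Sink writes now carries a counts line instead of an order line; with made-up counts for a trigram model it would look like:

  KenLM intermediate binary file
  Counts 35 100 175
  Payload pb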
@@ -1,5 +1,5 @@
#ifndef LM_BUILDER_MODEL_BUFFER_H
#define LM_BUILDER_MODEL_BUFFER_H
#ifndef LM_COMMON_MODEL_BUFFER_H
#define LM_COMMON_MODEL_BUFFER_H

/* Format with separate files in suffix order.  Each file contains
 * n-grams of the same order.
@@ -9,37 +9,55 @@
#include "util/fixed_array.hh"

#include <string>
#include <vector>

namespace util { namespace stream { class Chains; } }
namespace util { namespace stream {
class Chains;
class Chain;
}} // namespaces

namespace lm { namespace common {
namespace lm {

class ModelBuffer {
 public:
  // Construct for writing.
  ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q);
  // Construct for writing.  Must call VocabFile() and fill it with null-delimited vocab words.
  ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q);

  // Load from file.
  explicit ModelBuffer(const std::string &file_base);
  explicit ModelBuffer(StringPiece file_base);

  // explicit for virtual destructor.
  ~ModelBuffer();

  void Sink(util::stream::Chains &chains);
  // Must call VocabFile and populate before calling this function.
  void Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts);

  // Read files and write to the given chains.  If fewer chains are provided,
  // only do the lower orders.
  void Source(util::stream::Chains &chains);

  void Source(std::size_t order_minus_1, util::stream::Chain &chain);

  // The order of the n-gram model that is associated with the model buffer.
  std::size_t Order() const;
  std::size_t Order() const { return counts_.size(); }
  // Requires Sink or load from file.
  const std::vector<uint64_t> &Counts() const {
    assert(!counts_.empty());
    return counts_;
  }

  int VocabFile() const { return vocab_file_.get(); }
  int StealVocabFile() { return vocab_file_.release(); }

  bool Keep() const { return keep_buffer_; }

 private:
  const std::string file_base_;
  const bool keep_buffer_;
  bool output_q_;
  std::vector<uint64_t> counts_;

  util::scoped_fd vocab_file_;
  util::FixedArray<util::scoped_fd> files_;
};

}} // namespaces
} // namespace lm

#endif // LM_BUILDER_MODEL_BUFFER_H
#endif // LM_COMMON_MODEL_BUFFER_H
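A sketch of the intended round trip, with illustrative names and counts (per the header comments, the write side must fill VocabFile() with null-delimited words before Sink is called):

  // Write side: record n-gram streams plus their counts.
  lm::ModelBuffer writer("run1", true /*keep_buffer*/, false /*prob+backoff payload*/);
  // ... write null-delimited vocab words to writer.VocabFile(), then:
  writer.Sink(chains, counts_pruned);

  // Read side: reload run1.kenlm_intermediate and replay the streams.
  lm::ModelBuffer loaded("run1");
  loaded.Source(read_chains);  // fewer chains => only the lower orders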
@@ -16,6 +16,8 @@ class NGramHeader {
  NGramHeader(void *begin, std::size_t order)
    : begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}

  NGramHeader() : begin_(NULL), end_(NULL) {}

  const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
  uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); }

@@ -32,6 +34,7 @@ class NGramHeader {
  const WordIndex *end() const { return end_; }
  WordIndex *end() { return end_; }

  std::size_t size() const { return end_ - begin_; }
  std::size_t Order() const { return end_ - begin_; }

 private:
@@ -42,6 +45,8 @@ template <class PayloadT> class NGram : public NGramHeader {
 public:
  typedef PayloadT Payload;

  NGram() : NGramHeader(NULL, 0) {}

  NGram(void *begin, std::size_t order) : NGramHeader(begin, order) {}

  // Would do operator++ but that can get confusing for a stream.
@@ -10,24 +10,21 @@

namespace lm {

template <class Payload> class NGramStream {
template <class Proxy> class ProxyStream {
 public:
  NGramStream() : gram_(NULL, 0) {}
  // Make an invalid stream.
  ProxyStream() {}

  NGramStream(const util::stream::ChainPosition &position) : gram_(NULL, 0) {
    Init(position);
  explicit ProxyStream(const util::stream::ChainPosition &position, const Proxy &proxy = Proxy())
    : proxy_(proxy), stream_(position) {
    proxy_.ReBase(stream_.Get());
  }

  void Init(const util::stream::ChainPosition &position) {
    stream_.Init(position);
    gram_ = NGram<Payload>(stream_.Get(), NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()));
  }
  Proxy &operator*() { return proxy_; }
  const Proxy &operator*() const { return proxy_; }

  NGram<Payload> &operator*() { return gram_; }
  const NGram<Payload> &operator*() const { return gram_; }

  NGram<Payload> *operator->() { return &gram_; }
  const NGram<Payload> *operator->() const { return &gram_; }
  Proxy *operator->() { return &proxy_; }
  const Proxy *operator->() const { return &proxy_; }

  void *Get() { return stream_.Get(); }
  const void *Get() const { return stream_.Get(); }
@@ -36,21 +33,25 @@ template <class Payload> class NGramStream {
  bool operator!() const { return !stream_; }
  void Poison() { stream_.Poison(); }

  NGramStream &operator++() {
  ProxyStream<Proxy> &operator++() {
    ++stream_;
    gram_.ReBase(stream_.Get());
    proxy_.ReBase(stream_.Get());
    return *this;
  }

 private:
  NGram<Payload> gram_;
  Proxy proxy_;
  util::stream::Stream stream_;
};

template <class Payload> inline util::stream::Chain &operator>>(util::stream::Chain &chain, NGramStream<Payload> &str) {
  str.Init(chain.Add());
  return chain;
}
template <class Payload> class NGramStream : public ProxyStream<NGram<Payload> > {
 public:
  // Make an invalid stream.
  NGramStream() {}

  explicit NGramStream(const util::stream::ChainPosition &position) :
    ProxyStream<NGram<Payload> >(position, NGram<Payload>(NULL, NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()))) {}
};

template <class Payload> class NGramStreams : public util::stream::GenericStreams<NGramStream<Payload> > {
 private:
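ProxyStream generalizes the old NGramStream: any proxy with a ReBase(void*) method can ride a chain position. PrintARPA in lm/common/print.cc (next file) uses it directly; a condensed C++ sketch of that pattern, with an illustrative order:

  const unsigned order = 3;  // illustrative
  for (ProxyStream<NGram<ProbBackoff> > stream(position, NGram<ProbBackoff>(NULL, order));
       stream; ++stream) {
    // stream->Value().prob, stream->Value().backoff, stream->begin()..end()
  }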
62 lm/common/print.cc Normal file
@@ -0,0 +1,62 @@
#include "lm/common/print.hh"

#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"

#include <sstream>
#include <cstring>

namespace lm {

VocabReconstitute::VocabReconstitute(int fd) {
  uint64_t size = util::SizeOrThrow(fd);
  util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
  const char *const start = static_cast<const char*>(memory_.get());
  const char *i;
  for (i = start; i != start + size; i += strlen(i) + 1) {
    map_.push_back(i);
  }
  // Last one for LookupPiece.
  map_.push_back(i);
}

namespace {
template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStream<Payload> &stream, util::FakeOFStream &out) {
  out << stream->Value().prob << '\t' << vocab.Lookup(*stream->begin());
  for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
    out << ' ' << vocab.Lookup(*i);
  }
}
} // namespace

void PrintARPA::Run(const util::stream::ChainPositions &positions) {
  VocabReconstitute vocab(vocab_fd_);
  util::FakeOFStream out(out_fd_);
  out << "\\data\\\n";
  for (size_t i = 0; i < positions.size(); ++i) {
    out << "ngram " << (i+1) << '=' << counts_[i] << '\n';
  }
  out << '\n';

  for (unsigned order = 1; order < positions.size(); ++order) {
    out << "\\" << order << "-grams:" << '\n';
    for (ProxyStream<NGram<ProbBackoff> > stream(positions[order - 1], NGram<ProbBackoff>(NULL, order)); stream; ++stream) {
      PrintLead(vocab, stream, out);
      out << '\t' << stream->Value().backoff << '\n';
    }
    out << '\n';
  }

  out << "\\" << positions.size() << "-grams:" << '\n';
  for (ProxyStream<NGram<Prob> > stream(positions.back(), NGram<Prob>(NULL, positions.size())); stream; ++stream) {
    PrintLead(vocab, stream, out);
    out << '\n';
  }
  out << '\n';
  out << "\\end\\\n";
}

} // namespace lm
58 lm/common/print.hh Normal file
@@ -0,0 +1,58 @@
#ifndef LM_COMMON_PRINT_H
#define LM_COMMON_PRINT_H

#include "lm/word_index.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"

#include <cassert>
#include <vector>

namespace util { namespace stream { class ChainPositions; }}

// Warning: PrintARPA routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.

namespace lm {

class VocabReconstitute {
 public:
  // fd must be alive for life of this object; does not take ownership.
  explicit VocabReconstitute(int fd);

  const char *Lookup(WordIndex index) const {
    assert(index < map_.size() - 1);
    return map_[index];
  }

  StringPiece LookupPiece(WordIndex index) const {
    return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
  }

  std::size_t Size() const {
    // There's an extra entry to support StringPiece lengths.
    return map_.size() - 1;
  }

 private:
  util::scoped_memory memory_;
  std::vector<const char*> map_;
};

class PrintARPA {
 public:
  // Does not take ownership of vocab_fd or out_fd.
  explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts)
    : vocab_fd_(vocab_fd), out_fd_(out_fd), counts_(counts) {}

  void Run(const util::stream::ChainPositions &positions);

 private:
  int vocab_fd_;
  int out_fd_;
  std::vector<uint64_t> counts_;
};

} // namespace lm
#endif // LM_COMMON_PRINT_H
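VocabReconstitute is the read side of the null-delimited vocab file; count_ngrams above uses it exactly this way. A short C++ sketch (the file name is illustrative):

  util::scoped_fd vocab_file(util::OpenReadOrThrow("corpus.vocab"));
  lm::VocabReconstitute vocab(vocab_file.get());  // fd must outlive vocab
  const char *first_word = vocab.Lookup(0);       // typically <unk> in KenLM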
24 lm/common/size_option.cc Normal file
@@ -0,0 +1,24 @@
#include <boost/program_options.hpp>
#include "util/usage.hh"

namespace lm {

namespace {
class SizeNotify {
 public:
  explicit SizeNotify(std::size_t &out) : behind_(out) {}

  void operator()(const std::string &from) {
    behind_ = util::ParseSize(from);
  }

 private:
  std::size_t &behind_;
};
}

boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
  return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}

} // namespace lm
11 lm/common/size_option.hh Normal file
@@ -0,0 +1,11 @@
#include <boost/program_options.hpp>

#include <cstddef>
#include <string>

namespace lm {

// Create a boost program option for data sizes. This parses sizes like 1T and 10k.
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value);

} // namespace lm
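lmplz_main.cc above shows the intended use; reduced to a minimal C++ sketch (option name and default are illustrative):

  std::size_t sort_memory;
  boost::program_options::options_description opts("example");
  opts.add_options()
    ("memory,S", lm::SizeOption(sort_memory, "1G"), "Sorting memory");
  // "1G", "8K", or "80%" style strings are converted by util::ParseSize.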
@@ -1,9 +1,9 @@
#ifndef LM_BUILDER_SPECIAL_H
#define LM_BUILDER_SPECIAL_H
#ifndef LM_COMMON_SPECIAL_H
#define LM_COMMON_SPECIAL_H

#include "lm/word_index.hh"

namespace lm { namespace builder {
namespace lm {

class SpecialVocab {
 public:
@@ -22,6 +22,6 @@ class SpecialVocab {
  WordIndex eos_;
};

}} // namespaces
} // namespace lm

#endif // LM_BUILDER_SPECIAL_H
#endif // LM_COMMON_SPECIAL_H
62
lm/filter/CMakeLists.txt
Normal file
@ -0,0 +1,62 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library


# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>

# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_FILTER_SOURCE
  ${CMAKE_CURRENT_SOURCE_DIR}/arpa_io.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/phrase.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc
)


# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_filter OBJECT ${KENLM_FILTER_SOURCE})


# Explicitly list the executable files to be compiled
set(EXE_LIST
  filter
  phrase_table_vocab
)


# Iterate through the executable list
foreach(exe ${EXE_LIST})

  # Compile the executable, linking against the requisite dependent object files
  add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_filter> $<TARGET_OBJECTS:kenlm_util>)

  # Link the executable against boost
  target_link_libraries(${exe} ${Boost_LIBRARIES})

  # Group executables together
  set_target_properties(${exe} PROPERTIES FOLDER executables)

# End for loop
endforeach(exe)
@ -5,10 +5,7 @@
#include <vector>

#include "StatisticsBasedScorer.h"
#include "moses/FF/InternalTree.h"

using Moses::TreePointer;
using Moses::InternalTree;
#include "InternalTree.h"

namespace MosesTuning
{
110
mert/InternalTree.cpp
Normal file
@ -0,0 +1,110 @@
#include "InternalTree.h"

namespace MosesTuning
{

InternalTree::InternalTree(const std::string & line, const bool terminal):
  m_isTerminal(terminal)
{

  size_t found = line.find_first_of("[] ");

  if (found == line.npos) {
    m_value = line;
  }

  else {
    AddSubTree(line, 0);
  }
}

size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{

  std::string value;
  char token = 0;

  while (token != ']' && pos != std::string::npos) {
    size_t oldpos = pos;
    pos = line.find_first_of("[] ", pos);
    if (pos == std::string::npos) break;
    token = line[pos];
    value = line.substr(oldpos,pos-oldpos);

    if (token == '[') {
      if (m_value.size() > 0) {
        m_children.push_back(boost::make_shared<InternalTree>(value,false));
        pos = m_children.back()->AddSubTree(line, pos+1);
      } else {
        if (value.size() > 0) {
          m_value = value;
        }
        pos = AddSubTree(line, pos+1);
      }
    } else if (token == ' ' || token == ']') {
      if (value.size() > 0 && !(m_value.size() > 0)) {
        m_value = value;
      } else if (value.size() > 0) {
        m_isTerminal = false;
        m_children.push_back(boost::make_shared<InternalTree>(value,true));
      }
      if (token == ' ') {
        pos++;
      }
    }

    if (m_children.size() > 0) {
      m_isTerminal = false;
    }
  }

  if (pos == std::string::npos) {
    return line.size();
  }
  return std::min(line.size(),pos+1);

}

std::string InternalTree::GetString(bool start) const
{

  std::string ret = "";
  if (!start) {
    ret += " ";
  }

  if (!m_isTerminal) {
    ret += "[";
  }

  ret += m_value;
  for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
    ret += (*it)->GetString(false);
  }

  if (!m_isTerminal) {
    ret += "]";
  }
  return ret;

}


void InternalTree::Combine(const std::vector<TreePointer> &previous)
{

  std::vector<TreePointer>::iterator it;
  bool found = false;
  leafNT next_leafNT(this);
  for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
    found = next_leafNT(it);
    if (found) {
      *it = *it_prev;
    } else {
      std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
    }
  }
}


}
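As a quick check of the parser above, a round trip on a small bracketed tree (the tree string is made up; compile together with InternalTree.cpp):

#include <iostream>
#include "InternalTree.h"

int main() {
  // "[NP]" stays a leaf nonterminal; bare tokens become terminal children.
  MosesTuning::InternalTree tree("[S [NP] [VP likes mangoes]]");
  std::cout << tree.GetString() << std::endl;  // prints the bracketed form back
  return 0;
}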
77
mert/InternalTree.h
Normal file
@ -0,0 +1,77 @@
#pragma once

#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"

namespace MosesTuning
{

class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;

class InternalTree
{
  std::string m_value;
  std::vector<TreePointer> m_children;
  bool m_isTerminal;
public:
  InternalTree(const std::string & line, const bool terminal = false);
  InternalTree(const InternalTree & tree):
    m_value(tree.m_value),
    m_isTerminal(tree.m_isTerminal) {
    const std::vector<TreePointer> & children = tree.m_children;
    for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
      m_children.push_back(boost::make_shared<InternalTree>(**it));
    }
  }
  size_t AddSubTree(const std::string & line, size_t start);

  std::string GetString(bool start = true) const;
  void Combine(const std::vector<TreePointer> &previous);
  const std::string & GetLabel() const {
    return m_value;
  }

  size_t GetLength() const {
    return m_children.size();
  }
  std::vector<TreePointer> & GetChildren() {
    return m_children;
  }

  bool IsTerminal() const {
    return m_isTerminal;
  }

  bool IsLeafNT() const {
    return (!m_isTerminal && m_children.size() == 0);
  }
};

// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT)
{
  std::vector<TreePointer>::iterator it;
  InternalTree* tree;
  leafNT(InternalTree* root = 0): tree(root) {}
  $emit(std::vector<TreePointer>::iterator)
  for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
    if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
      $yield(it);
    } else if ((*it)->GetLength() > 0) {
      if ((*it).get()) { // normal pointer to same object that TreePointer points to
        $restart(tree = (*it).get());
      }
    }
  }
  $stop;
};

}
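The $generator block above is the coroutine-style iterator that Combine() in InternalTree.cpp drives; a sketch of using it directly (tree contents illustrative; link against InternalTree.cpp, and note the exact macro semantics come from util/generator.hh):

#include <iostream>
#include <vector>
#include "InternalTree.h"

int main() {
  MosesTuning::InternalTree tree("[S [NP] [VP [V] mangoes]]");
  std::vector<MosesTuning::TreePointer>::iterator it;
  MosesTuning::leafNT gen(&tree);  // yields one leaf nonterminal per call
  while (gen(it)) {
    std::cout << (*it)->GetLabel() << '\n';  // NP, then V
  }
  return 0;
}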
@ -30,7 +30,7 @@ InterpolatedScorer.cpp
Point.cpp
PerScorer.cpp
HwcmScorer.cpp
../moses/FF/InternalTree.cpp
InternalTree.cpp
Scorer.cpp
ScorerFactory.cpp
Optimizer.cpp
@ -14,6 +14,8 @@ exe 1-1-Extraction : 1-1-Extraction.cpp ..//boost_filesystem ../moses//moses ;

exe prunePhraseTable : prunePhraseTable.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;

exe pruneGeneration : pruneGeneration.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;

local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
exe processPhraseTableMin : processPhraseTableMin.cpp ..//boost_filesystem ../moses//moses ;
@ -46,6 +48,6 @@ $(TOP)//boost_iostreams
$(TOP)//boost_program_options
;

alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable ;
alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable pruneGeneration ;
#processPhraseTable queryPhraseTable
98
misc/pruneGeneration.cpp
Normal file
@ -0,0 +1,98 @@
#include <stdio.h>
#include <stdlib.h>
#include <cassert>
#include <algorithm>
#include <functional>
#include <boost/filesystem.hpp>
#include "pruneGeneration.h"
#include "moses/InputFileStream.h"
#include "moses/OutputFileStream.h"

using namespace std;

int main(int argc, char **argv)
{
  cerr << "Starting" << endl;
  int limit = atoi(argv[1]);
  string inPathStem = argv[2];
  string outPathStem = argv[3];

  namespace fs = boost::filesystem;

  //cerr << "inPathStem=" << inPathStem << endl;
  fs::path p(inPathStem);
  fs::path dir = p.parent_path();
  //cerr << "dir=" << dir << endl;

  fs::path fileStem = p.filename();
  string fileStemStr = fileStem.native();
  size_t fileStemStrSize = fileStemStr.size();
  //cerr << "fileStem=" << fileStemStr << endl;

  // loop thru each file in directory
  fs::directory_iterator end_iter;
  for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
    if (fs::is_regular_file(dir_iter->status())) {
      fs::path currPath = *dir_iter;
      string currPathStr = currPath.native();
      //cerr << "currPathStr=" << currPathStr << endl;

      fs::path currFile = currPath.filename();
      string currFileStr = currFile.native();

      if (currFileStr.find(fileStemStr) == 0) {
        // found gen table we need
        //cerr << "found=" << currPathStr << endl;
        string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
        string outPath = outPathStem + suffix;
        cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;

        Moses::InputFileStream inStrme(currPathStr);
        Moses::OutputFileStream outStrme(outPath);
        Process(limit, inStrme, outStrme);

      }
    }
  }

  cerr << "Finished" << endl;
}

void Process(int limit, istream &inStrme, ostream &outStrme)
{
  vector<Rec> records;
  string prevInWord;
  string line;
  while (getline(inStrme, line)) {
    vector<string> toks;
    Tokenize(toks, line);
    assert(toks.size() == 4);

    if (prevInWord != toks[0]) {
      Output(outStrme, records, limit);
      records.clear();
    }

    // add new record
    float prob = atof(toks[2].c_str());
    records.push_back(Rec(prob, line));

    prevInWord = toks[0];
  }

  // last
  Output(outStrme, records, limit);
  records.clear();

}

void Output(ostream &outStrme, vector<Rec> &records, int limit)
{
  std::sort(records.rbegin(), records.rend());

  for (size_t i = 0; i < limit && i < records.size(); ++i) {
    const Rec &rec = records[i];
    outStrme << rec.line << endl;
  }
}
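A self-contained sketch of Process() on an in-memory generation table (the rows are made up; the four-column format matches the assert above):

#include <iostream>
#include <sstream>
#include "pruneGeneration.h"

int main() {
  std::istringstream in(
    "haus haus|NN 0.6 0.5\n"
    "haus haus|NNP 0.3 0.5\n"
    "haus haus|JJ 0.1 0.5\n");
  // keep only the single most probable row per source word (column 1)
  Process(1, in, std::cout);  // prints the haus|NN row only
  return 0;
}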
46
misc/pruneGeneration.h
Normal file
@ -0,0 +1,46 @@
#pragma once
#include <vector>
#include <string>
#include <iostream>

class Rec
{
public:
  float prob;
  std::string line;

  Rec(float aprob, const std::string &aline)
    :prob(aprob)
    ,line(aline)
  {}

  inline bool operator< (const Rec &compare) const {
    return prob < compare.prob;
  }
};

////////////////////////////////////////////////////////////

void Process(int limit, std::istream &inStrme, std::ostream &outStrme);
void Output(std::ostream &outStrme, std::vector<Rec> &records, int limit);

////////////////////////////////////////////////////////////
inline void Tokenize(std::vector<std::string> &output
                     , const std::string& str
                     , const std::string& delimiters = " \t")
{
  // Skip delimiters at beginning.
  std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
  // Find first "non-delimiter".
  std::string::size_type pos = str.find_first_of(delimiters, lastPos);

  while (std::string::npos != pos || std::string::npos != lastPos) {
    // Found a token, add it to the vector.
    output.push_back(str.substr(lastPos, pos - lastPos));
    // Skip delimiters.  Note the "not_of"
    lastPos = str.find_first_not_of(delimiters, pos);
    // Find next "non-delimiter"
    pos = str.find_first_of(delimiters, lastPos);
  }
}
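And a tiny sketch of the Tokenize helper above, which splits on runs of spaces and tabs:

#include <cassert>
#include <string>
#include <vector>
#include "pruneGeneration.h"

int main() {
  std::vector<std::string> toks;
  Tokenize(toks, "haus haus|NN 0.6 0.5");
  assert(toks.size() == 4);
  assert(toks[2] == "0.6");  // the probability column Process() reads
  return 0;
}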
@ -159,13 +159,15 @@ int main(int argc, char* argv[])
}

StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
SD.SetUseLatticeMBR(true);
LMBR_Options& lmbr = SD.options().lmbr;
MBR_Options& mbr = SD.options().mbr;
lmbr.enabled = true;

boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
if (!ioWrapper) {
  throw runtime_error("Failed to initialise IOWrapper");
}
size_t nBestSize = SD.GetMBRSize();
size_t nBestSize = mbr.size;

if (nBestSize <= 0) {
  throw runtime_error("Non-positive size specified for n-best list");
@ -187,13 +189,13 @@ int main(int argc, char* argv[])
manager.CalcNBest(nBestSize, nBestList,true);
//grid search
BOOST_FOREACH(float const& p, pgrid) {
  SD.SetLatticeMBRPrecision(p);
  lmbr.precision = p;
  BOOST_FOREACH(float const& r, rgrid) {
    SD.SetLatticeMBRPRatio(r);
    lmbr.ratio = r;
    BOOST_FOREACH(size_t const prune_i, prune_grid) {
      SD.SetLatticeMBRPruningFactor(size_t(prune_i));
      lmbr.pruning_factor = prune_i;
      BOOST_FOREACH(float const& scale_i, scale_grid) {
        SD.SetMBRScale(scale_i);
        mbr.scale = scale_i;
        size_t lineCount = source->GetTranslationId();
        cout << lineCount << " ||| " << p << " "
             << r << " " << size_t(prune_i) << " " << scale_i
@ -27,6 +27,12 @@ BaseManager::GetSource() const
return m_source;
}

const ttasksptr
BaseManager::GetTtask() const
{
  return m_ttask.lock();
}

void
BaseManager::
OutputSearchGraphAsHypergraph(std::ostream& out) const
@ -134,6 +140,14 @@ void BaseManager::WriteApplicationContext(std::ostream &out,
}
}

AllOptions const&
BaseManager::
options() const
{
  return GetTtask()->options();
}


} // namespace
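One caveat the patch leaves open: options() dereferences the result of GetTtask() without checking it, and GetTtask() returns an empty pointer once the owning TranslationTask has been destroyed. A defensive variant, as a sketch only (UTIL_THROW_IF2 is the error macro already used elsewhere in Moses):

AllOptions const&
BaseManager::
options() const
{
  ttasksptr t = GetTtask();  // empty if the weak_ptr has expired
  UTIL_THROW_IF2(!t, "Manager outlived its TranslationTask");
  return t->options();
}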
@ -5,7 +5,7 @@
#include <string>
#include "ScoreComponentCollection.h"
#include "InputType.h"

#include "moses/parameters/AllOptions.h"
namespace Moses
{
class ScoreComponentCollection;
@ -50,6 +50,8 @@ public:

//! the input sentence being decoded
const InputType& GetSource() const;
const ttasksptr GetTtask() const;
AllOptions const& options() const;

virtual void Decode() = 0;
// outputs
@ -53,7 +53,7 @@ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
ChartCellBase(startPos, endPos), m_manager(manager)
{
const StaticData &staticData = StaticData::Instance();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_nBestIsEnabled = staticData.options().nbest.enabled;
}

ChartCell::~ChartCell() {}
@ -100,7 +100,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
}

// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.GetCubePruningPopLimit();
const size_t popLimit = staticData.options().cube.pop_limit;
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
  ChartHypothesis *hypo = queue.Pop();
  AddHypothesis(hypo);
@ -287,8 +287,11 @@ void ChartHypothesis::CleanupArcList()
 * so we'll keep the whole arc list if we need a distinct n-best list
 */
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct
                      || staticData.options().mbr.enabled
                      || staticData.GetOutputSearchGraph()
                      || staticData.GetOutputSearchGraphHypergraph());

if (!distinctNBest && m_arcList->size() > nBestSize) {
  // prune the arc list only if there are too many arcs
@ -38,8 +38,8 @@ ChartHypothesisCollection::ChartHypothesisCollection()
const StaticData &staticData = StaticData::Instance();

m_beamWidth = staticData.GetBeamWidth();
m_maxHypoStackSize = staticData.GetMaxHypoStackSize();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_maxHypoStackSize = staticData.options().search.stack_size;
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
}
@ -52,11 +52,7 @@ public:
// shouldn't be mixing hypos with different lhs
assert(hypoA->GetTargetLHS() == hypoB->GetTargetLHS());

int ret = hypoA->RecombineCompare(*hypoB);
if (ret != 0)
  return (ret < 0);

return false;
return (hypoA->RecombineCompare(*hypoB) < 0);
}
};
@ -207,7 +207,7 @@ void ChartManager::CalcNBest(
// with 0 being 'unlimited.'  This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;

// Extract the derivations.
@ -318,13 +318,14 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
if (nBestSize > 0) {
  const size_t translationId = m_source.GetTranslationId();

  VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
  VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
          << staticData.options().nbest.output_file_path << endl);
  std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
  CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
  CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
  OutputNBestList(collector, nBestList, translationId);
  IFVERBOSE(2) {
    PrintUserTime("N-Best Hypotheses Generation Time:");
@ -348,10 +349,9 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
  FixPrecision(out);
}

bool includeWordAlignment =
  StaticData::Instance().PrintAlignmentInfoInNbest();

bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
NBestOptions const& nbo = StaticData::Instance().options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;

for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
     p != nBestList.end(); ++p) {
@ -620,9 +620,9 @@ void ChartManager::OutputDetailedTranslationReport(

if (staticData.IsDetailedAllTranslationReportingEnabled()) {
  const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
  size_t nBestSize = staticData.GetNBestSize();
  size_t nBestSize = staticData.options().nbest.nbest_size;
  std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
  CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
  CalcNBest(nBestSize, nBestList, staticData.options().nbest.only_distinct);
  OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}
@ -106,7 +106,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {

if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
  targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}
@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$

#include "ConfusionNet.h"
@ -65,9 +66,9 @@ ConfusionNet() : InputType()
{
stats.createOne();

const StaticData& staticData = StaticData::Instance();
if (staticData.IsSyntax()) {
  m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
const StaticData& SD = StaticData::Instance();
if (SD.IsSyntax()) {
  m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}
@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id: ExportInterface.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $

/***********************************************************************
@ -63,9 +64,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>
#include "server/Translator.h"
#include "server/Optimizer.h"
#include "server/Updater.h"
#include "server/Server.h"
#endif

using namespace std;
@ -147,41 +146,9 @@ int
run_as_server()
{
#ifdef HAVE_XMLRPC_C
int port;
params.SetParameter(port, "server-port", 8080);
bool isSerial;
params.SetParameter(isSerial, "serial", false);
string logfile;
params.SetParameter(logfile, "server-log", string(""));
size_t num_threads;
params.SetParameter(num_threads, "threads", size_t(10));
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);

xmlrpc_c::registry myRegistry;

xmlrpc_c::methodPtr const translator(new MosesServer::Translator(num_threads));
xmlrpc_c::methodPtr const updater(new MosesServer::Updater);
xmlrpc_c::methodPtr const optimizer(new MosesServer::Optimizer);

myRegistry.addMethod("translate", translator);
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);

xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);

XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {
  while(1) myAbyssServer.runOnce();
} else myAbyssServer.run();

std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
// #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
#else
// #pragma message("BUILDING MOSES WITHOUT SERVER SUPPORT")
std::cerr << "Moses was compiled without server support." << endl;
MosesServer::Server server(params);
return server.run(); // actually: don't return. see Server::run()
#endif
return 1;

}

int
@ -212,21 +179,57 @@ batch_run()
ThreadPool pool(staticData.ThreadCount());
#endif

// using context for adaptation:
// e.g., context words / strings from config file / cmd line
std::string context_string;
params.SetParameter(context_string,"context-string",string(""));

// main loop over set of input sentences
// ... or weights for documents/domains from config file / cmd. line
std::string context_weights;
params.SetParameter(context_weights,"context-weights",string(""));

// ... or the surrounding context (--context-window ...)
size_t size_t_max = std::numeric_limits<size_t>::max();
bool use_context_window = ioWrapper->GetLookAhead() || ioWrapper->GetLookBack();
bool use_context = use_context_window || context_string.size();
bool use_sliding_context_window = (use_context_window
                                   && ioWrapper->GetLookAhead() != size_t_max);

boost::shared_ptr<std::vector<std::string> > context_window;
boost::shared_ptr<std::vector<std::string> >* cw;
cw = use_context_window ? &context_window : NULL;
if (!cw && context_string.size())
  context_window.reset(new std::vector<std::string>(1,context_string));

// global scope of caches, biases, etc., if any
boost::shared_ptr<ContextScope> gscope;
if (!use_sliding_context_window)
  gscope.reset(new ContextScope);

// main loop over set of input sentences
boost::shared_ptr<InputType> source;
while ((source = ioWrapper->ReadInput()) != NULL) {
while ((source = ioWrapper->ReadInput(cw)) != NULL) {
  IFVERBOSE(1) ResetUserTime();

  // set up task of translating one sentence
  boost::shared_ptr<TranslationTask>
  task = TranslationTask::create(source, ioWrapper);
  if (source->GetContext())
    task->SetContextString(*source->GetContext());
  else task->SetContextString(context_string);
  boost::shared_ptr<ContextScope> lscope;
  if (gscope) lscope = gscope;
  else lscope.reset(new ContextScope);

  boost::shared_ptr<TranslationTask> task;
  task = TranslationTask::create(source, ioWrapper, lscope);

  if (cw) {
    if (context_string.size())
      context_window->push_back(context_string);
    if(!use_sliding_context_window)
      cw = NULL;
  }
  if (context_window)
    task->SetContextWindow(context_window);

  if (context_weights != "")
    task->SetContextWeights(context_weights);

  // Allow for (sentence-)context-specific processing prior to
  // decoding. This can be used, for example, for context-sensitive
@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
// $Id$
@ -43,7 +43,9 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
bool addBeginEndWord
  = ((staticData.options().search.algo == CYKPlus)
     || (staticData.options().search.algo == ChartIncremental));

for(size_t i = 0; i < m_paths.size(); ++i) {
  InputFileStream constraintFile(m_paths[i]);
@ -6,7 +6,6 @@
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
@ -152,7 +151,7 @@ FeatureFactory
::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
const string &featureName = feature->GetScoreProducerDescription();
const std::string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);


@ -165,8 +164,8 @@ FeatureFactory
            << "WARNING: Auto-initializing all weights for this FF to 1.0");
    weights.assign(feature->GetNumScoreComponents(),1.0);
  } else {
    TRACE_ERR("WARNING: No weights specified in config file for FF "
              << featureName << ". Using default values supplied by FF.");
    VERBOSE(2,"WARNING: No weights specified in config file for FF "
            << featureName << ". Using default values supplied by FF.");
  }
}
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
@ -215,7 +214,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
// MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
@ -353,18 +352,18 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line

void FeatureRegistry::PrintFF() const
{
vector<string> ffs;
std::vector<std::string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
  const string &ffName = iter->first;
  const std::string &ffName = iter->first;
  ffs.push_back(ffName);
}

vector<string>::const_iterator iterVec;
std::vector<std::string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
  const string &ffName = *iterVec;
  const std::string &ffName = *iterVec;
  std::cerr << ffName << " ";
}
@ -19,8 +19,8 @@ HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)

vector<float> weights = staticData.GetWeights(this);

staticData.m_maxHypoStackSize = weights[0] * 1000;
staticData.m_beamWidth = weights[1] * 10;
staticData.m_options.search.stack_size = weights[0] * 1000;
staticData.m_options.search.beam_width = weights[1] * 10;

}
@ -1,27 +1,24 @@
#include "InternalTree.h"
#include "moses/StaticData.h"

namespace Moses
{

InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
  m_value_nt(0),
  m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool nonterminal)
{

  if (len > 0) {
    m_value.assign(line, start, len);
    m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(start, len), nonterminal);
  }
}

InternalTree::InternalTree(const std::string & line, const bool terminal):
  m_value_nt(0),
  m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, const bool nonterminal)
{

  size_t found = line.find_first_of("[] ");

  if (found == line.npos) {
    m_value = line;
    m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), line, nonterminal);
  } else {
    AddSubTree(line, 0);
  }
@ -32,6 +29,7 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)

char token = 0;
size_t len = 0;
bool has_value = false;

while (token != ']' && pos != std::string::npos) {
  size_t oldpos = pos;
@ -41,30 +39,27 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
  len = pos-oldpos;

  if (token == '[') {
    if (!m_value.empty()) {
      m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
    if (has_value) {
      m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
      pos = m_children.back()->AddSubTree(line, pos+1);
    } else {
      if (len > 0) {
        m_value.assign(line, oldpos, len);
        m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), false);
        has_value = true;
      }
      pos = AddSubTree(line, pos+1);
    }
  } else if (token == ' ' || token == ']') {
    if (len > 0 && m_value.empty()) {
      m_value.assign(line, oldpos, len);
    if (len > 0 && !has_value) {
      m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), true);
      has_value = true;
    } else if (len > 0) {
      m_isTerminal = false;
      m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
      m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
    }
    if (token == ' ') {
      pos++;
    }
  }

  if (!m_children.empty()) {
    m_isTerminal = false;
  }
}

if (pos == std::string::npos) {
@ -82,16 +77,16 @@ std::string InternalTree::GetString(bool start) const
  ret += " ";
}

if (!m_isTerminal) {
if (!IsTerminal()) {
  ret += "[";
}

ret += m_value;
ret += m_value.GetString(StaticData::Instance().GetOutputFactorOrder(), false);
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
  ret += (*it)->GetString(false);
}

if (!m_isTerminal) {
if (!IsTerminal()) {
  ret += "]";
}
return ret;
@ -120,13 +115,13 @@ void InternalTree::Unbinarize()
{

// nodes with virtual label cannot be unbinarized
if (m_value.empty() || m_value[0] == '^') {
if (m_value.GetString(0).empty() || m_value.GetString(0).as_string()[0] == '^') {
  return;
}

//if node has child that is virtual node, get unbinarized list of children
for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
  if (!(*it)->IsTerminal() && (*it)->GetLabel()[0] == '^') {
  if (!(*it)->IsTerminal() && (*it)->GetLabel().GetString(0).as_string()[0] == '^') {
    std::vector<TreePointer> new_children;
    GetUnbinarizedChildren(new_children);
    m_children = new_children;
@ -144,8 +139,8 @@ void InternalTree::Unbinarize()
void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
{
for (std::vector<TreePointer>::const_iterator itx = m_children.begin(); itx != m_children.end(); ++itx) {
  const std::string &label = (*itx)->GetLabel();
  if (!label.empty() && label[0] == '^') {
  const StringPiece label = (*itx)->GetLabel().GetString(0);
  if (!label.empty() && label.as_string()[0] == '^') {
    (*itx)->GetUnbinarizedChildren(ret);
  } else {
    ret.push_back(*itx);
@ -153,7 +148,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
}
}

bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if ((*it)->GetLabel() == label) {
@ -163,7 +158,7 @@ bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer
return false;
}

bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if ((*it)->GetLabel() == label) {
@ -178,7 +173,7 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}

bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if ((*it)->GetLabel() == label) {
@ -194,88 +189,4 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}


bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if ((*it)->GetNTLabel() == label) {
    return true;
  }
}
return false;
}

bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if ((*it)->GetNTLabel() == label) {
    return true;
  }
  std::vector<TreePointer>::const_iterator it2;
  if ((*it)->RecursiveSearch(label, it2)) {
    it = it2;
    return true;
  }
}
return false;
}

bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if ((*it)->GetNTLabel() == label) {
    parent = this;
    return true;
  }
  std::vector<TreePointer>::const_iterator it2;
  if ((*it)->RecursiveSearch(label, it2, parent)) {
    it = it2;
    return true;
  }
}
return false;
}


bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
    return true;
  }
}
return false;
}

bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
    return true;
  }
  std::vector<TreePointer>::const_iterator it2;
  if ((*it)->RecursiveSearch(labels, it2)) {
    it = it2;
    return true;
  }
}
return false;
}

bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
  if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
    parent = this;
    return true;
  }
  std::vector<TreePointer>::const_iterator it2;
  if ((*it)->RecursiveSearch(labels, it2, parent)) {
    it = it2;
    return true;
  }
}
return false;
}

}
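With the NTLabel and string overloads removed, tree searches now go through Moses::Word. A fragment sketching the new calling convention (this assumes an existing InternalTree `tree` and an initialized StaticData; the label "NP" is illustrative):

// Build the nonterminal label once, then search the whole tree for it.
Moses::Word label;
label.CreateFromString(Moses::Output,
                       Moses::StaticData::Instance().GetOutputFactorOrder(),
                       "NP", true);  // true = nonterminal
std::vector<Moses::TreePointer>::const_iterator it;
if (tree.RecursiveSearch(label, it)) {
  // *it is the first NP node found; the GetLabel() == label test above
  // is now a factored Word comparison rather than a raw string compare.
}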
@ -5,30 +5,28 @@
#include <map>
#include <vector>
#include "FFState.h"
#include "moses/Word.h"
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
#include "util/string_piece.hh"

namespace Moses
{

class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;

class InternalTree
{
std::string m_value;
NTLabel m_value_nt;
Word m_value;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const std::string & line, const bool nonterminal = true);
InternalTree(const InternalTree & tree):
  m_value(tree.m_value),
  m_isTerminal(tree.m_isTerminal) {
  m_value(tree.m_value) {
  const std::vector<TreePointer> & children = tree.m_children;
  for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
    m_children.push_back(boost::make_shared<InternalTree>(**it));
@ -40,20 +38,10 @@ public:
void Combine(const std::vector<TreePointer> &previous);
void Unbinarize();
void GetUnbinarizedChildren(std::vector<TreePointer> &children) const;
const std::string & GetLabel() const {
const Word & GetLabel() const {
  return m_value;
}

// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
  return m_value_nt;
}

void SetNTLabel(NTLabel value) {
  m_value_nt = value;
}

size_t GetLength() const {
  return m_children.size();
}
@ -62,38 +50,22 @@ public:
}

bool IsTerminal() const {
  return m_isTerminal;
  return !m_value.IsNonTerminal();
}

bool IsLeafNT() const {
  return (!m_isTerminal && m_children.size() == 0);
  return (m_value.IsNonTerminal() && m_children.size() == 0);
}

// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.

// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;

// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;

// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;

// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;

// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;

// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;

// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) {
@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once

#include <string>
@ -1,6 +1,5 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once

#include <vector>
#include <string>

@ -12,7 +11,6 @@
#include "moses/WordsBitmap.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"

#include "ReorderingStack.h"

namespace Moses
@ -75,7 +75,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
if (! ( (id == 1) && (tokens[1] == "UNK") )) {
  const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is known and filter the model on loading?
  bool stored = Store(factor, id);
@ -86,7 +86,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is known and filter the model on loading?
bool stored = Store(factor, id);
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
@ -105,11 +105,11 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned idS = Scan<unsigned>(tokens[0]);
unsigned idT = Scan<unsigned>(tokens[1]);
unsigned idS = atoll( tokens[0].c_str() );
unsigned idT = atoll( tokens[1].c_str() );
const Factor* wordS = vcbS.GetWord(idS);
const Factor* wordT = vcbT.GetWord(idT);
float prob = Scan<float>(tokens[2]);
float prob = std::atof( tokens[2].c_str() );
if ( (wordS != NULL) && (wordT != NULL) ) {
  m_ltable[ wordS ][ wordT ] = prob;
}
@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {

  // Initialize phrase orientation scoring object
  MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
  MosesTraining::Syntax::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
      targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());

  PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,

  // LEFT-TO-RIGHT DIRECTION

  MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
  MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_L2R);

  if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
       && (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
    if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
        reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
      // discontinuous
      l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
      l2rOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
    } else {
      reoClassData->firstNonTerminalIsBoundary = true;
    }
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,

  // RIGHT-TO-LEFT DIRECTION

  MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
  MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::Syntax::GHKM::PhraseOrientation::REO_DIR_R2L);

  if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
       && (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
    if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
        reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
      // discontinuous
      r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
      r2lOrientation = MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
    } else {
      reoClassData->lastNonTerminalIsBoundary = true;
    }
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(

// LEFT-TO-RIGHT DIRECTION

MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];

IFFEATUREVERBOSE(2) {
  FEATUREVERBOSE(2, "l2rOrientation ");
  switch (l2rOrientation) {
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT:
    FEATUREVERBOSE2(2, "mono" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
    FEATUREVERBOSE2(2, "swap" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
    FEATUREVERBOSE2(2, "dleft" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
    FEATUREVERBOSE2(2, "dright" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
    // modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
    // modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR
    FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
    break;
  default:
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(

} else {

  if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
  if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {

    newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
    // if sub-derivation has left-boundary non-terminal:
    // add recursive actual score of boundary non-terminal from subderivation
    LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);

  } else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
  } else if ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {

    newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
    // if sub-derivation has left-boundary non-terminal:
    // add recursive actual score of boundary non-terminal from subderivation
    LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);

  } else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
              ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
              ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
  } else if ( ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
              ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
              ( l2rOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {

    newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
    // if sub-derivation has left-boundary non-terminal:
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(

// RIGHT-TO-LEFT DIRECTION

MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];

IFFEATUREVERBOSE(2) {
  FEATUREVERBOSE(2, "r2lOrientation ");
  switch (r2lOrientation) {
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT:
    FEATUREVERBOSE2(2, "mono" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
    FEATUREVERBOSE2(2, "swap" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
    FEATUREVERBOSE2(2, "dleft" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
    FEATUREVERBOSE2(2, "dright" << std::endl);
    break;
  case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
    // modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
  case MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
    // modelType == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_MSLR
    FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
    break;
  default:
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(

} else {

  if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
  if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {

    newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
    // if sub-derivation has right-boundary non-terminal:
    // add recursive actual score of boundary non-terminal from subderivation
    RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);

  } else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
  } else if ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {

    newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
    // if sub-derivation has right-boundary non-terminal:
    // add recursive actual score of boundary non-terminal from subderivation
    RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);

  } else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
              ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
              ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
  } else if ( ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
              ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
              ( r2lOrientation == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {

    newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
    // if sub-derivation has right-boundary non-terminal:
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
}


const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const
|
||||
{
|
||||
if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
return &MORIENT;
|
||||
|
||||
} else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
return &SORIENT;
|
||||
|
||||
} else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
return &DORIENT;
|
||||
|
||||
} else {
|
||||
|
@ -302,8 +302,8 @@ public:
|
||||
|
||||
struct ReoClassData {
|
||||
public:
|
||||
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
bool firstNonTerminalIsBoundary;
|
||||
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
||||
@ -401,7 +401,7 @@ protected:
|
||||
ScoreComponentCollection* scoreBreakdown,
|
||||
const std::string* o) const;
|
||||
|
||||
const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||
const std::string* ToString(const MosesTraining::Syntax::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||
|
||||
static const std::string MORIENT;
|
||||
static const std::string SORIENT;
|
||||
|
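Note: the hunks above mechanically rewrite every qualified use of MosesTraining::GHKM as MosesTraining::Syntax::GHKM. A namespace alias would make the next such relocation a one-line change; the following is only an illustrative sketch, not code from this commit, and REO_CLASS here is a stand-in for the real PhraseOrientation enum:

// Hypothetical sketch (not part of this commit): alias the nested
// namespace once so every call site can use a short, stable name.
namespace MosesTraining { namespace Syntax { namespace GHKM {
enum REO_CLASS { REO_CLASS_LEFT, REO_CLASS_RIGHT };  // stand-in enum
} } }

namespace SGHKM = MosesTraining::Syntax::GHKM;  // single point of change

int main() {
  SGHKM::REO_CLASS o = SGHKM::REO_CLASS_LEFT;   // instead of the full path
  return (o == SGHKM::REO_CLASS_LEFT) ? 0 : 1;
}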
@ -16,21 +16,29 @@ namespace Moses

PhrasePairFeature::PhrasePairFeature(const std::string &line)
:StatelessFeatureFunction(0, line)
,m_unrestricted(false)
,m_simple(true)
,m_sourceContext(false)
,m_domainTrigger(false)
,m_ignorePunctuation(false)
{
std::cerr << "Initializing PhrasePairFeature.." << std::endl;
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();

if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
if (m_sourceContext == 1) std::cerr << "using source context.. ";
if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
if (m_simple == 1) VERBOSE(1, " Using simple phrase pairs.");
if (m_sourceContext == 1) VERBOSE(1, " Using source context.");
if (m_domainTrigger == 1) VERBOSE(1, " Using domain triggers.");

// compile a list of punctuation characters
if (m_ignorePunctuation) {
std::cerr << "ignoring punctuation for triggers.. ";
VERBOSE(1, " Ignoring punctuation for triggers.");
char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
for (size_t i=0; i < sizeof(punctuation)-1; ++i)
for (size_t i=0; i < sizeof(punctuation)-1; ++i) {
m_punctuationHash[punctuation[i]] = 1;
}
}

VERBOSE(1, " Done." << std::endl);
}

void PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
@ -76,7 +84,7 @@ void PhrasePairFeature::Load()
}

inFileSource.close();
} else {
} else if (!m_unrestricted) {
// restricted source word vocabulary
ifstream inFileSource(m_filePathSource.c_str());
UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);
@ -101,8 +109,6 @@ void PhrasePairFeature::Load()
}

inFileTarget.close();*/

m_unrestricted = false;
}
}

@ -114,25 +120,6 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{
const Phrase& source = inputPath.GetPhrase();
if (m_simple) {
ostringstream namestr;
namestr << "pp_";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
}

scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
if (m_domainTrigger) {
const Sentence& isnt = static_cast<const Sentence&>(input);
const bool use_topicid = isnt.GetUseTopicId();
@ -140,18 +127,18 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input

// compute pair
ostringstream pair;
pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
pair << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
pair << ",";
pair << sourceFactor->GetString();
pair << "~";
pair << ReplaceTilde( sourceFactor->GetString() );
}
pair << "~";
pair << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
pair << "~~";
pair << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
pair << ",";
pair << targetFactor->GetString();
pair << "~";
pair << ReplaceTilde( targetFactor->GetString() );
}

if (use_topicid || use_topicid_prob) {
@ -159,7 +146,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// use topicid as trigger
const long topicid = isnt.GetTopicId();
stringstream feature;
feature << "pp_";
feature << m_description << "_";
if (topicid == -1)
feature << "unk";
else
@ -173,13 +160,13 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
const vector<string> &topicid_prob = *(isnt.GetTopicIdAndProb());
if (atol(topicid_prob[0].c_str()) == -1) {
stringstream feature;
feature << "pp_unk_";
feature << m_description << "_unk_";
feature << pair.str();
scoreBreakdown.SparsePlusEquals(feature.str(), 1);
} else {
for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
stringstream feature;
feature << "pp_";
feature << m_description << "_";
feature << topicid_prob[i];
feature << "_";
feature << pair.str();
@ -193,7 +180,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
string sourceTrigger = *p;
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "_";
namestr << pair.str();
@ -221,21 +208,21 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input

if (m_unrestricted || sourceTriggerExists) {
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "~";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}

scoreBreakdown.SparsePlusEquals(namestr.str(),1);
@ -244,6 +231,31 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
}
}

void PhrasePairFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
if (m_simple) {
ostringstream namestr;
namestr << m_description << "_";
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
}

bool PhrasePairFeature::IsUseable(const FactorMask &mask) const
{
bool ret = mask[m_targetFactorId];
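The hunks above replace the hard-coded "pp_" prefix with m_description and change the separators: words within one side are now joined by '~', the two sides by "~~", and ReplaceTilde keeps literal tildes in surface words from colliding with those separators. A standalone sketch of the resulting naming convention (BuildPairName and the sample words are illustrative, not from the commit):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Illustrative sketch of the sparse-feature naming scheme used above:
// description + '_' + source words joined by '~' + "~~" + target words
// joined by '~'.
std::string BuildPairName(const std::string &description,
                          const std::vector<std::string> &src,
                          const std::vector<std::string> &tgt) {
  std::ostringstream name;
  name << description << "_" << src[0];
  for (size_t i = 1; i < src.size(); ++i) name << "~" << src[i];
  name << "~~" << tgt[0];
  for (size_t i = 1; i < tgt.size(); ++i) name << "~" << tgt[i];
  return name.str();
}

int main() {
  std::vector<std::string> src, tgt;
  src.push_back("das"); src.push_back("Haus");
  tgt.push_back("the"); tgt.push_back("house");
  std::cout << BuildPairName("pp", src, tgt) << std::endl; // pp_das~Haus~~the~house
  return 0;
}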
@ -1,5 +1,4 @@
#ifndef moses_PhrasePairFeature_h
#define moses_PhrasePairFeature_h
#pragma once

#include <stdexcept>
#include <boost/unordered_set.hpp>
@ -32,6 +31,16 @@ class PhrasePairFeature: public StatelessFeatureFunction
CharHash m_punctuationHash;
std::string m_filePathSource;

inline std::string ReplaceTilde(const StringPiece &str) const {
std::string out = str.as_string();
size_t pos = out.find('~');
while ( pos != std::string::npos ) {
out.replace(pos,1,"<TILDE>");
pos = out.find('~',pos);
}
return out;
};

public:
PhrasePairFeature(const std::string &line);

@ -43,8 +52,7 @@ public:
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
, ScoreComponentCollection &estimatedFutureScore) const;

void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
@ -69,5 +77,3 @@ public:

}


#endif
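The new ReplaceTilde helper above is what makes the '~' separators safe: any literal tilde inside a token is rewritten to "<TILDE>" before the token enters a feature name. A minimal standalone version of the same loop, with a driver added for illustration (the StringPiece parameter is swapped for std::string to keep it self-contained):

#include <iostream>
#include <string>

// Same loop as PhrasePairFeature::ReplaceTilde: each '~' becomes "<TILDE>".
// Restarting the search at pos is safe because the replacement text
// contains no '~'.
std::string ReplaceTilde(const std::string &str) {
  std::string out = str;
  size_t pos = out.find('~');
  while (pos != std::string::npos) {
    out.replace(pos, 1, "<TILDE>");
    pos = out.find('~', pos);
  }
  return out;
}

int main() {
  std::cout << ReplaceTilde("approx~1.5") << std::endl; // approx<TILDE>1.5
  return 0;
}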
@ -12,7 +12,7 @@ namespace Moses
{

RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
: StatelessFeatureFunction(0, line)
: StatelessFeatureFunction(1, line)
, m_glueRules(false)
, m_nonGlueRules(true)
, m_glueTargetLHSStr("Q")
@ -81,6 +81,9 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
}

scoreBreakdown.PlusEquals(this, namestr.str(), 1);
if ( targetPhraseLHS != m_glueTargetLHS ) {
scoreBreakdown.PlusEquals(this, 1);
}
}

}
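The constructor change from StatelessFeatureFunction(0, line) to StatelessFeatureFunction(1, line) is what makes the new dense PlusEquals(this, 1) call legal: the feature now owns one dense score slot, used here to count non-glue rule applications, alongside its named sparse scores. A standalone illustration of that dense-versus-sparse split (ScoreCollection is a toy stand-in, not the Moses ScoreComponentCollection):

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Toy stand-in: a feature declares a fixed number of dense slots up front
// and may additionally fire arbitrarily many named sparse scores.
struct ScoreCollection {
  std::vector<float> dense;
  std::map<std::string, float> sparse;
  explicit ScoreCollection(size_t numDense) : dense(numDense, 0.0f) {}
  void PlusEquals(size_t slot, float v) { dense[slot] += v; }               // dense
  void PlusEquals(const std::string &name, float v) { sparse[name] += v; }  // sparse
};

int main() {
  ScoreCollection scores(1);            // like StatelessFeatureFunction(1, line)
  scores.PlusEquals("rp_NP~VP", 1.0f);  // named sparse rule-pair score
  scores.PlusEquals((size_t)0, 1.0f);   // dense non-glue counter
  std::cout << "dense[0]=" << scores.dense[0]
            << ", sparse entries=" << scores.sparse.size() << std::endl;
  return 0;
}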
@ -34,7 +34,7 @@ public:

void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
vector<float> newScores(m_numScoreComponents);
std::vector<float> newScores(m_numScoreComponents);
newScores[0] = translationOptionList.size();

TranslationOptionList::const_iterator iterTransOpt;
@ -13,6 +13,7 @@ namespace Moses
SoftMatchingFeature::SoftMatchingFeature(const std::string &line)
: StatelessFeatureFunction(0, line)
, m_softMatches(moses_MaxNumNonterminals)
, m_scoreIdentical(true)
{
ReadParameters();
}
@ -26,6 +27,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
} else if (key == "path") {
const std::string filePath = value;
Load(filePath);
} else if (key == "score-identical") {
m_scoreIdentical = Scan<bool>(value);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@ -80,8 +83,10 @@ void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd);
const Word& prevLHS = prevHypo->GetTargetLHS();

const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
if ( (word != prevLHS) || m_scoreIdentical ) {
const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
}
}
}
}

@ -55,6 +55,7 @@ public:
private:
mutable std::vector<std::vector<Word> > m_softMatches; // map RHS of new rule to list of possible LHS of old rule (subtree)
mutable std::vector<std::vector<std::string> > m_nameCache;
bool m_scoreIdentical;

#ifdef WITH_THREADS
//reader-writer lock
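The effect of the new score-identical parameter is easiest to see in isolation: when the non-terminal actually substituted (word) equals the one the sub-hypothesis was built with (prevLHS), the feature now fires only if score-identical keeps its default of true. A standalone sketch of that gate (ShouldScore is illustrative; the real check lives in EvaluateWhenApplied above):

#include <iostream>
#include <string>

// Mirrors the new condition in SoftMatchingFeature::EvaluateWhenApplied:
// fire on every genuine soft match, and on identical labels only when
// scoreIdentical is true.
bool ShouldScore(const std::string &word, const std::string &prevLHS,
                 bool scoreIdentical) {
  return (word != prevLHS) || scoreIdentical;
}

int main() {
  std::cout << ShouldScore("NP", "NN", false) << std::endl; // 1: genuine soft match
  std::cout << ShouldScore("NP", "NP", false) << std::endl; // 0: identical, skipped
  std::cout << ShouldScore("NP", "NP", true)  << std::endl; // 1: default behaviour
  return 0;
}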
@ -38,9 +38,8 @@ void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::

void SourceWordDeletionFeature::Load()
{
if (m_filename == "") {
if (m_filename.empty())
return;
}

FEATUREVERBOSE(1, "Loading source word deletion word list from " << m_filename << std::endl);
ifstream inFile(m_filename.c_str());
Some files were not shown because too many files have changed in this diff.