Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Kenneth Heafield 2015-09-10 16:52:26 +02:00
commit 37f7326057
395 changed files with 11546 additions and 12722 deletions

View File

@ -21,6 +21,9 @@ mingw/MosesGUI/icons_rc.py
mingw/MosesGUI/Ui_credits.py
mingw/MosesGUI/Ui_mainWindow.py
moses/TranslationModel/UG
moses/server
moses/parameters
moses/thread_safe_container.h
phrase-extract/pcfg-common
phrase-extract/syntax-common
randlm
@ -32,3 +35,4 @@ srilm
util
xmlrpc-c
.git
util/ug_cache_with_timeout.h

View File

@ -133,7 +133,9 @@ if [ option.get "filter-warnings" : : "yes" ] {
requirements += <cxxflags>-Wno-unused-but-set-variable ;
requirements += <cxxflags>-Wno-unused-result ;
requirements += <cxxflags>-Wno-unused-variable ;
requirements += <cxxflags>-Wcomment ;
requirements += <cxxflags>-Wno-comment ;
requirements += <cxxflags>-Wno-strict-aliasing ;
requirements += <cxxflags>-Wno-overloaded-virtual ;
}
if [ option.get "debug-build" : : "yes" ] {
@ -179,7 +181,7 @@ if [ option.get "with-icu" : : "yes" ]
requirements += <library>icui18n/<link>shared ;
requirements += <cxxflags>-fPIC ;
requirements += <address-model>64 ;
requirements += <runtime-link>shared ;
# requirements += <runtime-link>shared ;
}
if [ option.get "with-probing-pt" : : "yes" ]
@ -301,5 +303,5 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
#local temp = [ _shell "bash source ./s.sh" ] ;
local temp = [ _shell "mkdir -p $(TOP)/bin" ] ;
local temp = [ _shell "rm $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ;

View File

@ -21,6 +21,11 @@ SuffixArray::SuffixArray()
m_wordInSentence(NULL),
m_sentence(NULL),
m_sentenceLength(NULL),
m_document(NULL),
m_documentName(NULL),
m_documentNameLength(0),
m_documentCount(0),
m_useDocument(false),
m_vcb(),
m_size(0),
m_sentenceCount(0) { }
@ -32,6 +37,8 @@ SuffixArray::~SuffixArray()
free(m_wordInSentence);
free(m_sentence);
free(m_sentenceLength);
free(m_document);
free(m_documentName);
}
void SuffixArray::Create(const string& fileName )
@ -46,22 +53,32 @@ void SuffixArray::Create(const string& fileName )
textFile.open(fileName.c_str());
if (!textFile) {
cerr << "no such file or directory " << fileName << endl;
cerr << "Error: no such file or directory " << fileName << endl;
exit(1);
}
// first pass through data: get size
istream *fileP = &textFile;
m_size = 0;
m_sentenceCount = 0;
m_documentCount = 0;
while(!fileP->eof()) {
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
if (m_useDocument && ProcessDocumentLine(line,0)) continue;
vector< WORD_ID > words = m_vcb.Tokenize( line );
m_size += words.size() + 1;
m_sentenceCount++;
}
textFile.close();
cerr << m_size << " words (incl. sentence boundaries)" << endl;
if (m_useDocument) {
cerr << m_documentCount << " documents" << endl;
if (m_documentCount == 0) {
cerr << "Error: no documents found, aborting." << endl;
exit(1);
}
}
// allocate memory
m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
@ -69,21 +86,31 @@ void SuffixArray::Create(const string& fileName )
m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
// fill the array
int wordIndex = 0;
int sentenceId = 0;
textFile.open(fileName.c_str());
if (!textFile) {
cerr << "no such file or directory " << fileName << endl;
exit(1);
CheckAllocation(m_array != NULL, "m_array");
CheckAllocation(m_index != NULL, "m_index");
CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
CheckAllocation(m_sentence != NULL, "m_sentence");
CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
if (m_useDocument) {
m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( char ), m_documentCount );
m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
CheckAllocation(m_document != NULL, "m_document");
CheckAllocation(m_documentName != NULL, "m_documentName");
CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
}
// second pass through data: fill the arrays
int wordIndex = 0;
int sentenceId = 0;
m_documentNameLength = 0; // re-use as counter
m_documentCount = 0; // re-use as counter
textFile.open(fileName.c_str());
fileP = &textFile;
while(!fileP->eof()) {
SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
if (fileP->eof()) break;
if (m_useDocument && ProcessDocumentLine(line,sentenceId)) continue;
vector< WORD_ID > words = m_vcb.Tokenize( line );
vector< WORD_ID >::const_iterator i;
@ -105,7 +132,7 @@ void SuffixArray::Create(const string& fileName )
m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
if (m_buffer == NULL) {
cerr << "cannot allocate memory to m_buffer" << endl;
cerr << "Error: cannot allocate memory to m_buffer" << endl;
exit(1);
}
@ -114,6 +141,45 @@ void SuffixArray::Create(const string& fileName )
cerr << "done sorting" << endl;
}
// very specific code to deal with common crawl document ids
// Checks whether a corpus line is a document-header line of the form
//   "<32-char lowercase hex hash> <float score> <url>"
// Returns true (and records the document boundary) if so; returns false so
// the caller treats the line as an ordinary sentence otherwise.
// Called once per line in each of the two passes over the corpus: in the
// first (counting) pass m_document is still NULL and only the counters are
// advanced; in the second pass the name buffer and sentence index are filled.
bool SuffixArray::ProcessDocumentLine( const char *line, const size_t sentenceId )
{
size_t i;
// first 32 characters are hex-hash
// (a line shorter than 32 chars also fails here, since '\0' < '0')
for(i=0; i<32; i++) {
if ((line[i] < '0' || line[i] > '9') && (line[i] < 'a' || line[i] > 'f')) {
return false;
}
}
if (line[i++] != ' ') return false;
// second token is float
for (; line[i] != ' ' && line[i] != 0; i++) {
if (line[i] != '.' && (line[i] < '0' || line[i] > '9')) {
return false;
}
}
i++;
// last token is url (=name)
size_t startName = i;
for (; line[i] != ' ' && line[i] != 0; i++) {}
// a space here means there is a fourth token -> not a header line
if (line[i] == ' ') return false;
size_t endName = i+1; // include '\0'
// second pass: record name and sentence number
// (m_document is allocated only before the second pass; NULL in the first)
if (m_document != NULL) {
// m_documentName[d] is the offset of document d's name in the buffer
m_documentName[m_documentCount] = m_documentNameLength;
// copy the url, including its terminating '\0', into the shared buffer
// (this inner size_t i deliberately shadows the outer i)
for(size_t i=startName; i<endName; i++) {
m_documentNameBuffer[m_documentNameLength + i-startName] = line[i];
}
// m_document[d] = id of the first sentence belonging to document d
m_document[m_documentCount] = sentenceId;
}
// the two members below double as counters during the first pass and as
// write cursors during the second pass (they are reset between passes)
m_documentNameLength += endName-startName;
m_documentCount++;
return true;
}
// good ol' quick sort
void SuffixArray::Sort(INDEX start, INDEX end)
{
@ -162,7 +228,6 @@ int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
// Order two word ids by the lexicographic order of their surface strings,
// as looked up in the vocabulary. Returns <0, 0, or >0 like string::compare.
inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
{
  const WORD &surfaceA = m_vcb.GetWord( a );
  const WORD &surfaceB = m_vcb.GetWord( b );
  return surfaceA.compare( surfaceB );
}
@ -272,13 +337,73 @@ void SuffixArray::List(INDEX start, INDEX end)
}
}
// Print every corpus sentence containing the given phrase to stdout.
// Output format: a header "QUERY\t<phrase>\t<count> matches" line, then one
// line per match: optionally "<document name>\t" (when document tracking is
// enabled via UseDocument()), followed by the full matching sentence.
void SuffixArray::PrintSentenceMatches( const std::vector< WORD > &phrase )
{
cout << "QUERY\t";
for(size_t i=0; i<phrase.size(); i++) {
if (i>0) cout << " ";
cout << phrase[i];
}
cout << '\t';
INDEX start = 0;
INDEX end = m_size-1;
// FindFirst locates some occurrence in the suffix array;
// it returns m_size when the phrase does not occur (see branch below)
INDEX mid = FindFirst( phrase, start, end );
if (mid == m_size) { // no matches
cout << "0 matches" << endl;
return;
}
// matches are contiguous in the sorted suffix array: expand from the hit
// downwards (-1) and upwards (+1) to the full range of occurrences
INDEX firstMatch = FindLast( phrase, mid, start, -1 );
INDEX lastMatch = FindLast( phrase, mid, end, 1 );
// loop through all matches
cout << (lastMatch-firstMatch+1) << " matches" << endl;
for(INDEX i=firstMatch; i<=lastMatch; i++) {
// get sentence information
INDEX pos = GetPosition( i );
// NOTE(review): this `start` (first word of the sentence) shadows the
// outer `start` search bound above — intentional but easy to misread
INDEX start = pos - GetWordInSentence( pos );
// sentence lengths are stored as char, so lengths are limited by the
// range of char — presumably the corpus preprocessing guarantees this
char length = GetSentenceLength( GetSentence( pos ) );
// print document name
if (m_useDocument) {
INDEX sentence = GetSentence( pos );
INDEX document = GetDocument( sentence );
PrintDocumentName( document );
cout << '\t';
}
// print sentence
// (this char `i` shadows the INDEX match-loop `i` above)
for(char i=0; i<length; i++) {
if (i>0) cout << " ";
cout << GetWord( start + i );
}
cout << endl;
}
}
// Map a sentence id to the index of the document that contains it.
// m_document[] holds, in increasing order, the id of the first sentence of
// each document, so this is a binary search for the enclosing interval.
SuffixArray::INDEX SuffixArray::GetDocument( INDEX sentence ) const
{
// binary search
INDEX min = 0;
INDEX max = m_documentCount-1;
// sentences at or past the last document's start all belong to it
if (sentence >= m_document[max]) {
return max;
}
while(true) {
INDEX mid = (min + max) / 2;
if (sentence >= m_document[mid] && sentence < m_document[mid+1]) {
return mid;
}
if (sentence < m_document[mid]) {
// NOTE(review): if sentence < m_document[0] this decrement would
// underflow when mid == 0 and INDEX is unsigned; presumably the first
// document starts at sentence 0 so that cannot happen — confirm.
max = mid-1;
} else {
min = mid+1;
}
}
}
void SuffixArray::Save(const string& fileName ) const
{
FILE *pFile = fopen ( fileName.c_str() , "w" );
if (pFile == NULL) {
cerr << "Cannot open " << fileName << endl;
exit(1);
}
if (pFile == NULL) Error("cannot open",fileName);
fwrite( &m_size, sizeof(INDEX), 1, pFile );
fwrite( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
@ -288,6 +413,16 @@ void SuffixArray::Save(const string& fileName ) const
fwrite( &m_sentenceCount, sizeof(INDEX), 1, pFile );
fwrite( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
char useDocument = m_useDocument; // not sure if that is needed
fwrite( &useDocument, sizeof(char), 1, pFile );
if (m_useDocument) {
fwrite( &m_documentCount, sizeof(INDEX), 1, pFile );
fwrite( m_document, sizeof(INDEX), m_documentCount, pFile );
fwrite( m_documentName, sizeof(INDEX), m_documentCount, pFile );
fwrite( &m_documentNameLength, sizeof(INDEX), 1, pFile );
fwrite( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile );
}
fclose( pFile );
m_vcb.Save( fileName + ".src-vcb" );
@ -296,56 +431,81 @@ void SuffixArray::Save(const string& fileName ) const
void SuffixArray::Load(const string& fileName )
{
FILE *pFile = fopen ( fileName.c_str() , "r" );
if (pFile == NULL) {
cerr << "no such file or directory " << fileName << endl;
exit(1);
}
if (pFile == NULL) Error("no such file or directory", fileName);
cerr << "loading from " << fileName << endl;
fread( &m_size, sizeof(INDEX), 1, pFile );
fread( &m_size, sizeof(INDEX), 1, pFile )
|| Error("could not read m_size from", fileName);
cerr << "words in corpus: " << m_size << endl;
m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
m_sentence = (INDEX*) calloc( sizeof( INDEX ), m_size );
CheckAllocation(m_array != NULL, "m_array");
CheckAllocation(m_index != NULL, "m_index");
CheckAllocation(m_wordInSentence != NULL, "m_wordInSentence");
CheckAllocation(m_sentence != NULL, "m_sentence");
fread( m_array, sizeof(WORD_ID), m_size, pFile ) // corpus
|| Error("could not read m_array from", fileName);
fread( m_index, sizeof(INDEX), m_size, pFile ) // suffix array
|| Error("could not read m_index from", fileName);
fread( m_wordInSentence, sizeof(char), m_size, pFile) // word index
|| Error("could not read m_wordInSentence from", fileName);
fread( m_sentence, sizeof(INDEX), m_size, pFile ) // sentence index
|| Error("could not read m_sentence from", fileName);
if (m_array == NULL) {
cerr << "Error: cannot allocate memory to m_array" << endl;
exit(1);
}
if (m_index == NULL) {
cerr << "Error: cannot allocate memory to m_index" << endl;
exit(1);
}
if (m_wordInSentence == NULL) {
cerr << "Error: cannot allocate memory to m_wordInSentence" << endl;
exit(1);
}
if (m_sentence == NULL) {
cerr << "Error: cannot allocate memory to m_sentence" << endl;
exit(1);
}
fread( m_array, sizeof(WORD_ID), m_size, pFile ); // corpus
fread( m_index, sizeof(INDEX), m_size, pFile ); // suffix array
fread( m_wordInSentence, sizeof(char), m_size, pFile); // word index
fread( m_sentence, sizeof(INDEX), m_size, pFile); // sentence index
fread( &m_sentenceCount, sizeof(INDEX), 1, pFile );
fread( &m_sentenceCount, sizeof(INDEX), 1, pFile )
|| Error("could not read m_sentenceCount from", fileName);
cerr << "sentences in corpus: " << m_sentenceCount << endl;
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
if (m_sentenceLength == NULL) {
cerr << "Error: cannot allocate memory to m_sentenceLength" << endl;
exit(1);
m_sentenceLength = (char*) calloc( sizeof( char ), m_sentenceCount );
CheckAllocation(m_sentenceLength != NULL, "m_sentenceLength");
fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile) // sentence length
|| Error("could not read m_sentenceLength from", fileName);
if (m_useDocument) { // do not read it when you do not need it
char useDocument;
fread( &useDocument, sizeof(char), 1, pFile )
|| Error("could not read m_useDocument from", fileName);
if (!useDocument) {
cerr << "Error: stored suffix array does not have a document index\n";
exit(1);
}
fread( &m_documentCount, sizeof(INDEX), 1, pFile )
|| Error("could not read m_documentCount from", fileName);
m_document = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
m_documentName = (INDEX*) calloc( sizeof( INDEX ), m_documentCount );
CheckAllocation(m_document != NULL, "m_document");
CheckAllocation(m_documentName != NULL, "m_documentName");
fread( m_document, sizeof(INDEX), m_documentCount, pFile )
|| Error("could not read m_document from", fileName);
fread( m_documentName, sizeof(INDEX), m_documentCount, pFile )
|| Error("could not read m_documentName from", fileName);
fread( &m_documentNameLength, sizeof(INDEX), 1, pFile )
|| Error("could not read m_documentNameLength from", fileName);
m_documentNameBuffer = (char*) calloc( sizeof( char ), m_documentNameLength );
CheckAllocation(m_documentNameBuffer != NULL, "m_documentNameBuffer");
fread( m_documentNameBuffer, sizeof(char), m_documentNameLength, pFile )
|| Error("could not read m_document from", fileName);
}
fread( m_sentenceLength, sizeof(char), m_sentenceCount, pFile); // sentence length
fclose( pFile );
m_vcb.Load( fileName + ".src-vcb" );
}
// Verify an allocation (or any boolean precondition); on failure, print a
// diagnostic naming the data structure and terminate the program.
void SuffixArray::CheckAllocation( bool check, const char *dataStructure ) const
{
  if (!check) {
    cerr << "Error: could not allocate memory for " << dataStructure << endl;
    exit(1);
  }
}
// Report a fatal error mentioning the offending file and terminate.
// Declared to return bool (though it never actually returns) so that it can
// be chained after fread() calls with ||, e.g.:
//   fread( &m_size, sizeof(INDEX), 1, pFile ) || Error("could not read", f);
bool SuffixArray::Error( const char *message, const string &fileName) const
{
cerr << "Error: " << message << " " << fileName << endl;
exit(1);
return true; // yeah, i know.
}

View File

@ -15,6 +15,12 @@ private:
INDEX *m_sentence;
char *m_sentenceLength;
WORD_ID m_endOfSentence;
INDEX *m_document;
INDEX *m_documentName;
char *m_documentNameBuffer;
size_t m_documentNameLength;
size_t m_documentCount;
bool m_useDocument;
Vocabulary m_vcb;
INDEX m_size;
INDEX m_sentenceCount;
@ -28,6 +34,7 @@ public:
~SuffixArray();
void Create(const std::string& fileName );
bool ProcessDocumentLine( const char* const, const size_t );
void Sort(INDEX start, INDEX end);
int CompareIndex( INDEX a, INDEX b ) const;
inline int CompareWord( WORD_ID a, WORD_ID b ) const;
@ -40,6 +47,7 @@ public:
INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
int Match( const std::vector< WORD > &phrase, INDEX index );
void List( INDEX start, INDEX end );
void PrintSentenceMatches( const std::vector< WORD > &phrase );
inline INDEX GetPosition( INDEX index ) const {
return m_index[ index ];
}
@ -58,6 +66,17 @@ public:
inline WORD GetWord( INDEX position ) const {
return m_vcb.GetWord( m_array[position] );
}
void UseDocument() {
m_useDocument = true;
}
INDEX GetDocument( INDEX sentence ) const;
void PrintDocumentName( INDEX document ) {
for(INDEX i=m_documentName[ document ]; m_documentNameBuffer[i] != 0; i++) {
std::cout << m_documentNameBuffer[ i ];
}
}
void Save(const std::string& fileName ) const;
void Load(const std::string& fileName );
void CheckAllocation(bool, const char *dataStructure) const;
bool Error( const char* message, const std::string& fileName) const;
};

View File

@ -1,4 +1,5 @@
#include "SuffixArray.h"
#include "../util/tokenize.hh"
#include <getopt.h>
using namespace std;
@ -13,10 +14,12 @@ int main(int argc, char* argv[])
string query;
string fileNameSuffix;
string fileNameSource;
int loadFlag = false;
int saveFlag = false;
int createFlag = false;
int queryFlag = false;
bool loadFlag = false;
bool saveFlag = false;
bool createFlag = false;
bool queryFlag = false;
bool querySentenceFlag = false;
int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create corpus]\n\t[--query string]\n\t[--stdio]\n";
while(1) {
@ -25,11 +28,14 @@ int main(int argc, char* argv[])
{"save", required_argument, 0, 's'},
{"create", required_argument, 0, 'c'},
{"query", required_argument, 0, 'q'},
{"query-sentence", required_argument, 0, 'Q'},
{"document", required_argument, 0, 'd'},
{"stdio", no_argument, 0, 'i'},
{"stdio-sentence", no_argument, 0, 'I'},
{0, 0, 0, 0}
};
int option_index = 0;
int c = getopt_long (argc, argv, "l:s:c:q:i", long_options, &option_index);
int c = getopt_long (argc, argv, "l:s:c:q:Q:iId", long_options, &option_index);
if (c == -1) break;
switch (c) {
case 'l':
@ -48,17 +54,25 @@ int main(int argc, char* argv[])
query = string(optarg);
queryFlag = true;
break;
case 'Q':
query = string(optarg);
querySentenceFlag = true;
break;
case 'i':
stdioFlag = true;
break;
case 'I':
stdioFlag = true;
querySentenceFlag = true;
break;
case 'd':
suffixArray.UseDocument();
break;
default:
cerr << info;
exit(1);
}
}
if (stdioFlag) {
queryFlag = true;
}
// check if parameter settings are legal
if (saveFlag && !createFlag) {
@ -74,7 +88,7 @@ int main(int argc, char* argv[])
exit(1);
}
// do your thing
// get suffix array
if (createFlag) {
cerr << "will create\n";
cerr << "corpus is in " << fileNameSource << endl;
@ -88,16 +102,26 @@ int main(int argc, char* argv[])
cerr << "will load from " << fileNameSuffix << endl;
suffixArray.Load( fileNameSuffix );
}
// do something with it
if (stdioFlag) {
while(true) {
string query;
if (getline(cin, query, '\n').eof()) {
return 0;
}
cout << lookup( query ) << endl;
if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
} else {
cout << lookup( query ) << endl;
}
}
} else if (queryFlag) {
cout << lookup( query ) << endl;
} else if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
}
return 0;
}
@ -105,32 +129,6 @@ int main(int argc, char* argv[])
size_t lookup( string query )
{
cerr << "query is " << query << endl;
vector< string > queryString = tokenize( query.c_str() );
vector< string > queryString = util::tokenize( query.c_str() );
return suffixArray.Count( queryString );
}
// Duplicate of definition in util/tokenize.hh.
// TODO: Can we de-duplicate this? At the time of writing biconcor does not
// use util at all.
// Split a NUL-terminated C string on spaces and tabs, skipping empty runs,
// and return the resulting tokens in order.
vector<string> tokenize(const char input[])
{
  vector< string > token;
  size_t pos = 0;
  while (input[pos] != '\0') {
    // skip over a run of separator characters
    if (input[pos] == ' ' || input[pos] == '\t') {
      ++pos;
      continue;
    }
    // collect a maximal run of non-separator characters as one token
    const size_t begin = pos;
    while (input[pos] != '\0' && input[pos] != ' ' && input[pos] != '\t') {
      ++pos;
    }
    token.push_back( string( input + begin, pos - begin ) );
  }
  return token;
}

View File

@ -42,6 +42,7 @@
<option id="gnu.cpp.link.option.libs.1325292383" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
@ -59,6 +60,7 @@
</option>
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>

View File

@ -11,12 +11,12 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -72,13 +72,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -102,9 +102,14 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
<name>SyntaxNodeCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
</link>
<link>
<name>SyntaxTree.h</name>

View File

@ -81,9 +81,14 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
<name>SyntaxNodeCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.cpp</locationURI>
</link>
<link>
<name>SyntaxNodeCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxNodeCollection.h</locationURI>
</link>
<link>
<name>SyntaxTree.h</name>

View File

@ -83,6 +83,16 @@
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>InternalTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.cpp</locationURI>
</link>
<link>
<name>InternalTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/InternalTree.h</locationURI>
</link>
<link>
<name>bin</name>
<type>2</type>

View File

@ -546,26 +546,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/interpolate.hh</locationURI>
</link>
<link>
<name>builder/joint_order.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/joint_order.hh</locationURI>
</link>
<link>
<name>builder/lmplz_main.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/lmplz_main.cc</locationURI>
</link>
<link>
<name>builder/ngram.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram.hh</locationURI>
</link>
<link>
<name>builder/ngram_stream.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/ngram_stream.hh</locationURI>
</link>
<link>
<name>builder/pipeline.cc</name>
<type>1</type>
@ -576,21 +561,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/pipeline.hh</locationURI>
</link>
<link>
<name>builder/print.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.cc</locationURI>
</link>
<link>
<name>builder/print.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/print.hh</locationURI>
</link>
<link>
<name>builder/sort.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/builder/sort.hh</locationURI>
</link>
<link>
<name>filter/Jamfile</name>
<type>1</type>

View File

@ -11,15 +11,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.debug.1721952013" name="Debug" parent="cdt.managedbuild.config.gnu.lib.debug">
<folderInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.debug.1932340583" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.debug.296711714" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.debug"/>
@ -32,6 +32,9 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.2072043013" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1183866856" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.lib.debug.1365367786" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.lib.debug">
@ -46,9 +49,6 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.lib.debug.1721952013.195400614" name="MeteorScorer.cpp" rcbsApplicability="disable" resourcePath="MeteorScorer.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537.307282660" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.lib.debug.329920537"/>
</fileInfo>
<sourceEntries>
<entry excluding="mert/PreProcessFilter.h|mert/PreProcessFilter.cpp|mert/UtilTest.cpp|mert/TimerTest.cpp|mert/SingletonTest.cpp|mert/PointTest.cpp|mert/OptimizerFactoryTest.cpp|mert/NgramTest.cpp|mert/FeatureDataTest.cpp|mert/DataTest.cpp|mert/ReferenceTest.cpp|mert/VocabularyTest.cpp|mert/extractor.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
@ -66,15 +66,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.lib.release.3250316" name="Release" parent="cdt.managedbuild.config.gnu.lib.release">
<folderInfo id="cdt.managedbuild.config.gnu.lib.release.3250316." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.lib.release.1996805666" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.lib.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.lib.release.106685808" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.lib.release"/>

View File

@ -28,7 +28,7 @@
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.849384962" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="WITH_THREADS"/>
@ -47,6 +47,7 @@
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@ -56,6 +57,7 @@
</option>
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>

View File

@ -11,15 +11,15 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1846963597" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1846963597" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1167373278" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.397694981" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -31,8 +31,11 @@
<option id="gnu.cpp.compiler.option.include.paths.876218169" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.53427549" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="PT_UG"/>
<listOptionValue builtIn="false" value="HAVE_CMPH"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
@ -58,18 +61,18 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1761300858" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1815042864" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1720439764" name="NeuralLMWrapper.h" rcbsApplicability="disable" resourcePath="LM/NeuralLMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1094892289" name="MaxEntSRI.h" rcbsApplicability="disable" resourcePath="LM/MaxEntSRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1113398114" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1183410636" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1448475064" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1459438132" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1094892289" name="MaxEntSRI.h" rcbsApplicability="disable" resourcePath="LM/MaxEntSRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1720439764" name="NeuralLMWrapper.h" rcbsApplicability="disable" resourcePath="LM/NeuralLMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1272004353" name="BilingualLM.h" rcbsApplicability="disable" resourcePath="LM/BilingualLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1815042864" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1459438132" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.871386239" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.1846963597.1761300858" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|TranslationModel/CompactPT|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/ProbingPT|TranslationModel/UG|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="TranslationModel/UG/mm/test-http-client.cc|TranslationModel/UG/ptable-describe-features.cc|TranslationModel/UG/count-ptable-features.cc|TranslationModel/UG/try-align2.cc|TranslationModel/UG/try-align.cc|TranslationModel/UG/spe-check-coverage3.cc|TranslationModel/UG/spe-check-coverage2.cc|TranslationModel/UG/spe-check-coverage.cc|TranslationModel/UG/sim-pe.cc|TranslationModel/UG/generic/stringdist|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/mtt.count.cc|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/bilingual-lm|LM/MaxEntSRI.h|LM/MaxEntSRI.cpp|LM/BilingualLM.h|LM/BilingualLM.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp|LM/ORLM.h|LM/ORLM.cpp|LM/NeuralLMWrapper.h|LM/NeuralLMWrapper.cpp|LM/SRI.h|LM/SRI.cpp|LM/IRST.h|LM/IRST.cpp|LM/DALMWrapper.h|LM/DALMWrapper.cpp|LM/oxlm|TranslationModel/ProbingPT|TranslationModel/UG/util" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
@ -79,16 +82,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1911984684" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1911984684" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1911984684." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1552241309" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.332871558" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
@ -141,10 +144,10 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses"/>
</configuration>
<configuration configurationName="Debug">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/moses"/>
</configuration>
</storageModule>

View File

@ -60,6 +60,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/AlignmentInfoTest.cpp</locationURI>
</link>
<link>
<name>AllOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
</link>
<link>
<name>AllOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
</link>
<link>
<name>BaseManager.cpp</name>
<type>1</type>
@ -70,6 +80,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BaseManager.h</locationURI>
</link>
<link>
<name>BeamSearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
</link>
<link>
<name>BitmapContainer.cpp</name>
<type>1</type>
@ -80,6 +95,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/BitmapContainer.h</locationURI>
</link>
<link>
<name>BookkeepingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
</link>
<link>
<name>BookkeepingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
</link>
<link>
<name>CMakeLists.txt</name>
<type>1</type>
@ -230,6 +255,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
</link>
<link>
<name>CubePruningOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
</link>
<link>
<name>CubePruningOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
</link>
<link>
<name>DecodeGraph.cpp</name>
<type>1</type>
@ -460,6 +495,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/InputFileStream.h</locationURI>
</link>
<link>
<name>InputOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
</link>
<link>
<name>InputOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
</link>
<link>
<name>InputPath.cpp</name>
<type>1</type>
@ -490,6 +535,16 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>LMBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
</link>
<link>
<name>LMBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
</link>
<link>
<name>LVoc.cpp</name>
<type>1</type>
@ -510,6 +565,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LatticeMBR.h</locationURI>
</link>
<link>
<name>LookupOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
</link>
<link>
<name>MBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
</link>
<link>
<name>MBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
</link>
<link>
<name>Manager.cpp</name>
<type>1</type>
@ -535,6 +605,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/MosesTest.cpp</locationURI>
</link>
<link>
<name>NBestOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
</link>
<link>
<name>NBestOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
</link>
<link>
<name>NonTerminal.cpp</name>
<type>1</type>
@ -550,6 +630,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/ObjectPool.h</locationURI>
</link>
<link>
<name>OptionsBaseClass.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
</link>
<link>
<name>OptionsBaseClass.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
</link>
<link>
<name>OutputCollector.h</name>
<type>1</type>
@ -635,6 +725,26 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/ReorderingConstraint.h</locationURI>
</link>
<link>
<name>ReorderingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
</link>
<link>
<name>ReorderingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
</link>
<link>
<name>ReportingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
</link>
<link>
<name>ReportingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
</link>
<link>
<name>RuleCube.cpp</name>
<type>1</type>
@ -711,14 +821,14 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormal.h</locationURI>
</link>
<link>
<name>SearchNormalBatch.cpp</name>
<name>SearchOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
</link>
<link>
<name>SearchNormalBatch.h</name>
<name>SearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SearchNormalBatch.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
</link>
<link>
<name>Sentence.cpp</name>
@ -740,6 +850,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/SentenceStats.h</locationURI>
</link>
<link>
<name>ServerOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
</link>
<link>
<name>ServerOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
</link>
<link>
<name>SquareMatrix.cpp</name>
<type>1</type>
@ -1065,6 +1185,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/mbr.h</locationURI>
</link>
<link>
<name>parameters</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>rule.proto</name>
<type>1</type>
@ -1360,16 +1485,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
</link>
<link>
<name>FF/SkeletonChangeInput.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.cpp</locationURI>
</link>
<link>
<name>FF/SkeletonChangeInput.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.h</locationURI>
</link>
<link>
<name>FF/SkeletonStatefulFF.cpp</name>
<type>1</type>
@ -2020,16 +2135,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/BilingualDynSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/BilingualDynSuffixArray.cpp</locationURI>
</link>
<link>
<name>TranslationModel/BilingualDynSuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/BilingualDynSuffixArray.h</locationURI>
</link>
<link>
<name>TranslationModel/CYKPlusParser</name>
<type>2</type>
@ -2040,21 +2145,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/DynSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSuffixArray.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSuffixArray.h</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionary.cpp</name>
<type>1</type>
@ -2070,16 +2160,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.README</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryDynSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.cpp</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryDynSuffixArray.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/PhraseDictionaryDynSuffixArray.h</locationURI>
</link>
<link>
<name>TranslationModel/PhraseDictionaryDynamicCacheBased.cpp</name>
<type>1</type>
@ -2200,16 +2280,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/WordCoocTable.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/WordCoocTable.cpp</locationURI>
</link>
<link>
<name>TranslationModel/WordCoocTable.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/WordCoocTable.h</locationURI>
</link>
<link>
<name>TranslationModel/fuzzy-match</name>
<type>2</type>
@ -2240,6 +2310,146 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.h</locationURI>
</link>
<link>
<name>parameters/AllOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.cpp</locationURI>
</link>
<link>
<name>parameters/AllOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/AllOptions.h</locationURI>
</link>
<link>
<name>parameters/BeamSearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BeamSearchOptions.h</locationURI>
</link>
<link>
<name>parameters/BookkeepingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/BookkeepingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/BookkeepingOptions.h</locationURI>
</link>
<link>
<name>parameters/ContextParameters.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.cpp</locationURI>
</link>
<link>
<name>parameters/ContextParameters.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
</link>
<link>
<name>parameters/CubePruningOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.cpp</locationURI>
</link>
<link>
<name>parameters/CubePruningOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/CubePruningOptions.h</locationURI>
</link>
<link>
<name>parameters/InputOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.cpp</locationURI>
</link>
<link>
<name>parameters/InputOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/InputOptions.h</locationURI>
</link>
<link>
<name>parameters/LMBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.cpp</locationURI>
</link>
<link>
<name>parameters/LMBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LMBR_Options.h</locationURI>
</link>
<link>
<name>parameters/LookupOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/LookupOptions.h</locationURI>
</link>
<link>
<name>parameters/MBR_Options.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.cpp</locationURI>
</link>
<link>
<name>parameters/MBR_Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/MBR_Options.h</locationURI>
</link>
<link>
<name>parameters/NBestOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.cpp</locationURI>
</link>
<link>
<name>parameters/NBestOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/NBestOptions.h</locationURI>
</link>
<link>
<name>parameters/OptionsBaseClass.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.cpp</locationURI>
</link>
<link>
<name>parameters/OptionsBaseClass.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/OptionsBaseClass.h</locationURI>
</link>
<link>
<name>parameters/ReorderingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ReorderingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReorderingOptions.h</locationURI>
</link>
<link>
<name>parameters/ReportingOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ReportingOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ReportingOptions.h</locationURI>
</link>
<link>
<name>parameters/SearchOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.cpp</locationURI>
</link>
<link>
<name>parameters/SearchOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/SearchOptions.h</locationURI>
</link>
<link>
<name>parameters/ServerOptions.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.cpp</locationURI>
</link>
<link>
<name>parameters/ServerOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ServerOptions.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering/LexicalReordering.cpp</name>
<type>1</type>
@ -2935,86 +3145,6 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/FileHandler.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/FileHandler.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/FileHandler.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/FileHandler.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/Jamfile</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/RandLMCache.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/RandLMCache.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/RandLMFilter.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/RandLMFilter.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/fdstream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/fdstream.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/hash.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/hash.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/onlineRLM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/onlineRLM.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/params.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/params.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/params.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/params.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/perfectHash.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/perfectHash.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/quantizer.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/quantizer.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/types.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/types.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/utils.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/utils.h</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/vocab.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/vocab.cpp</locationURI>
</link>
<link>
<name>TranslationModel/DynSAInclude/vocab.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/DynSAInclude/vocab.h</locationURI>
</link>
<link>
<name>TranslationModel/ProbingPT/Jamfile</name>
<type>1</type>
@ -3285,6 +3415,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/Makefile</locationURI>
</link>
<link>
<name>TranslationModel/UG/TargetPhraseCollectionCache.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/TargetPhraseCollectionCache.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/TargetPhraseCollectionCache.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/TargetPhraseCollectionCache.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/bin</name>
<type>2</type>
@ -3330,11 +3470,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-lookup.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/sapt_phrase_key.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_phrase_key.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/sapt_phrase_scorers.h</name>
<type>1</type>
@ -3680,6 +3815,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-dynamic-im-tsa.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/test-http-client.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-http-client.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/test-xml-escaping.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-xml-escaping.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/tpt_pickler.cc</name>
<type>1</type>
@ -3725,6 +3870,56 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_agenda.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_agenda.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_agenda_job.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_agenda_job.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_agenda_worker.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_agenda_worker.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_jstats.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_jstats.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_jstats.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_jstats.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_moses.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_moses.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_phrase_extraction_record.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_phrase_extraction_record.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_pstats.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_pstats.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_pstats.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_pstats.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_bitext_sampler.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext_sampler.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_conll_bottom_up_token.h</name>
<type>1</type>
@ -3760,6 +3955,26 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_deptree.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_http_client.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_http_client.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_http_client.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_http_client.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_im_bitext.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_bitext.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_im_bitext.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_bitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_im_tsa.h</name>
<type>1</type>
@ -3780,6 +3995,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_lexical_reordering.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_reordering.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_lexical_reordering.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_reordering.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_load_primer.cc</name>
<type>1</type>
@ -3800,6 +4025,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_2d_table.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mm_bitext.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_bitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mm_tsa.h</name>
<type>1</type>
@ -3815,16 +4045,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_ttrack.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mmbitext.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_mmbitext.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_phrasepair.cc</name>
<type>1</type>
@ -3835,6 +4055,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_phrasepair.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_prep_phrases.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_prep_phrases.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_sampling_bias.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_sampling_bias.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_sampling_bias.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_sampling_bias.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/ug_tsa_array_entry.cc</name>
<type>1</type>
@ -4060,6 +4295,21 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/stringdist/ug_stringdist.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_ref_counter.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_ref_counter.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_thread_pool.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_pool.cc</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_thread_pool.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_pool.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc</name>
<type>1</type>
@ -4070,6 +4320,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8</name>
<type>2</type>
@ -4110,6 +4365,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release</name>
<type>2</type>
@ -4145,6 +4405,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on</name>
<type>2</type>
@ -4720,6 +4985,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
<type>2</type>
@ -5315,6 +5585,11 @@
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
@ -5570,6 +5845,201 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-demo1.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/num_read_write.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_jstats.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_bitext_pstats.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_http_client.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_im_bitext.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_lexical_reordering.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_sampling_bias.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</locationURI>
</link>
<link>
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
<type>1</type>

View File

@ -1,6 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
@ -13,9 +16,6 @@
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
@ -44,8 +44,10 @@
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<Library Value="boost_filesystem"/>
<Library Value="boost_system"/>
<Library Value="boost_iostreams"/>
<Library Value="moses"/>
<Library Value="z"/>
<Library Value="bz2"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>

View File

@ -44,6 +44,7 @@
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1443553047" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.1096041402" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@ -53,6 +54,7 @@
</option>
<option id="gnu.cpp.link.option.libs.1087215166" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
@ -75,6 +77,7 @@
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>

View File

@ -13,7 +13,7 @@ with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
if $(with-xmlrpc-c) {
echo While building mosesserver ... ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the XMLRPC-C library; Do NOT use v.1.25.29 !!!" ;
echo "!!! You are linking the XMLRPC-C library; Must be v.1.32 (September 2012) or higher !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
build-moses-server = true ;

View File

@ -38,13 +38,12 @@ int main(int argc, char** argv)
#include "moses/StaticData.h"
#include "moses/ThreadPool.h"
#include "moses/TranslationTask.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/FF/StatefulFeatureFunction.h"
#if PT_UG
#include "moses/TranslationModel/UG/mmsapt.h"
#endif
#include "moses/TreeInput.h"
#include "moses/LM/ORLM.h"
#include "moses/IOWrapper.h"
#include <boost/foreach.hpp>
@ -58,8 +57,8 @@ int main(int argc, char** argv)
#include <xmlrpc-c/server_abyss.hpp>
// using namespace Moses;
using Moses::TreeInput;
using namespace std;
using namespace Moses;
typedef std::map<std::string, xmlrpc_c::value> params_t;
@ -82,70 +81,16 @@ public:
Mmsapt* pdsa = reinterpret_cast<Mmsapt*>(PhraseDictionary::GetColl()[0]);
pdsa->add(source_,target_,alignment_);
#else
const PhraseDictionary* pdf = PhraseDictionary::GetColl()[0];
PhraseDictionaryDynSuffixArray*
pdsa = (PhraseDictionaryDynSuffixArray*) pdf;
cerr << "Inserting into address " << pdsa << endl;
pdsa->insertSnt(source_, target_, alignment_);
std::string msg;
msg = "Server was compiled without a phrase table implementation that ";
msg += "supports updates.";
throw xmlrpc_c::fault(msg.c_str(), xmlrpc_c::fault::CODE_PARSE);
#endif
if(add2ORLM_) {
//updateORLM();
}
XVERBOSE(1,"Done inserting\n");
//PhraseDictionary* pdsa = (PhraseDictionary*) pdf->GetDictionary(*dummy);
map<string, xmlrpc_c::value> retData;
//*retvalP = xmlrpc_c::value_struct(retData);
#ifndef PT_UG
pdf = 0;
#endif
pdsa = 0;
*retvalP = xmlrpc_c::value_string("Phrase table updated");
}
string source_, target_, alignment_;
bool bounded_, add2ORLM_;
/*
void updateORLM() {
// TODO(level101): this belongs in the language model, not in moseserver.cpp
vector<string> vl;
map<vector<string>, int> ngSet;
LMList lms = StaticData::Instance().GetLMList(); // get LM
LMList::const_iterator lmIter = lms.begin();
LanguageModel *lm = *lmIter;
LanguageModelORLM* orlm = static_cast<LanguageModelORLM*>(lm);
if(orlm == 0) {
cerr << "WARNING: Unable to add target sentence to ORLM\n";
return;
}
// break out new ngrams from sentence
const int ngOrder(orlm->GetNGramOrder());
const std::string sBOS = orlm->GetSentenceStart()->GetString().as_string();
const std::string sEOS = orlm->GetSentenceEnd()->GetString().as_string();
Utils::splitToStr(target_, vl, " ");
// insert BOS and EOS
vl.insert(vl.begin(), sBOS);
vl.insert(vl.end(), sEOS);
for(int j=0; j < vl.size(); ++j) {
int i = (j<ngOrder) ? 0 : j-ngOrder+1;
for(int t=j; t >= i; --t) {
vector<string> ngVec;
for(int s=t; s<=j; ++s) {
ngVec.push_back(vl[s]);
//cerr << vl[s] << " ";
}
ngSet[ngVec]++;
//cerr << endl;
}
}
// insert into LM in order from 1grams up (for LM well-formedness)
cerr << "Inserting " << ngSet.size() << " ngrams into ORLM...\n";
for(int i=1; i <= ngOrder; ++i) {
iterate(ngSet, it) {
if(it->first.size() == i)
orlm->UpdateORLM(it->first, it->second);
}
}
}
*/
bool bounded_;
void breakOutParams(const params_t& params) {
params_t::const_iterator si = params.find("source");
@ -165,8 +110,6 @@ public:
XVERBOSE(1,"alignment = " << alignment_ << endl);
si = params.find("bounded");
bounded_ = (si != params.end());
si = params.find("updateORLM");
add2ORLM_ = (si != params.end());
}
};
@ -678,6 +621,14 @@ int main(int argc, char** argv)
bool isSerial = false;
size_t numThreads = 10; //for translation tasks
//Abyss server configuration: initial values reflect hard-coded default
//-> http://xmlrpc-c.sourceforge.net/doc/libxmlrpc_server_abyss.html#max_conn
size_t maxConn = 15;
size_t maxConnBacklog = 15;
size_t keepaliveTimeout = 15;
size_t keepaliveMaxConn = 30;
size_t timeout = 15;
for (int i = 0; i < argc; ++i) {
if (!strcmp(argv[i],"--server-port")) {
++i;
@ -695,6 +646,46 @@ int main(int argc, char** argv)
} else {
logfile = argv[i];
}
} else if (!strcmp(argv[i],"--server-maxconn")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-maxconn" << endl;
exit(1);
} else {
maxConn = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-maxconn-backlog")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-maxconn-backlog" << endl;
exit(1);
} else {
maxConnBacklog = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-keepalive-timeout")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-keepalive-timeout" << endl;
exit(1);
} else {
keepaliveTimeout = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-keepalive-maxconn")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-keepalive-maxconn" << endl;
exit(1);
} else {
keepaliveMaxConn = atoi(argv[i]);
}
} else if (!strcmp(argv[i],"--server-timeout")) {
++i;
if (i >= argc) {
cerr << "Error: Missing argument to --server-timeout" << endl;
exit(1);
} else {
timeout = atoi(argv[i]);
}
} else if (!strcmp(argv[i], "--threads")) {
++i;
if (i>=argc) {
@ -740,20 +731,27 @@ int main(int argc, char** argv)
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
/* CODE FOR old xmlrpc-c v. 1.32 or lower
xmlrpc_c::serverAbyss myAbyssServer(
myRegistry,
port, // TCP port on which to listen
logfile
);
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04
*/
/* doesn't work with xmlrpc-c v. 1.16.33 - ie very old lib on Ubuntu 12.04 */
xmlrpc_c::serverAbyss myAbyssServer(
xmlrpc_c::serverAbyss::constrOpt()
.registryPtr(&myRegistry)
.registryP(&myRegistry)
.portNumber(port) // TCP port on which to listen
.logFileName(logfile)
.allowOrigin("*")
.maxConn((unsigned int)maxConn)
.maxConnBacklog((unsigned int)maxConnBacklog)
.keepaliveTimeout((unsigned int)keepaliveTimeout)
.keepaliveMaxConn((unsigned int)keepaliveMaxConn)
.timeout((unsigned int)timeout)
);
*/
XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {

View File

@ -2,7 +2,7 @@
# xmlrpc-c library (including the abyss server) that is needed for
# moses server functionality
if [ option.get "no-xmlrpc-c" ]
if [ option.get "no-xmlrpc-c" : : "yes" ]
{
rule xmlrpc ( what ? ) { } # never return anything
}

View File

@ -1,46 +1,139 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
set(KENLM_MAX_ORDER 6)
add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
set(KENLM_SOURCE
bhiksha.cc
binary_format.cc
config.cc
lm_exception.cc
model.cc
quantize.cc
read_arpa.cc
search_hashed.cc
search_trie.cc
sizes.cc
trie.cc
trie_sort.cc
value_build.cc
virtual_interface.cc
vocab.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm OBJECT ${KENLM_SOURCE})
# This directory has children that need to be processed
add_subdirectory(builder)
add_subdirectory(common)
add_subdirectory(filter)
# Explicitly list the executable files to be compiled
set(EXE_LIST
query
fragment
build_binary
)
# Iterate through the executable list
foreach(exe ${EXE_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(${exe} ${Boost_LIBRARIES})
# Group executables together
set_target_properties(${exe} PROPERTIES FOLDER executables)
# End for loop
endforeach(exe)
# Install the executable files
install(TARGETS ${EXE_LIST} DESTINATION bin)
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
left_test
model_test
partial_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES})
# model_test requires an extra command line parameter
if ("${test}" STREQUAL "model_test")
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa
)
else()
set(test_params
${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
)
endif()
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}> ${test_params})
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
endif()
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/bhiksha.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/binary_format.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/blank.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/config.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/enumerate_vocab.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/facade.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/left.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/lm_exception.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/max_order.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/model_type.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/ngram_query.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/partial.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/quantize.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/read_arpa.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/return.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_hashed.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/search_trie.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/sizes.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/state.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/trie_sort.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/value_build.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/virtual_interface.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/vocab.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/weights.hh")
list(APPEND SOURCE_KENLM "${CMAKE_CURRENT_SOURCE_DIR}/word_index.hh")
add_library(kenlm OBJECT ${SOURCE_KENLM})

View File

@ -17,7 +17,7 @@ wrappers = ;
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib nplm : : <search>$(with-nplm)/src ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp ;
obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
alias nplm-all : nplm.o nplm ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
wrappers += nplm-all ;
}

87
lm/builder/CMakeLists.txt Normal file
View File

@ -0,0 +1,87 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_BUILDER_SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/adjust_counts.cc
${CMAKE_CURRENT_SOURCE_DIR}/corpus_count.cc
${CMAKE_CURRENT_SOURCE_DIR}/initial_probabilities.cc
${CMAKE_CURRENT_SOURCE_DIR}/interpolate.cc
${CMAKE_CURRENT_SOURCE_DIR}/output.cc
${CMAKE_CURRENT_SOURCE_DIR}/pipeline.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_builder OBJECT ${KENLM_BUILDER_SOURCE})
# Compile the executable, linking against the requisite dependent object files
add_executable(lmplz lmplz_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
# Link the executable against boost
target_link_libraries(lmplz ${Boost_LIBRARIES})
# Group executables together
set_target_properties(lmplz PROPERTIES FOLDER executables)
if(BUILD_TESTING)
# Explicitly list the Boost test files to be compiled
set(KENLM_BOOST_TESTS_LIST
adjust_counts_test
corpus_count_test
)
# Iterate through the Boost tests list
foreach(test ${KENLM_BOOST_TESTS_LIST})
# Compile the executable, linking against the requisite dependent object files
add_executable(${test} ${test}.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_common> $<TARGET_OBJECTS:kenlm_builder> $<TARGET_OBJECTS:kenlm_util>)
# Require the following compile flag
set_target_properties(${test} PROPERTIES COMPILE_FLAGS "-DBOOST_TEST_DYN_LINK -DBOOST_PROGRAM_OPTIONS_DYN_LINK")
# Link the executable against boost
target_link_libraries(${test} ${Boost_LIBRARIES})
# Specify command arguments for how to run each unit test
#
# Assuming that foo was defined via add_executable(foo ...),
# the syntax $<TARGET_FILE:foo> gives the full path to the executable.
#
add_test(NAME ${test}_test
COMMAND $<TARGET_FILE:${test}>)
# Group unit tests together
set_target_properties(${test} PROPERTIES FOLDER "unit_tests")
# End for loop
endforeach(test)
endif()

View File

@ -15,9 +15,6 @@
#include "util/stream/timer.hh"
#include "util/tokenize_piece.hh"
#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>
#include <functional>
#include <stdint.h>

View File

@ -43,12 +43,13 @@ BOOST_AUTO_TEST_CASE(Short) {
util::scoped_fd vocab(util::MakeTemp("corpus_count_test_vocab"));
util::stream::Chain chain(config);
NGramStream<BuildingPayload> stream;
uint64_t token_count;
WordIndex type_count = 10;
std::vector<bool> prune_words;
CorpusCount counter(input_piece, vocab.get(), token_count, type_count, prune_words, "", chain.BlockSize() / chain.EntrySize(), SILENT);
chain >> boost::ref(counter) >> stream >> util::stream::kRecycle;
chain >> boost::ref(counter);
NGramStream<BuildingPayload> stream(chain.Add());
chain >> util::stream::kRecycle;
const char *v[] = {"<unk>", "<s>", "</s>", "looking", "on", "a", "little", "more", "loin", "foo", "bar"};

View File

@ -1,54 +1,18 @@
#ifndef LM_BUILDER_PRINT_H
#define LM_BUILDER_PRINT_H
#ifndef LM_BUILDER_DEBUG_PRINT_H
#define LM_BUILDER_DEBUG_PRINT_H
#include "lm/common/ngram_stream.hh"
#include "lm/builder/output.hh"
#include "lm/builder/payload.hh"
#include "lm/common/ngram.hh"
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"
#include <boost/lexical_cast.hpp>
#include <ostream>
#include <cassert>
// Warning: print routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.
namespace lm { namespace builder {
class VocabReconstitute {
public:
// fd must be alive for life of this object; does not take ownership.
explicit VocabReconstitute(int fd);
const char *Lookup(WordIndex index) const {
assert(index < map_.size() - 1);
return map_[index];
}
StringPiece LookupPiece(WordIndex index) const {
return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
}
std::size_t Size() const {
// There's an extra entry to support StringPiece lengths.
return map_.size() - 1;
}
private:
util::scoped_memory memory_;
std::vector<const char*> map_;
};
// Not defined, only specialized.
template <class T> void PrintPayload(util::FakeOFStream &to, const BuildingPayload &payload);
template <> inline void PrintPayload<uint64_t>(util::FakeOFStream &to, const BuildingPayload &payload) {
// TODO slow
to << payload.count;
}
template <> inline void PrintPayload<Uninterpolated>(util::FakeOFStream &to, const BuildingPayload &payload) {
@ -101,19 +65,6 @@ template <class V> class Print {
int to_;
};
class PrintARPA : public OutputHook {
public:
explicit PrintARPA(int fd, bool verbose_header)
: OutputHook(PROB_SEQUENTIAL_HOOK), out_fd_(fd), verbose_header_(verbose_header) {}
void Sink(util::stream::Chains &chains);
void Run(const util::stream::ChainPositions &positions);
private:
util::scoped_fd out_fd_;
bool verbose_header_;
};
}} // namespaces
#endif // LM_BUILDER_PRINT_H
#endif // LM_BUILDER_DEBUG_PRINT_H

View File

@ -1,4 +1,4 @@
#include "lm/builder/print.hh"
#include "lm/common/print.hh"
#include "lm/word_index.hh"
#include "util/file.hh"
#include "util/read_compressed.hh"
@ -20,7 +20,7 @@ int main(int argc, char *argv[]) {
}
util::ReadCompressed counts(util::OpenReadOrThrow(argv[1]));
util::scoped_fd vocab_file(util::OpenReadOrThrow(argv[2]));
lm::builder::VocabReconstitute vocab(vocab_file.get());
lm::VocabReconstitute vocab(vocab_file.get());
unsigned int order = boost::lexical_cast<unsigned int>(argv[3]);
std::vector<char> record(sizeof(uint32_t) * order + sizeof(uint64_t));
while (std::size_t got = counts.ReadOrEOF(&*record.begin(), record.size())) {

View File

@ -5,6 +5,8 @@
#include <vector>
#include <stdint.h>
namespace lm { namespace builder {
// Some configuration info that is used to add
// comments to the beginning of an ARPA file
struct HeaderInfo {
@ -21,4 +23,6 @@ struct HeaderInfo {
// TODO: More info if multiple models were interpolated
};
}} // namespaces
#endif

View File

@ -1,9 +1,9 @@
#include "lm/builder/initial_probabilities.hh"
#include "lm/builder/discount.hh"
#include "lm/builder/special.hh"
#include "lm/builder/hash_gamma.hh"
#include "lm/builder/payload.hh"
#include "lm/common/special.hh"
#include "lm/common/ngram_stream.hh"
#include "util/murmur_hash.hh"
#include "util/file.hh"

View File

@ -10,9 +10,8 @@
namespace util { namespace stream { class Chains; } }
namespace lm {
namespace builder {
class SpecialVocab;
namespace builder {
struct InitialProbabilitiesConfig {
// These should be small buffers to keep the adder from getting too far ahead

View File

@ -1,16 +1,16 @@
#include "lm/builder/interpolate.hh"
#include "lm/builder/hash_gamma.hh"
#include "lm/builder/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/common/compare.hh"
#include "lm/common/joint_order.hh"
#include "lm/common/ngram_stream.hh"
#include "lm/lm_exception.hh"
#include "util/fixed_array.hh"
#include "util/murmur_hash.hh"
#include <cassert>
#include <cmath>
#include <iostream>
namespace lm { namespace builder {
namespace {
@ -91,7 +91,8 @@ template <class Output> class Callback {
}
}
void Enter(unsigned order_minus_1, NGram<BuildingPayload> &gram) {
void Enter(unsigned order_minus_1, void *data) {
NGram<BuildingPayload> gram(data, order_minus_1 + 1);
BuildingPayload &pay = gram.Value();
pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
probs_[order_minus_1 + 1] = pay.complete.prob;
@ -125,7 +126,7 @@ template <class Output> class Callback {
output_.Gram(order_minus_1, out_backoff, pay.complete);
}
void Exit(unsigned, const NGram<BuildingPayload> &) const {}
void Exit(unsigned, void *) const {}
private:
util::FixedArray<util::stream::Stream> backoffs_;

View File

@ -1,7 +1,7 @@
#ifndef LM_BUILDER_INTERPOLATE_H
#define LM_BUILDER_INTERPOLATE_H
#include "lm/builder/special.hh"
#include "lm/common/special.hh"
#include "lm/word_index.hh"
#include "util/stream/multi_stream.hh"

View File

@ -1,6 +1,6 @@
#include "lm/builder/output.hh"
#include "lm/builder/pipeline.hh"
#include "lm/builder/print.hh"
#include "lm/common/size_option.hh"
#include "lm/lm_exception.hh"
#include "util/file.hh"
#include "util/file_piece.hh"
@ -13,21 +13,6 @@
#include <vector>
namespace {
class SizeNotify {
public:
SizeNotify(std::size_t &out) : behind_(out) {}
void operator()(const std::string &from) {
behind_ = util::ParseSize(from);
}
private:
std::size_t &behind_;
};
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}
// Parse and validate pruning thresholds then return vector of threshold counts
// for each n-grams order.
@ -106,17 +91,16 @@ int main(int argc, char *argv[]) {
("interpolate_unigrams", po::value<bool>(&pipeline.initial_probs.interpolate_unigrams)->default_value(true)->implicit_value(true), "Interpolate the unigrams (default) as opposed to giving lots of mass to <unk> like SRI. If you want SRI's behavior with a large <unk> and the old lmplz default, use --interpolate_unigrams 0.")
("skip_symbols", po::bool_switch(), "Treat <s>, </s>, and <unk> as whitespace instead of throwing an exception")
("temp_prefix,T", po::value<std::string>(&pipeline.sort.temp_prefix)->default_value("/tmp/lm"), "Temporary file prefix")
("memory,S", SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
("minimum_block", SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
("sort_block", SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
("memory,S", lm:: SizeOption(pipeline.sort.total_memory, util::GuessPhysicalMemory() ? "80%" : "1G"), "Sorting memory")
("minimum_block", lm::SizeOption(pipeline.minimum_block, "8K"), "Minimum block size to allow")
("sort_block", lm::SizeOption(pipeline.sort.buffer_size, "64M"), "Size of IO operations for sort (determines arity)")
("block_count", po::value<std::size_t>(&pipeline.block_count)->default_value(2), "Block count (per order)")
("vocab_estimate", po::value<lm::WordIndex>(&pipeline.vocab_estimate)->default_value(1000000), "Assume this vocabulary size for purposes of calculating memory in step 1 (corpus count) and pre-sizing the hash table")
("vocab_file", po::value<std::string>(&pipeline.vocab_file)->default_value(""), "Location to write a file containing the unique vocabulary strings delimited by null bytes")
("vocab_pad", po::value<uint64_t>(&pipeline.vocab_size_for_unk)->default_value(0), "If the vocabulary is smaller than this value, pad with <unk> to reach this size. Requires --interpolate_unigrams")
("verbose_header", po::bool_switch(&verbose_header), "Add a verbose header to the ARPA file that includes information such as token count, smoothing type, etc.")
("text", po::value<std::string>(&text), "Read text from a file instead of stdin")
("arpa", po::value<std::string>(&arpa), "Write ARPA to a file instead of stdout")
("intermediate", po::value<std::string>(&intermediate), "Write ngrams to an intermediate file. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on. Implicitly makes --vocab_file be the provided name + .vocab.")
("intermediate", po::value<std::string>(&intermediate), "Write ngrams to intermediate files. Turns off ARPA output (which can be reactivated by --arpa file). Forces --renumber on.")
("renumber", po::bool_switch(&pipeline.renumber_vocabulary), "Rrenumber the vocabulary identifiers so that they are monotone with the hash of each string. This is consistent with the ordering used by the trie data structure.")
("collapse_values", po::bool_switch(&pipeline.output_q), "Collapse probability and backoff into a single value, q that yields the same sentence-level probabilities. See http://kheafield.com/professional/edinburgh/rest_paper.pdf for more details, including a proof.")
("prune", po::value<std::vector<std::string> >(&pruning)->multitoken(), "Prune n-grams with count less than or equal to the given threshold. Specify one value for each order i.e. 0 0 1 to prune singleton trigrams and above. The sequence of values must be non-decreasing and the last value applies to any remaining orders. Default is to not prune, which is equivalent to --prune 0.")
@ -217,15 +201,10 @@ int main(int argc, char *argv[]) {
bool writing_intermediate = vm.count("intermediate");
if (writing_intermediate) {
pipeline.renumber_vocabulary = true;
if (!pipeline.vocab_file.empty()) {
std::cerr << "--intermediate and --vocab_file are incompatible because --intermediate already makes a vocab file." << std::endl;
return 1;
}
pipeline.vocab_file = intermediate + ".vocab";
}
lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate);
lm::builder::Output output(writing_intermediate ? intermediate : pipeline.sort.temp_prefix, writing_intermediate, pipeline.output_q);
if (!writing_intermediate || vm.count("arpa")) {
output.Add(new lm::builder::PrintARPA(out.release(), verbose_header));
output.Add(new lm::builder::PrintHook(out.release(), verbose_header));
}
lm::builder::Pipeline(pipeline, in.release(), output);
} catch (const util::MallocException &e) {

View File

@ -1,6 +1,8 @@
#include "lm/builder/output.hh"
#include "lm/common/model_buffer.hh"
#include "lm/common/print.hh"
#include "util/fake_ofstream.hh"
#include "util/stream/multi_stream.hh"
#include <iostream>
@ -9,23 +11,22 @@ namespace lm { namespace builder {
OutputHook::~OutputHook() {}
Output::Output(StringPiece file_base, bool keep_buffer)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer) {}
Output::Output(StringPiece file_base, bool keep_buffer, bool output_q)
: buffer_(file_base, keep_buffer, output_q) {}
void Output::SinkProbs(util::stream::Chains &chains, bool output_q) {
void Output::SinkProbs(util::stream::Chains &chains) {
Apply(PROB_PARALLEL_HOOK, chains);
if (!keep_buffer_ && !Have(PROB_SEQUENTIAL_HOOK)) {
if (!buffer_.Keep() && !Have(PROB_SEQUENTIAL_HOOK)) {
chains >> util::stream::kRecycle;
chains.Wait(true);
return;
}
lm::common::ModelBuffer buf(file_base_, keep_buffer_, output_q);
buf.Sink(chains);
buffer_.Sink(chains, header_.counts_pruned);
chains >> util::stream::kRecycle;
chains.Wait(false);
if (Have(PROB_SEQUENTIAL_HOOK)) {
std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl;
buf.Source(chains);
buffer_.Source(chains);
Apply(PROB_SEQUENTIAL_HOOK, chains);
chains >> util::stream::kRecycle;
chains.Wait(true);
@ -34,8 +35,18 @@ void Output::SinkProbs(util::stream::Chains &chains, bool output_q) {
void Output::Apply(HookType hook_type, util::stream::Chains &chains) {
for (boost::ptr_vector<OutputHook>::iterator entry = outputs_[hook_type].begin(); entry != outputs_[hook_type].end(); ++entry) {
entry->Sink(chains);
entry->Sink(header_, VocabFile(), chains);
}
}
void PrintHook::Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) {
if (verbose_header_) {
util::FakeOFStream out(file_.get(), 50);
out << "# Input file: " << info.input_file << '\n';
out << "# Token count: " << info.token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
chains >> PrintARPA(vocab_file, file_.get(), info.counts_pruned);
}
}} // namespaces

View File

@ -2,6 +2,7 @@
#define LM_BUILDER_OUTPUT_H
#include "lm/builder/header_info.hh"
#include "lm/common/model_buffer.hh"
#include "util/file.hh"
#include <boost/ptr_container/ptr_vector.hpp>
@ -20,69 +21,64 @@ enum HookType {
NUMBER_OF_HOOKS // Keep this last so we know how many values there are.
};
class Output;
class OutputHook {
public:
explicit OutputHook(HookType hook_type) : type_(hook_type), master_(NULL) {}
explicit OutputHook(HookType hook_type) : type_(hook_type) {}
virtual ~OutputHook();
virtual void Sink(util::stream::Chains &chains) = 0;
virtual void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains) = 0;
protected:
const HeaderInfo &GetHeader() const;
int GetVocabFD() const;
HookType Type() const { return type_; }
private:
friend class Output;
const HookType type_;
const Output *master_;
HookType type_;
};
class Output : boost::noncopyable {
public:
Output(StringPiece file_base, bool keep_buffer);
Output(StringPiece file_base, bool keep_buffer, bool output_q);
// Takes ownership.
void Add(OutputHook *hook) {
hook->master_ = this;
outputs_[hook->type_].push_back(hook);
outputs_[hook->Type()].push_back(hook);
}
bool Have(HookType hook_type) const {
return !outputs_[hook_type].empty();
}
void SetVocabFD(int to) { vocab_fd_ = to; }
int GetVocabFD() const { return vocab_fd_; }
int VocabFile() const { return buffer_.VocabFile(); }
void SetHeader(const HeaderInfo &header) { header_ = header; }
const HeaderInfo &GetHeader() const { return header_; }
// This is called by the pipeline.
void SinkProbs(util::stream::Chains &chains, bool output_q);
void SinkProbs(util::stream::Chains &chains);
unsigned int Steps() const { return Have(PROB_SEQUENTIAL_HOOK); }
private:
void Apply(HookType hook_type, util::stream::Chains &chains);
boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
int vocab_fd_;
HeaderInfo header_;
ModelBuffer buffer_;
std::string file_base_;
bool keep_buffer_;
boost::ptr_vector<OutputHook> outputs_[NUMBER_OF_HOOKS];
HeaderInfo header_;
};
inline const HeaderInfo &OutputHook::GetHeader() const {
return master_->GetHeader();
}
class PrintHook : public OutputHook {
public:
// Takes ownership
PrintHook(int write_fd, bool verbose_header)
: OutputHook(PROB_SEQUENTIAL_HOOK), file_(write_fd), verbose_header_(verbose_header) {}
inline int OutputHook::GetVocabFD() const {
return master_->GetVocabFD();
}
void Sink(const HeaderInfo &info, int vocab_file, util::stream::Chains &chains);
private:
util::scoped_fd file_;
bool verbose_header_;
};
}} // namespaces

View File

@ -277,27 +277,27 @@ void InterpolateProbabilities(const std::vector<uint64_t> &counts, Master &maste
}
master >> Interpolate(std::max(master.Config().vocab_size_for_unk, counts[0] - 1 /* <s> is not included */), util::stream::ChainPositions(gamma_chains), config.prune_thresholds, config.prune_vocab, config.output_q, specials);
gamma_chains >> util::stream::kRecycle;
output.SinkProbs(master.MutableChains(), config.output_q);
output.SinkProbs(master.MutableChains());
}
class VocabNumbering {
public:
VocabNumbering(StringPiece vocab_file, StringPiece temp_prefix, bool renumber)
: vocab_file_(vocab_file.data(), vocab_file.size()),
temp_prefix_(temp_prefix.data(), temp_prefix.size()),
VocabNumbering(int final_vocab, StringPiece temp_prefix, bool renumber)
: final_vocab_(final_vocab),
renumber_(renumber),
specials_(kBOS, kEOS) {
InitFile(renumber || vocab_file.empty());
if (renumber) {
temporary_.reset(util::MakeTemp(temp_prefix));
}
}
int File() const { return null_delimited_.get(); }
int WriteOnTheFly() const { return renumber_ ? temporary_.get() : final_vocab_; }
// Compute the vocabulary mapping and return the memory used.
std::size_t ComputeMapping(WordIndex type_count) {
if (!renumber_) return 0;
util::scoped_fd previous(null_delimited_.release());
InitFile(vocab_file_.empty());
ngram::SortedVocabulary::ComputeRenumbering(type_count, previous.get(), null_delimited_.get(), vocab_mapping_);
ngram::SortedVocabulary::ComputeRenumbering(type_count, temporary_.get(), final_vocab_, vocab_mapping_);
temporary_.reset();
return sizeof(WordIndex) * vocab_mapping_.size();
}
@ -312,15 +312,9 @@ class VocabNumbering {
const SpecialVocab &Specials() const { return specials_; }
private:
void InitFile(bool temp) {
null_delimited_.reset(temp ?
util::MakeTemp(temp_prefix_) :
util::CreateOrThrow(vocab_file_.c_str()));
}
std::string vocab_file_, temp_prefix_;
util::scoped_fd null_delimited_;
int final_vocab_;
// Out of order vocab file created on the fly.
util::scoped_fd temporary_;
bool renumber_;
@ -349,18 +343,17 @@ void Pipeline(PipelineConfig &config, int text_file, Output &output) {
// master's destructor will wait for chains. But they might be deadlocked if
// this thread dies because e.g. it ran out of memory.
try {
VocabNumbering numbering(config.vocab_file, config.TempPrefix(), config.renumber_vocabulary);
VocabNumbering numbering(output.VocabFile(), config.TempPrefix(), config.renumber_vocabulary);
uint64_t token_count;
WordIndex type_count;
std::string text_file_name;
std::vector<bool> prune_words;
util::scoped_ptr<util::stream::Sort<SuffixOrder, CombineCounts> > sorted_counts(
CountText(text_file, numbering.File(), master, token_count, type_count, text_file_name, prune_words));
CountText(text_file, numbering.WriteOnTheFly(), master, token_count, type_count, text_file_name, prune_words));
std::cerr << "Unigram tokens " << token_count << " types " << type_count << std::endl;
// Create vocab mapping, which uses temporary memory, while nothing else is happening.
std::size_t subtract_for_numbering = numbering.ComputeMapping(type_count);
output.SetVocabFD(numbering.File());
std::cerr << "=== 2/" << master.Steps() << " Calculating and sorting adjusted counts ===" << std::endl;
master.InitForAdjust(*sorted_counts, type_count, subtract_for_numbering);

View File

@ -18,7 +18,6 @@ class Output;
struct PipelineConfig {
std::size_t order;
std::string vocab_file;
util::stream::SortConfig sort;
InitialProbabilitiesConfig initial_probs;
util::stream::ChainConfig read_backoffs;

View File

@ -1,64 +0,0 @@
#include "lm/builder/print.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
#include "util/stream/timer.hh"
#include <sstream>
#include <cstring>
namespace lm { namespace builder {
VocabReconstitute::VocabReconstitute(int fd) {
uint64_t size = util::SizeOrThrow(fd);
util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
const char *const start = static_cast<const char*>(memory_.get());
const char *i;
for (i = start; i != start + size; i += strlen(i) + 1) {
map_.push_back(i);
}
// Last one for LookupPiece.
map_.push_back(i);
}
void PrintARPA::Sink(util::stream::Chains &chains) {
chains >> boost::ref(*this);
}
void PrintARPA::Run(const util::stream::ChainPositions &positions) {
VocabReconstitute vocab(GetVocabFD());
util::FakeOFStream out(out_fd_.get());
// Write header.
if (verbose_header_) {
out << "# Input file: " << GetHeader().input_file << '\n';
out << "# Token count: " << GetHeader().token_count << '\n';
out << "# Smoothing: Modified Kneser-Ney" << '\n';
}
out << "\\data\\\n";
for (size_t i = 0; i < positions.size(); ++i) {
out << "ngram " << (i+1) << '=' << GetHeader().counts_pruned[i] << '\n';
}
out << '\n';
for (unsigned order = 1; order <= positions.size(); ++order) {
out << "\\" << order << "-grams:" << '\n';
for (NGramStream<BuildingPayload> stream(positions[order - 1]); stream; ++stream) {
// Correcting for numerical precision issues. Take that IRST.
out << stream->Value().complete.prob << '\t' << vocab.Lookup(*stream->begin());
for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
out << ' ' << vocab.Lookup(*i);
}
if (order != positions.size())
out << '\t' << stream->Value().complete.backoff;
out << '\n';
}
out << '\n';
}
out << "\\end\\\n";
}
}} // namespaces

40
lm/common/CMakeLists.txt Normal file
View File

@ -0,0 +1,40 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_COMMON_SOURCE
${CMAKE_CURRENT_SOURCE_DIR}/model_buffer.cc
${CMAKE_CURRENT_SOURCE_DIR}/print.cc
${CMAKE_CURRENT_SOURCE_DIR}/renumber.cc
${CMAKE_CURRENT_SOURCE_DIR}/size_option.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_common OBJECT ${KENLM_COMMON_SOURCE})

View File

@ -1,2 +1,2 @@
fakelib common : [ glob *.cc : *test.cc *main.cc ]
../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm ;
../../util//kenutil ../../util/stream//stream ../../util/double-conversion//double-conversion ..//kenlm /top//boost_program_options ;

View File

@ -1,8 +1,7 @@
#ifndef LM_BUILDER_JOINT_ORDER_H
#define LM_BUILDER_JOINT_ORDER_H
#ifndef LM_COMMON_JOINT_ORDER_H
#define LM_COMMON_JOINT_ORDER_H
#include "lm/common/ngram_stream.hh"
#include "lm/builder/payload.hh"
#include "lm/lm_exception.hh"
#ifdef DEBUG
@ -12,15 +11,19 @@
#include <cstring>
namespace lm { namespace builder {
namespace lm {
template <class Callback, class Compare> void JointOrder(const util::stream::ChainPositions &positions, Callback &callback) {
// Allow matching to reference streams[-1].
NGramStreams<BuildingPayload> streams_with_dummy;
streams_with_dummy.InitWithDummy(positions);
NGramStream<BuildingPayload> *streams = streams_with_dummy.begin() + 1;
util::FixedArray<ProxyStream<NGramHeader> > streams_with_dummy(positions.size() + 1);
// A bogus stream for [-1].
streams_with_dummy.push_back();
for (std::size_t i = 0; i < positions.size(); ++i) {
streams_with_dummy.push_back(positions[i], NGramHeader(NULL, i + 1));
}
ProxyStream<NGramHeader> *streams = streams_with_dummy.begin() + 1;
unsigned int order;
std::size_t order;
for (order = 0; order < positions.size() && streams[order]; ++order) {}
assert(order); // should always have <unk>.
@ -31,11 +34,11 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
less_compare.push_back(i + 1);
#endif // DEBUG
unsigned int current = 0;
std::size_t current = 0;
while (true) {
// Does the context match the lower one?
if (!memcmp(streams[static_cast<int>(current) - 1]->begin(), streams[current]->begin() + Compare::kMatchOffset, sizeof(WordIndex) * current)) {
callback.Enter(current, *streams[current]);
callback.Enter(current, streams[current].Get());
// Transition to looking for extensions.
if (++current < order) continue;
}
@ -51,7 +54,7 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
while(true) {
assert(current > 0);
--current;
callback.Exit(current, *streams[current]);
callback.Exit(current, streams[current].Get());
if (++streams[current]) break;
@ -63,6 +66,6 @@ template <class Callback, class Compare> void JointOrder(const util::stream::Cha
}
}
}} // namespaces
} // namespaces
#endif // LM_BUILDER_JOINT_ORDER_H
#endif // LM_COMMON_JOINT_ORDER_H

View File

@ -8,25 +8,30 @@
#include <boost/lexical_cast.hpp>
namespace lm { namespace common {
namespace lm {
namespace {
const char kMetadataHeader[] = "KenLM intermediate binary file";
} // namespace
ModelBuffer::ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q)
: file_base_(file_base), keep_buffer_(keep_buffer), output_q_(output_q) {}
ModelBuffer::ModelBuffer(const std::string &file_base)
: file_base_(file_base), keep_buffer_(false) {
ModelBuffer::ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(keep_buffer), output_q_(output_q),
vocab_file_(keep_buffer ? util::CreateOrThrow((file_base_ + ".vocab").c_str()) : util::MakeTemp(file_base_)) {}
ModelBuffer::ModelBuffer(StringPiece file_base)
: file_base_(file_base.data(), file_base.size()), keep_buffer_(false) {
const std::string full_name = file_base_ + ".kenlm_intermediate";
util::FilePiece in(full_name.c_str());
StringPiece token = in.ReadLine();
UTIL_THROW_IF2(token != kMetadataHeader, "File " << full_name << " begins with \"" << token << "\" not " << kMetadataHeader);
token = in.ReadDelimited();
UTIL_THROW_IF2(token != "Order", "Expected Order, got \"" << token << "\" in " << full_name);
unsigned long order = in.ReadULong();
UTIL_THROW_IF2(token != "Counts", "Expected Counts, got \"" << token << "\" in " << full_name);
char got;
while ((got = in.get()) == ' ') {
counts_.push_back(in.ReadULong());
}
UTIL_THROW_IF2(got != '\n', "Expected newline at end of counts.");
token = in.ReadDelimited();
UTIL_THROW_IF2(token != "Payload", "Expected Payload, got \"" << token << "\" in " << full_name);
@ -39,16 +44,16 @@ ModelBuffer::ModelBuffer(const std::string &file_base)
UTIL_THROW(util::Exception, "Unknown payload " << token);
}
files_.Init(order);
for (unsigned long i = 0; i < order; ++i) {
vocab_file_.reset(util::OpenReadOrThrow((file_base_ + ".vocab").c_str()));
files_.Init(counts_.size());
for (unsigned long i = 0; i < counts_.size(); ++i) {
files_.push_back(util::OpenReadOrThrow((file_base_ + '.' + boost::lexical_cast<std::string>(i + 1)).c_str()));
}
}
// virtual destructor
ModelBuffer::~ModelBuffer() {}
void ModelBuffer::Sink(util::stream::Chains &chains) {
void ModelBuffer::Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts) {
counts_ = counts;
// Open files.
files_.Init(chains.size());
for (std::size_t i = 0; i < chains.size(); ++i) {
@ -64,19 +69,23 @@ void ModelBuffer::Sink(util::stream::Chains &chains) {
if (keep_buffer_) {
util::scoped_fd metadata(util::CreateOrThrow((file_base_ + ".kenlm_intermediate").c_str()));
util::FakeOFStream meta(metadata.get(), 200);
meta << kMetadataHeader << "\nOrder " << chains.size() << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
meta << kMetadataHeader << "\nCounts";
for (std::vector<uint64_t>::const_iterator i = counts_.begin(); i != counts_.end(); ++i) {
meta << ' ' << *i;
}
meta << "\nPayload " << (output_q_ ? "q" : "pb") << '\n';
}
}
void ModelBuffer::Source(util::stream::Chains &chains) {
assert(chains.size() == files_.size());
for (unsigned int i = 0; i < files_.size(); ++i) {
assert(chains.size() <= files_.size());
for (unsigned int i = 0; i < chains.size(); ++i) {
chains[i] >> util::stream::PRead(files_[i].get());
}
}
std::size_t ModelBuffer::Order() const {
return files_.size();
void ModelBuffer::Source(std::size_t order_minus_1, util::stream::Chain &chain) {
chain >> util::stream::PRead(files_[order_minus_1].get());
}
}} // namespaces
} // namespace

View File

@ -1,5 +1,5 @@
#ifndef LM_BUILDER_MODEL_BUFFER_H
#define LM_BUILDER_MODEL_BUFFER_H
#ifndef LM_COMMON_MODEL_BUFFER_H
#define LM_COMMON_MODEL_BUFFER_H
/* Format with separate files in suffix order. Each file contains
* n-grams of the same order.
@ -9,37 +9,55 @@
#include "util/fixed_array.hh"
#include <string>
#include <vector>
namespace util { namespace stream { class Chains; } }
namespace util { namespace stream {
class Chains;
class Chain;
}} // namespaces
namespace lm { namespace common {
namespace lm {
class ModelBuffer {
public:
// Construct for writing.
ModelBuffer(const std::string &file_base, bool keep_buffer, bool output_q);
// Construct for writing. Must call VocabFile() and fill it with null-delimited vocab words.
ModelBuffer(StringPiece file_base, bool keep_buffer, bool output_q);
// Load from file.
explicit ModelBuffer(const std::string &file_base);
explicit ModelBuffer(StringPiece file_base);
// explicit for virtual destructor.
~ModelBuffer();
void Sink(util::stream::Chains &chains);
// Must call VocabFile and populate before calling this function.
void Sink(util::stream::Chains &chains, const std::vector<uint64_t> &counts);
// Read files and write to the given chains. If fewer chains are provided,
// only do the lower orders.
void Source(util::stream::Chains &chains);
void Source(std::size_t order_minus_1, util::stream::Chain &chain);
// The order of the n-gram model that is associated with the model buffer.
std::size_t Order() const;
std::size_t Order() const { return counts_.size(); }
// Requires Sink or load from file.
const std::vector<uint64_t> &Counts() const {
assert(!counts_.empty());
return counts_;
}
int VocabFile() const { return vocab_file_.get(); }
int StealVocabFile() { return vocab_file_.release(); }
bool Keep() const { return keep_buffer_; }
private:
const std::string file_base_;
const bool keep_buffer_;
bool output_q_;
std::vector<uint64_t> counts_;
util::scoped_fd vocab_file_;
util::FixedArray<util::scoped_fd> files_;
};
}} // namespaces
} // namespace lm
#endif // LM_BUILDER_MODEL_BUFFER_H
#endif // LM_COMMON_MODEL_BUFFER_H

View File

@ -16,6 +16,8 @@ class NGramHeader {
NGramHeader(void *begin, std::size_t order)
: begin_(static_cast<WordIndex*>(begin)), end_(begin_ + order) {}
NGramHeader() : begin_(NULL), end_(NULL) {}
const uint8_t *Base() const { return reinterpret_cast<const uint8_t*>(begin_); }
uint8_t *Base() { return reinterpret_cast<uint8_t*>(begin_); }
@ -32,6 +34,7 @@ class NGramHeader {
const WordIndex *end() const { return end_; }
WordIndex *end() { return end_; }
std::size_t size() const { return end_ - begin_; }
std::size_t Order() const { return end_ - begin_; }
private:
@ -42,6 +45,8 @@ template <class PayloadT> class NGram : public NGramHeader {
public:
typedef PayloadT Payload;
NGram() : NGramHeader(NULL, 0) {}
NGram(void *begin, std::size_t order) : NGramHeader(begin, order) {}
// Would do operator++ but that can get confusing for a stream.

View File

@ -10,24 +10,21 @@
namespace lm {
template <class Payload> class NGramStream {
template <class Proxy> class ProxyStream {
public:
NGramStream() : gram_(NULL, 0) {}
// Make an invalid stream.
ProxyStream() {}
NGramStream(const util::stream::ChainPosition &position) : gram_(NULL, 0) {
Init(position);
explicit ProxyStream(const util::stream::ChainPosition &position, const Proxy &proxy = Proxy())
: proxy_(proxy), stream_(position) {
proxy_.ReBase(stream_.Get());
}
void Init(const util::stream::ChainPosition &position) {
stream_.Init(position);
gram_ = NGram<Payload>(stream_.Get(), NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()));
}
Proxy &operator*() { return proxy_; }
const Proxy &operator*() const { return proxy_; }
NGram<Payload> &operator*() { return gram_; }
const NGram<Payload> &operator*() const { return gram_; }
NGram<Payload> *operator->() { return &gram_; }
const NGram<Payload> *operator->() const { return &gram_; }
Proxy *operator->() { return &proxy_; }
const Proxy *operator->() const { return &proxy_; }
void *Get() { return stream_.Get(); }
const void *Get() const { return stream_.Get(); }
@ -36,21 +33,25 @@ template <class Payload> class NGramStream {
bool operator!() const { return !stream_; }
void Poison() { stream_.Poison(); }
NGramStream &operator++() {
ProxyStream<Proxy> &operator++() {
++stream_;
gram_.ReBase(stream_.Get());
proxy_.ReBase(stream_.Get());
return *this;
}
private:
NGram<Payload> gram_;
Proxy proxy_;
util::stream::Stream stream_;
};
template <class Payload> inline util::stream::Chain &operator>>(util::stream::Chain &chain, NGramStream<Payload> &str) {
str.Init(chain.Add());
return chain;
}
template <class Payload> class NGramStream : public ProxyStream<NGram<Payload> > {
public:
// Make an invalid stream.
NGramStream() {}
explicit NGramStream(const util::stream::ChainPosition &position) :
ProxyStream<NGram<Payload> >(position, NGram<Payload>(NULL, NGram<Payload>::OrderFromSize(position.GetChain().EntrySize()))) {}
};
template <class Payload> class NGramStreams : public util::stream::GenericStreams<NGramStream<Payload> > {
private:

62
lm/common/print.cc Normal file
View File

@ -0,0 +1,62 @@
#include "lm/common/print.hh"
#include "lm/common/ngram_stream.hh"
#include "util/fake_ofstream.hh"
#include "util/file.hh"
#include "util/mmap.hh"
#include "util/scoped.hh"
#include <sstream>
#include <cstring>
namespace lm {
// Memory-map the null-delimited vocabulary file and build an index from
// WordIndex to the start of each word's string.
VocabReconstitute::VocabReconstitute(int fd) {
  uint64_t size = util::SizeOrThrow(fd);
  util::MapRead(util::POPULATE_OR_READ, fd, 0, size, memory_);
  const char *const start = static_cast<const char*>(memory_.get());
  const char *i;
  // Words are '\0'-delimited; step over each word plus its terminator.
  for (i = start; i != start + size; i += strlen(i) + 1) {
    map_.push_back(i);
  }
  // Last one for LookupPiece.
  map_.push_back(i);
}
namespace {
// Write the probability and the space-separated words of the n-gram under
// the stream cursor.  The backoff (if any) and the trailing newline are
// appended by the caller, since they differ by payload type.
template <class Payload> void PrintLead(const VocabReconstitute &vocab, ProxyStream<Payload> &stream, util::FakeOFStream &out) {
  out << stream->Value().prob << '\t' << vocab.Lookup(*stream->begin());
  for (const WordIndex *i = stream->begin() + 1; i != stream->end(); ++i) {
    out << ' ' << vocab.Lookup(*i);
  }
}
} // namespace
// Stream the model out in ARPA format: the \data\ header with per-order
// counts, then one "\N-grams:" section per order.  Orders below the maximum
// carry ProbBackoff payloads (prob + backoff); the highest order carries
// Prob only, so it is handled by a separate loop.
void PrintARPA::Run(const util::stream::ChainPositions &positions) {
  VocabReconstitute vocab(vocab_fd_);
  util::FakeOFStream out(out_fd_);
  out << "\\data\\\n";
  for (size_t i = 0; i < positions.size(); ++i) {
    out << "ngram " << (i+1) << '=' << counts_[i] << '\n';
  }
  out << '\n';

  // All orders except the last: prob <tab> words <tab> backoff.
  for (unsigned order = 1; order < positions.size(); ++order) {
    out << "\\" << order << "-grams:" << '\n';
    for (ProxyStream<NGram<ProbBackoff> > stream(positions[order - 1], NGram<ProbBackoff>(NULL, order)); stream; ++stream) {
      PrintLead(vocab, stream, out);
      out << '\t' << stream->Value().backoff << '\n';
    }
    out << '\n';
  }

  // Highest order: no backoff field.
  out << "\\" << positions.size() << "-grams:" << '\n';
  for (ProxyStream<NGram<Prob> > stream(positions.back(), NGram<Prob>(NULL, positions.size())); stream; ++stream) {
    PrintLead(vocab, stream, out);
    out << '\n';
  }
  out << '\n';
  out << "\\end\\\n";
}
} // namespace lm

58
lm/common/print.hh Normal file
View File

@ -0,0 +1,58 @@
#ifndef LM_COMMON_PRINT_H
#define LM_COMMON_PRINT_H
#include "lm/word_index.hh"
#include "util/mmap.hh"
#include "util/string_piece.hh"
#include <cassert>
#include <vector>
namespace util { namespace stream { class ChainPositions; }}
// Warning: PrintARPA routines read all unigrams before all bigrams before all
// trigrams etc. So if other parts of the chain move jointly, you'll have to
// buffer.
namespace lm {
// Maps a WordIndex back to its surface string, using a memory-mapped
// null-delimited vocabulary file.
class VocabReconstitute {
  public:
    // fd must be alive for life of this object; does not take ownership.
    explicit VocabReconstitute(int fd);

    // Null-terminated word string.  index must be < Size().
    const char *Lookup(WordIndex index) const {
      assert(index < map_.size() - 1);
      return map_[index];
    }

    // Same lookup as a StringPiece; the length excludes the '\0' delimiter.
    // Bounds-checked the same way as Lookup, since this also reads the
    // following map_ entry to compute the length.
    StringPiece LookupPiece(WordIndex index) const {
      assert(index < map_.size() - 1);
      return StringPiece(map_[index], map_[index + 1] - 1 - map_[index]);
    }

    // Number of words in the vocabulary.
    std::size_t Size() const {
      // There's an extra entry to support StringPiece lengths.
      return map_.size() - 1;
    }

  private:
    util::scoped_memory memory_;          // mapping of the vocab file
    std::vector<const char*> map_;        // word starts + one-past-end sentinel
};
// Writes an ARPA file from streams of n-grams, one chain position per order.
// Per the file-level warning, it consumes all unigrams before bigrams, etc.
class PrintARPA {
  public:
    // Does not take ownership of vocab_fd or out_fd.
    explicit PrintARPA(int vocab_fd, int out_fd, const std::vector<uint64_t> &counts)
      : vocab_fd_(vocab_fd), out_fd_(out_fd), counts_(counts) {}

    void Run(const util::stream::ChainPositions &positions);

  private:
    int vocab_fd_;                   // null-delimited vocabulary file
    int out_fd_;                     // destination for the ARPA text
    std::vector<uint64_t> counts_;   // n-gram count per order, for the \data\ section
};
} // namespace lm
#endif // LM_COMMON_PRINT_H

24
lm/common/size_option.cc Normal file
View File

@ -0,0 +1,24 @@
#include <boost/program_options.hpp>
#include "util/usage.hh"
namespace lm {
namespace {

// Notifier functor for boost::program_options: parses a human-readable size
// string (e.g. "1T", "10k") with util::ParseSize and stores the resulting
// byte count in the referenced variable.
class SizeNotify {
  public:
    explicit SizeNotify(std::size_t &out) : behind_(out) {}

    void operator()(const std::string &from) {
      behind_ = util::ParseSize(from);
    }

  private:
    std::size_t &behind_;  // destination owned by the caller
};

}
// Build a boost::program_options value for data sizes: the option is read as
// a string, and the SizeNotify notifier converts it to bytes and writes `to`.
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value) {
  return boost::program_options::value<std::string>()->notifier(SizeNotify(to))->default_value(default_value);
}
} // namespace lm

11
lm/common/size_option.hh Normal file
View File

@ -0,0 +1,11 @@
#include <boost/program_options.hpp>
#include <cstddef>
#include <string>
namespace lm {
// Create a boost program option for data sizes. This parses sizes like 1T and 10k.
boost::program_options::typed_value<std::string> *SizeOption(std::size_t &to, const char *default_value);
} // namespace lm

View File

@ -1,9 +1,9 @@
#ifndef LM_BUILDER_SPECIAL_H
#define LM_BUILDER_SPECIAL_H
#ifndef LM_COMMON_SPECIAL_H
#define LM_COMMON_SPECIAL_H
#include "lm/word_index.hh"
namespace lm { namespace builder {
namespace lm {
class SpecialVocab {
public:
@ -22,6 +22,6 @@ class SpecialVocab {
WordIndex eos_;
};
}} // namespaces
} // namespace lm
#endif // LM_BUILDER_SPECIAL_H
#endif // LM_COMMON_SPECIAL_H

62
lm/filter/CMakeLists.txt Normal file
View File

@ -0,0 +1,62 @@
cmake_minimum_required(VERSION 2.8.8)
#
# The KenLM cmake files make use of add_library(... OBJECTS ...)
#
# This syntax allows grouping of source files when compiling
# (effectively creating "fake" libraries based on source subdirs).
#
# This syntax was only added in cmake version 2.8.8
#
# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
# This CMake file was created by Lane Schwartz <dowobeha@gmail.com>
# Explicitly list the source files for this subdirectory
#
# If you add any source files to this subdirectory
# that should be included in the kenlm library,
# (this excludes any unit test files)
# you should add them to the following list:
#
# In order to set correct paths to these files
# in case this variable is referenced by CMake files in the parent directory,
# we prefix all files with ${CMAKE_CURRENT_SOURCE_DIR}.
#
set(KENLM_FILTER_SOURCE
  ${CMAKE_CURRENT_SOURCE_DIR}/arpa_io.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/phrase.cc
  ${CMAKE_CURRENT_SOURCE_DIR}/vocab.cc
)
# Group these objects together for later use.
#
# Given add_library(foo OBJECT ${my_foo_sources}),
# refer to these objects as $<TARGET_OBJECTS:foo>
#
add_library(kenlm_filter OBJECT ${KENLM_FILTER_SOURCE})
# Explicitly list the executable files to be compiled
# (each entry corresponds to a <name>_main.cc file in this directory)
set(EXE_LIST
  filter
  phrase_table_vocab
)
# Iterate through the executable list
foreach(exe ${EXE_LIST})
  # Compile the executable, linking against the requisite dependent object files
  add_executable(${exe} ${exe}_main.cc $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_filter> $<TARGET_OBJECTS:kenlm_util>)
  # Link the executable against boost
  target_link_libraries(${exe} ${Boost_LIBRARIES})
  # Group executables together
  set_target_properties(${exe} PROPERTIES FOLDER executables)
# End for loop
endforeach(exe)

View File

@ -5,10 +5,7 @@
#include <vector>
#include "StatisticsBasedScorer.h"
#include "moses/FF/InternalTree.h"
using Moses::TreePointer;
using Moses::InternalTree;
#include "InternalTree.h"
namespace MosesTuning
{

110
mert/InternalTree.cpp Normal file
View File

@ -0,0 +1,110 @@
#include "InternalTree.h"
namespace MosesTuning
{
// Construct a tree from its bracketed string form.  A line containing none
// of '[', ']' or ' ' is taken as a bare node label; anything else is parsed
// recursively via AddSubTree.
InternalTree::InternalTree(const std::string & line, const bool terminal):
  m_isTerminal(terminal)
{
  const bool bare_label = (line.find_first_of("[] ") == std::string::npos);
  if (bare_label) {
    m_value = line;
  } else {
    AddSubTree(line, 0);
  }
}
// Recursively parse the bracketed tree string from position pos, filling in
// this node's label (m_value) and its children.  Returns the position just
// past the consumed portion (line.size() if the input ended first).
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
  std::string value;
  char token = 0;

  while (token != ']' && pos != std::string::npos) {
    size_t oldpos = pos;
    pos = line.find_first_of("[] ", pos);
    if (pos == std::string::npos) break;
    token = line[pos];
    value = line.substr(oldpos,pos-oldpos);
    if (token == '[') {
      if (m_value.size() > 0) {
        // Label already set: the bracket opens a new child subtree.
        m_children.push_back(boost::make_shared<InternalTree>(value,false));
        pos = m_children.back()->AddSubTree(line, pos+1);
      } else {
        // No label yet: text before '[' (if any) is this node's own label.
        if (value.size() > 0) {
          m_value = value;
        }
        pos = AddSubTree(line, pos+1);
      }
    } else if (token == ' ' || token == ']') {
      if (value.size() > 0 && !(m_value.size() > 0)) {
        // First bare token becomes this node's label.
        m_value = value;
      } else if (value.size() > 0) {
        // Subsequent bare tokens become terminal children.
        m_isTerminal = false;
        m_children.push_back(boost::make_shared<InternalTree>(value,true));
      }
      if (token == ' ') {
        pos++;  // skip the separator; ']' ends the loop via the condition
      }
    }
    // Having any child makes this node non-terminal.
    if (m_children.size() > 0) {
      m_isTerminal = false;
    }
  }
  if (pos == std::string::npos) {
    return line.size();
  }
  return std::min(line.size(),pos+1);
}
std::string InternalTree::GetString(bool start) const
{
std::string ret = "";
if (!start) {
ret += " ";
}
if (!m_isTerminal) {
ret += "[";
}
ret += m_value;
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString(false);
}
if (!m_isTerminal) {
ret += "]";
}
return ret;
}
// Replace this tree's nonterminal leaves, in left-to-right order, with the
// trees in 'previous' (one leaf consumed per entry).
void InternalTree::Combine(const std::vector<TreePointer> &previous)
{
  std::vector<TreePointer>::iterator it;  // filled in by the generator on each call
  bool found = false;
  leafNT next_leafNT(this);  // coroutine-style generator over nonterminal leaves
  for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
    found = next_leafNT(it);
    if (found) {
      // Substitute the subtree in place of the leaf nonterminal.
      *it = *it_prev;
    } else {
      // More subtrees supplied than leaf nonterminals available.
      std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
    }
  }
}
}

77
mert/InternalTree.h Normal file
View File

@ -0,0 +1,77 @@
#pragma once
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
namespace MosesTuning
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
// A parse-tree node used by tree-based scorers in mert (included by
// HwcmScorer).  Each node has a string label (m_value), child subtrees, and
// a terminal flag; trees are built from bracketed strings via AddSubTree.
class InternalTree
{
  std::string m_value;                 // node label
  std::vector<TreePointer> m_children; // ordered child subtrees
  bool m_isTerminal;                   // true for terminal (leaf word) nodes
public:
  InternalTree(const std::string & line, const bool terminal = false);
  // Deep copy: clones every child so the two trees share no nodes.
  InternalTree(const InternalTree & tree):
    m_value(tree.m_value),
    m_isTerminal(tree.m_isTerminal) {
    const std::vector<TreePointer> & children = tree.m_children;
    // Pre-increment avoids the needless iterator copy of post-increment.
    for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); ++it) {
      m_children.push_back(boost::make_shared<InternalTree>(**it));
    }
  }
  // Parse the bracketed substring of 'line' starting at 'start'; returns the
  // position just past the consumed portion.
  size_t AddSubTree(const std::string & line, size_t start);
  // Serialize back to the bracketed string representation.
  std::string GetString(bool start = true) const;
  // Substitute this tree's nonterminal leaves, left to right, with 'previous'.
  void Combine(const std::vector<TreePointer> &previous);
  const std::string & GetLabel() const {
    return m_value;
  }
  // Number of direct children.
  size_t GetLength() const {
    return m_children.size();
  }
  std::vector<TreePointer> & GetChildren() {
    return m_children;
  }
  bool IsTerminal() const {
    return m_isTerminal;
  }
  // Leaf nonterminal: not marked terminal and has no children.
  bool IsLeafNT() const {
    return (!m_isTerminal && m_children.size() == 0);
  }
};
// Python-like generator that yields next nonterminal leaf on every call.
// Implemented with the coroutine macros from util/generator.hh: each call to
// operator() resumes after the previous $yield.
$generator(leafNT)
{
  std::vector<TreePointer>::iterator it;  // current position in the children
  InternalTree* tree;                     // subtree currently being walked
  leafNT(InternalTree* root = 0): tree(root) {}
  $emit(std::vector<TreePointer>::iterator)
  for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
    if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
      // Leaf nonterminal: hand an iterator to it back to the caller.
      $yield(it);
    } else if ((*it)->GetLength() > 0) {
      // Node with children: descend into the subtree.
      if ((*it).get()) { // normal pointer to same object that TreePointer points to
        $restart(tree = (*it).get());
      }
    }
  }
  $stop;
};
}

View File

@ -30,7 +30,7 @@ InterpolatedScorer.cpp
Point.cpp
PerScorer.cpp
HwcmScorer.cpp
../moses/FF/InternalTree.cpp
InternalTree.cpp
Scorer.cpp
ScorerFactory.cpp
Optimizer.cpp

View File

@ -159,13 +159,15 @@ int main(int argc, char* argv[])
}
StaticData& SD = const_cast<StaticData&>(StaticData::Instance());
SD.SetUseLatticeMBR(true);
LMBR_Options& lmbr = SD.options().lmbr;
MBR_Options& mbr = SD.options().mbr;
lmbr.enabled = true;
boost::shared_ptr<IOWrapper> ioWrapper(new IOWrapper);
if (!ioWrapper) {
throw runtime_error("Failed to initialise IOWrapper");
}
size_t nBestSize = SD.GetMBRSize();
size_t nBestSize = mbr.size;
if (nBestSize <= 0) {
throw new runtime_error("Non-positive size specified for n-best list");
@ -187,13 +189,13 @@ int main(int argc, char* argv[])
manager.CalcNBest(nBestSize, nBestList,true);
//grid search
BOOST_FOREACH(float const& p, pgrid) {
SD.SetLatticeMBRPrecision(p);
lmbr.precision = p;
BOOST_FOREACH(float const& r, rgrid) {
SD.SetLatticeMBRPRatio(r);
lmbr.ratio = r;
BOOST_FOREACH(size_t const prune_i, prune_grid) {
SD.SetLatticeMBRPruningFactor(size_t(prune_i));
lmbr.pruning_factor = prune_i;
BOOST_FOREACH(float const& scale_i, scale_grid) {
SD.SetMBRScale(scale_i);
mbr.scale = scale_i;
size_t lineCount = source->GetTranslationId();
cout << lineCount << " ||| " << p << " "
<< r << " " << size_t(prune_i) << " " << scale_i

View File

@ -27,7 +27,7 @@ BaseManager::GetSource() const
return m_source;
}
const ttasksptr&
const ttasksptr
BaseManager::GetTtask() const
{
return m_ttask.lock();
@ -140,6 +140,14 @@ void BaseManager::WriteApplicationContext(std::ostream &out,
}
}
AllOptions const&
BaseManager::
options() const
{
return GetTtask()->options();
}
} // namespace

View File

@ -5,7 +5,7 @@
#include <string>
#include "ScoreComponentCollection.h"
#include "InputType.h"
#include "moses/parameters/AllOptions.h"
namespace Moses
{
class ScoreComponentCollection;
@ -50,7 +50,8 @@ public:
//! the input sentence being decoded
const InputType& GetSource() const;
const ttasksptr& GetTtask() const;
const ttasksptr GetTtask() const;
AllOptions const& options() const;
virtual void Decode() = 0;
// outputs

View File

@ -53,7 +53,7 @@ ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager) :
ChartCellBase(startPos, endPos), m_manager(manager)
{
const StaticData &staticData = StaticData::Instance();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_nBestIsEnabled = staticData.options().nbest.enabled;
}
ChartCell::~ChartCell() {}
@ -100,7 +100,7 @@ void ChartCell::Decode(const ChartTranslationOptionList &transOptList
}
// pluck things out of queue and add to hypo collection
const size_t popLimit = staticData.GetCubePruningPopLimit();
const size_t popLimit = staticData.options().cube.pop_limit;
for (size_t numPops = 0; numPops < popLimit && !queue.IsEmpty(); ++numPops) {
ChartHypothesis *hypo = queue.Pop();
AddHypothesis(hypo);

View File

@ -287,8 +287,11 @@ void ChartHypothesis::CleanupArcList()
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.GetOutputSearchGraphHypergraph();
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (staticData.options().nbest.only_distinct
|| staticData.options().mbr.enabled
|| staticData.GetOutputSearchGraph()
|| staticData.GetOutputSearchGraphHypergraph());
if (!distinctNBest && m_arcList->size() > nBestSize) {
// prune arc list only if there too many arcs

View File

@ -38,8 +38,8 @@ ChartHypothesisCollection::ChartHypothesisCollection()
const StaticData &staticData = StaticData::Instance();
m_beamWidth = staticData.GetBeamWidth();
m_maxHypoStackSize = staticData.GetMaxHypoStackSize();
m_nBestIsEnabled = staticData.IsNBestEnabled();
m_maxHypoStackSize = staticData.options().search.stack_size;
m_nBestIsEnabled = staticData.options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
}

View File

@ -207,7 +207,7 @@ void ChartManager::CalcNBest(
// with 0 being 'unlimited.' This actually sets a large-ish limit in case
// too many translations are identical.
const StaticData &staticData = StaticData::Instance();
const std::size_t nBestFactor = staticData.GetNBestFactor();
const std::size_t nBestFactor = staticData.options().nbest.factor;
std::size_t numDerivations = (nBestFactor == 0) ? n*1000 : n*nBestFactor;
// Extract the derivations.
@ -318,13 +318,14 @@ void ChartManager::OutputBest(OutputCollector *collector) const
void ChartManager::OutputNBest(OutputCollector *collector) const
{
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
if (nBestSize > 0) {
const size_t translationId = m_source.GetTranslationId();
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO " << staticData.GetNBestFilePath() << endl);
VERBOSE(2,"WRITING " << nBestSize << " TRANSLATION ALTERNATIVES TO "
<< staticData.options().nbest.output_file_path << endl);
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList,staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList,staticData.options().nbest.only_distinct);
OutputNBestList(collector, nBestList, translationId);
IFVERBOSE(2) {
PrintUserTime("N-Best Hypotheses Generation Time:");
@ -348,10 +349,9 @@ void ChartManager::OutputNBestList(OutputCollector *collector,
FixPrecision(out);
}
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
NBestOptions const& nbo = StaticData::Instance().options().nbest;
bool includeWordAlignment = nbo.include_alignment_info;
bool PrintNBestTrees = nbo.print_trees;
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {
@ -620,9 +620,9 @@ void ChartManager::OutputDetailedTranslationReport(
if (staticData.IsDetailedAllTranslationReportingEnabled()) {
const Sentence &sentence = dynamic_cast<const Sentence &>(m_source);
size_t nBestSize = staticData.GetNBestSize();
size_t nBestSize = staticData.options().nbest.nbest_size;
std::vector<boost::shared_ptr<ChartKBestExtractor::Derivation> > nBestList;
CalcNBest(nBestSize, nBestList, staticData.GetDistinctNBest());
CalcNBest(nBestSize, nBestList, staticData.options().nbest.nbest_size);
OutputDetailedAllTranslationReport(collector, nBestList, sentence, translationId);
}

View File

@ -106,7 +106,8 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
targetPhrase->SetTargetLHS(targetLHS);
targetPhrase->SetAlignmentInfo("0-0");
targetPhrase->EvaluateInIsolation(*unksrc);
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.PrintNBestTrees() || staticData.GetTreeStructure() != NULL) {
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled() || staticData.options().nbest.print_trees || staticData.GetTreeStructure() != NULL) {
targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]");
}

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
#include "ConfusionNet.h"
@ -65,9 +66,9 @@ ConfusionNet() : InputType()
{
stats.createOne();
const StaticData& staticData = StaticData::Instance();
if (staticData.IsSyntax()) {
m_defaultLabelSet.insert(StaticData::Instance().GetInputDefaultNonTerminal());
const StaticData& SD = StaticData::Instance();
if (SD.IsSyntax()) {
m_defaultLabelSet.insert(SD.GetInputDefaultNonTerminal());
}
UTIL_THROW_IF2(&InputFeature::Instance() == NULL, "Input feature must be specified");
}

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id: ExportInterface.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
/***********************************************************************
@ -63,9 +64,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <xmlrpc-c/base.hpp>
#include <xmlrpc-c/registry.hpp>
#include <xmlrpc-c/server_abyss.hpp>
#include "server/Translator.h"
#include "server/Optimizer.h"
#include "server/Updater.h"
#include "server/Server.h"
#endif
using namespace std;
@ -147,41 +146,9 @@ int
run_as_server()
{
#ifdef HAVE_XMLRPC_C
int port;
params.SetParameter(port, "server-port", 8080);
bool isSerial;
params.SetParameter(isSerial, "serial", false);
string logfile;
params.SetParameter(logfile, "server-log", string(""));
size_t num_threads;
params.SetParameter(num_threads, "threads", size_t(10));
if (isSerial) VERBOSE(1,"Running server in serial mode." << endl);
xmlrpc_c::registry myRegistry;
xmlrpc_c::methodPtr const translator(new MosesServer::Translator(num_threads));
xmlrpc_c::methodPtr const updater(new MosesServer::Updater);
xmlrpc_c::methodPtr const optimizer(new MosesServer::Optimizer);
myRegistry.addMethod("translate", translator);
myRegistry.addMethod("updater", updater);
myRegistry.addMethod("optimize", optimizer);
xmlrpc_c::serverAbyss myAbyssServer(myRegistry, port, logfile);
XVERBOSE(1,"Listening on port " << port << endl);
if (isSerial) {
while(1) myAbyssServer.runOnce();
} else myAbyssServer.run();
std::cerr << "xmlrpc_c::serverAbyss.run() returned but should not." << std::endl;
// #pragma message("BUILDING MOSES WITH SERVER SUPPORT")
#else
// #pragma message("BUILDING MOSES WITHOUT SERVER SUPPORT")
std::cerr << "Moses was compiled without server support." << endl;
MosesServer::Server server(params);
return server.run(); // actually: don't return. see Server::run()
#endif
return 1;
}
int
@ -212,31 +179,58 @@ batch_run()
ThreadPool pool(staticData.ThreadCount());
#endif
// using context for adaptation:
// e.g., context words / strings from config file / cmd line
std::string context_string;
params.SetParameter(context_string,"context-string",string(""));
// ... or weights for documents/domains from config file / cmd. line
std::string context_weights;
params.SetParameter(context_weights,"context-weights",string(""));
// main loop over set of input sentences
// ... or the surrounding context (--context-window ...)
size_t size_t_max = std::numeric_limits<size_t>::max();
bool use_context_window = ioWrapper->GetLookAhead() || ioWrapper->GetLookBack();
bool use_context = use_context_window || context_string.size();
bool use_sliding_context_window = (use_context_window
&& ioWrapper->GetLookAhead() != size_t_max);
boost::shared_ptr<std::vector<std::string> > context_window;
boost::shared_ptr<std::vector<std::string> >* cw;
cw = use_context_window ? &context_window : NULL;
if (!cw && context_string.size())
context_window.reset(new std::vector<std::string>(1,context_string));
// global scope of caches, biases, etc., if any
boost::shared_ptr<ContextScope> gscope;
if (!use_sliding_context_window)
gscope.reset(new ContextScope);
// main loop over set of input sentences
boost::shared_ptr<InputType> source;
while ((source = ioWrapper->ReadInput()) != NULL) {
while ((source = ioWrapper->ReadInput(cw)) != NULL) {
IFVERBOSE(1) ResetUserTime();
// set up task of translating one sentence
boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper);
if (source->GetContext())
task->SetContextString(*source->GetContext());
else task->SetContextString(context_string);
boost::shared_ptr<ContextScope> lscope;
if (gscope) lscope = gscope;
else lscope.reset(new ContextScope);
//if (source->GetContextWeights().isEmpty())
// task->SetContextWeights(*source->GetContextWeights());
/*else //The context_weights will never be passed to the config file.*/
if (context_weights != "") {
task->SetContextWeights(context_weights);
boost::shared_ptr<TranslationTask> task;
task = TranslationTask::create(source, ioWrapper, lscope);
if (cw) {
if (context_string.size())
context_window->push_back(context_string);
if(!use_sliding_context_window)
cw = NULL;
}
if (context_window)
task->SetContextWindow(context_window);
if (context_weights != "")
task->SetContextWeights(context_weights);
// Allow for (sentence-)context-specific processing prior to
// decoding. This can be used, for example, for context-sensitive
// phrase lookup.

View File

@ -1,3 +1,4 @@
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
#pragma once
// $Id$

View File

@ -43,7 +43,9 @@ ConstrainedDecoding::ConstrainedDecoding(const std::string &line)
void ConstrainedDecoding::Load()
{
const StaticData &staticData = StaticData::Instance();
bool addBeginEndWord = (staticData.GetSearchAlgorithm() == CYKPlus) || (staticData.GetSearchAlgorithm() == ChartIncremental);
bool addBeginEndWord
= ((staticData.options().search.algo == CYKPlus)
|| (staticData.options().search.algo == ChartIncremental));
for(size_t i = 0; i < m_paths.size(); ++i) {
InputFileStream constraintFile(m_paths[i]);

View File

@ -6,7 +6,6 @@
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryScope3.h"
#include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
@ -152,7 +151,7 @@ FeatureFactory
::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
const string &featureName = feature->GetScoreProducerDescription();
const std::string &featureName = feature->GetScoreProducerDescription();
std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
@ -165,8 +164,8 @@ FeatureFactory
<< "WARNING: Auto-initializing all weights for this FF to 1.0");
weights.assign(feature->GetNumScoreComponents(),1.0);
} else {
TRACE_ERR("WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
VERBOSE(2,"WARNING: No weights specified in config file for FF "
<< featureName << ". Using default values supplied by FF.");
}
}
UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
@ -215,7 +214,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
// MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(PhraseDictionaryTransliteration);
MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
MOSES_FNAME(PhraseDictionaryFuzzyMatch);
@ -353,18 +352,18 @@ void FeatureRegistry::Construct(const std::string &name, const std::string &line
void FeatureRegistry::PrintFF() const
{
vector<string> ffs;
std::vector<std::string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const string &ffName = iter->first;
const std::string &ffName = iter->first;
ffs.push_back(ffName);
}
vector<string>::const_iterator iterVec;
std::vector<std::string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const string &ffName = *iterVec;
const std::string &ffName = *iterVec;
std::cerr << ffName << " ";
}

View File

@ -19,8 +19,8 @@ HyperParameterAsWeight::HyperParameterAsWeight(const std::string &line)
vector<float> weights = staticData.GetWeights(this);
staticData.m_maxHypoStackSize = weights[0] * 1000;
staticData.m_beamWidth = weights[1] * 10;
staticData.m_options.search.stack_size = weights[0] * 1000;
staticData.m_options.search.beam_width = weights[1] * 10;
}

View File

@ -1,27 +1,24 @@
#include "InternalTree.h"
#include "moses/StaticData.h"
namespace Moses
{
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool terminal):
m_value_nt(0),
m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, size_t start, size_t len, const bool nonterminal)
{
if (len > 0) {
m_value.assign(line, start, len);
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(start, len), nonterminal);
}
}
InternalTree::InternalTree(const std::string & line, const bool terminal):
m_value_nt(0),
m_isTerminal(terminal)
InternalTree::InternalTree(const std::string & line, const bool nonterminal)
{
size_t found = line.find_first_of("[] ");
if (found == line.npos) {
m_value = line;
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), line, nonterminal);
} else {
AddSubTree(line, 0);
}
@ -32,6 +29,7 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
char token = 0;
size_t len = 0;
bool has_value = false;
while (token != ']' && pos != std::string::npos) {
size_t oldpos = pos;
@ -41,30 +39,27 @@ size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
len = pos-oldpos;
if (token == '[') {
if (!m_value.empty()) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
if (has_value) {
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
pos = m_children.back()->AddSubTree(line, pos+1);
} else {
if (len > 0) {
m_value.assign(line, oldpos, len);
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), false);
has_value = true;
}
pos = AddSubTree(line, pos+1);
}
} else if (token == ' ' || token == ']') {
if (len > 0 && m_value.empty()) {
m_value.assign(line, oldpos, len);
if (len > 0 && !has_value) {
m_value.CreateFromString(Output, StaticData::Instance().GetOutputFactorOrder(), StringPiece(line).substr(oldpos, len), true);
has_value = true;
} else if (len > 0) {
m_isTerminal = false;
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, true));
m_children.push_back(boost::make_shared<InternalTree>(line, oldpos, len, false));
}
if (token == ' ') {
pos++;
}
}
if (!m_children.empty()) {
m_isTerminal = false;
}
}
if (pos == std::string::npos) {
@ -82,16 +77,16 @@ std::string InternalTree::GetString(bool start) const
ret += " ";
}
if (!m_isTerminal) {
if (!IsTerminal()) {
ret += "[";
}
ret += m_value;
ret += m_value.GetString(StaticData::Instance().GetOutputFactorOrder(), false);
for (std::vector<TreePointer>::const_iterator it = m_children.begin(); it != m_children.end(); ++it) {
ret += (*it)->GetString(false);
}
if (!m_isTerminal) {
if (!IsTerminal()) {
ret += "]";
}
return ret;
@ -120,13 +115,13 @@ void InternalTree::Unbinarize()
{
// nodes with virtual label cannot be unbinarized
if (m_value.empty() || m_value[0] == '^') {
if (m_value.GetString(0).empty() || m_value.GetString(0).as_string()[0] == '^') {
return;
}
//if node has child that is virtual node, get unbinarized list of children
for (std::vector<TreePointer>::iterator it = m_children.begin(); it != m_children.end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLabel()[0] == '^') {
if (!(*it)->IsTerminal() && (*it)->GetLabel().GetString(0).as_string()[0] == '^') {
std::vector<TreePointer> new_children;
GetUnbinarizedChildren(new_children);
m_children = new_children;
@ -144,8 +139,8 @@ void InternalTree::Unbinarize()
void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
{
for (std::vector<TreePointer>::const_iterator itx = m_children.begin(); itx != m_children.end(); ++itx) {
const std::string &label = (*itx)->GetLabel();
if (!label.empty() && label[0] == '^') {
const StringPiece label = (*itx)->GetLabel().GetString(0);
if (!label.empty() && label.as_string()[0] == '^') {
(*itx)->GetUnbinarizedChildren(ret);
} else {
ret.push_back(*itx);
@ -153,7 +148,7 @@ void InternalTree::GetUnbinarizedChildren(std::vector<TreePointer> &ret) const
}
}
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -163,7 +158,7 @@ bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -178,7 +173,7 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
bool InternalTree::RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
@ -194,88 +189,4 @@ bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePo
return false;
}
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const
{
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
}

View File

@ -5,30 +5,28 @@
#include <map>
#include <vector>
#include "FFState.h"
#include "moses/Word.h"
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
#include "util/string_piece.hh"
namespace Moses
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
class InternalTree
{
std::string m_value;
NTLabel m_value_nt;
Word m_value;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, size_t start, size_t len, const bool terminal);
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const std::string & line, const bool nonterminal = true);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
m_value(tree.m_value) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it));
@ -40,20 +38,10 @@ public:
void Combine(const std::vector<TreePointer> &previous);
void Unbinarize();
void GetUnbinarizedChildren(std::vector<TreePointer> &children) const;
const std::string & GetLabel() const {
const Word & GetLabel() const {
return m_value;
}
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
size_t GetLength() const {
return m_children.size();
}
@ -62,38 +50,22 @@ public:
}
bool IsTerminal() const {
return m_isTerminal;
return !m_value.IsNonTerminal();
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
return (m_value.IsNonTerminal() && m_children.size() == 0);
}
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool FlatSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
bool RecursiveSearch(const Word & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// Python-like generator that yields next nonterminal leaf on every call
$generator(leafNT) {

View File

@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <string>

View File

@ -1,6 +1,5 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width:2 -*-
#pragma once
#include <vector>
#include <string>
@ -12,7 +11,6 @@
#include "moses/WordsBitmap.h"
#include "moses/TranslationOption.h"
#include "moses/FF/FFState.h"
#include "ReorderingStack.h"
namespace Moses

View File

@ -75,7 +75,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
if (! ( (id == 1) && (tokens[1] == "UNK") )) {
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id);
@ -86,7 +86,7 @@ void Model1Vocabulary::Load(const std::string& fileName)
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned id = Scan<unsigned>(tokens[0]);
unsigned id = atoll( tokens[0].c_str() );
const Factor* factor = factorCollection.AddFactor(tokens[1],false); // TODO: can we assume that the vocabulary is know and filter the model on loading?
bool stored = Store(factor, id);
UTIL_THROW_IF2(!stored, "Line " << i << " in " << fileName << " overwrites existing vocabulary entry.");
@ -105,11 +105,11 @@ void Model1LexicalTable::Load(const std::string &fileName, const Model1Vocabular
++i;
std::vector<std::string> tokens = Tokenize(line);
UTIL_THROW_IF2(tokens.size()!=3, "Line " << i << " in " << fileName << " has wrong number of tokens.");
unsigned idS = Scan<unsigned>(tokens[0]);
unsigned idT = Scan<unsigned>(tokens[1]);
unsigned idS = atoll( tokens[0].c_str() );
unsigned idT = atoll( tokens[1].c_str() );
const Factor* wordS = vcbS.GetWord(idS);
const Factor* wordT = vcbT.GetWord(idT);
float prob = Scan<float>(tokens[2]);
float prob = std::atof( tokens[2].c_str() );
if ( (wordS != NULL) && (wordT != NULL) ) {
m_ltable[ wordS ][ wordT ] = prob;
}

View File

@ -16,21 +16,29 @@ namespace Moses
PhrasePairFeature::PhrasePairFeature(const std::string &line)
:StatelessFeatureFunction(0, line)
,m_unrestricted(false)
,m_simple(true)
,m_sourceContext(false)
,m_domainTrigger(false)
,m_ignorePunctuation(false)
{
std::cerr << "Initializing PhrasePairFeature.." << std::endl;
VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
ReadParameters();
if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
if (m_sourceContext == 1) std::cerr << "using source context.. ";
if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
if (m_simple == 1) VERBOSE(1, " Using simple phrase pairs.");
if (m_sourceContext == 1) VERBOSE(1, " Using source context.");
if (m_domainTrigger == 1) VERBOSE(1, " Using domain triggers.");
// compile a list of punctuation characters
if (m_ignorePunctuation) {
std::cerr << "ignoring punctuation for triggers.. ";
VERBOSE(1, " Ignoring punctuation for triggers.");
char punctuation[] = "\"'!?¿·()#_,.:;•&@/\\0123456789~=";
for (size_t i=0; i < sizeof(punctuation)-1; ++i)
for (size_t i=0; i < sizeof(punctuation)-1; ++i) {
m_punctuationHash[punctuation[i]] = 1;
}
}
VERBOSE(1, " Done." << std::endl);
}
void PhrasePairFeature::SetParameter(const std::string& key, const std::string& value)
@ -76,7 +84,7 @@ void PhrasePairFeature::Load()
}
inFileSource.close();
} else {
} else if (!m_unrestricted) {
// restricted source word vocabulary
ifstream inFileSource(m_filePathSource.c_str());
UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);
@ -101,8 +109,6 @@ void PhrasePairFeature::Load()
}
inFileTarget.close();*/
m_unrestricted = false;
}
}
@ -114,25 +120,6 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
, ScoreComponentCollection *estimatedFutureScore) const
{
const Phrase& source = inputPath.GetPhrase();
if (m_simple) {
ostringstream namestr;
namestr << "pp_";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
if (m_domainTrigger) {
const Sentence& isnt = static_cast<const Sentence&>(input);
const bool use_topicid = isnt.GetUseTopicId();
@ -140,18 +127,18 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// compute pair
ostringstream pair;
pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
pair << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
pair << ",";
pair << sourceFactor->GetString();
pair << "~";
pair << ReplaceTilde( sourceFactor->GetString() );
}
pair << "~";
pair << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
pair << "~~";
pair << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
pair << ",";
pair << targetFactor->GetString();
pair << "~";
pair << ReplaceTilde( targetFactor->GetString() );
}
if (use_topicid || use_topicid_prob) {
@ -159,7 +146,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
// use topicid as trigger
const long topicid = isnt.GetTopicId();
stringstream feature;
feature << "pp_";
feature << m_description << "_";
if (topicid == -1)
feature << "unk";
else
@ -173,13 +160,13 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
const vector<string> &topicid_prob = *(isnt.GetTopicIdAndProb());
if (atol(topicid_prob[0].c_str()) == -1) {
stringstream feature;
feature << "pp_unk_";
feature << m_description << "_unk_";
feature << pair.str();
scoreBreakdown.SparsePlusEquals(feature.str(), 1);
} else {
for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
stringstream feature;
feature << "pp_";
feature << m_description << "_";
feature << topicid_prob[i];
feature << "_";
feature << pair.str();
@ -193,7 +180,7 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
string sourceTrigger = *p;
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "_";
namestr << pair.str();
@ -221,21 +208,21 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
if (m_unrestricted || sourceTriggerExists) {
ostringstream namestr;
namestr << "pp_";
namestr << m_description << "_";
namestr << sourceTrigger;
namestr << "~";
namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << ",";
namestr << sourceFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~";
namestr << targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString();
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << ",";
namestr << targetFactor->GetString();
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
@ -244,6 +231,31 @@ void PhrasePairFeature::EvaluateWithSourceContext(const InputType &input
}
}
void PhrasePairFeature::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
if (m_simple) {
ostringstream namestr;
namestr << m_description << "_";
namestr << ReplaceTilde( source.GetWord(0).GetFactor(m_sourceFactorId)->GetString() );
for (size_t i = 1; i < source.GetSize(); ++i) {
const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
namestr << "~";
namestr << ReplaceTilde( sourceFactor->GetString() );
}
namestr << "~~";
namestr << ReplaceTilde( targetPhrase.GetWord(0).GetFactor(m_targetFactorId)->GetString() );
for (size_t i = 1; i < targetPhrase.GetSize(); ++i) {
const Factor* targetFactor = targetPhrase.GetWord(i).GetFactor(m_targetFactorId);
namestr << "~";
namestr << ReplaceTilde( targetFactor->GetString() );
}
scoreBreakdown.SparsePlusEquals(namestr.str(),1);
}
}
bool PhrasePairFeature::IsUseable(const FactorMask &mask) const
{
bool ret = mask[m_targetFactorId];

View File

@ -1,5 +1,4 @@
#ifndef moses_PhrasePairFeature_h
#define moses_PhrasePairFeature_h
#pragma once
#include <stdexcept>
#include <boost/unordered_set.hpp>
@ -32,6 +31,16 @@ class PhrasePairFeature: public StatelessFeatureFunction
CharHash m_punctuationHash;
std::string m_filePathSource;
inline std::string ReplaceTilde(const StringPiece &str) const {
std::string out = str.as_string();
size_t pos = out.find('~');
while ( pos != std::string::npos ) {
out.replace(pos,1,"<TILDE>");
pos = out.find('~',pos);
}
return out;
};
public:
PhrasePairFeature(const std::string &line);
@ -43,8 +52,7 @@ public:
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const {
}
, ScoreComponentCollection &estimatedFutureScore) const;
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
@ -69,5 +77,3 @@ public:
}
#endif

View File

@ -12,7 +12,7 @@ namespace Moses
{
RulePairUnlexicalizedSource::RulePairUnlexicalizedSource(const std::string &line)
: StatelessFeatureFunction(0, line)
: StatelessFeatureFunction(1, line)
, m_glueRules(false)
, m_nonGlueRules(true)
, m_glueTargetLHSStr("Q")
@ -81,6 +81,9 @@ void RulePairUnlexicalizedSource::EvaluateInIsolation(const Phrase &source
}
scoreBreakdown.PlusEquals(this, namestr.str(), 1);
if ( targetPhraseLHS != m_glueTargetLHS ) {
scoreBreakdown.PlusEquals(this, 1);
}
}
}

View File

@ -34,7 +34,7 @@ public:
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const {
vector<float> newScores(m_numScoreComponents);
std::vector<float> newScores(m_numScoreComponents);
newScores[0] = translationOptionList.size();
TranslationOptionList::const_iterator iterTransOpt;

View File

@ -13,6 +13,7 @@ namespace Moses
SoftMatchingFeature::SoftMatchingFeature(const std::string &line)
: StatelessFeatureFunction(0, line)
, m_softMatches(moses_MaxNumNonterminals)
, m_scoreIdentical(true)
{
ReadParameters();
}
@ -26,6 +27,8 @@ void SoftMatchingFeature::SetParameter(const std::string& key, const std::string
} else if (key == "path") {
const std::string filePath = value;
Load(filePath);
} else if (key == "score-identical") {
m_scoreIdentical = Scan<bool>(value);
} else {
UTIL_THROW(util::Exception, "Unknown argument " << key << "=" << value);
}
@ -80,8 +83,10 @@ void SoftMatchingFeature::EvaluateWhenApplied(const ChartHypothesis& hypo,
const ChartHypothesis* prevHypo = hypo.GetPrevHypo(nonTermInd);
const Word& prevLHS = prevHypo->GetTargetLHS();
const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
if ( (word != prevLHS) || m_scoreIdentical ) {
const std::string &name = GetOrSetFeatureName(word, prevLHS);
accumulator->PlusEquals(this,name,1);
}
}
}
}

View File

@ -55,6 +55,7 @@ public:
private:
mutable std::vector<std::vector<Word> > m_softMatches; // map RHS of new rule to list of possible LHS of old rule (subtree)
mutable std::vector<std::vector<std::string> > m_nameCache;
bool m_scoreIdentical;
#ifdef WITH_THREADS
//reader-writer lock

View File

@ -38,9 +38,8 @@ void SourceWordDeletionFeature::SetParameter(const std::string& key, const std::
void SourceWordDeletionFeature::Load()
{
if (m_filename == "") {
if (m_filename.empty())
return;
}
FEATUREVERBOSE(1, "Loading source word deletion word list from " << m_filename << std::endl);
ifstream inFile(m_filename.c_str());

View File

@ -13,33 +13,12 @@ void TreeStructureFeature::Load()
// syntactic constraints can be hooked in here.
m_constraints = NULL;
m_labelset = NULL;
StaticData &staticData = StaticData::InstanceNonConst();
staticData.SetTreeStructure(this);
}
// define NT labels (ints) that are mapped from strings for quicker comparison.
void TreeStructureFeature::AddNTLabels(TreePointer root) const
{
std::string label = root->GetLabel();
if (root->IsTerminal()) {
return;
}
std::map<std::string, NTLabel>::const_iterator it = m_labelset->string_to_label.find(label);
if (it != m_labelset->string_to_label.end()) {
root->SetNTLabel(it->second);
}
std::vector<TreePointer> children = root->GetChildren();
for (std::vector<TreePointer>::const_iterator it2 = children.begin(); it2 != children.end(); ++it2) {
AddNTLabels(*it2);
}
}
FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
, int featureID /* used to index the state in the previous hypotheses */
, ScoreComponentCollection* accumulator) const
@ -48,10 +27,6 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
const std::string *tree = property->GetValueString();
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
if (m_labelset) {
AddNTLabels(mytree);
}
//get subtrees (in target order)
std::vector<TreePointer> previous_trees;
for (size_t pos = 0; pos < cur_hypo.GetCurrTargetPhrase().GetSize(); ++pos) {
@ -70,7 +45,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
}
mytree->Combine(previous_trees);
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == "</s>" || (mytree->GetChildren().back()->GetLabel() == "SEND" && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == "</s>"));
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == m_send || (mytree->GetChildren().back()->GetLabel() == m_send_nt && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == m_send));
if (m_binarized && full_sentence) {
mytree->Unbinarize();
}

View File

@ -4,6 +4,7 @@
#include <map>
#include "StatefulFeatureFunction.h"
#include "FFState.h"
#include "moses/Word.h"
#include "InternalTree.h"
namespace Moses
@ -35,11 +36,18 @@ class TreeStructureFeature : public StatefulFeatureFunction
SyntaxConstraints* m_constraints;
LabelSet* m_labelset;
bool m_binarized;
Word m_send;
Word m_send_nt;
public:
TreeStructureFeature(const std::string &line)
:StatefulFeatureFunction(0, line)
, m_binarized(false) {
ReadParameters();
std::vector<FactorType> factors;
factors.push_back(0);
m_send.CreateFromString(Output, factors, "</s>", false);
m_send_nt.CreateFromString(Output, factors, "SEND", true);
}
~TreeStructureFeature() {
delete m_constraints;
@ -49,8 +57,6 @@ public:
return new TreeState(TreePointer());
}
void AddNTLabels(TreePointer root) const;
bool IsUseable(const FactorMask &mask) const {
return true;
}

View File

@ -307,7 +307,7 @@ public:
}
virtual void InitializeForInput(ttasksptr const& ttask) {
InputType const& source = ttask->GetSource();
InputType const& source = *(ttask->GetSource().get());
// tabbed sentence is assumed only in training
if (! m_train)
return;

View File

@ -5,6 +5,7 @@
#include "vw/Classifier.h"
#include "moses/TypeDef.h"
#include "moses/TranslationTask.h"
#include "moses/Util.h"
#include "moses/FF/StatelessFeatureFunction.h"

View File

@ -40,7 +40,7 @@ public:
}
virtual void InitializeForInput(ttasksptr const& ttask) {
InputType const& source = ttask->GetSource();
InputType const& source = *(ttask->GetSource().get());
UTIL_THROW_IF2(source.GetType() != TabbedSentenceInput,
"This feature function requires the TabbedSentence input type");

View File

@ -110,7 +110,8 @@ void WordTranslationFeature::Load()
}
inFileSource.close();
} else {
} else if (!m_filePathSource.empty() || !m_filePathTarget.empty()) {
return;
// restricted source word vocabulary
ifstream inFileSource(m_filePathSource.c_str());
UTIL_THROW_IF2(!inFileSource, "could not open file " << m_filePathSource);

View File

@ -110,7 +110,7 @@ public:
*/
const Factor *AddFactor(const StringPiece &factorString, bool isNonTerminal = false);
const size_t GetNumNonTerminals() {
size_t GetNumNonTerminals() {
return m_factorIdNonTerminal;
}

View File

@ -213,7 +213,8 @@ RecombineCompare(const Hypothesis &compare) const
for (unsigned i = 0; i < m_ffStates.size(); ++i) {
if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
comp = m_ffStates[i] - compare.m_ffStates[i];
// TODO: Can this situation actually occur?
comp = int(m_ffStates[i] != NULL) - int(compare.m_ffStates[i] != NULL);
} else {
comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
}
@ -361,14 +362,14 @@ CleanupArcList()
* so we'll keep all of arc list if nedd distinct n-best list
*/
const StaticData &staticData = StaticData::Instance();
size_t nBestSize = staticData.GetNBestSize();
bool distinctNBest = (staticData.GetDistinctNBest() ||
size_t nBestSize = staticData.options().nbest.nbest_size;
bool distinctNBest = (m_manager.options().nbest.only_distinct ||
staticData.GetLatticeSamplesSize() ||
staticData.UseMBR() ||
m_manager.options().mbr.enabled ||
staticData.GetOutputSearchGraph() ||
staticData.GetOutputSearchGraphSLF() ||
staticData.GetOutputSearchGraphHypergraph() ||
staticData.UseLatticeMBR());
m_manager.options().lmbr.enabled);
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
// prune arc list only if there too many arcs
@ -585,7 +586,9 @@ OutputSurface(std::ostream &out, const Hypothesis &edge,
//preface surface form with UNK if marking unknowns
const Word &word = phrase.GetWord(pos);
if(markUnknown && word.IsOOV()) {
out << "UNK" << *factor;
out << StaticData::Instance().GetUnknownWordPrefix()
<< *factor
<< StaticData::Instance().GetUnknownWordSuffix();
} else {
out << *factor;
}

View File

@ -36,7 +36,7 @@ namespace Moses
HypothesisStackCubePruning::HypothesisStackCubePruning(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

View File

@ -36,7 +36,7 @@ namespace Moses
HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
HypothesisStack(manager)
{
m_nBestIsEnabled = StaticData::Instance().IsNBestEnabled();
m_nBestIsEnabled = StaticData::Instance().options().nbest.enabled;
m_bestScore = -std::numeric_limits<float>::infinity();
m_worstScore = -std::numeric_limits<float>::infinity();
}

View File

@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <iostream>
#include <stack>
#include <boost/algorithm/string.hpp>
#include <boost/foreach.hpp>
#include "moses/Syntax/KBestExtractor.h"
#include "moses/Syntax/PVertex.h"
@ -78,12 +79,12 @@ namespace Moses
IOWrapper::IOWrapper()
: m_nBestStream(NULL)
, m_outputWordGraphStream(NULL)
, m_outputSearchGraphStream(NULL)
, m_detailedTranslationReportingStream(NULL)
, m_unknownsStream(NULL)
, m_alignmentInfoStream(NULL)
, m_latticeSamplesStream(NULL)
// , m_outputWordGraphStream(NULL)
// , m_outputSearchGraphStream(NULL)
// , m_detailedTranslationReportingStream(NULL)
// , m_unknownsStream(NULL)
// , m_alignmentInfoStream(NULL)
// , m_latticeSamplesStream(NULL)
, m_surpressSingleBestOutput(false)
, m_look_ahead(0)
, m_look_back(0)
@ -93,10 +94,11 @@ IOWrapper::IOWrapper()
, spe_aln(NULL)
{
const StaticData &staticData = StaticData::Instance();
Parameter const& P = staticData.GetParameter();
// context buffering for context-sensitive decoding
m_look_ahead = staticData.GetContextParameters().look_ahead;
m_look_back = staticData.GetContextParameters().look_back;
m_look_ahead = staticData.options().context.look_ahead;
m_look_back = staticData.options().context.look_back;
m_inputType = staticData.GetInputType();
@ -107,8 +109,8 @@ IOWrapper::IOWrapper()
m_inputFactorOrder = &staticData.GetInputFactorOrder();
size_t nBestSize = staticData.GetNBestSize();
string nBestFilePath = staticData.GetNBestFilePath();
size_t nBestSize = staticData.options().nbest.nbest_size;
string nBestFilePath = staticData.options().nbest.output_file_path;
staticData.GetParameter().SetParameter<string>(m_inputFilePath, "input-file", "");
if (m_inputFilePath.empty()) {
@ -121,95 +123,38 @@ IOWrapper::IOWrapper()
}
if (nBestSize > 0) {
if (nBestFilePath == "-" || nBestFilePath == "/dev/stdout") {
m_nBestStream = &std::cout;
m_nBestOutputCollector.reset(new Moses::OutputCollector(&std::cout));
m_nBestOutputCollector.reset(new Moses::OutputCollector(nBestFilePath));
if (m_nBestOutputCollector->OutputIsCout()) {
m_surpressSingleBestOutput = true;
} else {
std::ofstream *file = new std::ofstream;
file->open(nBestFilePath.c_str());
m_nBestStream = file;
m_nBestOutputCollector.reset(new Moses::OutputCollector(file));
//m_nBestOutputCollector->HoldOutputStream();
}
}
// search graph output
if (staticData.GetOutputSearchGraph()) {
string fileName;
if (staticData.GetOutputSearchGraphExtended()) {
staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph-extended", "");
} else {
staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
}
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
file->open(fileName.c_str());
}
std::string path;
P.SetParameter<std::string>(path, "output-search-graph-extended", "");
if (!path.size()) P.SetParameter<std::string>(path, "output-search-graph", "");
if (path.size()) m_searchGraphOutputCollector.reset(new OutputCollector(path));
if (!staticData.GetOutputUnknownsFile().empty()) {
m_unknownsStream = new std::ofstream(staticData.GetOutputUnknownsFile().c_str());
m_unknownsCollector.reset(new Moses::OutputCollector(m_unknownsStream));
UTIL_THROW_IF2(!m_unknownsStream->good(),
"File for unknowns words could not be opened: " <<
staticData.GetOutputUnknownsFile());
}
P.SetParameter<std::string>(path, "output-unknowns", "");
if (path.size()) m_unknownsCollector.reset(new OutputCollector(path));
if (!staticData.GetAlignmentOutputFile().empty()) {
m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
m_alignmentInfoCollector.reset(new Moses::OutputCollector(m_alignmentInfoStream));
UTIL_THROW_IF2(!m_alignmentInfoStream->good(),
"File for alignment output could not be opened: " << staticData.GetAlignmentOutputFile());
}
P.SetParameter<std::string>(path, "alignment-output-file", "");
if (path.size()) m_alignmentInfoCollector.reset(new OutputCollector(path));
if (staticData.GetOutputSearchGraph()) {
string fileName;
staticData.GetParameter().SetParameter<string>(fileName, "output-search-graph", "");
P.SetParameter<string>(path, "translation-details", "");
if (path.size()) m_detailedTranslationCollector.reset(new OutputCollector(path));
std::ofstream *file = new std::ofstream;
m_outputSearchGraphStream = file;
file->open(fileName.c_str());
m_searchGraphOutputCollector.reset(new Moses::OutputCollector(m_outputSearchGraphStream));
}
P.SetParameter<string>(path, "tree-translation-details", "");
if (path.size()) m_detailTreeFragmentsOutputCollector.reset(new OutputCollector(path));
// detailed translation reporting
if (staticData.IsDetailedTranslationReportingEnabled()) {
const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
m_detailedTranslationCollector.reset(new Moses::OutputCollector(m_detailedTranslationReportingStream));
}
if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) {
const std::string &path = staticData.GetDetailedTreeFragmentsTranslationReportingFilePath();
m_detailedTreeFragmentsTranslationReportingStream = new std::ofstream(path.c_str());
m_detailTreeFragmentsOutputCollector.reset(new Moses::OutputCollector(m_detailedTreeFragmentsTranslationReportingStream));
}
// wordgraph output
if (staticData.GetOutputWordGraph()) {
string fileName;
staticData.GetParameter().SetParameter<string>(fileName, "output-word-graph", "");
std::ofstream *file = new std::ofstream;
m_outputWordGraphStream = file;
file->open(fileName.c_str());
m_wordGraphCollector.reset(new OutputCollector(m_outputWordGraphStream));
}
P.SetParameter<string>(path, "output-word-graph", "");
if (path.size()) m_wordGraphCollector.reset(new OutputCollector(path));
size_t latticeSamplesSize = staticData.GetLatticeSamplesSize();
string latticeSamplesFile = staticData.GetLatticeSamplesFilePath();
if (latticeSamplesSize) {
if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") {
m_latticeSamplesCollector.reset(new OutputCollector());
m_latticeSamplesCollector.reset(new OutputCollector(latticeSamplesFile));
if (m_latticeSamplesCollector->OutputIsCout()) {
m_surpressSingleBestOutput = true;
} else {
m_latticeSamplesStream = new ofstream(latticeSamplesFile.c_str());
if (!m_latticeSamplesStream->good()) {
TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl);
exit(1);
}
m_latticeSamplesCollector.reset(new OutputCollector(m_latticeSamplesStream));
}
}
@ -235,6 +180,7 @@ IOWrapper::IOWrapper()
<< "' for hypergraph output!");
fmt += string("%d.") + extension;
// input streams for simulated post-editing
if (staticData.GetParameter().GetParam("spe-src")) {
spe_src = new ifstream(staticData.GetParameter().GetParam("spe-src")->at(0).c_str());
spe_trg = new ifstream(staticData.GetParameter().GetParam("spe-trg")->at(0).c_str());
@ -246,17 +192,17 @@ IOWrapper::~IOWrapper()
{
if (m_inputFile != NULL)
delete m_inputFile;
if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
// outputting n-best to file, rather than stdout. need to close file and delete obj
delete m_nBestStream;
}
// if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
// outputting n-best to file, rather than stdout. need to close file and delete obj
// delete m_nBestStream;
// }
delete m_detailedTranslationReportingStream;
delete m_alignmentInfoStream;
delete m_unknownsStream;
delete m_outputSearchGraphStream;
delete m_outputWordGraphStream;
delete m_latticeSamplesStream;
// delete m_detailedTranslationReportingStream;
// delete m_alignmentInfoStream;
// delete m_unknownsStream;
// delete m_outputSearchGraphStream;
// delete m_outputWordGraphStream;
// delete m_latticeSamplesStream;
}
// InputType*
@ -297,7 +243,7 @@ GetBufferedInput()
boost::shared_ptr<InputType>
IOWrapper::
ReadInput()
ReadInput(boost::shared_ptr<std::vector<std::string> >* cw)
{
#ifdef WITH_THREADS
boost::lock_guard<boost::mutex> lock(m_lock);
@ -305,48 +251,32 @@ ReadInput()
boost::shared_ptr<InputType> source = GetBufferedInput();
if (source) {
source->SetTranslationId(m_currentLine++);
if (m_look_ahead || m_look_back)
this->set_context_for(*source);
// when using a sliding context window, remove obsolete past input from buffer:
if (m_past_input.size() && m_look_back != std::numeric_limits<size_t>::max()) {
list<boost::shared_ptr<InputType> >::iterator m = m_past_input.end();
for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();)
cnt += (*m)->GetSize();
while (m_past_input.begin() != m) m_past_input.pop_front();
}
if (m_look_back)
m_past_input.push_back(source);
}
m_past_input.push_back(source);
if (cw) *cw = GetCurrentContextWindow();
return source;
}
void
boost::shared_ptr<std::vector<std::string> >
IOWrapper::
set_context_for(InputType& source)
GetCurrentContextWindow() const
{
boost::shared_ptr<string> context(new string);
list<boost::shared_ptr<InputType> >::iterator m = m_past_input.end();
// remove obsolete past input from buffer:
if (m_past_input.end() != m_past_input.begin()) {
for (size_t cnt = 0; cnt < m_look_back && --m != m_past_input.begin();
cnt += (*m)->GetSize());
while (m_past_input.begin() != m) m_past_input.pop_front();
}
// cerr << string(80,'=') << endl;
if (m_past_input.size()) {
m = m_past_input.begin();
*context += (*m)->ToString();
// cerr << (*m)->ToString() << endl;
for (++m; m != m_past_input.end(); ++m) {
// cerr << "\n" << (*m)->ToString() << endl;
*context += string(" ") + (*m)->ToString();
}
// cerr << string(80,'-') << endl;
}
// cerr << source.ToString() << endl;
if (m_future_input.size()) {
// cerr << string(80,'-') << endl;
for (m = m_future_input.begin(); m != m_future_input.end(); ++m) {
// if (m != m_future_input.begin()) cerr << "\n";
// cerr << (*m)->ToString() << endl;
if (context->size()) *context += " ";
*context += (*m)->ToString();
}
}
// cerr << string(80,'=') << endl;
if (context->size()) source.SetContext(context);
boost::shared_ptr<std::vector<string> > context(new std::vector<string>);
BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_past_input)
context->push_back(i->ToString());
BOOST_FOREACH(boost::shared_ptr<InputType> const& i, m_future_input)
context->push_back(i->ToString());
return context;
}

View File

@ -1,4 +1,4 @@
// -*- c++ -*-
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
// $Id$
/***********************************************************************
@ -45,6 +45,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <vector>
#include <list>
#include <iomanip>
#include <limits>
#include "moses/TypeDef.h"
#include "moses/Sentence.h"
@ -85,11 +86,11 @@ protected:
Moses::InputFileStream *m_inputFile;
std::istream *m_inputStream;
std::ostream *m_nBestStream;
std::ostream *m_outputWordGraphStream;
std::ostream *m_outputSearchGraphStream;
std::ostream *m_detailedTranslationReportingStream;
// std::ostream *m_outputWordGraphStream;
// std::auto_ptr<std::ostream> m_outputSearchGraphStream;
// std::ostream *m_detailedTranslationReportingStream;
std::ostream *m_unknownsStream;
std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
// std::ostream *m_detailedTreeFragmentsTranslationReportingStream;
std::ofstream *m_alignmentInfoStream;
std::ofstream *m_latticeSamplesStream;
@ -127,7 +128,9 @@ public:
~IOWrapper();
// Moses::InputType* GetInput(Moses::InputType *inputType);
boost::shared_ptr<InputType> ReadInput();
boost::shared_ptr<InputType>
ReadInput(boost::shared_ptr<std::vector<std::string> >* cw = NULL);
Moses::OutputCollector *GetSingleBestOutputCollector() {
return m_singleBestOutputCollector.get();
@ -181,6 +184,21 @@ public:
// post editing
std::ifstream *spe_src, *spe_trg, *spe_aln;
std::list<boost::shared_ptr<InputType> > const& GetPastInput() const {
return m_past_input;
}
std::list<boost::shared_ptr<InputType> > const& GetFutureInput() const {
return m_future_input;
}
size_t GetLookAhead() const {
return m_look_ahead;
}
size_t GetLookBack() const {
return m_look_back;
}
private:
template<class itype>
boost::shared_ptr<InputType>
@ -189,8 +207,8 @@ private:
boost::shared_ptr<InputType>
GetBufferedInput();
void
set_context_for(InputType& source);
boost::shared_ptr<std::vector<std::string> >
GetCurrentContextWindow() const;
};
template<class itype>
@ -210,10 +228,10 @@ BufferInput()
return ret;
ret = source;
}
while (m_buffered_ahead < m_look_ahead) {
source.reset(new itype);
if (!source->Read(*m_inputStream, *m_inputFactorOrder)) break;
if (!source->Read(*m_inputStream, *m_inputFactorOrder))
break;
m_future_input.push_back(source);
m_buffered_ahead += source->GetSize();
}

View File

@ -208,7 +208,7 @@ Manager::Manager(ttasksptr const& ttask)
: BaseManager(ttask)
, cells_(m_source, ChartCellBaseFactory(), parser_)
, parser_(ttask, cells_)
, n_best_(search::NBestConfig(StaticData::Instance().GetNBestSize()))
, n_best_(search::NBestConfig(StaticData::Instance().options().nbest.nbest_size))
{ }
Manager::~Manager()
@ -223,12 +223,17 @@ namespace
const float log_10 = logf(10);
}
template <class Model, class Best> search::History Manager::PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
template <class Model, class Best>
search::History
Manager::
PopulateBest(const Model &model, const std::vector<lm::WordIndex> &words, Best &out)
{
const LanguageModel &abstract = LanguageModel::GetFirstLM();
const float oov_weight = abstract.OOVFeatureEnabled() ? abstract.GetOOVWeight() : 0.0;
const StaticData &data = StaticData::Instance();
search::Config config(abstract.GetWeight() * log_10, data.GetCubePruningPopLimit(), search::NBestConfig(data.GetNBestSize()));
size_t cpl = data.options().cube.pop_limit;
size_t nbs = data.options().nbest.nbest_size;
search::Config config(abstract.GetWeight() * log_10, cpl, search::NBestConfig(nbs));
search::Context<Model> context(config, model);
size_t size = m_source.GetSize();
@ -255,7 +260,7 @@ template <class Model, class Best> search::History Manager::PopulateBest(const M
template <class Model> void Manager::LMCallback(const Model &model, const std::vector<lm::WordIndex> &words)
{
std::size_t nbest = StaticData::Instance().GetNBestSize();
std::size_t nbest = StaticData::Instance().options().nbest.nbest_size;
if (nbest <= 1) {
search::History ret = PopulateBest(model, words, single_best_);
if (ret) {

View File

@ -58,7 +58,7 @@ protected:
ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */
std::string m_textType;
std::string m_passthrough;
boost::shared_ptr<std::string> m_context;
boost::shared_ptr<std::vector<std::string> > m_context;
public:
// used in -continue-partial-translation
@ -173,13 +173,13 @@ public:
//! number of words in this sentence/confusion network
virtual size_t GetSize() const =0;
virtual boost::shared_ptr<std::string> const&
virtual boost::shared_ptr<std::vector<std::string> > const&
GetContext() const {
return m_context;
}
virtual void
SetContext(boost::shared_ptr<std::string> const& ctx) {
SetContext(boost::shared_ptr<std::vector<std::string> > const& ctx) {
m_context = ctx;
}

View File

@ -88,9 +88,9 @@ if $(with-ldhtlm) {
local with-nplm = [ option.get "with-nplm" ] ;
if $(with-nplm) {
lib nplm : : <search>$(with-nplm)/lib <search>$(with-nplm)/lib64 ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
obj BiLM_NPLM.o : bilingual-lm/BiLM_NPLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
obj RDLM.o : RDLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen ;
obj NeuralLMWrapper.o : NeuralLMWrapper.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
obj BiLM_NPLM.o : bilingual-lm/BiLM_NPLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <cxxflags>-fopenmp <define>NPLM_DOUBLE_PRECISION=0 ;
obj RDLM.o : RDLM.cpp nplm ..//headers : <include>$(with-nplm)/src <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
alias neural : NeuralLMWrapper.o nplm : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
alias bilinguallm : BiLM_NPLM.o nplm : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
alias rdlm : RDLM.o nplm : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>LM_NEURAL ;
@ -134,11 +134,11 @@ if $(with-dalm) {
}
#ORLM is always compiled but needs special headers
obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : : <include>../TranslationModel/DynSAInclude ;
#obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : : <include>../TranslationModel/DynSAInclude ;
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp ORLM.o
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp BilingualLM.cpp Implementation.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp SkeletonLM.cpp
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;

View File

@ -1,107 +0,0 @@
#include <limits>
#include <iostream>
#include <fstream>
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "ORLM.h"
using std::map;
namespace Moses
{
bool LanguageModelORLM::Load(const std::string &filePath, FactorType factorType,
size_t nGramOrder)
{
cerr << "Loading LanguageModelORLM..." << endl;
m_filePath = filePath;
m_factorType = factorType;
m_nGramOrder = nGramOrder;
FileHandler fLmIn(m_filePath, std::ios::in|std::ios::binary, true);
m_lm = new OnlineRLM<T>(&fLmIn, m_nGramOrder);
fLmIn.close();
//m_lm = new MultiOnlineRLM<T>(m_filePath, m_nGramOrder);
// get special word ids
m_oov_id = m_lm->vocab_->GetWordID("<unk>");
CreateFactors();
return true;
}
void LanguageModelORLM::CreateFactors()
{
FactorCollection &factorCollection = FactorCollection::Instance();
size_t maxFactorId = 0; // to create lookup vector later on
std::map<size_t, wordID_t> m_lmids_map; // map from factor id -> word id
for(std::map<Word, wordID_t>::const_iterator vIter = m_lm->vocab_->VocabStart();
vIter != m_lm->vocab_->VocabEnd(); vIter++) {
// get word from ORLM vocab and associate with (new) factor id
size_t factorId = factorCollection.AddFactor(Output,m_factorType,vIter->first.ToString())->GetId();
m_lmids_map[factorId] = vIter->second;
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
}
// add factors for BOS and EOS and store bf word ids
size_t factorId;
m_sentenceStart = factorCollection.AddFactor(Output, m_factorType, "<s>");
factorId = m_sentenceStart->GetId();
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
m_sentenceStartWord[m_factorType] = m_sentenceStart;
m_sentenceEnd = factorCollection.AddFactor(Output, m_factorType, "</s>");
factorId = m_sentenceEnd->GetId();
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
m_sentenceEndWord[m_factorType] = m_sentenceEnd;
// add to lookup vector in object
lm_ids_vec_.resize(maxFactorId+1);
// fill with OOV code
fill(lm_ids_vec_.begin(), lm_ids_vec_.end(), m_oov_id);
for (map<size_t, wordID_t>::const_iterator iter = m_lmids_map.begin();
iter != m_lmids_map.end() ; ++iter)
lm_ids_vec_[iter->first] = iter->second;
}
wordID_t LanguageModelORLM::GetLmID(const std::string& str) const
{
return m_lm->vocab_->GetWordID(str);
}
wordID_t LanguageModelORLM::GetLmID(const Factor* factor) const
{
size_t factorId = factor->GetId();
return (factorId >= lm_ids_vec_.size()) ? m_oov_id : lm_ids_vec_[factorId];
}
LMResult LanguageModelORLM::GetValue(const std::vector<const Word*> &contextFactor,
State* finalState) const
{
FactorType factorType = GetFactorType();
// set up context
//std::vector<long unsigned int> factor(1,0);
//std::vector<string> sngram;
wordID_t ngram[MAX_NGRAM_SIZE];
int count = contextFactor.size();
for (int i = 0; i < count; i++) {
ngram[i] = GetLmID((*contextFactor[i])[factorType]);
//sngram.push_back(contextFactor[i]->GetString(factor, false));
}
//float logprob = FloorScore(TransformLMScore(lm_->getProb(sngram, count, finalState)));
LMResult ret;
ret.score = FloorScore(TransformLMScore(m_lm->getProb(&ngram[0], count, finalState)));
ret.unknown = count && (ngram[count - 1] == m_oov_id);
/*if (finalState)
std::cout << " = " << logprob << "(" << *finalState << ", " << *len <<")"<< std::endl;
else
std::cout << " = " << logprob << std::endl;
*/
return ret;
}
bool LanguageModelORLM::UpdateORLM(const std::vector<string>& ngram, const int value)
{
/*cerr << "Inserting into ORLM: \"";
iterate(ngram, nit)
cerr << *nit << " ";
cerr << "\"\t" << value << endl; */
m_lm->vocab_->MakeOpen();
bool res = m_lm->update(ngram, value);
m_lm->vocab_->MakeClosed();
return res;
}
}

View File

@ -1,53 +0,0 @@
#pragma once
#include <string>
#include <vector>
#include "moses/Factor.h"
#include "moses/Util.h"
#include "SingleFactor.h"
#include "moses/TranslationModel/DynSAInclude/onlineRLM.h"
//#include "multiOnlineRLM.h"
#include "moses/TranslationModel/DynSAInclude/FileHandler.h"
#include "moses/TranslationModel/DynSAInclude/vocab.h"
namespace Moses
{
class Factor;
class Phrase;
/** @todo ask ollie
*/
class LanguageModelORLM : public LanguageModelSingleFactor
{
public:
typedef count_t T; // type for ORLM filter
LanguageModelORLM(const std::string &line)
:LanguageModelSingleFactor(line)
,m_lm(0) {
}
bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
~LanguageModelORLM() {
//save LM with markings
Utils::rtrim(m_filePath, ".gz");
FileHandler fout(m_filePath + ".marked.gz", std::ios::out|std::ios::binary, false);
m_lm->save(&fout);
fout.close();
delete m_lm;
}
void CleanUpAfterSentenceProcessing() {
m_lm->clearCache(); // clear caches
}
bool UpdateORLM(const std::vector<string>& ngram, const int value);
protected:
OnlineRLM<T>* m_lm;
//MultiOnlineRLM<T>* m_lm;
wordID_t m_oov_id;
std::vector<wordID_t> lm_ids_vec_;
void CreateFactors();
wordID_t GetLmID(const std::string &str) const;
wordID_t GetLmID(const Factor *factor) const;
};
} // end namespace

View File

@ -11,6 +11,26 @@
namespace Moses
{
namespace rdlm
{
ThreadLocal::ThreadLocal(nplm::neuralTM *lm_head_base_instance_, nplm::neuralTM *lm_label_base_instance_, bool normalizeHeadLM, bool normalizeLabelLM, int cacheSize)
{
lm_head = new nplm::neuralTM(*lm_head_base_instance_);
lm_label = new nplm::neuralTM(*lm_label_base_instance_);
lm_head->set_normalization(normalizeHeadLM);
lm_label->set_normalization(normalizeLabelLM);
lm_head->set_cache(cacheSize);
lm_label->set_cache(cacheSize);
}
ThreadLocal::~ThreadLocal()
{
delete lm_head;
delete lm_label;
}
}
typedef Eigen::Map<Eigen::Matrix<int,Eigen::Dynamic,1> > EigenMap;
RDLM::~RDLM()
@ -70,7 +90,7 @@ void RDLM::Load()
static_label_null[i] = lm_label_base_instance_->lookup_input_word(numstr);
}
static_dummy_head = lm_head_base_instance_->lookup_input_word(dummy_head);
static_dummy_head = lm_head_base_instance_->lookup_input_word(dummy_head.GetString(0).as_string());
static_start_head = lm_head_base_instance_->lookup_input_word("<start_head>");
static_start_label = lm_head_base_instance_->lookup_input_word("<start_label>");
@ -99,8 +119,16 @@ void RDLM::Load()
// TreePointer mytree4 (new InternalTree("[pred [det [ART die]] [attr [adv [adv [PTKNEG nicht]] [ADV fast]] [ADJA neue]] [attr [ADJA]] [NN Zeit]]]"));
// TreePointer mytree2 (new InternalTree("[vroot [subj [PPER ich]] [VAFIN bin] [pred]]"));
//
// std::vector<int> ancestor_heads;
// std::vector<int> ancestor_labels;
// rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
// if (!thread_objects) {
// thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
// thread_objects_backend_.reset(thread_objects);
// }
//
// #ifdef WITH_THREADS
// //read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
// m_accessLock.lock_shared();
// #endif
//
// size_t boundary_hash(0);
// boost::array<float, 4> score;
@ -108,13 +136,13 @@ void RDLM::Load()
// std::cerr << "scoring: " << mytree3->GetString() << std::endl;
// std::vector<TreePointer> previous_trees;
// TreePointerMap back_pointers = AssociateLeafNTs(mytree3.get(), previous_trees);
// Score(mytree3.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree3.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// previous_trees.push_back(mytree3);
// back_pointers = AssociateLeafNTs(mytree4.get(), previous_trees);
// std::cerr << "scoring: " << mytree4->GetString() << std::endl;
// Score(mytree4.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree4.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// mytree4->Combine(previous_trees);
@ -125,7 +153,7 @@ void RDLM::Load()
//
// score[1] = 0;
// score[3] = 0;
// Score(mytree2.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree2.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// score[0] = 0;
@ -134,12 +162,12 @@ void RDLM::Load()
// score[3] = 0;
// std::cerr << "scoring: " << mytree->GetString() << std::endl;
//
// Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// }
// UTIL_THROW2("Finished");
//
// #ifdef WITH_THREADS
// m_accessLock.unlock_shared();
// #endif
// }
//
// {
@ -149,8 +177,16 @@ void RDLM::Load()
// TreePointer mytree4 (new InternalTree("[^pred [attr [adv [adv [PTKNEG nicht]] [ADV fast]] [ADJA neue]] [^pred [attr [ADJA]] [NN Zeit]]]"));
// TreePointer mytree2 (new InternalTree("[vroot [subj [PPER ich]] [^vroot [VAFIN bin] [pred [det [ART die]] [^pred]]]]"));
//
// std::vector<int> ancestor_heads;
// std::vector<int> ancestor_labels;
// rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
// if (!thread_objects) {
// thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
// thread_objects_backend_.reset(thread_objects);
// }
//
// #ifdef WITH_THREADS
// //read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
// m_accessLock.lock_shared();
// #endif
//
// size_t boundary_hash(0);
// boost::array<float, 4> score;
@ -158,13 +194,13 @@ void RDLM::Load()
// std::cerr << "scoring: " << mytree3->GetString() << std::endl;
// std::vector<TreePointer> previous_trees;
// TreePointerMap back_pointers = AssociateLeafNTs(mytree3.get(), previous_trees);
// Score(mytree3.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree3.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// previous_trees.push_back(mytree3);
// back_pointers = AssociateLeafNTs(mytree4.get(), previous_trees);
// std::cerr << "scoring: " << mytree4->GetString() << std::endl;
// Score(mytree4.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree4.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// mytree4->Combine(previous_trees);
@ -175,7 +211,7 @@ void RDLM::Load()
//
// score[1] = 0;
// score[3] = 0;
// Score(mytree2.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree2.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// score[0] = 0;
@ -184,16 +220,20 @@ void RDLM::Load()
// score[3] = 0;
// std::cerr << "scoring: " << mytree->GetString() << std::endl;
//
// Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
// Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
// std::cerr << "head LM: " << score[0] << " label LM: " << score[2] << " approx: " << score[1] << " - " << score[3] << std::endl;
//
// #ifdef WITH_THREADS
// m_accessLock.unlock_shared();
// #endif
//
// }
// UTIL_THROW2("Finished");
}
void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float, 4> &score, std::vector<int> &ancestor_heads, std::vector<int> &ancestor_labels, size_t &boundary_hash, int num_virtual, int rescoring_levels) const
void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float, 4> &score, size_t &boundary_hash, rdlm::ThreadLocal &thread_objects, int num_virtual, int rescoring_levels) const
{
// ignore terminal nodes
@ -205,20 +245,23 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
if (root->GetLabel() == m_glueSymbol) {
// recursion
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
Score(it->get(), back_pointers, score, boundary_hash, thread_objects, num_virtual, rescoring_levels);
}
return;
}
std::vector<int> &ancestor_heads = thread_objects.ancestor_heads;
std::vector<int> &ancestor_labels = thread_objects.ancestor_labels;
// ignore virtual nodes (in binarization; except if it's the root)
if (m_binarized && root->GetLabel()[0] == '^' && !ancestor_heads.empty()) {
if (m_binarized && root->GetLabel().GetString(0).as_string()[0] == '^' && !ancestor_heads.empty()) {
// recursion
if (root->IsLeafNT() && m_context_up > 1 && ancestor_heads.size()) {
root = back_pointers.find(root)->second.get();
rescoring_levels = m_context_up-1;
}
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels);
Score(it->get(), back_pointers, score, boundary_hash, thread_objects, num_virtual, rescoring_levels);
}
return;
}
@ -228,25 +271,19 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
return;
}
nplm::neuralTM *lm_head = lm_head_backend_.get();
if (!lm_head) {
lm_head = new nplm::neuralTM(*lm_head_base_instance_);
lm_head->set_normalization(m_normalizeHeadLM);
lm_head->set_cache(m_cacheSize);
lm_head_backend_.reset(lm_head);
}
// ignore preterminal node (except if we're scoring root nodes)
if (root->GetLength() == 1 && root->GetChildren()[0]->IsTerminal()) {
// root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
std::vector<int> ngram_head_null (static_head_null);
ngram_head_null.back() = lm_head->lookup_output_word(root->GetChildren()[0]->GetLabel());
if (m_isPretermBackoff && ngram_head_null.back() == 0) {
ngram_head_null.back() = lm_head->lookup_output_word(root->GetLabel());
std::vector<int> & ngram = thread_objects.ngram;
ngram = static_head_null;
ngram.back() = Factor2ID(root->GetChildren()[0]->GetLabel()[m_factorType], HEAD_OUTPUT);
if (m_isPretermBackoff && ngram.back() == 0) {
ngram.back() = Factor2ID(root->GetLabel()[m_factorType], HEAD_OUTPUT);
}
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head) {
std::vector<int>::iterator it = ngram_head_null.begin();
std::vector<int>::iterator it = ngram.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
@ -260,10 +297,10 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
}
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
score[0] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram_head_null.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
}
return;
@ -281,22 +318,15 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
}
}
nplm::neuralTM *lm_label = lm_label_backend_.get();
if (!lm_label) {
lm_label = new nplm::neuralTM(*lm_label_base_instance_);
lm_label->set_normalization(m_normalizeLabelLM);
lm_label->set_cache(m_cacheSize);
lm_label_backend_.reset(lm_label);
}
std::pair<int,int> head_ids;
InternalTree* found = GetHead(root, back_pointers, head_ids);
if (found == NULL) {
bool found = GetHead(root, back_pointers, head_ids);
if (!found) {
head_ids = std::make_pair(static_dummy_head, static_dummy_head);
}
size_t context_up_nonempty = std::min(m_context_up, ancestor_heads.size());
const std::string & head_label = root->GetLabel();
const StringPiece & head_label = root->GetLabel().GetString(0);
bool virtual_head = false;
int reached_end = 0;
int label_idx, label_idx_out;
@ -308,45 +338,24 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
reached_end = 2; // indicate that we've seen the last symbol of the RHS
}
// with 'full' binarization, direction is encoded in 2nd char
std::string clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
label_idx = lm_label->lookup_input_word(clipped_label);
label_idx_out = lm_label->lookup_output_word(clipped_label);
StringPiece clipped_label = (m_binarized == 3) ? head_label.substr(2,head_label.size()-2) : head_label.substr(1,head_label.size()-1);
label_idx = lm_label_base_instance_->lookup_input_word(clipped_label.as_string());
label_idx_out = lm_label_base_instance_->lookup_output_word(clipped_label.as_string());
} else {
reached_end = 3; // indicate that we've seen first and last symbol of the RHS
label_idx = lm_label->lookup_input_word(head_label);
label_idx_out = lm_label->lookup_output_word(head_label);
label_idx = Factor2ID(root->GetLabel()[0], LABEL_INPUT);
label_idx_out = Factor2ID(root->GetLabel()[0], LABEL_OUTPUT);
}
int head_idx = (virtual_head && head_ids.first == static_dummy_head) ? static_label_null[offset_up_head+m_context_up-1] : head_ids.first;
// root of tree: score without context
if (ancestor_heads.empty() || (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head)) {
if (head_idx != static_dummy_head && head_idx != static_head_head) {
std::vector<int> ngram_head_null (static_head_null);
*(ngram_head_null.end()-2) = label_idx;
ngram_head_null.back() = head_ids.second;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
std::vector<int>::iterator it = ngram_head_null.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
it += m_context_left;
std::fill_n(it, m_context_right, static_stop_head);
it += m_context_right;
std::fill_n(it, m_context_right, static_stop_label);
it += m_context_right;
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
} else {
boost::hash_combine(boundary_hash, ngram_head_null.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram_head_null.data(), ngram_head_null.size())));
}
}
std::vector<int> ngram_label_null (static_label_null);
ngram_label_null.back() = label_idx_out;
std::vector<int> & ngram = thread_objects.ngram;
ngram = static_label_null;
ngram.back() = label_idx_out;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
std::vector<int>::iterator it = ngram_label_null.begin();
std::vector<int>::iterator it = ngram.begin();
std::fill_n(it, m_context_left, static_start_head);
it += m_context_left;
std::fill_n(it, m_context_left, static_start_label);
@ -357,10 +366,20 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
it += m_context_right;
it = std::copy(ancestor_heads.end()-context_up_nonempty, ancestor_heads.end(), it);
it = std::copy(ancestor_labels.end()-context_up_nonempty, ancestor_labels.end(), it);
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
score[2] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram_label_null.back());
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram_label_null.data(), ngram_label_null.size())));
boost::hash_combine(boundary_hash, ngram.back());
score[3] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
if (head_idx != static_dummy_head && head_idx != static_head_head) {
ngram.push_back(head_ids.second);
*(ngram.end()-2) = label_idx;
if (ancestor_heads.size() == m_context_up && ancestor_heads.back() == static_root_head && !num_virtual) {
score[0] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
}
}
@ -380,7 +399,8 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
}
size_t up_padding = m_context_up - context_up_nonempty;
std::vector<int> ngram (static_label_null);
std::vector<int> & ngram = thread_objects.ngram;
ngram = static_label_null;
std::vector<int>::iterator it = ngram.begin() + offset_up_head;
if (up_padding > 0) {
@ -401,21 +421,25 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
// get number of children after unbinarization
if (m_binarized) {
num_children = 0;
UnbinarizedChildren real_children(root, back_pointers, m_binarized);
for (std::vector<TreePointer>::const_iterator it = real_children.begin(); it != real_children.end(); it = ++real_children) {
UnbinarizedChildren real_children(root, back_pointers, m_binarized, thread_objects.stack);
for (std::vector<TreePointer>::const_iterator it = real_children.begin(); !real_children.ended(); it = ++real_children) {
num_children++;
}
}
if (m_context_right && (reached_end == 1 || reached_end == 3)) num_children++; //also predict start label
if (m_context_left && (reached_end == 2 || reached_end == 3)) num_children++; //also predict end label
std::vector<int> & heads = thread_objects.heads;
std::vector<int> & labels = thread_objects.labels;
std::vector<int> & heads_output = thread_objects.heads_output;
std::vector<int> & labels_output = thread_objects.labels_output;
std::vector<int> heads(num_children);
std::vector<int> labels(num_children);
std::vector<int> heads_output(num_children);
std::vector<int> labels_output(num_children);
heads.resize(num_children);
labels.resize(num_children);
heads_output.resize(num_children);
labels_output.resize(num_children);
GetChildHeadsAndLabels(root, back_pointers, reached_end, lm_head, lm_label, heads, labels, heads_output, labels_output);
GetChildHeadsAndLabels(root, back_pointers, reached_end, thread_objects);
//left padding; only need to add this initially
if (reached_end == 1 || reached_end == 3) {
@ -469,10 +493,10 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
ngram.back() = labels_output[i];
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[2] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[2] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram.back());
score[3] += FloorScore(lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[3] += FloorScore(thread_objects.lm_label->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
// construct context of head model and predict head
@ -482,10 +506,10 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
ngram.push_back(heads_output[i]);
if (ancestor_labels.size() >= m_context_up && !num_virtual) {
score[0] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[0] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
} else {
boost::hash_combine(boundary_hash, ngram.back());
score[1] += FloorScore(lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
score[1] += FloorScore(thread_objects.lm_head->lookup_ngram(EigenMap(ngram.data(), ngram.size())));
}
ngram.pop_back();
}
@ -510,13 +534,13 @@ void RDLM::Score(InternalTree* root, const TreePointerMap & back_pointers, boost
}
// recursion
for (std::vector<TreePointer>::const_iterator it = root->GetChildren().begin(); it != root->GetChildren().end(); ++it) {
Score(it->get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash, num_virtual, rescoring_levels - 1);
Score(it->get(), back_pointers, score, boundary_hash, thread_objects, num_virtual, rescoring_levels - 1);
}
ancestor_heads.pop_back();
ancestor_labels.pop_back();
}
InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs, InternalTree* head_ptr) const
bool RDLM::GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs) const
{
InternalTree *tree;
@ -527,54 +551,35 @@ InternalTree* RDLM::GetHead(InternalTree* root, const TreePointerMap & back_poin
tree = it->get();
}
if (m_binarized && tree->GetLabel()[0] == '^') {
head_ptr = GetHead(tree, back_pointers, IDs, head_ptr);
if (head_ptr != NULL && !m_isPTKVZ) {
return head_ptr;
if (m_binarized && tree->GetLabel().GetString(0).as_string()[0] == '^') {
bool found = GetHead(tree, back_pointers, IDs);
if (found) {
return true;
}
}
// assumption (only true for dependency parse): each constituent has a preterminal label, and corresponding terminal is head
// if constituent has multiple preterminals, first one is picked; if it has no preterminals, dummy_head is returned
else if (tree->GetLength() == 1 && tree->GetChildren()[0]->IsTerminal() && head_ptr == NULL) {
head_ptr = tree;
if (!m_isPTKVZ) {
GetIDs(head_ptr->GetChildren()[0]->GetLabel(), head_ptr->GetLabel(), IDs);
return head_ptr;
}
}
// add PTKVZ to lemma of verb
else if (m_isPTKVZ && head_ptr && tree->GetLabel() == "avz") {
InternalTree *tree2;
for (std::vector<TreePointer>::const_iterator it2 = tree->GetChildren().begin(); it2 != tree->GetChildren().end(); ++it2) {
if ((*it2)->IsLeafNT()) {
tree2 = back_pointers.find(it2->get())->second.get();
} else {
tree2 = it2->get();
}
if (tree2->GetLabel() == "PTKVZ" && tree2->GetLength() == 1 && tree2->GetChildren()[0]->IsTerminal()) {
std::string verb = tree2->GetChildren()[0]->GetLabel() + head_ptr->GetChildren()[0]->GetLabel();
GetIDs(verb, head_ptr->GetLabel(), IDs);
return head_ptr;
}
}
else if (tree->GetLength() == 1 && tree->GetChildren()[0]->IsTerminal()) {
GetIDs(tree->GetChildren()[0]->GetLabel(), tree->GetLabel(), IDs);
return true;
}
}
if (head_ptr != NULL) {
GetIDs(head_ptr->GetChildren()[0]->GetLabel(), head_ptr->GetLabel(), IDs);
}
return head_ptr;
return false;
}
void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, const nplm::neuralTM *lm_head, const nplm::neuralTM *lm_label, std::vector<int> & heads, std::vector<int> & labels, std::vector<int> & heads_output, std::vector<int> & labels_output) const
void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, rdlm::ThreadLocal &thread_objects) const
{
std::pair<int,int> child_ids;
InternalTree* found;
size_t j = 0;
std::vector<int> & heads = thread_objects.heads;
std::vector<int> & labels = thread_objects.labels;
std::vector<int> & heads_output = thread_objects.heads_output;
std::vector<int> & labels_output = thread_objects.labels_output;
// score start label (if enabled) for all nonterminal nodes (but not for terminal or preterminal nodes)
if (m_context_right && (reached_end == 1 || reached_end == 3)) {
heads[j] = static_start_head;
@ -583,10 +588,10 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
j++;
}
UnbinarizedChildren real_children(root, back_pointers, m_binarized);
UnbinarizedChildren real_children(root, back_pointers, m_binarized, thread_objects.stack);
// extract head words / labels
for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); itx != real_children.end(); itx = ++real_children) {
for (std::vector<TreePointer>::const_iterator itx = real_children.begin(); !real_children.ended(); itx = ++real_children) {
if ((*itx)->IsTerminal()) {
std::cerr << "non-terminal node " << root->GetLabel() << " has a mix of terminal and non-terminal children. This shouldn't happen..." << std::endl;
std::cerr << "children: ";
@ -616,13 +621,13 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
continue;
}
found = GetHead(child, back_pointers, child_ids);
if (found == NULL) {
bool found = GetHead(child, back_pointers, child_ids);
if (!found) {
child_ids = std::make_pair(static_dummy_head, static_dummy_head);
}
labels[j] = lm_head->lookup_input_word(child->GetLabel());
labels_output[j] = lm_label->lookup_output_word(child->GetLabel());
labels[j] = Factor2ID(child->GetLabel()[0], LABEL_INPUT);
labels_output[j] = Factor2ID(child->GetLabel()[0], LABEL_OUTPUT);
heads[j] = child_ids.first;
heads_output[j] = child_ids.second;
j++;
@ -637,22 +642,78 @@ void RDLM::GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & bac
}
void RDLM::GetIDs(const std::string & head, const std::string & preterminal, std::pair<int,int> & IDs) const
void RDLM::GetIDs(const Word & head, const Word & preterminal, std::pair<int,int> & IDs) const
{
IDs.first = lm_head_base_instance_->lookup_input_word(head);
IDs.first = Factor2ID(head[m_factorType], HEAD_INPUT);
if (m_isPretermBackoff && IDs.first == 0) {
IDs.first = lm_head_base_instance_->lookup_input_word(preterminal);
IDs.first = Factor2ID(preterminal[0], HEAD_INPUT);
}
if (m_sharedVocab) {
IDs.second = IDs.first;
} else {
IDs.second = lm_head_base_instance_->lookup_output_word(head);
IDs.second = Factor2ID(head[m_factorType], HEAD_OUTPUT);
if (m_isPretermBackoff && IDs.second == 0) {
IDs.second = lm_head_base_instance_->lookup_output_word(preterminal);
IDs.second = Factor2ID(preterminal[0], HEAD_OUTPUT);
}
}
}
// Map a Moses Factor to its NPLM vocabulary ID for the requested model/direction.
// Uses per-direction vectors (indexed by Factor::GetId()) as a cache so the hot
// path avoids NPLM's string-based hash lookups. -1 marks "not yet looked up".
int RDLM::Factor2ID(const Factor * const factor, int model_type) const
{
  size_t ID = factor->GetId();
  int ret;

  // pick the cache that corresponds to the requested model/direction
  std::vector<int>* cache = NULL;
  switch(model_type) {
  case LABEL_INPUT:
    cache = &factor2id_label_input;
    break;
  case LABEL_OUTPUT:
    cache = &factor2id_label_output;
    break;
  case HEAD_INPUT:
    cache = &factor2id_head_input;
    break;
  case HEAD_OUTPUT:
    cache = &factor2id_head_output;
    break;
  }

  if (ID < cache->size()) {
    ret = (*cache)[ID];
  } else {
    // cache too small: grow it under the write lock
#ifdef WITH_THREADS //need to resize cache; write lock
    m_accessLock.unlock_shared();
    m_accessLock.lock();
#endif
    // re-check under the write lock: another thread may have grown the cache
    // while we were unlocked; an unconditional resize(ID*2) could then SHRINK
    // it and discard cached entries.
    if (ID >= cache->size()) {
      // grow to at least ID+1 elements; plain ID*2 is wrong for ID == 0
      // (resize(0) leaves the vector empty and the write below would be out of bounds)
      cache->resize(std::max(ID*2, ID+1), -1);
    }
#ifdef WITH_THREADS //go back to read lock
    m_accessLock.unlock();
    m_accessLock.lock_shared();
#endif
    ret = -1;
  }

  if (ret == -1) {
    // not cached yet: do the (expensive) string-based lookup in NPLM
    switch(model_type) {
    case LABEL_INPUT:
      ret = lm_label_base_instance_->lookup_input_word(factor->GetString().as_string());
      break;
    case LABEL_OUTPUT:
      ret = lm_label_base_instance_->lookup_output_word(factor->GetString().as_string());
      break;
    case HEAD_INPUT:
      ret = lm_head_base_instance_->lookup_input_word(factor->GetString().as_string());
      break;
    case HEAD_OUTPUT:
      ret = lm_head_base_instance_->lookup_output_word(factor->GetString().as_string());
      break;
    }
    // NOTE(review): this write happens under the shared (read) lock; concurrent
    // writers store the same value for the same ID, so the race is presumably
    // benign — confirm, or move the store under the write lock.
    (*cache)[ID] = ret;
  }
  return ret;
}
void RDLM::PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const
{
@ -689,18 +750,32 @@ RDLM::TreePointerMap RDLM::AssociateLeafNTs(InternalTree* root, const std::vecto
void RDLM::ScoreFile(std::string &path)
{
InputFileStream inStream(path);
rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
if (!thread_objects) {
thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
thread_objects_backend_.reset(thread_objects);
}
std::string line, null;
std::vector<int> ancestor_heads(m_context_up, static_root_head);
std::vector<int> ancestor_labels(m_context_up, static_root_label);
thread_objects->ancestor_heads.resize(0);
thread_objects->ancestor_labels.resize(0);
thread_objects->ancestor_heads.resize(m_context_up, static_root_head);
thread_objects->ancestor_labels.resize(m_context_up, static_root_label);
#ifdef WITH_THREADS
//read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
m_accessLock.lock_shared();
#endif
while(getline(inStream, line)) {
TreePointerMap back_pointers;
boost::array<float, 4> score;
score.fill(0);
InternalTree* mytree (new InternalTree(line));
size_t boundary_hash = 0;
Score(mytree, back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
Score(mytree, back_pointers, score, boundary_hash, *thread_objects);
std::cerr << "head LM: " << score[0] << "label LM: " << score[2] << std::endl;
}
#ifdef WITH_THREADS
m_accessLock.unlock_shared();
#endif
}
@ -714,8 +789,6 @@ void RDLM::SetParameter(const std::string& key, const std::string& value)
m_path_head_lm = value;
} else if (key == "path_label_lm") {
m_path_label_lm = value;
} else if (key == "ptkvz") {
m_isPTKVZ = Scan<bool>(value);
} else if (key == "backoff") {
m_isPretermBackoff = Scan<bool>(value);
} else if (key == "context_up") {
@ -744,7 +817,9 @@ void RDLM::SetParameter(const std::string& key, const std::string& value)
else
UTIL_THROW(util::Exception, "Unknown value for argument " << key << "=" << value);
} else if (key == "glue_symbol") {
m_glueSymbol = value;
m_glueSymbolString = value;
} else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
} else if (key == "cache_size") {
m_cacheSize = Scan<int>(value);
} else {
@ -780,10 +855,6 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
accumulator->PlusEquals(ff_idx+1, prev_approx_label);
bool full_sentence = (mytree->GetChildren().back()->GetLabel() == m_endTag || (mytree->GetChildren().back()->GetLabel() == m_endSymbol && mytree->GetChildren().back()->GetChildren().back()->GetLabel() == m_endTag));
std::vector<int> ancestor_heads ((full_sentence ? m_context_up : 0), static_root_head);
std::vector<int> ancestor_labels ((full_sentence ? m_context_up : 0), static_root_label);
ancestor_heads.reserve(10);
ancestor_labels.reserve(10);
TreePointerMap back_pointers = AssociateLeafNTs(mytree.get(), previous_trees);
boost::array<float, 4> score; // score_head, approx_score_head, score_label, approx_score_label
@ -791,13 +862,45 @@ FFState* RDLM::EvaluateWhenApplied(const ChartHypothesis& cur_hypo
//hash of all boundary symbols (symbols with incomplete context); trees with same hash share state for cube pruning.
size_t boundary_hash = 0;
if (!m_rerank) {
Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
#ifdef WITH_THREADS
//read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
m_accessLock.lock_shared();
#endif
rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
if (!thread_objects) {
thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
thread_objects_backend_.reset(thread_objects);
}
thread_objects->ancestor_heads.resize(0);
thread_objects->ancestor_labels.resize(0);
thread_objects->ancestor_heads.resize((full_sentence ? m_context_up : 0), static_root_head);
thread_objects->ancestor_labels.resize((full_sentence ? m_context_up : 0), static_root_label);
Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
#ifdef WITH_THREADS
m_accessLock.unlock_shared();
#endif
accumulator->PlusEquals(ff_idx, score[0] + score[1]);
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
}
mytree->Combine(previous_trees);
if (m_rerank && full_sentence) {
Score(mytree.get(), back_pointers, score, ancestor_heads, ancestor_labels, boundary_hash);
#ifdef WITH_THREADS
//read-lock for cache; cache resizes are so rare that we want to minimize number of calls, not scope
m_accessLock.lock_shared();
#endif
rdlm::ThreadLocal *thread_objects = thread_objects_backend_.get();
if (!thread_objects) {
thread_objects = new rdlm::ThreadLocal(lm_head_base_instance_, lm_label_base_instance_, m_normalizeHeadLM, m_normalizeLabelLM, m_cacheSize);
thread_objects_backend_.reset(thread_objects);
}
thread_objects->ancestor_heads.resize(0);
thread_objects->ancestor_labels.resize(0);
thread_objects->ancestor_heads.resize((full_sentence ? m_context_up : 0), static_root_head);
thread_objects->ancestor_labels.resize((full_sentence ? m_context_up : 0), static_root_label);
Score(mytree.get(), back_pointers, score, boundary_hash, *thread_objects);
#ifdef WITH_THREADS
m_accessLock.unlock_shared();
#endif
accumulator->PlusEquals(ff_idx, score[0] + score[1]);
accumulator->PlusEquals(ff_idx+1, score[2] + score[3]);
}

View File

@ -3,10 +3,16 @@
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
#include "moses/FF/InternalTree.h"
#include "moses/Word.h"
#include <boost/thread/tss.hpp>
#include <boost/array.hpp>
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#endif
// relational dependency language model, described in:
// Sennrich, Rico (2015). Modelling and Optimizing on Syntactic N-Grams for Statistical Machine Translation. Transactions of the Association for Computational Linguistics.
// see 'scripts/training/rdlm' for training scripts
@ -19,6 +25,31 @@ class neuralTM;
namespace Moses
{
namespace rdlm
{
// we re-use some short-lived objects to reduce the number of allocations;
// each thread gets its own instance to prevent collision
// [could be replaced with thread_local keyword in C++11]
// Per-thread scratch buffers and LM handles, reused across calls to avoid
// repeated allocations; obtained via boost::thread_specific_ptr in RDLM.
class ThreadLocal
{
public:
// context stacks of head-word / label IDs for the ancestors of the node being scored
std::vector<int> ancestor_heads;
std::vector<int> ancestor_labels;
// scratch n-gram buffer passed to the NPLM lookup
std::vector<int> ngram;
// per-child head/label IDs (input vocabulary) collected by GetChildHeadsAndLabels
std::vector<int> heads;
std::vector<int> labels;
// same, but in the output vocabulary of the respective model
std::vector<int> heads_output;
std::vector<int> labels_output;
// traversal stack reused by UnbinarizedChildren when expanding virtual (^-prefixed) nodes
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
// thread-private NPLM instances (head model / label model); owned by this object
nplm::neuralTM* lm_head;
nplm::neuralTM* lm_label;
// clones the shared base models and applies normalization/cache settings (defined in RDLM.cpp)
ThreadLocal(nplm::neuralTM *lm_head_base_instance_, nplm::neuralTM *lm_label_base_instance_, bool normalizeHeadLM, bool normalizeLabelLM, int cacheSize);
~ThreadLocal();
};
}
class RDLMState : public TreeState
{
float m_approx_head; //score that was approximated due to lack of context
@ -56,19 +87,18 @@ class RDLM : public StatefulFeatureFunction
typedef std::map<InternalTree*,TreePointer> TreePointerMap;
nplm::neuralTM* lm_head_base_instance_;
mutable boost::thread_specific_ptr<nplm::neuralTM> lm_head_backend_;
nplm::neuralTM* lm_label_base_instance_;
mutable boost::thread_specific_ptr<nplm::neuralTM> lm_label_backend_;
std::string dummy_head;
std::string m_glueSymbol;
std::string m_startSymbol;
std::string m_endSymbol;
std::string m_endTag;
mutable boost::thread_specific_ptr<rdlm::ThreadLocal> thread_objects_backend_;
std::string m_glueSymbolString;
Word dummy_head;
Word m_glueSymbol;
Word m_startSymbol;
Word m_endSymbol;
Word m_endTag;
std::string m_path_head_lm;
std::string m_path_label_lm;
bool m_isPTKVZ;
bool m_isPretermBackoff;
size_t m_context_left;
size_t m_context_right;
@ -103,15 +133,26 @@ class RDLM : public StatefulFeatureFunction
int static_stop_label_output;
int static_start_label_output;
FactorType m_factorType;
static const int LABEL_INPUT = 0;
static const int LABEL_OUTPUT = 1;
static const int HEAD_INPUT = 2;
static const int HEAD_OUTPUT = 3;
mutable std::vector<int> factor2id_label_input;
mutable std::vector<int> factor2id_label_output;
mutable std::vector<int> factor2id_head_input;
mutable std::vector<int> factor2id_head_output;
#ifdef WITH_THREADS
//reader-writer lock
mutable boost::shared_mutex m_accessLock;
#endif
public:
RDLM(const std::string &line)
: StatefulFeatureFunction(2, line)
, dummy_head("<dummy_head>")
, m_glueSymbol("Q")
, m_startSymbol("SSTART")
, m_endSymbol("SEND")
, m_endTag("</s>")
, m_isPTKVZ(false)
, m_glueSymbolString("Q")
, m_isPretermBackoff(true)
, m_context_left(3)
, m_context_right(0)
@ -122,8 +163,16 @@ public:
, m_normalizeLabelLM(false)
, m_sharedVocab(false)
, m_binarized(0)
, m_cacheSize(1000000) {
, m_cacheSize(1000000)
, m_factorType(0) {
ReadParameters();
std::vector<FactorType> factors;
factors.push_back(0);
dummy_head.CreateFromString(Output, factors, "<dummy_head>", false);
m_glueSymbol.CreateFromString(Output, factors, m_glueSymbolString, true);
m_startSymbol.CreateFromString(Output, factors, "SSTART", true);
m_endSymbol.CreateFromString(Output, factors, "SEND", true);
m_endTag.CreateFromString(Output, factors, "</s>", false);
}
~RDLM();
@ -132,10 +181,11 @@ public:
return new RDLMState(TreePointer(), 0, 0, 0);
}
void Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float,4> &score, std::vector<int> &ancestor_heads, std::vector<int> &ancestor_labels, size_t &boundary_hash, int num_virtual = 0, int rescoring_levels = 0) const;
InternalTree* GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs, InternalTree * head_ptr=NULL) const;
void GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, const nplm::neuralTM *lm_head, const nplm::neuralTM *lm_labels, std::vector<int> & heads, std::vector<int> & labels, std::vector<int> & heads_output, std::vector<int> & labels_output) const;
void GetIDs(const std::string & head, const std::string & preterminal, std::pair<int,int> & IDs) const;
void Score(InternalTree* root, const TreePointerMap & back_pointers, boost::array<float,4> &score, size_t &boundary_hash, rdlm::ThreadLocal &thread_objects, int num_virtual = 0, int rescoring_levels = 0) const;
bool GetHead(InternalTree* root, const TreePointerMap & back_pointers, std::pair<int,int> & IDs) const;
void GetChildHeadsAndLabels(InternalTree *root, const TreePointerMap & back_pointers, int reached_end, rdlm::ThreadLocal &thread_objects) const;
void GetIDs(const Word & head, const Word & preterminal, std::pair<int,int> & IDs) const;
int Factor2ID(const Factor * const factor, int model_type) const;
void ScoreFile(std::string &path); //for debugging
void PrintInfo(std::vector<int> &ngram, nplm::neuralTM* lm) const; //for debugging
@ -177,22 +227,23 @@ public:
private:
std::vector<TreePointer>::const_iterator iter;
std::vector<TreePointer>::const_iterator _begin;
std::vector<TreePointer>::const_iterator _end;
bool _ended;
InternalTree* current;
const TreePointerMap & back_pointers;
bool binarized;
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > stack;
std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > &stack;
public:
UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary):
UnbinarizedChildren(InternalTree* root, const TreePointerMap & pointers, bool binary, std::vector<std::pair<InternalTree*,std::vector<TreePointer>::const_iterator> > & persistent_stack):
current(root),
back_pointers(pointers),
binarized(binary) {
stack.reserve(10);
_end = current->GetChildren().end();
binarized(binary),
stack(persistent_stack) {
stack.resize(0);
_ended = current->GetChildren().empty();
iter = current->GetChildren().begin();
// expand virtual node
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') {
stack.push_back(std::make_pair(current, iter));
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
if ((*iter)->IsLeafNT()) {
@ -208,8 +259,8 @@ public:
std::vector<TreePointer>::const_iterator begin() const {
return _begin;
}
std::vector<TreePointer>::const_iterator end() const {
return _end;
bool ended() const {
return _ended;
}
std::vector<TreePointer>::const_iterator operator++() {
@ -224,12 +275,13 @@ public:
break;
}
}
if (iter == _end) {
if (iter == current->GetChildren().end()) {
_ended = true;
return iter;
}
}
// expand virtual node
while (binarized && !(*iter)->GetLabel().empty() && (*iter)->GetLabel()[0] == '^') {
while (binarized && !(*iter)->GetLabel().GetString(0).empty() && (*iter)->GetLabel().GetString(0).data()[0] == '^') {
stack.push_back(std::make_pair(current, iter));
// also go through trees or previous hypotheses to rescore nodes for which more context has become available
if ((*iter)->IsLeafNT()) {

Some files were not shown because too many files have changed in this diff Show More