This commit is contained in:
Ulrich Germann 2014-09-30 13:24:34 +01:00
commit a01b6d6257
97 changed files with 1806 additions and 810 deletions

View File

@ -1,2 +1,2 @@
exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
exe phrase-lookup : Vocabulary.cpp SuffixArray.cpp phrase-lookup.cpp ;

132
biconcor/phrase-lookup.cpp Normal file
View File

@ -0,0 +1,132 @@
#include "SuffixArray.h"
#include <getopt.h>
using namespace std;
// Logs the query to stderr, tokenizes it and returns suffixArray.Count() for
// the resulting token sequence.
size_t lookup( string );
// Splits a NUL-terminated string into tokens separated by spaces and/or tabs.
vector<string> tokenize( const char input[] );
// Single suffix array instance shared by main() (create/save/load) and
// lookup() (count queries).
SuffixArray suffixArray;
int main(int argc, char* argv[]) {
// handle parameters
string query;
string fileNameSuffix;
string fileNameSource;
int loadFlag = false;
int saveFlag = false;
int createFlag = false;
int queryFlag = false;
int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create corpus]\n\t[--query string]\n\t[--stdio]\n";
while(1) {
static struct option long_options[] = {
{"load", required_argument, 0, 'l'},
{"save", required_argument, 0, 's'},
{"create", required_argument, 0, 'c'},
{"query", required_argument, 0, 'q'},
{"stdio", no_argument, 0, 'i'},
{0, 0, 0, 0}
};
int option_index = 0;
int c = getopt_long (argc, argv, "l:s:c:q:i", long_options, &option_index);
if (c == -1) break;
switch (c) {
case 'l':
fileNameSuffix = string(optarg);
loadFlag = true;
break;
case 's':
fileNameSuffix = string(optarg);
saveFlag = true;
break;
case 'c':
fileNameSource = string(optarg);
createFlag = true;
break;
case 'q':
query = string(optarg);
queryFlag = true;
break;
case 'i':
stdioFlag = true;
break;
default:
cerr << info;
exit(1);
}
}
if (stdioFlag) {
queryFlag = true;
}
// check if parameter settings are legal
if (saveFlag && !createFlag) {
cerr << "error: cannot save without creating\n" << info;
exit(1);
}
if (saveFlag && loadFlag) {
cerr << "error: cannot load and save at the same time\n" << info;
exit(1);
}
if (!loadFlag && !createFlag) {
cerr << "error: neither load or create - i have no info!\n" << info;
exit(1);
}
// do your thing
if (createFlag) {
cerr << "will create\n";
cerr << "corpus is in " << fileNameSource << endl;
suffixArray.Create( fileNameSource );
if (saveFlag) {
suffixArray.Save( fileNameSuffix );
cerr << "will save in " << fileNameSuffix << endl;
}
}
if (loadFlag) {
cerr << "will load from " << fileNameSuffix << endl;
suffixArray.Load( fileNameSuffix );
}
if (stdioFlag) {
while(true) {
string query;
if (getline(cin, query, '\n').eof()) {
return 0;
}
cout << lookup( query ) << endl;
}
}
else if (queryFlag) {
cout << lookup( query ) << endl;
}
return 0;
}
// Counts the occurrences of a phrase: echoes the raw query on stderr, splits
// it into tokens with tokenize() and hands them to suffixArray.Count().
size_t lookup( string query ) {
  cerr << "query is " << query << endl;

  vector< string > phraseTokens = tokenize( query.c_str() );
  const size_t occurrences = suffixArray.Count( phraseTokens );
  return occurrences;
}
// Splits a NUL-terminated string into tokens.
// Separators are runs of spaces and/or tabs; leading, trailing and repeated
// separators never produce empty tokens. Every other character (including
// '\n' and '\r') is treated as part of a token.
vector<string> tokenize( const char input[] )
{
  vector< string > words;
  const char *cursor = input;
  while (*cursor != '\0') {
    // skip the separator run in front of the next word
    if (*cursor == ' ' || *cursor == '\t') {
      ++cursor;
      continue;
    }
    // cursor sits on the first character of a word; advance to its end
    const char *wordBegin = cursor;
    while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t') {
      ++cursor;
    }
    words.push_back( string( wordBegin, cursor ) );
  }
  return words;
}

View File

@ -1,18 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.602770742">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.602770742" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -44,15 +42,14 @@
<option id="gnu.cpp.link.option.libs.1325292383" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -60,13 +57,7 @@
</option>
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
@ -90,12 +81,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.168814843" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -36,7 +36,7 @@
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1041890522" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.674199351" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.1221354875" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<option id="gnu.cpp.link.option.paths.1494157787" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
@ -121,5 +121,13 @@
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
</cproject>

View File

@ -39,8 +39,8 @@
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1701471219" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.1906832553" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<option id="gnu.cpp.link.option.paths.1107413288" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">

View File

@ -39,7 +39,7 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.1356683866" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1569179988" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">

View File

@ -36,7 +36,7 @@
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.83617569" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.943560690" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.599256050" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<option id="gnu.cpp.link.option.paths.1223834298" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">

View File

@ -44,8 +44,8 @@
<option id="gnu.cpp.link.option.libs.585257079" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="mert_lib"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="pthread"/>
</option>

View File

@ -36,11 +36,11 @@
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.254144861" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.319879082" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.132164474" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<option id="gnu.cpp.link.option.paths.132164474" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.1017214824" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
<option id="gnu.cpp.link.option.libs.1017214824" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_program_options"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1672776758" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>

View File

@ -201,6 +201,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/GzFileBuf.h</locationURI>
</link>
<link>
<name>HwcmScorer.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/HwcmScorer.cpp</locationURI>
</link>
<link>
<name>HwcmScorer.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/HwcmScorer.h</locationURI>
</link>
<link>
<name>HypPackEnumerator.cpp</name>
<type>1</type>
@ -211,6 +221,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/HypPackEnumerator.h</locationURI>
</link>
<link>
<name>InternalTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalTree.cpp</locationURI>
</link>
<link>
<name>InternalTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalTree.h</locationURI>
</link>
<link>
<name>InterpolatedScorer.cpp</name>
<type>1</type>

View File

@ -1,19 +1,17 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.162355801">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -52,15 +50,9 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@ -70,16 +62,15 @@
</option>
<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -103,13 +94,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.516628324" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -111,16 +111,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/Main.h</locationURI>
</link>
<link>
<name>TranslationAnalysis.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/TranslationAnalysis.cpp</locationURI>
</link>
<link>
<name>TranslationAnalysis.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/TranslationAnalysis.h</locationURI>
</link>
<link>
<name>mbr.cpp</name>
<type>1</type>

View File

@ -1,19 +1,17 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.461114338">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -48,16 +46,7 @@
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.2096997198" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@ -67,16 +56,15 @@
</option>
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -104,13 +92,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -101,16 +101,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/Jamfile</locationURI>
</link>
<link>
<name>LatticeMBR.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/LatticeMBR.cpp</locationURI>
</link>
<link>
<name>LatticeMBR.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/LatticeMBR.h</locationURI>
</link>
<link>
<name>LatticeMBRGrid.cpp</name>
<type>1</type>
@ -126,16 +116,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/Main.h</locationURI>
</link>
<link>
<name>TranslationAnalysis.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/TranslationAnalysis.cpp</locationURI>
</link>
<link>
<name>TranslationAnalysis.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/TranslationAnalysis.h</locationURI>
</link>
<link>
<name>mbr.cpp</name>
<type>1</type>

View File

@ -50,7 +50,6 @@
<listOptionValue builtIn="false" value="WITH_THREADS"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
@ -142,7 +141,7 @@
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="LM/SRI.h|LM/SRI.cpp|TranslationModel/UG|LM/DALMWrapper.h|LM/DALMWrapper.cpp|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/symal2mam.cc|TranslationModel/UG/mm/mtt-dump.cc|TranslationModel/UG/mm/mtt-count-words.cc|TranslationModel/UG/mm/mtt-build.cc|TranslationModel/UG/mm/mmlex-lookup.cc|TranslationModel/UG/mm/mmlex-build.cc|TranslationModel/UG/mm/mam_verify.cc|TranslationModel/UG/mm/mam2symal.cc|TranslationModel/UG/mm/custom-pt.cc|TranslationModel/UG/mm/calc-coverage.cc|TranslationModel/UG/mm/mtt.count.cc|TranslationModel/UG/util|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="LM/IRST.h|LM/IRST.cpp|LM/SRI.h|LM/SRI.cpp|TranslationModel/UG|LM/DALMWrapper.h|LM/DALMWrapper.cpp|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/symal2mam.cc|TranslationModel/UG/mm/mtt-dump.cc|TranslationModel/UG/mm/mtt-count-words.cc|TranslationModel/UG/mm/mtt-build.cc|TranslationModel/UG/mm/mmlex-lookup.cc|TranslationModel/UG/mm/mmlex-build.cc|TranslationModel/UG/mm/mam_verify.cc|TranslationModel/UG/mm/mam2symal.cc|TranslationModel/UG/mm/custom-pt.cc|TranslationModel/UG/mm/calc-coverage.cc|TranslationModel/UG/mm/mtt.count.cc|TranslationModel/UG/util|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>

View File

@ -491,6 +491,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LVoc.h</locationURI>
</link>
<link>
<name>LatticeMBR.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LatticeMBR.cpp</locationURI>
</link>
<link>
<name>LatticeMBR.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/LatticeMBR.h</locationURI>
</link>
<link>
<name>Manager.cpp</name>
<type>1</type>
@ -811,6 +821,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/Timer.h</locationURI>
</link>
<link>
<name>TranslationAnalysis.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationAnalysis.cpp</locationURI>
</link>
<link>
<name>TranslationAnalysis.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationAnalysis.h</locationURI>
</link>
<link>
<name>TranslationModel</name>
<type>2</type>
@ -1161,6 +1181,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InputFeature.h</locationURI>
</link>
<link>
<name>FF/InternalTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalTree.cpp</locationURI>
</link>
<link>
<name>FF/InternalTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalTree.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering</name>
<type>2</type>
@ -1216,6 +1246,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhraseLengthFeatureTest.cpp</locationURI>
</link>
<link>
<name>FF/PhraseOrientationFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhraseOrientationFeature.cpp</locationURI>
</link>
<link>
<name>FF/PhraseOrientationFeature.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/PhraseOrientationFeature.h</locationURI>
</link>
<link>
<name>FF/PhrasePairFeature.cpp</name>
<type>1</type>
@ -1456,6 +1496,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/WordTranslationFeature.h</locationURI>
</link>
<link>
<name>FF/extract-ghkm</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>LM/Backward.cpp</name>
<type>1</type>
@ -2016,6 +2061,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/osmHyp.h</locationURI>
</link>
<link>
<name>FF/extract-ghkm/PhraseOrientation.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.cpp</locationURI>
</link>
<link>
<name>FF/extract-ghkm/PhraseOrientation.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/PhraseOrientation.h</locationURI>
</link>
<link>
<name>LM/oxlm/LBLLM.cpp</name>
<type>1</type>

View File

@ -1,18 +1,16 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.852684782">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.852684782" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -39,16 +37,7 @@
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.9477188" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1008235812" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.2139594100" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../probingPT/helpers&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
@ -58,16 +47,15 @@
</option>
<option id="gnu.cpp.link.option.libs.615408765" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -91,12 +79,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1878418244" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -5,6 +5,10 @@ import path ;
with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
if $(with-xmlrpc-c) {
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the XMLRPC-C library; Do NOT use v.1.25.29 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
build-moses-server = true ;
xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ;
if ! [ path.exists $(xmlrpc-command) ] {

View File

@ -9,6 +9,7 @@
#include "moses/Hypothesis.h"
#include "moses/Manager.h"
#include "moses/StaticData.h"
#include "moses/ThreadPool.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#if PT_UG
@ -199,24 +200,29 @@ public:
}
};
class Translator : public xmlrpc_c::method
{
/**
* Required so that translations can be sent to a thread pool.
**/
class TranslationTask : public virtual Moses::Task {
public:
Translator() {
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and
// system.methodHelp RPC.
this->_signature = "S:S";
this->_help = "Does translation";
}
TranslationTask(xmlrpc_c::paramList const& paramList,
boost::condition_variable& cond, boost::mutex& mut)
: m_paramList(paramList),
m_cond(cond),
m_mut(mut),
m_done(false)
{}
void
execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) {
virtual bool DeleteAfterExecution() {return false;}
const params_t params = paramList.getStruct(0);
paramList.verifyEnd(1);
bool IsDone() const {return m_done;}
const map<string, xmlrpc_c::value>& GetRetData() { return m_retData;}
virtual void Run() {
const params_t params = m_paramList.getStruct(0);
m_paramList.verifyEnd(1);
params_t::const_iterator si = params.find("text");
if (si == params.end()) {
throw xmlrpc_c::fault(
@ -268,12 +274,11 @@ public:
}
stringstream out, graphInfo, transCollOpts;
map<string, xmlrpc_c::value> retData;
if (staticData.IsChart()) {
TreeInput tinput;
const vector<FactorType>&
inputFactorOrder = staticData.GetInputFactorOrder();
inputFactorOrder = staticData.GetInputFactorOrder();
stringstream in(source + "\n");
tinput.Read(in,inputFactorOrder);
ChartManager manager(0,tinput);
@ -284,23 +289,23 @@ public:
// const size_t translationId = tinput.GetTranslationId();
std::ostringstream sgstream;
manager.OutputSearchGraphMoses(sgstream);
retData.insert(pair<string, xmlrpc_c::value>("sg", xmlrpc_c::value_string(sgstream.str())));
m_retData.insert(pair<string, xmlrpc_c::value>("sg", xmlrpc_c::value_string(sgstream.str())));
}
} else {
Sentence sentence;
const vector<FactorType> &
inputFactorOrder = staticData.GetInputFactorOrder();
inputFactorOrder = staticData.GetInputFactorOrder();
stringstream in(source + "\n");
sentence.Read(in,inputFactorOrder);
size_t lineNumber = 0; // TODO: Include sentence request number here?
size_t lineNumber = 0; // TODO: Include sentence request number here?
Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm());
manager.ProcessSentence();
manager.ProcessSentence();
const Hypothesis* hypo = manager.GetBestHypothesis();
vector<xmlrpc_c::value> alignInfo;
outputHypo(out,hypo,addAlignInfo,alignInfo,reportAllFactors);
if (addAlignInfo) {
retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
m_retData.insert(pair<string, xmlrpc_c::value>("align", xmlrpc_c::value_array(alignInfo)));
}
if (addWordAlignInfo) {
stringstream wordAlignment;
@ -314,26 +319,31 @@ public:
wordAlignInfo["target-word"] = xmlrpc_c::value_int(atoi(alignmentPair.substr(pos + 1).c_str()));
alignments.push_back(xmlrpc_c::value_struct(wordAlignInfo));
}
retData.insert(pair<string, xmlrpc_c::value_array>("word-align", alignments));
m_retData.insert(pair<string, xmlrpc_c::value_array>("word-align", alignments));
}
if (addGraphInfo) {
insertGraphInfo(manager,retData);
insertGraphInfo(manager,m_retData);
(const_cast<StaticData&>(staticData)).SetOutputSearchGraph(false);
}
if (addTopts) {
insertTranslationOptions(manager,retData);
insertTranslationOptions(manager,m_retData);
}
if (nbest_size>0) {
outputNBest(manager, retData, nbest_size, nbest_distinct,
outputNBest(manager, m_retData, nbest_size, nbest_distinct,
reportAllFactors, addAlignInfo, addScoreBreakdown);
}
}
pair<string, xmlrpc_c::value>
text("text", xmlrpc_c::value_string(out.str()));
retData.insert(text);
m_retData.insert(text);
XVERBOSE(1,"Output: " << out.str() << endl);
*retvalP = xmlrpc_c::value_struct(retData);
{
boost::lock_guard<boost::mutex> lock(m_mut);
m_done = true;
}
m_cond.notify_one();
}
void outputHypo(ostream& out, const Hypothesis* hypo, bool addAlignmentInfo, vector<xmlrpc_c::value>& alignInfo, bool reportAllFactors = false) {
@ -520,7 +530,43 @@ public:
}
}
retData.insert(pair<string, xmlrpc_c::value>("topt", xmlrpc_c::value_array(toptsXml)));
}
private:
xmlrpc_c::paramList const& m_paramList;
map<string, xmlrpc_c::value> m_retData;
boost::condition_variable& m_cond;
boost::mutex& m_mut;
bool m_done;
};
/**
 * XML-RPC "translate" method. Each request is wrapped in a TranslationTask,
 * handed to an internal Moses::ThreadPool, and the calling thread blocks until
 * the task reports completion.
 **/
class Translator : public xmlrpc_c::method
{
public:
// numThreads is passed straight to the ThreadPool constructor.
Translator(size_t numThreads = 10) : m_threadPool(numThreads) {
// signature and help strings are documentation -- the client
// can query this information with a system.methodSignature and
// system.methodHelp RPC.
this->_signature = "S:S";
this->_help = "Does translation";
}
// Runs one translation request on the pool and stores the task's result
// struct in *retvalP.
void
execute(xmlrpc_c::paramList const& paramList,
xmlrpc_c::value * const retvalP) {
// cond/mut/task are locals and the pool only receives a pointer to task,
// so this function must not return before the task has finished with them.
boost::condition_variable cond;
boost::mutex mut;
TranslationTask task(paramList,cond,mut);
m_threadPool.Submit(&task);
boost::unique_lock<boost::mutex> lock(mut);
// Loop guards against spurious wake-ups; IsDone() is read with mut held.
// NOTE(review): TranslationTask::Run() throws xmlrpc_c::fault on bad input
// before it sets its done flag; if that escapes on the worker thread this
// wait would never be released -- confirm how ThreadPool handles exceptions.
// NOTE(review): Run() appears to release mut before calling notify_one();
// since cond lives in this frame, confirm it cannot be destroyed first.
while (!task.IsDone()) {
cond.wait(lock);
}
*retvalP = xmlrpc_c::value_struct(task.GetRetData());
}
private:
// Pool that executes the submitted TranslationTask objects.
Moses::ThreadPool m_threadPool;
};
static
@ -580,6 +626,7 @@ int main(int argc, char** argv)
int port = 8080;
const char* logfile = "/dev/null";
bool isSerial = false;
size_t numThreads = 10; //for translation tasks
for (int i = 0; i < argc; ++i) {
if (!strcmp(argv[i],"--server-port")) {
@ -598,6 +645,14 @@ int main(int argc, char** argv)
} else {
logfile = argv[i];
}
} else if (!strcmp(argv[i], "--threads")) {
++i;
if (i>=argc) {
cerr << "Error: Missing argument to --threads" << endl;
exit(1);
} else {
numThreads = atoi(argv[i]);
}
} else if (!strcmp(argv[i], "--serial")) {
cerr << "Running single-threaded server" << endl;
isSerial = true;
@ -627,7 +682,7 @@ int main(int argc, char** argv)
xmlrpc_c::registry myRegistry;
xmlrpc_c::methodPtr const translator(new Translator);
xmlrpc_c::methodPtr const translator(new Translator(numThreads));
xmlrpc_c::methodPtr const updater(new Updater);
xmlrpc_c::methodPtr const optimizer(new Optimizer);

View File

@ -131,10 +131,7 @@ if $(with-macports) {
#Convenience rule for boost libraries. Defines library boost_$(name).
rule boost-lib ( name macro : deps * ) {
#Link multi-threaded programs against the -mt version if available. Old
#versions of boost do not have -mt tagged versions of all libraries. Sadly,
#boost.jam does not handle this correctly.
flags = $(L-boost-search)" -lboost_"$(name)"-mt$(boost-lib-version)" ;
flags = $(L-boost-search)" -lboost_"$(name)"$(boost-lib-version)" ;
local main ;
if $(name) = "unit_test_framework" {
main = "BOOST_AUTO_TEST_CASE(foo) {}" ;
@ -147,7 +144,7 @@ rule boost-lib ( name macro : deps * ) {
}
if [ test_flags $(flags) : $(main) ] {
lib inner_boost_$(name) : : <threading>single $(boost-search) <name>boost_$(name)$(boost-lib-version) : <link>static : <library>$(deps) ;
lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt$(boost-lib-version) : <link>static : <library>$(deps) ;
lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)$(boost-lib-version) : <link>static : <library>$(deps) ;
} else {
lib inner_boost_$(name) : : $(boost-search) <name>boost_$(name)$(boost-lib-version) : : <library>$(deps) ;
}

View File

@ -186,7 +186,7 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
entry.set(stats);
}
statscore_t BleuScorer::calculateScore(const vector<int>& comps) const
statscore_t BleuScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
UTIL_THROW_IF(comps.size() != kBleuNgramOrder * 2 + 1, util::Exception, "Error");
@ -289,23 +289,6 @@ float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vec
return exp(logbleu) * stats[kBleuNgramOrder*2];
}
float unsmoothedBleu(const std::vector<float>& stats)
{
UTIL_THROW_IF(stats.size() != kBleuNgramOrder * 2 + 1, util::Exception, "Error");
float logbleu = 0.0;
for (int j = 0; j < kBleuNgramOrder; j++) {
logbleu += log(stats[2 * j]) - log(stats[2 * j + 1]);
}
logbleu /= kBleuNgramOrder;
const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
if (brevity < 0.0) {
logbleu += brevity;
}
return exp(logbleu);
}
vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string& featureFile)
{
vector<string> scoreFiles;

View File

@ -37,7 +37,7 @@ public:
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual statscore_t calculateScore(const std::vector<int>& comps) const;
virtual statscore_t calculateScore(const std::vector<ScoreStatsType>& comps) const;
virtual std::size_t NumberOfScores() const {
return 2 * kBleuNgramOrder + 1;
}
@ -55,6 +55,10 @@ public:
return m_references.get();
}
virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const {
return totals[kBleuNgramOrder*2];
}
/**
* Count the ngrams of each type, up to the given length in the input line.
*/
@ -93,11 +97,6 @@ float smoothedSentenceBleu
*/
float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg);
/**
* Computes plain old BLEU from a vector of stats
*/
float unsmoothedBleu(const std::vector<float>& stats);
}
#endif // MERT_BLEU_SCORER_H_

View File

@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(bleu_clipped_counts)
BOOST_AUTO_TEST_CASE(calculate_actual_score)
{
BOOST_REQUIRE(4 == kBleuNgramOrder);
std::vector<int> stats(2 * kBleuNgramOrder + 1);
std::vector<ScoreStatsType> stats(2 * kBleuNgramOrder + 1);
BleuScorer scorer;
// unigram

View File

@ -52,18 +52,18 @@ void CderScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
string sentence = this->preprocessSentence(text);
vector<int> stats;
vector<ScoreStatsType> stats;
prepareStatsVector(sid, sentence, stats);
entry.set(stats);
}
void CderScorer::prepareStatsVector(size_t sid, const string& text, vector<int>& stats)
void CderScorer::prepareStatsVector(size_t sid, const string& text, vector<ScoreStatsType>& stats)
{
sent_t cand;
TokenizeAndEncode(text, cand);
float max = -2;
vector<int> tmp;
vector<ScoreStatsType> tmp;
for (size_t rid = 0; rid < m_ref_sentences.size(); ++rid) {
const sent_t& ref = m_ref_sentences[rid][sid];
tmp.clear();
@ -79,7 +79,7 @@ void CderScorer::prepareStatsVector(size_t sid, const string& text, vector<int>&
}
}
float CderScorer::calculateScore(const vector<int>& comps) const
float CderScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
if (comps.size() != 2) {
throw runtime_error("Size of stat vector for CDER is not 2");
@ -89,7 +89,7 @@ float CderScorer::calculateScore(const vector<int>& comps) const
}
void CderScorer::computeCD(const sent_t& cand, const sent_t& ref,
vector<int>& stats) const
vector<ScoreStatsType>& stats) const
{
int I = cand.size() + 1; // Number of inter-words positions in candidate sentence
int L = ref.size() + 1; // Number of inter-words positions in reference sentence

View File

@ -23,13 +23,13 @@ public:
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual void prepareStatsVector(std::size_t sid, const std::string& text, std::vector<int>& stats);
virtual void prepareStatsVector(std::size_t sid, const std::string& text, std::vector<ScoreStatsType>& stats);
virtual std::size_t NumberOfScores() const {
return 2;
}
virtual float calculateScore(const std::vector<int>& comps) const;
virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
private:
bool m_allowed_long_jumps;
@ -38,7 +38,7 @@ private:
std::vector<std::vector<sent_t> > m_ref_sentences;
void computeCD(const sent_t& cand, const sent_t& ref,
std::vector<int>& stats) const;
std::vector<ScoreStatsType>& stats) const;
// no copying allowed
CderScorer(const CderScorer&);

View File

@ -135,7 +135,7 @@ void Data::load(const std::string &featfile, const std::string &scorefile)
m_score_data->load(scorefile);
}
void Data::loadNBest(const string &file)
void Data::loadNBest(const string &file, bool oneBest)
{
TRACE_ERR("loading nbest from " << file << endl);
util::FilePiece in(file.c_str());
@ -154,6 +154,7 @@ void Data::loadNBest(const string &file)
util::TokenIter<util::MultiCharacter> it(line, util::MultiCharacter("|||"));
sentence_index = ParseInt(*it);
if (oneBest && m_score_data->exists(sentence_index)) continue;
++it;
sentence = it->as_string();
++it;
@ -164,10 +165,9 @@ void Data::loadNBest(const string &file)
++it; // skip model score.
if (it) {
++it;
alignment = it->as_string(); //fifth field (if present) is either phrase or word alignment
++it;
if (it) {
++it;
alignment = it->as_string(); //sixth field (if present) is word alignment
}
}

View File

@ -67,7 +67,7 @@ public:
m_feature_data->Features(f);
}
void loadNBest(const std::string &file);
void loadNBest(const std::string &file, bool oneBest=false);
void load(const std::string &featfile, const std::string &scorefile);

View File

@ -28,7 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "util/exception.hh"
#include "util/file_piece.hh"
#include "BleuScorer.h"
#include "Scorer.h"
#include "HopeFearDecoder.h"
using namespace std;
@ -39,7 +39,7 @@ namespace MosesTuning {
static const ValType BLEU_RATIO = 5;
ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
vector<ValType> stats(kBleuNgramOrder*2+1,0);
vector<ValType> stats(scorer_->NumberOfScores(),0);
for(reset(); !finished(); next()) {
vector<ValType> sent;
MaxModel(wv,&sent);
@ -47,7 +47,7 @@ ValType HopeFearDecoder::Evaluate(const AvgWeightVector& wv) {
stats[i]+=sent[i];
}
}
return unsmoothedBleu(stats);
return scorer_->calculateScore(stats);
}
NbestHopeFearDecoder::NbestHopeFearDecoder(
@ -55,8 +55,10 @@ NbestHopeFearDecoder::NbestHopeFearDecoder(
const vector<string>& scoreFiles,
bool streaming,
bool no_shuffle,
bool safe_hope
bool safe_hope,
Scorer* scorer
) : safe_hope_(safe_hope) {
scorer_ = scorer;
if (streaming) {
train_.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
} else {
@ -93,7 +95,7 @@ void NbestHopeFearDecoder::HopeFear(
for(size_t i=0; i< train_->cur_size(); i++) {
const MiraFeatureVector& vec=train_->featuresAt(i);
ValType score = wv.score(vec);
ValType bleu = sentenceLevelBackgroundBleu(train_->scoresAt(i),backgroundBleu);
ValType bleu = scorer_->calculateSentenceLevelBackgroundScore(train_->scoresAt(i),backgroundBleu);
// Hope
if(i==0 || (hope_scale*score + bleu) > hope_score) {
hope_score = hope_scale*score + bleu;
@ -124,9 +126,9 @@ void NbestHopeFearDecoder::HopeFear(
hopeFear->fearFeatures = train_->featuresAt(fear_index);
hopeFear->hopeStats = train_->scoresAt(hope_index);
hopeFear->hopeBleu = sentenceLevelBackgroundBleu(hopeFear->hopeStats, backgroundBleu);
hopeFear->hopeBleu = scorer_->calculateSentenceLevelBackgroundScore(hopeFear->hopeStats, backgroundBleu);
const vector<float>& fear_stats = train_->scoresAt(fear_index);
hopeFear->fearBleu = sentenceLevelBackgroundBleu(fear_stats, backgroundBleu);
hopeFear->fearBleu = scorer_->calculateSentenceLevelBackgroundScore(fear_stats, backgroundBleu);
hopeFear->modelStats = train_->scoresAt(model_index);
hopeFear->hopeFearEqual = (hope_index == fear_index);
@ -158,7 +160,8 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
bool no_shuffle,
bool safe_hope,
size_t hg_pruning,
const MiraWeightVector& wv
const MiraWeightVector& wv,
Scorer* scorer
) :
num_dense_(num_dense) {
@ -169,6 +172,7 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
SparseVector weights;
wv.ToSparse(&weights);
scorer_ = scorer;
static const string kWeights = "weights";
fs::directory_iterator dend;
@ -260,9 +264,9 @@ void HypergraphHopeFearDecoder::HopeFear(
//Only C++11
//hopeFear->modelStats.assign(std::begin(modelHypo.bleuStats), std::end(modelHypo.bleuStats));
vector<ValType> fearStats(kBleuNgramOrder*2+1);
hopeFear->hopeStats.reserve(kBleuNgramOrder*2+1);
hopeFear->modelStats.reserve(kBleuNgramOrder*2+1);
vector<ValType> fearStats(scorer_->NumberOfScores());
hopeFear->hopeStats.reserve(scorer_->NumberOfScores());
hopeFear->modelStats.reserve(scorer_->NumberOfScores());
for (size_t i = 0; i < fearStats.size(); ++i) {
hopeFear->modelStats.push_back(modelHypo.bleuStats[i]);
hopeFear->hopeStats.push_back(hopeHypo.bleuStats[i]);
@ -320,7 +324,7 @@ void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValTy
size_t sentenceId = *sentenceIdIter_;
SparseVector weights;
wv.ToSparse(&weights);
vector<ValType> bg(kBleuNgramOrder*2+1);
vector<ValType> bg(scorer_->NumberOfScores());
Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
stats->resize(bestHypo.bleuStats.size());
/*

View File

@ -37,6 +37,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace MosesTuning {
class Scorer;
/** To be filled in by the decoder */
struct HopeFearData {
MiraFeatureVector modelFeatures;
@ -60,6 +62,8 @@ public:
virtual void next() = 0;
virtual bool finished() = 0;
virtual ~HopeFearDecoder() {};
/**
* Calculate hope, fear and model hypotheses
**/
@ -76,6 +80,8 @@ public:
/** Calculate bleu on training set */
ValType Evaluate(const AvgWeightVector& wv);
protected:
Scorer* scorer_;
};
@ -86,7 +92,8 @@ public:
const std::vector<std::string>& scoreFiles,
bool streaming,
bool no_shuffle,
bool safe_hope
bool safe_hope,
Scorer* scorer
);
virtual void reset();
@ -120,7 +127,8 @@ public:
bool no_shuffle,
bool safe_hope,
size_t hg_pruning,
const MiraWeightVector& wv
const MiraWeightVector& wv,
Scorer* scorer_
);
virtual void reset();

165
mert/HwcmScorer.cpp Normal file
View File

@ -0,0 +1,165 @@
#include "HwcmScorer.h"
#include <fstream>
#include "ScoreStats.h"
#include "Util.h"
#include "util/tokenize_piece.hh"
// HWCM score (Liu and Gildea, 2005). Implements F1 instead of precision to better model hypothesis length.
// Assumes dependency trees on the target side (generated by scripts/training/wrappers/conll2mosesxml.py; use the --brackets option for the reference).
// Reads reference trees from a separate file, {REFERENCE_FILE}.trees, to support a mix of string-based and tree-based metrics.
using namespace std;
namespace MosesTuning
{
// Forwards the scorer name "HWCM" and the configuration string to the
// StatisticsBasedScorer base class; no other set-up is done here.
HwcmScorer::HwcmScorer(const string& config)
  : StatisticsBasedScorer("HWCM", config)
{
}

// Empty destructor body: no explicit clean-up is performed.
HwcmScorer::~HwcmScorer()
{
}
void HwcmScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
// For each line in the reference file, create a tree object
if (referenceFiles.size() != 1) {
throw runtime_error("HWCM only supports a single reference");
}
m_ref_trees.clear();
m_ref_hwc.clear();
ifstream in((referenceFiles[0] + ".trees").c_str());
if (!in) {
throw runtime_error("Unable to open " + referenceFiles[0] + ".trees");
}
string line;
while (getline(in,line)) {
line = this->preprocessSentence(line);
TreePointer tree (boost::make_shared<InternalTree>(line));
m_ref_trees.push_back(tree);
vector<map<string, int> > hwc (kHwcmOrder);
vector<string> history(kHwcmOrder);
extractHeadWordChain(tree, history, hwc);
m_ref_hwc.push_back(hwc);
vector<int> totals(kHwcmOrder);
for (size_t i = 0; i < kHwcmOrder; i++) {
for (map<string, int>::const_iterator it = m_ref_hwc.back()[i].begin(); it != m_ref_hwc.back()[i].end(); it++) {
totals[i] += it->second;
}
}
m_ref_lengths.push_back(totals);
}
TRACE_ERR(endl);
}
/**
 * Recursively collects head-word chains of a tree into hwc.
 *
 * history[k] holds the chain of k+1 head words ending at the closest ancestor
 * that had a head (empty if no such chain exists). hwc[k] counts chains of
 * k+1 head words. Nodes for which getHead() returns an empty string are
 * transparent: their children are visited with the unchanged history.
 */
void HwcmScorer::extractHeadWordChain(TreePointer tree, vector<string> & history, vector<map<string, int> > & hwc)
{
  // Nothing to do for nodes without children.
  if (!(tree->GetLength() > 0)) {
    return;
  }

  const std::vector<TreePointer>& children = tree->GetChildren();
  const string head = getHead(tree);

  // No head at this node: pass the inherited history straight down.
  if (head.empty()) {
    for (std::vector<TreePointer>::const_iterator child = children.begin(); child != children.end(); ++child) {
      extractHeadWordChain(*child, history, hwc);
    }
    return;
  }

  // This node contributes a unigram chain and extends every inherited chain.
  vector<string> extended(kHwcmOrder);
  extended[0] = head;
  ++hwc[0][head];

  for (size_t order = 1; order < kHwcmOrder; ++order) {
    const string& prefix = history[order - 1];
    if (prefix.empty()) {
      continue;
    }
    const string chain = prefix + " " + head;
    ++hwc[order][chain];
    // Chains of maximal order are counted but never extended further.
    if (order + 1 < kHwcmOrder) {
      extended[order] = chain;
    }
  }

  for (std::vector<TreePointer>::const_iterator child = children.begin(); child != children.end(); ++child) {
    extractHeadWordChain(*child, extended, hwc);
  }
}
/**
 * Returns the head word of a constituent, or "" if none is found.
 *
 * Assumption (only true for dependency parses): each constituent has a
 * preterminal child whose single terminal is the head. If there are several
 * preterminal children the first one wins; if there is none, the empty string
 * is returned.
 */
string HwcmScorer::getHead(TreePointer tree)
{
  const std::vector<TreePointer>& children = tree->GetChildren();
  for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); ++it) {
    const TreePointer& candidate = *it;
    // A preterminal has exactly one child ...
    if (candidate->GetLength() != 1) {
      continue;
    }
    // ... and that child is a terminal, whose label is the head word.
    TreePointer leaf = candidate->GetChildren()[0];
    if (leaf->IsTerminal()) {
      return leaf->GetLabel();
    }
  }
  return "";
}
void HwcmScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
if (sid >= m_ref_trees.size()) {
stringstream msg;
msg << "Sentence id (" << sid << ") not found in reference set";
throw runtime_error(msg.str());
}
string sentence = this->preprocessSentence(text);
// if sentence has '|||', assume that tree is in second position (n-best-list);
// otherwise, assume it is in first position (calling 'evaluate' with tree as reference)
util::TokenIter<util::MultiCharacter> it(sentence, util::MultiCharacter("|||"));
++it;
if (it) {
sentence = it->as_string();
}
TreePointer tree (boost::make_shared<InternalTree>(sentence));
vector<map<string, int> > hwc_test (kHwcmOrder);
vector<string> history(kHwcmOrder);
extractHeadWordChain(tree, history, hwc_test);
ostringstream stats;
for (size_t i = 0; i < kHwcmOrder; i++) {
int correct = 0;
int test_total = 0;
for (map<string, int>::const_iterator it = hwc_test[i].begin(); it != hwc_test[i].end(); it++) {
test_total += it->second;
map<string, int>::const_iterator it2 = m_ref_hwc[sid][i].find(it->first);
if (it2 != m_ref_hwc[sid][i].end()) {
correct += std::min(it->second, it2->second);
}
}
stats << correct << " " << test_total << " " << m_ref_lengths[sid][i] << " " ;
}
string stats_str = stats.str();
entry.set(stats_str);
}
float HwcmScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
float precision = 0;
float recall = 0;
for (size_t i = 0; i < kHwcmOrder; i++) {
float matches = comps[i*3];
float test_total = comps[1+(i*3)];
float ref_total = comps[2+(i*3)];
if (test_total > 0) {
precision += matches/test_total;
}
if (ref_total > 0) {
recall += matches/ref_total;
}
}
precision /= (float)kHwcmOrder;
recall /= (float)kHwcmOrder;
return (2*precision*recall)/(precision+recall); // f1-score
}
}

64
mert/HwcmScorer.h Normal file
View File

@ -0,0 +1,64 @@
#ifndef MERT_HWCM_SCORER_H_
#define MERT_HWCM_SCORER_H_
#include <string>
#include <vector>
#include "StatisticsBasedScorer.h"
#include "moses/FF/InternalTree.h"
using Moses::TreePointer;
using Moses::InternalTree;
namespace MosesTuning
{
class ScoreStats;
const size_t kHwcmOrder = 4;
/**
* HWCM scoring (Liu and Gildea 2005), but F1 instead of precision.
*/
class HwcmScorer: public StatisticsBasedScorer
{
public:
// config is forwarded unchanged to StatisticsBasedScorer.
explicit HwcmScorer(const std::string& config = "");
~HwcmScorer();
// Reads the reference trees from "<referenceFiles[0]>.trees"; exactly one
// reference is supported.
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
// Writes, per chain order, (matches, hypothesis total, reference total) for
// hypothesis `text` against reference sentence `sid` into `entry`.
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
// Three statistics per chain order.
virtual std::size_t NumberOfScores() const {
return kHwcmOrder*3;
}
// Combines the statistics written by prepareStats into an F1 score.
virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
// Index 2 is the reference total of the first chain order in the layout
// produced by prepareStats.
virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const {
return totals[2];
}
//TODO: actually, we use trees which we store in place of alignment. Maybe use something analogous to Phrase Properties to cleanly store trees?
bool useAlignment() const {
return true;
}
private:
// data extracted from reference files
// One entry per reference sentence in each of the three vectors below.
std::vector<TreePointer> m_ref_trees;
// m_ref_hwc[sid][k]: counts of head-word chains of length k+1.
std::vector<std::vector<std::map<std::string, int> > > m_ref_hwc;
// m_ref_lengths[sid][k]: sum of the counts in m_ref_hwc[sid][k].
std::vector<std::vector<int> > m_ref_lengths;
// Recursively accumulates the head-word chains of `tree` into `hwc`;
// `history` carries the chains ending at the nearest headed ancestor.
void extractHeadWordChain(TreePointer tree, std::vector<std::string> & history, std::vector<std::map<std::string, int> > & hwc);
// Returns the terminal label under the first preterminal child, or "".
std::string getHead(TreePointer tree);
// no copying allowed
HwcmScorer(const HwcmScorer&);
HwcmScorer& operator=(const HwcmScorer&);
};
}
#endif // MERT_HWCM_SCORER_H_

View File

@ -153,6 +153,41 @@ void InterpolatedScorer::score(const candidates_t& candidates, const diffs_t& di
}
/** Interpolated scorer gets a vector of sufficient statistics, calls all scorers with corresponding statistics,
and combines them with weights **/
float InterpolatedScorer::calculateScore(const std::vector<ScoreStatsType>& totals) const
{
size_t scorerNum = 0;
size_t last = 0;
float score = 0;
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
itsc != m_scorers.end(); ++itsc) {
int numScoresScorer = (*itsc)->NumberOfScores();
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
score += (*itsc)->calculateScore(totals_scorer) * m_scorer_weights[scorerNum];
last += numScoresScorer;
scorerNum++;
}
return score;
}
float InterpolatedScorer::getReferenceLength(const std::vector<ScoreStatsType>& totals) const
{
size_t scorerNum = 0;
size_t last = 0;
float refLen = 0;
for (ScopedVector<Scorer>::const_iterator itsc = m_scorers.begin();
itsc != m_scorers.end(); ++itsc) {
int numScoresScorer = (*itsc)->NumberOfScores();
std::vector<ScoreStatsType> totals_scorer(totals.begin()+last, totals.begin()+last+numScoresScorer);
refLen += (*itsc)->getReferenceLength(totals_scorer) * m_scorer_weights[scorerNum];
last += numScoresScorer;
scorerNum++;
}
return refLen;
}
void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
{
for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();

View File

@ -39,6 +39,10 @@ public:
virtual void setScoreData(ScoreData* data);
virtual float calculateScore(const std::vector<ScoreStatsType>& totals) const;
virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const;
/**
* Set the factors, which should be used for this metric
*/

View File

@ -29,6 +29,8 @@ SemposOverlapping.cpp
InterpolatedScorer.cpp
Point.cpp
PerScorer.cpp
HwcmScorer.cpp
../moses/FF/InternalTree.cpp
Scorer.cpp
ScorerFactory.cpp
Optimizer.cpp

View File

@ -144,7 +144,7 @@ void MeteorScorer::prepareStats(size_t sid, const string& text, ScoreStats& entr
entry.set(stats_str);
}
float MeteorScorer::calculateScore(const vector<int>& comps) const
float MeteorScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
string score;
stringstream input;

View File

@ -54,7 +54,7 @@ public:
return 23;
}
virtual float calculateScore(const std::vector<int>& comps) const;
virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
private:
// Meteor and process IO

View File

@ -79,10 +79,10 @@ void PerScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
entry.set(stats_str);
}
float PerScorer::calculateScore(const vector<int>& comps) const
float PerScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
float denom = comps[2];
float num = comps[0] - max(0,comps[1]-comps[2]);
float num = comps[0] - max(0.0f,comps[1]-comps[2]);
if (denom == 0) {
// This shouldn't happen!
return 0.0;

View File

@ -30,7 +30,7 @@ public:
virtual std::size_t NumberOfScores() const {
return 3;
}
virtual float calculateScore(const std::vector<int>& comps) const;
virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
private:
// no copying allowed

View File

@ -234,7 +234,7 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
}
//Will just be final score
statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const
statscore_t PermutationScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
//cerr << "*******PermutationScorer::calculateScore" ;
//cerr << " " << comps[0]/comps[1] << endl;

View File

@ -49,7 +49,7 @@ public:
protected:
statscore_t calculateScore(const std::vector<int>& scores) const;
statscore_t calculateScore(const std::vector<ScoreStatsType>& scores) const;
PermutationScorer(const PermutationScorer&);
~PermutationScorer() {};
PermutationScorer& operator=(const PermutationScorer&);

View File

@ -42,6 +42,19 @@ public:
*/
virtual std::size_t NumberOfScores() const = 0;
/**
* Calculate score based on a vector of sufficient statistics.
*/
virtual float calculateScore(const std::vector<ScoreStatsType>& totals) const = 0;
float calculateSentenceLevelBackgroundScore(const std::vector<ScoreStatsType>& totals, const std::vector<ScoreStatsType>& bg) {
std::vector<ScoreStatsType> stats(totals.size());
for(size_t i=0; i<stats.size(); i++)
stats[i] = totals[i]+bg[i];
// Get score and scale by reference length (as per Chiang et al 08)
return calculateScore(stats) * getReferenceLength(stats);
}
/**
* Set the reference files. This must be called before prepareStats().
*/
@ -97,6 +110,11 @@ public:
return 0;
}
/**
* Based on vector of sufficient statistics, return length of reference.
*/
virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const = 0;
/**
* Set the score data, prior to scoring.
*/

View File

@ -11,6 +11,7 @@
#include "SemposScorer.h"
#include "PermutationScorer.h"
#include "MeteorScorer.h"
#include "HwcmScorer.h"
#include "Reference.h"
using namespace std;
@ -32,6 +33,7 @@ vector<string> ScorerFactory::getTypes()
types.push_back(string("SEMPOS"));
types.push_back(string("LRSCORE"));
types.push_back(string("METEOR"));
types.push_back(string("HWCM"));
return types;
}
@ -56,6 +58,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config)
return (PermutationScorer*) new PermutationScorer(type, config);
} else if (type == "METEOR") {
return new MeteorScorer(config);
} else if (type == "HWCM") {
return new HwcmScorer(config);
} else {
if (type.find(',') != string::npos) {
return new InterpolatedScorer(type, config);

View File

@ -33,9 +33,9 @@ void SemposOverlappingFactory::SetOverlapping(SemposOverlapping* ovr)
g_overlapping = ovr;
}
vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
vector<ScoreStatsType> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
{
vector<int> stats(2);
vector<ScoreStatsType> stats(2);
sentence_t intersection;
set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
@ -53,12 +53,12 @@ vector<int> CapMicroOverlapping::prepareStats(const sentence_t& cand, const sent
refSum += semposScorer->weight(it->first);
}
stats[0] = (int)(multCoeff * interSum);
stats[1] = (int)(multCoeff * refSum);
stats[0] = (ScoreStatsType)(multCoeff * interSum);
stats[1] = (ScoreStatsType)(multCoeff * refSum);
return stats;
}
float CapMicroOverlapping::calculateScore(const vector<int>& stats) const
float CapMicroOverlapping::calculateScore(const vector<ScoreStatsType>& stats) const
{
if (stats.size() != 2) {
throw std::runtime_error("Size of stats vector has to be 2");
@ -67,9 +67,9 @@ float CapMicroOverlapping::calculateScore(const vector<int>& stats) const
return stats[0] / static_cast<float>(stats[1]);
}
vector<int> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
vector<ScoreStatsType> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sentence_t& ref)
{
vector<int> stats(2 * kMaxNOC);
vector<ScoreStatsType> stats(2 * kMaxNOC);
sentence_t intersection;
set_intersection(cand.begin(), cand.end(), ref.begin(), ref.end(),
@ -92,7 +92,7 @@ vector<int> CapMacroOverlapping::prepareStats(const sentence_t& cand, const sent
return stats;
}
float CapMacroOverlapping::calculateScore(const vector<int>& stats) const
float CapMacroOverlapping::calculateScore(const vector<ScoreStatsType>& stats) const
{
if (stats.size() != 2 * kMaxNOC) {
// TODO: Add some comments. The number "38" looks like a magic number.

View File

@ -7,6 +7,8 @@
#include <utility>
#include <vector>
#include "Types.h"
namespace MosesTuning
{
@ -31,8 +33,8 @@ class SemposOverlapping
{
public:
virtual ~SemposOverlapping() {}
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref) = 0;
virtual float calculateScore(const std::vector<int>& stats) const = 0;
virtual std::vector<ScoreStatsType> prepareStats(const sentence_t& cand, const sentence_t& ref) = 0;
virtual float calculateScore(const std::vector<ScoreStatsType>& stats) const = 0;
virtual std::size_t NumberOfScores() const = 0;
};
@ -61,8 +63,8 @@ public:
CapMicroOverlapping(const SemposScorer* sempos) : semposScorer(sempos) {}
~CapMicroOverlapping() {}
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
virtual float calculateScore(const std::vector<int>& stats) const;
virtual std::vector<ScoreStatsType> prepareStats(const sentence_t& cand, const sentence_t& ref);
virtual float calculateScore(const std::vector<ScoreStatsType>& stats) const;
virtual std::size_t NumberOfScores() const {
return 2;
}
@ -83,8 +85,8 @@ public:
CapMacroOverlapping(const SemposScorer* sempos) : semposScorer(sempos) {}
~CapMacroOverlapping() {}
virtual std::vector<int> prepareStats(const sentence_t& cand, const sentence_t& ref);
virtual float calculateScore(const std::vector<int>& stats) const;
virtual std::vector<ScoreStatsType> prepareStats(const sentence_t& cand, const sentence_t& ref);
virtual float calculateScore(const std::vector<ScoreStatsType>& stats) const;
virtual std::size_t NumberOfScores() const {
return kMaxNOC * 2;
}

View File

@ -35,7 +35,7 @@ public:
virtual std::size_t NumberOfScores() const {
return m_ovr->NumberOfScores();
}
virtual float calculateScore(const std::vector<int>& comps) const {
virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const {
return m_ovr->calculateScore(comps);
}

View File

@ -67,7 +67,7 @@ void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t
throw runtime_error("No candidates supplied");
}
int numCounts = m_score_data->get(0,candidates[0]).size();
vector<int> totals(numCounts);
vector<ScoreStatsType> totals(numCounts);
for (size_t i = 0; i < candidates.size(); ++i) {
ScoreStats stats = m_score_data->get(i,candidates[i]);
if (stats.size() != totals.size()) {

View File

@ -11,6 +11,8 @@
#include "Scorer.h"
#include "util/exception.hh"
namespace MosesTuning
{
@ -21,6 +23,8 @@ namespace MosesTuning
*/
class StatisticsBasedScorer : public Scorer
{
friend class HopeFearDecoder;
public:
StatisticsBasedScorer(const std::string& name, const std::string& config);
virtual ~StatisticsBasedScorer() {}
@ -38,7 +42,12 @@ protected:
/**
* Calculate the actual score.
*/
virtual statscore_t calculateScore(const std::vector<int>& totals) const = 0;
virtual statscore_t calculateScore(const std::vector<ScoreStatsType>& totals) const = 0;
/**
 * Default implementation for statistics-based scorers that do not provide a
 * reference length: reports the missing feature by raising util::Exception.
 * Scorers that support it are expected to override this method.
 */
virtual float getReferenceLength(const std::vector<ScoreStatsType>& totals) const {
UTIL_THROW(util::Exception, "getReferenceLength not implemented for this scorer type.");
// NOTE(review): presumably unreachable (UTIL_THROW is expected to always throw);
// kept so the function has a return statement on every syntactic path.
return 0;
}
// regularisation
RegularisationType m_regularization_type;

View File

@ -101,7 +101,7 @@ void TerScorer::prepareStats ( size_t sid, const string& text, ScoreStats& entry
entry.set ( stats_str );
}
float TerScorer::calculateScore(const vector<int>& comps) const
float TerScorer::calculateScore(const vector<ScoreStatsType>& comps) const
{
float denom = 1.0 * comps[1];
float num = -1.0 * comps[0];

View File

@ -31,7 +31,7 @@ public:
return kLENGTH + 1;
}
virtual float calculateScore(const std::vector<int>& comps) const;
virtual float calculateScore(const std::vector<ScoreStatsType>& comps) const;
private:
const int kLENGTH;

View File

@ -33,7 +33,7 @@ typedef FeatureStatsType* featstats_t;
typedef std::vector<FeatureStats> featarray_t;
typedef std::vector<FeatureArray> featdata_t;
typedef int ScoreStatsType;
typedef float ScoreStatsType;
typedef ScoreStatsType* scorestats_t;
//typedef std::vector<ScoreStatsType> scorestats_t;
typedef std::vector<ScoreStats> scorearray_t;

View File

@ -14,6 +14,7 @@
#include "ScorerFactory.h"
#include "Timer.h"
#include "Util.h"
#include "Data.h"
using namespace std;
using namespace MosesTuning;
@ -30,17 +31,20 @@ const float g_alpha = 0.05;
class EvaluatorUtil
{
public:
static void evaluate(const string& candFile, int bootstrap);
static void evaluate(const string& candFile, int bootstrap, bool nbest_mode);
static float average(const vector<float>& list);
static string int2string(int n);
static vector<ScoreStats> loadNBest(const string& nBestFile);
static vector<ScoreStats> loadCand(const string& candFile);
private:
EvaluatorUtil() {}
~EvaluatorUtil() {}
};
void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
{
// load hypothesis from candidate output
vector<ScoreStats> EvaluatorUtil::loadCand(const string& candFile) {
ifstream cand(candFile.c_str());
if (!cand.good()) throw runtime_error("Error opening candidate file");
@ -53,6 +57,34 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap)
g_scorer->prepareStats(entries.size(), line, scoreentry);
entries.push_back(scoreentry);
}
return entries;
}
// load 1-best hypothesis from n-best file (useful if relying on alignment/tree information)
vector<ScoreStats> EvaluatorUtil::loadNBest(const string& nBestFile) {
  // Let Data parse the n-best list and compute score statistics with g_scorer.
  // NOTE(review): the meaning of the second loadNBest() argument (true) is not
  // visible from here -- confirm against Data::loadNBest.
  Data data(g_scorer);
  data.loadNBest(nBestFile, true);
  const ScoreDataHandle & scoreData = data.getScoreData();

  // Keep only entry 0 (the first hypothesis) of every sentence.
  vector<ScoreStats> firstBest;
  size_t sentence = 0;
  while (sentence != scoreData->size()) {
    firstBest.push_back(scoreData->get(sentence, 0));
    sentence++;
  }
  return firstBest;
}
void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_input)
{
vector<ScoreStats> entries;
if (nbest_input) {
entries = loadNBest(candFile);
}
else {
entries = loadCand(candFile);
}
int n = entries.size();
if (bootstrap) {
@ -131,6 +163,7 @@ void usage()
cerr << "\tThis is of the form NAME1:VAL1,NAME2:VAL2 etc " << endl;
cerr << "[--reference|-R] comma separated list of reference files" << endl;
cerr << "[--candidate|-C] comma separated list of candidate files" << endl;
cerr << "[--nbest|-n] comma separated list of nbest files (only 1-best is evaluated)" << endl;
cerr << "[--factors|-f] list of factors passed to the scorer (e.g. 0|2)" << endl;
cerr << "[--filter|-l] filter command which will be used to preprocess the sentences" << endl;
cerr << "[--bootstrap|-b] number of booststraped samples (default 0 - no bootstraping)" << endl;
@ -162,6 +195,7 @@ static struct option long_options[] = {
{"scconfig", required_argument, 0, 'c'},
{"reference", required_argument, 0, 'R'},
{"candidate", required_argument, 0, 'C'},
{"nbest", required_argument, 0, 'n'},
{"bootstrap", required_argument, 0, 'b'},
{"rseed", required_argument, 0, 'r'},
{"factors", required_argument, 0, 'f'},
@ -176,6 +210,7 @@ struct ProgramOption {
vector<string> scorer_configs;
string reference;
string candidate;
string nbest;
vector<string> scorer_factors;
vector<string> scorer_filter;
int bootstrap;
@ -185,6 +220,7 @@ struct ProgramOption {
ProgramOption()
: reference(""),
candidate(""),
nbest(""),
bootstrap(0),
seed(0),
has_seed(false) { }
@ -195,7 +231,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
int c;
int option_index;
int last_scorer_index = -1;
while ((c = getopt_long(argc, argv, "s:c:R:C:b:r:f:l:h", long_options, &option_index)) != -1) {
while ((c = getopt_long(argc, argv, "s:c:R:C:n:b:r:f:l:h", long_options, &option_index)) != -1) {
switch(c) {
case 's':
opt->scorer_types.push_back(string(optarg));
@ -205,6 +241,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
last_scorer_index++;
break;
case 'c':
if (last_scorer_index == -1) throw runtime_error("You need to specify a scorer before its config string.");
opt->scorer_configs[last_scorer_index] = string(optarg);
break;
case 'R':
@ -213,6 +250,9 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
case 'C':
opt->candidate = string(optarg);
break;
case 'n':
opt->nbest = string(optarg);
break;
case 'b':
opt->bootstrap = atoi(optarg);
break;
@ -221,9 +261,11 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt)
opt->has_seed = true;
break;
case 'f':
if (last_scorer_index == -1) throw runtime_error("You need to specify a scorer before its list of factors.");
opt->scorer_factors[last_scorer_index] = string(optarg);
break;
case 'l':
if (last_scorer_index == -1) throw runtime_error("You need to specify a scorer before its filter.");
opt->scorer_filter[last_scorer_index] = string(optarg);
break;
default:
@ -271,8 +313,13 @@ int main(int argc, char** argv)
if (option.reference.length() == 0) throw runtime_error("You have to specify at least one reference file.");
split(option.reference, ',', refFiles);
if (option.candidate.length() == 0) throw runtime_error("You have to specify at least one candidate file.");
split(option.candidate, ',', candFiles);
if (option.candidate.length() == 0 && option.nbest.length() == 0) throw runtime_error("You have to specify at least one candidate (or n-best) file.");
if (option.candidate.length() > 0 && option.nbest.length() > 0) throw runtime_error("You can either specify candidate files or n-best files, but not both.");
bool nbest_input = option.nbest.length() > 0;
if (nbest_input)
split(option.nbest, ',', candFiles);
else
split(option.candidate, ',', candFiles);
if (candFiles.size() > 1) g_has_more_files = true;
if (option.scorer_types.size() > 1) g_has_more_scorers = true;
@ -283,7 +330,7 @@ int main(int argc, char** argv)
g_scorer->setFactors(option.scorer_factors[i]);
g_scorer->setFilter(option.scorer_filter[i]);
g_scorer->setReferenceFiles(refFiles);
EvaluatorUtil::evaluate(*fileIt, option.bootstrap);
EvaluatorUtil::evaluate(*fileIt, option.bootstrap, nbest_input);
delete g_scorer;
}
}

View File

@ -46,6 +46,9 @@ de recherches du Canada
#include "MiraFeatureVector.h"
#include "MiraWeightVector.h"
#include "Scorer.h"
#include "ScorerFactory.h"
using namespace std;
using namespace MosesTuning;
@ -57,6 +60,8 @@ int main(int argc, char** argv)
string denseInitFile;
string sparseInitFile;
string type = "nbest";
string sctype = "BLEU";
string scconfig = "";
vector<string> scoreFiles;
vector<string> featureFiles;
vector<string> referenceFiles; //for hg mira
@ -78,6 +83,8 @@ int main(int argc, char** argv)
desc.add_options()
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("type,t", po::value<string>(&type), "Either nbest or hypergraph")
("sctype", po::value<string>(&sctype), "the scorer type (default BLEU)")
("scconfig,c", po::value<string>(&scconfig), "configuration string passed to scorer")
("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
("hgdir,H", po::value<string> (&hgDir), "Directory containing hypergraphs")
@ -209,19 +216,20 @@ int main(int argc, char** argv)
MiraWeightVector wv(initParams);
// Initialize background corpus
vector<ValType> bg;
for(int j=0; j<kBleuNgramOrder; j++) {
bg.push_back(kBleuNgramOrder-j);
bg.push_back(kBleuNgramOrder-j);
// Initialize scorer
if(sctype != "BLEU" && type == "hypergraph") {
UTIL_THROW(util::Exception, "hypergraph mira only supports BLEU");
}
bg.push_back(kBleuNgramOrder);
boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer(sctype, scconfig));
// Initialize background corpus
vector<ValType> bg(scorer->NumberOfScores(), 1);
boost::scoped_ptr<HopeFearDecoder> decoder;
if (type == "nbest") {
decoder.reset(new NbestHopeFearDecoder(featureFiles, scoreFiles, streaming, no_shuffle, safe_hope));
decoder.reset(new NbestHopeFearDecoder(featureFiles, scoreFiles, streaming, no_shuffle, safe_hope, scorer.get()));
} else if (type == "hypergraph") {
decoder.reset(new HypergraphHopeFearDecoder(hgDir, referenceFiles, initDenseSize, streaming, no_shuffle, safe_hope, hgPruning, wv));
decoder.reset(new HypergraphHopeFearDecoder(hgDir, referenceFiles, initDenseSize, streaming, no_shuffle, safe_hope, hgPruning, wv, scorer.get()));
} else {
UTIL_THROW(util::Exception, "Unknown batch mira type: '" << type << "'");
}

View File

@ -65,18 +65,15 @@ static void outputTopN(const StringPiece& sourcePhraseString, PhraseDictionary*
InputPathList inputPaths;
inputPaths.push_back(&inputPath);
phraseTable->GetTargetPhraseCollectionBatch(inputPaths);
//EvaluateInIsolation ??
const TargetPhraseCollection* targetPhrases = inputPath.GetTargetPhrases(*phraseTable);
//sort by total score and prune
// - Already done?
//print phrases
const std::vector<FactorType>& output = StaticData::Instance().GetOutputFactorOrder();
if (targetPhrases) {
//if (targetPhrases->GetSize() > 10) cerr << "src " << sourcePhrase << " tgt count " << targetPhrases->GetSize() << endl;
for (TargetPhraseCollection::const_iterator i = targetPhrases->begin(); i != targetPhrases->end(); ++i) {
const TargetPhrase* targetPhrase = *i;
out << sourcePhrase.GetStringRep(input);
@ -141,6 +138,7 @@ int main(int argc, char** argv)
mosesargs.push_back(config_file);
for (size_t i = 0; i < parsed.options.size(); ++i) {
if (parsed.options[i].position_key == -1 && !parsed.options[i].unregistered) continue;
/*
const string& key = parsed.options[i].string_key;
if (!key.empty()) {
mosesargs.push_back(key);
@ -150,6 +148,10 @@ int main(int argc, char** argv)
if (!value.empty()) {
mosesargs.push_back(value);
}
}*/
for (size_t j = 0; j < parsed.options[i].original_tokens.size(); ++j) {
mosesargs.push_back(parsed.options[i].original_tokens[j]);
}
}

View File

@ -712,6 +712,8 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
bool includeWordAlignment =
StaticData::Instance().PrintAlignmentInfoInNbest();
bool PrintNBestTrees = StaticData::Instance().PrintNBestTrees();
for (ChartKBestExtractor::KBestVec::const_iterator p = nBestList.begin();
p != nBestList.end(); ++p) {
const ChartKBestExtractor::Derivation &derivation = **p;
@ -743,6 +745,12 @@ void IOWrapper::OutputNBestList(const ChartKBestExtractor::KBestVec &nBestList,
}
}
// optionally, print tree
if (PrintNBestTrees) {
TreePointer tree = ChartKBestExtractor::GetOutputTree(derivation);
out << " ||| " << tree->GetString();
}
out << std::endl;
}

View File

@ -128,6 +128,7 @@ public:
void OutputBestNone(long translationId);
void OutputNBestList(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
void OutputNBestList(const std::vector<search::Applied> &nbest, long translationId);
void OutputNBestTrees(const std::vector<boost::shared_ptr<Moses::ChartKBestExtractor::Derivation> > &nBestList, long translationId);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void OutputDetailedTranslationReport(const search::Applied *applied, const Moses::Sentence &sentence, long translationId);
void OutputDetailedTreeFragmentsTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);

View File

@ -1,2 +1,2 @@
exe moses_chart : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp ../moses//moses $(TOP)//boost_iostreams ..//boost_filesystem ..//z ;
exe moses_chart : Main.cpp mbr.cpp IOWrapper.cpp ../moses//moses $(TOP)//boost_iostreams ..//boost_filesystem ..//z ;

View File

@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <exception>
#include <fstream>
#include "Main.h"
#include "TranslationAnalysis.h"
#include "moses/TranslationAnalysis.h"
#include "mbr.h"
#include "IOWrapper.h"

View File

@ -1,52 +0,0 @@
// $Id$
#include <iostream>
#include <sstream>
#include <algorithm>
#include "TranslationAnalysis.h"
#include "moses/StaticData.h"
#include "moses/TranslationOption.h"
#include "moses/DecodeStepTranslation.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/LM/Base.h"
using namespace std;
using namespace Moses;
namespace TranslationAnalysis
{
// Currently a no-op: both parameters are unnamed and the whole body is
// commented out, so nothing is written to the stream. The disabled code below
// walked the hypothesis chain via GetPrevHypo() and printed, per hypothesis,
// its source range and translation option.
void PrintTranslationAnalysis(ostream & /* os */, const Hypothesis* /* hypo */)
{
/*
os << endl << "TRANSLATION HYPOTHESIS DETAILS:" << endl;
queue<const Hypothesis*> translationPath;
while (hypo)
{
translationPath.push(hypo);
hypo = hypo->GetPrevHypo();
}
while (!translationPath.empty())
{
hypo = translationPath.front();
translationPath.pop();
const TranslationOption *transOpt = hypo->GetTranslationOption();
if (transOpt != NULL)
{
os << hypo->GetCurrSourceWordsRange() << " ";
for (size_t decodeStepId = 0; decodeStepId < DecodeStepTranslation::GetNumTransStep(); ++decodeStepId)
os << decodeStepId << "=" << transOpt->GetSubRangeCount(decodeStepId) << ",";
os << *transOpt << endl;
}
}
os << "END TRANSLATION" << endl;
*/
}
}

View File

@ -1,24 +0,0 @@
// $Id$
/*
* also see moses/SentenceStats
*/
#ifndef _TRANSLATION_ANALYSIS_H_
#define _TRANSLATION_ANALYSIS_H_
#include <iostream>
#include "moses/ChartHypothesis.h"
namespace TranslationAnalysis
{
/**
* print details about the translation represented in hypothesis to
* os. Included information: phrase alignment, words dropped, scores
*/
void PrintTranslationAnalysis(std::ostream &os, const Moses::Hypothesis* hypo);
}
#endif

View File

@ -50,7 +50,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/InputFileStream.h"
#include "moses/InputType.h"
#include "moses/WordLattice.h"
#include "LatticeMBR.h"
#include "moses/LatticeMBR.h"
namespace Moses
{

View File

@ -1,4 +1,4 @@
alias deps : IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ;
alias deps : IOWrapper.cpp mbr.cpp ..//z ..//boost_iostreams ..//boost_filesystem ../moses//moses ;
exe moses : Main.cpp deps ;
exe lmbrgrid : LatticeMBRGrid.cpp deps ;

View File

@ -47,7 +47,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <set>
#include "IOWrapper.h"
#include "LatticeMBR.h"
#include "moses/LatticeMBR.h"
#include "moses/Manager.h"
#include "moses/StaticData.h"
#include "util/exception.hh"

View File

@ -34,7 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
//#include <vld.h>
#endif
#include "TranslationAnalysis.h"
#include "moses/TranslationAnalysis.h"
#include "IOWrapper.h"
#include "mbr.h"

View File

@ -124,6 +124,35 @@ Phrase ChartKBestExtractor::GetOutputPhrase(const Derivation &d)
return ret;
}
// Generate the target tree of the derivation d.
TreePointer ChartKBestExtractor::GetOutputTree(const Derivation &d)
{
  const ChartHypothesis &hypo = d.edge.head->hypothesis;
  const TargetPhrase &phrase = hypo.GetCurrTargetPhrase();

  // The rule's tree fragment is stored as the "Tree" phrase property; without
  // it no tree can be built for this derivation.
  const PhraseProperty *treeProperty = phrase.GetProperty("Tree");
  if (!treeProperty) {
    UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
  }

  TreePointer result (boost::make_shared<InternalTree>(*treeProperty->GetValueString()));

  // Build the trees of the sub-derivations, visiting the non-terminals in
  // target order; each target position is mapped to its sub-derivation
  // through the non-terminal index map.
  std::vector<TreePointer> childTrees;
  for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
    if (!phrase.GetWord(pos).IsNonTerminal()) {
      continue;
    }
    size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
    const Derivation &subderivation = *d.subderivations[nonTermInd];
    childTrees.push_back(GetOutputTree(subderivation));
  }

  // Plug the child trees into this rule's fragment.
  result->Combine(childTrees);
  return result;
}
// Create an unweighted hyperarc corresponding to the given ChartHypothesis.
ChartKBestExtractor::UnweightedHyperarc ChartKBestExtractor::CreateEdge(
const ChartHypothesis &h)

View File

@ -22,6 +22,7 @@
#include <cassert>
#include "ChartHypothesis.h"
#include "ScoreComponentCollection.h"
#include "FF/InternalTree.h"
#include <boost/unordered_set.hpp>
#include <boost/weak_ptr.hpp>
@ -89,6 +90,7 @@ public:
std::size_t k, KBestVec &);
static Phrase GetOutputPhrase(const Derivation &);
static TreePointer GetOutputTree(const Derivation &);
private:
typedef boost::unordered_map<const ChartHypothesis *,

View File

@ -49,6 +49,7 @@
#include "NieceTerminal.h"
#include "SpanLength.h"
#include "SyntaxRHS.h"
#include "moses/FF/PhraseOrientationFeature.h"
#include "moses/FF/SkeletonStatelessFF.h"
#include "moses/FF/SkeletonStatefulFF.h"
@ -211,6 +212,7 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SparseHieroReorderingFeature);
MOSES_FNAME(SpanLength);
MOSES_FNAME(SyntaxRHS);
MOSES_FNAME(PhraseOrientationFeature);
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);

230
moses/FF/InternalTree.cpp Normal file
View File

@ -0,0 +1,230 @@
#include "InternalTree.h"
namespace Moses
{
// Build a node from `line`. A string without any of '[', ']' or ' ' is taken
// verbatim as the node's label; anything else is parsed by AddSubTree().
InternalTree::InternalTree(const std::string & line, const bool terminal):
  m_value_nt(0),
  m_isTerminal(terminal)
{
  const bool hasStructure = (line.find_first_of("[] ") != line.npos);
  if (hasStructure) {
    AddSubTree(line, 0);
  } else {
    m_value = line;
  }
}
// Parse the bracketed tree text in `line`, starting at offset `pos`, into this
// node. The text is scanned delimiter by delimiter ('[', ']' or ' '); the
// characters between two delimiters form a token.
//
// Returns the offset just past the delimiter that ended the scan, clamped to
// line.size(); returns line.size() when the text runs out before a ']'.
size_t InternalTree::AddSubTree(const std::string & line, size_t pos) {

  std::string value;
  char token = 0;

  while (token != ']' && pos != std::string::npos)
  {
    size_t oldpos = pos;
    pos = line.find_first_of("[] ", pos);
    if (pos == std::string::npos) break;
    token = line[pos];
    value = line.substr(oldpos,pos-oldpos);

    if (token == '[') {
      if (!m_value.empty()) {
        // This node already has a label, so the bracket opens a child node;
        // the child parses its own content and reports where it stopped.
        m_children.push_back(boost::make_shared<InternalTree>(value,false));
        pos = m_children.back()->AddSubTree(line, pos+1);
      }
      else {
        // No label yet: the bracket opens this node itself, so keep parsing
        // into this same node.
        if (!value.empty()) {
          m_value = value;
        }
        pos = AddSubTree(line, pos+1);
      }
    }
    else if (token == ' ' || token == ']') {
      // The first non-empty token becomes this node's label; every later one
      // is a terminal child. (The original condition `! m_value.size() > 0`
      // parsed as `(!m_value.size()) > 0`; empty() states the intent directly.)
      if (!value.empty() && m_value.empty()) {
        m_value = value;
      }
      else if (!value.empty()) {
        m_isTerminal = false;
        m_children.push_back(boost::make_shared<InternalTree>(value,true));
      }
      if (token == ' ') {
        pos++;
      }
    }

    // Any node that has children is a non-terminal.
    if (!m_children.empty()) {
      m_isTerminal = false;
    }
  }

  if (pos == std::string::npos) {
    return line.size();
  }
  return std::min(line.size(),pos+1);
}
// Serialise the subtree rooted at this node. Non-terminal nodes are wrapped in
// '[' ... ']'; every node except the one the call started on (start == true)
// is preceded by a single space.
std::string InternalTree::GetString(bool start) const {

  std::string out;

  if (!start) {
    out += " ";
  }

  const bool bracketed = !m_isTerminal;
  if (bracketed) {
    out += "[";
  }

  out += m_value;
  for (size_t i = 0; i < m_children.size(); ++i) {
    out += m_children[i]->GetString(false);
  }

  if (bracketed) {
    out += "]";
  }
  return out;
}
void InternalTree::Combine(const std::vector<TreePointer> &previous) {
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
}
else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
}
}
// Look for `label` among the direct children only. On success `it` points at
// the first matching child; otherwise it equals m_children.end().
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
  it = m_children.begin();
  while (it != m_children.end() && !((*it)->GetLabel() == label)) {
    ++it;
  }
  return it != m_children.end();
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
// Look for the integer label `label` among the direct children only. On
// success `it` points at the first matching child; otherwise it equals
// m_children.end().
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
  it = m_children.begin();
  while (it != m_children.end() && !((*it)->GetNTLabel() == label)) {
    ++it;
  }
  return it != m_children.end();
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
// Look among the direct children for a node whose integer label occurs in
// `labels`. Membership is tested with std::binary_search, so `labels` must be
// sorted in ascending order. On success `it` points at the first matching
// child; otherwise it equals m_children.end().
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
  it = m_children.begin();
  while (it != m_children.end() && !std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
    ++it;
  }
  return it != m_children.end();
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
}

177
moses/FF/InternalTree.h Normal file
View File

@ -0,0 +1,177 @@
#pragma once
#include <iostream>
#include <string>
#include <map>
#include <vector>
#include "FFState.h"
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
namespace Moses
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
class InternalTree
{
std::string m_value;
NTLabel m_value_nt;
std::vector<TreePointer> m_children;
bool m_isTerminal;
public:
InternalTree(const std::string & line, const bool terminal = false);
InternalTree(const InternalTree & tree):
m_value(tree.m_value),
m_isTerminal(tree.m_isTerminal) {
const std::vector<TreePointer> & children = tree.m_children;
for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); it++) {
m_children.push_back(boost::make_shared<InternalTree>(**it));
}
}
size_t AddSubTree(const std::string & line, size_t start);
std::string GetString(bool start = true) const;
void Combine(const std::vector<TreePointer> &previous);
const std::string & GetLabel() const {
return m_value;
}
// optionally identify label by int instead of string;
// allows abstraction if multiple nonterminal strings should map to same label.
const NTLabel & GetNTLabel() const {
return m_value_nt;
}
void SetNTLabel(NTLabel value) {
m_value_nt = value;
}
size_t GetLength() const {
return m_children.size();
}
std::vector<TreePointer> & GetChildren() {
return m_children;
}
bool IsTerminal() const {
return m_isTerminal;
}
bool IsLeafNT() const {
return (!m_isTerminal && m_children.size() == 0);
}
// different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
// can be used for formulating syntax constraints.
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
// pass vector of possible labels to search
// if found, 'it' is iterator to first tree node that matches search string
bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
// if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
};
// Feature-function state that simply holds a shared pointer to a tree.
class TreeState : public FFState
{
  TreePointer m_tree;

public:
  TreeState(TreePointer tree)
    : m_tree(tree) {
  }

  // Returns the stored tree pointer (a copy of the shared pointer).
  TreePointer GetTree() const {
    return m_tree;
  }

  // Always reports equality: the stored tree is not inspected.
  int Compare(const FFState& other) const {
    return 0;
  }
};
// Python-like generator that yields next nonterminal leaf on every call.
// Built with the $generator/$emit/$yield/$restart/$stop macros from util/generator.hh.
// The yielded value has the type named in $emit: an iterator into the child vector of
// the node that contains the leaf, so the caller can replace the leaf in place.
// A "nonterminal leaf" is a child that is not a terminal and has no children.
// NOTE(review): the exact semantics of $restart come from util/generator.hh, which is not
// shown here; presumably it continues the scan inside the given child's subtree - confirm.
$generator(leafNT) {
// current position in the children of 'tree'
std::vector<TreePointer>::iterator it;
// node whose children are currently being scanned
InternalTree* tree;
// 'root' defaults to 0, but 'tree' is dereferenced unconditionally below,
// so a real root has to be supplied before the generator is invoked.
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
// nonterminal without children: hand its position to the caller
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
}
// node with children: switch the scan to that child
else if ((*it)->GetLength() > 0) {
// NOTE(review): *it has already been dereferenced above, so this null check cannot protect those calls.
if ((*it).get()) { // normal pointer to same object that TreePointer points to
$restart(tree = (*it).get());
}
}
}
$stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call.
// Built with the $generator/$emit/$yield/$restart/$stop macros from util/generator.hh.
// The traversal is the same as in leafNT, but the yielded value (type named in $emit) is the
// raw pointer 'tree', i.e. the node whose child list contains the leaf.
// A parent with several nonterminal leaves is therefore yielded once per leaf.
// NOTE(review): the exact semantics of $restart come from util/generator.hh, which is not
// shown here; presumably it continues the scan inside the given child's subtree - confirm.
$generator(leafNTParent) {
// current position in the children of 'tree'
std::vector<TreePointer>::iterator it;
// node whose children are currently being scanned; this is the value that gets yielded
InternalTree* tree;
// 'root' defaults to 0, but 'tree' is dereferenced unconditionally below,
// so a real root has to be supplied before the generator is invoked.
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
// nonterminal without children: report the node that contains it
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
}
// node with children: switch the scan to that child
else if ((*it)->GetLength() > 0) {
// NOTE(review): *it has already been dereferenced above, so this null check cannot protect those calls.
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
}
$stop;
};
// Python-like generator that yields the next nonterminal leaf on every call, and also stores the path from the root of the tree to the nonterminal.
// Built with the $generator/$emit/$yield/$restart/$stop macros from util/generator.hh.
// The yielded value (type named in $emit) is an iterator into the child vector that contains the leaf.
// The path is written into the caller-owned vector '*path': the node being scanned is pushed
// before its children are visited, and the leaf itself is pushed just before each yield and
// popped again when the generator is resumed.
// NOTE(review): the exact semantics of $restart come from util/generator.hh, which is not
// shown here; presumably it continues the scan inside the given child's subtree - confirm.
$generator(leafNTPath) {
// current position in the children of 'tree'
std::vector<TreePointer>::iterator it;
// node whose children are currently being scanned
InternalTree* tree;
// caller-owned output vector for the path; not owned or freed by the generator
std::vector<InternalTree*> * path;
// Both arguments default to NULL, but 'tree' and 'path' are dereferenced unconditionally below,
// so real values have to be supplied before the generator is invoked.
leafNTPath(InternalTree* root = NULL, std::vector<InternalTree*> * orig = NULL): tree(root), path(orig) {}
$emit(std::vector<TreePointer>::iterator)
// record the node that is about to be scanned
path->push_back(tree);
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
// the leaf is the last path element while the caller looks at it ...
path->push_back((*it).get());
$yield(it);
// ... and is removed again once the generator is resumed
path->pop_back();
}
else if ((*it)->GetLength() > 0) {
// NOTE(review): *it has already been dereferenced above, so this null check cannot protect those calls.
if ((*it).get()) {
$restart(tree = (*it).get());
}
}
}
// all children handled: remove the scanned node from the path
path->pop_back();
$stop;
};
}

View File

@ -15,6 +15,7 @@ LexicalReordering::LexicalReordering(const std::string &line)
std::cerr << "Initializing LexicalReordering.." << std::endl;
map<string,string> sparseArgs;
m_haveDefaultScores = false;
for (size_t i = 0; i < m_args.size(); ++i) {
const vector<string> &args = m_args[i];
@ -30,6 +31,12 @@ LexicalReordering::LexicalReordering(const std::string &line)
m_filePath = args[1];
} else if (args[0].substr(0,7) == "sparse-") {
sparseArgs[args[0].substr(7)] = args[1];
} else if (args[0] == "default-scores") {
vector<string> tokens = Tokenize(args[1],",");
for(size_t i=0; i<tokens.size(); i++) {
m_defaultScores.push_back( TransformScore( Scan<float>(tokens[i]) ) );
}
m_haveDefaultScores = true;
} else {
UTIL_THROW(util::Exception,"Unknown argument " + args[0]);
}
@ -52,6 +59,13 @@ LexicalReordering::LexicalReordering(const std::string &line)
UTIL_THROW(util::Exception,"Unknown conditioning option!");
}
// sanity check: number of default scores
if (m_haveDefaultScores) {
if(m_defaultScores.size() != m_configuration->GetNumScoreComponents()) {
UTIL_THROW(util::Exception,"wrong number of default scores (" << m_defaultScores.size() << ") for lexicalized reordering model (expected " << m_configuration->GetNumScoreComponents() << ")");
}
}
m_configuration->ConfigureSparse(sparseArgs, this);
}

View File

@ -66,6 +66,8 @@ public:
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{}
bool GetHaveDefaultScores() { return m_haveDefaultScores; }
float GetDefaultScore( size_t i ) { return m_defaultScores[i]; }
private:
bool DecodeCondition(std::string s);
@ -82,6 +84,8 @@ private:
//bool m_oneScorePerDirection;
std::vector<FactorType> m_factorsE, m_factorsF;
std::string m_filePath;
bool m_haveDefaultScores;
Scores m_defaultScores;
};
}

View File

@ -139,18 +139,32 @@ void LexicalReorderingState::CopyScores(ScoreComponentCollection* accum, const
if (m_direction != LexicalReorderingConfiguration::Backward) relevantOpt = m_prevOption;
const Scores *cachedScores = relevantOpt->GetLexReorderingScores(m_configuration.GetScoreProducer());
// look up applicable score from vector of scores
if(cachedScores) {
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
const Scores &scoreSet = *cachedScores;
if(m_configuration.CollapseScores())
if(m_configuration.CollapseScores()) {
scores[m_offset] = scoreSet[m_offset + reoType];
}
else {
std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
scores[m_offset + reoType] = scoreSet[m_offset + reoType];
}
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
}
// else: use default scores (if specified)
else if (m_configuration.GetScoreProducer()->GetHaveDefaultScores()) {
Scores scores(m_configuration.GetScoreProducer()->GetNumScoreComponents(),0);
if(m_configuration.CollapseScores()) {
scores[m_offset] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
}
else {
scores[m_offset + reoType] = m_configuration.GetScoreProducer()->GetDefaultScore(m_offset + reoType);
}
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
}
// note: if no default score, no cost
const SparseReordering* sparse = m_configuration.GetSparseReordering();
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType, m_direction, accum);

View File

@ -0,0 +1,201 @@
#include <vector>
#include "PhraseOrientationFeature.h"
#include "moses/InputFileStream.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/StaticData.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/ChartManager.h"
#include "moses/FactorCollection.h"
#include "moses/PP/OrientationPhraseProperty.h"
#include "phrase-extract/extract-ghkm/Alignment.h"
using namespace std;
namespace Moses
{
// Creates the feature with 8 dense score components and reads its parameters
// from the configuration line.
PhraseOrientationFeature::PhraseOrientationFeature(const std::string &line)
  : StatelessFeatureFunction(8, line)
{
  VERBOSE(1, "Initializing feature " << GetScoreProducerDescription() << " ...");
  ReadParameters();
  // terminate the log line so that later output does not continue on the same line
  VERBOSE(1, " Done." << std::endl);
}
// Handles the "tuneable" key locally; every other key is forwarded to the base class.
void PhraseOrientationFeature::SetParameter(const std::string& key, const std::string& value)
{
  if (key != "tuneable") {
    StatelessFeatureFunction::SetParameter(key, value);
    return;
  }
  m_tuneable = Scan<bool>(value);
}
// Adds no score here; it only records the source side on the target phrase,
// which EvaluateWhenApplied later reads back through GetRuleSource().
void PhraseOrientationFeature::EvaluateInIsolation(
  const Phrase &source,
  const TargetPhrase &targetPhrase,
  ScoreComponentCollection &scoreBreakdown,
  ScoreComponentCollection &estimatedFutureScore) const
{
  targetPhrase.SetRuleSource(source);
}
void PhraseOrientationFeature::EvaluateWhenApplied(
const ChartHypothesis& hypo,
ScoreComponentCollection* accumulator) const
{
// Dense scores
std::vector<float> newScores(m_numScoreComponents,0); // m_numScoreComponents == 8
// Read Orientation property
const TargetPhrase &currTarPhr = hypo.GetCurrTargetPhrase();
const Phrase *currSrcPhr = currTarPhr.GetRuleSource();
// const Factor* targetLHS = currTarPhr.GetTargetLHS()[0];
// bool isGlueGrammarRule = false;
std::map<size_t,size_t> alignMap;
alignMap.insert(
currTarPhr.GetAlignTerm().begin(),
currTarPhr.GetAlignTerm().end());
alignMap.insert(
currTarPhr.GetAlignNonTerm().begin(),
currTarPhr.GetAlignNonTerm().end());
Moses::GHKM::Alignment alignment;
std::vector<int> alignmentNTs(currTarPhr.GetSize(),-1); // TODO: can be smaller (number of right-hand side non-terminals)
for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignTerm().begin();
it!=currTarPhr.GetAlignTerm().end(); ++it) {
alignment.push_back(std::make_pair(it->first, it->second));
// std::cerr << "alignTerm " << it->first << " " << it->second << std::endl;
}
for (AlignmentInfo::const_iterator it=currTarPhr.GetAlignNonTerm().begin();
it!=currTarPhr.GetAlignNonTerm().end(); ++it) {
alignment.push_back(std::make_pair(it->first, it->second));
alignmentNTs[it->second] = it->first;
// std::cerr << "alignNonTerm " << it->first << " " << it->second << std::endl;
}
// Initialize phrase orientation scoring object
Moses::GHKM::PhraseOrientation phraseOrientation(currSrcPhr->GetSize(), currTarPhr.GetSize(), alignment);
// TODO: Efficiency! This should be precomputed.
// std::cerr << *currSrcPhr << std::endl;
// std::cerr << currTarPhr << std::endl;
// std::cerr << currSrcPhr->GetSize() << std::endl;
// std::cerr << currTarPhr.GetSize() << std::endl;
// Get index map for underlying hypotheses
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
currTarPhr.GetAlignNonTerm().GetNonTermIndexMap();
// Determine & score orientations
size_t nonTerminalNumber = 0;
for (size_t phrasePos=0; phrasePos<currTarPhr.GetSize(); ++phrasePos) {
// consult rule for either word or non-terminal
const Word &word = currTarPhr.GetWord(phrasePos);
if ( word.IsNonTerminal() ) {
// non-terminal: consult subderivation
size_t nonTermIndex = nonTermIndexMap[phrasePos];
const ChartHypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndex);
const TargetPhrase &prevTarPhr = prevHypo->GetCurrTargetPhrase();
if (const PhraseProperty *property = prevTarPhr.GetProperty("Orientation")) {
const OrientationPhraseProperty *orientationPhraseProperty = static_cast<const OrientationPhraseProperty*>(property);
// std::cerr << "L2R_Mono " << orientationPhraseProperty->GetLeftToRightProbabilityMono();
// std::cerr << " L2R_Swap " << orientationPhraseProperty->GetLeftToRightProbabilitySwap();
// std::cerr << " L2R_Dright " << orientationPhraseProperty->GetLeftToRightProbabilityDright();
// std::cerr << " L2R_Dleft " << orientationPhraseProperty->GetLeftToRightProbabilityDleft();
// std::cerr << " R2L_Mono " << orientationPhraseProperty->GetRightToLeftProbabilityMono();
// std::cerr << " R2L_Swap " << orientationPhraseProperty->GetRightToLeftProbabilitySwap();
// std::cerr << " R2L_Dright " << orientationPhraseProperty->GetRightToLeftProbabilityDright();
// std::cerr << " R2L_Dleft " << orientationPhraseProperty->GetRightToLeftProbabilityDleft();
// std::cerr << std::endl;
Moses::GHKM::REO_POS l2rOrientation=Moses::GHKM::UNKNOWN, r2lOrientation=Moses::GHKM::UNKNOWN;
int sourceIndex = alignmentNTs[phrasePos];
// std::cerr << "targetIndex " << phrasePos << " sourceIndex " << sourceIndex << std::endl;
l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::L2R);
r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::R2L);
// std::cerr << "l2rOrientation ";
switch(l2rOrientation) {
case Moses::GHKM::LEFT:
newScores[0] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityMono());
// std::cerr << "mono" << std::endl;
break;
case Moses::GHKM::RIGHT:
newScores[1] += std::log(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
// std::cerr << "swap" << std::endl;
break;
case Moses::GHKM::DRIGHT:
newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDright());
// std::cerr << "dright" << std::endl;
break;
case Moses::GHKM::DLEFT:
newScores[3] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDleft());
// std::cerr << "dleft" << std::endl;
break;
case Moses::GHKM::UNKNOWN:
// modelType == Moses::GHKM::REO_MSLR
newScores[2] += std::log(orientationPhraseProperty->GetLeftToRightProbabilityDright());
// std::cerr << "unknown->dright" << std::endl;
break;
default:
UTIL_THROW2(GetScoreProducerDescription()
<< ": Unsupported orientation type.");
break;
}
// std::cerr << "r2lOrientation ";
switch(r2lOrientation) {
case Moses::GHKM::LEFT:
newScores[4] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityMono());
// std::cerr << "mono" << std::endl;
break;
case Moses::GHKM::RIGHT:
newScores[5] += std::log(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
// std::cerr << "swap" << std::endl;
break;
case Moses::GHKM::DRIGHT:
newScores[6] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDright());
// std::cerr << "dright" << std::endl;
break;
case Moses::GHKM::DLEFT:
newScores[7] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDleft());
// std::cerr << "dleft" << std::endl;
break;
case Moses::GHKM::UNKNOWN:
// modelType == Moses::GHKM::REO_MSLR
newScores[6] += std::log(orientationPhraseProperty->GetRightToLeftProbabilityDright());
// std::cerr << "unknown->dright" << std::endl;
break;
default:
UTIL_THROW2(GetScoreProducerDescription()
<< ": Unsupported orientation type.");
break;
}
// TODO: Handle degenerate cases (boundary non-terminals)
} else {
// abort with error message if the phrase does not translate an unknown word
UTIL_THROW_IF2(!prevTarPhr.GetWord(0).IsOOV(), GetScoreProducerDescription()
<< ": Missing Orientation property. "
<< "Please check phrase table and glue rules.");
}
++nonTerminalNumber;
}
}
accumulator->PlusEquals(this, newScores);
}
}

View File

@ -0,0 +1,53 @@
#pragma once
#include <string>
#include "StatelessFeatureFunction.h"
#include "FFState.h"
#include "moses/Factor.h"
#include "phrase-extract/extract-ghkm/PhraseOrientation.h"
namespace Moses
{
// Stateless feature function that scores phrase orientations.
// Only EvaluateInIsolation and the chart-based EvaluateWhenApplied are implemented
// out of line; the remaining evaluation hooks are empty.
class PhraseOrientationFeature : public StatelessFeatureFunction
{
public:
  PhraseOrientationFeature(const std::string &line);

  ~PhraseOrientationFeature() {}

  // Accepts every factor mask.
  bool IsUseable(const FactorMask &mask) const {
    return true;
  }

  void SetParameter(const std::string& key, const std::string& value);

  void EvaluateInIsolation(const Phrase &source,
                           const TargetPhrase &targetPhrase,
                           ScoreComponentCollection &scoreBreakdown,
                           ScoreComponentCollection &estimatedFutureScore) const;

  // Intentionally empty.
  void EvaluateWithSourceContext(const InputType &input,
                                 const InputPath &inputPath,
                                 const TargetPhrase &targetPhrase,
                                 const StackVec *stackVec,
                                 ScoreComponentCollection &scoreBreakdown,
                                 ScoreComponentCollection *estimatedFutureScore = NULL) const {
  }

  // Intentionally empty: nothing is scored for Hypothesis objects.
  void EvaluateWhenApplied(const Hypothesis& cur_hypo,
                           ScoreComponentCollection* accumulator) const {
  }

  // Scoring for ChartHypothesis objects; defined in the .cpp file.
  void EvaluateWhenApplied(const ChartHypothesis& cur_hypo,
                           ScoreComponentCollection* accumulator) const;
};
}

View File

@ -1,241 +1,13 @@
#include "TreeStructureFeature.h"
#include "moses/StaticData.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "moses/TargetPhrase.h"
#include <vector>
#include "moses/PP/TreeStructurePhraseProperty.h"
using namespace std;
namespace Moses
{
// Builds a node from 'line'. The integer label starts at 0 and the terminal flag at 'terminal'.
// A line that contains none of '[', ']' or ' ' is taken verbatim as the node label;
// any other line is handed to AddSubTree for parsing from position 0.
InternalTree::InternalTree(const std::string & line, const bool terminal):
  m_value_nt(0),
  m_isTerminal(terminal)
{
  const bool hasStructure = (line.find_first_of("[] ") != std::string::npos);
  if (hasStructure) {
    AddSubTree(line, 0);
  } else {
    m_value = line;
  }
}
// Parses 'line' from position 'pos' into this node.
//
// The text is split at the delimiters '[', ']' and ' '. The first non-empty token becomes
// this node's label if it has none yet. After that, a token followed by '[' opens a
// non-terminal child that is parsed recursively, and a token followed by ' ' or ']' becomes
// a terminal child. Parsing of this node stops at the first ']' seen at this level or at
// the end of the line.
//
// Returns the position just behind the last consumed delimiter, never more than line.size().
size_t InternalTree::AddSubTree(const std::string & line, size_t pos)
{
  std::string value;
  char token = 0;

  while (token != ']' && pos != std::string::npos) {
    const size_t oldpos = pos;
    pos = line.find_first_of("[] ", pos);
    if (pos == std::string::npos) break;

    token = line[pos];
    value = line.substr(oldpos, pos-oldpos); // text between the previous position and the delimiter

    if (token == '[') {
      if (!m_value.empty()) {
        // this node already has a label, so the token names a new non-terminal child
        TreePointer child(new InternalTree(value, false));
        m_children.push_back(child);
        pos = child->AddSubTree(line, pos+1);
      } else {
        // no label yet: take the token (if any) as label and keep parsing into this node
        if (!value.empty()) {
          m_value = value;
        }
        pos = AddSubTree(line, pos+1);
      }
    } else if (token == ' ' || token == ']') {
      // was written as "! m_value.size() > 0", which only worked because of how
      // "(!size) > 0" happens to evaluate; empty() states the intended test directly
      if (!value.empty() && m_value.empty()) {
        m_value = value;
      } else if (!value.empty()) {
        m_isTerminal = false;
        TreePointer child(new InternalTree(value, true));
        m_children.push_back(child);
      }
      if (token == ' ') {
        ++pos;
      }
    }

    if (!m_children.empty()) {
      m_isTerminal = false;
    }
  }

  if (pos == std::string::npos) {
    return line.size();
  }
  return std::min(line.size(), pos+1);
}
// Serializes the subtree rooted at this node.
// Every node contributes a leading space followed by its label; non-terminal nodes are
// additionally wrapped in '[' ... ']' around the label and the serialized children.
std::string InternalTree::GetString() const
{
  const bool bracketed = !m_isTerminal;

  std::string out(" ");
  if (bracketed) {
    out += "[";
  }
  out += m_value;

  for (size_t i = 0; i < m_children.size(); ++i) {
    out += m_children[i]->GetString();
  }

  if (bracketed) {
    out += "]";
  }
  return out;
}
void InternalTree::Combine(const std::vector<TreePointer> &previous) {
std::vector<TreePointer>::iterator it;
bool found = false;
leafNT next_leafNT(this);
for (std::vector<TreePointer>::const_iterator it_prev = previous.begin(); it_prev != previous.end(); ++it_prev) {
found = next_leafNT(it);
if (found) {
*it = *it_prev;
}
else {
std::cerr << "Warning: leaf nonterminal not found in rule; why did this happen?\n";
}
}
}
// Scans only the direct children for the first node whose string label equals 'label'.
// Returns true with 'it' on the matching child; otherwise false with 'it' at m_children.end().
bool InternalTree::FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const
{
  it = m_children.begin();
  while (it != m_children.end()) {
    const std::string & candidate = (*it)->GetLabel();
    if (candidate == label) {
      return true;
    }
    ++it;
  }
  return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if ((*it)->GetNTLabel() == label) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(label, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2)) {
it = it2;
return true;
}
}
return false;
}
bool InternalTree::RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const {
for (it = m_children.begin(); it != m_children.end(); ++it) {
if (std::binary_search(labels.begin(), labels.end(), (*it)->GetNTLabel())) {
parent = this;
return true;
}
std::vector<TreePointer>::const_iterator it2;
if ((*it)->RecursiveSearch(labels, it2, parent)) {
it = it2;
return true;
}
}
return false;
}
void TreeStructureFeature::Load() {
// syntactic constraints can be hooked in here.
@ -272,7 +44,7 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
{
if (const PhraseProperty *property = cur_hypo.GetCurrTargetPhrase().GetProperty("Tree")) {
const std::string *tree = property->GetValueString();
TreePointer mytree (new InternalTree(*tree));
TreePointer mytree (boost::make_shared<InternalTree>(*tree));
if (m_labelset) {
AddNTLabels(mytree);
@ -291,16 +63,11 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
}
}
std::vector<std::string> sparse_features;
if (m_constraints) {
sparse_features = m_constraints->SyntacticRules(mytree, previous_trees);
m_constraints->SyntacticRules(mytree, previous_trees, this, accumulator);
}
mytree->Combine(previous_trees);
//sparse scores
for (std::vector<std::string>::const_iterator feature=sparse_features.begin(); feature != sparse_features.end(); ++feature) {
accumulator->PlusEquals(this, *feature, 1);
}
return new TreeState(mytree);
}
else {
@ -310,4 +77,3 @@ FFState* TreeStructureFeature::EvaluateWhenApplied(const ChartHypothesis& cur_hy
}
}

View File

@ -4,98 +4,13 @@
#include <map>
#include "StatefulFeatureFunction.h"
#include "FFState.h"
#include <boost/shared_ptr.hpp>
#include "util/generator.hh"
#include "util/exception.hh"
#include "InternalTree.h"
namespace Moses
{
class InternalTree;
typedef boost::shared_ptr<InternalTree> TreePointer;
typedef int NTLabel;
// Node of a tree. Every node carries a string label, an optional integer label
// (NTLabel), a terminal flag and an ordered list of shared pointers to its children.
class InternalTree
{
  std::string m_value;                  // string label of this node
  NTLabel m_value_nt;                   // integer label, see GetNTLabel()/SetNTLabel()
  std::vector<TreePointer> m_children;  // child nodes in order
  bool m_isTerminal;                    // true if this node is a terminal
public:
  // Builds a node from 'line'; 'terminal' is the initial value of the terminal flag.
  InternalTree(const std::string & line, const bool terminal = false);

  // Deep copy: labels and terminal flag are copied and every child subtree is cloned,
  // so the copy shares no nodes with 'tree'.
  InternalTree(const InternalTree & tree):
    m_value(tree.m_value),
    m_value_nt(tree.m_value_nt), // copy the integer label too; it was left uninitialized before
    m_isTerminal(tree.m_isTerminal) {
    const std::vector<TreePointer> & children = tree.m_children;
    m_children.reserve(children.size());
    for (std::vector<TreePointer>::const_iterator it = children.begin(); it != children.end(); ++it) {
      TreePointer child (new InternalTree(**it));
      m_children.push_back(child);
    }
  }

  // Parses 'line' from position 'start' into this node and returns a position in 'line'
  // (defined in the .cpp file).
  size_t AddSubTree(const std::string & line, size_t start);

  // String representation of the tree (defined in the .cpp file).
  std::string GetString() const;

  // Combines this tree with the trees in 'previous' (defined in the .cpp file).
  void Combine(const std::vector<TreePointer> &previous);

  const std::string & GetLabel() const {
    return m_value;
  }

  // optionally identify label by int instead of string;
  // allows abstraction if multiple nonterminal strings should map to same label.
  const NTLabel & GetNTLabel() const {
    return m_value_nt;
  }

  void SetNTLabel(NTLabel value) {
    m_value_nt = value;
  }

  // Number of direct children.
  size_t GetLength() const {
    return m_children.size();
  }

  // Mutable access to the direct children.
  std::vector<TreePointer> & GetChildren() {
    return m_children;
  }

  // Appends 'child' as the last child of this node.
  void AddChild(TreePointer child) {
    m_children.push_back(child);
  }

  bool IsTerminal() const {
    return m_isTerminal;
  }

  // True for a nonterminal node without children.
  bool IsLeafNT() const {
    return (!m_isTerminal && m_children.size() == 0);
  }

  // different methods to search a tree (either just direct children (FlatSearch) or all children (RecursiveSearch)) for constituents.
  // can be used for formulating syntax constraints.

  // if found, 'it' is iterator to first tree node that matches search string
  bool FlatSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;
  bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it) const;

  // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
  bool RecursiveSearch(const std::string & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;

  // use NTLabel for search to reduce number of string comparisons / deal with synonymous labels
  // if found, 'it' is iterator to first tree node that matches search string
  bool FlatSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;
  bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it) const;

  // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
  bool RecursiveSearch(const NTLabel & label, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;

  // pass vector of possible labels to search
  // if found, 'it' is iterator to first tree node that matches search string
  bool FlatSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;
  bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it) const;

  // if found, 'it' is iterator to first tree node that matches search string, and 'parent' to its parent node
  bool RecursiveSearch(const std::vector<NTLabel> & labels, std::vector<TreePointer>::const_iterator & it, InternalTree const* &parent) const;
};
// mapping from string nonterminal label to int representation.
// allows abstraction if multiple nonterminal strings should map to same label.
@ -107,30 +22,15 @@ public:
// class to implement language-specific syntactic constraints.
// the method SyntacticRules must return a vector of strings (each identifying a constraint violation), which are then made into sparse features.
// the method SyntacticRules is given pointer to ScoreComponentCollection, so it can add sparse features itself.
// Abstract interface for language-specific syntactic constraints.
// Implementations inspect the tree 'root' of the current rule together with the trees
// 'previous' of the sub-derivations and report constraint violations.
class SyntaxConstraints
{
public:
// Variant that returns the violations as strings (one per violation); the caller turns them into sparse features.
virtual std::vector<std::string> SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous) = 0;
// Variant that receives the feature function 'sp' and the score 'accumulator', so it can add sparse features itself.
virtual void SyntacticRules(TreePointer root, const std::vector<TreePointer> &previous, const FeatureFunction* sp, ScoreComponentCollection* accumulator) = 0;
// Virtual destructor with an empty body.
virtual ~SyntaxConstraints() {};
};
// Feature-function state that simply holds a shared pointer to a tree.
class TreeState : public FFState
{
  TreePointer m_tree;

public:
  TreeState(TreePointer tree)
    : m_tree(tree) {
  }

  // Returns the stored tree pointer (a copy of the shared pointer).
  TreePointer GetTree() const {
    return m_tree;
  }

  // Always reports equality: the stored tree is not inspected.
  int Compare(const FFState& other) const {
    return 0;
  }
};
class TreeStructureFeature : public StatefulFeatureFunction
{
SyntaxConstraints* m_constraints;
@ -174,45 +74,5 @@ public:
void Load();
};
// Python-like generator that yields next nonterminal leaf on every call.
// Built with the $generator/$emit/$yield/$restart/$stop macros from util/generator.hh.
// The yielded value has the type named in $emit: an iterator into the child vector of
// the node that contains the leaf, so the caller can replace the leaf in place.
// A "nonterminal leaf" is a child that is not a terminal and has no children.
// NOTE(review): the exact semantics of $restart come from util/generator.hh, which is not
// shown here; presumably it continues the scan inside the given child's subtree - confirm.
$generator(leafNT) {
// current position in the children of 'tree'
std::vector<TreePointer>::iterator it;
// node whose children are currently being scanned
InternalTree* tree;
// 'root' defaults to 0, but 'tree' is dereferenced unconditionally below,
// so a real root has to be supplied before the generator is invoked.
leafNT(InternalTree* root = 0): tree(root) {}
$emit(std::vector<TreePointer>::iterator)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
// nonterminal without children: hand its position to the caller
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(it);
}
// node with children: switch the scan to that child
else if ((*it)->GetLength() > 0) {
// NOTE(review): *it has already been dereferenced above, so this check on the
// address of the pointee cannot protect those calls.
if (&(**it)) { // normal pointer to same object that TreePointer points to
$restart(tree = &(**it));
}
}
}
$stop;
};
// Python-like generator that yields the parent of the next nonterminal leaf on every call.
// Built with the $generator/$emit/$yield/$restart/$stop macros from util/generator.hh.
// The traversal is the same as in leafNT, but the yielded value (type named in $emit) is the
// raw pointer 'tree', i.e. the node whose child list contains the leaf.
// A parent with several nonterminal leaves is therefore yielded once per leaf.
// NOTE(review): the exact semantics of $restart come from util/generator.hh, which is not
// shown here; presumably it continues the scan inside the given child's subtree - confirm.
$generator(leafNTParent) {
// current position in the children of 'tree'
std::vector<TreePointer>::iterator it;
// node whose children are currently being scanned; this is the value that gets yielded
InternalTree* tree;
// 'root' defaults to 0, but 'tree' is dereferenced unconditionally below,
// so a real root has to be supplied before the generator is invoked.
leafNTParent(InternalTree* root = 0): tree(root) {}
$emit(InternalTree*)
for (it = tree->GetChildren().begin(); it !=tree->GetChildren().end(); ++it) {
// nonterminal without children: report the node that contains it
if (!(*it)->IsTerminal() && (*it)->GetLength() == 0) {
$yield(tree);
}
// node with children: switch the scan to that child
else if ((*it)->GetLength() > 0) {
// NOTE(review): *it has already been dereferenced above, so this check on the
// address of the pointee cannot protect those calls.
if (&(**it)) { // normal pointer to same object that TreePointer points to
$restart(tree = &(**it));
}
}
}
$stop;
};
}

View File

@ -69,6 +69,7 @@ lib moses :
TranslationModel/RuleTable/*.cpp
TranslationModel/Scope3Parser/*.cpp
TranslationModel/CYKPlusParser/*.cpp
../phrase-extract/extract-ghkm/PhraseOrientation.cpp
FF/*.cpp
FF/OSM-Feature/*.cpp
FF/LexicalReordering/*.cpp

View File

@ -19,7 +19,7 @@ if $(with-irstlm) {
dependencies += irst ;
lmmacros += LM_IRST ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
echo "!!! You are linking with the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
}
@ -27,6 +27,10 @@ if $(with-irstlm) {
local with-srilm = [ option.get "with-srilm" ] ;
local with-maxent-srilm = [ option.get "with-maxent-srilm" ] ;
if $(with-srilm) {
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking with the SRILM library; Do NOT use version >= 1.7.1 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
if [ option.get "with-srilm-dynamic" : no : yes ] = yes {
lib srilm ;
alias sri-libs : srilm ;

View File

@ -57,6 +57,7 @@ Parameter::Parameter()
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
AddParam("max-phrase-length", "maximum phrase length (default 20)");
AddParam("n-best-list", "file and size of n-best-list to be generated; specify - as the file in order to write to STDOUT");
AddParam("n-best-trees", "Write n-best target-side trees to n-best-list");
AddParam("lattice-samples", "generate samples from lattice, in same format as nbest list. Uses the file and size arguments, as in n-best-list");
AddParam("n-best-factor", "factor to compute the maximum number of contenders (=factor*nbest-size). value 0 means infinity, i.e. no threshold. default is 0");
AddParam("print-all-derivations", "to print all derivations in search graph");

View File

@ -431,6 +431,7 @@ bool StaticData::LoadData(Parameter *parameter)
if (m_useConsensusDecoding) m_mbr=true;
SetBooleanParameter( &m_defaultNonTermOnlyForEmptyRange, "default-non-term-for-empty-range-only", false );
SetBooleanParameter( &m_printNBestTrees, "n-best-trees", false );
// Compact phrase table and reordering model

View File

@ -199,6 +199,7 @@ protected:
FactorType m_placeHolderFactor;
bool m_useLegacyPT;
bool m_defaultNonTermOnlyForEmptyRange;
bool m_printNBestTrees;
FeatureRegistry m_registry;
PhrasePropertyFactory m_phrasePropertyFactory;
@ -766,6 +767,10 @@ public:
// Value of the "default-non-term-for-empty-range-only" setting.
bool GetDefaultNonTermOnlyForEmptyRange() const {
  return m_defaultNonTermOnlyForEmptyRange;
}
// Value of the "n-best-trees" setting (write n-best target-side trees to the n-best list).
bool PrintNBestTrees() const
{ return m_printNBestTrees; }
};
}

View File

@ -5,6 +5,7 @@
#include <algorithm>
#include "moses/StaticData.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "TranslationAnalysis.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
@ -134,4 +135,33 @@ void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
os << std::endl;
}
/*
 * Overload of PrintTranslationAnalysis for chart hypotheses.
 *
 * Currently a stub: the entire body is commented out, so nothing is written
 * to os and hypo is never inspected.
 *
 * NOTE(review): the disabled code below refers to Hypothesis,
 * TranslationOption and GetPrevHypo(); it looks like a copy of the
 * phrase-based routine that still has to be adapted to ChartHypothesis --
 * confirm before enabling it.
 */
void PrintTranslationAnalysis(std::ostream &os, const Moses::ChartHypothesis* hypo)
{
/*
os << endl << "TRANSLATION HYPOTHESIS DETAILS:" << endl;
queue<const Hypothesis*> translationPath;
while (hypo)
{
translationPath.push(hypo);
hypo = hypo->GetPrevHypo();
}
while (!translationPath.empty())
{
hypo = translationPath.front();
translationPath.pop();
const TranslationOption *transOpt = hypo->GetTranslationOption();
if (transOpt != NULL)
{
os << hypo->GetCurrSourceWordsRange() << " ";
for (size_t decodeStepId = 0; decodeStepId < DecodeStepTranslation::GetNumTransStep(); ++decodeStepId)
os << decodeStepId << "=" << transOpt->GetSubRangeCount(decodeStepId) << ",";
os << *transOpt << endl;
}
}
os << "END TRANSLATION" << endl;
*/
}
}

View File

@ -1,14 +1,16 @@
#pragma once
// $Id$
/*
* also see moses/SentenceStats
*/
#ifndef moses_cmd_TranslationAnalysis_h
#define moses_cmd_TranslationAnalysis_h
#include <iostream>
#include "moses/Hypothesis.h"
namespace Moses {
class Hypothesis;
class ChartHypothesis;
}
namespace TranslationAnalysis
{
@ -18,7 +20,7 @@ namespace TranslationAnalysis
* os. Included information: phrase alignment, words dropped, scores
*/
void PrintTranslationAnalysis(std::ostream &os, const Moses::Hypothesis* hypo);
void PrintTranslationAnalysis(std::ostream &os, const Moses::ChartHypothesis* hypo);
}
#endif

View File

@ -95,7 +95,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
if (!options.sourceLabels) {
Error("SourceLabels should be active if SourceLabelSet is supposed to be written to a file");
}
OpenOutputFileOrDie(options.sourceLabelSetFile, sourceLabelSetStream); // TODO: global sourceLabelSet cannot be determined during parallelized extraction
OpenOutputFileOrDie(options.sourceLabelSetFile, sourceLabelSetStream); // note that this is not a global source label set if extraction is parallelized
}
if (!options.unknownWordSoftMatchesFile.empty()) {
OpenOutputFileOrDie(options.unknownWordSoftMatchesFile, unknownWordSoftMatchesStream);
@ -239,7 +239,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
}
// Initialize phrase orientation scoring object
PhraseOrientation phraseOrientation( sourceTokens, targetXmlTreeParser.GetWords(), alignment);
PhraseOrientation phraseOrientation( sourceTokens.size(), targetXmlTreeParser.GetWords().size(), alignment);
// Write the rules, subject to scope pruning.
const std::vector<Node *> &targetNodes = graph.GetTargetNodes();
@ -621,34 +621,43 @@ void ExtractGHKM::WriteGlueGrammar(
size_t sourceLabelGlueX = 1;
// basic rules
out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| ||| ||| |||";
out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| 0-0 ||| ||| |||";
if (options.treeFragments) {
out << " {{Tree [" << topLabel << " <s>]}}";
out << " {{Tree [" << topLabel << " [SSTART <s>]]}}";
}
if (options.sourceLabels) {
out << " {{SourceLabels 1 1 " << sourceLabelGlueTop << " 1}}";
}
if (options.phraseOrientation) {
out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
}
out << std::endl;
out << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 ||| ||| |||";
out << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 1-1 ||| ||| |||";
if (options.treeFragments) {
out << " {{Tree [" << topLabel << " [" << topLabel << "] </s>]}}";
out << " {{Tree [" << topLabel << " [" << topLabel << "] [SEND </s>]]}}";
}
if (options.sourceLabels) {
out << " {{SourceLabels 2 1 " << sourceLabelGlueTop << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
if (options.phraseOrientation) {
out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
}
out << std::endl;
// top rules
for (std::map<std::string, int>::const_iterator i = topLabelSet.begin();
i != topLabelSet.end(); ++i) {
out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1 ||| ||| |||";
out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 1-1 2-2 ||| ||| |||";
if (options.treeFragments) {
out << " {{Tree [" << topLabel << " <s> [" << i->first << "] </s>]}}";
out << " {{Tree [" << topLabel << " [SSTART <s>] [" << i->first << "] [SEND </s>]]}}";
}
if (options.sourceLabels) {
out << " {{SourceLabels 2 1 " << sourceLabelGlueX << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
if (options.phraseOrientation) {
out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
}
out << std::endl;
}
@ -660,7 +669,10 @@ void ExtractGHKM::WriteGlueGrammar(
out << " {{Tree [" << topLabel << " ["<< topLabel << "] [" << *i << "]]}}";
}
if (options.sourceLabels) {
out << " {{SourceLabels 3 2.718 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 2.718 1 " << sourceLabelGlueTop << " 2.718}}"; // TODO: there should be better options than using "SOMELABEL"
out << " {{SourceLabels 3 2.718 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 2.718 1 " << sourceLabelGlueTop << " 2.718}}";
}
if (options.phraseOrientation) {
out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
}
out << std::endl;
}
@ -671,7 +683,10 @@ void ExtractGHKM::WriteGlueGrammar(
out << " {{Tree [" << topLabel << " [" << topLabel << "] [X]]}}";
}
if (options.sourceLabels) {
out << " {{SourceLabels 3 1 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 1 1 " << sourceLabelGlueTop << " 1}}"; // TODO: there should be better options than using "SOMELABEL"
out << " {{SourceLabels 3 1 " << sourceLabelGlueTop << " " << sourceLabelGlueX << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
if (options.phraseOrientation) {
out << " {{Orientation 0.25 0.25 0.25 0.25 0.25 0.25 0.25 0.25}}";
}
out << std::endl;
}

View File

@ -33,28 +33,25 @@ namespace GHKM
std::vector<float> PhraseOrientation::m_l2rOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
std::vector<float> PhraseOrientation::m_r2lOrientationPriorCounts = boost::assign::list_of(0)(0)(0)(0)(0);
PhraseOrientation::PhraseOrientation(const std::vector<std::string> &source,
const std::vector<std::string> &target,
PhraseOrientation::PhraseOrientation(int sourceSize,
int targetSize,
const Alignment &alignment)
: m_source(source)
, m_target(target)
: m_countF(sourceSize)
, m_countE(targetSize)
, m_alignment(alignment)
{
int countF = m_source.size();
int countE = m_target.size();
// prepare data structures for alignments
std::vector<std::vector<int> > alignedToS;
for(int i=0; i<countF; ++i) {
for(int i=0; i<m_countF; ++i) {
std::vector< int > dummy;
alignedToS.push_back(dummy);
}
for(int i=0; i<countE; ++i) {
for(int i=0; i<m_countE; ++i) {
std::vector< int > dummy;
m_alignedToT.push_back(dummy);
}
std::vector<int> alignedCountS(countF,0);
std::vector<int> alignedCountS(m_countF,0);
for (Alignment::const_iterator a=alignment.begin(); a!=alignment.end(); ++a) {
m_alignedToT[a->second].push_back(a->first);
@ -62,8 +59,8 @@ PhraseOrientation::PhraseOrientation(const std::vector<std::string> &source,
alignedToS[a->first].push_back(a->second);
}
for (int startF=0; startF<countF; ++startF) {
for (int endF=startF; endF<countF; ++endF) {
for (int startF=0; startF<m_countF; ++startF) {
for (int endF=startF; endF<m_countF; ++endF) {
int minE = std::numeric_limits<int>::max();
int maxE = -1;
@ -85,8 +82,8 @@ PhraseOrientation::PhraseOrientation(const std::vector<std::string> &source,
// check alignments for target phrase startE...endE
// loop over continuous phrases which are compatible with the word alignments
for (int startE=0; startE<countE; ++startE) {
for (int endE=startE; endE<countE; ++endE) {
for (int startE=0; startE<m_countE; ++startE) {
for (int endE=startE; endE<m_countE; ++endE) {
int minF = std::numeric_limits<int>::max();
int maxF = -1;
@ -123,7 +120,7 @@ PhraseOrientation::PhraseOrientation(const std::vector<std::string> &source,
startF--) {
// end point of source phrase may advance over unaligned
for (int endF=maxF;
(endF<countF &&
(endF<m_countF &&
(endF==maxF || alignedCountS[endF]==0)); // unaligned
endF++) { // at this point we have extracted a phrase
@ -197,14 +194,14 @@ const std::string PhraseOrientation::GetOrientationInfoString(int startF, int st
if ( direction == L2R || direction == BIDIR )
hierPrevOrient = GetOrientHierModel(REO_MSLR,
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, m_source.size()-1, 0, 1,
startF, endF, startE, endE, m_countF-1, 0, 1,
&ge, &lt,
m_bottomRight, m_bottomLeft);
if ( direction == R2L || direction == BIDIR )
hierNextOrient = GetOrientHierModel(REO_MSLR,
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, m_source.size()-1, -1,
endF, startF, endE, startE, 0, m_countF-1, -1,
&lt, &ge,
m_bottomLeft, m_bottomRight);
@ -263,14 +260,14 @@ REO_POS PhraseOrientation::GetOrientationInfo(int startF, int startE, int endF,
if ( direction == L2R )
return GetOrientHierModel(REO_MSLR,
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, m_source.size()-1, 0, 1,
startF, endF, startE, endE, m_countF-1, 0, 1,
&ge, &lt,
m_bottomRight, m_bottomLeft);
if ( direction == R2L )
return GetOrientHierModel(REO_MSLR,
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, m_source.size()-1, -1,
endF, startF, endE, startE, 0, m_countF-1, -1,
&lt, &ge,
m_bottomLeft, m_bottomRight);
@ -369,10 +366,10 @@ bool PhraseOrientation::IsAligned(int fi, int ei) const
if (ei <= -1 || fi <= -1)
return false;
if (ei == (int)m_target.size() && fi == (int)m_source.size())
if (ei == m_countE && fi == m_countF)
return true;
if (ei >= (int)m_target.size() || fi >= (int)m_source.size())
if (ei >= m_countE || fi >= m_countF)
return false;
for (size_t i=0; i<m_alignedToT[ei].size(); ++i)

View File

@ -45,9 +45,9 @@ class PhraseOrientation
{
public:
PhraseOrientation(const std::vector<std::string> &source,
const std::vector<std::string> &target,
const Alignment &alignment);
PhraseOrientation(int sourceSize,
int targetSize,
const Alignment &alignment);
REO_POS GetOrientationInfo(int startF, int endF, REO_DIR direction) const;
REO_POS GetOrientationInfo(int startF, int startE, int endF, int endE, REO_DIR direction) const;
@ -80,8 +80,8 @@ private:
static bool le(int first, int second) { return first <= second; };
static bool lt(int first, int second) { return first < second; };
const std::vector<std::string> &m_source;
const std::vector<std::string> &m_target;
const int m_countF;
const int m_countE;
const Alignment &m_alignment;
std::vector<std::vector<int> > m_alignedToT;

View File

@ -5,6 +5,8 @@
use strict;
use warnings;
use Digest::MD5 qw(md5);
use Encode qw(encode_utf8);
use Getopt::Long;
binmode(STDIN, ":utf8");
@ -14,9 +16,11 @@ binmode(STDERR, ":utf8");
my $verbose = 0;
my $n = 1;
my $srcfile = undef;
my $md5 = 0;
# Command-line options; GetOptions failing (unknown/invalid option) exits with status 1.
GetOptions(
"n=i" => \$n, # the n-grams to search for (default: unigrams; 0 compares whole sentences)
"verbose" => \$verbose, # emit the list of oov words
"verbose!" => \$verbose, # emit the list of oov words
"md5!" => \$md5, # hash each ngram using md5, saves memory for longer n-grams
"src=s" => \$srcfile, # use this source file
) or exit 1;
@ -25,6 +29,8 @@ if (!defined $testf) {
print STDERR "usage: $0 test-corpus < training-corpus
Options:
--n=1 ... use phrases of n words as the unit
set --n=0 to compare *whole sentences* (forces md5 hashing on)
--md5 ... hash each ngram using md5, saves memory for longer n-grams
--verbose ... emit OOV phrases at the end
--src=test-src ... a word in the test-corpus not deemed OOV if present in the
corresponding source sentence in test-src.
@ -39,6 +45,8 @@ Synopsis:
exit 1;
}
my $ngr_or_sent = $n > 0 ? "$n-grams" : "sentences";
# load source file to accept ngrams from source
my $source_confirms = undef;
my $srcfilelen = undef;
@ -51,7 +59,7 @@ if (defined $srcfile) {
chomp;
s/^\s+//;
s/\s+$//;
my $ngrams = ngrams($n, [ split /\s+/, $_ ]);
my $ngrams = ngrams($n, $_);
foreach my $ngr (keys %$ngrams) {
$source_confirms->[$nr]->{$ngr} += $ngrams->{$ngr};
$srctokens += $ngrams->{$ngr};
@ -59,7 +67,7 @@ if (defined $srcfile) {
}
close $fh;
print "Source set sents\t$nr\n";
print "Source set running $n-grams\t$srctokens\n";
print "Source set running $ngr_or_sent\t$srctokens\n" if $n>0;
$srcfilelen = $nr;
}
@ -73,7 +81,7 @@ while (<$fh>) {
chomp;
s/^\s+//;
s/\s+$//;
my $ngrams = ngrams($n, [ split /\s+/, $_ ]);
my $ngrams = ngrams($n, $_);
foreach my $ngr (keys %$ngrams) {
$needed{$ngr} += $ngrams->{$ngr}
unless $source_confirms->[$nr]->{$ngr};
@ -85,9 +93,9 @@ close $fh;
my $testtypesneeded = scalar(keys(%needed));
my $testtypes = scalar(keys(%testtypes));
print "Test set sents\t$nr\n";
print "Test set running $n-grams\t$testtokens\n";
print "Test set unique $n-grams needed\t$testtypesneeded\n";
print "Test set unique $n-grams\t$testtypes\n";
print "Test set running $n-grams\t$testtokens\n" if $n>0;
print "Test set unique $ngr_or_sent needed\t$testtypesneeded\n";
print "Test set unique $ngr_or_sent\t$testtypes\n";
die "Mismatching sent count: $srcfile and $testf ($srcfilelen vs. $nr)"
if defined $srcfile && $srcfilelen != $nr;
@ -102,7 +110,7 @@ while (<>) {
chomp;
s/^\s+//;
s/\s+$//;
my $ngrams = ngrams($n, [ split /\s+/, $_ ]);
my $ngrams = ngrams($n, $_); # [ split /\s+/, $_ ]);
foreach my $ngr (keys %$ngrams) {
$seen{$ngr} = 1 if $ngrams->{$ngr};
$traintokens += $ngrams->{$ngr};
@ -114,8 +122,8 @@ foreach my $ngr (keys %needed) {
print STDERR "Done.\n";
my $traintypes = scalar(keys(%seen));
print "Training set sents\t$nr\n";
print "Training set running $n-grams\t$traintokens\n";
print "Training set unique $n-grams\t$traintypes\n";
print "Training set running $n-grams\t$traintokens\n" if $n>0;
print "Training set unique $ngr_or_sent\t$traintypes\n";
my $oovtypes = scalar(keys(%needed));
@ -123,8 +131,8 @@ my $oovtokens = 0;
foreach my $v (values %needed) {
$oovtokens += $v;
}
printf "OOV $n-gram types\t%i\t%.1f %%\n", $oovtypes, $oovtypes/$testtypes*100;
printf "OOV $n-gram tokens\t%i\t%.1f %%\n", $oovtokens, $oovtokens/$testtokens*100;
printf "OOV $ngr_or_sent types\t%i\t%.1f %%\n", $oovtypes, $oovtypes/$testtypes*100;
printf "OOV $ngr_or_sent tokens\t%i\t%.1f %%\n", $oovtokens, $oovtokens/$testtokens*100;
if ($verbose) {
foreach my $ngr (sort {$needed{$b} <=> $needed{$a}} keys %needed) {
@ -159,17 +167,26 @@ sub my_open {
# Count the units (n-grams or the whole sentence) found in one sentence.
#
# Arguments:
#   $n    ... n-gram order; 0 means "treat the whole sentence as one unit"
#   $sent ... the sentence as a single whitespace-trimmed string
# Returns a hash reference mapping each unit to its number of occurrences
# in the sentence (an empty hash if the sentence has fewer than $n words).
#
# Keys are the plain n-gram strings, or their binary MD5 digests when the
# file-level $md5 flag is set. Whole-sentence mode ($n == 0) always hashes.
sub ngrams {
  my $n = shift;
  my $sent = shift;

  if ($n == 0) {
    # whole-sentence mode: a single unit, always stored as its digest
    return { md5(encode_utf8($sent)) => 1 };
  }

  my @words = split /\s+/, $sent;
  my $out = {};
  # Slide a window of $n words over the sentence; for $n == 1 this simply
  # visits every word.
  while ($#words >= $n-1) {
    my $ngr = join(" ", @words[0..$n-1]);
    # md5() works on bytes, so the (character) string is UTF-8 encoded first.
    # The digest -- not the raw n-gram -- must be used as the key, otherwise
    # --md5 would not save any memory.
    my $key = $md5 ? md5(encode_utf8($ngr)) : $ngr;
    $out->{$key}++;
    shift @words;
  }
  return $out;
}

View File

@ -116,12 +116,14 @@ consolidate
in: CORPUS:clean-parsed-stem
out: tokenized-stem
default-name: truecaser/corpus
pass-unless: trainer
template: $moses-script-dir/ems/support/consolidate-training-data.perl $input-extension $output-extension OUT IN
error: number of lines don't match
train
in: tokenized-stem
out: truecase-model
rerun-on-change: trainer
pass-unless: trainer
default-name: truecaser/truecase-model
template: $trainer -model OUT.$input-extension -corpus IN.$input-extension ; $trainer -model OUT.$output-extension -corpus IN.$output-extension

View File

@ -450,7 +450,9 @@ sub find_steps {
}
# go through each module
for(my $m=$#MODULE; $m>=0; $m--) {
while(1) {
my $step_count_before = scalar(@DO_STEP);
for(my $m=$#MODULE; $m>=0; $m--) {
my $module = $MODULE[$m];
# if module is "multiple" go through each set
@ -475,6 +477,8 @@ sub find_steps {
&find_steps_for_module($module,"");
}
}
last if $step_count_before == scalar(@DO_STEP);
}
}
sub find_steps_for_module {
@ -487,6 +491,7 @@ sub find_steps_for_module {
my $step = &construct_name($module,$set,$stepname);
my $defined_step = &defined_step($step); # without set
next if defined($STEP_LOOKUP{$step});
# FIRST, some checking...
print "\tchecking step: $step\n" if $VERBOSE;

View File

@ -5,6 +5,7 @@ use IPC::Open3;
use File::Temp qw/tempdir/;
use File::Path qw/rmtree/;
use Getopt::Long "GetOptions";
use Symbol;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
@ -128,7 +129,6 @@ sub interpolate {
}
# no specified weights -> compute them
else {
# compute perplexity
my $i = 0;
foreach my $lm (@LM) {
@ -193,10 +193,11 @@ sub safesystem {
sub saferun3 {
print STDERR "Executing: @_\n";
my($wtr, $rdr, $err);
my $wtr = gensym();
my $rdr = gensym();
my $err = gensym();
my $pid = open3($wtr, $rdr, $err, @_);
close($wtr);
waitpid($pid, 0);
my $gotout = "";
$gotout .= $_ while (<$rdr>);
close $rdr;
@ -205,6 +206,7 @@ sub saferun3 {
$goterr .= $_ while (<$err>);
close $err;
}
waitpid($pid, 0);
if ($? == -1) {
print STDERR "Failed to execute: @_\n $!\n";
exit(1);

49
scripts/generic/fsa2fsal.pl Executable file
View File

@ -0,0 +1,49 @@
#!/usr/bin/env perl
# A very simple script that converts fsa format (openfst lattices) to the same
# thing represented one sentence per line. It uses '|||' to delimit columns and
# ' ' to delimit nodes (i.e. original lines).
# Some rudimentary sanity checks are done on the fly.
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
use strict;
my $errs = 0;
# Report a problem to STDERR as "<line number>:<message>" and bump the
# file-level error counter $errs.
sub err {
  my ($lineno, $text) = @_;
  print STDERR $lineno . ":" . $text . "\n";
  $errs++;
}
my $onr = 0;    # number of output lines (lattices) written so far
my @lines = (); # arcs collected for the lattice currently being read

# Write the collected arcs as one space-separated output line and empty the
# buffer. Does nothing when no arcs are pending.
sub flush {
  return unless @lines;
  print join(" ", @lines), "\n";
  $onr++;
  @lines = ();
}
my $nr = 0;            # 1-based number of the input line currently processed
my $numscores = undef; # comma count in the score column of the first arc seen

# Read the fsa input line by line: every non-empty line is one arc
# "from to label scores", an empty line ends the current lattice.
while (<>) {
  $nr++; # advance the counter so that err() reports the real input line
  chomp;
  if ($_ eq "") {
    # an empty line closes the current lattice
    flush();
    next;
  }
  my ($src, $dst, $label, $scores, $rest) = split /\s+/, $_, 5;
  err($nr, "The delimiter '|||' can't appear in the input!") if /\|\|\|/;
  err($nr, "Node id not numeric: $src") if $src !~ /^\d+$/;
  err($nr, "Node id not numeric: $dst") if $dst !~ /^\d+$/;
  err($nr, "Unexpected tail: '$rest'") if defined $rest && $rest !~ /^\s*$/;
  # tr/,/,/ counts the commas, i.e. (number of scores - 1)
  my $thisnumscores = ($scores =~ tr/,/,/);
  # the first arc fixes the expected number of scores for all later arcs
  $numscores = $thisnumscores if !defined $numscores;
  err($nr, "Incompatible number of arc scores, previous lines had ".($numscores+1).", now ".($thisnumscores+1))
    if $numscores != $thisnumscores;
  push @lines, join("|||", ($src,$dst,$label,$scores));
}
flush(); # emit the last lattice if the input did not end with an empty line
exit 1 if $errs;

15
scripts/generic/fsal2fsa.pl Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env perl
# A very simple script that converts fsal back to fsa format (openfst lattices)
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
use strict;
# Each input line holds one lattice: arcs are separated by single spaces and
# the columns of an arc by '|||'. Print one arc per line with tab-separated
# columns, followed by an empty line that terminates the lattice.
while (my $lattice = <>) {
  chomp $lattice;
  $lattice =~ tr/ /\n/;
  $lattice =~ s/\|\|\|/\t/g;
  print $lattice, "\n\n";
}

View File

@ -2,6 +2,7 @@
use strict;
use Getopt::Long "GetOptions";
use FindBin qw($RealBin);
my ($DIR,$F,$E,$ALIGNMENT,$CORPUS,$SETTINGS);
die("ERROR: syntax is --alignment FILE --corpus FILESTEM --f EXT --e EXT --DIR OUTDIR --settings STRING")
@ -15,8 +16,8 @@ die("ERROR: syntax is --alignment FILE --corpus FILESTEM --f EXT --e EXT --DIR O
&& -e $ALIGNMENT && -e "$CORPUS.$F" && -e "$CORPUS.$E";
`mkdir $DIR`;
`/opt/moses/bin/mtt-build < $CORPUS.$F -i -o $DIR/$F`;
`/opt/moses/bin/mtt-build < $CORPUS.$E -i -o $DIR/$E`;
`/opt/moses/bin/symal2mam < $ALIGNMENT $DIR/$F-$E.mam`;
`/opt/moses/bin/mmlex-build $DIR/ $F $E -o $DIR/$F-$E.lex -c $DIR/$F-$E.cooc`;
`$RealBin/../../bin/mtt-build < $CORPUS.$F -i -o $DIR/$F`;
`$RealBin/../../bin/mtt-build < $CORPUS.$E -i -o $DIR/$E`;
`$RealBin/../../bin/symal2mam < $ALIGNMENT $DIR/$F-$E.mam`;
`$RealBin/../../bin/mmlex-build $DIR/ $F $E -o $DIR/$F-$E.lex -c $DIR/$F-$E.cooc`;

View File

@ -386,7 +386,14 @@ if ($__PROMIX_TRAINING) {
die "To use promix training, need to specify a filter and binarisation command" unless $filtercmd =~ /Binarizer/;
}
$mertargs = "" if !defined $mertargs;
if (!defined $mertargs) {
if (defined $batch_mira_args) {
$mertargs = $batch_mira_args;
}
else {
$mertargs = "";
}
}
my $scconfig = undef;
if ($mertargs =~ /\-\-scconfig\s+(.+?)(\s|$)/) {
@ -395,6 +402,13 @@ if ($mertargs =~ /\-\-scconfig\s+(.+?)(\s|$)/) {
$mertargs =~ s/\-\-scconfig\s+(.+?)(\s|$)//;
}
my $sctype = "--sctype BLEU";
if ($mertargs =~ /(\-\-sctype\s+.+?)(\s|$)/) {
$sctype = $1;
$mertargs =~ s/(\-\-sctype\s+.+?)(\s|$)//;
}
# handling reference length strategy
$scconfig .= &setup_reference_length_type();
@ -407,8 +421,7 @@ $scconfig =~ s/\s+/,/g;
$scconfig = "--scconfig $scconfig" if ($scconfig);
my $mert_extract_args = $mertargs;
$mert_extract_args .= " $scconfig";
my $mert_extract_args = "$sctype $scconfig";
$extractorargs = "" unless $extractorargs;
$mert_extract_args .= " $extractorargs";
@ -1113,7 +1126,7 @@ if($___RETURN_BEST_DEV) {
my $bestbleu=0;
my $evalout = "eval.out";
for (my $i = 1; $i < $run; $i++) {
my $cmd = "$mert_eval_cmd --reference " . join(",", @references) . " -s BLEU --candidate run$i.out";
my $cmd = "$mert_eval_cmd --reference " . join(",", @references) . " $mert_extract_args --nbest run$i.best$___N_BEST_LIST_SIZE.out.gz";
$cmd .= " -l $__REMOVE_SEGMENTATION" if defined( $__PROMIX_TRAINING);
safesystem("$cmd 2> /dev/null 1> $evalout");
open my $fh, '<', $evalout or die "Can't read $evalout : $!";

View File

@ -22,7 +22,7 @@ $SCRIPTS_ROOTDIR =~ s/\/training$//;
#$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE, $_SORT_COMPRESS, $_SORT_PARALLEL, $_CORPUS,
$_CORPUS_COMPRESSION, $_FIRST_STEP, $_LAST_STEP, $_F, $_E, $_MAX_PHRASE_LENGTH,
$_CORPUS_COMPRESSION, $_FIRST_STEP, $_LAST_STEP, $_F, $_E, $_MAX_PHRASE_LENGTH, $_DISTORTION_LIMIT,
$_LEXICAL_FILE, $_NO_LEXICAL_WEIGHTING, $_LEXICAL_COUNTS, $_VERBOSE, $_ALIGNMENT,
$_ALIGNMENT_FILE, $_ALIGNMENT_STEM, @_LM, $_EXTRACT_FILE, $_GIZA_OPTION, $_HELP, $_PARTS,
$_DIRECTION, $_ONLY_PRINT_GIZA, $_GIZA_EXTENSION, $_REORDERING,
@ -36,7 +36,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
$_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
$_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_LEXICAL_REORDERING_DEFAULT_SCORES,$_DO_STEPS,
@_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,$_MMSAPT,
@_BASELINE_ALIGNMENT_MODEL, $_BASELINE_EXTRACT, $_BASELINE_ALIGNMENT,
$_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $_INSTANCE_WEIGHTS_FILE, $_LMODEL_OOV_FEATURE, $_NUM_LATTICE_FEATURES, $IGNORE, $_FLEXIBILITY_SCORE, $_EXTRACT_COMMAND);
@ -54,6 +54,7 @@ $_HELP = 1
'giza-e2f=s' => \$_GIZA_E2F,
'giza-f2e=s' => \$_GIZA_F2E,
'max-phrase-length=s' => \$_MAX_PHRASE_LENGTH,
'distortion-limit=s' => \$_DISTORTION_LIMIT,
'lexical-file=s' => \$_LEXICAL_FILE,
'no-lexical-weighting' => \$_NO_LEXICAL_WEIGHTING,
'write-lexical-counts' => \$_LEXICAL_COUNTS,
@ -130,6 +131,7 @@ $_HELP = 1
'transliteration-phrase-table=s' => \$_TRANSLITERATION_PHRASE_TABLE,
'mmsapt=s' => \$_MMSAPT,
'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING,
'lexical-reordering-default-scores=s' => \$_LEXICAL_REORDERING_DEFAULT_SCORES,
'do-steps=s' => \$_DO_STEPS,
'memscore:s' => \$_MEMSCORE,
'force-factored-filenames' => \$_FORCE_FACTORED_FILENAMES,
@ -440,11 +442,14 @@ $___CONTINUE = $_CONTINUE if $_CONTINUE;
my $___MAX_PHRASE_LENGTH = "7";
$___MAX_PHRASE_LENGTH = "10" if $_HIERARCHICAL;
$___MAX_PHRASE_LENGTH = $_MAX_PHRASE_LENGTH if $_MAX_PHRASE_LENGTH;
my $___DISTORTION_LIMIT = 6;
$___DISTORTION_LIMIT = $_DISTORTION_LIMIT if $_DISTORTION_LIMIT;
my $___LEXICAL_WEIGHTING = 1;
my $___LEXICAL_COUNTS = 0;
my $___LEXICAL_FILE = $___MODEL_DIR."/lex";
$___MAX_PHRASE_LENGTH = $_MAX_PHRASE_LENGTH if $_MAX_PHRASE_LENGTH;
$___LEXICAL_WEIGHTING = 0 if $_NO_LEXICAL_WEIGHTING;
$___LEXICAL_COUNTS = 1 if $_LEXICAL_COUNTS;
$___LEXICAL_FILE = $_LEXICAL_FILE if $_LEXICAL_FILE;
@ -1972,7 +1977,7 @@ sub create_ini {
$phrase_table_impl_name = "PhraseDictionaryOnDisk" if $phrase_table_impl==2;
$phrase_table_impl_name = "PhraseDictionaryMemory" if $phrase_table_impl==6;
$phrase_table_impl_name = "PhraseDictionaryALSuffixArray" if $phrase_table_impl==10;
$phrase_table_impl_name = "Mmsapt" if $phrase_table_impl==11;
$phrase_table_impl_name = "PhraseDictionaryBitextSampling" if $phrase_table_impl==11;
$file .= "/" if $phrase_table_impl==11 && $file !~ /\/$/;
# table limit (maximum number of translation options per input phrase)
@ -1982,9 +1987,8 @@ sub create_ini {
}
# sum up...
$feature_spec .= "$phrase_table_impl_name name=TranslationModel$i num-features=$basic_weight_count ".($phrase_table_impl==11?"base":"path")."=$file input-factor=$input_factor output-factor=$output_factor";
$feature_spec .= "$phrase_table_impl_name name=TranslationModel$i num-features=$basic_weight_count path=$file input-factor=$input_factor output-factor=$output_factor";
$feature_spec .= " L1=$___F L2=$___E ".$_MMSAPT if defined($_MMSAPT); # extra settings for memory mapped suffix array phrase table
$feature_spec .= " table-limit=$table_limit" unless defined($_MMSAPT);
$feature_spec .= "\n";
$weight_spec .= "TranslationModel$i=";
for(my $j=0;$j<$basic_weight_count;$j++) { $weight_spec .= " 0.2"; }
@ -2047,7 +2051,7 @@ sub create_ini {
$table_file .= ".";
$table_file .= $model->{"filename"};
$table_file .= ".gz";
$feature_spec .= "LexicalReordering name=LexicalReordering$i num-features=".$model->{"numfeatures"}." type=".$model->{"config"}." input-factor=$input_factor output-factor=$output_factor path=$table_file\n";
$feature_spec .= "LexicalReordering name=LexicalReordering$i num-features=".$model->{"numfeatures"}." type=".$model->{"config"}." input-factor=$input_factor output-factor=$output_factor path=$table_file".(defined($_LEXICAL_REORDERING_DEFAULT_SCORES)?" default-scores=$_LEXICAL_REORDERING_DEFAULT_SCORES":"")."\n";
$weight_spec .= "LexicalReordering$i=";
for(my $j=0;$j<$model->{"numfeatures"};$j++) { $weight_spec .= " 0.3"; }
$weight_spec .= "\n";
@ -2138,7 +2142,7 @@ sub create_ini {
}
# phrase-based model settings
else {
print INI "[distortion-limit]\n6\n";
print INI "[distortion-limit]\n$___DISTORTION_LIMIT\n";
}
# only set the factor delimiter if it is non-standard

View File

@ -55,6 +55,8 @@ def escape_special_chars(line):
line = line.replace('\'','&apos;') # xml
line = line.replace('"','&quot;') # xml
line = line.replace('[','&#91;') # syntax non-terminal
line = line.replace(']','&#93;') # syntax non-terminal
return line
@ -91,11 +93,11 @@ def write(sentence, output_format='xml'):
out = create_brackets(0,sentence)
out = out.replace('|','&#124;') # factor separator
out = out.replace('[','&#91;') # syntax non-terminal
out = out.replace(']','&#93;') # syntax non-terminal
out = out.replace('&amp;apos;','&apos;') # lxml is buggy if input is escaped
out = out.replace('&amp;quot;','&quot;') # lxml is buggy if input is escaped
out = out.replace('&amp;#91;','&#91;') # lxml is buggy if input is escaped
out = out.replace('&amp;#93;','&#93;') # lxml is buggy if input is escaped
print(out)
@ -138,9 +140,9 @@ def create_subtree(position, sentence):
def create_brackets(position, sentence):
if position:
element = "( " + sentence[position].proj_func + ' '
element = "[ " + sentence[position].proj_func + ' '
else:
element = "( sent "
element = "[ sent "
for i in range(1,position):
if sentence[i].proj_head == position:
@ -148,26 +150,19 @@ def create_brackets(position, sentence):
if position:
word = sentence[position].word
if word == ')':
word = 'RBR'
elif word == '(':
word = 'LBR'
tag = sentence[position].tag
if tag == '$(':
tag = '$BR'
if preterminals:
element += '( ' + tag + ' ' + word + ' ) '
element += '[ ' + tag + ' ' + word + ' ] '
else:
element += word + ' ) '
element += word + ' ] '
for i in range(position, len(sentence)):
if i and sentence[i].proj_head == position:
element += create_brackets(i, sentence)
if preterminals or not position:
element += ') '
element += '] '
return element