This commit is contained in:
Ulrich Germann 2015-03-09 01:27:02 +00:00
commit ccf4cb838c
16 changed files with 335 additions and 143 deletions

View File

@ -14,7 +14,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.602770742" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.602770742" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.602770742." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1436139469" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.622899770" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -50,6 +50,7 @@
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -91,7 +92,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.168814843" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.168814843" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.168814843." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.844577457" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1635721038" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>

View File

@ -5,16 +5,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2091728208." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.69362991" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.641760346" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -24,6 +24,7 @@
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1186248186" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1416850495" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.534201039" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1468157552" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
@ -46,6 +47,7 @@
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -87,16 +89,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.185559773" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.185559773" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.185559773" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.185559773." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.33298530" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1524270442" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>

View File

@ -15,7 +15,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.461114338" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.461114338" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.461114338." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1896491482" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.debug.2144309834" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -65,6 +65,7 @@
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -103,7 +104,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.2121690436" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.2121690436" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.2121690436." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1577734572" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.target.gnu.platform.exe.release.1535487925" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
@ -155,10 +156,10 @@
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</configuration>
<configuration configurationName="Debug">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</configuration>
</storageModule>

View File

@ -14,7 +14,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.852684782" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.852684782" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.852684782." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.628760407" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.40031730" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -56,6 +56,7 @@
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
@ -89,7 +90,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1878418244" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1878418244" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1878418244." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1661678477" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.848161857" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>

View File

@ -14,7 +14,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1015532240" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1015532240" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1015532240." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1201298107" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.2097807873" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -73,6 +73,7 @@
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
@ -105,7 +106,7 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.179761083" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.179761083" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.179761083." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.2024222442" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1098252145" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>

View File

@ -480,7 +480,12 @@ FFState* BilingualLM::EvaluateWhenApplied(
}
size_t new_state = getStateChart(neuralLMids);
accumulator->PlusEquals(this, -accumulator->GetScoreForProducer(this));
// we're rescoring the full hypothesis, so we need to subtract the scores of the previous hypotheses
for (std::vector<const ChartHypothesis*>::const_iterator iter = cur_hypo.GetPrevHypos().begin(); iter != cur_hypo.GetPrevHypos().end(); ++iter) {
const ChartHypothesis &prevHypo = **iter;
value -= (prevHypo.GetScoreBreakdown().GetScoreForProducer(this));
}
accumulator->PlusEquals(this, value);
return new BilingualLMState(new_state, alignments, neuralLMids);

View File

@ -27,9 +27,9 @@ if $(with-irstlm) {
local with-srilm = [ option.get "with-srilm" ] ;
local with-maxent-srilm = [ option.get "with-maxent-srilm" ] ;
if $(with-srilm) {
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking with the SRILM library; Do NOT use version >= 1.7.1 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
#echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
#echo "!!! You are linking with the SRILM library; Do NOT use version >= 1.7.1 !!!" ;
#echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
if [ option.get "with-srilm-dynamic" : no : yes ] = yes {
lib srilm ;
@ -52,12 +52,12 @@ if $(with-srilm) {
if $(with-maxent-srilm) {
lib lbfgs : : $(sri-lib) ;
obj MaxEntSRI.o : MaxEntSRI.cpp ..//headers : <include>$(with-srilm)/include <include>$(with-srilm)/lm/src <include>$(with-srilm)/include/srilm <warnings>off ;
alias sri : SRI.o MaxEntSRI.o ParallelBackoff.o lbfgs sri-libs : : : <define>LM_SRI <define>LM_MAXENT_SRI ;
alias sri : SRI.o MaxEntSRI.o ParallelBackoff.o lbfgs sri-libs : : : <define>LM_SRI <define>LM_MAXENT_SRI <linkflags>-fopenmp ;
dependencies += sri ;
lmmacros += LM_SRI ;
lmmacros += LM_MAXENT_SRI ;
} else {
alias sri : SRI.o ParallelBackoff.o sri-libs : : : <define>LM_SRI ;
alias sri : SRI.o ParallelBackoff.o sri-libs : : : <define>LM_SRI <linkflags>-fopenmp ;
dependencies += sri ;
lmmacros += LM_SRI ;
}

View File

@ -378,9 +378,9 @@ CreateTranslationOptions()
}
}
}
VERBOSE(3,"Translation Option Collection\n " << *this << endl);
ProcessUnknownWord();
EvaluateWithSourceContext();
VERBOSE(3,"Translation Option Collection\n " << *this << endl);
Prune();
Sort();
CalcFutureScore(); // future score matrix

View File

@ -83,13 +83,12 @@ void PropertiesConsolidator::ActivatePartsOfSpeechProcessing(const std::string &
}
std::string PropertiesConsolidator::ProcessPropertiesString(const std::string &propertiesString) const
void PropertiesConsolidator::ProcessPropertiesString(const std::string &propertiesString, Moses::OutputFileStream& out) const
{
if ( propertiesString.empty() ) {
return propertiesString;
return;
}
std::ostringstream out;
std::vector<std::string> toks;
Moses::TokenizeMultiCharSeparator(toks, propertiesString, "{{");
for (size_t i = 1; i < toks.size(); ++i) {
@ -102,105 +101,150 @@ std::string PropertiesConsolidator::ProcessPropertiesString(const std::string &p
std::vector<std::string> keyValue = Moses::TokenizeFirstOnly(tok, " ");
assert(keyValue.size() == 2);
// TODO: individual methods for different properties
if ( !keyValue[0].compare("SourceLabels") ) {
if ( m_sourceLabelsFlag ) {
// SourceLabels property: replace strings with vocabulary indices
out << " {{" << keyValue[0];
std::istringstream tokenizer(keyValue[1]);
size_t nNTs;
double totalCount;
if (! (tokenizer >> nNTs)) { // first token: number of non-terminals (incl. left-hand side)
UTIL_THROW2("Not able to read number of non-terminals from SourceLabels property. "
<< "Flawed SourceLabels property?");
}
assert( nNTs > 0 );
out << " " << nNTs;
if (! (tokenizer >> totalCount)) { // second token: overall rule count
UTIL_THROW2("Not able to read overall rule count from SourceLabels property. "
<< "Flawed SourceLabels property?");
}
assert( totalCount > 0.0 );
out << " " << totalCount;
while (tokenizer.peek() != EOF) {
try {
size_t numberOfLHSsGivenRHS = std::numeric_limits<std::size_t>::max();
std::string token;
if (nNTs > 1) { // rule has right-hand side non-terminals, i.e. it's a hierarchical rule
for (size_t i=0; i<nNTs-1; ++i) { // RHS source non-terminal labels
tokenizer >> token; // RHS source non-terminal label
std::map<std::string,size_t>::const_iterator found = m_sourceLabels.find(token);
UTIL_THROW_IF2(found == m_sourceLabels.end(), "Label \"" << token << "\" from the phrase table not found in given label set.");
out << " " << found->second;
}
tokenizer >> token; // sourceLabelsRHSCount
out << " " << token;
tokenizer >> numberOfLHSsGivenRHS;
out << " " << numberOfLHSsGivenRHS;
}
for (size_t i=0; i<numberOfLHSsGivenRHS && tokenizer.peek()!=EOF; ++i) { // LHS source non-terminal labels seen with this RHS
tokenizer >> token; // LHS source non-terminal label
std::map<std::string,size_t>::const_iterator found = m_sourceLabels.find(token);
UTIL_THROW_IF2(found == m_sourceLabels.end() ,"Label \"" << token << "\" from the phrase table not found in given label set.");
out << " " << found->second;
tokenizer >> token; // ruleSourceLabelledCount
out << " " << token;
}
} catch (const std::exception &e) {
UTIL_THROW2("Flawed item in SourceLabels property?");
}
}
ProcessSourceLabelsPropertyValue(keyValue[1], out);
out << "}}";
} else { // don't process source labels property
} else { // don't process SourceLabels property
out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
}
} else if ( !keyValue[0].compare("POS") ) {
/* DO NOTHING (property is not registered in the decoder at the moment)
if ( m_partsOfSpeechFlag ) {
// POS property: replace strings with vocabulary indices
out << " {{" << keyValue[0];
std::istringstream tokenizer(keyValue[1]);
while (tokenizer.peek() != EOF) {
std::string token;
tokenizer >> token;
std::map<std::string,size_t>::const_iterator found = m_partsOfSpeechVocabulary.find(token);
UTIL_THROW_IF2(found == m_partsOfSpeechVocabulary.end() ,"Part-of-speech \"" << token << "\" from the phrase table not found in given part-of-speech vocabulary.");
out << " " << found->second;
}
ProcessPOSPropertyValue(keyValue[1], out);
out << "}}";
} else { // don't process parts-of-speech property
} else { // don't process POS property
out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
}
*/
} else {
// output other propertyi
// output other property
out << " {{" << keyValue[0] << " " << keyValue[1] << "}}";
}
}
return out.str();
}
// Rewrites the value of a SourceLabels property into `out`, replacing every
// source label string by its index from m_sourceLabels. All numeric fields
// (counts) are copied through; each written field is preceded by one space.
//
// Layout consumed from `value`, as read by the code below:
//   <nNTs> <totalCount> followed by repeated items; when nNTs > 1 an item
//   starts with nNTs-1 RHS labels, an RHS count and the number of LHS labels,
//   then (LHS label, count) pairs follow.
//
// Raises through UTIL_THROW2 when either header field cannot be read, and
// when anything inside an item raises a std::exception (e.g. unknown label).
void PropertiesConsolidator::ProcessSourceLabelsPropertyValue(const std::string &value, Moses::OutputFileStream& out) const
{
  typedef std::map<std::string,size_t>::const_iterator LabelIndexIter;

  std::istringstream tokenizer(value);

  // Header field 1: number of non-terminals (incl. left-hand side).
  size_t nNTs;
  if (! (tokenizer >> nNTs)) {
    UTIL_THROW2("Not able to read number of non-terminals from SourceLabels property. "
                << "Flawed SourceLabels property?");
  }
  assert( nNTs > 0 );
  out << " " << nNTs;

  // Header field 2: overall rule count.
  double totalCount;
  if (! (tokenizer >> totalCount)) {
    UTIL_THROW2("Not able to read overall rule count from SourceLabels property. "
                << "Flawed SourceLabels property?");
  }
  assert( totalCount > 0.0 );
  out << " " << totalCount;

  // More than one non-terminal means the rule has right-hand side
  // non-terminals, i.e. it is a hierarchical rule.
  const bool hasRHSNonTerminals = (nNTs > 1);

  while (tokenizer.peek() != EOF) {
    try {
      std::string field;
      // Without an explicit LHS count the LHS loop below runs until the
      // stream is exhausted.
      size_t lhsLimit = std::numeric_limits<std::size_t>::max();

      if (hasRHSNonTerminals) {
        // One label per RHS non-terminal (nNTs-1 of them).
        for (size_t rhsIdx = 1; rhsIdx < nNTs; ++rhsIdx) {
          tokenizer >> field; // RHS source non-terminal label
          const LabelIndexIter found = m_sourceLabels.find(field);
          UTIL_THROW_IF2(found == m_sourceLabels.end(), "Label \"" << field << "\" from the phrase table not found in given label set.");
          out << " " << found->second;
        }
        tokenizer >> field; // sourceLabelsRHSCount
        out << " " << field;
        tokenizer >> lhsLimit; // number of LHS labels seen with this RHS
        out << " " << lhsLimit;
      }

      // (LHS label, count) pairs seen with this RHS.
      size_t lhsSeen = 0;
      while (lhsSeen < lhsLimit && tokenizer.peek() != EOF) {
        tokenizer >> field; // LHS source non-terminal label
        const LabelIndexIter found = m_sourceLabels.find(field);
        UTIL_THROW_IF2(found == m_sourceLabels.end() ,"Label \"" << field << "\" from the phrase table not found in given label set.");
        out << " " << found->second;
        tokenizer >> field; // ruleSourceLabelledCount
        out << " " << field;
        ++lhsSeen;
      }
    } catch (const std::exception &) {
      // Any failure inside an item is reported with one generic message.
      UTIL_THROW2("Flawed item in SourceLabels property?");
    }
  }
}
// Writes the value of a POS property to `out`, replacing each
// whitespace-separated part-of-speech tag by its index from
// m_partsOfSpeechVocabulary. Every index is preceded by a single space.
//
// A tag that is missing from the vocabulary is reported through
// UTIL_THROW_IF2.
void PropertiesConsolidator::ProcessPOSPropertyValue(const std::string &value, Moses::OutputFileStream& out) const
{
  std::istringstream tokenizer(value);
  std::string token;
  // Loop on the extraction itself rather than on peek() != EOF: with trailing
  // whitespace in `value`, peek() still sees a character, the following
  // extraction fails, and the resulting empty token would be looked up and
  // rejected as an unknown part-of-speech.
  while (tokenizer >> token) {
    std::map<std::string,size_t>::const_iterator found = m_partsOfSpeechVocabulary.find(token);
    UTIL_THROW_IF2(found == m_partsOfSpeechVocabulary.end() ,"Part-of-speech \"" << token << "\" from the phrase table not found in given part-of-speech vocabulary.");
    out << " " << found->second;
  }
}
// Extracts the tags of the first "POS" property found in `propertiesString`.
//
// propertiesString: sequence of properties, each opened by "{{"
//                   (presumably of the form "{{Key value ...}}" -- the closing
//                   "}}" is assumed by the trimming below; confirm with caller).
// out:              cleared on entry; on success holds the whitespace-separated
//                   tokens of the POS property value, in order.
//
// Returns true if a POS property was found, false otherwise (including for an
// empty input string).
bool PropertiesConsolidator::GetPOSPropertyValueFromPropertiesString(const std::string &propertiesString, std::vector<std::string>& out) const
{
  out.clear();
  if ( propertiesString.empty() ) {
    return false;
  }
  std::vector<std::string> toks;
  Moses::TokenizeMultiCharSeparator(toks, propertiesString, "{{");
  // Index 0 is whatever precedes the first "{{" and is skipped.
  for (size_t i = 1; i < toks.size(); ++i) {
    std::string &tok = toks[i];
    if (tok.empty()) {
      continue;
    }
    // rfind locates the last '}' ; cutting at endPos-1 drops that character
    // and the one before it, i.e. the closing "}}".
    size_t endPos = tok.rfind("}");
    tok = tok.substr(0, endPos - 1);
    std::vector<std::string> keyValue = Moses::TokenizeFirstOnly(tok, " ");
    assert(keyValue.size() == 2);
    if ( !keyValue[0].compare("POS") ) {
      std::istringstream tokenizer(keyValue[1]);
      std::string token;
      // Loop on the extraction itself rather than on peek() != EOF: with
      // trailing whitespace in the value, the last extraction would fail and
      // a spurious empty string would be appended to `out`.
      while (tokenizer >> token) {
        out.push_back(token);
      }
      return true;
    }
  }
  return false;
}
} // namespace MosesTraining

View File

@ -22,6 +22,9 @@
#include <string>
#include <map>
#include <vector>
#include "OutputFileStream.h"
namespace MosesTraining
@ -36,9 +39,14 @@ public:
void ActivateSourceLabelsProcessing(const std::string &sourceLabelSetFile);
void ActivatePartsOfSpeechProcessing(const std::string &partsOfSpeechFile);
std::string ProcessPropertiesString(const std::string &propertiesString) const;
bool GetPOSPropertyValueFromPropertiesString(const std::string &propertiesString, std::vector<std::string>& out) const;
private:
void ProcessPropertiesString(const std::string &propertiesString, Moses::OutputFileStream& out) const;
protected:
void ProcessSourceLabelsPropertyValue(const std::string &value, Moses::OutputFileStream& out) const;
void ProcessPOSPropertyValue(const std::string &value, Moses::OutputFileStream& out) const;
bool m_sourceLabelsFlag;
std::map<std::string,size_t> m_sourceLabels;

View File

@ -25,7 +25,7 @@
#include <cstdlib>
#include <cstring>
#include "tables-core.h"
#include "moses/Util.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "PropertiesConsolidator.h"
@ -41,12 +41,18 @@ bool kneserNeyFlag = false;
bool sourceLabelsFlag = false;
bool partsOfSpeechFlag = false;
bool logProbFlag = false;
bool countsProperty = false;
float minScore0 = 0;
float minScore2 = 0;
inline float maybeLogProb( float a )
{
return logProbFlag ? log(a) : a;
return logProbFlag ? std::log(a) : a;
}
// Tells whether `word` is written as a non-terminal: at least three
// characters long, starting with '[' and ending with ']'.
inline bool isNonTerminal( const std::string &word )
{
  const std::string::size_type len = word.size();
  if (len < 3) {
    // Too short to hold brackets around at least one character.
    return false;
  }
  if (word[0] != '[') {
    return false;
  }
  return word[len-1] == ']';
}
void processFiles( char*, char*, char*, char*, char*, char* );
@ -122,6 +128,9 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
} else if (strcmp(argv[i],"--Counts") == 0) {
countsProperty = true;
cerr << "output counts as a property\n";
} else if (strcmp(argv[i],"--SourceLabels") == 0) {
sourceLabelsFlag = true;
if (i+1==argc) {
@ -294,8 +303,8 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
breakdownCoreAndSparse( itemDirect[3], directScores, directSparseScores );
breakdownCoreAndSparse( itemIndirect[3], indirectScores, indirectSparseScores );
vector<string> directCounts = tokenize(itemDirect[4].c_str());
vector<string> indirectCounts = tokenize(itemIndirect[4].c_str());
vector<string> directCounts = Moses::Tokenize(itemDirect[4]);
vector<string> indirectCounts = Moses::Tokenize(itemIndirect[4]);
float countF = atof(directCounts[0].c_str());
float countE = atof(indirectCounts[0].c_str());
float countEF = atof(indirectCounts[1].c_str());
@ -334,8 +343,32 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
continue;
}
// output hierarchical phrase pair (with separated labels)
fileConsolidated << itemDirect[0] << " ||| " << itemDirect[1] << " |||";
// output phrase pair
fileConsolidated << itemDirect[0] << " ||| ";
if (partsOfSpeechFlag) {
// write POS factor from property
std::vector<std::string> targetTokens = Moses::Tokenize(itemDirect[1]);
std::vector<std::string> propertyValuePOS;
propertiesConsolidator.GetPOSPropertyValueFromPropertiesString(itemDirect[5], propertyValuePOS);
size_t targetTerminalIndex = 0;
for (std::vector<std::string>::const_iterator targetTokensIt=targetTokens.begin();
targetTokensIt!=targetTokens.end(); ++targetTokensIt) {
fileConsolidated << *targetTokensIt;
if (!isNonTerminal(*targetTokensIt)) {
assert(propertyValuePOS.size() > targetTerminalIndex);
fileConsolidated << "|" << propertyValuePOS[targetTerminalIndex];
++targetTerminalIndex;
}
fileConsolidated << " ";
}
fileConsolidated << "|||";
} else {
fileConsolidated << itemDirect[1] << " |||";
}
// prob indirect
if (!onlyDirectFlag) {
@ -354,7 +387,7 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// low count feature
if (lowCountFlag) {
fileConsolidated << " " << maybeLogProb(exp(-1.0/countEF));
fileConsolidated << " " << maybeLogProb(std::exp(-1.0/countEF));
}
// count bin feature (as a core feature)
@ -405,8 +438,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// arbitrary key-value pairs
fileConsolidated << " |||";
if (itemDirect.size() >= 6) {
propertiesConsolidator.ProcessPropertiesString(itemDirect[5], fileConsolidated);
}
if (countsProperty) {
fileConsolidated << " {{Counts " << countE << " " << countF << " " << countEF << "}}";
//if (sourceLabelsFlag) {
fileConsolidated << propertiesConsolidator.ProcessPropertiesString(itemDirect[5]);
propertiesConsolidator.ProcessPropertiesString(itemDirect[5], fileConsolidated);
//} else {
// fileConsolidated << itemDirect[5];
//}
@ -423,7 +461,7 @@ void breakdownCoreAndSparse( string combined, string &core, string &sparse )
{
core = "";
sparse = "";
vector<string> score = tokenize( combined.c_str() );
vector<string> score = Moses::Tokenize( combined );
for(size_t i=0; i<score.size(); i++) {
if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size())
core += " " + score[i];

View File

@ -674,15 +674,23 @@ void ExtractGHKM::WriteGlueGrammar(
const size_t sourceLabelSentenceEnd = 3;
const size_t partOfSpeechSentenceStart = 0;
const size_t partOfSpeechSentenceEnd = 1;
std::string sentenceStartSource = "<s>";
std::string sentenceEndSource = "</s>";
std::string sentenceStartTarget = "<s>";
std::string sentenceEndTarget = "</s>";
if (options.partsOfSpeech) {
sentenceStartTarget = sentenceStartTarget + "|" + sentenceStartTarget;
sentenceEndTarget = sentenceEndTarget + "|" + sentenceEndTarget;
}
// basic rules
out << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| 0-0 ||| ||| |||";
out << sentenceStartSource << " [X] ||| " << sentenceStartTarget << " [" << topLabel << "] ||| 1 ||| 0-0 ||| ||| |||";
if (options.treeFragments) {
out << " {{Tree [" << topLabel << " [SSTART <s>]]}}";
}
if (options.partsOfSpeech) {
out << " {{POS " << partOfSpeechSentenceStart << "}}";
}
// if (options.partsOfSpeech) {
// out << " {{POS " << partOfSpeechSentenceStart << "}}";
// }
if (options.sourceLabels) {
out << " {{SourceLabels 2 1 " << sourceLabelSentenceStart << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
@ -691,13 +699,13 @@ void ExtractGHKM::WriteGlueGrammar(
}
out << std::endl;
out << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 1-1 ||| ||| |||";
out << "[X][" << topLabel << "] " << sentenceEndSource << " [X] ||| [X][" << topLabel << "] " << sentenceEndTarget << " [" << topLabel << "] ||| 1 ||| 0-0 1-1 ||| ||| |||";
if (options.treeFragments) {
out << " {{Tree [" << topLabel << " [" << topLabel << "] [SEND </s>]]}}";
}
if (options.partsOfSpeech) {
out << " {{POS " << partOfSpeechSentenceEnd << "}}";
}
// if (options.partsOfSpeech) {
// out << " {{POS " << partOfSpeechSentenceEnd << "}}";
// }
if (options.sourceLabels) {
out << " {{SourceLabels 4 1 " << sourceLabelSentenceStart << " " << sourceLabelGlueTop << " " << sourceLabelSentenceEnd << " 1 1 " << sourceLabelGlueTop << " 1}}";
}
@ -709,13 +717,13 @@ void ExtractGHKM::WriteGlueGrammar(
// top rules
for (std::map<std::string, int>::const_iterator i = topLabelSet.begin();
i != topLabelSet.end(); ++i) {
out << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 1-1 2-2 ||| ||| |||";
out << sentenceStartSource << " [X][" << i->first << "] " << sentenceEndSource << " [X] ||| " << sentenceStartTarget << " [X][" << i->first << "] " << sentenceEndTarget << " [" << topLabel << "] ||| 1 ||| 0-0 1-1 2-2 ||| ||| |||";
if (options.treeFragments) {
out << " {{Tree [" << topLabel << " [SSTART <s>] [" << i->first << "] [SEND </s>]]}}";
}
if (options.partsOfSpeech) {
out << " {{POS " << partOfSpeechSentenceStart << " " << partOfSpeechSentenceEnd << "}}";
}
// if (options.partsOfSpeech) {
// out << " {{POS " << partOfSpeechSentenceStart << " " << partOfSpeechSentenceEnd << "}}";
// }
if (options.sourceLabels) {
out << " {{SourceLabels 4 1 " << sourceLabelSentenceStart << " " << sourceLabelGlueX << " " << sourceLabelSentenceEnd << " 1 1 " << sourceLabelGlueTop << " 1}}";
}

View File

@ -848,7 +848,6 @@ void outputPhrasePair(const ExtractionPhrasePair &phrasePair,
vcbT);
if ( !sourceLabelCounts.empty() ) {
phraseTableFile << " {{SourceLabels "
// << nNTs // for convenience: number of non-terminal symbols in this rule (incl. left hand side NT)
<< phraseSource->size() // for convenience: number of symbols in this rule (incl. left hand side NT)
<< " "
<< count // rule count

View File

@ -18,7 +18,7 @@
***********************************************************************/
#include <string>
#include <vector>
#include <map>
namespace MosesTraining
{

View File

@ -20,7 +20,7 @@ clean
out: clean-stem
default-name: corpus/clean
ignore-if: cleaner
rerun-on-change: max-sentence-length $moses-script-dir/training/clean-corpus-n.perl clean-options
rerun-on-change: max-sentence-length $moses-script-dir/training/clean-corpus-n.perl
template: $moses-script-dir/training/clean-corpus-n.perl IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained
error: there is a blank factor
error: is too long! at
@ -28,7 +28,7 @@ custom-clean
in: tokenized-stem
out: clean-stem
default-name: corpus/clean
pass-unless: cleaner
ignore-unless: cleaner
rerun-on-change: max-sentence-length cleaner
template: $cleaner IN $input-extension $output-extension OUT 1 $max-sentence-length OUT.lines-retained
error: there is a blank factor

View File

@ -21,27 +21,111 @@ if ($SCRIPTS_ROOTDIR eq '') {
$SCRIPTS_ROOTDIR =~ s/\/training$//;
#$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
# Script-wide option/configuration variables, declared exactly once and one
# per line so that additions and removals show up as single-line diffs.
# Scalars start out undefined and arrays start out empty.
# NOTE(review): presumably these are filled in by the command-line option
# parsing further down in the script -- confirm against the rest of the file.
my($_EXTERNAL_BINDIR,
   $_ROOT_DIR,
   $_CORPUS_DIR,
   $_GIZA_E2F,
   $_GIZA_F2E,
   $_MODEL_DIR,
   $_TEMP_DIR,
   $_SORT_BUFFER_SIZE,
   $_SORT_BATCH_SIZE,
   $_SORT_COMPRESS,
   $_SORT_PARALLEL,
   $_CORPUS,
   $_CORPUS_COMPRESSION,
   $_FIRST_STEP,
   $_LAST_STEP,
   $_F,
   $_E,
   $_MAX_PHRASE_LENGTH,
   $_DISTORTION_LIMIT,
   $_LEXICAL_FILE,
   $_NO_LEXICAL_WEIGHTING,
   $_LEXICAL_COUNTS,
   $_VERBOSE,
   $_ALIGNMENT,
   $_ALIGNMENT_FILE,
   $_ALIGNMENT_STEM,
   @_LM,
   $_EXTRACT_FILE,
   $_GIZA_OPTION,
   $_HELP,
   $_PARTS,
   $_DIRECTION,
   $_ONLY_PRINT_GIZA,
   $_GIZA_EXTENSION,
   $_REORDERING,
   $_REORDERING_SMOOTH,
   $_INPUT_FACTOR_MAX,
   $_ALIGNMENT_FACTORS,
   $_TRANSLATION_FACTORS,
   $_REORDERING_FACTORS,
   $_GENERATION_FACTORS,
   $_DECODING_GRAPH_BACKOFF,
   $_DECODING_STEPS,
   $_PARALLEL,
   $_FACTOR_DELIMITER,
   @_PHRASE_TABLE,
   @_REORDERING_TABLE,
   @_GENERATION_TABLE,
   @_GENERATION_TYPE,
   $_GENERATION_CORPUS,
   $_DONT_ZIP,
   $_MGIZA,
   $_MGIZA_CPUS,
   $_SNT2COOC,
   $_HMM_ALIGN,
   $_CONFIG,
   $_OSM,
   $_OSM_FACTORS,
   $_POST_DECODING_TRANSLIT,
   $_TRANSLITERATION_PHRASE_TABLE,
   $_HIERARCHICAL,
   $_XML,
   $_SOURCE_SYNTAX,
   $_TARGET_SYNTAX,
   $_GLUE_GRAMMAR,
   $_GLUE_GRAMMAR_FILE,
   $_UNKNOWN_WORD_LABEL_FILE,
   $_GHKM,
   $_GHKM_TREE_FRAGMENTS,
   $_GHKM_PHRASE_ORIENTATION,
   $_PHRASE_ORIENTATION_PRIORS_FILE,
   $_GHKM_SOURCE_LABELS,
   $_GHKM_SOURCE_LABELS_FILE,
   $_GHKM_PARTS_OF_SPEECH,
   $_GHKM_PARTS_OF_SPEECH_FILE,
   $_PCFG,
   @_EXTRACT_OPTIONS,
   @_SCORE_OPTIONS,
   $_S2T,
   $_ALT_DIRECT_RULE_SCORE_1,
   $_ALT_DIRECT_RULE_SCORE_2,
   $_UNKNOWN_WORD_SOFT_MATCHES_FILE,
   $_OMIT_WORD_ALIGNMENT,
   $_FORCE_FACTORED_FILENAMES,
   $_MEMSCORE,
   $_FINAL_ALIGNMENT_MODEL,
   $_CONTINUE,
   $_MAX_LEXICAL_REORDERING,
   $_LEXICAL_REORDERING_DEFAULT_SCORES,
   $_DO_STEPS,
   @_ADDITIONAL_INI,
   $_ADDITIONAL_INI_FILE,
   $_MMSAPT,
   @_BASELINE_ALIGNMENT_MODEL,
   $_BASELINE_EXTRACT,
   $_BASELINE_ALIGNMENT,
   $_DICTIONARY,
   $_SPARSE_PHRASE_FEATURES,
   $_EPPEX,
   $_INSTANCE_WEIGHTS_FILE,
   $_LMODEL_OOV_FEATURE,
   $_NUM_LATTICE_FEATURES,
   $IGNORE,
   $_FLEXIBILITY_SCORE,
   $_EXTRACT_COMMAND);
my $_BASELINE_CORPUS = "";
my $_CORES = 1;
my $debug = 0; # debug this script, do not delete any files in debug mode