This commit is contained in:
Hieu Hoang 2014-09-02 22:42:07 +01:00
commit b6428f412b
26 changed files with 451 additions and 409 deletions

View File

@ -1,16 +1,18 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.602770742">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.602770742" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -43,14 +45,7 @@
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
@ -95,12 +90,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.168814843" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -30,6 +30,7 @@
<option id="gnu.cpp.compiler.option.include.paths.821075319" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../phrase-extract&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1392992841" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>

View File

@ -108,12 +108,12 @@
<link>
<name>OutputFileStream.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/OutputFileStream.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
</link>
<link>
<name>OutputFileStream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-mixed-syntax/OutputFileStream.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>Parameter.cpp</name>

View File

@ -1,17 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.162355801">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -69,14 +71,7 @@
<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
@ -89,7 +84,6 @@
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.128214028" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
@ -109,13 +103,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.516628324" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -1,17 +1,19 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.461114338">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -66,14 +68,7 @@
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
@ -86,7 +81,6 @@
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.userobjs.1542590830" name="Other objects" superClass="gnu.cpp.link.option.userobjs"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.983725033" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
@ -110,13 +104,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -44,7 +44,6 @@
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="IS_ECLIPSE"/>
<listOptionValue builtIn="false" value="PT_UG"/>
<listOptionValue builtIn="false" value="HAVE_PROBINGPT"/>
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
@ -52,12 +51,10 @@
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="LM_DALM"/>
<listOptionValue builtIn="false" value="LM_NPLM"/>
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
<option id="gnu.cpp.compiler.option.dialect.std.1734198568" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" value="gnu.cpp.compiler.dialect.c++11" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.dialect.std.1734198568" name="Language standard" superClass="gnu.cpp.compiler.option.dialect.std" value="gnu.cpp.compiler.dialect.c++98" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1905116220" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
@ -78,6 +75,7 @@
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<<<<<<< HEAD
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1176518033" name="/" resourcePath="LM/bilingual-lm">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.2110557759" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2066996463" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1976472988"/>
@ -92,6 +90,76 @@
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/bilingual-lm|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/symal2mam.cc|TranslationModel/UG/mm/mtt-dump.cc|TranslationModel/UG/mm/mtt-count-words.cc|TranslationModel/UG/mm/mtt-build.cc|TranslationModel/UG/mm/mmlex-lookup.cc|TranslationModel/UG/mm/mmlex-build.cc|TranslationModel/UG/mm/mam_verify.cc|TranslationModel/UG/mm/mam2symal.cc|TranslationModel/UG/mm/custom-pt.cc|TranslationModel/UG/mm/calc-coverage.cc|TranslationModel/UG/mm/mtt.count.cc|TranslationModel/UG/util|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
=======
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1845526535" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1917714409" name="/" resourcePath="TranslationModel">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1874031326" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1671760867" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1976472988"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2072639167" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327">
<option id="gnu.cpp.compiler.option.preprocessor.def.1387618215" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="IS_ECLIPSE"/>
<listOptionValue builtIn="false" value="HAVE_PROBINGPT"/>
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1138059468" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.241920461" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903">
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1408639346" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.505647623" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1168585173"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1809234420" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.2074660557"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.2136353299" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.933467113">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.190676079" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1401518461" name="/" resourcePath="TranslationModel/fuzzy-match">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.472269246" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.84234118" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1671760867"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.299872725" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2072639167">
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.664273995" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2044654215" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.241920461">
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1537423216" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1174866714" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.505647623"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.239716723" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1809234420"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.2078651360" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.2136353299">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.214869589" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.103170143" name="/" resourcePath="TranslationModel/ProbingPT">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.2026082807" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1540835364" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1671760867"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1897459756" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2072639167">
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1615949072" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1178947383" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.241920461">
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2013283881" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1997457966" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.505647623"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.886709003" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1809234420"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.325064995" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.2136353299">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1281335737" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="LM/SRI.h|LM/SRI.cpp|TranslationModel/UG|LM/DALMWrapper.h|LM/DALMWrapper.cpp|TranslationModel/UG/mm/test-dynamic-im-tsa.cc|TranslationModel/UG/mm/symal2mam.cc|TranslationModel/UG/mm/mtt-dump.cc|TranslationModel/UG/mm/mtt-count-words.cc|TranslationModel/UG/mm/mtt-build.cc|TranslationModel/UG/mm/mmlex-lookup.cc|TranslationModel/UG/mm/mmlex-build.cc|TranslationModel/UG/mm/mam_verify.cc|TranslationModel/UG/mm/mam2symal.cc|TranslationModel/UG/mm/custom-pt.cc|TranslationModel/UG/mm/calc-coverage.cc|TranslationModel/UG/mm/mtt.count.cc|TranslationModel/UG/util|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
>>>>>>> master
</sourceEntries>
</configuration>
</storageModule>
@ -223,8 +291,8 @@
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/Rand.h" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/DALMWrapper.h" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/IRST.h" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.916939380.LM/DALMWrapper.h" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>

View File

@ -1161,16 +1161,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InputFeature.h</locationURI>
</link>
<link>
<name>FF/InternalStructStatelessFF.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalStructStatelessFF.cpp</locationURI>
</link>
<link>
<name>FF/InternalStructStatelessFF.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/InternalStructStatelessFF.h</locationURI>
</link>
<link>
<name>FF/LexicalReordering</name>
<type>2</type>

View File

@ -1,16 +1,18 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.852684782">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.852684782" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -57,14 +59,7 @@
<option id="gnu.cpp.link.option.libs.615408765" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
@ -77,7 +72,6 @@
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.202044854" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
@ -97,12 +91,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1878418244" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

0
contrib/rt/Empty.c Normal file
View File

9
contrib/rt/README Normal file
View File

@ -0,0 +1,9 @@
FOR OSX ONLY
------------
This creates an empty library file
librt.a
It should be used when you are compiling with Eclipse on OSX.
The Eclipse projects are set up to link to librt but OSX doesn't have it so this just creates a dummy library.

2
contrib/rt/compile.sh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/sh
# Builds the dummy librt.a: compiles Empty.c and archives the resulting object.
# Paths are relative, so run this from the directory that contains Empty.c.

# Stop at the first failing command so ar never archives a missing or stale
# Empty.o after a failed compile.
set -e

gcc -c Empty.c -o Empty.o
ar rcs librt.a Empty.o

View File

@ -124,6 +124,15 @@ public:
}
}
// Looks up the entry stored under idx in m_map.
// Returns NULL when m_map.at() reports idx as out of range.
const ChartCellLabel *Find(size_t idx) const {
  const ChartCellLabel *label = NULL;
  try {
    label = m_map.at(idx);
  } catch (const std::out_of_range &) {
    // idx is not covered by m_map; fall through with label still NULL
  }
  return label;
}
ChartCellLabel::Stack &FindOrInsert(const Word &w) {
size_t idx = w[0]->GetId();
if (! ChartCellExists(idx)) {

View File

@ -1,43 +0,0 @@
#include "InternalStructStatelessFF.h"
#include "moses/InputPath.h"
#include "moses/ScoreComponentCollection.h"
using namespace std;
namespace Moses
{
// Isolation-stage hook: calls scoreBreakdown.PlusEquals(this, 0).
// source, targetPhrase and estimatedFutureScore are not read.
void InternalStructStatelessFF::EvaluateInIsolation(
  const Phrase &source,
  const TargetPhrase &targetPhrase,
  ScoreComponentCollection &scoreBreakdown,
  ScoreComponentCollection &estimatedFutureScore) const
{
  // disabled debug trace: cerr << "MARIA!!!" << endl;
  scoreBreakdown.PlusEquals(this, 0);
}
// Source-context hook: writes "HHHHH" followed by scoreBreakdown to cerr,
// then calls scoreBreakdown.PlusEquals(this, 66).
// input, inputPath, targetPhrase, stackVec and estimatedFutureScore are not
// read by the active code.
// NOTE(review): the trace line and the constant 66 look like debugging
// placeholders -- confirm before relying on this feature.
void InternalStructStatelessFF::EvaluateWithSourceContext(
  const InputType &input,
  const InputPath &inputPath,
  const TargetPhrase &targetPhrase,
  const StackVec *stackVec,
  ScoreComponentCollection &scoreBreakdown,
  ScoreComponentCollection *estimatedFutureScore) const
{
  cerr << "HHHHH" << scoreBreakdown << endl;
  scoreBreakdown.PlusEquals(this, 66);

  // Disabled experiment kept for reference: start from 50 and add 1 for each
  // position of the input path's phrase whose factor 0 is the string "ist".
  /*
  FactorList f_mask;
  f_mask.push_back(0);
  //if(inputPath.GetPhrase().GetStringRep(f_mask).)
  int score = 50;
  for (size_t i = 0; i < inputPath.GetPhrase().GetSize(); i++) {
    if (inputPath.GetPhrase().GetFactor(i, 0)->GetString().as_string() == "ist") {
      //cout<<inputPath.GetPhrase().GetStringRep(f_mask);
      score += 1;
    }
  }
  scoreBreakdown.PlusEquals(this, score);
  */
}
}

View File

@ -1,40 +0,0 @@
#pragma once
#include <string>
#include "StatelessFeatureFunction.h"
namespace Moses
{
// Feature function derived from StatelessFeatureFunction.
// The two EvaluateWhenApplied overloads are empty; EvaluateInIsolation and
// EvaluateWithSourceContext are only declared here.
class InternalStructStatelessFF : public StatelessFeatureFunction
{
public:
  // Passes the configuration line straight to the base class.
  InternalStructStatelessFF(const std::string &line)
    : StatelessFeatureFunction(line)
  {
  }

  // Always reports the feature as usable, whatever the mask is.
  bool IsUseable(const FactorMask &mask) const
  {
    return true;
  }

  // Scoring hook taking the source and target phrase; defined elsewhere.
  void EvaluateInIsolation(
    const Phrase &source,
    const TargetPhrase &targetPhrase,
    ScoreComponentCollection &scoreBreakdown,
    ScoreComponentCollection &estimatedFutureScore) const;

  // Scoring hook that additionally receives the input, input path and stack
  // vector; estimatedFutureScore defaults to NULL. Defined elsewhere.
  void EvaluateWithSourceContext(
    const InputType &input,
    const InputPath &inputPath,
    const TargetPhrase &targetPhrase,
    const StackVec *stackVec,
    ScoreComponentCollection &scoreBreakdown,
    ScoreComponentCollection *estimatedFutureScore = NULL) const;

  // No-op for Hypothesis.
  virtual void EvaluateWhenApplied(
    const Hypothesis& hypo,
    ScoreComponentCollection* accumulator) const
  {
  }

  // No-op for ChartHypothesis.
  void EvaluateWhenApplied(
    const ChartHypothesis &hypo,
    ScoreComponentCollection* accumulator) const
  {
  }
};
}

View File

@ -50,6 +50,19 @@ protected:
StackVec m_stackVec;
};
// Cache record for quicker lookup: bundles a chart cell label with the end
// position and the score that were stored together with it.
struct ChartCellCache
{
  size_t endPos;                    // end position recorded for this entry
  const ChartCellLabel* cellLabel;  // cached label; only the pointer is stored, never deleted here
  float score;                      // score recorded for this entry

  // Stores the three values as given.
  ChartCellCache(size_t endPos, const ChartCellLabel* cellLabel, float score)
    : endPos(endPos), cellLabel(cellLabel), score(score)
  {
  }
};
} // namespace Moses
#endif

View File

@ -22,10 +22,12 @@
#include "moses/ChartParser.h"
#include "moses/InputType.h"
#include "moses/Terminal.h"
#include "moses/ChartParserCallback.h"
#include "moses/StaticData.h"
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
#include "moses/FactorCollection.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
using namespace std;
@ -59,9 +61,13 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
m_lastPos = lastPos;
m_stackVec.clear();
m_stackScores.clear();
m_outColl = &outColl;
m_unaryPos = absEndPos-1; // rules ending in this position are unary and should not be added to collection
// create/update data structure to quickly look up all chart cells that match start position and label.
UpdateCompressedMatrix(startPos, absEndPos, lastPos);
const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode();
// size-1 terminal rules
@ -77,7 +83,7 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
}
// all rules starting with nonterminal
else if (absEndPos > startPos) {
GetNonTerminalExtension(&rootNode, startPos, absEndPos-1);
GetNonTerminalExtension(&rootNode, startPos);
// all (non-unary) rules starting with terminal
if (absEndPos == startPos+1) {
GetTerminalExtension(&rootNode, absEndPos-1);
@ -94,21 +100,87 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
}
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
//
// m_compressedMatrixVec[pos][label] holds ChartCellCache entries (end position,
// cell label, best score) for spans that start at pos.
//
//  - startPos < origEndPos:  only the cell [startPos, origEndPos-1] is cached
//    (initial lookup).
//  - startPos == origEndPos: startPos is advanced by one and the cells
//    [startPos, endPos] for every endPos up to lastPos are cached (lookup of
//    rule extensions); entries already cached for later start positions are
//    trimmed against lastPos.
//  - otherwise no end positions are collected and the matrix for startPos is
//    merely reset (provided startPos <= lastPos).
//
// lastPos is the largest end position that may be cached; the outer vector is
// resized to lastPos+1.
void ChartRuleLookupManagerMemory::UpdateCompressedMatrix(size_t startPos,
    size_t origEndPos,
    size_t lastPos)
{
  std::vector<size_t> endPosVec;
  size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
  m_compressedMatrixVec.resize(lastPos+1);

  // we only need to update cell at [startPos, origEndPos-1] for initial lookup
  if (startPos < origEndPos) {
    endPosVec.push_back(origEndPos-1);
  }
  // update all cells starting from startPos+1 for lookup of rule extensions
  else if (startPos == origEndPos) {
    startPos++;
    for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
      endPosVec.push_back(endPos);
    }
    //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
    for (size_t pos = startPos+1; pos <= lastPos; pos++) {
      CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
      cellMatrix.resize(numNonTerms);
      for (size_t i = 0; i < numNonTerms; i++) {
        // only the last entry of each column is examined, so at most one
        // entry per label is dropped per call
        if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
          cellMatrix[i].pop_back();
        }
      }
    }
  }

  if (startPos > lastPos) {
    return;
  }

  // populate compressed matrix with all chart cells that start at current start position
  CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
  cellMatrix.clear();
  cellMatrix.resize(numNonTerms);
  for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
    size_t endPos = *p;

    // target non-terminal labels for the span
    const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
    if (targetNonTerms.GetSize() == 0) {
      continue;
    }

#if !defined(UNLABELLED_SOURCE)
    // source non-terminal labels for the span
    // (the per-label source array is not consulted here, so it is no longer fetched)
    const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
    // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
    if (inputPath.GetNonTerminalSet().size() == 0) {
      continue;
    }
#endif

    // cache every label present for this span together with its best score
    for (size_t i = 0; i < numNonTerms; i++) {
      const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
      if (cellLabel != NULL) {
        float score = cellLabel->GetBestScore(m_outColl);
        cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
      }
    }
  }
}
// if a (partial) rule matches, add it to the list of completed rules (if non-unary and non-empty), and try to find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemory::AddAndExtend(
const PhraseDictionaryNodeMemory *node,
size_t endPos,
const ChartCellLabel *cellLabel) {
// add backpointer
if (cellLabel != NULL) {
m_stackVec.push_back(cellLabel);
}
size_t endPos) {
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or unary)
if (!tpc.IsEmpty() && endPos != m_unaryPos) {
m_completedRules[endPos].Add(tpc, m_stackVec, *m_outColl);
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
}
// get all further extensions of rule (until reaching end of sentence or max-chart-span)
@ -117,18 +189,12 @@ void ChartRuleLookupManagerMemory::AddAndExtend(
GetTerminalExtension(node, endPos+1);
}
if (!node->GetNonTerminalMap().empty()) {
for (size_t newEndPos = endPos+1; newEndPos <= m_lastPos; newEndPos++) {
GetNonTerminalExtension(node, endPos+1, newEndPos);
}
GetNonTerminalExtension(node, endPos+1);
}
}
// remove backpointer
if (cellLabel != NULL) {
m_stackVec.pop_back();
}
}
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetTerminalExtension(
@ -142,9 +208,10 @@ void ChartRuleLookupManagerMemory::GetTerminalExtension(
if (terminals.size() < 5) {
for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
const Word & word = iter->first;
if (word == sourceWord) {
if (TerminalEqualityPred()(word, sourceWord)) {
const PhraseDictionaryNodeMemory *child = & iter->second;
AddAndExtend(child, pos, NULL);
AddAndExtend(child, pos);
break;
}
}
}
@ -152,39 +219,26 @@ void ChartRuleLookupManagerMemory::GetTerminalExtension(
else {
const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
if (child != NULL) {
AddAndExtend(child, pos, NULL);
AddAndExtend(child, pos);
}
}
}
// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a given span (StartPos, endPos).
// search all possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemory::GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t startPos,
size_t endPos) {
size_t startPos) {
// target non-terminal labels for the span
const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
if (targetNonTerms.GetSize() == 0) {
return;
}
#if !defined(UNLABELLED_SOURCE)
// source non-terminal labels for the span
const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
const std::vector<bool> &sourceNonTermArray = inputPath.GetNonTerminalArray();
// can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
if (inputPath.GetNonTerminalSet().size() == 0) {
return;
}
#endif
const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
// non-terminal labels in phrase dictionary node
const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
// make room for back pointer
m_stackVec.push_back(NULL);
m_stackScores.push_back(0);
// loop over possible expansions of the rule
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
@ -193,37 +247,32 @@ void ChartRuleLookupManagerMemory::GetNonTerminalExtension(
#if defined(UNLABELLED_SOURCE)
const Word &targetNonTerm = p->first;
#else
const PhraseDictionaryNodeMemory::NonTerminalMapKey &key = p->first;
const Word &sourceNonTerm = key.first;
// check if source label matches
if (! sourceNonTermArray[sourceNonTerm[0]->GetId()]) {
continue;
}
const Word &targetNonTerm = key.second;
const Word &targetNonTerm = p->first.second;
#endif
const PhraseDictionaryNodeMemory *child = &p->second;
//soft matching of NTs
if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
const ChartCellLabel *cellLabel = targetNonTerms.Find(*softMatch);
if (cellLabel == NULL) {
continue;
const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
m_stackVec.back() = match->cellLabel;
m_stackScores.back() = match->score;
AddAndExtend(child, match->endPos);
}
// create new rule
const PhraseDictionaryNodeMemory &child = p->second;
AddAndExtend(&child, endPos, cellLabel);
}
} // end of soft matches lookup
const ChartCellLabel *cellLabel = targetNonTerms.Find(targetNonTerm);
if (cellLabel == NULL) {
continue;
const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
m_stackVec.back() = match->cellLabel;
m_stackScores.back() = match->score;
AddAndExtend(child, match->endPos);
}
// create new rule
const PhraseDictionaryNodeMemory &child = p->second;
AddAndExtend(&child, endPos, cellLabel);
}
// remove last back pointer
m_stackVec.pop_back();
m_stackScores.pop_back();
}
} // namespace Moses

View File

@ -40,6 +40,10 @@ class WordsRange;
class ChartRuleLookupManagerMemory : public ChartRuleLookupManagerCYKPlus
{
public:
typedef std::vector<ChartCellCache> CompressedColumn;
typedef std::vector<CompressedColumn> CompressedMatrix;
ChartRuleLookupManagerMemory(const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
const PhraseDictionaryMemory &ruleTable);
@ -53,19 +57,21 @@ public:
private:
void GetTerminalExtension(
void GetTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t pos);
void GetNonTerminalExtension(
void GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t startPos,
size_t endPos);
size_t startPos);
void AddAndExtend(
const PhraseDictionaryNodeMemory *node,
size_t endPos);
void UpdateCompressedMatrix(size_t startPos,
size_t endPos,
const ChartCellLabel *cellLabel);
size_t lastPos);
const PhraseDictionaryMemory &m_ruleTable;
@ -80,8 +86,13 @@ void GetNonTerminalExtension(
size_t m_unaryPos;
StackVec m_stackVec;
std::vector<float> m_stackScores;
std::vector<const Word*> m_sourceWords;
ChartParserCallback* m_outColl;
std::vector<CompressedMatrix> m_compressedMatrixVec;
};
} // namespace Moses

View File

@ -22,10 +22,12 @@
#include "moses/ChartParser.h"
#include "moses/InputType.h"
#include "moses/Terminal.h"
#include "moses/ChartParserCallback.h"
#include "moses/StaticData.h"
#include "moses/NonTerminal.h"
#include "moses/ChartCellCollection.h"
#include "moses/FactorCollection.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
using namespace std;
@ -59,9 +61,13 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
m_lastPos = lastPos;
m_stackVec.clear();
m_stackScores.clear();
m_outColl = &outColl;
m_unaryPos = absEndPos-1; // rules ending in this position are unary and should not be added to collection
// create/update data structure to quickly look up all chart cells that match start position and label.
UpdateCompressedMatrix(startPos, absEndPos, lastPos);
const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode(GetParser().GetTranslationId());
// size-1 terminal rules
@ -77,7 +83,7 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
}
// all rules starting with nonterminal
else if (absEndPos > startPos) {
GetNonTerminalExtension(&rootNode, startPos, absEndPos-1);
GetNonTerminalExtension(&rootNode, startPos);
// all (non-unary) rules starting with terminal
if (absEndPos == startPos+1) {
GetTerminalExtension(&rootNode, absEndPos-1);
@ -94,21 +100,87 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
}
// Create/update compressed matrix that stores all valid ChartCellLabels for a given start position and label.
//
// m_compressedMatrixVec[pos][label] holds ChartCellCache entries (end position,
// cell label, best score) for spans that start at pos.
//
//  - startPos < origEndPos:  only the cell [startPos, origEndPos-1] is cached
//    (initial lookup).
//  - startPos == origEndPos: startPos is advanced by one and the cells
//    [startPos, endPos] for every endPos up to lastPos are cached (lookup of
//    rule extensions); entries already cached for later start positions are
//    trimmed against lastPos.
//  - otherwise no end positions are collected and the matrix for startPos is
//    merely reset (provided startPos <= lastPos).
//
// lastPos is the largest end position that may be cached; the outer vector is
// resized to lastPos+1.
void ChartRuleLookupManagerMemoryPerSentence::UpdateCompressedMatrix(size_t startPos,
    size_t origEndPos,
    size_t lastPos)
{
  std::vector<size_t> endPosVec;
  size_t numNonTerms = FactorCollection::Instance().GetNumNonTerminals();
  m_compressedMatrixVec.resize(lastPos+1);

  // we only need to update cell at [startPos, origEndPos-1] for initial lookup
  if (startPos < origEndPos) {
    endPosVec.push_back(origEndPos-1);
  }
  // update all cells starting from startPos+1 for lookup of rule extensions
  else if (startPos == origEndPos) {
    startPos++;
    for (size_t endPos = startPos; endPos <= lastPos; endPos++) {
      endPosVec.push_back(endPos);
    }
    //re-use data structure for cells with later start position, but remove chart cells that would break max-chart-span
    for (size_t pos = startPos+1; pos <= lastPos; pos++) {
      CompressedMatrix & cellMatrix = m_compressedMatrixVec[pos];
      cellMatrix.resize(numNonTerms);
      for (size_t i = 0; i < numNonTerms; i++) {
        // only the last entry of each column is examined, so at most one
        // entry per label is dropped per call
        if (!cellMatrix[i].empty() && cellMatrix[i].back().endPos > lastPos) {
          cellMatrix[i].pop_back();
        }
      }
    }
  }

  if (startPos > lastPos) {
    return;
  }

  // populate compressed matrix with all chart cells that start at current start position
  CompressedMatrix & cellMatrix = m_compressedMatrixVec[startPos];
  cellMatrix.clear();
  cellMatrix.resize(numNonTerms);
  for (std::vector<size_t>::iterator p = endPosVec.begin(); p != endPosVec.end(); ++p) {
    size_t endPos = *p;

    // target non-terminal labels for the span
    const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
    if (targetNonTerms.GetSize() == 0) {
      continue;
    }

#if !defined(UNLABELLED_SOURCE)
    // source non-terminal labels for the span
    // (the per-label source array is not consulted here, so it is no longer fetched)
    const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
    // can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
    if (inputPath.GetNonTerminalSet().size() == 0) {
      continue;
    }
#endif

    // cache every label present for this span together with its best score
    for (size_t i = 0; i < numNonTerms; i++) {
      const ChartCellLabel *cellLabel = targetNonTerms.Find(i);
      if (cellLabel != NULL) {
        float score = cellLabel->GetBestScore(m_outColl);
        cellMatrix[i].push_back(ChartCellCache(endPos, cellLabel, score));
      }
    }
  }
}
// if a (partial) rule matches, add it to the list of completed rules (if non-unary and non-empty), and try to find expansions that have this partial rule as prefix.
void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
const PhraseDictionaryNodeMemory *node,
size_t endPos,
const ChartCellLabel *cellLabel) {
// add backpointer
if (cellLabel != NULL) {
m_stackVec.push_back(cellLabel);
}
size_t endPos) {
const TargetPhraseCollection &tpc = node->GetTargetPhraseCollection();
// add target phrase collection (except if rule is empty or unary)
if (!tpc.IsEmpty() && endPos != m_unaryPos) {
m_completedRules[endPos].Add(tpc, m_stackVec, *m_outColl);
m_completedRules[endPos].Add(tpc, m_stackVec, m_stackScores, *m_outColl);
}
// get all further extensions of rule (until reaching end of sentence or max-chart-span)
@ -117,18 +189,12 @@ void ChartRuleLookupManagerMemoryPerSentence::AddAndExtend(
GetTerminalExtension(node, endPos+1);
}
if (!node->GetNonTerminalMap().empty()) {
for (size_t newEndPos = endPos+1; newEndPos <= m_lastPos; newEndPos++) {
GetNonTerminalExtension(node, endPos+1, newEndPos);
}
GetNonTerminalExtension(node, endPos+1);
}
}
// remove backpointer
if (cellLabel != NULL) {
m_stackVec.pop_back();
}
}
// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
@ -142,9 +208,10 @@ void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
if (terminals.size() < 5) {
for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
const Word & word = iter->first;
if (word == sourceWord) {
if (TerminalEqualityPred()(word, sourceWord)) {
const PhraseDictionaryNodeMemory *child = & iter->second;
AddAndExtend(child, pos, NULL);
AddAndExtend(child, pos);
break;
}
}
}
@ -152,39 +219,26 @@ void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
else {
const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
if (child != NULL) {
AddAndExtend(child, pos, NULL);
AddAndExtend(child, pos);
}
}
}
// search all nonterminal possible nonterminal extensions of a partial rule (pointed at by node) for a given span (StartPos, endPos).
// search all possible nonterminal extensions of a partial rule (pointed at by node) for a variable span (starting from startPos).
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t startPos,
size_t endPos) {
size_t startPos) {
// target non-terminal labels for the span
const ChartCellLabelSet &targetNonTerms = GetTargetLabelSet(startPos, endPos);
if (targetNonTerms.GetSize() == 0) {
return;
}
#if !defined(UNLABELLED_SOURCE)
// source non-terminal labels for the span
const InputPath &inputPath = GetParser().GetInputPath(startPos, endPos);
const std::vector<bool> &sourceNonTermArray = inputPath.GetNonTerminalArray();
// can this ever be true? Moses seems to pad the non-terminal set of the input with [X]
if (inputPath.GetNonTerminalSet().size() == 0) {
return;
}
#endif
const CompressedMatrix &compressedMatrix = m_compressedMatrixVec[startPos];
// non-terminal labels in phrase dictionary node
const PhraseDictionaryNodeMemory::NonTerminalMap & nonTermMap = node->GetNonTerminalMap();
// make room for back pointer
m_stackVec.push_back(NULL);
m_stackScores.push_back(0);
// loop over possible expansions of the rule
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator p;
PhraseDictionaryNodeMemory::NonTerminalMap::const_iterator end = nonTermMap.end();
@ -193,38 +247,32 @@ void ChartRuleLookupManagerMemoryPerSentence::GetNonTerminalExtension(
#if defined(UNLABELLED_SOURCE)
const Word &targetNonTerm = p->first;
#else
const PhraseDictionaryNodeMemory::NonTerminalMapKey &key = p->first;
const Word &sourceNonTerm = key.first;
// check if source label matches
if (! sourceNonTermArray[sourceNonTerm[0]->GetId()]) {
continue;
}
const Word &targetNonTerm = key.second;
const Word &targetNonTerm = p->first.second;
#endif
const PhraseDictionaryNodeMemory *child = &p->second;
//soft matching of NTs
if (m_isSoftMatching && !m_softMatchingMap[targetNonTerm[0]->GetId()].empty()) {
const std::vector<Word>& softMatches = m_softMatchingMap[targetNonTerm[0]->GetId()];
for (std::vector<Word>::const_iterator softMatch = softMatches.begin(); softMatch != softMatches.end(); ++softMatch) {
const ChartCellLabel *cellLabel = targetNonTerms.Find(*softMatch);
if (cellLabel == NULL) {
continue;
const CompressedColumn &matches = compressedMatrix[(*softMatch)[0]->GetId()];
for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
m_stackVec.back() = match->cellLabel;
m_stackScores.back() = match->score;
AddAndExtend(child, match->endPos);
}
// create new rule
const PhraseDictionaryNodeMemory &child = p->second;
AddAndExtend(&child, endPos, cellLabel);
}
} // end of soft matches lookup
const ChartCellLabel *cellLabel = targetNonTerms.Find(targetNonTerm);
if (cellLabel == NULL) {
continue;
const CompressedColumn &matches = compressedMatrix[targetNonTerm[0]->GetId()];
for (CompressedColumn::const_iterator match = matches.begin(); match != matches.end(); ++match) {
m_stackVec.back() = match->cellLabel;
m_stackScores.back() = match->score;
AddAndExtend(child, match->endPos);
}
// create new rule
const PhraseDictionaryNodeMemory &child = p->second;
AddAndExtend(&child, endPos, cellLabel);
}
// remove last back pointer
m_stackVec.pop_back();
m_stackScores.pop_back();
}
} // namespace Moses

View File

@ -40,6 +40,9 @@ class WordsRange;
class ChartRuleLookupManagerMemoryPerSentence : public ChartRuleLookupManagerCYKPlus
{
public:
typedef std::vector<ChartCellCache> CompressedColumn;
typedef std::vector<CompressedColumn> CompressedMatrix;
ChartRuleLookupManagerMemoryPerSentence(const ChartParser &parser,
const ChartCellCollectionBase &cellColl,
const PhraseDictionaryFuzzyMatch &ruleTable);
@ -53,19 +56,21 @@ public:
private:
void GetTerminalExtension(
void GetTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t pos);
void GetNonTerminalExtension(
void GetNonTerminalExtension(
const PhraseDictionaryNodeMemory *node,
size_t startPos,
size_t endPos);
size_t startPos);
void AddAndExtend(
const PhraseDictionaryNodeMemory *node,
size_t endPos);
void UpdateCompressedMatrix(size_t startPos,
size_t endPos,
const ChartCellLabel *cellLabel);
size_t lastPos);
const PhraseDictionaryFuzzyMatch &m_ruleTable;
@ -80,8 +85,12 @@ void GetNonTerminalExtension(
size_t m_unaryPos;
StackVec m_stackVec;
std::vector<float> m_stackScores;
std::vector<const Word*> m_sourceWords;
ChartParserCallback* m_outColl;
std::vector<CompressedMatrix> m_compressedMatrixVec;
};
} // namespace Moses

View File

@ -77,4 +77,47 @@ void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
}
}
// copies some functionality (pruning) from ChartTranslationOptionList::Add
void CompletedRuleCollection::Add(const TargetPhraseCollection &tpc,
const StackVec &stackVec,
const std::vector<float> &stackScores,
const ChartParserCallback &outColl)
{
if (tpc.IsEmpty()) {
return;
}
const TargetPhrase &targetPhrase = **(tpc.begin());
float score = std::accumulate(stackScores.begin(), stackScores.end(), targetPhrase.GetFutureScore());
// If the rule limit has already been reached then don't add the option
// unless it is better than at least one existing option.
if (m_collection.size() > m_ruleLimit && score < m_scoreThreshold) {
return;
}
CompletedRule *completedRule = new CompletedRule(tpc, stackVec, score);
m_collection.push_back(completedRule);
// If the rule limit hasn't been exceeded then update the threshold.
if (m_collection.size() <= m_ruleLimit) {
m_scoreThreshold = (score < m_scoreThreshold) ? score : m_scoreThreshold;
}
// Prune if bursting
if (m_collection.size() == m_ruleLimit * 2) {
NTH_ELEMENT4(m_collection.begin(),
m_collection.begin() + m_ruleLimit - 1,
m_collection.end(),
CompletedRuleOrdered());
m_scoreThreshold = m_collection[m_ruleLimit-1]->GetScoreEstimate();
for (size_t i = 0 + m_ruleLimit; i < m_collection.size(); i++) {
delete m_collection[i];
}
m_collection.resize(m_ruleLimit);
}
}
}

View File

@ -22,6 +22,7 @@
#define moses_CompletedRuleCollectionS_h
#include <vector>
#include <numeric>
#include "moses/StackVec.h"
#include "moses/TargetPhraseCollection.h"
@ -105,6 +106,11 @@ public:
const StackVec &stackVec,
const ChartParserCallback &outColl);
void Add(const TargetPhraseCollection &tpc,
const StackVec &stackVec,
const std::vector<float> &stackScores,
const ChartParserCallback &outColl);
private:
std::vector<CompletedRule*> m_collection;
float m_scoreThreshold;

View File

@ -4,7 +4,7 @@ for local d in $(most-deps) {
obj $(d:B).o : $(d) ;
}
#and stuff them into an alias.
alias deps : $(most-deps:B).o ..//z ..//boost_iostreams ../moses//ThreadPool ../moses//Util ../util//kenutil ;
alias deps : $(most-deps:B).o ..//z ..//boost_iostreams ../moses//moses ../moses//ThreadPool ../moses//Util ../util//kenutil ;
#ExtractionPhrasePair.cpp requires that main define some global variables.
#Build the mains that do not need these global variables.

View File

@ -1,2 +1,2 @@
exe extract-mixed-syntax : Main.cpp AlignedSentence.cpp AlignedSentenceSyntax.cpp ConsistentPhrase.cpp ConsistentPhrases.cpp NonTerm.cpp OutputFileStream.cpp Parameter.cpp Phrase.cpp pugixml.cpp Rule.cpp RulePhrase.cpp Rules.cpp RuleSymbol.cpp SyntaxTree.cpp Word.cpp ..//..//z ..//..//boost_iostreams ..//..//boost_program_options ..//..//moses ;
exe extract-mixed-syntax : Main.cpp AlignedSentence.cpp AlignedSentenceSyntax.cpp ConsistentPhrase.cpp ConsistentPhrases.cpp NonTerm.cpp Parameter.cpp Phrase.cpp pugixml.cpp Rule.cpp RulePhrase.cpp Rules.cpp RuleSymbol.cpp SyntaxTree.cpp Word.cpp ..//deps ../..//z ../..//boost_iostreams ../..//boost_program_options ../../moses//moses : <include>.. ;

View File

@ -1,79 +0,0 @@
// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/iostreams/filter/gzip.hpp>
#include "OutputFileStream.h"
#include "gzfilebuf.h"
using namespace std;
namespace Moses
{
// Creates a stream with no file attached: m_outFile starts as NULL, so
// Close() (and therefore the destructor) is a no-op until Open() is called.
OutputFileStream::OutputFileStream()
:boost::iostreams::filtering_ostream()
,m_outFile(NULL)
{
}
// Creates the stream and immediately tries to open filePath.
// The return value of Open() is ignored here.
OutputFileStream::OutputFileStream(const std::string &filePath)
: m_outFile(NULL)
{
Open(filePath);
}
// Closes and releases the underlying file, if one is attached.
OutputFileStream::~OutputFileStream()
{
Close();
}
bool OutputFileStream::Open(const std::string &filePath)
{
m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
if (m_outFile->fail()) {
return false;
}
if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
this->push(boost::iostreams::gzip_compressor());
}
this->push(*m_outFile);
return true;
}
// Flushes pending output, detaches the file from the chain, then closes and
// frees it. Does nothing when no file is attached.
void OutputFileStream::Close()
{
  if (m_outFile != NULL) {
    this->flush();
    this->pop(); // file

    m_outFile->close();
    delete m_outFile;
    m_outFile = NULL;
  }
}
}

View File

@ -1,50 +0,0 @@
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <cstdlib>
#include <fstream>
#include <string>
#include <iostream>
#include <boost/iostreams/filtering_stream.hpp>
namespace Moses
{
/** Used in place of std::ostream; output is gzip-compressed if the file name ends in .gz
*/
class OutputFileStream : public boost::iostreams::filtering_ostream
{
protected:
// underlying file; NULL while no file is attached
std::ofstream *m_outFile;
public:
// creates a stream with no file attached
OutputFileStream();
// creates a stream and tries to open filePath
OutputFileStream(const std::string &filePath);
// closes the file if one is attached
virtual ~OutputFileStream();
// opens filePath for writing; returns false if the file could not be opened
bool Open(const std::string &filePath);
// flushes and releases the attached file; no-op if none is attached
void Close();
};
}

View File

@ -190,3 +190,22 @@ sub open_or_zcat {
open($hdl,$read) or die "Can't read $fn ($read)";
return $hdl;
}
# Runs a command with system(), echoing it to STDERR first.
# Returns true if the command exited with status 0, false otherwise (a non-zero
# exit code is also reported on STDERR). Exits the script with status 1 if the
# command could not be started or was terminated by a signal.
sub safesystem {
  print STDERR "Executing: @_\n";
  system(@_);
  if ($? == -1) {
    print STDERR "ERROR: Failed to execute: @_\n $!\n";
    exit(1);
  }
  elsif ($? & 127) {
    # The command is passed as a %s argument rather than interpolated into the
    # format: a '%' inside the command would otherwise be read as a conversion.
    printf STDERR "ERROR: Execution of: %s\n died with signal %d, %s coredump\n",
      "@_", ($? & 127), ($? & 128) ? 'with' : 'without';
    exit(1);
  }
  else {
    my $exitcode = $? >> 8;
    print STDERR "Exit code: $exitcode\n" if $exitcode;
    return ! $exitcode;
  }
}