Merge remote branch 'github/master' into miramerge

Conflicts:
	moses/src/AlignmentInfo.cpp
	moses/src/AlignmentInfo.h
	moses/src/ChartHypothesis.cpp
	moses/src/ChartTrellisNode.cpp
	moses/src/LM/Implementation.cpp
	moses/src/LM/Ken.cpp
	moses/src/TargetPhrase.cpp
	moses/src/TargetPhrase.h
This commit is contained in:
Barry Haddow 2012-10-08 17:54:59 +01:00
commit 848aafb644
89 changed files with 4467 additions and 1342 deletions

2
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "regression-testing/tests"]
path = regression-testing/tests
url = ../moses-regression-tests.git
url = git@github.com:moses-smt/moses-regression-tests.git

View File

@ -73,7 +73,7 @@ you're ready to install packages in non-standard paths:
#For Boost:
./bootstrap.sh
./b2 --prefix=$PREFIX --libdir=$PREFIX/lib64 --layout=tagged link=static,shared threading=multi install
./b2 --prefix=$PREFIX --libdir=$LIBDIR --layout=tagged link=static,shared threading=multi,single install
--------------------------------------------------------------------------

View File

@ -0,0 +1,140 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.162355801">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.162355801" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.162355801." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1633424067" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1437309068" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/moses-chart-cmd/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.1495140314" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1247128100" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1087697480" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1163099464" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1584931166" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.65842083" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../moses/src&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1402496521" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.827478809" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1840610682" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1437095112" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.128236233" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.755343734" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.816413868" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.330225535" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/moses}/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/lm}/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/OnDiskPt}/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/util}/Debug&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.1177721357" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="rt"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.128214028" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1267270542" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.612723114" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.516628324">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.516628324" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.516628324" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.516628324." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1782680519" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.587667692" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/moses-chart-cmd/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.330540300" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1062976385" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1344864210" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1422341509" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1573362644" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1937178483" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1116405938" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.32856289" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.1235489953" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1583852187" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1007421110" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.195880914" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.518921609" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.330494310" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1407747418" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses-chart-cmd.cdt.managedbuild.target.gnu.exe.532411209" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.516628324;cdt.managedbuild.config.gnu.exe.release.516628324.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1116405938;cdt.managedbuild.tool.gnu.c.compiler.input.1583852187">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.162355801;cdt.managedbuild.config.gnu.exe.debug.162355801.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.827478809;cdt.managedbuild.tool.gnu.c.compiler.input.128236233">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.162355801;cdt.managedbuild.config.gnu.exe.debug.162355801.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1087697480;cdt.managedbuild.tool.gnu.cpp.compiler.input.1402496521">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.516628324;cdt.managedbuild.config.gnu.exe.release.516628324.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1344864210;cdt.managedbuild.tool.gnu.cpp.compiler.input.1937178483">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses-chart-cmd"/>
</storageModule>
</cproject>

View File

@ -0,0 +1,199 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>moses-chart-cmd</name>
<comment></comment>
<projects>
<project>lm</project>
<project>moses</project>
<project>OnDiskPt</project>
<project>util</project>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildLocation</key>
<value>${workspace_loc:/moses-chart-cmd/Debug}</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>IOWrapper.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/IOWrapper.cpp</locationURI>
</link>
<link>
<name>IOWrapper.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/IOWrapper.h</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/Jamfile</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/Main.h</locationURI>
</link>
<link>
<name>TranslationAnalysis.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/TranslationAnalysis.cpp</locationURI>
</link>
<link>
<name>TranslationAnalysis.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/TranslationAnalysis.h</locationURI>
</link>
<link>
<name>bin</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>mbr.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/mbr.cpp</locationURI>
</link>
<link>
<name>mbr.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/mbr.h</locationURI>
</link>
<link>
<name>moses_chart</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/moses_chart</locationURI>
</link>
<link>
<name>bin/gcc-4.6</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/IOWrapper.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/IOWrapper.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Main.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Main.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/PhraseDictionary.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/PhraseDictionary.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/TranslationAnalysis.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/TranslationAnalysis.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/mbr.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/mbr.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/moses_chart</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-chart-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/moses_chart</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -337,7 +337,6 @@
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);
@ -385,7 +384,6 @@
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);
@ -430,7 +428,6 @@
"-lflm",
"-llattice",
"-lboost_thread-mt",
"-lboost_filesystem-mt",
"-lboost_system-mt",
"-lcmph",
);

View File

@ -3,11 +3,11 @@
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.461114338">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -16,78 +16,64 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1679946908" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.451172468" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/moses-cmd/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1382407954" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.2118670613" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.84059290" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<option id="macosx.cpp.link.option.libs.1641794848" name="Libraries (-l)" superClass="macosx.cpp.link.option.libs" valueType="libs">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.461114338" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.461114338." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1896491482" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.2144309834" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/moses-cmd/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.56664170" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1278274354" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.626095182" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2084031389" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.811344734" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.2118465683" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../moses/src&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.363379373" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.504208780" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.782785840" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1722468661" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.860636318" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.2096997198" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/moses}/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/lm}/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/OnDiskPt}/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/util}/Debug&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.998577284" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="rt"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="rt"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
</option>
<option id="macosx.cpp.link.option.paths.1615268628" name="Library search path (-L)" superClass="macosx.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="${workspace_loc:/moses}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:}/../../srilm/lib/i686-m64"/>
<listOptionValue builtIn="false" value="${workspace_loc:/OnDiskPt}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:/lm}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:/util}/Debug"/>
<listOptionValue builtIn="false" value="${workspace_loc:}/../../irstlm/lib"/>
</option>
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.412058804" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.983725033" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.896987906" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.187427846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2033983602" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.2018824611" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1176009559" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1024398579" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.491464216" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.748558048" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1014626120" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2031799877" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1646579979" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1206872262" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="LatticeMBRGrid.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.1916112479">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.1916112479" moduleId="org.eclipse.cdt.core.settings" name="Release">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.2121690436">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -96,31 +82,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.1916112479" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.1916112479." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1528572752" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.1976002706" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/moses-cmd/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1470455063" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.335066624" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1173017253" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.675070011" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.2121690436" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.2121690436." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1577734572" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1535487925" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/moses-cmd/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.2122426151" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.441254004" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.376987001" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1276092407" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1794377625" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.93276909" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1553350132" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.93522212" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.1860716465" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1508465135" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1658143889" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.378727798" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1701769819" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.174060449" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1018665338" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.440711813" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1940339824" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1648308879" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.2105388501" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1692046412" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1452105399" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1550193619" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1296687303" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -130,25 +116,24 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses-cmd.cdt.managedbuild.target.macosx.exe.1016275955" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
<project id="moses-cmd.cdt.managedbuild.target.gnu.exe.1380109162" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.2121690436;cdt.managedbuild.config.gnu.exe.release.2121690436.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1553350132;cdt.managedbuild.tool.gnu.c.compiler.input.1508465135">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.461114338;cdt.managedbuild.config.gnu.exe.debug.461114338.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.504208780;cdt.managedbuild.tool.gnu.c.compiler.input.860636318">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.461114338;cdt.managedbuild.config.gnu.exe.debug.461114338.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.626095182;cdt.managedbuild.tool.gnu.cpp.compiler.input.363379373">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.2121690436;cdt.managedbuild.config.gnu.exe.release.2121690436.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.376987001;cdt.managedbuild.tool.gnu.cpp.compiler.input.93276909">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865;cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697;cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
</cproject>

View File

@ -95,11 +95,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.h</locationURI>
</link>
<link>
<name>IOWrapper.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.o</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
@ -115,21 +110,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.h</locationURI>
</link>
<link>
<name>LatticeMBR.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.o</locationURI>
</link>
<link>
<name>LatticeMBRGrid.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.cpp</locationURI>
</link>
<link>
<name>LatticeMBRGrid.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.o</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
@ -140,11 +120,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.h</locationURI>
</link>
<link>
<name>Main.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.o</locationURI>
</link>
<link>
<name>TranslationAnalysis.cpp</name>
<type>1</type>
@ -156,19 +131,9 @@
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.h</locationURI>
</link>
<link>
<name>TranslationAnalysis.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.o</locationURI>
</link>
<link>
<name>libkenlm.dylib</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenlm.dylib</locationURI>
</link>
<link>
<name>libkenutil.dylib</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenutil.dylib</locationURI>
<name>bin</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>lmbrgrid</name>
@ -185,15 +150,80 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.h</locationURI>
</link>
<link>
<name>mbr.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.o</locationURI>
</link>
<link>
<name>moses</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/moses</locationURI>
</link>
<link>
<name>bin/gcc-4.6</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/IOWrapper.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/IOWrapper.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/LatticeMBR.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/LatticeMBR.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/LatticeMBRGrid.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/LatticeMBRGrid.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Main.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Main.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/PhraseDictionary.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/PhraseDictionary.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/TranslationAnalysis.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/TranslationAnalysis.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/lmbrgrid</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/lmbrgrid</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/mbr.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/mbr.o</locationURI>
</link>
<link>
<name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/moses</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/moses</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -135,8 +135,6 @@
1EC737A814B977AB00238410 /* GlobalLexicalModel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7363F14B977AA00238410 /* GlobalLexicalModel.cpp */; };
1EC737A914B977AB00238410 /* GlobalLexicalModel.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7364014B977AA00238410 /* GlobalLexicalModel.h */; };
1EC737AA14B977AB00238410 /* gzfilebuf.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7364114B977AA00238410 /* gzfilebuf.h */; };
1EC737AB14B977AB00238410 /* hash.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7364214B977AA00238410 /* hash.cpp */; };
1EC737AC14B977AB00238410 /* hash.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7364314B977AA00238410 /* hash.h */; };
1EC737AD14B977AB00238410 /* Hypothesis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7364514B977AA00238410 /* Hypothesis.cpp */; };
1EC737AE14B977AB00238410 /* Hypothesis.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7364614B977AA00238410 /* Hypothesis.h */; };
1EC737AF14B977AB00238410 /* HypothesisStack.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7364714B977AA00238410 /* HypothesisStack.cpp */; };
@ -465,8 +463,6 @@
1EC7363F14B977AA00238410 /* GlobalLexicalModel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = GlobalLexicalModel.cpp; path = ../../moses/src/GlobalLexicalModel.cpp; sourceTree = "<group>"; };
1EC7364014B977AA00238410 /* GlobalLexicalModel.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = GlobalLexicalModel.h; path = ../../moses/src/GlobalLexicalModel.h; sourceTree = "<group>"; };
1EC7364114B977AA00238410 /* gzfilebuf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = gzfilebuf.h; path = ../../moses/src/gzfilebuf.h; sourceTree = "<group>"; };
1EC7364214B977AA00238410 /* hash.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = hash.cpp; path = ../../moses/src/hash.cpp; sourceTree = "<group>"; };
1EC7364314B977AA00238410 /* hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = hash.h; path = ../../moses/src/hash.h; sourceTree = "<group>"; };
1EC7364414B977AA00238410 /* hypergraph.proto */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = hypergraph.proto; path = ../../moses/src/hypergraph.proto; sourceTree = "<group>"; };
1EC7364514B977AA00238410 /* Hypothesis.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Hypothesis.cpp; path = ../../moses/src/Hypothesis.cpp; sourceTree = "<group>"; };
1EC7364614B977AA00238410 /* Hypothesis.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Hypothesis.h; path = ../../moses/src/Hypothesis.h; sourceTree = "<group>"; };
@ -772,8 +768,6 @@
1EC7363F14B977AA00238410 /* GlobalLexicalModel.cpp */,
1EC7364014B977AA00238410 /* GlobalLexicalModel.h */,
1EC7364114B977AA00238410 /* gzfilebuf.h */,
1EC7364214B977AA00238410 /* hash.cpp */,
1EC7364314B977AA00238410 /* hash.h */,
1EC7364414B977AA00238410 /* hypergraph.proto */,
1EF8F2C3159A61970047B613 /* HypoList.h */,
1EC7364514B977AA00238410 /* Hypothesis.cpp */,
@ -1155,7 +1149,6 @@
1EC737A714B977AB00238410 /* GenerationDictionary.h in Headers */,
1EC737A914B977AB00238410 /* GlobalLexicalModel.h in Headers */,
1EC737AA14B977AB00238410 /* gzfilebuf.h in Headers */,
1EC737AC14B977AB00238410 /* hash.h in Headers */,
1EC737AE14B977AB00238410 /* Hypothesis.h in Headers */,
1EC737B014B977AB00238410 /* HypothesisStack.h in Headers */,
1EC737B214B977AB00238410 /* HypothesisStackCubePruning.h in Headers */,
@ -1378,7 +1371,6 @@
1EC737A414B977AB00238410 /* FloydWarshall.cpp in Sources */,
1EC737A614B977AB00238410 /* GenerationDictionary.cpp in Sources */,
1EC737A814B977AB00238410 /* GlobalLexicalModel.cpp in Sources */,
1EC737AB14B977AB00238410 /* hash.cpp in Sources */,
1EC737AD14B977AB00238410 /* Hypothesis.cpp in Sources */,
1EC737AF14B977AB00238410 /* HypothesisStack.cpp in Sources */,
1EC737B114B977AB00238410 /* HypothesisStackCubePruning.cpp in Sources */,

View File

@ -31,7 +31,6 @@
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1759650532" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.2123672332" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.57896781" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
@ -63,18 +62,8 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.558758254" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1930327037" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1751563578" name="PhraseTableCreator.cpp" rcbsApplicability="disable" resourcePath="CompactPT/PhraseTableCreator.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1652631861">
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1652631861" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
</fileInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1174630266" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.707830535" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.160366559" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.622077510" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1084194539" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="CompactPT/PhraseTableCreator.cpp|CompactPT/LexicalReorderingTableCreator.cpp|LM/SRI.h|LM/SRI.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|LM/SRI.h|LM/SRI.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>

File diff suppressed because it is too large Load Diff

View File

@ -10,9 +10,15 @@ The idea is to have some of Moses' internals exposed to Python (inspired on pycd
## Building
1. Build the python extension
1. Build the python extension:
python setup.py build_ext -i [--with-cmph]
You need to compile Moses with link=shared and (for now) without SRILM (for some reason SRILM prevents the compiler from generating libLM.so)
./bjam --libdir=path cxxflags=-fPIC link=shared
Then you can build the extension (in case you used --libdir=path above, use --moses-lib=path below)
python setup.py build_ext -i [--with-cmph] [--moses-lib=path]
3. Check the example code

View File

@ -1,4 +1,4 @@
/* Generated by Cython 0.16 on Tue Sep 18 11:36:58 2012 */
/* Generated by Cython 0.16 on Fri Sep 21 10:28:51 2012 */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
@ -692,7 +692,7 @@ static PyObject *__pyx_pf_5binpt_11QueryResult_2words(struct __pyx_obj_5binpt_Qu
static PyObject *__pyx_pf_5binpt_11QueryResult_4scores(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5binpt_11QueryResult_6wa(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
static PyObject *__pyx_lambda_funcdef_lambda1(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_r); /* proto */
static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObject *__pyx_v_y, PyObject *__pyx_v_keys); /* proto */
static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObject *__pyx_v_y, PyObject *__pyx_v_key); /* proto */
static PyObject *__pyx_pf_5binpt_11QueryResult_10__str__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
static PyObject *__pyx_pf_5binpt_11QueryResult_12__repr__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
static int __pyx_pf_5binpt_17BinaryPhraseTable___cinit__(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, PyObject *__pyx_v_path, unsigned int __pyx_v_nscores, int __pyx_v_wa, PyObject *__pyx_v_delimiters); /* proto */
@ -722,10 +722,10 @@ static char __pyx_k__y[] = "y";
static char __pyx_k__os[] = "os";
static char __pyx_k__wa[] = "wa";
static char __pyx_k__cmp[] = "cmp";
static char __pyx_k__key[] = "key";
static char __pyx_k__top[] = "top";
static char __pyx_k__desc[] = "desc";
static char __pyx_k__join[] = "join";
static char __pyx_k__keys[] = "keys";
static char __pyx_k__line[] = "line";
static char __pyx_k__path[] = "path";
static char __pyx_k__sort[] = "sort";
@ -771,7 +771,7 @@ static PyObject *__pyx_n_s__encode;
static PyObject *__pyx_n_s__isValidBinaryTable;
static PyObject *__pyx_n_s__isfile;
static PyObject *__pyx_n_s__join;
static PyObject *__pyx_n_s__keys;
static PyObject *__pyx_n_s__key;
static PyObject *__pyx_n_s__line;
static PyObject *__pyx_n_s__nscores;
static PyObject *__pyx_n_s__os;
@ -1276,13 +1276,13 @@ static PyObject *__pyx_pf_5binpt_11QueryResult_6wa(struct __pyx_obj_5binpt_Query
/* Python wrapper */
static PyObject *__pyx_pw_5binpt_11QueryResult_9desc(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
static char __pyx_doc_5binpt_11QueryResult_8desc[] = "Returns the sign of keys(y) - keys(x).\n Can only be used if scores is not an empty vector as\n keys defaults to scores[0]";
static char __pyx_doc_5binpt_11QueryResult_8desc[] = "Returns the sign of key(y) - key(x).\n Can only be used if scores is not an empty vector as\n keys defaults to scores[0]";
static PyMethodDef __pyx_mdef_5binpt_11QueryResult_9desc = {__Pyx_NAMESTR("desc"), (PyCFunction)__pyx_pw_5binpt_11QueryResult_9desc, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5binpt_11QueryResult_8desc)};
static PyObject *__pyx_pw_5binpt_11QueryResult_9desc(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
PyObject *__pyx_v_x = 0;
PyObject *__pyx_v_y = 0;
PyObject *__pyx_v_keys = 0;
static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__x,&__pyx_n_s__y,&__pyx_n_s__keys,0};
PyObject *__pyx_v_key = 0;
static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__x,&__pyx_n_s__y,&__pyx_n_s__key,0};
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("desc (wrapper)", 0);
@ -1313,7 +1313,7 @@ static PyObject *__pyx_pw_5binpt_11QueryResult_9desc(PyObject *__pyx_self, PyObj
}
case 2:
if (kw_args > 0) {
PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__keys);
PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__key);
if (value) { values[2] = value; kw_args--; }
}
}
@ -1331,7 +1331,7 @@ static PyObject *__pyx_pw_5binpt_11QueryResult_9desc(PyObject *__pyx_self, PyObj
}
__pyx_v_x = values[0];
__pyx_v_y = values[1];
__pyx_v_keys = values[2];
__pyx_v_key = values[2];
}
goto __pyx_L4_argument_unpacking_done;
__pyx_L5_argtuple_error:;
@ -1341,7 +1341,7 @@ static PyObject *__pyx_pw_5binpt_11QueryResult_9desc(PyObject *__pyx_self, PyObj
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_5binpt_11QueryResult_8desc(__pyx_v_x, __pyx_v_y, __pyx_v_keys);
__pyx_r = __pyx_pf_5binpt_11QueryResult_8desc(__pyx_v_x, __pyx_v_y, __pyx_v_key);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
@ -1362,8 +1362,8 @@ static PyObject *__pyx_pw_5binpt_11QueryResult_4desc_lambda1(PyObject *__pyx_sel
/* "binpt.pyx":52
*
* @staticmethod
* def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of keys(y) - keys(x).
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of key(y) - key(x).
* Can only be used if scores is not an empty vector as
*/
@ -1399,7 +1399,7 @@ static PyObject *__pyx_lambda_funcdef_lambda1(CYTHON_UNUSED PyObject *__pyx_self
return __pyx_r;
}
static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObject *__pyx_v_y, PyObject *__pyx_v_keys) {
static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObject *__pyx_v_y, PyObject *__pyx_v_key) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
PyObject *__pyx_t_1 = NULL;
@ -1414,7 +1414,7 @@ static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObje
/* "binpt.pyx":56
* Can only be used if scores is not an empty vector as
* keys defaults to scores[0]'''
* return fsign(keys(y) - keys(x)) # <<<<<<<<<<<<<<
* return fsign(key(y) - key(x)) # <<<<<<<<<<<<<<
*
* def __str__(self):
*/
@ -1424,7 +1424,7 @@ static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObje
__Pyx_INCREF(__pyx_v_y);
PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_y);
__Pyx_GIVEREF(__pyx_v_y);
__pyx_t_2 = PyObject_Call(__pyx_v_keys, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_2 = PyObject_Call(__pyx_v_key, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_2);
__Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
__pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@ -1432,7 +1432,7 @@ static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObje
__Pyx_INCREF(__pyx_v_x);
PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_x);
__Pyx_GIVEREF(__pyx_v_x);
__pyx_t_3 = PyObject_Call(__pyx_v_keys, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__pyx_t_3 = PyObject_Call(__pyx_v_key, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_3);
__Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
__pyx_t_1 = PyNumber_Subtract(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@ -1475,7 +1475,7 @@ static PyObject *__pyx_pw_5binpt_11QueryResult_11__str__(PyObject *__pyx_v_self)
}
/* "binpt.pyx":58
* return fsign(keys(y) - keys(x))
* return fsign(key(y) - key(x))
*
* def __str__(self): # <<<<<<<<<<<<<<
* '''Returns a string such as: <words> ||| <scores> [||| word-alignment info]'''
@ -3724,7 +3724,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
{&__pyx_n_s__isValidBinaryTable, __pyx_k__isValidBinaryTable, sizeof(__pyx_k__isValidBinaryTable), 0, 0, 1, 1},
{&__pyx_n_s__isfile, __pyx_k__isfile, sizeof(__pyx_k__isfile), 0, 0, 1, 1},
{&__pyx_n_s__join, __pyx_k__join, sizeof(__pyx_k__join), 0, 0, 1, 1},
{&__pyx_n_s__keys, __pyx_k__keys, sizeof(__pyx_k__keys), 0, 0, 1, 1},
{&__pyx_n_s__key, __pyx_k__key, sizeof(__pyx_k__key), 0, 0, 1, 1},
{&__pyx_n_s__line, __pyx_k__line, sizeof(__pyx_k__line), 0, 0, 1, 1},
{&__pyx_n_s__nscores, __pyx_k__nscores, sizeof(__pyx_k__nscores), 0, 0, 1, 1},
{&__pyx_n_s__os, __pyx_k__os, sizeof(__pyx_k__os), 0, 0, 1, 1},
@ -3774,8 +3774,8 @@ static int __Pyx_InitCachedConstants(void) {
/* "binpt.pyx":52
*
* @staticmethod
* def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of keys(y) - keys(x).
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of key(y) - key(x).
* Can only be used if scores is not an empty vector as
*/
__pyx_k_tuple_16 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@ -3786,9 +3786,9 @@ static int __Pyx_InitCachedConstants(void) {
__Pyx_INCREF(((PyObject *)__pyx_n_s__y));
PyTuple_SET_ITEM(__pyx_k_tuple_16, 1, ((PyObject *)__pyx_n_s__y));
__Pyx_GIVEREF(((PyObject *)__pyx_n_s__y));
__Pyx_INCREF(((PyObject *)__pyx_n_s__keys));
PyTuple_SET_ITEM(__pyx_k_tuple_16, 2, ((PyObject *)__pyx_n_s__keys));
__Pyx_GIVEREF(((PyObject *)__pyx_n_s__keys));
__Pyx_INCREF(((PyObject *)__pyx_n_s__key));
PyTuple_SET_ITEM(__pyx_k_tuple_16, 2, ((PyObject *)__pyx_n_s__key));
__Pyx_GIVEREF(((PyObject *)__pyx_n_s__key));
__Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_16));
__pyx_k_codeobj_17 = (PyObject*)__Pyx_PyCode_New(3, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_18, __pyx_n_s__desc, 52, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@ -3987,8 +3987,8 @@ PyMODINIT_FUNC PyInit_binpt(void)
/* "binpt.pyx":52
*
* @staticmethod
* def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of keys(y) - keys(x).
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of key(y) - key(x).
* Can only be used if scores is not an empty vector as
*/
__pyx_t_1 = __Pyx_CyFunction_NewEx(&__pyx_mdef_5binpt_11QueryResult_4desc_lambda1, 0, NULL, __pyx_n_s__binpt, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
@ -4001,8 +4001,8 @@ PyMODINIT_FUNC PyInit_binpt(void)
* return self._wa
*
* @staticmethod # <<<<<<<<<<<<<<
* def desc(x, y, keys = lambda r: r.scores[0]):
* '''Returns the sign of keys(y) - keys(x).
* def desc(x, y, key = lambda r: r.scores[0]):
* '''Returns the sign of key(y) - key(x).
*/
__pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5binpt_11QueryResult_9desc, NULL, __pyx_n_s__binpt); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
__Pyx_GOTREF(__pyx_t_1);
@ -4021,8 +4021,8 @@ PyMODINIT_FUNC PyInit_binpt(void)
/* "binpt.pyx":52
*
* @staticmethod
* def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of keys(y) - keys(x).
* def desc(x, y, key = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
* '''Returns the sign of key(y) - key(x).
* Can only be used if scores is not an empty vector as
*/
__pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_QueryResult, __pyx_n_s__desc); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}

View File

@ -49,11 +49,11 @@ cdef class QueryResult(object):
return self._wa
@staticmethod
def desc(x, y, keys = lambda r: r.scores[0]):
'''Returns the sign of keys(y) - keys(x).
def desc(x, y, key = lambda r: r.scores[0]):
'''Returns the sign of key(y) - key(x).
Can only be used if scores is not an empty vector as
key defaults to scores[0]'''
return fsign(keys(y) - keys(x))
return fsign(key(y) - key(x))
def __str__(self):
'''Returns a string such as: <words> ||| <scores> [||| word-alignment info]'''
@ -138,7 +138,7 @@ cdef class BinaryPhraseTable(object):
def delimiters(self):
return self._delimiters
def query(self, line, cmp = None, top = 0):
def query(self, line, cmp = None, key = lambda x: x.scores[0], top = 0):
'''Queries the phrase table and returns a list of matches.
Each match is a QueryResult.
If 'cmp' is defined the return list is sorted.

View File

@ -3,31 +3,38 @@ from distutils.extension import Extension
import os
import sys
available_switches = ['--with-cmph']
available_switches = ['--with-cmph', '--moses-lib']
with_cmph = False
while sys.argv[-1] in available_switches:
switch = sys.argv.pop()
if switch == '--with-cmph':
with_cmph = True
#### From here you probably don't need to change anything
#### unless a new dependency shows up in Moses
mosesdir = os.path.abspath('../../')
includes = [mosesdir, os.path.join(mosesdir, 'moses/src'), os.path.join(mosesdir, 'util')]
libdir = os.path.join(mosesdir, 'lib')
# Consume trailing custom switches from the command line so that distutils
# never sees them.  Switches have the form '--name' or '--name=value'.
while sys.argv[-1].split('=')[0] in available_switches:
    # partition() splits on the first '=' only, so a value that itself
    # contains '=' is kept intact.
    name, _, value = sys.argv.pop().partition('=')
    if name == '--with-cmph':
        with_cmph = True
    if name == '--moses-lib':
        # Fail with a readable message instead of an IndexError when the
        # value is missing.
        if not value:
            sys.exit('--moses-lib requires a value, e.g. --moses-lib=/path/to/moses/lib')
        libdir = value
print >> sys.stderr, 'mosesdir=%s\nincludes=%s\nlibdir=%s\ncmph=%s' % (mosesdir, includes, libdir, with_cmph)
#### From here you probably don't need to change anything
#### unless a new dependency shows up in Moses
basic=['z', 'stdc++', 'pthread', 'm', 'gcc_s', 'c', 'boost_system', 'boost_thread', 'boost_filesystem', 'rt']
moses=['OnDiskPt', 'kenutil', 'kenlm', 'LM', 'mert_lib', 'moses_internal', 'CYKPlusParser', 'Scope3Parser', 'fuzzy-match', 'RuleTable', 'CompactPT', 'moses', 'dynsa', 'pcfg_common' ]
additional=[]
if with_cmph:
additional.append('cmph')
exobj = [os.path.join(libdir, 'lib' + l + '.so') for l in moses]
print >> sys.stderr, 'basic=%s\nmoses=%s\nadditional=%s\nextra=%s' % (basic, moses, additional, exobj)
ext_modules = [
Extension(name = 'binpt',
sources = ['binpt/binpt.cpp'],

View File

@ -7,8 +7,8 @@ my $threshold = -1;
use Getopt::Long;
$_HELP = 1 if (@ARGV < 1 or !GetOptions ("table=s" => \$table, #table to filter
"scores=s" => \$scores_file, #scores of each phrase pair, should have same size as the table to filter
"percentage=i" => \$percentage, # percentage of phrase table to remain
"threshold=i" => \$threshold)); # threshold (score < threshold equals prune entry)
"percentage=f" => \$percentage, # percentage of phrase table to remain
"threshold=f" => \$threshold)); # threshold (score < threshold equals prune entry)
# help message if arguments are not correct
if ($_HELP) {

View File

@ -29,7 +29,7 @@ ldflags = [ os.environ "LDFLAGS" ] ;
#Run g++ with empty main and these arguments to see if it passes.
rule test_flags ( flags * ) {
flags = $(cxxflags) $(ldflags) $(flags) ;
local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'int main() {}' -o /dev/null >/dev/null 2>/dev/null\"" ;
local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'int main() {}' -o $(TOP)/dummy >/dev/null 2>/dev/null && rm $(TOP)/dummy 2>/dev/null\"" ;
local ret = [ SHELL $(cmd) : exit-status ] ;
if --debug-configuration in [ modules.peek : ARGV ] {
echo $(cmd) ;
@ -152,7 +152,6 @@ rule boost ( min-version ) {
boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ;
boost-lib unit_test_framework TEST_DYN_LINK ;
boost-lib iostreams IOSTREAMS_DYN_LINK ;
boost-lib filesystem FILE_SYSTEM_DYN_LINK : boost_system ;
}
#Link normally to a library, but sometimes static isn't installed so fall back to dynamic.

View File

@ -17,4 +17,4 @@ run model_test.cc ../util//kenutil kenlm ..//boost_unit_test_framework : : test.
exe query : ngram_query.cc kenlm ../util//kenutil ;
exe build_binary : build_binary.cc kenlm ../util//kenutil ;
exe kenlm_max_order : max_order.cc : $(max-order) ;
exe kenlm_max_order : max_order.cc : <include>.. $(max-order) ;

View File

@ -50,7 +50,7 @@ std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &con
}
} // namespace
std::size_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
return sizeof(uint64_t) * (1 /* header */ + ArrayCount(max_offset, max_next, config)) + 7 /* 8-byte alignment */;
}

View File

@ -33,7 +33,7 @@ class DontBhiksha {
static void UpdateConfigFromBinary(int /*fd*/, Config &/*config*/) {}
static std::size_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; }
static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; }
static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) {
return util::RequiredBits(max_next);
@ -67,7 +67,7 @@ class ArrayBhiksha {
static void UpdateConfigFromBinary(int fd, Config &config);
static std::size_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);
static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);
static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config);

View File

@ -200,10 +200,10 @@ void SeekPastHeader(int fd, const Parameters &params) {
util::SeekOrThrow(fd, TotalHeaderSize(params.counts.size()));
}
uint8_t *SetupBinary(const Config &config, const Parameters &params, std::size_t memory_size, Backing &backing) {
uint8_t *SetupBinary(const Config &config, const Parameters &params, uint64_t memory_size, Backing &backing) {
const uint64_t file_size = util::SizeFile(backing.file.get());
// The header is smaller than a page, so we have to map the whole header as well.
std::size_t total_map = TotalHeaderSize(params.counts.size()) + memory_size;
std::size_t total_map = util::CheckOverflow(TotalHeaderSize(params.counts.size()) + memory_size);
if (file_size != util::kBadSize && static_cast<uint64_t>(file_size) < total_map)
UTIL_THROW(FormatLoadException, "Binary file has size " << file_size << " but the headers say it should be at least " << total_map);

View File

@ -70,7 +70,7 @@ void MatchCheck(ModelType model_type, unsigned int search_version, const Paramet
void SeekPastHeader(int fd, const Parameters &params);
uint8_t *SetupBinary(const Config &config, const Parameters &params, std::size_t memory_size, Backing &backing);
uint8_t *SetupBinary(const Config &config, const Parameters &params, uint64_t memory_size, Backing &backing);
void ComplainAboutARPA(const Config &config, ModelType model_type);
@ -90,7 +90,7 @@ template <class To> void LoadLM(const char *file, const Config &config, To &to)
new_config.probing_multiplier = params.fixed.probing_multiplier;
detail::SeekPastHeader(backing.file.get(), params);
To::UpdateConfigFromBinary(backing.file.get(), params.counts, new_config);
std::size_t memory_size = To::Size(params.counts, new_config);
uint64_t memory_size = To::Size(params.counts, new_config);
uint8_t *start = detail::SetupBinary(new_config, params, memory_size, backing);
to.InitializeFromBinary(start, params, new_config, backing.file.get());
} else {

View File

@ -11,6 +11,8 @@
#ifdef WIN32
#include "util/getopt.hh"
#else
#include <unistd.h>
#endif
namespace lm {
@ -85,16 +87,16 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
std::vector<uint64_t> counts;
util::FilePiece f(file);
lm::ReadARPACounts(f, counts);
std::size_t sizes[6];
uint64_t sizes[6];
sizes[0] = ProbingModel::Size(counts, config);
sizes[1] = RestProbingModel::Size(counts, config);
sizes[2] = TrieModel::Size(counts, config);
sizes[3] = QuantTrieModel::Size(counts, config);
sizes[4] = ArrayTrieModel::Size(counts, config);
sizes[5] = QuantArrayTrieModel::Size(counts, config);
std::size_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(size_t));
std::size_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(size_t));
std::size_t divide;
uint64_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
uint64_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(uint64_t));
uint64_t divide;
char prefix;
if (min_length < (1 << 10) * 10) {
prefix = ' ';

View File

@ -38,6 +38,7 @@
#ifndef LM_LEFT__
#define LM_LEFT__
#include "lm/max_order.hh"
#include "lm/state.hh"
#include "lm/return.hh"

View File

@ -1,3 +1,4 @@
#include "lm/max_order.hh"
#include <iostream>
int main(int argc, char *argv[]) {

12
lm/max_order.hh Normal file
View File

@ -0,0 +1,12 @@
/* IF YOUR BUILD SYSTEM PASSES -DKENLM_MAX_ORDER, THEN CHANGE THE BUILD SYSTEM.
* If not, this is the default maximum order.
* Having this limit means that State can be
* (kMaxOrder - 1) * sizeof(float) bytes instead of
* sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
*/
/* Default maximum n-gram order; only takes effect when KENLM_MAX_ORDER has
* not already been defined before this header is included.
*/
#ifndef KENLM_MAX_ORDER
#define KENLM_MAX_ORDER 6
#endif
/* Human-readable hint on how to rebuild with a different maximum order.
* Overridable in the same way as KENLM_MAX_ORDER; presumably meant to be
* appended to error messages about models that exceed the limit.
*/
#ifndef KENLM_ORDER_MESSAGE
#define KENLM_ORDER_MESSAGE "Recompile with e.g. `bjam --kenlm-max-order=6 -a' to change the maximum order."
#endif

View File

@ -5,12 +5,14 @@
#include "lm/search_hashed.hh"
#include "lm/search_trie.hh"
#include "lm/read_arpa.hh"
#include "util/have.hh"
#include "util/murmur_hash.hh"
#include <algorithm>
#include <functional>
#include <numeric>
#include <cmath>
#include <limits>
namespace lm {
namespace ngram {
@ -18,17 +20,18 @@ namespace detail {
template <class Search, class VocabularyT> const ModelType GenericModel<Search, VocabularyT>::kModelType = Search::kModelType;
template <class Search, class VocabularyT> size_t GenericModel<Search, VocabularyT>::Size(const std::vector<uint64_t> &counts, const Config &config) {
template <class Search, class VocabularyT> uint64_t GenericModel<Search, VocabularyT>::Size(const std::vector<uint64_t> &counts, const Config &config) {
return VocabularyT::Size(counts[0], config) + Search::Size(counts, config);
}
template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::SetupMemory(void *base, const std::vector<uint64_t> &counts, const Config &config) {
size_t goal_size = util::CheckOverflow(Size(counts, config));
uint8_t *start = static_cast<uint8_t*>(base);
size_t allocated = VocabularyT::Size(counts[0], config);
vocab_.SetupMemory(start, allocated, counts[0], config);
start += allocated;
start = search_.SetupMemory(start, counts, config);
if (static_cast<std::size_t>(start - static_cast<uint8_t*>(base)) != Size(counts, config)) UTIL_THROW(FormatLoadException, "The data structures took " << (start - static_cast<uint8_t*>(base)) << " but Size says they should take " << Size(counts, config));
if (static_cast<std::size_t>(start - static_cast<uint8_t*>(base)) != goal_size) UTIL_THROW(FormatLoadException, "The data structures took " << (start - static_cast<uint8_t*>(base)) << " but Size says they should take " << goal_size);
}
template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::GenericModel(const char *file, const Config &config) {
@ -47,8 +50,19 @@ template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::Ge
P::Init(begin_sentence, null_context, vocab_, search_.Order());
}
namespace {
/* Sanity-check the per-order n-gram counts of a model.
* counts holds one entry per order: counts[k] is the number of (k+1)-grams,
* so counts.size() is the order of the model.
* Two conditions are checked through UTIL_THROW_IF (defined elsewhere;
* presumably throws the named exception when the condition holds):
*  - FormatLoadException if the order exceeds KENLM_MAX_ORDER.
*  - util::OverflowException if a count does not fit in size_t.
*/
void CheckCounts(const std::vector<uint64_t> &counts) {
UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << " but KenLM was compiled to support up to " << KENLM_MAX_ORDER << ". " << KENLM_ORDER_MESSAGE);
// The per-count check only matters when size_t is narrower than uint64_t;
// otherwise every uint64_t count is representable and the loop is skipped.
if (sizeof(uint64_t) > sizeof(std::size_t)) {
for (std::vector<uint64_t>::const_iterator i = counts.begin(); i != counts.end(); ++i) {
// (i - counts.begin() + 1) turns the zero-based position into the n-gram order for the message.
UTIL_THROW_IF(*i > static_cast<uint64_t>(std::numeric_limits<size_t>::max()), util::OverflowException, "This model has " << *i << " " << (i - counts.begin() + 1) << "-grams which is too many for 32-bit machines.");
}
}
}
} // namespace
template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromBinary(void *start, const Parameters &params, const Config &config, int fd) {
UTIL_THROW_IF(params.counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << params.counts.size() << ". Re-compile (use -a), passing a number at least this large to bjam's --max-kenlm-order flag.");
CheckCounts(params.counts);
SetupMemory(start, params.counts, config);
vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab);
search_.LoadedBinary();
@ -61,12 +75,11 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
std::vector<uint64_t> counts;
// File counts do not include pruned trigrams that extend to quadgrams etc. These will be fixed by search_.
ReadARPACounts(f, counts);
UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << ". Re-compile (use -a), passing a number at least this large to bjam's --max-kenlm-order flag.");
CheckCounts(counts);
if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model.");
if (config.probing_multiplier <= 1.0) UTIL_THROW(ConfigException, "probing multiplier must be > 1.0");
std::size_t vocab_size = VocabularyT::Size(counts[0], config);
std::size_t vocab_size = util::CheckOverflow(VocabularyT::Size(counts[0], config));
// Setup the binary file for writing the vocab lookup table. The search_ is responsible for growing the binary file to its needs.
vocab_.SetupMemory(SetupJustVocab(config, counts.size(), vocab_size, backing_), vocab_size, counts[0], config);

View File

@ -41,7 +41,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
* does not include small non-mapped control structures, such as this class
* itself.
*/
static size_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config = Config());
/* Load the model from a file. It may be an ARPA or binary file. Binary
* files must have the format expected by this class or you'll get an

View File

@ -3,6 +3,7 @@
#include "lm/blank.hh"
#include "lm/config.hh"
#include "lm/max_order.hh"
#include "lm/model_type.hh"
#include "util/bit_packing.hh"
@ -23,7 +24,7 @@ class DontQuantize {
public:
static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
static void UpdateConfigFromBinary(int, const std::vector<uint64_t> &, Config &) {}
static std::size_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; }
static uint64_t Size(uint8_t /*order*/, const Config &/*config*/) { return 0; }
static uint8_t MiddleBits(const Config &/*config*/) { return 63; }
static uint8_t LongestBits(const Config &/*config*/) { return 31; }
@ -137,9 +138,9 @@ class SeparatelyQuantize {
static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config);
static std::size_t Size(uint8_t order, const Config &config) {
size_t longest_table = (static_cast<size_t>(1) << static_cast<size_t>(config.prob_bits)) * sizeof(float);
size_t middle_table = (static_cast<size_t>(1) << static_cast<size_t>(config.backoff_bits)) * sizeof(float) + longest_table;
static uint64_t Size(uint8_t order, const Config &config) {
uint64_t longest_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.prob_bits)) * sizeof(float);
uint64_t middle_table = (static_cast<uint64_t>(1) << static_cast<uint64_t>(config.backoff_bits)) * sizeof(float) + longest_table;
// unigrams are currently not quantized so no need for a table.
return (order - 2) * middle_table + longest_table + /* for the bit counts and alignment padding) */ 8;
}

View File

@ -2,12 +2,13 @@
#include "lm/blank.hh"
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <vector>
#include <ctype.h>
#include <math.h>
#include <string.h>
#include <stdint.h>
@ -31,6 +32,15 @@ bool IsEntirelyWhiteSpace(const StringPiece &line) {
const char kBinaryMagic[] = "mmap lm http://kheafield.com/code";
// strtoull isn't portable enough :-(
/* Parse an unsigned 64-bit n-gram count from text.
 * from: the text to parse; leading whitespace is ignored and parsing stops
 *   at the first character that cannot be part of the number.
 * Returns the parsed count.
 * Reports FormatLoadException (through UTIL_THROW_IF) when the text starts
 * with a minus sign or when no number could be extracted.
 */
uint64_t ReadCount(const std::string &from) {
  // Stream extraction into an unsigned type accepts a leading '-' and wraps
  // the value around instead of failing, so negative counts have to be
  // rejected explicitly before parsing.
  const std::string::size_type first = from.find_first_not_of(" \t\n\v\f\r");
  UTIL_THROW_IF(first != std::string::npos && from[first] == '-', FormatLoadException, "Negative n-gram count " << from);
  std::stringstream stream(from);
  uint64_t ret;
  stream >> ret;
  UTIL_THROW_IF(!stream, FormatLoadException, "Bad count " << from);
  return ret;
}
} // namespace
void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
@ -52,15 +62,11 @@ void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number) {
// So strtol doesn't go off the end of line.
std::string remaining(line.data() + 6, line.size() - 6);
char *end_ptr;
unsigned long int length = std::strtol(remaining.c_str(), &end_ptr, 10);
unsigned int length = std::strtol(remaining.c_str(), &end_ptr, 10);
if ((end_ptr == remaining.c_str()) || (length - 1 != number.size())) UTIL_THROW(FormatLoadException, "ngram count lengths should be consecutive starting with 1: " << line);
if (*end_ptr != '=') UTIL_THROW(FormatLoadException, "Expected = immediately following the first number in the count line " << line);
++end_ptr;
const char *start = end_ptr;
long int count = std::strtol(start, &end_ptr, 10);
if (count < 0) UTIL_THROW(FormatLoadException, "Negative n-gram count " << count);
if (start == end_ptr) UTIL_THROW(FormatLoadException, "Couldn't parse n-gram count from " << line);
number.push_back(count);
number.push_back(ReadCount(end_ptr));
}
}
@ -103,7 +109,7 @@ void ReadBackoff(util::FilePiece &in, float &backoff) {
int float_class = _fpclass(backoff);
UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff);
#else
int float_class = fpclassify(backoff);
int float_class = std::fpclassify(backoff);
UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
#endif
}

View File

@ -74,8 +74,8 @@ template <class Value> class HashedSearch {
// TODO: move probing_multiplier here with next binary file format update.
static void UpdateConfigFromBinary(int, const std::vector<uint64_t> &, Config &) {}
static std::size_t Size(const std::vector<uint64_t> &counts, const Config &config) {
std::size_t ret = Unigram::Size(counts[0]);
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
uint64_t ret = Unigram::Size(counts[0]);
for (unsigned char n = 1; n < counts.size() - 1; ++n) {
ret += Middle::Size(counts[n], config.probing_multiplier);
}
@ -160,7 +160,7 @@ template <class Value> class HashedSearch {
#endif
{}
static std::size_t Size(uint64_t count) {
static uint64_t Size(uint64_t count) {
return (count + 1) * sizeof(ProbBackoff); // +1 for hallucinate <unk>
}

View File

@ -5,6 +5,7 @@
#include "lm/binary_format.hh"
#include "lm/blank.hh"
#include "lm/lm_exception.hh"
#include "lm/max_order.hh"
#include "lm/quantize.hh"
#include "lm/trie.hh"
#include "lm/trie_sort.hh"
@ -88,7 +89,7 @@ class BackoffMessages {
if (!HasExtension(weights.backoff)) {
weights.backoff = kExtensionBackoff;
UTIL_THROW_IF(fseek(unigrams, -sizeof(weights), SEEK_CUR), util::ErrnoException, "Seeking backwards to denote unigram extension failed.");
WriteOrThrow(unigrams, &weights, sizeof(weights));
util::WriteOrThrow(unigrams, &weights, sizeof(weights));
}
const ProbPointer &write_to = *reinterpret_cast<const ProbPointer*>(current_ + sizeof(WordIndex));
base[write_to.array][write_to.index] += weights.backoff;

View File

@ -44,8 +44,8 @@ template <class Quant, class Bhiksha> class TrieSearch {
Bhiksha::UpdateConfigFromBinary(fd, config);
}
static std::size_t Size(const std::vector<uint64_t> &counts, const Config &config) {
std::size_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]);
static uint64_t Size(const std::vector<uint64_t> &counts, const Config &config) {
uint64_t ret = Quant::Size(counts.size(), config) + Unigram::Size(counts[0]);
for (unsigned char i = 1; i < counts.size() - 1; ++i) {
ret += Middle::Size(Quant::MiddleBits(config), counts[i], counts[0], counts[i+1], config);
}

View File

@ -1,6 +1,7 @@
#ifndef LM_STATE__
#define LM_STATE__
#include "lm/max_order.hh"
#include "lm/word_index.hh"
#include "util/murmur_hash.hh"

View File

@ -36,7 +36,7 @@ bool FindBitPacked(const void *base, uint64_t key_mask, uint8_t key_bits, uint8_
}
} // namespace
std::size_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) {
uint64_t BitPacked::BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits) {
uint8_t total_bits = util::RequiredBits(max_vocab) + remaining_bits;
// Extra entry for next pointer at the end.
// +7 then / 8 to round up bits and convert to bytes
@ -57,7 +57,7 @@ void BitPacked::BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits)
max_vocab_ = max_vocab;
}
template <class Bhiksha> std::size_t BitPackedMiddle<Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) {
template <class Bhiksha> uint64_t BitPackedMiddle<Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) {
return Bhiksha::Size(entries + 1, max_ptr, config) + BaseSize(entries, max_vocab, quant_bits + Bhiksha::InlineBits(entries + 1, max_ptr, config));
}

View File

@ -49,7 +49,7 @@ class Unigram {
unigram_ = static_cast<UnigramValue*>(start);
}
static std::size_t Size(uint64_t count) {
static uint64_t Size(uint64_t count) {
// +1 in case unknown doesn't appear. +1 for the final next.
return (count + 2) * sizeof(UnigramValue);
}
@ -84,7 +84,7 @@ class BitPacked {
}
protected:
static std::size_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits);
static uint64_t BaseSize(uint64_t entries, uint64_t max_vocab, uint8_t remaining_bits);
void BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits);
@ -99,7 +99,7 @@ class BitPacked {
template <class Bhiksha> class BitPackedMiddle : public BitPacked {
public:
static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config);
static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config);
// next_source need not be initialized.
BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config);
@ -128,7 +128,7 @@ template <class Bhiksha> class BitPackedMiddle : public BitPacked {
class BitPackedLongest : public BitPacked {
public:
static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) {
static uint64_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) {
return BaseSize(entries, max_vocab, quant_bits);
}

View File

@ -22,12 +22,6 @@
namespace lm {
namespace ngram {
namespace trie {
void WriteOrThrow(FILE *to, const void *data, size_t size) {
assert(size);
if (1 != std::fwrite(data, size, 1, to)) UTIL_THROW(util::ErrnoException, "Short write; requested size " << size);
}
namespace {
typedef util::SizedIterator NGramIter;
@ -95,12 +89,12 @@ FILE *WriteContextFile(uint8_t *begin, uint8_t *end, const util::TempMaker &make
// Write out to file and uniqueify at the same time. Could have used unique_copy if there was an appropriate OutputIterator.
if (context_begin == context_end) return out.release();
PartialIter i(context_begin);
WriteOrThrow(out.get(), i->Data(), context_size);
util::WriteOrThrow(out.get(), i->Data(), context_size);
const void *previous = i->Data();
++i;
for (; i != context_end; ++i) {
if (memcmp(previous, i->Data(), context_size)) {
WriteOrThrow(out.get(), i->Data(), context_size);
util::WriteOrThrow(out.get(), i->Data(), context_size);
previous = i->Data();
}
}
@ -116,7 +110,7 @@ struct ThrowCombine {
// Useful for context files that just contain records with no value.
struct FirstCombine {
void operator()(std::size_t entry_size, const void *first, const void * /*second*/, FILE *out) const {
WriteOrThrow(out, first, entry_size);
util::WriteOrThrow(out, first, entry_size);
}
};
@ -129,10 +123,10 @@ template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_f
EntryCompare less(order);
while (first && second) {
if (less(first.Data(), second.Data())) {
WriteOrThrow(out_file.get(), first.Data(), entry_size);
util::WriteOrThrow(out_file.get(), first.Data(), entry_size);
++first;
} else if (less(second.Data(), first.Data())) {
WriteOrThrow(out_file.get(), second.Data(), entry_size);
util::WriteOrThrow(out_file.get(), second.Data(), entry_size);
++second;
} else {
combine(entry_size, first.Data(), second.Data(), out_file.get());
@ -140,7 +134,7 @@ template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_f
}
}
for (RecordReader &remains = (first ? first : second); remains; ++remains) {
WriteOrThrow(out_file.get(), remains.Data(), entry_size);
util::WriteOrThrow(out_file.get(), remains.Data(), entry_size);
}
return out_file.release();
}
@ -164,7 +158,7 @@ void RecordReader::Init(FILE *file, std::size_t entry_size) {
void RecordReader::Overwrite(const void *start, std::size_t amount) {
long internal = (uint8_t*)start - (uint8_t*)data_.get();
UTIL_THROW_IF(fseek(file_, internal - entry_size_, SEEK_CUR), util::ErrnoException, "Couldn't seek backwards for revision");
WriteOrThrow(file_, start, amount);
util::WriteOrThrow(file_, start, amount);
long forward = entry_size_ - internal - amount;
#if !defined(_WIN32) && !defined(_WIN64)
if (forward)

View File

@ -3,6 +3,7 @@
#ifndef LM_TRIE_SORT__
#define LM_TRIE_SORT__
#include "lm/max_order.hh"
#include "lm/word_index.hh"
#include "util/file.hh"
@ -28,8 +29,6 @@ struct Config;
namespace trie {
void WriteOrThrow(FILE *to, const void *data, size_t size);
class EntryCompare : public std::binary_function<const void*, const void*, bool> {
public:
explicit EntryCompare(unsigned char order) : order_(order) {}

View File

@ -87,7 +87,7 @@ void WriteWordsWrapper::Write(int fd) {
SortedVocabulary::SortedVocabulary() : begin_(NULL), end_(NULL), enumerate_(NULL) {}
std::size_t SortedVocabulary::Size(std::size_t entries, const Config &/*config*/) {
uint64_t SortedVocabulary::Size(uint64_t entries, const Config &/*config*/) {
// Lead with the number of entries.
return sizeof(uint64_t) + sizeof(uint64_t) * entries;
}
@ -165,7 +165,7 @@ struct ProbingVocabularyHeader {
ProbingVocabulary::ProbingVocabulary() : enumerate_(NULL) {}
std::size_t ProbingVocabulary::Size(std::size_t entries, const Config &config) {
uint64_t ProbingVocabulary::Size(uint64_t entries, const Config &config) {
return ALIGN8(sizeof(detail::ProbingVocabularyHeader)) + Lookup::Size(entries, config.probing_multiplier);
}

View File

@ -62,7 +62,7 @@ class SortedVocabulary : public base::Vocabulary {
}
// Size for purposes of file writing
static size_t Size(std::size_t entries, const Config &config);
static uint64_t Size(uint64_t entries, const Config &config);
// Vocab words are [0, Bound()) Only valid after FinishedLoading/LoadedBinary.
WordIndex Bound() const { return bound_; }
@ -129,7 +129,7 @@ class ProbingVocabulary : public base::Vocabulary {
return lookup_.Find(detail::HashForVocab(str), i) ? i->value : 0;
}
static size_t Size(std::size_t entries, const Config &config);
static uint64_t Size(uint64_t entries, const Config &config);
// Vocab words are [0, Bound()).
WordIndex Bound() const { return bound_; }

View File

@ -225,12 +225,12 @@ void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats&
//SCOREROUT eg: 0.04546
distanceValue*=SCORE_MULTFACT; //SCOREROUT eg: 4546 to transform float into integer
ostringstream tempStream;
tempStream.precision(SCORE_PRECISION);
tempStream << distanceValue << " 1"; //use for final normalization over the amount of test sentences
tempStream.precision(0); // decimal precision not needed as score was multiplied per SCORE_MULTFACT
tempStream << std::fixed << distanceValue << " 1"; //use for final normalization over the amount of test sentences
string str = tempStream.str();
entry.set(str);
//cout << tempStream.str();
//cout << distanceValue << "=" << distanceValue << " (str:" << tempStream.str() << ")" << endl;
}
//Will just be final score

View File

@ -49,6 +49,16 @@ int main(int argc, char **argv)
LMList lmList;
Parameter *parameter = new Parameter();
const_cast<std::vector<std::string>&>(parameter->GetParam("factor-delimiter")).resize(1, "||dummy_string||");
const_cast<std::vector<std::string>&>(parameter->GetParam("input-factors")).resize(1, "0");
const_cast<std::vector<std::string>&>(parameter->GetParam("verbose")).resize(1, "0");
const_cast<std::vector<std::string>&>(parameter->GetParam("weight-w")).resize(1, "0");
const_cast<std::vector<std::string>&>(parameter->GetParam("weight-d")).resize(1, "0");
const_cast<StaticData&>(StaticData::Instance()).LoadData(parameter);
PhraseDictionaryFeature pdf(Compact, nscores, nscores, input, output, ttable, weight, 0, "", "");
PhraseDictionaryCompact pdc(nscores, Compact, &pdf, false, useAlignments);
bool ret = pdc.Load(input, output, ttable, weight, 0, lmList, 0);
@ -74,7 +84,8 @@ int main(int argc, char **argv)
if(useAlignments)
std::cout << " " << tp.GetAlignmentInfo() << "|||";
for(size_t i = 0; i < tp.GetScoreBreakdown().size(); i++)
size_t offset = tp.GetScoreBreakdown().size() - nscores;
for(size_t i = offset; i < tp.GetScoreBreakdown().size(); i++)
std::cout << " " << exp(tp.GetScoreBreakdown()[i]);
std::cout << std::endl;
}

View File

@ -72,7 +72,6 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
,m_nBestOutputCollector(NULL)
,m_searchGraphOutputCollector(NULL)
,m_singleBestOutputCollector(NULL)
,m_alignmentOutputCollector(NULL)
{
const StaticData &staticData = StaticData::Instance();
@ -113,15 +112,6 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
}
if (staticData.PrintAlignmentInfo()) {
if (staticData.GetAlignmentOutputFile().empty()) {
m_alignmentOutputCollector = new Moses::OutputCollector(&std::cout);
} else {
m_alignmentOutputCollector = new Moses::OutputCollector(new std::ofstream(staticData.GetAlignmentOutputFile().c_str()));
m_alignmentOutputCollector->HoldOutputStream();
}
}
}
IOWrapper::~IOWrapper()
@ -135,7 +125,6 @@ IOWrapper::~IOWrapper()
delete m_nBestOutputCollector;
delete m_searchGraphOutputCollector;
delete m_singleBestOutputCollector;
delete m_alignmentOutputCollector;
}
void IOWrapper::ResetTranslationId() {
@ -199,86 +188,6 @@ void OutputSurface(std::ostream &out, const ChartHypothesis *hypo, const std::ve
}
}
}
// File-local helpers used to turn a chart hypothesis into a flat word alignment.
namespace {
// A list of (first, second) index pairs. GetWordAlignment() fills them as
// (source offset, target offset) and OutputAlignment() prints them as "first-second".
typedef std::vector< std::pair<size_t, size_t> > WordAlignment;
// Returns true when the factor stored at index MAX_NUM_FACTORS - 1 of the word
// is present and its string equals UNKNOWN_FACTOR; false when that factor is NULL.
bool IsUnknownWord(const Word& word) {
const Factor* factor = word[MAX_NUM_FACTORS - 1];
if (factor == NULL)
return false;
return factor->GetString() == UNKNOWN_FACTOR;
}
// Recursively builds the alignment pairs for the sub-derivation rooted at hypo.
//   hypo             - hypothesis whose current target phrase and previous
//                      hypotheses are walked.
//   targetWordsCount - optional out-parameter (may be NULL); receives the sum of
//                      targetSideLengths, i.e. the per-position target lengths
//                      after non-terminals have been expanded.
// Returns pairs whose offsets are relative to the start of this hypothesis
// (the recursion adds sourceOffset/targetOffset at each level).
WordAlignment GetWordAlignment(const Moses::ChartHypothesis *hypo, size_t *targetWordsCount)
{
const Moses::TargetPhrase& targetPhrase = hypo->GetCurrTargetPhrase();
const AlignmentInfo& phraseAlignmentInfo = targetPhrase.GetAlignmentInfo();
// Number of source positions = largest source index in the alignment + 1.
size_t sourceSize = 0;
for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
it != phraseAlignmentInfo.end(); ++it)
{
sourceSize = std::max(sourceSize, it->first + 1);
}
// Every rule position starts with length 1; non-terminal positions are
// overwritten below with the length of the sub-derivation they expand to.
std::vector<size_t> sourceSideLengths(sourceSize, 1);
std::vector<size_t> targetSideLengths(targetPhrase.GetSize(), 1);
std::vector<WordAlignment> alignmentsPerSourceNonTerm(sourceSize);
// Previous hypotheses are consumed in alignment-iteration order.
// NOTE(review): assumes that order matches the order of GetPrevHypo() - confirm.
size_t prevHypoIndex = 0;
for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
it != phraseAlignmentInfo.end(); ++it)
{
if (targetPhrase.GetWord(it->second).IsNonTerminal()) {
const Moses::ChartHypothesis *prevHypo = hypo->GetPrevHypo(prevHypoIndex);
++prevHypoIndex;
// Recurse; the child also reports how many target words it produced.
alignmentsPerSourceNonTerm[it->first] = GetWordAlignment(
prevHypo, &targetSideLengths[it->second]);
sourceSideLengths[it->first] = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
// Sanity check: the child's source start, relative to this hypothesis,
// must equal the summed lengths of the source positions before it.
CHECK(prevHypo->GetCurrSourceRange().GetStartPos() - hypo->GetCurrSourceRange().GetStartPos()
== (int)std::accumulate(sourceSideLengths.begin(), sourceSideLengths.begin() + it->first, 0));
} else {
// Terminal alignment point: a single pair at relative offset (0, 0).
// NOTE(review): one (0, 0) entry is appended per alignment point, so a source
// word aligned to several target words looks like it yields repeated pairs
// in the loop below - verify whether that is intended.
alignmentsPerSourceNonTerm[it->first].push_back(WordAlignment::value_type(0, 0));
}
}
if (targetWordsCount != NULL) {
*targetWordsCount = std::accumulate(targetSideLengths.begin(), targetSideLengths.end(), 0);
}
// isn't valid since there may be unaligned words: CHECK(hypo->GetCurrSourceRange().GetNumWordsCovered() == std::accumulate(sourceSideLengths.begin(), sourceSideLengths.end(), 0));
// Shift every stored pair by the summed lengths of the positions preceding
// its source / target slot in this rule.
WordAlignment result;
for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
it != phraseAlignmentInfo.end(); ++it)
{
size_t sourceOffset = std::accumulate(sourceSideLengths.begin(), sourceSideLengths.begin() + it->first, 0);
size_t targetOffset = std::accumulate(targetSideLengths.begin(), targetSideLengths.begin() + it->second, 0);
for (WordAlignment::const_iterator it2 = alignmentsPerSourceNonTerm[it->first].begin();
it2 != alignmentsPerSourceNonTerm[it->first].end(); ++it2)
{
result.push_back(make_pair(sourceOffset + it2->first, targetOffset + it2->second));
}
}
// Fallback: a one-word target phrase covering one source word, with no alignment
// points and flagged as unknown by IsUnknownWord(), is aligned 0-0.
if (result.empty() && targetPhrase.GetSize() == 1 && hypo->GetCurrSourceRange().GetNumWordsCovered() == 1 && IsUnknownWord(targetPhrase.GetWord(0))) {
result.push_back(WordAlignment::value_type(0, 0));
}
return result;
}
}
// Formats the word alignment of hypo as space-separated "first-second" pairs,
// terminated by a newline, and hands the line to the alignment output
// collector under the given translation id.
void IOWrapper::OutputAlignment(const Moses::ChartHypothesis *hypo, long translationId)
{
  // Nothing to do when no alignment collector has been set.
  if (!m_alignmentOutputCollector) {
    return;
  }

  const WordAlignment points = GetWordAlignment(hypo, NULL);

  std::ostringstream line;
  const char *separator = "";  // empty before the first pair, " " afterwards
  for (size_t idx = 0; idx < points.size(); ++idx) {
    line << separator << points[idx].first << "-" << points[idx].second;
    separator = " ";
  }
  line << std::endl;

  m_alignmentOutputCollector->Write(static_cast<int>(translationId), line.str());
}
void IOWrapper::Backtrack(const ChartHypothesis *hypo)
{

View File

@ -74,7 +74,6 @@ protected:
Moses::OutputCollector *m_nBestOutputCollector;
Moses::OutputCollector *m_searchGraphOutputCollector;
Moses::OutputCollector *m_singleBestOutputCollector;
Moses::OutputCollector *m_alignmentOutputCollector;
public:
IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
@ -91,7 +90,6 @@ public:
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId);
void OutputSparseFeatureScores(std::ostream& out, const Moses::ChartTrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
void OutputAlignment(const Moses::ChartHypothesis *hypo, long translationId);
void Backtrack(const Moses::ChartHypothesis *hypo);
void ResetTranslationId();

View File

@ -58,6 +58,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "ChartTrellisPath.h"
#include "ChartTrellisPathList.h"
#include "util/usage.hh"
using namespace std;
using namespace Moses;
using namespace MosesChartCmd;
@ -103,10 +105,6 @@ public:
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
m_ioWrapper.OutputDetailedTranslationReport(bestHypo, sentence, lineNumber);
}
if (staticData.PrintAlignmentInfo()) {
m_ioWrapper.OutputAlignment(bestHypo, lineNumber);
}
// n-best
size_t nBestSize = staticData.GetNBestSize();
@ -278,6 +276,8 @@ int main(int argc, char* argv[])
return EXIT_FAILURE;
}
IFVERBOSE(1) util::PrintUsage(std::cerr);
#ifdef HACK_EXIT
//This avoids that detructors are called (it can take a long time)
exit(EXIT_SUCCESS);

View File

@ -28,6 +28,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <sstream>
#include <vector>
#include "util/usage.hh"
#ifdef WIN32
// Include Visual Leak Detector
//#include <vld.h>
@ -537,6 +539,8 @@ int main(int argc, char** argv)
return EXIT_FAILURE;
}
IFVERBOSE(1) util::PrintUsage(std::cerr);
#ifndef EXIT_RETURN
//This avoids that destructors are called (it can take a long time)
exit(EXIT_SUCCESS);

View File

@ -26,7 +26,7 @@ namespace Moses
{
void AlignmentInfo::BuildNonTermIndexMap()
{
{
if (m_collection.empty()) {
return;
}
@ -40,9 +40,9 @@ void AlignmentInfo::BuildNonTermIndexMap()
m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
size_t i = 0;
for (p = begin(); p != end(); ++p) {
//std::cerr << "nt point: " << p->second << " -> " << i << std::endl;
m_nonTermIndexMap[p->second] = i++;
m_nonTermIndexMap[p->second] = i++;
}
}
bool compare_target(const std::pair<size_t,size_t> *a, const std::pair<size_t,size_t> *b) {

View File

@ -165,8 +165,6 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
}
dottedRuleCol.Clear(relEndPos+1);
outColl.ShrinkToLimit();
}
// Given a partial rule application ending at startPos-1 and given the sets of

View File

@ -165,8 +165,6 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
}
dottedRuleCol.Clear(relEndPos+1);
outColl.ShrinkToLimit();
}
// Given a partial rule application ending at startPos-1 and given the sets of

View File

@ -268,8 +268,6 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
}
} // for (size_t ind = 0; ind < savedNodeColl.size(); ++ind)
outColl.ShrinkToLimit();
//cerr << numDerivations << " ";
}

View File

@ -92,6 +92,7 @@ ChartHypothesis::~ChartHypothesis()
*/
void ChartHypothesis::CreateOutputPhrase(Phrase &outPhrase) const
{
for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
const Word &word = GetCurrTargetPhrase().GetWord(pos);
if (word.IsNonTerminal()) {

View File

@ -98,8 +98,7 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
}
}
void ChartTranslationOptionList::ShrinkToLimit()
{
void ChartTranslationOptionList::ApplyThreshold() {
if (m_size > m_ruleLimit) {
// Something's gone wrong if the list has grown to m_ruleLimit * 2
// without being pruned.
@ -112,10 +111,7 @@ void ChartTranslationOptionList::ShrinkToLimit()
ChartTranslationOptionOrderer());
m_size = m_ruleLimit;
}
}
void ChartTranslationOptionList::ApplyThreshold()
{
// keep only those over best + threshold
float scoreThreshold = -std::numeric_limits<float>::infinity();

View File

@ -46,7 +46,6 @@ class ChartTranslationOptionList
const WordsRange &);
void Clear();
void ShrinkToLimit();
void ApplyThreshold();
private:

View File

@ -76,12 +76,14 @@ Phrase ChartTrellisNode::GetOutputPhrase() const
// exactly like same fn in hypothesis, but use trellis nodes instead of prevHypos pointer
Phrase ret(ARRAY_SIZE_INCR);
const TargetPhrase &currTargetPhrase = m_hypo.GetCurrTargetPhrase();
const Phrase &currTargetPhrase = m_hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
m_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
for (size_t pos = 0; pos < currTargetPhrase.GetSize(); ++pos) {
const Word &word = currTargetPhrase.GetWord(pos);
if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
size_t nonTermInd = currTargetPhrase.GetAlignmentInfo().GetNonTermIndexMap()[pos];
size_t nonTermInd = nonTermIndexMap[pos];
const ChartTrellisNode &childNode = GetChild(nonTermInd);
Phrase childPhrase = childNode.GetOutputPhrase();
ret.Append(childPhrase);

View File

@ -20,7 +20,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/version.hpp>
#ifdef WITH_THREADS
#include <boost/thread/locks.hpp>
#endif
#include <ostream>
#include <string>
#include "FactorCollection.h"

View File

@ -37,7 +37,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "InputType.h"
#include "LMList.h"
#include "Manager.h"
#include "hash.h"
using namespace std;

View File

@ -24,7 +24,7 @@ lib moses_internal :
[ glob *.cpp DynSAInclude/*.cpp : PhraseDictionary.cpp ThreadPool.cpp SyntacticLanguageModel.cpp *Test.cpp Mock*.cpp ]
synlm ThreadPool headers rt ;
lib moses : PhraseDictionary.cpp moses_internal CYKPlusParser//CYKPlusParser CompactPT//CompactPT LM//LM RuleTable//RuleTable Scope3Parser//Scope3Parser fuzzy-match//fuzzy-match headers ../..//z ../../OnDiskPt//OnDiskPt ../..//boost_filesystem ;
lib moses : PhraseDictionary.cpp moses_internal CYKPlusParser//CYKPlusParser CompactPT//CompactPT LM//LM RuleTable//RuleTable Scope3Parser//Scope3Parser fuzzy-match//fuzzy-match headers ../..//z ../../OnDiskPt//OnDiskPt ;
alias headers-to-install : [ glob-tree *.h ] ;

View File

@ -395,11 +395,11 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
// loop over rule
for (size_t phrasePos = 0, wordPos = 0;
phrasePos < target.GetSize();
phrasePos < hypo.GetCurrTargetPhrase().GetSize();
phrasePos++)
{
// consult rule for either word or non-terminal
const Word &word = target.GetWord(phrasePos);
const Word &word = hypo.GetCurrTargetPhrase().GetWord(phrasePos);
// regular word
if (!word.IsNonTerminal())

View File

@ -55,8 +55,10 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
{
const size_t size = source.GetSize();
const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
PhraseDictionaryNodeSCFG *currNode = &m_collection;
map<size_t, size_t> sourceToTargetMap(target.GetAlignmentInfo().begin(), target.GetAlignmentInfo().end());
for (size_t pos = 0 ; pos < size ; ++pos) {
const Word& word = source.GetWord(pos);
@ -64,9 +66,10 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
// indexed by source label 1st
const Word &sourceNonTerm = word;
map<size_t, size_t>::const_iterator iterAlign = sourceToTargetMap.find(pos);
CHECK(iterAlign != sourceToTargetMap.end());
CHECK(iterAlign != target.GetAlignmentInfo().end());
CHECK(iterAlign->first == pos);
size_t targetNonTermInd = iterAlign->second;
++iterAlign;
const Word &targetNonTerm = target.GetWord(targetNonTermInd);
currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);

View File

@ -20,9 +20,6 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/filesystem/operations.hpp>
#include <boost/filesystem/path.hpp>
#include <string>
#include "util/check.hh"
#include "PhraseDictionaryMemory.h"
@ -2095,16 +2092,24 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
void StaticData::SetExecPath(const std::string &path)
{
namespace fs = boost::filesystem;
/*
namespace fs = boost::filesystem;
fs::path full_path( fs::initial_path<fs::path>() );
full_path = fs::system_complete( fs::path( path ) );
//Without file name
m_binPath = full_path.parent_path().string();
*/
fs::path full_path( fs::initial_path<fs::path>() );
full_path = fs::system_complete( fs::path( path ) );
//Without file name
m_binPath = full_path.parent_path().string();
// NOT TESTED
size_t pos = path.rfind("/");
if (pos != string::npos)
{
m_binPath = path.substr(0, pos);
}
cerr << m_binPath << endl;
}
const string &StaticData::GetBinDirectory() const

View File

@ -285,6 +285,7 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
return clone;
}
namespace {
void MosesShouldUseExceptions(bool value) {
if (!value) {
@ -294,7 +295,6 @@ void MosesShouldUseExceptions(bool value) {
}
} // namespace
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
{
set<pair<size_t,size_t> > alignmentInfo;

View File

@ -1,70 +0,0 @@
// $Id$
/* Mixing step of the hash below: scrambles the three 32-bit state words
 * a, b and c in place using subtract / xor / shift rounds. */
#define mix(a,b,c) \
{ \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8); \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12);  \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5); \
  a -= b; a -= c; a ^= (c>>3);  \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
}

/**
 * Hash a byte string into a 32-bit value.
 *
 * @param k       the key (need not be NUL-terminated; exactly `length` bytes are read)
 * @param length  the length of the key in bytes
 * @param initval the previous hash, or an arbitrary value (lets hashes be chained)
 * @return the hash value
 *
 * The `register` storage-class specifier that used to decorate the parameters
 * and locals was removed: it is ill-formed since C++17 and never affected the
 * result.
 *
 * NOTE: `k` is plain `char`, so on platforms where `char` is signed, bytes
 * >= 0x80 are sign-extended before being added to the state. This is kept
 * as-is on purpose so that existing hash values do not change.
 */
unsigned int quick_hash(const char *k, unsigned int length, unsigned int initval)
{
  unsigned int a,b,c,len;

  /* Set up the internal state */
  len = length;
  a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
  c = initval;         /* the previous hash value */

  /*---------------------------------------- handle most of the key */
  while (len >= 12) {
    a += (k[0] +((unsigned int)k[1]<<8) +((unsigned int)k[2]<<16) +((unsigned int)k[3]<<24));
    b += (k[4] +((unsigned int)k[5]<<8) +((unsigned int)k[6]<<16) +((unsigned int)k[7]<<24));
    c += (k[8] +((unsigned int)k[9]<<8) +((unsigned int)k[10]<<16)+((unsigned int)k[11]<<24));
    mix(a,b,c);
    k += 12;
    len -= 12;
  }

  /*------------------------------------- handle the last 11 bytes */
  c += length;
  switch(len) {            /* all the case statements fall through */
  case 11:
    c+=((unsigned int)k[10]<<24);
  case 10:
    c+=((unsigned int)k[9]<<16);
  case 9 :
    c+=((unsigned int)k[8]<<8);
    /* the first byte of c is reserved for the length */
  case 8 :
    b+=((unsigned int)k[7]<<24);
  case 7 :
    b+=((unsigned int)k[6]<<16);
  case 6 :
    b+=((unsigned int)k[5]<<8);
  case 5 :
    b+=k[4];
  case 4 :
    a+=((unsigned int)k[3]<<24);
  case 3 :
    a+=((unsigned int)k[2]<<16);
  case 2 :
    a+=((unsigned int)k[1]<<8);
  case 1 :
    a+=k[0];
    /* case 0: nothing left to add */
  }
  mix(a,b,c);
  /*-------------------------------------------- report the result */
  return c;
}

View File

@ -1,8 +0,0 @@
#ifndef moses_hash_h
#define moses_hash_h
// taken from burtleburtle.net/bob/hash/doobs.html
// Hashes `length` bytes starting at `k` into a 32-bit value.
//   k       - the key
//   length  - the length of the key
//   initval - the previous hash, or an arbitrary value
// The `register` specifiers were dropped from the declaration: they are
// ill-formed since C++17 and are not part of the function's type.
unsigned int quick_hash(const char *k, unsigned int length, unsigned int initval);
#endif

View File

@ -90,7 +90,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
std::string alignmentLine;
XmlTreeParser xmlTreeParser(labelSet, topLabelSet);
ScfgRuleWriter writer(fwdExtractStream, invExtractStream, options);
size_t lineNum = 0;
size_t lineNum = options.sentenceOffset;
while (true) {
std::getline(targetStream, targetLine);
std::getline(sourceStream, sourceLine);
@ -289,6 +289,9 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
"extract minimal rules only")
("PCFG",
"include score based on PCFG scores in target corpus")
("SentenceOffset",
po::value(&options.sentenceOffset)->default_value(options.sentenceOffset),
"set sentence number offset if processing split corpus")
("UnknownWordLabel",
po::value(&options.unknownWordFile),
"write unknown word labels to named file")

View File

@ -38,6 +38,7 @@ struct Options {
, maxScope(3)
, minimal(false)
, pcfg(false)
, sentenceOffset(0)
, unpairedExtractFormat(false) {}
// Positional options
@ -57,6 +58,7 @@ struct Options {
int maxScope;
bool minimal;
bool pcfg;
int sentenceOffset;
bool unpairedExtractFormat;
std::string unknownWordFile;
};

View File

@ -13,10 +13,10 @@ if $(with-regtest) {
if $(with-regtest) {
test-dir = $(with-regtest)/tests ;
rule reg_test ( name : tests * : program : action ) {
rule reg_test ( name : tests * : programs * : action ) {
alias $(name) : $(tests:D=).passed ;
for test in $(tests) {
make $(test:D=).passed : $(program) : $(action) ;
make $(test:D=).passed : $(programs) : $(action) ;
alias $(test) : $(test:D=).passed ;
}
}
@ -43,11 +43,11 @@ if $(with-regtest) {
}
reg_test extractrules : [ glob $(test-dir)/extract-rules.* : $(with-regtest)/extract-rules.hierarchical ] : ../phrase-extract//extract-rules : @reg_test_extractrules ;
pwd = [ path.pwd ] ;
actions reg_test_mert {
$(TOP)/regression-testing/run-test-mert.perl --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
$(TOP)/regression-testing/run-test-mert.perl --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) --bin-dir=$(pwd)/$(>:D) && touch $(<)
}
reg_test mert : [ glob $(test-dir)/mert.* ] : ../mert//mert : @reg_test_mert ;
reg_test mert : [ glob $(test-dir)/mert.* ] : ../mert//mert ../mert//extractor ../mert//pro : @reg_test_mert ;
alias all : phrase chart mert score extract extractrules ;
}

View File

@ -361,13 +361,13 @@ pcfg-extract
default-name: model/pcfg
ignore-unless: use-pcfg-feature
rerun-on-change: use-pcfg-feature
template: $moses-script-dir/training/phrase-extract/pcfg-extract/pcfg-extract < IN.$output-extension > OUT.$output-extension
template: $moses-bin-dir/pcfg-extract < IN.$output-extension > OUT.$output-extension
pcfg-score
in: parse-relaxed-corpus pcfg
out: scored-corpus
default-name: model/scored-corpus
pass-unless: use-pcfg-feature
template: ln -s IN.$input-extension OUT.$input-extension ; $moses-script-dir/training/phrase-extract/pcfg-score/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension
template: ln -s IN.$input-extension OUT.$input-extension ; $moses-bin-dir/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension
extract-phrases
in: word-alignment scored-corpus
out: extracted-phrases

View File

@ -1065,7 +1065,7 @@ sub execute_steps {
$DO{$i}++;
print "\texecuting $step via qsub ($active active)\n";
my $qsub_args = &get_qsub_args($DO_STEP[$i]);
`qsub $qsub_args -e $step.STDERR -o $step.STDOUT $step`;
`qsub $qsub_args -S /bin/bash -e $step.STDERR -o $step.STDOUT $step`;
}
# execute in fork

View File

@ -869,9 +869,12 @@ function rule_summary() {
printf("tree depth: %.2f<br>\n",$depth);
printf("nt/rule: %.2f<br>\n",$nt_count/$total);
print "<table>\n";
arsort($count_nt);
$i=0;
foreach ($count_nt as $rule => $count) {
printf("<tr><td>%s</td><td align=right>%d</td><td align=right>%.1f%s</td></tr>\n",$rule,$count,$count/$total*100,'%');
if ($i++ < 5) { printf("<tr><td>%s</td><td align=right>%d</td><td align=right>%.1f%s</td></tr>\n",$rule,$count,$count/$total*100,'%'); }
}
if (count($count_nt)>5) { print "<tr><td align=center>...</td><td align=center>...</td><td align=center>...</td></tr>\n"; }
print "</table>\n";
}
@ -920,6 +923,7 @@ function bleu_show() {
if ($filter != "") {
print "; filter: '$filter'";
}
sentence_annotation($count,$filter);
print "<p align=center><A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',5+$count,'".base64_encode($filter)."')\">5 more</A> | ";
print "<A HREF=\"javascript:show('bleu','" . $_GET['sort'] . "',10+$count,'".base64_encode($filter)."')\">10 more</A> | ";
@ -1126,6 +1130,12 @@ function sentence_annotation($count,$filter) {
//print "<div id=\"debug\">$sort / $offset</div>";
for($i=$offset;$i<$count+$offset && $i<count($bleu);$i++) {
$line = $bleu[$i];
$search_graph_dir = get_current_analysis_filename("basic","search-graph");
if (file_exists($search_graph_dir) && file_exists($search_graph_dir."/graph.".$line["id"])) {
$state = return_state_for_link();
print "<FONT SIZE=-1><A TARGET=_blank HREF=\"?$state&analysis=sgviz&set=$set&id=$id&sentence=".$line["id"]."\">show search graph</a></FONT><br>\n";
}
if ($hierarchical) {
annotation_hierarchical($line["id"],$segmentation[$line["id"]],$segmentation_out[$line["id"]],$node[$line["id"]]);
}

View File

@ -116,8 +116,10 @@ function precision_by_coverage_diff() {
$log_info[$log_count]["length"] -= $item[3];
}
print "<h4>By log<sub>2</sub>-count in the training corpus</h4>";
precision_by_coverage_diff_graph("byCoverage",$log_info,$log_info_new,$total,$img_width,SORT_NUMERIC);
precision_by_coverage_diff_matrix();
// load factored data
$d = dir("$dir/evaluation/$set.analysis.".get_precision_analysis_version($dir,$set,$id));
@ -290,6 +292,244 @@ function precision_by_word_diff($type) {
print "</table>\n";
}
# Print the "Impact of Change in Coverage" matrix that compares run $id (base)
# with run $id2 (alternative).
#
# Both runs' "precision-by-input-word" files are read as tab-separated lines:
#   item[0] = number of precise translations, item[1] = number of deleted words,
#   item[2] = number of all translations,     item[4] = count in training corpus,
#   item[5] = surface form of the input word.
# Words are bucketed by the integer log2 of their training count (-1 for unseen)
# in the base run (x-axis) and by the change of that bucket in the alternative
# run (y-axis). Each non-empty cell is drawn as an HTML canvas that links to the
# CoverageMatrixDetails view.
#
# Fixes relative to the previous version:
#  - explode() instead of split(), which was removed in PHP 7
#  - coverage1 sums the "coverage" key (the "count" key was never set)
#  - $matrix[$base] is created on demand, so a base class that did not occur in
#    the base run (e.g. -1 for words missing there) no longer hits an undefined row
#  - legend text: decrease is drawn red, and the deletion bar is blue
function precision_by_coverage_diff_matrix() {
  global $id,$id2;
  # area (in pixels) that represents one word in the drawings
  $scale = 30;

  print "<h4>Impact of Change in Coverage</h4>";
  print "Coverage in run $id is the x-axis, change in coverage in run $id2 is the y-axis. Size of box reflects how many output words are produced, yellow is the number of correct translations, green indicates increase, red decrease. The blue rectangle below each box indicates number of words dropped, and increase (cyan) or decrease (purple).<p>(";

  # legend: box sizes for 1..5 words
  for($i=1; $i<=5; $i++) {
    $size = (int)(sqrt($i*$scale));
    $name = "size-$i";
    print "<canvas id=\"$name\" width=\"$size\" height=\"$size\"></canvas><script language=\"javascript\">
var canvas = document.getElementById(\"$name\");
var ctx = canvas.getContext(\"2d\");
ctx.fillStyle = \"rgb(0,0,0)\";
ctx.fillRect (0, 0, $size, $size);
</script> = $i word";
    if ($i>1) { print "s"; }
    if ($i<5) { print ", "; }
  }
  print ")<p>";

  # get base data
  $data = file(get_current_analysis_filename("precision","precision-by-input-word"));
  $word = array(); $class = array();
  for($i=0;$i<count($data);$i++) {
    $line = rtrim($data[$i]);
    $item = explode("\t",$line);
    $surface = $item[5];
    $word[$surface] = array();
    $word[$surface]["precision"] = $item[0]; # number of precise translations
    $word[$surface]["delete"] = $item[1];    # number of deleted
    $word[$surface]["total"] = $item[2];     # number of all translations
    $word[$surface]["coverage"] = $item[4];  # count in training corpus
    if ($item[4] == 0) { $log_count = -1; }
    else { $log_count = (int) (log($item[4])/log(2)); }
    $word[$surface]["log_count"] = $log_count;
    if (!array_key_exists($log_count,$class)) { $class[$log_count] = array(); }
    $class[$log_count][] = $surface;
  }

  # init matrix
  $matrix = array();
  $max_base_log_count = -1;
  $min_alt_log_count = 99;
  $max_alt_log_count = -1;
  foreach(array_keys($class) as $log_count) {
    $matrix[$log_count] = array();
    if ($log_count > $max_base_log_count) { $max_base_log_count = $log_count; }
  }

  # get alternative data
  $data = file(get_current_analysis_filename2("precision","precision-by-input-word"));
  for($i=0;$i<count($data);$i++) {
    $line = rtrim($data[$i]);
    $item = explode("\t",$line);
    $surface = $item[5];
    if ($item[4] == 0) { $alt = -1; }
    else { $alt = (int) (log($item[4])/log(2)); }
    $base = -1;
    if (array_key_exists($surface,$word)) {
      $base = $word[$surface]["log_count"];
    }
    # y-axis is the change of the log-count class relative to the base run
    $alt -= $base;
    if ($alt > $max_alt_log_count) { $max_alt_log_count = $alt; }
    if ($alt < $min_alt_log_count) { $min_alt_log_count = $alt; }
    # the base class may not have occurred in the base run at all
    if (!array_key_exists($base,$matrix)) { $matrix[$base] = array(); }
    if (!array_key_exists($alt,$matrix[$base])) {
      $matrix[$base][$alt] = array();
      $matrix[$base][$alt]["precision1"] = 0;
      $matrix[$base][$alt]["delete1"] = 0;
      $matrix[$base][$alt]["total1"] = 0;
      $matrix[$base][$alt]["coverage1"] = 0;
      $matrix[$base][$alt]["precision2"] = 0;
      $matrix[$base][$alt]["delete2"] = 0;
      $matrix[$base][$alt]["total2"] = 0;
      $matrix[$base][$alt]["coverage2"] = 0;
    }
    # ignore mismatches in source words due to tokenization / casing
    if (array_key_exists($surface,$word)) {
      $matrix[$base][$alt]["precision1"] += $word[$surface]["precision"];
      $matrix[$base][$alt]["delete1"] += $word[$surface]["delete"];
      $matrix[$base][$alt]["total1"] += $word[$surface]["total"];
      $matrix[$base][$alt]["coverage1"] += $word[$surface]["coverage"];
      $matrix[$base][$alt]["precision2"] += $item[0];
      $matrix[$base][$alt]["delete2"] += $item[1];
      $matrix[$base][$alt]["total2"] += $item[2];
      $matrix[$base][$alt]["coverage2"] += $item[4];
    }
  }

  # make table
  print "<table border=1 cellspacing=0 cellpadding=0><tr><td>&nbsp;</td>";
  for($base=-1;$base<=$max_base_log_count;$base++) {
    print "<td align=center>$base</td>";
  }
  print "<td></td></tr>";
  for($alt=$max_alt_log_count;$alt>=$min_alt_log_count;$alt--) {
    print "<tr><td>$alt</td>";
    for($base=-1;$base<=$max_base_log_count;$base++) {
      print "<td align=center valign=center>";
      if (array_key_exists($base,$matrix) &&
          array_key_exists($alt,$matrix[$base])) {
        #print $matrix[$base][$alt]["precision1"]."->".
        #      $matrix[$base][$alt]["precision2"]."<br>";
        #print $matrix[$base][$alt]["delete1"]."->".
        #      $matrix[$base][$alt]["delete2"]."<br>";
        #print $matrix[$base][$alt]["total1"]."->".
        #      $matrix[$base][$alt]["total2"]."<br>";
        # box side length: square root of the larger of the two totals, scaled
        $total = $matrix[$base][$alt]["total1"];
        if ($matrix[$base][$alt]["total2"] > $total) {
          $total = $matrix[$base][$alt]["total2"];
        }
        $total = (int)(sqrt($total*$scale));
        if ($total>0) {
          # inner squares: correct translations (yellow) and their change
          $prec1 = $matrix[$base][$alt]["precision1"]*$scale;
          $prec2 = $matrix[$base][$alt]["precision2"]*$scale;
          if ($prec1 > $prec2) {
            # precision went down: red
            $prec_base = (int)(sqrt($prec1));
            $prec_imp = (int)(sqrt($prec1-$prec2));
            $prec_color = "255,100,100";
          }
          else {
            # precision went up (or is unchanged): green
            $prec_base = (int)(sqrt($prec2));
            $prec_imp = (int)(sqrt($prec2-$prec1));
            $prec_color = "100,255,100";
          }
          $prec_base_top = (int)(($total-$prec_base)/2);
          $prec_imp_top = (int)(($total-$prec_imp)/2);

          # bars below the box: dropped words (blue) and their change
          $del1 = $matrix[$base][$alt]["delete1"]*$scale;
          $del2 = $matrix[$base][$alt]["delete2"]*$scale;
          if ($del1 > $del2) {
            # fewer deletions: purple
            $del_base = $del1;
            $del_imp = $del1-$del2;
            $del_color = "150,100,255";
          }
          else {
            # more (or equally many) deletions: cyan
            $del_base = $del2;
            $del_imp = $del2-$del1;
            $del_color = "100,200,200";
          }
          $del_base_height = (int)($del_base/$total);
          $del_imp_height = (int)($del_imp/$total);

          $name = "matrix-$base-$alt";
          #print "$total/$prec1/$prec2 -> $prec_base/$prec_imp<br>";
          print "<a href=\"javascript:generic_show_diff('CoverageMatrixDetails','base=$base&alt=$alt')\">";
          print "<canvas id=\"$name\" width=\"$total\" height=\"".($total+$del_base_height+$del_imp_height)."\"></canvas></a>";
          print "<script language=\"javascript\">
var canvas = document.getElementById(\"$name\");
var ctx = canvas.getContext(\"2d\");
ctx.fillStyle = \"rgb(200,200,200)\";
ctx.fillRect (0, 0, $total, $total);
ctx.fillStyle = \"rgb(200,200,0)\";
ctx.fillRect ($prec_base_top, $prec_base_top, $prec_base, $prec_base);
ctx.fillStyle = \"rgb($prec_color)\";
ctx.fillRect ($prec_imp_top, $prec_imp_top, $prec_imp, $prec_imp);
ctx.fillStyle = \"rgb(100,100,255)\";
ctx.fillRect (0, $total, $total, $del_base_height);
ctx.fillStyle = \"rgb($del_color)\";
ctx.fillRect (0, ".($total+$del_base_height).", $total, $del_imp_height);
</script>";
        }
      }
      print "</td>";
    }
    print "<td>$alt</td></tr>";
  }
  print "<tr><td></td>";
  for($base=-1;$base<=$max_base_log_count;$base++) {
    print "<td align=center>$base</td>";
  }
  print "<td></td></tr></table><div id=\"CoverageMatrixDetails\"></div>";
}
# Print the per-word detail table behind one cell of the coverage diff matrix.
#
# Request parameters (read from $_GET):
#   base - log2 bucket of a word's training-corpus count in the first data set
#          (-1 stands for a count of zero)
#   alt  - difference between the word's bucket in the alternative data set and "base"
#
# Every word that sits in bucket "base" in the first file and in bucket
# "base"+"alt" in the alternative file gets one table row with its precision
# and deletion figures and their change relative to the first file.
# Rows are printed in ascending order of the sort key built below, i.e. the
# largest precision gain comes first.
function precision_by_coverage_diff_matrix_details() {
  $alt = $_GET["alt"];
  $base = $_GET["base"];
  $impact_total = 0;
  $word = array();
  # must be an array even when no row matches, otherwise sort()/foreach below fail
  $all_out = array();

  # first data set: remember the statistics of every word in bucket $base
  $data = file(get_current_analysis_filename("precision","precision-by-input-word"));
  for($i=0;$i<count($data);$i++) {
    $item = explode("\t",rtrim($data[$i]));
    if (count($item) < 6) { continue; } # blank or truncated line
    if ($item[4] == 0) { $log_count = -1; }
    else { $log_count = (int) (log($item[4])/log(2)); }
    if ($log_count == $base) {
      $surface = $item[5];
      $word[$surface] = array();
      $word[$surface]["precision"] = $item[0]; # number of precise translations
      $word[$surface]["delete"] = $item[1];    # number of deleted
      $word[$surface]["total"] = $item[2];     # number of all translations
      $word[$surface]["coverage"] = $item[4];  # count in training corpus
    }
    # the impact columns are relative to the translations of ALL words
    $impact_total += $item[2];
  }
  print "<table border=1><tr><td align=center>&nbsp;</td><td align=center colspan=3>Precision</td><td align=center colspan=2>Precision Impact</td><td align=center colspan=3>Delete</td><td align=center colspan=2>Delete Impact</td></tr>\n";

  # get alternative data
  $data = file(get_current_analysis_filename2("precision","precision-by-input-word"));
  for($i=0;$i<count($data);$i++) {
    $item = explode("\t",rtrim($data[$i]));
    if (count($item) < 6) { continue; } # blank or truncated line
    if ($item[4] == 0) { $log_count = -1; }
    else { $log_count = (int) (log($item[4])/log(2)); }
    $surface = $item[5];
    if ($log_count-$base == $alt && array_key_exists($surface,$word)) {
      $precision = $item[0]; # number of precise translations
      $delete = $item[1];    # number of deleted
      $total = $item[3];     # number of all translations + deletions
      $coverage = $item[4];  # count in training corpus
      $precision_diff = $precision-$word[$surface]["precision"];
      $delete_diff = $delete-$word[$surface]["delete"];

      # zero-padded sort key, separated from the row markup by a tab;
      # a larger precision gain yields a smaller key
      $out = sprintf("%07d", (1-$precision_diff/$impact_total)*1000000);
      $out .= "\t<tr><td align=center>$surface</td>";
      $out .= sprintf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",
                      $precision/$total*100,"%",
                      $precision_diff/$total*100,"%",
                      $precision_diff,$total);
      $out .= sprintf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",
                      $precision_diff/$impact_total*100,"%",
                      $precision_diff,$impact_total);
      $out .= sprintf("<td align=right>%.1f%s</td><td align=right>%+.1f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",
                      $delete/$total*100,"%",
                      $delete_diff/$total*100,"%",
                      $delete_diff,$total);
      $out .= sprintf("<td align=right>%+.2f%s</td><td align=right><font size=-1>%+.1f/%d</font></td>",
                      $delete_diff/$impact_total*100,"%",
                      $delete_diff,$impact_total);
      $out .= "</tr>";
      $all_out[] = $out;
    }
  }

  # order by sort key, then print only the markup part of each entry
  sort($all_out);
  foreach($all_out as $out) { $o = explode("\t",$out); print $o[1]; }
  print "</table>";
}
function precision_by_coverage_diff_graph($name,$log_info,$log_info_new,$total,$img_width,$sort_type) {
$keys = array_keys($log_info);
@ -502,12 +742,22 @@ function bleu_diff() {
print "</font><BR>\n";
bleu_diff_annotation();
print "<font size=-1>";
print "<A HREF=\"javascript:diff('bleu','" . $_GET['sort'] . "',5+$count)\">more</A> ";
print "</font><BR>\n";
}
function bleu_diff_annotation() {
global $set,$id,$id2,$dir;
// load data
// load input
$input_annotation = file(get_analysis_filename($dir,$set,$id,"coverage","input-annotation"));
for($i=0;$i<count($input_annotation);$i++) {
$item = split("\t",$input_annotation[$i]);
$input[$i] = $item[0];
}
// load translations
for($idx=0;$idx<2;$idx++) {
$data = file(get_analysis_filename($dir,$set,$idx?$id2:$id,"basic","bleu-annotation"));
for($i=0;$i<count($data);$i++) {
@ -575,6 +825,7 @@ function bleu_diff_annotation() {
// display
for($i=0;$i<$count && $i<count($annotation);$i++) {
$line = $annotation[$i];
print "<font size=-2>[src]</font> ".$input[$line["id"]]."<br>";
$word_with_score1 = split(" ",$line["system1"]);
$word_with_score0 = split(" ",$line["system0"]);

108
scripts/ems/web/base64.js Normal file
View File

@ -0,0 +1,108 @@
// Sentinel returned by the reader functions once the input string is used up.
var END_OF_INPUT = -1;

// The base64 alphabet; the position of a character is its 6-bit value.
var base64Chars =
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'.split('');

// Reverse lookup: alphabet character -> 6-bit value.
var reverseBase64Chars = [];
var i = 0;
while (i < base64Chars.length) {
  reverseBase64Chars[base64Chars[i]] = i;
  i++;
}

// Reader state shared by the functions below: current input string and read position.
var base64Str, base64Count;
/**
 * Make `str` the current input of the reader functions and rewind the
 * read position to its first character.
 */
function setBase64Str(str){
  base64Count = 0;
  base64Str = str;
}
/**
 * Return the low 8 bits of the next character code of the current input and
 * advance the read position; returns END_OF_INPUT when there is no input
 * (or it is empty) or the position has reached its end.
 */
function readBase64(){
  if (!base64Str || base64Count >= base64Str.length) {
    return END_OF_INPUT;
  }
  var code = base64Str.charCodeAt(base64Count++) & 0xff;
  return code;
}
/**
 * Base64-encode `str`.
 *
 * The input is consumed three bytes at a time (each character contributes
 * only its low 8 bits, see readBase64) and every group becomes four output
 * characters, padded with '=' when the last group is short. A newline is
 * appended after every 76 output characters.
 */
function encodeBase64(str){
  setBase64Str(str);
  var encoded = '';
  var charsOnLine = 0;
  for (;;) {
    var first = readBase64();
    if (first == END_OF_INPUT) break;
    var second = readBase64();
    var third = readBase64();
    var lastGroup = false;

    var quad = base64Chars[first >> 2];
    if (second == END_OF_INPUT) {
      // one input byte left: two characters plus two pad characters
      quad += base64Chars[(first << 4) & 0x30] + '==';
      lastGroup = true;
    } else {
      quad += base64Chars[((first << 4) & 0x30) | (second >> 4)];
      if (third == END_OF_INPUT) {
        // two input bytes left: three characters plus one pad character
        quad += base64Chars[(second << 2) & 0x3c] + '=';
        lastGroup = true;
      } else {
        quad += base64Chars[((second << 2) & 0x3c) | (third >> 6)];
        quad += base64Chars[third & 0x3F];
      }
    }
    encoded += quad;

    charsOnLine += 4;
    if (charsOnLine >= 76){
      encoded += '\n';
      charsOnLine = 0;
    }
    if (lastGroup) break;
  }
  return encoded;
}
/**
 * Return the 6-bit value of the next base64 alphabet character in the
 * current input, skipping every character that is not in the alphabet
 * (padding, line breaks, ...). Returns END_OF_INPUT when nothing is left.
 */
function readReverseBase64(){
  if (!base64Str) return END_OF_INPUT;
  while (base64Count < base64Str.length) {
    var ch = base64Str.charAt(base64Count++);
    var sextet = reverseBase64Chars[ch];
    if (sextet) {
      return sextet;
    }
    // 'A' maps to 0, which the truthiness test above cannot tell from "not found"
    if (ch == 'A') return 0;
  }
  return END_OF_INPUT;
}
/**
 * Turn a byte value into the corresponding one-character string by
 * building a two-digit "%hh" escape and unescaping it.
 */
function ntos(n){
  var hex = n.toString(16);
  var escaped = "%" + (hex.length == 1 ? "0" + hex : hex);
  return unescape(escaped);
}
/**
 * Decode the base64 text `str`.
 *
 * Alphabet characters are read in groups of four 6-bit values; each full
 * group yields three bytes, a trailing group of three or two values yields
 * two or one byte. Characters outside the alphabet are ignored by
 * readReverseBase64. The result is a string with one character per byte.
 */
function decodeBase64(str){
  setBase64Str(str);
  var decoded = "";
  for (;;) {
    var s0 = readReverseBase64();
    if (s0 == END_OF_INPUT) break;
    var s1 = readReverseBase64();
    if (s1 == END_OF_INPUT) break;
    var s2 = readReverseBase64();
    var s3 = readReverseBase64();

    decoded += ntos(((s0 << 2) & 0xff) | s1 >> 4);
    if (s2 == END_OF_INPUT) break;
    decoded += ntos(((s1 << 4) & 0xff) | s2 >> 2);
    if (s3 == END_OF_INPUT) break;
    decoded += ntos(((s2 << 6) & 0xff) | s3);
  }
  return decoded;
}

BIN
scripts/ems/web/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

@ -5,10 +5,12 @@ require("overview.php");
require("analysis.php");
require("analysis_diff.php");
require("diff.php");
require("sgviz.php");
function head($title) {
print '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html><head><title>'.$title.'</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<script language="javascript" src="/javascripts/prototype.js"></script>
<script language="javascript" src="/javascripts/scriptaculous.js"></script>
<script language="javascript" src="hierarchical-segmentation.js"></script>
@ -43,8 +45,11 @@ if (array_key_exists("setup",$_POST) || array_key_exists("setup",$_GET)) {
else if (preg_match("/PrecisionByWordDiff(.+)_show/",$action,$match)) { precision_by_word_diff($match[1]); }
else if (preg_match("/PrecisionByWord(.+)_show/",$action,$match)) { precision_by_word($match[1]); }
else if ($action == "CoverageDetails_show") { coverage_details(); }
else if ($action == "CoverageMatrixDetails_show") { precision_by_coverage_diff_matrix_details(); }
else if ($action == "SegmentationSummary_show") { segmentation_summary(); }
else if ($action == "biconcor") { biconcor(base64_decode($_GET["phrase"])); }
else if ($action == "sgviz") { sgviz($_GET["sentence"]); }
else if ($action == "sgviz_data") { sgviz_data($_GET["sentence"]); }
else { print "ERROR! $action"; }
}
else if (array_key_exists("analysis_diff_home",$_GET)) {

View File

@ -65,7 +65,13 @@ function load_experiment_info() {
}
krsort($experiment);
ksort($evalset);
uksort($evalset,"evalsetsort");
}
// Key comparison callback (used with uksort): the key "avg" is ordered
// before every other key, all remaining keys are compared with strcmp().
function evalsetsort($a,$b) {
  if ($a == "avg") {
    return -1;
  }
  return ($b == "avg") ? 1 : strcmp($a,$b);
}
function load_parameter($run) {
@ -187,7 +193,7 @@ function get_analysis_version($dir,$set,$id) {
if (file_exists("$dir/steps/$id/REPORTING_report.$id")) {
$report = file("$dir/steps/$id/REPORTING_report.$id.INFO");
foreach ($report as $line) {
if (preg_match("/\# reuse run (\d+) for EVALUATION:(.+):analysis/",$line,$match) &&
if (preg_match("/\# reuse run (\d+) for EVALUATION:(.+):analysis$/",$line,$match) &&
$match[2] == $set) {
if (file_exists("$prefix.$match[1]/summary")) {
$analysis_version[$id][$set]["basic"] = $match[1];

View File

@ -13,7 +13,7 @@ function setup() {
print "<TR><TD><A HREF=\"?setup=$dir[0]\">$dir[0]</A></TD><TD>$dir[1]</TD><TD>$dir[2]</TD><TD>$dir[3]</TD></TR>\n";
}
print "</TABLE>\n";
print "<P>To add experiment, edit setup in web directory";
print "<P>To add experiment, edit the file 'setup' in the web directory.";
}
function overview() {
@ -134,7 +134,9 @@ function overview() {
print "var best_score = [];\n";
reset($evalset);
while (list($set,$dummy) = each($evalset)) {
print "best_score[\"$set\"] = ".$best[$set].";\n";
if ($best[$set] != "" && $best[$set]>0) {
print "best_score[\"$set\"] = ".$best[$set].";\n";
}
}
?>
@ -282,28 +284,29 @@ function output_score($id,$info) {
$each_score = explode(" ; ",$score);
for($i=0;$i<count($each_score);$i++) {
if (preg_match('/([\d\(\)\.\s]+) (\S*)/',$each_score[$i],$match)) {
//if ($i>0) { print "&nbsp;"; }
$opened_a_tag = 0;
if ($set != "avg") {
if (file_exists("$dir/evaluation/$set.cleaned.$id")) {
print "<a href=\"?$state&show=evaluation/$set.cleaned.$id\">";
$opened_a_tag = 1;
}
else if (file_exists("$dir/evaluation/$set.output.$id")) {
print "<a href=\"?$state&show=evaluation/$set.output.$id\">";
$opened_a_tag = 1;
}
}
if ($set == "avg" && count($each_score)>1) { print $match[2].": "; }
print "<div title=". $match[2] ." class=". $match[2] .">".$match[1]."</div>";
if ($opened_a_tag) { print "</a>"; }
if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) ||
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) {
if ($i>0) { print "<BR>"; }
$opened_a_tag = 0;
if ($set != "avg") {
if (file_exists("$dir/evaluation/$set.cleaned.$id")) {
print "<a href=\"?$state&show=evaluation/$set.cleaned.$id\">";
$opened_a_tag = 1;
}
else if (file_exists("$dir/evaluation/$set.output.$id")) {
print "<a href=\"?$state&show=evaluation/$set.output.$id\">";
$opened_a_tag = 1;
}
}
if ($set == "avg" && count($each_score)>1) { print $match[2].": "; }
print $match[1];
if ($opened_a_tag) { print "</a>"; }
}
else {
print "-";
print "-";
}
}
print "</td>";
if ($has_analysis && array_key_exists($set,$has_analysis)) {
print "<td align=center>";

1703
scripts/ems/web/sgviz.js Normal file

File diff suppressed because it is too large Load Diff

65
scripts/ems/web/sgviz.php Normal file
View File

@ -0,0 +1,65 @@
<?php
// Emit the stand-alone search graph visualization page for one sentence.
//
// $sentence is the zero-based line number of the sentence in the
// input-annotation file. The page defines the JavaScript array "input"
// (the source words wrapped in <s> ... </s>), loads sgviz.js and then
// fetches the edges through an Ajax request to the "sgviz_data" action,
// calling process_hypotheses() once they have arrived.
function sgviz($sentence) {
  global $setup,$dir,$id,$set;
  // the value comes from the request and is echoed into HTML and JavaScript
  // below; it is only ever used as a number, so reduce it to one
  $sentence = (int) $sentence;
?><html><head><title>Search Graph Visualization, Sentence <?php print $sentence ?></title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<script language="javascript" src="/javascripts/prototype.js"></script></head>
<body><svg id="sg" height="500" width="900" xmlns="http://www.w3.org/2000/svg"><g id="chart"></g></svg>
<script>
var sg = document.getElementById("sg");
sg.setAttribute("width", window.innerWidth-20);
sg.setAttribute("height",window.innerHeight-20);
<?php
  // read input sentence: skip $sentence lines, the next one is ours
  $line = "";
  $handle = fopen(get_current_analysis_filename("coverage","input-annotation"),"r");
  if ($handle) {
    for($i=0;$i<$sentence;$i++) { $line = fgets($handle); }
    $line = fgets($handle);
    fclose($handle);
  }
  // only the first tab-separated field is used, as the space-separated words
  $l = explode("\t",$line);
  print "input=[\"<s>\",\"".join("\",\"",explode(" ",addslashes($l[0])))."\",\"</s>\"];\n";
?>
</script>
<script language="javascript" src="sgviz.js"></script>
<script>
var edge = new Array();
function test() {
alert("test");
}
new Ajax.Request('?analysis=sgviz_data'
+ '&setup=<?php print urlencode($setup) ?>'
+ '&id=<?php print urlencode($id) ?>'
+ '&set=<?php print urlencode($set) ?>'
+ '&sentence=<?php print $sentence; ?>',
{
onSuccess: function(transport) {
var json = transport.responseText.evalJSON();
edge = json.edge;
process_hypotheses();
},
method: "post"
});
</script></body></html>
<?php
  // read graph
  //$file = get_current_analysis_filename("basic","search-graph")."/graph.$sentence";
  //$handle = fopen($file,"r");
  //while (($line = fgets($handle)) !== false) {
  // $e = explode("\t",addslashes(chop($line)));
  // print "edge[$e[0]]=new Array($e[1],$e[2],$e[3],\"$e[4]\",\"$e[5]\",\"$e[6]\",$e[7],$e[8],$e[9],\"$e[10]\");\n";
  //}
  //fclose($handle);
}
// Answer the Ajax request of the search graph page: send the edges of one
// sentence's search graph as JSON ({"edge": ...}) and end the script.
//
// Each line of the file "graph.$sentence" in the search-graph directory is
// split on tabs; the first field is the key of the edge and the next ten
// fields are its data.
function sgviz_data($sentence) {
  header('Content-type: application/json');
  // request-supplied and used as part of a file name: reduce it to a number
  // so that it cannot point outside the search-graph directory
  $sentence = (int) $sentence;
  $edge = array(); // stays an empty list when the graph file is missing or empty
  $file = get_current_analysis_filename("basic","search-graph")."/graph.$sentence";
  $handle = fopen($file,"r");
  if ($handle) {
    while (($line = fgets($handle)) !== false) {
      $e = explode("\t",addslashes(chop($line)));
      // fields 1..10; lines with fewer fields are padded with null
      $edge[$e[0]] = array_slice(array_pad($e,11,null),1,10);
    }
    fclose($handle);
  }
  $return = array('edge' => $edge);
  print json_encode($return);
  exit();
}

View File

@ -965,7 +965,7 @@ sub safesystem {
sub getPwdCmd(){
my $pwdcmd="pwd";
my $a;
chomp($a=`which pawd | head -1 | awk '{print $1}'`);
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print $1}'`);
if ($a && -e $a){ $pwdcmd=$a; }
return $pwdcmd;
}

View File

@ -25,6 +25,7 @@ while(-e "$stem$ref") {
$ref++;
}
&add_to_ref($stem,\@REF) if -e $stem;
die("ERROR: could not find reference file $stem") unless scalar @REF;
sub add_to_ref {
my ($file,$REF) = @_;

View File

@ -248,7 +248,7 @@ sub safesystem {
sub getPwdCmd(){
my $pwdcmd="pwd";
my $a;
chomp($a=`which pawd | head -1 | awk '{print $1}'`);
chomp($a=`which pawd 2> /dev/null | head -1 | awk '{print $1}'`);
if ($a && -e $a){ $pwdcmd=$a; }
return $pwdcmd;
}

View File

@ -98,7 +98,7 @@ sub ensure_absolute {
my $target = shift;
my $originfile = shift;
my $cwd = `pawd`;
my $cwd = `pawd 2> /dev/null`;
$cwd = `pwd` if ! defined $cwd; # not everyone has pawd!
die "Failed to absolutize $target. Failing to get cwd!" if ! defined $cwd;
chomp $cwd;

View File

@ -9,6 +9,9 @@ use File::Temp qw/tempfile/;
my $BITPAR = "/exports/home/s0565741/work/bin/bitpar";
my $TMPDIR = "tmp";
my $SCRIPTS_ROOT_DIR = "$RealBin/../..";
my $DEESCAPE = "$SCRIPTS_ROOT_DIR/tokenizer/deescape-special-chars.perl";
my $DEBUG = 0;
my $BASIC = 0;
my $OLD_BITPAR = 0;
@ -27,11 +30,11 @@ my ($scriptname, $directories) = fileparse($0);
my ($TMP, $tmpfile) = tempfile("$scriptname-XXXXXXXXXX", DIR=>$TMPDIR, UNLINK=>1);
if ($OLD_BITPAR)
{
open(INPUT,"iconv -c -f UTF-8 -t iso-8859-1 |");
open(INPUT,"$DEESCAPE | iconv -c -f UTF-8 -t iso-8859-1 |");
}
else
{
open (INPUT,"cat |");
open (INPUT,"$DEESCAPE |");
}
while(<INPUT>)
{
@ -162,7 +165,12 @@ sub is_aux_label {
# Return a copy of the given text in which each of the characters
# & | < > ' " [ ] is replaced by its character entity.
sub escape {
    my ($text) = @_;
    my %entity_for = (
        '&' => '&amp;',
        '|' => '&#124;',
        '<' => '&lt;',
        '>' => '&gt;',
        "'" => '&apos;',
        '"' => '&quot;',
        '[' => '&#91;',
        ']' => '&#93;',
    );
    # one pass is enough: none of the entities contains a character that is replaced
    $text =~ s/([&|<>'"\[\]])/$entity_for{$1}/g;
    return $text;
}

View File

@ -9,16 +9,16 @@ namespace util {
namespace { const unsigned char kWidth = 100; }
ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<std::size_t>::max()), complete_(next_), out_(NULL) {}
ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<uint64_t>::max()), complete_(next_), out_(NULL) {}
ErsatzProgress::~ErsatzProgress() {
if (out_) Finished();
}
ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message)
ErsatzProgress::ErsatzProgress(uint64_t complete, std::ostream *to, const std::string &message)
: current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
if (!out_) {
next_ = std::numeric_limits<std::size_t>::max();
next_ = std::numeric_limits<uint64_t>::max();
return;
}
if (!message.empty()) *out_ << message << '\n';
@ -28,14 +28,14 @@ ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std
void ErsatzProgress::Milestone() {
if (!out_) { current_ = 0; return; }
if (!complete_) return;
unsigned char stone = std::min(static_cast<std::size_t>(kWidth), (current_ * kWidth) / complete_);
unsigned char stone = std::min(static_cast<uint64_t>(kWidth), (current_ * kWidth) / complete_);
for (; stones_written_ < stone; ++stones_written_) {
(*out_) << '*';
}
if (stone == kWidth) {
(*out_) << std::endl;
next_ = std::numeric_limits<std::size_t>::max();
next_ = std::numeric_limits<uint64_t>::max();
out_ = NULL;
} else {
next_ = std::max(next_, (stone * complete_) / kWidth);

View File

@ -4,6 +4,8 @@
#include <iostream>
#include <string>
#include <inttypes.h>
// Ersatz version of boost::progress so core language model doesn't depend on
// boost. Also adds option to print nothing.
@ -14,7 +16,7 @@ class ErsatzProgress {
ErsatzProgress();
// Null means no output. The null value is useful for passing along the ostream pointer from another caller.
explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
explicit ErsatzProgress(uint64_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
~ErsatzProgress();
@ -23,12 +25,12 @@ class ErsatzProgress {
return *this;
}
ErsatzProgress &operator+=(std::size_t amount) {
ErsatzProgress &operator+=(uint64_t amount) {
if ((current_ += amount) >= next_) Milestone();
return *this;
}
void Set(std::size_t to) {
void Set(uint64_t to) {
if ((current_ = to) >= next_) Milestone();
Milestone();
}
@ -40,7 +42,7 @@ class ErsatzProgress {
private:
void Milestone();
std::size_t current_, next_, complete_;
uint64_t current_, next_, complete_;
unsigned char stones_written_;
std::ostream *out_;

View File

@ -84,4 +84,7 @@ EndOfFileException::EndOfFileException() throw() {
}
EndOfFileException::~EndOfFileException() throw() {}
OverflowException::OverflowException() throw() {}
OverflowException::~OverflowException() throw() {}
} // namespace util

View File

@ -2,9 +2,12 @@
#define UTIL_EXCEPTION__
#include <exception>
#include <limits>
#include <sstream>
#include <string>
#include <inttypes.h>
namespace util {
template <class Except, class Data> typename Except::template ExceptionTag<Except&>::Identity operator<<(Except &e, const Data &data);
@ -111,6 +114,25 @@ class EndOfFileException : public Exception {
~EndOfFileException() throw();
};
// Exception type named by CheckOverflowInternal when a 64-bit value is larger
// than std::size_t can hold.
class OverflowException : public Exception {
public:
OverflowException() throw();
~OverflowException() throw();
};
// Convert a 64-bit value to std::size_t.
// Generic case: passes the check "value > max of std::size_t" to UTIL_THROW_IF
// together with OverflowException and an explanatory message, then returns the
// value. The template parameter is only used to select between this version and
// the specialization below; CheckOverflow instantiates it with sizeof(std::size_t).
// NOTE(review): UTIL_THROW_IF is defined outside this view; presumably it throws
// the named exception when the condition holds.
template <unsigned len> inline std::size_t CheckOverflowInternal(uint64_t value) {
UTIL_THROW_IF(value > static_cast<uint64_t>(std::numeric_limits<std::size_t>::max()), OverflowException, "Integer overflow detected. This model is too big for 32-bit code.");
return value;
}
// Specialization for len == 8: no check is performed, the value is returned as is.
template <> inline std::size_t CheckOverflowInternal<8>(uint64_t value) {
return value;
}
// Entry point: picks the checked or the unchecked version at compile time
// according to sizeof(std::size_t).
inline std::size_t CheckOverflow(uint64_t value) {
return CheckOverflowInternal<sizeof(std::size_t)>(value);
}
} // namespace util
#endif // UTIL_EXCEPTION__

View File

@ -6,6 +6,7 @@
#include <cstdio>
#include <iostream>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@ -111,6 +112,11 @@ void WriteOrThrow(int fd, const void *data_void, std::size_t size) {
}
}
// Write `size` bytes starting at `data` to the stdio stream `to` as a single
// fwrite item; anything but a complete write is reported as util::ErrnoException.
// `size` must be non-zero (asserted).
void WriteOrThrow(FILE *to, const void *data, std::size_t size) {
  assert(size);
  const std::size_t items_written = std::fwrite(data, size, 1, to);
  if (items_written != 1) UTIL_THROW(util::ErrnoException, "Short write; requested size " << size);
}
void FSyncOrThrow(int fd) {
// Apparently windows doesn't have fsync?
#if !defined(_WIN32) && !defined(_WIN64)
@ -119,8 +125,13 @@ void FSyncOrThrow(int fd) {
}
namespace {
void InternalSeek(int fd, off_t off, int whence) {
void InternalSeek(int fd, int64_t off, int whence) {
#if defined(_WIN32) || defined(_WIN64)
UTIL_THROW_IF((__int64)-1 == _lseeki64(fd, off, whence), ErrnoException, "Windows seek failed");
#else
UTIL_THROW_IF((off_t)-1 == lseek(fd, off, whence), ErrnoException, "Seek failed");
#endif
}
} // namespace
@ -143,6 +154,12 @@ std::FILE *FDOpenOrThrow(scoped_fd &file) {
return ret;
}
// Open `path` with fopen() in the given `mode` and return the stream.
// A null result from fopen() is handed to UTIL_THROW_IF with
// util::ErrnoException and a message naming the path and the mode.
// The function itself never closes the returned stream.
std::FILE *FOpenOrThrow(const char *path, const char *mode) {
std::FILE *ret;
UTIL_THROW_IF(!(ret = fopen(path, mode)), util::ErrnoException, "Could not fopen " << path << " for " << mode);
return ret;
}
TempMaker::TempMaker(const std::string &prefix) : base_(prefix) {
base_ += "XXXXXX";
}

View File

@ -80,6 +80,7 @@ void ReadOrThrow(int fd, void *to, std::size_t size);
std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount);
void WriteOrThrow(int fd, const void *data_void, std::size_t size);
void WriteOrThrow(FILE *to, const void *data, std::size_t size);
void FSyncOrThrow(int fd);
@ -90,6 +91,8 @@ void SeekEnd(int fd);
std::FILE *FDOpenOrThrow(scoped_fd &file);
std::FILE *FOpenOrThrow(const char *path, const char *mode);
class TempMaker {
public:
explicit TempMaker(const std::string &prefix);

View File

@ -5,6 +5,8 @@
#include "util/mmap.hh"
#ifdef WIN32
#include <io.h>
#else
#include <unistd.h>
#endif // WIN32
#include <iostream>

View File

@ -8,6 +8,7 @@
#include <functional>
#include <assert.h>
#include <inttypes.h>
namespace util {
@ -42,8 +43,8 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
typedef EqualT Equal;
public:
static std::size_t Size(std::size_t entries, float multiplier) {
std::size_t buckets = std::max(entries + 1, static_cast<std::size_t>(multiplier * static_cast<float>(entries)));
static uint64_t Size(uint64_t entries, float multiplier) {
uint64_t buckets = std::max(entries + 1, static_cast<uint64_t>(multiplier * static_cast<float>(entries)));
return buckets * sizeof(Entry);
}