Merge branch 'master' into miramerge

Conflicts:
	Jamroot
	mert/FeatureStats.cpp
	moses-cmd/src/IOWrapper.h
	scripts/training/mert-moses.pl
	scripts/training/train-model.perl.missing_bin_dir
This commit is contained in:
Colin Cherry 2012-05-30 12:39:53 -04:00
commit 3c44d04baf
124 changed files with 8906 additions and 438 deletions

1
.gitignore vendored
View File

@ -17,6 +17,7 @@ mert/extractor
mert/mert
mert/megam_i686.opt
mert/pro
mert/kbmira
misc/processLexicalTable
misc/processPhraseTable
misc/queryLexicalTable

View File

@ -115,8 +115,7 @@ build-project scripts ;
#Regression tests (only does anything if --with-regtest is passed)
build-project regression-testing ;
alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDisk mert//programs contrib/server//mosesserver misc//programs mira//programs ;
alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDisk OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs mira//programs ;
install-bin-libs programs ;
install-headers headers-base : [ glob-tree *.h *.hh : jam-files dist kenlm moses ] : . ;

View File

@ -1,2 +1,5 @@
lib OnDiskPt : OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp ../moses/src//headers ;
exe CreateOnDisk : Main.cpp ../moses/src//moses OnDiskPt ;
exe queryOnDiskPt : queryOnDiskPt.cpp ../moses/src//moses OnDiskPt ;

View File

@ -6,7 +6,7 @@
#include <string>
#include <vector>
#include "util.h"
#include "Util.h"
#include "OnDiskWrapper.h"
#include "SourcePhrase.h"

View File

@ -0,0 +1,131 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/OnDiskPt"/>
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/OnDiskPt/Debug"/>
<entry flags="RESOLVED" kind="libraryFile" name="OnDiskPt"/>
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.725420545" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1586272140" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/OnDiskPt/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1909553559" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.30521110" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.478334849" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1328561226" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.108239817" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1825070846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.901309550" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.576529322" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.426851981" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1925590121" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.726316251" name="Main.h" rcbsApplicability="disable" resourcePath="Main.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="Main.h|Main.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.701931933">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.701931933" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.701931933." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.5036266" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.396818757" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/OnDiskPt/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1081186575" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.894082374" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.640159085" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1673993744" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.596082362" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.851420859" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.385722535" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.21058138" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1704184753" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.171488636" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.843129626" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1014721928" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="OnDiskPt.cdt.managedbuild.target.macosx.exe.542902806" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384;cdt.managedbuild.tool.gnu.c.compiler.input.1014721928">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634;cdt.managedbuild.tool.gnu.c.compiler.input.1925590121">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511;cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413;cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
</storageModule>
</cproject>

View File

@ -0,0 +1,185 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>OnDiskPt</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildLocation</key>
<value>${workspace_loc:/OnDiskPt/Debug}</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Jamfile</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.h</locationURI>
</link>
<link>
<name>OnDiskWrapper.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/OnDiskWrapper.cpp</locationURI>
</link>
<link>
<name>OnDiskWrapper.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/OnDiskWrapper.h</locationURI>
</link>
<link>
<name>Phrase.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Phrase.cpp</locationURI>
</link>
<link>
<name>Phrase.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Phrase.h</locationURI>
</link>
<link>
<name>PhraseNode.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/PhraseNode.cpp</locationURI>
</link>
<link>
<name>PhraseNode.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/PhraseNode.h</locationURI>
</link>
<link>
<name>SourcePhrase.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/SourcePhrase.cpp</locationURI>
</link>
<link>
<name>SourcePhrase.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/SourcePhrase.h</locationURI>
</link>
<link>
<name>TargetPhrase.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhrase.cpp</locationURI>
</link>
<link>
<name>TargetPhrase.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhrase.h</locationURI>
</link>
<link>
<name>TargetPhraseCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhraseCollection.cpp</locationURI>
</link>
<link>
<name>TargetPhraseCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhraseCollection.h</locationURI>
</link>
<link>
<name>Vocab.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Vocab.cpp</locationURI>
</link>
<link>
<name>Vocab.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Vocab.h</locationURI>
</link>
<link>
<name>Word.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Word.cpp</locationURI>
</link>
<link>
<name>Word.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Word.h</locationURI>
</link>
<link>
<name>queryOnDiskPt.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/queryOnDiskPt.cpp</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -0,0 +1,125 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/lm"/>
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/lm/Debug"/>
<entry flags="RESOLVED" kind="libraryFile" name="lm"/>
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.640882096" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.793478365" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/lm/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.36011795" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1252826468" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1024598065" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.139111896" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.62265891" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.588438623" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.775866405" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.586969644" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.934764060" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.2078705375" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1028526865" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.203229648">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.203229648" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.203229648" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.203229648." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1942852701" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.2107180060" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/lm/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.127652112" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1668850519" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.934899611" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.794276660" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.362272521" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.370659018" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2103660404" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1671568858" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.230723898" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1934130159" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1848737807" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1294441742" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="lm.cdt.managedbuild.target.macosx.exe.1399596076" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140;cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990;cdt.managedbuild.tool.gnu.c.compiler.input.1294441742">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722;cdt.managedbuild.tool.gnu.c.compiler.input.1028526865">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795;cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope"/>
</cproject>

View File

@ -0,0 +1,360 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>lm</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildLocation</key>
<value>${workspace_loc:/lm/Debug}</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>.DS_Store</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/.DS_Store</locationURI>
</link>
<link>
<name>COPYING</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/COPYING</locationURI>
</link>
<link>
<name>COPYING.LESSER</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/COPYING.LESSER</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/Jamfile</locationURI>
</link>
<link>
<name>LICENSE</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/LICENSE</locationURI>
</link>
<link>
<name>README</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/README</locationURI>
</link>
<link>
<name>bhiksha.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/bhiksha.cc</locationURI>
</link>
<link>
<name>bhiksha.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/bhiksha.hh</locationURI>
</link>
<link>
<name>binary_format.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/binary_format.cc</locationURI>
</link>
<link>
<name>binary_format.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/binary_format.hh</locationURI>
</link>
<link>
<name>blank.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/blank.hh</locationURI>
</link>
<link>
<name>build_binary</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/build_binary</locationURI>
</link>
<link>
<name>build_binary.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/build_binary.cc</locationURI>
</link>
<link>
<name>clean.sh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/clean.sh</locationURI>
</link>
<link>
<name>compile.sh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/compile.sh</locationURI>
</link>
<link>
<name>config.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/config.cc</locationURI>
</link>
<link>
<name>config.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/config.hh</locationURI>
</link>
<link>
<name>enumerate_vocab.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/enumerate_vocab.hh</locationURI>
</link>
<link>
<name>facade.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/facade.hh</locationURI>
</link>
<link>
<name>left.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/left.hh</locationURI>
</link>
<link>
<name>left_test.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/left_test.cc</locationURI>
</link>
<link>
<name>libkenlm.dylib</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/libkenlm.dylib</locationURI>
</link>
<link>
<name>libkenutil.dylib</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/libkenutil.dylib</locationURI>
</link>
<link>
<name>lm_exception.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/lm_exception.cc</locationURI>
</link>
<link>
<name>lm_exception.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/lm_exception.hh</locationURI>
</link>
<link>
<name>max_order.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/max_order.hh</locationURI>
</link>
<link>
<name>model.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/model.cc</locationURI>
</link>
<link>
<name>model.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/model.hh</locationURI>
</link>
<link>
<name>model_test.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/model_test.cc</locationURI>
</link>
<link>
<name>model_type.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/model_type.hh</locationURI>
</link>
<link>
<name>ngram_query.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/ngram_query.cc</locationURI>
</link>
<link>
<name>ngram_query.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/ngram_query.hh</locationURI>
</link>
<link>
<name>quantize.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/quantize.cc</locationURI>
</link>
<link>
<name>quantize.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/quantize.hh</locationURI>
</link>
<link>
<name>query</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/query</locationURI>
</link>
<link>
<name>read_arpa.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/read_arpa.cc</locationURI>
</link>
<link>
<name>read_arpa.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/read_arpa.hh</locationURI>
</link>
<link>
<name>return.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/return.hh</locationURI>
</link>
<link>
<name>search_hashed.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/search_hashed.cc</locationURI>
</link>
<link>
<name>search_hashed.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/search_hashed.hh</locationURI>
</link>
<link>
<name>search_trie.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/search_trie.cc</locationURI>
</link>
<link>
<name>search_trie.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/search_trie.hh</locationURI>
</link>
<link>
<name>test.arpa</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/test.arpa</locationURI>
</link>
<link>
<name>test.sh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/test.sh</locationURI>
</link>
<link>
<name>test_nounk.arpa</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/test_nounk.arpa</locationURI>
</link>
<link>
<name>trie.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/trie.cc</locationURI>
</link>
<link>
<name>trie.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/trie.hh</locationURI>
</link>
<link>
<name>trie_sort.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.cc</locationURI>
</link>
<link>
<name>trie_sort.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.hh</locationURI>
</link>
<link>
<name>virtual_interface.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/virtual_interface.cc</locationURI>
</link>
<link>
<name>virtual_interface.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/virtual_interface.hh</locationURI>
</link>
<link>
<name>vocab.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/vocab.cc</locationURI>
</link>
<link>
<name>vocab.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/vocab.hh</locationURI>
</link>
<link>
<name>weights.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/weights.hh</locationURI>
</link>
<link>
<name>word_index.hh</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/word_index.hh</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -307,6 +307,7 @@
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
);
OTHER_LDFLAGS = (
"-lz",
@ -316,6 +317,7 @@
"-loolm",
"-lflm",
"-llattice",
"-lboost_thread-mt",
);
PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
@ -338,6 +340,7 @@
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
);
OTHER_LDFLAGS = (
"-lz",
@ -347,6 +350,7 @@
"-loolm",
"-lflm",
"-llattice",
"-lboost_thread-mt",
);
PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
@ -359,7 +363,10 @@
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = TRACE_ENABLE;
GCC_PREPROCESSOR_DEFINITIONS = (
TRACE_ENABLE,
WITH_THREADS,
);
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
@ -378,7 +385,10 @@
buildSettings = {
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_PREPROCESSOR_DEFINITIONS = TRACE_ENABLE;
GCC_PREPROCESSOR_DEFINITIONS = (
TRACE_ENABLE,
WITH_THREADS,
);
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (

View File

@ -311,6 +311,7 @@
LM_SRI,
LM_IRST,
TRACE_ENABLE,
WITH_THREADS,
);
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
@ -324,6 +325,7 @@
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
);
OTHER_LDFLAGS = (
"-lflm",
@ -332,6 +334,7 @@
"-ldstruct",
"-lz",
"-lirstlm",
"-lboost_thread-mt",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";
@ -348,9 +351,10 @@
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 3;
GCC_PREPROCESSOR_DEFINITIONS = (
LM_IRST,
LM_SRI,
LM_IRST,
TRACE_ENABLE,
WITH_THREADS,
);
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
@ -364,6 +368,7 @@
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
);
OTHER_LDFLAGS = (
"-lflm",
@ -372,6 +377,7 @@
"-ldstruct",
"-lz",
"-lirstlm",
"-lboost_thread-mt",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";
@ -384,6 +390,12 @@
buildSettings = {
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
GCC_MODEL_TUNING = G5;
GCC_PREPROCESSOR_DEFINITIONS = (
LM_SRI,
LM_IRST,
TRACE_ENABLE,
WITH_THREADS,
);
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
@ -396,6 +408,7 @@
LIBRARY_SEARCH_PATHS = (
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
);
OTHER_LDFLAGS = (
"-lflm",
@ -404,6 +417,7 @@
"-ldstruct",
"-lz",
"-lirstlm",
"-lboost_thread-mt",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";

View File

@ -0,0 +1,140 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1679946908" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.451172468" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/moses-cmd/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1382407954" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.2118670613" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.84059290" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<option id="macosx.cpp.link.option.libs.1641794848" name="Libraries (-l)" superClass="macosx.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="irstlm"/>
</option>
<option id="macosx.cpp.link.option.paths.1615268628" name="Library search path (-L)" superClass="macosx.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/moses/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/OnDiskPt/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/lm/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/util/Debug"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/irstlm/lib"/>
</option>
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.412058804" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.896987906" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.187427846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2033983602" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.2018824611" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1176009559" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1024398579" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.748558048" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1014626120" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2031799877" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="LatticeMBRGrid.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.1916112479">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.1916112479" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.1916112479" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.1916112479." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1528572752" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.1976002706" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/moses-cmd/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1470455063" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.335066624" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1173017253" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.675070011" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.174060449" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1018665338" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.440711813" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1940339824" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1648308879" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.2105388501" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1692046412" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1452105399" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses-cmd.cdt.managedbuild.target.macosx.exe.1016275955" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865;cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697;cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
</cproject>

View File

@ -0,0 +1,199 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>moses-cmd</name>
<comment></comment>
<projects>
<project>lm</project>
<project>moses</project>
<project>OnDiskPt</project>
<project>util</project>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildLocation</key>
<value>${workspace_loc:/moses-cmd/Debug}</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>IOWrapper.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.cpp</locationURI>
</link>
<link>
<name>IOWrapper.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.h</locationURI>
</link>
<link>
<name>IOWrapper.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.o</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Jamfile</locationURI>
</link>
<link>
<name>LatticeMBR.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.cpp</locationURI>
</link>
<link>
<name>LatticeMBR.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.h</locationURI>
</link>
<link>
<name>LatticeMBR.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.o</locationURI>
</link>
<link>
<name>LatticeMBRGrid.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.cpp</locationURI>
</link>
<link>
<name>LatticeMBRGrid.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.o</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.h</locationURI>
</link>
<link>
<name>Main.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.o</locationURI>
</link>
<link>
<name>TranslationAnalysis.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.cpp</locationURI>
</link>
<link>
<name>TranslationAnalysis.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.h</locationURI>
</link>
<link>
<name>TranslationAnalysis.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.o</locationURI>
</link>
<link>
<name>libkenlm.dylib</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenlm.dylib</locationURI>
</link>
<link>
<name>libkenutil.dylib</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenutil.dylib</locationURI>
</link>
<link>
<name>lmbrgrid</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/lmbrgrid</locationURI>
</link>
<link>
<name>mbr.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.cpp</locationURI>
</link>
<link>
<name>mbr.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.h</locationURI>
</link>
<link>
<name>mbr.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.o</locationURI>
</link>
<link>
<name>moses</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/moses</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -1357,6 +1357,7 @@
LM_IRST,
"_FILE_OFFSET_BITS=64",
_LARGE_FILES,
WITH_THREADS,
);
HEADER_SEARCH_PATHS = (
../..,
@ -1399,6 +1400,7 @@
LM_IRST,
"_FILE_OFFSET_BITS=64",
_LARGE_FILES,
WITH_THREADS,
);
HEADER_SEARCH_PATHS = (
../..,

View File

@ -0,0 +1,164 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses/Debug"/>
<entry flags="RESOLVED" kind="libraryFile" name="moses"/>
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.497902212" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1820609450" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1998579330" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1330311562" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1226580551" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.102127808" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool command="as" commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.1556759720" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.897776351" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1820797229" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.1898625650" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.806998992" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1819917957" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1569452418" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="LM_SRI"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.753046525" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1396911098" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1919272901" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1722029461" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1432960145" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1906856645" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.460380900" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1692203139" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.538301588" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.854427429" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.cpp|LM/LDHT.h|LM/Remote.h|LM/Remote.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.722580523">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.722580523" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.722580523" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.722580523." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.2070671582" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.503591386" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.108117223" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1203406445" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1539915639" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1333560300" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1693865756" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.2000339940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.505919286" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1036481202" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.484015287" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.preprocessor.def.1089615214" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="LM_SRI"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<option id="gnu.cpp.compiler.option.include.paths.1722702487" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1487222992" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1171203697" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1172147378" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1831545277" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1743378025" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1490362543" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/LDHT.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses.cdt.managedbuild.target.macosx.exe.1209017164" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202;cdt.managedbuild.tool.gnu.c.compiler.input.1919272901">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839;cdt.managedbuild.tool.gnu.c.compiler.input.1172147378">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805;cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925;cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses"/>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,133 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/util"/>
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/util/Debug"/>
<entry flags="RESOLVED" kind="libraryFile" name="util"/>
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1388624938" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1873607607" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/util/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.2045214944" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.589471640" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1543780089" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.635667684" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.726000130" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.592875056" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1252745601" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1018784824" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.623959371" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.892917290" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1401298824" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.36067607" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.460849578" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.289923594" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="util/bit_packing_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.172239955">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.172239955" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.172239955" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.172239955." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.822279811" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.533470822" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/util/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1705559832" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.476073423" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.384294309" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1583097070" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1872669585" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.453642480" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1010248526" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.549134109" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1741196615" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1171704152" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.883129829" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.685540722" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.279247859" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1371842588" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1581172024" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1632081663" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="util.cdt.managedbuild.target.macosx.exe.2006203724" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.172239955;cdt.managedbuild.config.macosx.exe.release.172239955.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.279247859;cdt.managedbuild.tool.gnu.c.compiler.input.1632081663">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.172239955;cdt.managedbuild.config.macosx.exe.release.172239955.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.549134109;cdt.managedbuild.tool.gnu.cpp.compiler.input.685540722">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447;cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1018784824;cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447;cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901;cdt.managedbuild.tool.gnu.c.compiler.input.289923594">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/util"/>
</storageModule>
</cproject>

View File

@ -0,0 +1,90 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>util</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
<dictionary>
<key>?name?</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.append_environment</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildArguments</key>
<value></value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildCommand</key>
<value>make</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.buildLocation</key>
<value>${workspace_loc:/util/Debug}</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
<value>clean</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.contents</key>
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
<value>false</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
<value>all</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.stopOnError</key>
<value>true</value>
</dictionary>
<dictionary>
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
<value>true</value>
</dictionary>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>util</name>
<type>2</type>
<locationURI>PARENT-3-PROJECT_LOC/util</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -1,41 +0,0 @@
#If you get compilation errors here, make sure you have xmlrpc-c installed properly, including the abyss server option.
# Locate xmlrpc-c and decide whether the target further down can be built.
# Sets build-moses-server (to true when usable) and xmlrpc-command
# (the xmlrpc-c-config executable to query for flags).
import option ;
import path ;
# Optional install prefix passed on the command line as --with-xmlrpc-c.
with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
if $(with-xmlrpc-c) {
# An explicit prefix was given: trust it, but insist the config tool exists.
build-moses-server = true ;
xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ;
if ! [ path.exists $(xmlrpc-command) ] {
exit Could not find $(xmlrpc-command) : 1 ;
}
} else {
# No prefix: probe whatever xmlrpc-c-config is on PATH (stderr silenced).
# xmlrpc-check[1] is the command output, xmlrpc-check[2] its exit status.
xmlrpc-check = [ _shell "xmlrpc-c-config --features 2>/dev/null" : exit-status ] ;
if $(xmlrpc-check[2]) = 0 {
# Only enable the build when the feature list mentions abyss-server.
if [ MATCH "(abyss-server)" : $(xmlrpc-check[1]) ] {
build-moses-server = true ;
} else {
echo "Found xmlrpc-c but it does not have abyss-server. Skipping mosesserver." ;
}
}
xmlrpc-command = "xmlrpc-c-config" ;
}
# Run a shell command and return its output; abort the build with exit
# code 1 if the command's exit status is non-zero.
#   cmd : the command line to execute
rule shell_or_die ( cmd ) {
# ret[1] is the captured output, ret[2] the exit status.
local ret = [ _shell $(cmd) : exit-status ] ;
if $(ret[2]) != 0 {
exit "Failed to run $(cmd)" : 1 ;
}
return $(ret[1]) ;
}
# Declare the queryOnDiskPt executable when xmlrpc-c was found, otherwise
# provide an empty alias so references to the target still resolve.
# NOTE(review): the target is queryOnDiskPt yet it is gated on, and linked
# with, xmlrpc-c flags - this looks inherited from the mosesserver Jamfile;
# confirm whether the dependency is really needed.
if $(build-moses-server) = true
{
# Ask xmlrpc-c-config for the link and compile flags of the c++2 abyss server.
xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
exe queryOnDiskPt : queryOnDiskPt.cpp ../../moses/src//moses ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
} else {
alias queryOnDiskPt ;
}

View File

@ -1,6 +0,0 @@
# Manual one-off build of queryOnDiskPt with g++.
# SRI and IRST are hard-coded paths from one developer's machine; adjust
# them to the local SRILM / IRSTLM checkouts before running.
SRI=/Users/hieuhoang/workspace/srilm
IRST=/Users/hieuhoang/workspace/irstlm/trunk
# Compiles queryOnDiskPt.cpp together with PhraseDictionary.cpp and links
# against the libraries under ../../dist/lib plus SRILM (macosx build),
# Boost.Thread from /opt/local/lib and IRSTLM.
g++ -o queryOnDiskPt queryOnDiskPt.cpp ../../moses/src/PhraseDictionary.cpp -I../../moses/src/ -I../../ -L../../dist/lib/ -I../../OnDiskPt -lmert_lib -ldynsa -lz -lmoses_internal -lOnDiskPt -lLM -lkenlm -lkenutil -lRuleTable -lCYKPlusParser -lScope3Parser -L$SRI/lib/macosx/ -ldstruct -lflm -llattice -lmisc -loolm -L/opt/local/lib -lboost_thread-mt -L$IRST/lib -lirstlm

View File

@ -1,6 +1,7 @@
#include "lm/bhiksha.hh"
#include "lm/config.hh"
#include "util/file.hh"
#include "util/exception.hh"
#include <limits>

View File

@ -232,3 +232,44 @@ float sentenceLevelBleuPlusOne(const vector<float>& stats) {
}
return exp(logbleu);
}
/**
 * Sentence-level BLEU smoothed with a background corpus.
 *
 * The sentence statistics and the background statistics are added
 * element-wise, BLEU is computed on the sum, and the result is scaled by
 * the last element of the summed statistics (the reference length, as per
 * Chiang et al 08).
 *
 * \param sent statistics of one sentence; must hold kBleuNgramOrder*2+1 values
 * \param bg   background statistics; must have the same size as sent
 * \return exp(log-BLEU) multiplied by the summed reference length
 */
float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
{
  CHECK(sent.size()==bg.size());
  CHECK(sent.size()==kBleuNgramOrder*2+1);

  // Start from the sentence stats and fold the background into them.
  std::vector<float> combined(sent);
  for (size_t k = 0; k < combined.size(); ++k) {
    combined[k] += bg[k];
  }

  // Mean over all n-gram orders of the log ratio of each (2n, 2n+1) pair.
  float logScore = 0.0;
  for (int order = 0; order < kBleuNgramOrder; ++order) {
    logScore += log(combined[2 * order]) - log(combined[2 * order + 1]);
  }
  logScore /= kBleuNgramOrder;

  // Brevity penalty: only applied when it is negative.
  const float penalty = 1.0 - combined[(kBleuNgramOrder * 2)] / combined[1];
  if (penalty < 0.0) {
    logScore += penalty;
  }

  // Exponentiate and scale by reference length.
  return exp(logScore) * combined[kBleuNgramOrder*2];
}
/**
 * Plain (unsmoothed) BLEU computed directly from a statistics vector.
 *
 * \param stats must hold kBleuNgramOrder*2+1 values
 * \return exp of the mean per-order log ratio, with the brevity penalty
 *         added in log space when it is negative
 */
float unsmoothedBleu(const std::vector<float>& stats) {
  CHECK(stats.size() == kBleuNgramOrder * 2 + 1);

  // Accumulate the log ratio of each (2n, 2n+1) pair, then average.
  float logScore = 0.0;
  for (int order = 0; order < kBleuNgramOrder; ++order) {
    logScore += log(stats[2 * order]) - log(stats[2 * order + 1]);
  }
  logScore /= kBleuNgramOrder;

  // Brevity penalty uses the last statistic against stats[1].
  const float penalty = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
  if (penalty < 0.0) {
    logScore += penalty;
  }
  return exp(logScore);
}

View File

@ -70,4 +70,14 @@ private:
*/
float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
/** Computes sentence-level BLEU score given a background corpus.
* This function is used in batch MIRA.
*/
float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg);
/**
* Computes plain old BLEU from a vector of stats
*/
float unsmoothedBleu(const std::vector<float>& stats);
#endif // MERT_BLEU_SCORER_H_

View File

@ -152,10 +152,10 @@ BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
// "girl with a telescope", "with a telescope ."
NgramCounts counts;
BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
BOOST_CHECK_EQUAL(25, counts.size());
BOOST_CHECK_EQUAL((std::size_t)25, counts.size());
mert::Vocabulary* vocab = scorer.GetVocab();
BOOST_CHECK_EQUAL(7, vocab->size());
BOOST_CHECK_EQUAL((std::size_t)7, vocab->size());
std::vector<std::string> res;
Tokenize(line.c_str(), ' ', &res);
@ -203,7 +203,7 @@ BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
ScoreStats entry;
scorer.prepareStats(0, line, entry);
BOOST_CHECK_EQUAL(entry.size(), 2 * kBleuNgramOrder + 1);
BOOST_CHECK_EQUAL(entry.size(), (std::size_t)(2 * kBleuNgramOrder + 1));
// Test hypothesis ngram counts
BOOST_CHECK_EQUAL(entry.get(0), 5); // unigram

View File

@ -33,8 +33,8 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
std::vector<Data> shards;
data.createShards(2,0,"",shards);
BOOST_CHECK_EQUAL(shards.size(),2);
BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),2);
BOOST_CHECK_EQUAL(shards.size(),(std::size_t)2);
BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),(std::size_t)2);
}
BOOST_AUTO_TEST_CASE(init_feature_map_test) {

View File

@ -18,6 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <sstream>
#include <boost/functional/hash.hpp>
#include "util/tokenize_piece.hh"
@ -47,6 +48,16 @@ float ParseFloat(const StringPiece& str) {
return value;
}
/**
 * Equality of two feature data items: both the dense and the sparse
 * feature vectors must compare equal.
 *
 * This is the equality predicate used alongside hash_value() when items
 * are stored in a hashed container, so it must genuinely compare the two
 * arguments against each other; comparing an item with itself would make
 * every pair of items that reach the comparison look like duplicates.
 */
bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2) {
  return item1.dense == item2.dense && item1.sparse == item2.sparse;
}
/**
 * Hash of a feature data item, built by combining the hash of the dense
 * vector and then the hash of the sparse vector (in that order).
 */
size_t hash_value(FeatureDataItem const& item) {
  size_t combined = 0;
  boost::hash_combine(combined, item.dense);
  boost::hash_combine(combined, item.sparse);
  return combined;
}
FeatureDataIterator::FeatureDataIterator() {}

View File

@ -61,6 +61,9 @@ class FeatureDataItem
SparseVector sparse;
};
bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2);
std::size_t hash_value(FeatureDataItem const& item);
class FeatureDataIterator :
public boost::iterator_facade<FeatureDataIterator,
const std::vector<FeatureDataItem>,

View File

@ -13,7 +13,7 @@ void CheckFeatureMap(const FeatureData* feature_data,
std::stringstream ss;
ss << str << "_" << i;
const std::string& s = ss.str();
BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), *cnt);
BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), (std::size_t)(*cnt));
BOOST_CHECK_EQUAL(feature_data->getFeatureName(*cnt).c_str(), s);
++(*cnt);
}
@ -35,6 +35,6 @@ BOOST_AUTO_TEST_CASE(set_feature_map) {
CheckFeatureMap(&feature_data, "lm", 2, &cnt);
CheckFeatureMap(&feature_data, "tm", 5, &cnt);
BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), cnt);
BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), (std::size_t)cnt);
BOOST_CHECK_EQUAL(feature_data.getFeatureName(cnt).c_str(), "w_0");
}

View File

@ -11,6 +11,9 @@
#include <fstream>
#include <cmath>
#include <stdexcept>
#include <boost/functional/hash.hpp>
#include "Util.h"
using namespace std;
@ -109,6 +112,42 @@ FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs)
}
}
/**
 * Returns the ids (keys) of all features stored in this sparse vector,
 * in the iteration order of the underlying container.
 */
std::vector<std::size_t> SparseVector::feats() const {
  std::vector<std::size_t> toRet;
  // The final size is known up front, so allocate once.
  toRet.reserve(m_fvector.size());
  for (fvector_t::const_iterator iter = m_fvector.begin();
       iter != m_fvector.end();
       ++iter) {
    toRet.push_back(iter->first);
  }
  return toRet;
}
std::size_t SparseVector::encode(const std::string& name) {
name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
size_t id = 0;
if (name2id_iter == m_name_to_id.end()) {
id = m_id_to_name.size();
m_id_to_name.push_back(name);
m_name_to_id[name] = id;
} else {
id = name2id_iter->second;
}
return id;
}
/**
 * Returns the feature name registered under the given id.
 * The id is used as a direct index into the id-to-name table and is not
 * range-checked here.
 */
std::string SparseVector::decode(std::size_t id) {
  const std::string& featureName = m_id_to_name[id];
  return featureName;
}
/**
 * Two sparse vectors are equal when their underlying feature maps are equal.
 */
bool operator==(SparseVector const& item1, SparseVector const& item2) {
  const bool sameContents = (item1.m_fvector == item2.m_fvector);
  return sameContents;
}
/**
 * Hash of a sparse vector: the boost::hash of its underlying feature map.
 */
std::size_t hash_value(SparseVector const& item) {
  return boost::hash<SparseVector::fvector_t>()(item.m_fvector);
}
FeatureStats::FeatureStats()
: m_available_size(kAvailableSize), m_entries(0),
m_array(new FeatureStatsType[m_available_size]) {}

View File

@ -29,12 +29,20 @@ public:
void clear();
void load(const std::string& file);
std::size_t size() const { return m_fvector.size(); }
void write(std::ostream& out, const std::string& sep = " ") const;
SparseVector& operator-=(const SparseVector& rhs);
FeatureStatsType inner_product(const SparseVector& rhs) const;
// Added by cherryc
std::vector<std::size_t> feats() const;
friend bool operator==(SparseVector const& item1, SparseVector const& item2);
friend std::size_t hash_value(SparseVector const& item);
static std::size_t encode(const std::string& feat);
static std::string decode(std::size_t feat);
// End added by cherryc
private:
static name2id_t m_name_to_id;
static id2name_t m_id_to_name;

189
mert/HypPackEnumerator.cpp Normal file
View File

@ -0,0 +1,189 @@
#include "HypPackEnumerator.h"
#include <cassert>
#include <algorithm>
#include <boost/unordered_set.hpp>
using namespace std;
/**
 * Records the feature and score file names; no file is opened here
 * (reset() creates the iterators).
 *
 * Terminates the process when either list is empty (exit status 0) or when
 * the two lists differ in length (exit status 1).
 *
 * All scalar members are set in the initializer list, in declaration
 * order, so that none of them (m_sentenceId in particular) is left
 * indeterminate before the first reset().
 */
StreamingHypPackEnumerator::StreamingHypPackEnumerator
(
 vector<std::string> const& featureFiles,
 vector<std::string> const& scoreFiles
 )
  : m_num_lists(scoreFiles.size()),
    m_sentenceId(0),
    m_featureFiles(featureFiles),
    m_scoreFiles(scoreFiles),
    m_primed(false),   // nothing loaded until reset()
    m_iNumDense(-1)    // -1 means "not yet known"
{
  if (scoreFiles.size() == 0 || featureFiles.size() == 0) {
    cerr << "No data to process" << endl;
    exit(0);
  }

  if (featureFiles.size() != scoreFiles.size()) {
    cerr << "Error: Number of feature files (" << featureFiles.size() <<
      ") does not match number of score files (" << scoreFiles.size() << ")" << endl;
    exit(1);
  }
}
/**
 * Number of dense features seen so far. Terminates the process with
 * status 1 when no item has been examined yet (count still negative).
 */
size_t StreamingHypPackEnumerator::num_dense() const {
  const bool unknown = (m_iNumDense < 0);
  if (unknown) {
    cerr << "Error: Requested num_dense() for an unprimed StreamingHypPackEnumerator" << endl;
    exit(1);
  }
  return (size_t) m_iNumDense;
}
void StreamingHypPackEnumerator::prime(){
m_current_indexes.clear();
boost::unordered_set<FeatureDataItem> seen;
m_primed = true;
for (size_t i = 0; i < m_num_lists; ++i) {
if (m_featureDataIters[i] == FeatureDataIterator::end()) {
cerr << "Error: Feature file " << i << " ended prematurely" << endl;
exit(1);
}
if (m_scoreDataIters[i] == ScoreDataIterator::end()) {
cerr << "Error: Score file " << i << " ended prematurely" << endl;
exit(1);
}
if (m_featureDataIters[i]->size() != m_scoreDataIters[i]->size()) {
cerr << "Error: For sentence " << m_sentenceId << " features and scores have different size" << endl;
exit(1);
}
for (size_t j = 0; j < m_featureDataIters[i]->size(); ++j) {
FeatureDataItem item = m_featureDataIters[i]->operator[](j);
// Dedup
if(seen.find(item)==seen.end()) {
seen.insert(item);
// Confirm dense features are always the same
int iDense = item.dense.size();
if(m_iNumDense != iDense) {
if(m_iNumDense==-1) m_iNumDense = iDense;
else {
cerr << "Error: expecting constant number of dense features: "
<< m_iNumDense << " != " << iDense << endl;
exit(1);
}
}
// Store item for retrieval
m_current_indexes.push_back(pair<size_t,size_t>(i,j));
}
}
}
}
/**
 * Restarts the enumeration: discards the current iterators, creates a new
 * feature/score iterator pair for every file, resets the sentence counter
 * and indexes the first sentence.
 */
void StreamingHypPackEnumerator::reset(){
  m_featureDataIters.clear();
  m_scoreDataIters.clear();

  for (size_t listIdx = 0; listIdx != m_num_lists; ++listIdx) {
    m_featureDataIters.push_back(FeatureDataIterator(m_featureFiles[listIdx]));
    m_scoreDataIters.push_back(ScoreDataIterator(m_scoreFiles[listIdx]));
  }

  m_sentenceId = 0;
  prime();
}
/**
 * True once the first feature iterator has reached its end.
 * Reads the first iterator unconditionally, so iterators must exist.
 */
bool StreamingHypPackEnumerator::finished(){
  const bool firstListDone = (m_featureDataIters.front() == FeatureDataIterator::end());
  return firstListDone;
}
/**
 * Advances every feature and score iterator by one sentence and, unless
 * the data is exhausted, indexes the new sentence. Terminates the process
 * with status 1 when called before the enumerator has been primed.
 */
void StreamingHypPackEnumerator::next(){
  if (m_primed == false) {
    cerr << "Enumerating an unprimed HypPackEnumerator" << endl;
    exit(1);
  }

  for (size_t listIdx = 0; listIdx != m_num_lists; ++listIdx) {
    ++m_featureDataIters[listIdx];
    ++m_scoreDataIters[listIdx];
  }
  ++m_sentenceId;

  if (finished()) {
    return;
  }
  prime();
}
/**
 * Number of distinct hypotheses indexed for the current sentence.
 * Terminates the process with status 1 when not primed.
 */
size_t StreamingHypPackEnumerator::cur_size(){
  if (m_primed == false) {
    cerr << "Querying size from an unprimed HypPackEnumerator" << endl;
    exit(1);
  }
  const size_t distinctHyps = m_current_indexes.size();
  return distinctHyps;
}
/**
 * Features of the index-th distinct hypothesis of the current sentence.
 * Terminates the process with status 1 when not primed; index is not
 * range-checked.
 */
const FeatureDataItem& StreamingHypPackEnumerator::featuresAt(size_t index){
  if (m_primed == false) {
    cerr << "Querying features from an unprimed HypPackEnumerator" << endl;
    exit(1);
  }
  // Stored pair is (list number, position inside that list).
  const size_t listIdx = m_current_indexes[index].first;
  const size_t hypIdx = m_current_indexes[index].second;
  return m_featureDataIters[listIdx]->operator[](hypIdx);
}
/**
 * Score statistics of the index-th distinct hypothesis of the current
 * sentence. Terminates the process with status 1 when not primed; index is
 * not range-checked.
 */
const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index) {
  if (m_primed == false) {
    cerr << "Querying scores from an unprimed HypPackEnumerator" << endl;
    exit(1);
  }
  // Stored pair is (list number, position inside that list).
  const size_t listIdx = m_current_indexes[index].first;
  const size_t hypIdx = m_current_indexes[index].second;
  return m_scoreDataIters[listIdx]->operator[](hypIdx);
}
/* --------- RandomAccessHypPackEnumerator ------------- */
/**
 * Loads every sentence's distinct hypotheses into memory by draining a
 * StreamingHypPackEnumerator over the same files, and records the dense
 * feature count reported by that enumerator.
 *
 * \param featureFiles feature data files
 * \param scoreFiles   score data files
 * \param no_shuffle   when true, reset() keeps the original sentence order
 */
RandomAccessHypPackEnumerator::RandomAccessHypPackEnumerator(vector<string> const& featureFiles,
                                                             vector<string> const& scoreFiles,
                                                             bool no_shuffle)
  : m_no_shuffle(no_shuffle),
    m_cur_index(0)
{
  StreamingHypPackEnumerator source(featureFiles,scoreFiles);
  source.reset();
  while (!source.finished()) {
    // One fresh row per sentence in both tables.
    m_features.push_back(vector<FeatureDataItem>());
    m_scores.push_back(vector<ScoreDataItem>());
    for (size_t hyp = 0; hyp < source.cur_size(); ++hyp) {
      m_features.back().push_back(source.featuresAt(hyp));
      m_scores.back().push_back(source.scoresAt(hyp));
    }
    // Sentence k gets index k: the count of indexes stored so far.
    m_indexes.push_back(m_indexes.size());
    source.next();
  }
  m_num_dense = source.num_dense();
}
/** Dense feature count captured when the data was loaded. */
size_t RandomAccessHypPackEnumerator::num_dense() const {
  const size_t denseCount = m_num_dense;
  return denseCount;
}
/**
 * Rewinds to the first sentence and, unless shuffling is disabled,
 * randomly permutes the sentence visiting order.
 */
void RandomAccessHypPackEnumerator::reset() {
  m_cur_index = 0;
  if (m_no_shuffle) {
    return;
  }
  random_shuffle(m_indexes.begin(),m_indexes.end());
}
/** True once the cursor has moved past the last stored sentence. */
bool RandomAccessHypPackEnumerator::finished() {
  const bool hasMore = (m_cur_index < m_indexes.size());
  return !hasMore;
}
/** Moves the cursor to the next sentence in the current visiting order. */
void RandomAccessHypPackEnumerator::next() {
  ++m_cur_index;
}
/**
 * Number of hypotheses stored for the current sentence. Asserts that the
 * feature and score rows for that sentence have the same length.
 */
size_t RandomAccessHypPackEnumerator::cur_size() {
  const size_t sentence = m_indexes[m_cur_index];
  assert(m_features[sentence].size()==m_scores[sentence].size());
  return m_features[sentence].size();
}
/** Features of hypothesis i of the current sentence (no range check). */
const FeatureDataItem& RandomAccessHypPackEnumerator::featuresAt(size_t i) {
  const size_t sentence = m_indexes[m_cur_index];
  return m_features[sentence][i];
}
/** Score statistics of hypothesis i of the current sentence (no range check). */
const ScoreDataItem& RandomAccessHypPackEnumerator::scoresAt(size_t i) {
  const size_t sentence = m_indexes[m_cur_index];
  return m_scores[sentence][i];
}
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:

101
mert/HypPackEnumerator.h Normal file
View File

@ -0,0 +1,101 @@
/*
* HypPackEnumerator.h
* kbmira - k-best Batch MIRA
*
* Abstracts away the mess of iterating through multiple
* collections of k-best lists, as well as deduping
*/
#ifndef MERT_HYP_PACK_COLLECTION_H
#define MERT_HYP_PACK_COLLECTION_H
#include <string>
#include <vector>
#include <utility>
#include <stddef.h>
#include "FeatureDataIterator.h"
#include "ScoreDataIterator.h"
// Abstract interface for walking through the hypothesis pack of one
// sentence at a time. Typical use:
//   for (e.reset(); !e.finished(); e.next()) { ... e.cur_size() ... }
class HypPackEnumerator {
public:
virtual ~HypPackEnumerator() {}
// Restart the enumeration from the first sentence.
virtual void reset() = 0;
// True when there is no current sentence left to visit.
virtual bool finished() = 0;
// Advance to the next sentence.
virtual void next() = 0;
// Number of hypotheses available for the current sentence.
virtual std::size_t cur_size() = 0;
// Number of dense features per hypothesis.
virtual std::size_t num_dense() const = 0;
// Features of the i-th hypothesis of the current sentence.
virtual const FeatureDataItem& featuresAt(std::size_t i) = 0;
// Score statistics of the i-th hypothesis of the current sentence.
virtual const ScoreDataItem& scoresAt(std::size_t i) = 0;
};
// Instantiation that streams from disk
// Low-memory, low-speed, sequential access
// reset() must be called before any other query: it creates the file
// iterators and indexes the first sentence.
class StreamingHypPackEnumerator : public HypPackEnumerator {
public:
// Stores the file names only; exits the process if either list is empty
// or the two lists differ in length.
StreamingHypPackEnumerator(std::vector<std::string> const& featureFiles,
std::vector<std::string> const& scoreFiles);
virtual std::size_t num_dense() const;
virtual void reset();
virtual bool finished();
virtual void next();
virtual std::size_t cur_size();
virtual const FeatureDataItem& featuresAt(std::size_t i);
virtual const ScoreDataItem& scoresAt(std::size_t i);
private:
// Rebuilds m_current_indexes for the current sentence, dropping
// duplicate feature items.
void prime();
// Number of (feature file, score file) pairs.
std::size_t m_num_lists;
// Zero-based counter of the current sentence; used in error messages.
std::size_t m_sentenceId;
std::vector<std::string> m_featureFiles;
std::vector<std::string> m_scoreFiles;
// Set once prime() has run; accessors refuse to work before that.
bool m_primed;
// Dense feature count, or -1 while still unknown.
int m_iNumDense;
// One iterator per file, all positioned on the current sentence.
std::vector<FeatureDataIterator> m_featureDataIters;
std::vector<ScoreDataIterator> m_scoreDataIters;
// (list number, hypothesis position) of each distinct hypothesis kept
// for the current sentence.
std::vector<std::pair<std::size_t,std::size_t> > m_current_indexes;
};
// Instantiation that reads into memory
// High-memory, high-speed, random access
// (Actually randomizes with each call to reset, unless no_shuffle is set)
class RandomAccessHypPackEnumerator : public HypPackEnumerator {
public:
// Reads all sentences from the given files into memory.
// no_shuffle: when true, reset() keeps the original sentence order.
RandomAccessHypPackEnumerator(std::vector<std::string> const& featureFiles,
std::vector<std::string> const& scoreFiles,
bool no_shuffle);
virtual std::size_t num_dense() const;
virtual void reset();
virtual bool finished();
virtual void next();
virtual std::size_t cur_size();
virtual const FeatureDataItem& featuresAt(std::size_t i);
virtual const ScoreDataItem& scoresAt(std::size_t i);
private:
bool m_no_shuffle;
// Position inside m_indexes of the sentence currently being visited.
std::size_t m_cur_index;
std::size_t m_num_dense;
// Visiting order: m_indexes[k] is the row of m_features / m_scores to
// use at step k.
std::vector<std::size_t> m_indexes;
// Per sentence, the features and score statistics of every hypothesis.
std::vector<std::vector<FeatureDataItem> > m_features;
std::vector<std::vector<ScoreDataItem> > m_scores;
};
#endif // MERT_HYP_PACK_COLLECTION_H
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:

View File

@ -15,6 +15,9 @@ FeatureStats.cpp
FeatureArray.cpp
FeatureData.cpp
FeatureDataIterator.cpp
MiraFeatureVector.cpp
MiraWeightVector.cpp
HypPackEnumerator.cpp
Data.cpp
BleuScorer.cpp
SemposScorer.cpp
@ -52,7 +55,9 @@ exe evaluator : evaluator.cpp mert_lib ;
exe pro : pro.cpp mert_lib ..//boost_program_options ;
alias programs : mert extractor evaluator pro ;
exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ;
alias programs : mert extractor evaluator pro kbmira ;
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;

146
mert/MiraFeatureVector.cpp Normal file
View File

@ -0,0 +1,146 @@
#include <cmath>
#include "MiraFeatureVector.h"
using namespace std;
/**
 * Builds a feature vector from a FeatureDataItem.
 *
 * Dense values are copied as-is. Each sparse feature id is shifted up by
 * the number of dense features so both kinds share one index space.
 * Terminates the process with status 1 if the shifted sparse ids are not
 * strictly ascending.
 */
MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
  : m_dense(vec.dense)
{
  vector<size_t> rawIds = vec.sparse.feats();
  for (size_t pos = 0; pos < rawIds.size(); ++pos) {
    const size_t shifted = m_dense.size() + rawIds[pos];
    m_sparseFeats.push_back(shifted);
    m_sparseVals.push_back(vec.sparse.get(rawIds[pos]));

    // Check ordered property against the previously stored id.
    if (pos > 0 && m_sparseFeats[pos - 1] >= shifted) {
      cerr << "Error: Feature indeces must be strictly ascending coming out of SparseVector" << endl;
      exit(1);
    }
  }
}
/**
 * Copy constructor. Copies all three vectors and terminates the process
 * with status 1 if the sparse id and sparse value vectors differ in length.
 */
MiraFeatureVector::MiraFeatureVector(const MiraFeatureVector& other)
  : m_dense(other.m_dense),
    m_sparseFeats(other.m_sparseFeats),
    m_sparseVals(other.m_sparseVals)
{
  const bool consistent = (m_sparseVals.size() == m_sparseFeats.size());
  if (!consistent) {
    cerr << "Error: mismatching sparse feat and val sizes" << endl;
    exit(1);
  }
}
/**
 * Builds a feature vector from its three component vectors.
 *
 * \param dense       dense feature values
 * \param sparseFeats ids of the sparse features
 * \param sparseVals  values of the sparse features, parallel to sparseFeats
 *
 * Terminates the process with status 1 if sparseFeats and sparseVals
 * differ in length.
 */
MiraFeatureVector::MiraFeatureVector(const vector<ValType>& dense,
                                     const vector<size_t>& sparseFeats,
                                     const vector<ValType>& sparseVals)
  : m_dense(dense),
    m_sparseFeats(sparseFeats),
    m_sparseVals(sparseVals)
{
  const bool consistent = (m_sparseVals.size() == m_sparseFeats.size());
  if (!consistent) {
    cerr << "Error: mismatching sparse feat and val sizes" << endl;
    exit(1);
  }
}
/**
 * Value stored at a position of the combined (dense then sparse) vector.
 *
 * \param index position in [0, size()); positions below the dense size
 *              address dense values, the remaining ones address the
 *              sparse values in storage order
 * \return the value at that position
 */
ValType MiraFeatureVector::val(size_t index) const {
  if (index < m_dense.size())
    return m_dense[index];
  // Sparse entries start right after the dense block, so rebase the
  // position before indexing the sparse storage.
  return m_sparseVals[index - m_dense.size()];
}
/**
 * Feature id found at a position of the combined (dense then sparse) vector.
 *
 * \param index position in [0, size())
 * \return for a dense position, the position itself; for a sparse
 *         position, the stored sparse feature id
 */
size_t MiraFeatureVector::feat(size_t index) const {
  if (index < m_dense.size())
    return index;
  // Sparse entries start right after the dense block, so rebase the
  // position before indexing the sparse storage.
  return m_sparseFeats[index - m_dense.size()];
}
/** Total number of stored entries: dense values plus sparse values. */
size_t MiraFeatureVector::size() const {
  const size_t denseCount = m_dense.size();
  return denseCount + m_sparseVals.size();
}
/**
 * Squared Euclidean norm: sum of squares of all dense values followed by
 * all sparse values.
 */
ValType MiraFeatureVector::sqrNorm() const {
  ValType total = 0.0;
  for (vector<ValType>::const_iterator d = m_dense.begin(); d != m_dense.end(); ++d) {
    total += (*d) * (*d);
  }
  for (vector<ValType>::const_iterator s = m_sparseVals.begin(); s != m_sparseVals.end(); ++s) {
    total += (*s) * (*s);
  }
  return total;
}
/**
 * Element-wise difference a - b of two feature vectors.
 *
 * Dense parts must have the same length (otherwise the process exits with
 * status 1) and are subtracted position by position. Sparse parts are
 * merged in one pass over both id lists, which relies on the ids being in
 * ascending order: ids present on one side only keep their value (negated
 * when coming from b); ids present on both sides are kept only when the
 * difference exceeds 1e-6 in magnitude.
 */
MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector& b)
{
  // Dense subtraction
  if (a.m_dense.size() != b.m_dense.size()) {
    cerr << "Mismatching dense vectors passed to MiraFeatureVector subtraction" << endl;
    exit(1);
  }
  vector<ValType> dense;
  dense.reserve(a.m_dense.size());
  for (size_t i = 0; i < a.m_dense.size(); ++i) {
    dense.push_back(a.m_dense[i] - b.m_dense[i]);
  }

  // Sparse subtraction
  vector<ValType> sparseVals;
  vector<size_t> sparseFeats;
  // Upper bound on the result size: no id is shared by the two sides.
  sparseVals.reserve(a.m_sparseFeats.size() + b.m_sparseFeats.size());
  sparseFeats.reserve(a.m_sparseFeats.size() + b.m_sparseFeats.size());

  size_t i = 0;
  size_t j = 0;
  while (i < a.m_sparseFeats.size() && j < b.m_sparseFeats.size()) {
    if (a.m_sparseFeats[i] < b.m_sparseFeats[j]) {
      // Only in a
      sparseFeats.push_back(a.m_sparseFeats[i]);
      sparseVals.push_back(a.m_sparseVals[i]);
      ++i;
    } else if (b.m_sparseFeats[j] < a.m_sparseFeats[i]) {
      // Only in b
      sparseFeats.push_back(b.m_sparseFeats[j]);
      sparseVals.push_back(-b.m_sparseVals[j]);
      ++j;
    } else {
      // In both: drop the entry when the values (nearly) cancel out.
      // std::fabs is used so the floating-point overload is always chosen;
      // an unqualified abs may resolve to the integer version.
      const ValType newVal = a.m_sparseVals[i] - b.m_sparseVals[j];
      if (std::fabs(newVal) > 1e-6) {
        sparseFeats.push_back(a.m_sparseFeats[i]);
        sparseVals.push_back(newVal);
      }
      ++i;
      ++j;
    }
  }

  // Leftovers from a
  while (i < a.m_sparseFeats.size()) {
    sparseFeats.push_back(a.m_sparseFeats[i]);
    sparseVals.push_back(a.m_sparseVals[i]);
    ++i;
  }

  // Leftovers from b
  while (j < b.m_sparseFeats.size()) {
    sparseFeats.push_back(b.m_sparseFeats[j]);
    sparseVals.push_back(-b.m_sparseVals[j]);
    ++j;
  }

  // Create and return vector
  return MiraFeatureVector(dense, sparseFeats, sparseVals);
}
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:

49
mert/MiraFeatureVector.h Normal file
View File

@ -0,0 +1,49 @@
/*
* MiraFeatureVector.h
* kbmira - k-best Batch MIRA
*
* An alternative to the existing SparseVector
* and FeatureDataItem combo. Should be as memory
* efficient, and a little more time efficient,
* and should save me from constantly hacking
* SparseVector
*/
#ifndef MERT_MIRA_FEATURE_VECTOR_H
#define MERT_MIRA_FEATURE_VECTOR_H
#include <vector>
#include "FeatureDataIterator.h"
typedef FeatureStatsType ValType;
// Feature vector holding a block of dense values plus a list of
// (feature id, value) pairs for sparse features.
class MiraFeatureVector {
public:
// Build from a FeatureDataItem; sparse ids are offset by the dense size.
MiraFeatureVector(const FeatureDataItem& vec);
MiraFeatureVector(const MiraFeatureVector& other);
// Build from explicit parts; sparseFeats and sparseVals must have the
// same length.
MiraFeatureVector(const std::vector<ValType>& dense,
const std::vector<std::size_t>& sparseFeats,
const std::vector<ValType>& sparseVals);
// Value at a storage position in [0, size()).
ValType val(std::size_t index) const;
// Feature id at a storage position in [0, size()).
std::size_t feat(std::size_t index) const;
// Number of stored entries (dense + sparse).
std::size_t size() const;
// Sum of squares of all stored values.
ValType sqrNorm() const;
// Difference a - b; dense sizes must match.
friend MiraFeatureVector operator-(const MiraFeatureVector& a,
const MiraFeatureVector& b);
private:
std::vector<ValType> m_dense;
// Parallel vectors: m_sparseVals[k] is the value of feature
// m_sparseFeats[k].
std::vector<std::size_t> m_sparseFeats;
std::vector<ValType> m_sparseVals;
};
#endif // MERT_MIRA_FEATURE_VECTOR_H
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:

145
mert/MiraWeightVector.cpp Normal file
View File

@ -0,0 +1,145 @@
#include "MiraWeightVector.h"
using namespace std;
/**
 * Default constructor: starts as the zero vector (no weights stored)
 * with an update count of zero.
 */
MiraWeightVector::MiraWeightVector()
  : m_weights(),
    m_totals(),
    m_lastUpdated(),
    m_numUpdates(0)
{
}
/**
 * Constructor taking initial weights.
 * The running totals start out equal to the initial weights, every
 * weight is marked as last updated at step 0, and the update count is 0.
 * \param init Initial feature values
 */
MiraWeightVector::MiraWeightVector(const vector<ValType>& init)
  : m_weights(init),
    m_totals(init),
    m_lastUpdated(init.size(), 0),
    m_numUpdates(0)
{
}
/**
 * Update the model: counts one update step, then adds tau * fv to the
 * weights, entry by entry.
 * \param fv Feature vector to be added to the weights
 * \param tau FV will be scaled by this value before update
 */
void MiraWeightVector::update(const MiraFeatureVector& fv, float tau) {
  ++m_numUpdates;
  const size_t entries = fv.size();
  for (size_t pos = 0; pos < entries; ++pos) {
    update(fv.feat(pos), fv.val(pos)*tau);
  }
}
/**
 * Count an update step without changing any weight
 * (this still affects averaging).
 */
void MiraWeightVector::tick() {
  ++m_numUpdates;
}
/**
 * Score a feature vector according to the model: the sum over all of its
 * entries of weight(feature id) * value.
 * \param fv Feature vector to be scored
 */
ValType MiraWeightVector::score(const MiraFeatureVector& fv) const {
  ValType dot = 0.0;
  for (size_t pos = 0; pos < fv.size(); ++pos) {
    dot += weight(fv.feat(pos)) * fv.val(pos);
  }
  return dot;
}
/**
 * Return an averaged view of this weight vector.
 * All running totals are brought up to date first.
 */
AvgWeightVector MiraWeightVector::avg() {
  fixTotals();
  return AvgWeightVector(*this);
}
/**
 * Adds delta to one weight and lazily brings that weight's running total
 * up to date. Weights not stored yet are created as zero.
 */
void MiraWeightVector::update(size_t index, ValType delta) {
  // Handle previously unseen weights: grow all three tables with zeros.
  if (index >= m_weights.size()) {
    const size_t needed = index + 1;
    m_weights.resize(needed, 0.0);
    m_totals.resize(needed, 0.0);
    m_lastUpdated.resize(needed, 0);
  }

  // Book keeping for w = w + delta: the old weight was in effect for every
  // step since it was last touched.
  const size_t stepsSince = m_numUpdates - m_lastUpdated[index];
  m_totals[index] += stepsSince * m_weights[index] + delta;
  m_weights[index] += delta;
  m_lastUpdated[index] = m_numUpdates;
}
/**
 * Bring every weight's running total up to date by applying a zero
 * update to each stored weight.
 */
void MiraWeightVector::fixTotals() {
  const size_t count = m_weights.size();
  for (size_t idx = 0; idx < count; ++idx) {
    update(idx, 0);
  }
}
/**
 * Weight at the given index; indexes beyond the stored weights yield 0.
 */
ValType MiraWeightVector::weight(size_t index) const {
  if (index >= m_weights.size()) {
    return 0;
  }
  return m_weights[index];
}
/**
 * Squared norm of the weight vector: sum of squares of all stored weights.
 */
ValType MiraWeightVector::sqrNorm() const {
  ValType total = 0;
  for (size_t idx = 0; idx < m_weights.size(); ++idx) {
    total += weight(idx) * weight(idx);
  }
  return total;
}
/**
 * Creates an averaged view over wv. Only a reference is kept, so wv must
 * stay alive for as long as this view is used.
 */
AvgWeightVector::AvgWeightVector(const MiraWeightVector& wv)
  : m_wv(wv)
{
}
/**
 * Averaged weight at the given index: the running total divided by the
 * number of updates. With no updates yet, the plain weight is returned;
 * indexes beyond the stored totals yield 0.
 */
ValType AvgWeightVector::weight(size_t index) const
{
  // Nothing to average over yet.
  if (m_wv.m_numUpdates == 0) {
    return m_wv.weight(index);
  }
  if (index >= m_wv.m_totals.size()) {
    return 0;
  }
  return m_wv.m_totals[index] / m_wv.m_numUpdates;
}
/**
 * Score a feature vector with the averaged weights: the sum over all of
 * its entries of weight(feature id) * value.
 */
ValType AvgWeightVector::score(const MiraFeatureVector& fv) const {
  ValType dot = 0.0;
  for (size_t pos = 0; pos < fv.size(); ++pos) {
    dot += weight(fv.feat(pos)) * fv.val(pos);
  }
  return dot;
}
/** Number of weights stored in the underlying weight vector. */
size_t AvgWeightVector::size() const {
  const size_t stored = m_wv.m_weights.size();
  return stored;
}
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:

104
mert/MiraWeightVector.h Normal file
View File

@ -0,0 +1,104 @@
/*
* MiraWeightVector.h
* kbmira - k-best Batch MIRA
*
* A self-averaging weight-vector. Good for
* perceptron learning as well.
*
*/
#ifndef MERT_MIRA_WEIGHT_VECTOR_H
#define MERT_MIRA_WEIGHT_VECTOR_H
#include <vector>
#include "MiraFeatureVector.h"
class AvgWeightVector;
// Weight vector that additionally tracks, per weight, a running total
// over update steps so that an averaged weight can be read back through
// AvgWeightVector.
class MiraWeightVector {
public:
/**
* Constructor, initializes to the zero vector
*/
MiraWeightVector();
/**
* Constructor with provided initial vector
* \param init Initial feature values
*/
MiraWeightVector(const std::vector<ValType>& init);
/**
* Update the model
* \param fv Feature vector to be added to the weights
* \param tau FV will be scaled by this value before update
*/
void update(const MiraFeatureVector& fv, float tau);
/**
* Perform an empty update (affects averaging)
*/
void tick();
/**
* Score a feature vector according to the model
* \param fv Feature vector to be scored
*/
ValType score(const MiraFeatureVector& fv) const;
/**
* Squared norm of the weight vector
*/
ValType sqrNorm() const;
/**
* Return an averaged view of this weight vector.
* The view refers back to this object (see AvgWeightVector).
*/
AvgWeightVector avg();
friend class AvgWeightVector;
private:
/**
* Updates a weight and lazily updates its total
*/
void update(std::size_t index, ValType delta);
/**
* Make sure everyone's total is up-to-date
*/
void fixTotals();
/**
* Helper to handle out-of-range weights
*/
ValType weight(std::size_t index) const;
// Current weights, indexed by feature id.
std::vector<ValType> m_weights;
// Per-weight running totals used for averaging.
std::vector<ValType> m_totals;
// Per-weight value of m_numUpdates when the total was last brought up to date.
std::vector<std::size_t> m_lastUpdated;
// Number of update steps so far (update(fv, tau) and tick() each count one).
std::size_t m_numUpdates;
};
/**
* Averaged view of a weight vector.
* Holds only a reference to the MiraWeightVector it was created from, so
* it must not be used after that object is destroyed.
*/
class AvgWeightVector {
public:
AvgWeightVector(const MiraWeightVector& wv);
// Score a feature vector using the averaged weights.
ValType score(const MiraFeatureVector& fv) const;
// Averaged weight at the given index.
ValType weight(std::size_t index) const;
// Number of weights in the underlying vector.
std::size_t size() const;
private:
// Underlying weight vector (not owned).
const MiraWeightVector& m_wv;
};
#endif // MERT_MIRA_WEIGHT_VECTOR_H
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:

298
mert/kbmira.cpp Normal file
View File

@ -0,0 +1,298 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
***********************************************************************/
/**
* k-best Batch Mira, as described in:
*
* Colin Cherry and George Foster
* Batch Tuning Strategies for Statistical Machine Translation
* NAACL 2012
*
* Implemented by colin.cherry@nrc-cnrc.gc.ca
*
* To license implementations of any of the other tuners in that paper,
* please get in touch with any member of NRC Canada's Portage project
*
* Input is a set of n-best lists, encoded as feature and score files.
*
* Output is a weight file that results from running MIRA on these
* n-best lists for J iterations. Will return the set that maximizes
* training BLEU.
**/
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <cassert>
#include <iostream>
#include <string>
#include <vector>
#include <utility>
#include <algorithm>
#include <boost/program_options.hpp>
#include <boost/scoped_ptr.hpp>
#include "BleuScorer.h"
#include "HypPackEnumerator.h"
#include "MiraFeatureVector.h"
#include "MiraWeightVector.h"
using namespace std;
namespace po = boost::program_options;
/**
 * Corpus-level BLEU of the model-best hypotheses.
 *
 * For every sentence in train, picks the hypothesis with the highest
 * score under wv (the first one wins ties), sums the score statistics of
 * the picked hypotheses and returns unsmoothedBleu() of that sum.
 *
 * \param train enumerator over the hypothesis packs; it is reset first
 * \param wv    averaged weights used for scoring
 */
ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv) {
  vector<ValType> totals(kBleuNgramOrder*2+1,0);

  train->reset();
  while (!train->finished()) {
    // Find max model
    size_t bestHyp = 0;
    ValType bestScore = 0;
    for (size_t hyp = 0; hyp < train->cur_size(); ++hyp) {
      MiraFeatureVector features(train->featuresAt(hyp));
      const ValType modelScore = wv.score(features);
      if (hyp == 0 || modelScore > bestScore) {
        bestHyp = hyp;
        bestScore = modelScore;
      }
    }

    // Update stats
    const vector<float>& chosen = train->scoresAt(bestHyp);
    for (size_t k = 0; k < chosen.size(); ++k) {
      totals[k] += chosen[k];
    }

    train->next();
  }
  return unsmoothedBleu(totals);
}
int main(int argc, char** argv)
{
bool help;
string denseInitFile;
string sparseInitFile;
vector<string> scoreFiles;
vector<string> featureFiles;
int seed;
string outputFile;
float c = 0.01; // Step-size cap C
float decay = 0.999; // Pseudo-corpus decay \gamma
int n_iters = 60; // Max epochs J
bool streaming = false; // Stream all k-best lists?
bool no_shuffle = false; // Don't shuffle, even for in memory version
bool model_bg = false; // Use model for background corpus
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
desc.add_options()
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
("output-file,o", po::value<string>(&outputFile), "Output file")
("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features")
("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background");
;
po::options_description cmdline_options;
cmdline_options.add(desc);
po::variables_map vm;
po::store(po::command_line_parser(argc,argv).
options(cmdline_options).run(), vm);
po::notify(vm);
if (help) {
cout << "Usage: " + string(argv[0]) + " [options]" << endl;
cout << desc << endl;
exit(0);
}
if (vm.count("random-seed")) {
cerr << "Initialising random seed to " << seed << endl;
srand(seed);
} else {
cerr << "Initialising random seed from system clock" << endl;
srand(time(NULL));
}
// Initialize weights
///
// Dense
vector<parameter_t> initParams;
if(!denseInitFile.empty()) {
ifstream opt(denseInitFile.c_str());
string buffer; istringstream strstrm(buffer);
if (opt.fail()) {
cerr << "could not open dense initfile: " << denseInitFile << endl;
exit(3);
}
parameter_t val;
getline(opt,buffer);
while(strstrm >> val) initParams.push_back(val);
opt.close();
}
size_t initDenseSize = initParams.size();
// Sparse
if(!sparseInitFile.empty()) {
if(initDenseSize==0) {
cerr << "sparse initialization requires dense initialization" << endl;
exit(3);
}
ifstream opt(sparseInitFile.c_str());
if(opt.fail()) {
cerr << "could not open sparse initfile: " << sparseInitFile << endl;
exit(3);
}
int sparseCount=0;
parameter_t val; std::string name;
while(opt >> name >> val) {
size_t id = SparseVector::encode(name) + initDenseSize;
while(initParams.size()<=id) initParams.push_back(0.0);
initParams[id] = val;
sparseCount++;
}
cerr << "Found " << sparseCount << " initial sparse features" << endl;
opt.close();
}
MiraWeightVector wv(initParams);
// Initialize background corpus
vector<ValType> bg;
for(int j=0;j<kBleuNgramOrder;j++){
bg.push_back(kBleuNgramOrder-j);
bg.push_back(kBleuNgramOrder-j);
}
bg.push_back(kBleuNgramOrder);
// Training loop
boost::scoped_ptr<HypPackEnumerator> train;
if(streaming)
train.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
else
train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
ValType bestBleu = 0;
for(int j=0;j<n_iters;j++)
{
// MIRA train for one epoch
int iNumHyps = 0;
int iNumExamples = 0;
int iNumUpdates = 0;
ValType totalLoss = 0.0;
for(train->reset(); !train->finished(); train->next()) {
// Hope / fear decode
size_t hope_index=0, fear_index=0, model_index=0;
ValType hope_score=0, fear_score=0, model_score=0;
for(size_t i=0; i< train->cur_size(); i++) {
MiraFeatureVector vec(train->featuresAt(i));
ValType score = wv.score(vec);
ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
// Hope
if(i==0 || (score + bleu) > hope_score) {
hope_score = score + bleu;
hope_index = i;
}
// Fear
if(i==0 || (score - bleu) > fear_score) {
fear_score = score - bleu;
fear_index = i;
}
// Model
if(i==0 || score > model_score) {
model_score = score;
model_index = i;
}
iNumHyps++;
}
// Update weights
if(hope_index!=fear_index) {
// Vector difference
MiraFeatureVector hope(train->featuresAt(hope_index));
MiraFeatureVector fear(train->featuresAt(fear_index));
MiraFeatureVector diff = hope - fear;
// Bleu difference
const vector<float>& hope_stats = train->scoresAt(hope_index);
ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
const vector<float>& fear_stats = train->scoresAt(fear_index);
ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
assert(hopeBleu > fearBleu);
ValType delta = hopeBleu - fearBleu;
// Loss and update
ValType diff_score = wv.score(diff);
ValType loss = delta - diff_score;
if(loss > 0) {
ValType eta = min(c, loss / diff.sqrNorm());
wv.update(diff,eta);
totalLoss+=loss;
iNumUpdates++;
}
// Update BLEU statistics
const vector<float>& model_stats = train->scoresAt(model_index);
for(size_t k=0;k<bg.size();k++) {
bg[k]*=decay;
if(model_bg)
bg[k]+=model_stats[k];
else
bg[k]+=hope_stats[k];
}
}
iNumExamples++;
}
// Training Epoch summary
cerr << iNumUpdates << "/" << iNumExamples << " updates"
<< ", avg loss = " << (totalLoss / iNumExamples);
// Evaluate current average weights
AvgWeightVector avg = wv.avg();
ValType bleu = evaluate(train.get(), avg);
cerr << ", BLEU = " << bleu << endl;
if(bleu > bestBleu) {
size_t num_dense = train->num_dense();
if(initDenseSize>0 && initDenseSize!=num_dense) {
cerr << "Error: Initial dense feature count and dense feature count from n-best do not match: "
<< initDenseSize << "!=" << num_dense << endl;
exit(1);
}
// Write to a file
ostream* out;
ofstream outFile;
if (!outputFile.empty() ) {
outFile.open(outputFile.c_str());
if (!(outFile)) {
cerr << "Error: Failed to open " << outputFile << endl;
exit(1);
}
out = &outFile;
} else {
out = &cout;
}
for(size_t i=0;i<avg.size();i++) {
if(i<num_dense)
*out << "F" << i << " " << avg.weight(i) << endl;
else {
if(abs(avg.weight(i))>1e-8)
*out << SparseVector::decode(i-num_dense) << " " << avg.weight(i) << endl;
}
}
outFile.close();
bestBleu = bleu;
}
}
cerr << "Best BLEU = " << bestBleu << endl;
}
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:

View File

@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef moses_cmd_IOWrapper_h
#define moses_cmd_IOWrapper_h
#include <cassert>
#include <fstream>
#include <ostream>
#include <vector>
@ -121,15 +122,15 @@ IOWrapper *GetIODevice(const Moses::StaticData &staticData);
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
const TranslationSystem* system, long translationId, bool reportSegmentation);
void OutputAllFeatureScores(std::ostream& out, const TranslationSystem* system, const TrellisPath &path);
void OutputFeatureScores(std::ostream& out, const TrellisPath &path, const FeatureFunction *ff, std::string &lastName);
const Moses::TranslationSystem* system, long translationId, bool reportSegmentation);
void OutputAllFeatureScores(std::ostream& out, const Moses::TranslationSystem* system, const Moses::TrellisPath &path);
void OutputFeatureScores(std::ostream& out, const Moses::TrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName);
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
bool reportSegmentation, bool reportAllFactors, std::ostream& out);
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out);
void OutputInput(std::ostream& os, const Hypothesis* hypo);
void OutputAlignment(OutputCollector* collector, size_t lineNo, const Hypothesis *hypo);
void OutputAlignment(OutputCollector* collector, size_t lineNo, const TrellisPath &path);
void OutputInput(std::ostream& os, const Moses::Hypothesis* hypo);
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::Hypothesis *hypo);
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::TrellisPath &path);
#endif

View File

@ -13,6 +13,7 @@
#include <set>
using namespace std;
using namespace Moses;
size_t bleu_order = 4;
float UNKNGRAMLOGPROB = -20;

View File

@ -17,35 +17,33 @@
#include "Manager.h"
#include "TrellisPathList.h"
using namespace Moses;
class Edge;
typedef std::vector< const Hypothesis *> Lattice;
typedef std::vector< const Moses::Hypothesis *> Lattice;
typedef std::vector<const Edge*> Path;
typedef std::map<Path, size_t> PathCounts;
typedef std::map<Phrase, PathCounts > NgramHistory;
typedef std::map<Moses::Phrase, PathCounts > NgramHistory;
class Edge
{
const Hypothesis* m_tailNode;
const Hypothesis* m_headNode;
const Moses::Hypothesis* m_tailNode;
const Moses::Hypothesis* m_headNode;
float m_score;
TargetPhrase m_targetPhrase;
Moses::TargetPhrase m_targetPhrase;
NgramHistory m_ngrams;
public:
Edge(const Hypothesis* from, const Hypothesis* to, float score, const TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
Edge(const Moses::Hypothesis* from, const Moses::Hypothesis* to, float score, const Moses::TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
//cout << "Creating new edge from Node " << from->GetId() << ", to Node : " << to->GetId() << ", score: " << score << " phrase: " << targetPhrase << endl;
}
const Hypothesis* GetHeadNode() const {
const Moses::Hypothesis* GetHeadNode() const {
return m_headNode;
}
const Hypothesis* GetTailNode() const {
const Moses::Hypothesis* GetTailNode() const {
return m_tailNode;
}
@ -57,19 +55,19 @@ public:
return m_targetPhrase.GetSize();
}
const Phrase& GetWords() const {
const Moses::Phrase& GetWords() const {
return m_targetPhrase;
}
friend std::ostream& operator<< (std::ostream& out, const Edge& edge);
const NgramHistory& GetNgrams( std::map<const Hypothesis*, std::vector<Edge> > & incomingEdges) ;
const NgramHistory& GetNgrams( std::map<const Moses::Hypothesis*, std::vector<Edge> > & incomingEdges) ;
bool operator < (const Edge & compare) const;
void GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const;
void GetPhraseSuffix(const Moses::Phrase& origPhrase, size_t lastN, Moses::Phrase& targetPhrase) const;
void storeNgramHistory(const Phrase& phrase, Path & path, size_t count = 1) {
void storeNgramHistory(const Moses::Phrase& phrase, Path & path, size_t count = 1) {
m_ngrams[phrase][path]+= count;
}
@ -84,16 +82,16 @@ public:
NgramScores() {}
/** logsum this score to the existing score */
void addScore(const Hypothesis* node, const Phrase& ngram, float score);
void addScore(const Moses::Hypothesis* node, const Moses::Phrase& ngram, float score);
/** Iterate through ngrams for selected node */
typedef std::map<const Phrase*, float>::const_iterator NodeScoreIterator;
NodeScoreIterator nodeBegin(const Hypothesis* node);
NodeScoreIterator nodeEnd(const Hypothesis* node);
typedef std::map<const Moses::Phrase*, float>::const_iterator NodeScoreIterator;
NodeScoreIterator nodeBegin(const Moses::Hypothesis* node);
NodeScoreIterator nodeEnd(const Moses::Hypothesis* node);
private:
std::set<Phrase> m_ngrams;
std::map<const Hypothesis*, std::map<const Phrase*, float> > m_scores;
std::set<Moses::Phrase> m_ngrams;
std::map<const Moses::Hypothesis*, std::map<const Moses::Phrase*, float> > m_scores;
};
@ -102,11 +100,11 @@ class LatticeMBRSolution
{
public:
/** Read the words from the path */
LatticeMBRSolution(const TrellisPath& path, bool isMap);
LatticeMBRSolution(const Moses::TrellisPath& path, bool isMap);
const std::vector<float>& GetNgramScores() const {
return m_ngramScores;
}
const std::vector<Word>& GetWords() const {
const std::vector<Moses::Word>& GetWords() const {
return m_words;
}
float GetMapScore() const {
@ -117,10 +115,10 @@ public:
}
/** Initialise ngram scores */
void CalcScore(std::map<Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
void CalcScore(std::map<Moses::Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
private:
std::vector<Word> m_words;
std::vector<Moses::Word> m_words;
float m_mapScore;
std::vector<float> m_ngramScores;
float m_score;
@ -132,18 +130,18 @@ struct LatticeMBRSolutionComparator {
}
};
void pruneLatticeFB(Lattice & connectedHyp, std::map < const Hypothesis*, std::set <const Hypothesis* > > & outgoingHyps, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges,
const std::vector< float> & estimatedScores, const Hypothesis*, size_t edgeDensity,float scale);
void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*, std::set <const Moses::Hypothesis* > > & outgoingHyps, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges,
const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);
//Use the ngram scores to rerank the nbest list, return at most n solutions
void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
//calculate expected ngram counts, clipping at 1 (ie calculating posteriors) if posteriors==true.
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Phrase,
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
float>& finalNgramScores, bool posteriors);
void GetOutputFactors(const TrellisPath &path, std::vector <Word> &translation);
void extract_ngrams(const std::vector<Word >& sentence, std::map < Phrase, int > & allngrams);
bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b);
std::vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList);
const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
//std::vector<Word> doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int > & allngrams);
bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
#endif

View File

@ -5,7 +5,7 @@
#include "Factor.h"
#include "Util.h"
#include "LM/SingleFactor.h"
#include "onlineRLM.h"
#include "DynSAInclude/onlineRLM.h"
//#include "multiOnlineRLM.h"
#include "DynSAInclude/file.h"
#include "DynSAInclude/vocab.h"

View File

@ -13,6 +13,8 @@
#include "RuleTable/Loader.h"
#include "RuleTable/LoaderFactory.h"
#include "TypeDef.h"
#include "StaticData.h"
#include "UserMessage.h"
using namespace std;
@ -27,6 +29,13 @@ bool PhraseDictionaryALSuffixArray::Load(const std::vector<FactorType> &input
, const LMList &languageModels
, const WordPenaltyProducer* wpProducer)
{
const StaticData &staticData = StaticData::Instance();
if (staticData.ThreadCount() > 1)
{
UserMessage::Add("Suffix array implementation is not threadsafe");
return false;
}
// file path is the directory of the rules for each, NOT the file of all the rules
SetFilePath(filePath);
m_tableLimit = tableLimit;

View File

@ -42,6 +42,8 @@ if $(location) {
install compactify : training/compact-rule-table//compactify : <location>$(location)/training/compact-rule-table/tools ;
install phrase-extract : training/phrase-extract//programs : <location>$(location)/training/phrase-extract ;
install pcfg-extract : training/phrase-extract/pcfg-extract//pcfg-extract : <location>$(location)/training/phrase-extract/pcfg-extract ;
install pcfg-score : training/phrase-extract/pcfg-score//pcfg-score : <location>$(location)/training/phrase-extract/pcfg-score ;
install lexical-reordering : training/lexical-reordering//score : <location>$(location)/training/lexical-reordering ;
install symal : training/symal//symal : <location>$(location)/training/symal ;

View File

@ -260,7 +260,8 @@ script = $moses-script-dir/training/train-model.perl
### general options
# these are options that are passed on to train-model.perl, for instance
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G" to reduce on-disk sorting
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
#
#training-options = ""

View File

@ -280,7 +280,8 @@ script = $moses-script-dir/training/train-model.perl
### general options
# these are options that are passed on to train-model.perl, for instance
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G" to reduce on-disk sorting
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
#
#training-options = ""

View File

@ -260,7 +260,8 @@ script = $moses-script-dir/training/train-model.perl
### general options
# these are options that are passed on to train-model.perl, for instance
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G" to reduce on-disk sorting
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
#
#training-options = ""

View File

@ -264,7 +264,8 @@ script = $moses-script-dir/training/train-model.perl
### general options
# these are options that are passed on to train-model.perl, for instance
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G" to reduce on-disk sorting
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
#
#training-options = ""

View File

@ -244,7 +244,8 @@ script = $moses-script-dir/training/train-model.perl
### general options
# these are options that are passed on to train-model.perl, for instance
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
# * "-sort-buffer-size 8G" to reduce on-disk sorting
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
#
#training-options = ""

View File

@ -1,3 +1,4 @@
cluster: townhill seville hermes lion seville sannox lutzow frontend
multicore-8: tyr thor odin crom saxnot vali vili freyja bragi hoenir
multicore-8: tyr thor odin crom
multicore-16: saxnot vali vili freyja bragi hoenir
multicore-24: syn hel skaol saga

View File

@ -344,8 +344,21 @@ parse-relax
pass-unless: input-parse-relaxer output-parse-relaxer
template-if: input-parse-relaxer IN.$input-extension OUT.$input-extension
template-if: output-parse-relaxer IN.$output-extension OUT.$output-extension
pcfg-extract
in: parse-relaxed-corpus
out: pcfg
default-name: model/pcfg
ignore-unless: use-pcfg-feature
rerun-on-change: use-pcfg-feature
template: $moses-script-dir/training/phrase-extract/pcfg-extract/pcfg-extract < IN.$output-extension > OUT.$output-extension
pcfg-score
in: parse-relaxed-corpus pcfg
out: scored-corpus
default-name: model/scored-corpus
pass-unless: use-pcfg-feature
template: ln -s IN.$input-extension OUT.$input-extension ; $moses-script-dir/training/phrase-extract/pcfg-score/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension
extract-phrases
in: word-alignment parse-relaxed-corpus
in: word-alignment scored-corpus
out: extracted-phrases
rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm
default-name: model/extract
@ -756,6 +769,20 @@ ibm-bleu-c
ignore-unless: ibm-bleu-c
rerun-on-change: ibm-bleu-c
template: $ibm-bleu-c -s $input-sgm -r IN1 -t IN > OUT
bolt-bleu
in: detokenized-output
out: bolt-bleu-score
default-name: evaluation/bolt-bleu
ignore-unless: bolt-bleu
rerun-on-change: bolt-bleu
template: $bolt-bleu IN > OUT
bolt-bleu-c
in: detokenized-output
out: bolt-bleu-c-score
default-name: evaluation/bolt-bleu-c
ignore-unless: bolt-bleu-c
rerun-on-change: bolt-bleu-c
template: $bolt-bleu-c IN > OUT
multi-bleu
in: cleaned-output reference
out: multi-bleu-score
@ -811,6 +838,6 @@ analysis-precision
[REPORTING] single
report
in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model
in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model
out: report
default-name: evaluation/report

View File

@ -1020,7 +1020,7 @@ sub execute_steps {
}
}
print "number of steps doable or running: ".(scalar keys %DO)."\n";
print "number of steps doable or running: ".(scalar keys %DO)." at ".`date`;
foreach my $step (keys %DO) { print "\t".($DO{$step}==2?"running: ":"doable: ").$DO_STEP[$step]."\n"; }
return unless scalar keys %DO;
@ -2018,6 +2018,7 @@ sub get_training_setting {
my $target_syntax = &get("GENERAL:output-parser");
my $score_settings = &get("TRAINING:score-settings");
my $parallel = &get("TRAINING:parallel");
my $pcfg = &get("TRAINING:use-pcfg-feature");
my $xml = $source_syntax || $target_syntax;
@ -2040,6 +2041,7 @@ sub get_training_setting {
$cmd .= "-glue-grammar " if $hierarchical;
$cmd .= "-score-options '".$score_settings."' " if $score_settings;
$cmd .= "-parallel " if $parallel;
$cmd .= "-pcfg " if $pcfg;
# factored training
if (&backoff_and_get("TRAINING:input-factors")) {

View File

@ -3,6 +3,7 @@
use strict;
my $cores = 8;
my $serial = 1;
my ($infile,$outfile,$cmd,$tmpdir);
my $parent = $$;
@ -12,6 +13,7 @@ GetOptions('cores=i' => \$cores,
'in=s' => \$infile,
'out=s' => \$outfile,
'cmd=s' => \$cmd,
'serial=i' => \$serial
) or exit(1);
die("ERROR: specify command with -cmd") unless $cmd;
@ -24,8 +26,9 @@ die("ERROR: you need to specify a tempdir with -tmpdir") unless $tmpdir;
# create split input files
my $sentenceN = `cat $infile | wc -l`;
my $splitN = int(($sentenceN+$cores-0.5) / $cores);
`split -a 2 -l $splitN $infile $tmpdir/in-$parent-`;
my $splitN = int(($sentenceN+($cores*$serial)-0.5) / ($cores*$serial));
print STDERR "split -a 3 -l $splitN $infile $tmpdir/in-$parent-\n";
`split -a 4 -l $splitN $infile $tmpdir/in-$parent-`;
# find out the names of the processes
my @CORE=`ls $tmpdir/in-$parent-*`;
@ -33,17 +36,23 @@ chomp(@CORE);
grep(s/.+in\-\d+\-([a-z]+)$/$1/e,@CORE);
# create core scripts
foreach my $core (@CORE){
for(my $i=0;$i<scalar(@CORE);$i++) {
my $core = $CORE[$i];
open(BASH,">$tmpdir/core-$parent-$core.bash") or die "Cannot open: $!";
print BASH "#bash\n\n";
# print BASH "export PATH=$ENV{PATH}\n\n";
printf BASH $cmd."\n", "$tmpdir/in-$parent-$core", "$tmpdir/out-$parent-$core";
for(my $j=2;$j<=$serial;$j++) {
$core = $CORE[++$i];
printf BASH $cmd."\n", "$tmpdir/in-$parent-$core", "$tmpdir/out-$parent-$core";
}
close(BASH);
}
# fork processes
my (@CHILDREN);
foreach my $core (@CORE){
next unless -e "$tmpdir/core-$parent-$core.bash";
my $child = fork();
if (! $child) { # I am child
print STDERR "running child $core\n";

View File

@ -14,6 +14,10 @@ $TYPE{"multi-bleu-c"}= "BLEU-c";
$TYPE{"ibm-bleu"} = "IBM";
$TYPE{"ibm-bleu-c"} = "IBM-c";
$TYPE{"meteor"} = "METEOR";
$TYPE{"bolt-bleu"} = "BLEU";
$TYPE{"bolt-bleu-c"} = "BLEU-c";
$TYPE{"bolt-ter"} = "TER";
$TYPE{"bolt-ter-c"} = "TER-c";
my %SCORE;
my %AVERAGE;
@ -60,6 +64,9 @@ sub process {
elsif ($type eq 'meteor') {
$SCORE{$set} .= &extract_meteor($file,$type)." ";
}
elsif ($type =~ /^bolt-(.+)$/) {
$SCORE{$set} .= &extract_bolt($file,$1)." ";
}
}
sub extract_nist_bleu {
@ -115,6 +122,19 @@ sub extract_multi_bleu {
return $output.$TYPE{$type};
}
# Pull one BOLT metric out of a score file.
#   $file: path of the score file (read via `cat`)
#   $type: metric selector - 'bleu', 'bleu-c', 'ter' or 'ter-c'
# The last matching line in the file wins. The score is scaled by 100,
# added to the running %AVERAGE entry for "bolt-$type", and returned as
# a formatted string followed by the display name from %TYPE.
sub extract_bolt {
  my ($file,$type) = @_;

  # line pattern that carries the score for each supported metric
  my %PATTERN = (
    'bleu'   => qr/Lowercase BLEU\s+([\d\.]+)/,
    'bleu-c' => qr/Cased BLEU\s+([\d\.]+)/,
    'ter'    => qr/Lowercase TER\s+([\d\.]+)/,
    'ter-c'  => qr/Cased TER\s+([\d\.]+)/,
  );
  my $pattern = $PATTERN{$type};

  my $score;
  foreach my $line (`cat $file`) {
    next unless defined($pattern);
    if ($line =~ $pattern) {
      $score = $1;
    }
  }

  my $key = "bolt-".$type;
  my $scaled = $score*100;
  $AVERAGE{$key} += $scaled;
  return sprintf("%.02f ",$scaled).$TYPE{$key};
}
sub extract_meteor {
my ($file,$type) = @_;
my ($meteor, $precision);

View File

@ -8,15 +8,23 @@ my $FILLER = ":s:es";
my $MIN_SIZE = 3;
my $MIN_COUNT = 5;
my $MAX_COUNT = 5;
my $FACTORED = 0;
my $SYNTAX = 0;
my $MARK_SPLIT = 0;
my $BINARIZE = 0;
$HELP = 1
unless &GetOptions('corpus=s' => \$CORPUS,
'model=s' => \$MODEL,
'filler=s' => \$FILLER,
'factored' => \$FACTORED,
'min-size=i' => \$MIN_SIZE,
'min-count=i' => \$MIN_COUNT,
'max-count=i' => \$MAX_COUNT,
'help' => \$HELP,
'verbose' => \$VERBOSE,
'syntax' => \$SYNTAX,
'binarize' => \$BINARIZE,
'mark-split' => \$MARK_SPLIT,
'train' => \$TRAIN);
if ($HELP ||
@ -29,59 +37,152 @@ if ($HELP ||
print "options: -min-size: minimum word size (default $MIN_SIZE)\n";
print " -min-count: minimum word count (default $MIN_COUNT)\n";
print " -filler: filler letters between words (default $FILLER)\n";
print " -factor: factored data, assuming factor 0 as surface (default $FACTORED)\n";
print " -syntax: syntactically parsed data (default $SYNTAX)\n";
print " -mark-split: mark non-terminal label of split words (default $MARK_SPLIT)\n";
print " -binarize: binarize subtree for split word (default $BINARIZE)\n";
exit;
}
if ($TRAIN) {
&train;
if ($SYNTAX) { &train_syntax(); }
elsif ($FACTORED) { &train_factored(); }
else { &train(); }
}
else {
&apply;
&apply();
}
sub train {
my %WORD;
my %COUNT;
open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
while(<CORPUS>) {
chop; s/\s+/ /g; s/^ //; s/ $//;
foreach (split) {
$WORD{$_}++;
$COUNT{$_}++;
}
}
close($CORPUS);
close(CORPUS);
&save_trained_model(\%COUNT);
}
sub save_trained_model {
my ($COUNT) = @_;
my $id = 0;
open(MODEL,">".$MODEL);
foreach my $word (keys %WORD) {
print MODEL "".(++$id)."\t".$word."\t".$WORD{$word}."\n";
foreach my $word (keys %$COUNT) {
print MODEL "".(++$id)."\t".$word."\t".$$COUNT{$word}."\n";
}
close(MODEL);
print STDERR "written model file with ".(scalar keys %WORD)." words.\n";
print STDERR "written model file with ".(scalar keys %$COUNT)." words.\n";
}
# Train a model from a factored corpus.
# Every whitespace-separated token is a factored word; its surface form is
# everything before the first '|'. For each surface form only the most
# frequent factored variant is kept, and it is assigned the summed count of
# all variants of that surface form. The result is written out through
# save_trained_model().
sub train_factored {
  my (%COUNT,%FACTORED_COUNT);
  # collect counts for interpretations for each surface word
  open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
  while(<CORPUS>) {
    # chomp (not chop): a final line without a trailing newline must not
    # lose the last character of its last token
    chomp; s/\s+/ /g; s/^ //; s/ $//;
    foreach my $factored_word (split) {
      my $word = $factored_word;
      $word =~ s/\|.+//g; # just first factor
      $FACTORED_COUNT{$word}{$factored_word}++;
    }
  }
  close(CORPUS);
  # only preserve most frequent interpretation, assign sum of counts
  foreach my $word (keys %FACTORED_COUNT) {
    my ($max,$best,$total) = (0,"",0);
    foreach my $factored_word (keys %{$FACTORED_COUNT{$word}}) {
      my $count = $FACTORED_COUNT{$word}{$factored_word};
      $total += $count;
      # strict '>' keeps the first variant seen among equally frequent ones
      if ($count > $max) {
        $max = $count;
        $best = $factored_word;
      }
    }
    $COUNT{$best} = $total;
  }
  &save_trained_model(\%COUNT);
}
# Train a model from a syntactically annotated (XML-marked-up) corpus.
# Tokens of the form label="X"... set the current label; every later token
# on the same line that does not start with '<' is counted as a word under
# that label. For each word only its most frequent label is kept, stored as
# the key "word label" with the summed count over all labels. The result is
# written out through save_trained_model().
sub train_syntax {
  my (%COUNT,%LABELED_COUNT);
  # collect counts for interpretations for each surface word
  open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
  while(<CORPUS>) {
    # chomp (not chop): a final line without a trailing newline must not
    # lose the last character of its last token
    chomp; s/\s+/ /g; s/^ //; s/ $//;
    my $label; # label of the most recent label="..." token on this line
    foreach (split) {
      if (/^label="([^\"]+)"/) {
        $label = $1;
      }
      elsif (! /^</) {
        # NOTE(review): $label is still undefined for a word that precedes
        # any label token on its line - confirm the input never has this
        $LABELED_COUNT{$_}{$label}++;
      }
    }
  }
  close(CORPUS);
  # only preserve most frequent label, assign sum of counts
  foreach my $word (keys %LABELED_COUNT) {
    my ($max,$best,$total) = (0,"",0);
    foreach my $label (keys %{$LABELED_COUNT{$word}}) {
      my $count = $LABELED_COUNT{$word}{$label};
      $total += $count;
      # strict '>' keeps the first label seen among equally frequent ones
      if ($count > $max) {
        $max = $count;
        $best = "$word $label";
      }
    }
    $COUNT{$best} = $total;
  }
  &save_trained_model(\%COUNT);
}
sub apply {
my (%WORD,%TRUECASE);
my (%COUNT,%TRUECASE,%LABEL);
open(MODEL,$MODEL) || die("ERROR: could not open model '$MODEL'");
while(<MODEL>) {
chomp;
my ($id,$word,$count) = split(/\t/);
my ($id,$factored_word,$count) = split(/\t/);
my $label;
($factored_word,$label) = split(/ /,$factored_word);
my $word = $factored_word;
$word =~ s/\|.+//g; # just first factor
my $lc = lc($word);
# if word exists with multiple casings, only record most frequent
next if defined($WORD{$lc}) && $WORD{$lc} > $count;
$WORD{$lc} = $count;
$TRUECASE{$lc} = $word;
next if defined($COUNT{$lc}) && $COUNT{$lc} > $count;
$COUNT{$lc} = $count;
$TRUECASE{$lc} = $factored_word;
$LABEL{$lc} = $label if $SYNTAX;
}
close(MODEL);
while(<STDIN>) {
my $first = 1;
chop; s/\s+/ /g; s/^ //; s/ $//;
foreach my $word (split) {
my @BUFFER; # for xml tags
foreach my $factored_word (split) {
print " " unless $first;
$first = 0;
# syntax: don't split xml
if ($SYNTAX && ($factored_word =~ /^</ || $factored_word =~ />$/)) {
push @BUFFER,$factored_word;
$first = 1;
next;
}
# get case class
my $word = $factored_word;
$word =~ s/\|.+//g; # just first factor
my $lc = lc($word);
# don't split frequent words
if (defined($WORD{$word}) && $WORD{$word}>=$MAX_COUNT) {
print $word;
if (defined($COUNT{$lc}) && $COUNT{$lc}>=$MAX_COUNT) {
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
print $factored_word;
next;
}
@ -100,17 +201,18 @@ sub apply {
my $subword = lc(substr($word,
$start+length($filler),
$end-$start+1-length($filler)));
next unless defined($WORD{$subword});
next unless $WORD{$subword} >= $MIN_COUNT;
print STDERR "\tmatching word $start .. $end ($filler)$subword $WORD{$subword}\n" if $VERBOSE;
push @{$REACHABLE{$end}},"$start $TRUECASE{$subword} $WORD{$subword}";
next unless defined($COUNT{$subword});
next unless $COUNT{$subword} >= $MIN_COUNT;
print STDERR "\tmatching word $start .. $end ($filler)$subword $COUNT{$subword}\n" if $VERBOSE;
push @{$REACHABLE{$end}},"$start $TRUECASE{$subword} $COUNT{$subword}";
}
}
}
# no matches at all?
if (!defined($REACHABLE{$final})) {
print $word;
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
print $factored_word;
next;
}
@ -152,9 +254,35 @@ sub apply {
last unless scalar @{$REACHABLE{$final}} > $ITERATOR{$final};
for(my $i=0;$i<$increase;$i++) { $ITERATOR{$i}=0; }
}
$best_split = $word unless $best_split =~ / /; # do not change case for unsplit words
print $best_split;
if ($best_split !~ / /) {
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
print $word; # do not change case for unsplit words
next;
}
if (!$SYNTAX) {
print $best_split;
}
else {
$BUFFER[$#BUFFER] =~ s/label=\"/label=\"SPLIT-/ if $MARK_SPLIT;
$BUFFER[$#BUFFER] =~ /label=\"([^\"]+)\"/ || die("ERROR: $BUFFER[$#BUFFER]\n");
my $pos = $1;
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
my @SPLIT = split(/ /,$best_split);
my @OUT = ();
if ($BINARIZE) {
for(my $w=0;$w<scalar(@SPLIT)-2;$w++) {
push @OUT,"<tree label=\"\@$pos\">";
}
}
for(my $w=0;$w<scalar(@SPLIT);$w++) {
if ($BINARIZE && $w>=2) { push @OUT, "</tree>"; }
push @OUT,"<tree label=\"".$LABEL{lc($SPLIT[$w])}."\"> $SPLIT[$w] </tree>";
}
print join(" ",@OUT);
}
}
print " ".join(" ",@BUFFER) if scalar(@BUFFER); @BUFFER = (); # clear buffer
print "\n";
}
}

View File

@ -6,11 +6,15 @@
use strict;
use File::Basename;
sub RunFork($);
sub systemCheck($);
sub NumStr($);
print "Started ".localtime() ."\n";
my $numParallel= $ARGV[0];
$numParallel = 1 if $numParallel < 1;
my $splitCmd= $ARGV[1];
my $sortCmd= $ARGV[2];
my $extractCmd= $ARGV[3];
@ -29,25 +33,34 @@ for (my $i = 8; $i < $#ARGV + 1; ++$i)
my $TMPDIR=dirname($extract) ."/tmp.$$";
mkdir $TMPDIR;
my $totalLines = int(`wc -l $align`);
my $totalLines = int(`cat $align | wc -l`);
my $linesPerSplit = int($totalLines / $numParallel) + 1;
print "total=$totalLines line-per-split=$linesPerSplit \n";
my @children;
my $pid;
my $cmd;
if ($numParallel > 1)
{
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $target $TMPDIR/target.";
print STDERR "Executing: $cmd \n";
`$cmd`;
$pid = RunFork($cmd);
push(@children, $pid);
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $source $TMPDIR/source.";
print STDERR "Executing: $cmd \n";
`$cmd`;
$pid = RunFork($cmd);
push(@children, $pid);
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $align $TMPDIR/align.";
print STDERR "Executing: $cmd \n";
`$cmd`;
$pid = RunFork($cmd);
push(@children, $pid);
# wait until everything is finished
foreach (@children) {
waitpid($_, 0);
}
}
else
{
@ -67,15 +80,13 @@ else
}
# run extract
my $isParent = 1;
my @childs;
@children = ();
for (my $i = 0; $i < $numParallel; ++$i)
{
my $pid = fork();
if ($pid == 0)
{ # child
$isParent = 0;
my $numStr = NumStr($i);
my $cmd = "$extractCmd $TMPDIR/target.$numStr $TMPDIR/source.$numStr $TMPDIR/align.$numStr $TMPDIR/extract.$numStr $otherExtractArgs \n";
print STDERR $cmd;
@ -85,20 +96,13 @@ for (my $i = 0; $i < $numParallel; ++$i)
}
else
{ # parent
push(@childs, $pid);
push(@children, $pid);
}
}
# wait until everything is finished
if ($isParent)
{
foreach (@childs) {
waitpid($_, 0);
}
}
else
{
die "shouldn't be here";
foreach (@children) {
waitpid($_, 0);
}
# merge
@ -116,20 +120,28 @@ for (my $i = 0; $i < $numParallel; ++$i)
$catCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.sorted.gz \n";
$catInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.inv.sorted.gz \n";
$catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.o.sorted.gz \n";
print STDERR $catCmd;
print STDERR $catInvCmd;
print STDERR $catOCmd;
systemCheck($catCmd);
systemCheck($catInvCmd);
@children = ();
$pid = RunFork($catCmd);
push(@children, $pid);
$pid = RunFork($catInvCmd);
push(@children, $pid);
my $numStr = NumStr(0);
if (-e "$TMPDIR/extract.$numStr.o.gz")
{
systemCheck($catOCmd);
$pid = RunFork($catOCmd);
push(@children, $pid);
}
# wait for all sorting to finish
foreach (@children) {
waitpid($_, 0);
}
# delete temporary files
$cmd = "rm -rf $TMPDIR \n";
print STDERR $cmd;
`$cmd`;
@ -139,6 +151,21 @@ print STDERR "Finished ".localtime() ."\n";
# -----------------------------------------
# -----------------------------------------
# Run a command in a forked child process.
#   $cmd: the command string handed to systemCheck() in the child
# Returns the child's pid to the parent so the caller can waitpid() on it.
# The child echoes the command to STDERR, runs it through systemCheck()
# and then exits; it never returns to the caller.
# Dies if the process cannot be forked.
sub RunFork($)
{
  my $cmd = shift;

  my $pid = fork();
  # fork() returns undef on failure; undef == 0 is true, so without this
  # check the parent would take the child branch and exit()
  die "ERROR: could not fork to run: $cmd ($!)\n" unless defined($pid);

  if ($pid == 0)
  { # child
    print STDERR $cmd;
    systemCheck($cmd);
    exit();
  }
  return $pid;
}
sub systemCheck($)
{
my $cmd = shift;
@ -171,4 +198,3 @@ sub NumStr($)
return $numStr;
}

View File

@ -0,0 +1,280 @@
#! /usr/bin/perl -w
# example
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.sorted.gz ./lex.2.f2e ./phrase-table.2.half.f2e --GoodTuring ./phrase-table.2.coc 0
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.inv.sorted.gz ./lex.2.e2f ./phrase-table.2.half.e2f --Inverse 1
use strict;
use File::Basename;
sub RunFork($);
sub systemCheck($);
sub GetSourcePhrase($);
sub NumStr($);
#my $EXTRACT_SPLIT_LINES = 5000000;
# Number of lines after which the extract file is cut at the next change of
# source phrase (see the splitting loop below).
my $EXTRACT_SPLIT_LINES = 1000000;

print "Started ".localtime() ."\n";

# Positional arguments:
#   0 number of parallel jobs, 1 sort command, 2 score command,
#   3 extract file, 4 lexical table, 5 output phrase-table half,
#   6..N-1 extra arguments handed to the score command, N sort flag.
# The sort flag is always read from the LAST argument, so with fewer than
# seven arguments it would silently alias one of the file names.
if (@ARGV < 7)
{
  die "usage: $0 num-parallel sort-cmd score-cmd extract-file lex-file pt-half [score-args...] do-sort\n";
}

my $numParallel = $ARGV[0];
$numParallel = 1 if $numParallel < 1;

my $sortCmd = $ARGV[1];
my $scoreCmd = $ARGV[2];
my $extractFile = $ARGV[3]; # 1st argument of the score command
my $lexFile = $ARGV[4];
my $ptHalf = $ARGV[5]; # output

# everything between the fixed arguments and the trailing sort flag is passed
# through to the score command, each argument followed by a single space
my $otherExtractArgs = join("", map { "$_ " } @ARGV[6 .. $#ARGV - 1]);
#$scoreCmd $extractFile $lexFile $ptHalf $otherExtractArgs

my $doSort = $ARGV[$#ARGV]; # last arg

# per-process scratch directory next to the output file
my $TMPDIR=dirname($ptHalf) ."/tmp.$$";
mkdir $TMPDIR or die "cannot create temporary directory $TMPDIR: $!\n";

my $cmd;
my $fileCount = 0;
if ($numParallel <= 1)
{ # don't do parallel. Just put the extract file into place as the only mini-extract
  if ($extractFile =~ /\.gz$/) {
    # Link through an absolute path: a relative link target is resolved
    # relative to the directory holding the link ($TMPDIR), which would
    # leave a dangling link.
    require File::Spec;
    my $absExtractFile = File::Spec->rel2abs($extractFile);
    $cmd = "ln -s $absExtractFile $TMPDIR/extract.0.gz";
  }
  else {
    # the splitting branch below always writes gzip data under the .gz name,
    # so do the same here instead of linking plain text as extract.0.gz
    $cmd = "gzip -c $extractFile > $TMPDIR/extract.0.gz";
  }
  print STDERR "$cmd \n";
  systemCheck($cmd);

  $fileCount = 1;
}
else
{ # cut up extract file into smaller mini-extract files.
  my $in;
  if ($extractFile =~ /\.gz$/) {
    open($in, "-|", "gunzip -c $extractFile") or die "can't open pipe to $extractFile: $!";
  }
  else {
    open($in, "<", $extractFile) or die "can't open $extractFile: $!";
  }

  my $filePath = "$TMPDIR/extract.$fileCount.gz";
  open(my $out, "|-", "gzip -c > $filePath") or die "error starting gzip $!";

  my $lineCount = 0;
  my $prevSourcePhrase = "";
  while (my $line = <$in>)
  {
    chomp($line);
    ++$lineCount;

    if ($lineCount > $EXTRACT_SPLIT_LINES)
    { # over line limit. Cut off at next source phrase change
      my $sourcePhrase = GetSourcePhrase($line);

      if ($prevSourcePhrase eq "")
      { # start comparing
        $prevSourcePhrase = $sourcePhrase;
      }
      elsif ($sourcePhrase ne $prevSourcePhrase)
      { # source phrase changed: cut off, open next mini-extract file & write to that instead
        close($out) or die "gzip failed while writing $filePath (status $?)";

        $prevSourcePhrase = "";
        $lineCount = 0;
        ++$fileCount;
        $filePath = "$TMPDIR/extract.$fileCount.gz";
        open($out, "|-", "gzip -c > $filePath") or die "error starting gzip $!";
      }
      # otherwise same source phrase as before: can't cut off yet
    }

    # the current line always goes to whichever mini-extract file is open now
    print $out "$line\n";
  }

  # closing a pipe waits for the child, so a failing gzip/gunzip is noticed here
  close($out) or die "gzip failed while writing $filePath (status $?)";
  close($in) or die "error while reading $extractFile (status $?)";

  # $fileCount was the index of the last file; make it the number of files
  ++$fileCount;
}
# create run scripts: one shell script per parallel job, kept open for writing
my @runFiles;
foreach my $job (0 .. $numParallel - 1)
{
  my $path = "$TMPDIR/run.$job.sh";
  open(my $handle, ">", $path) or die "cannot open $path: $!";
  push(@runFiles, $handle);
}

# write scoring of mini-extracts to run scripts, dealing the mini-extracts
# out to the scripts round-robin
foreach my $extractInd (0 .. $fileCount - 1)
{
  my $padded = NumStr($extractInd);
  my $script = $runFiles[$extractInd % $numParallel];
  print $script "$scoreCmd $TMPDIR/extract.$extractInd.gz $lexFile $TMPDIR/phrase-table.half.$padded.gz $otherExtractArgs\n";
}

# close run script files and make them executable
foreach my $job (0 .. $numParallel - 1)
{
  close($runFiles[$job]);
  systemCheck("chmod +x $TMPDIR/run.$job.sh");
}
# run each score script in parallel
my @children;
for (my $i = 0; $i < $numParallel; ++$i)
{
  my $cmd = "$TMPDIR/run.$i.sh";
  my $pid = RunFork($cmd);
  push(@children, $pid);
}

# wait until every job has finished. All children are reaped before giving
# up so that none is left running; a job that failed would otherwise leave
# the merged phrase table silently incomplete.
my $numFailed = 0;
foreach my $child (@children) {
  my $reaped = waitpid($child, 0);
  if ($reaped != $child || $? != 0) {
    print STDERR "ERROR: scoring job (pid $child) failed with wait status $?\n";
    ++$numFailed;
  }
}
die "$numFailed scoring job(s) failed; temporary files are left in $TMPDIR\n" if $numFailed > 0;
# merge & sort
if ($fileCount == 1 && !$doSort)
{ # a single, already ordered half table: just move it into place
  my $numStr = NumStr(0);
  $cmd = "mv $TMPDIR/phrase-table.half.$numStr.gz $ptHalf";
}
else
{ # concatenate all half tables (the shell expands the glob; the names are
  # zero-padded by NumStr), optionally sort, and recompress.
  # gunzip -c is used as elsewhere in this script.
  $cmd = "gunzip -c $TMPDIR/phrase-table.half.*.gz";

  if ($doSort) {
    $cmd .= " | LC_ALL=C $sortCmd -T $TMPDIR ";
  }

  $cmd .= " | gzip -c > $ptHalf";
}
# log the command on a line of its own
print STDERR "$cmd\n";
systemCheck($cmd);
# merge coc: each scoring job may write a ".coc" file next to its half table
# with one integer per line; the merged file holds the line-by-line sums.
my $numStr = NumStr(0);
my $cocPath = "$TMPDIR/phrase-table.half.$numStr.gz.coc";

# the presence of the first job's file decides whether coc files are merged at all
if (-e $cocPath)
{
  my @arrayCOC;

  for (my $i = 0; $i < $fileCount; ++$i)
  {
    $numStr = NumStr($i);
    $cocPath = "$TMPDIR/phrase-table.half.$numStr.gz.coc";
    open(my $fhIn, "<", $cocPath) or die "can't open $cocPath: $!";

    # += on a not-yet-existing element starts from 0, so the first file
    # simply fills the array and later files add to it
    my $arrayInd = 0;
    while (my $line = <$fhIn>)
    {
      $arrayCOC[$arrayInd] += int($line);
      ++$arrayInd;
    }

    close($fhIn);
  }

  # output
  $cocPath = "$ptHalf.coc";
  open(my $fhOut, ">", $cocPath) or die "cannot open $cocPath: $!";
  foreach my $coc (@arrayCOC)
  {
    print $fhOut "$coc\n";
  }
  close($fhOut) or die "error writing $cocPath: $!";
}

# remove the scratch directory with all mini-extracts, run scripts and half tables
$cmd = "rm -rf $TMPDIR \n";
print STDERR $cmd;
systemCheck($cmd);

print STDERR "Finished ".localtime() ."\n";
# -----------------------------------------
# -----------------------------------------
# Run a shell command in a forked child process.
#
# Parameter: $cmd - the shell command line to execute.
# Returns:   (in the parent) the process id of the child; callers are
#            expected to waitpid() on it.
# The child prints the command to STDERR, runs it through systemCheck()
# (which exits with status 1 if the command fails) and then exits, so it
# never returns to the caller.
sub RunFork($)
{
  my $cmd = shift;

  my $pid = fork();
  # fork() returns undef on failure. undef compares numerically equal to 0,
  # so without this check the parent would take the child branch below, run
  # the command itself and then exit.
  die "cannot fork: $!\n" unless defined $pid;

  if ($pid == 0)
  { # child
    print STDERR $cmd;
    systemCheck($cmd);
    exit();
  }
  return $pid;
}
# Run a shell command and terminate the whole process if it does not succeed.
#
# Parameter: $cmd - the command line handed to system().
# Returns normally only when system() reports success (status 0). Otherwise
# a diagnostic naming the command is written to STDERR and the process exits
# with status 1.
sub systemCheck($)
{
  my $cmd = shift;
  my $retVal = system($cmd);
  if ($retVal != 0)
  {
    # commands are often built with a trailing newline; drop it for the message
    (my $shown = $cmd) =~ s/\s+$//;

    if ($retVal == -1) {
      # system() itself failed, the command never ran
      print STDERR "ERROR: could not execute '$shown': $!\n";
    }
    elsif ($retVal & 127) {
      # low 7 bits of the wait status hold the terminating signal
      print STDERR "ERROR: '$shown' was killed by signal ".($retVal & 127)."\n";
    }
    else {
      # high byte of the wait status holds the exit code
      print STDERR "ERROR: '$shown' exited with status ".($retVal >> 8)."\n";
    }
    exit(1);
  }
}
# Return the source phrase of an extract line, i.e. everything in front of
# the first "|||" delimiter (any space before the delimiter is kept).
#
# Parameter: $line - one line of the extract file, without trailing newline.
# Returns:   the text before the first "|||"; the whole line if it contains
#            no delimiter.
sub GetSourcePhrase($)
{
  my $line = shift;
  my $pos = index($line, "|||");

  # index() yields -1 when there is no delimiter; substr($line, 0, -1) would
  # then quietly return the line minus its last character.
  return $line if $pos < 0;

  return substr($line, 0, $pos);
}
# Format a file index as a decimal string that is zero-padded to at least
# five digits (e.g. 7 -> "00007"); larger numbers are returned unpadded.
# The fixed width makes the file names sort in numeric order.
#
# Parameter: $i - non-negative integer index.
# Returns:   the padded string.
sub NumStr($)
{
  my $i = shift;
  return sprintf("%05d", $i);
}

View File

@ -3,11 +3,15 @@
use strict;
while(<STDIN>) {
s/\&bar;/\|/g;
s/\&lt;/\</g;
s/\&gt;/\>/g;
s/\&bra;/\[/g;
s/\&ket;/\]/g;
s/\&amp;/\&/g;
s/\&bar;/\|/g; # factor separator
s/\&lt;/\</g; # xml
s/\&gt;/\>/g; # xml
s/\&bra;/\[/g; # syntax non-terminal (legacy)
s/\&ket;/\]/g; # syntax non-terminal (legacy)
s/\&quot;/\"/g; # xml
s/\&apos;/\'/g; # xml
s/\&#91;/\[/g; # syntax non-terminal
s/\&#93;/\]/g; # syntax non-terminal
s/\&amp;/\&/g; # escape escape
print $_;
}

View File

@ -33,8 +33,9 @@ if ($HELP) {
exit;
}
die "No built-in rules for language $language, claim en for default behaviour."
if $language !~ /^(cs|en|fr|it)$/;
if ($language !~ /^(cs|en|fr|it)$/) {
print STDERR "Warning: No built-in rules for language $language.\n"
}
if (!$QUIET) {
print STDERR "Detokenizer Version ".'$Revision: 4134 $'."\n";
@ -65,12 +66,16 @@ sub detokenize {
$text = " $text ";
$text =~ s/ \@\-\@ /-/g;
# de-escape special chars
$text =~ s/\&bar;/\|/g;
$text =~ s/\&lt;/\</g;
$text =~ s/\&gt;/\>/g;
$text =~ s/\&bra;/\[/g;
$text =~ s/\&ket;/\]/g;
$text =~ s/\&amp;/\&/g;
$text =~ s/\&bar;/\|/g; # factor separator
$text =~ s/\&lt;/\</g; # xml
$text =~ s/\&gt;/\>/g; # xml
$text =~ s/\&bra;/\[/g; # syntax non-terminal (legacy)
$text =~ s/\&ket;/\]/g; # syntax non-terminal (legacy)
$text =~ s/\&quot;/\"/g; # xml
$text =~ s/\&apos;/\'/g; # xml
$text =~ s/\&#91;/\[/g; # syntax non-terminal
$text =~ s/\&#93;/\]/g; # syntax non-terminal
$text =~ s/\&amp;/\&/g; # escape escape
my $word;
my $i;

View File

@ -6,18 +6,22 @@ while(<STDIN>) {
chop;
# avoid general madness
s/[\000-\037]//g;
s/\s+/ /g;
s/^ //g;
s/ $//g;
s/[\000-\037]//g;
# special characters in moses
s/\&/\&amp;/g;
s/\|/\&bar;/g;
s/\</\&lt;/g;
s/\>/\&gt;/g;
s/\[/\&bra;/g;
s/\]/\&ket;/g;
s/\&/\&amp;/g; # escape escape
s/\|/\&bar;/g; # factor separator
s/\</\&lt;/g; # xml
s/\>/\&gt;/g; # xml
s/\'/\&apos;/g; # xml
s/\"/\&quot;/g; # xml
s/\[/\&#91;/g; # syntax non-terminal
s/\]/\&#93;/g; # syntax non-terminal
# restore xml instructions
s/\&lt;(\S+) translation="([^\"]+)"&gt; (.+?) &lt;\/(\S+)&gt;/\<$1 translation=\"$2\"> $3 <\/$4>/g;
print $_."\n";
}

View File

@ -149,12 +149,14 @@ sub tokenize {
$text =~ s/DOTMULTI/./g;
#escape special chars
$text =~ s/\&/\&amp;/g;
$text =~ s/\|/\&bar;/g;
$text =~ s/\</\&lt;/g;
$text =~ s/\>/\&gt;/g;
$text =~ s/\[/\&bra;/g;
$text =~ s/\]/\&ket;/g;
$text =~ s/\&/\&amp;/g; # escape escape
$text =~ s/\|/\&bar;/g; # factor separator
$text =~ s/\</\&lt;/g; # xml
$text =~ s/\>/\&gt;/g; # xml
$text =~ s/\'/\&apos;/g; # xml
$text =~ s/\"/\&quot;/g; # xml
$text =~ s/\[/\&#91;/g; # syntax non-terminal
$text =~ s/\]/\&#93;/g; # syntax non-terminal
#ensure final line break
$text .= "\n" unless $text =~ /\n$/;

View File

@ -117,6 +117,9 @@ my $___HISTORIC_INTERPOLATION = 0; # interpolate optimize weights with previous
# TODO: Should we also add these values to options of this script?
my $megam_default_options = "-fvals -maxi 30 -nobias binary";
# Flags related to Batch MIRA (Cherry & Foster, 2012)
my $___BATCH_MIRA = 0; # flg to enable batch MIRA
my $__THREADS = 0;
# Parameter for effective reference length when computing BLEU score
@ -206,6 +209,7 @@ GetOptions(
"pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
"pro-starting-point" => \$___PRO_STARTING_POINT,
"historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
"batch-mira" => \$___BATCH_MIRA,
"threads=i" => \$__THREADS
) or exit(1);
@ -324,10 +328,12 @@ if (!defined $mertdir) {
my $mert_extract_cmd = File::Spec->catfile($mertdir, "extractor");
my $mert_mert_cmd = File::Spec->catfile($mertdir, "mert");
my $mert_pro_cmd = File::Spec->catfile($mertdir, "pro");
my $mert_mira_cmd = File::Spec->catfile($mertdir, "kbmira");
die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
die "Not executable: $mert_pro_cmd" if ! -x $mert_pro_cmd;
die "Not executable: $mert_mira_cmd" if ! -x $mert_mira_cmd;
my $pro_optimizer = File::Spec->catfile($mertdir, "megam_i686.opt"); # or set to your installation
@ -727,6 +733,11 @@ while (1) {
$scfiles = "$score_file";
}
my $mira_settings = "";
$mira_settings .= " --dense-init run$run.$weights_in_file";
if (-e "run$run.sparse-weights") {
$mira_settings .= " --sparse-init run$run.sparse-weights";
}
my $file_settings = " --ffile $ffiles --scfile $scfiles";
my $pro_file_settings = "--ffile " . join(" --ffile ", split(/,/, $ffiles)) .
" --scfile " . join(" --scfile ", split(/,/, $scfiles));
@ -774,11 +785,14 @@ while (1) {
$cmd = $cmd." --sparse-weights run$run.merge-weights";
# ... and run mert
&submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
}
# just mert
else {
&submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
$cmd =~ s/(--ifile \S+)/$1,run$run.init.pro/;
&submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
} elsif ($___BATCH_MIRA) { # batch MIRA optimization
safesystem("echo 'not used' > $weights_out_file") or die;
$cmd = "$mert_mira_cmd $mira_settings $seed_settings $pro_file_settings -o $mert_outfile";
&submit_or_exec($cmd, "run$run.mira.out", $mert_logfile);
} else { # just mert
&submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
}
die "Optimization failed, file $weights_out_file does not exist or is empty"
@ -932,7 +946,7 @@ chdir($cwd);
sub get_weights_from_mert {
my ($outfile, $logfile, $weight_count, $sparse_weights) = @_;
my ($bestpoint, $devbleu);
if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/)) {
if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/) || $___BATCH_MIRA) {
open my $fh, '<', $outfile or die "Can't open $outfile: $!";
my (@WEIGHT, $sum);
for (my $i = 0; $i < $weight_count; $i++) { push @WEIGHT, 0; }
@ -949,6 +963,14 @@ sub get_weights_from_mert {
foreach (keys %{$sparse_weights}) { $$sparse_weights{$_} /= $sum; }
$bestpoint = join(" ", @WEIGHT);
close $fh;
if($___BATCH_MIRA) {
open my $fh2, '<', $logfile or die "Can't open $logfile: $!";
while(<$fh2>) {
if(/Best BLEU = ([\-\d\.]+)/) {
$devbleu = $1;
}
}
}
} else {
open my $fh, '<', $logfile or die "Can't open $logfile: $!";
while (<$fh>) {
@ -1115,7 +1137,7 @@ sub get_order_of_scores_from_nbestlist {
# return the score labels in order
my $fname_or_source = shift;
# print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";
open my $fh, '<', $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source': $!";
open my $fh, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source': $!";
my $line = <$fh>;
close $fh;
die "Line empty in nbestlist '$fname_or_source'" if !defined $line;
@ -1195,7 +1217,7 @@ sub create_config {
}
if (defined($sparse_weights_file)) {
push @{$P{"weights-file"}}, File::Spec->catfile($___WORKING_DIR, $sparse_weights_file);
push @{$P{"weight-file"}}, File::Spec->catfile($___WORKING_DIR, $sparse_weights_file);
}
# create new moses.ini decoder config file by cloning and overriding the original one

View File

@ -43,6 +43,7 @@ public:
int startS;
int endS;
float count;
double pcfgScore;
std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
@ -58,6 +59,7 @@ public:
, startS(sS)
, endS(eS)
, count(0)
, pcfgScore(0.0)
{}
void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength)

View File

@ -10,13 +10,13 @@ obj XmlTree.o : XmlTree.cpp : <include>. ;
alias filestreams : InputFileStream.cpp OutputFileStream.cpp : : : <include>. ;
alias trees : SyntaxTree.cpp tables-core.o XmlTree.o : : : <include>. ;
exe extract : tables-core.o SentenceAlignment.o extract.cpp InputFileStream ../../..//boost_iostreams ;
exe extract : tables-core.o SentenceAlignment.o extract.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o SentenceAlignmentWithSyntax.cpp HoleCollection.cpp extract-rules.cpp ExtractedRule.cpp OutputFileStream.cpp InputFileStream ../../../moses/src//ThreadPool ../../..//boost_iostreams ;
exe extract-lex : extract-lex.cpp InputFileStream ;
exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp InputFileStream ../../..//boost_iostreams ;
exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
exe consolidate : consolidate.cpp tables-core.o OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
@ -33,3 +33,5 @@ alias programs : extract extract-rules extract-lex score consolidate consolidate
install legacy : programs : <location>. <install-type>EXE ;
build-project extract-ghkm ;
build-project pcfg-extract ;
build-project pcfg-score ;

View File

@ -13,6 +13,8 @@
#include "tables-core.h"
#include "score.h"
#include <cstdlib>
using namespace std;
extern Vocabulary vcbT;
@ -111,6 +113,9 @@ void PhraseAlignment::create( char line[], int lineID )
}
else if (item == 5) { // non-term lengths
addNTLength(token[j]);
} else if (item == 6) { // target syntax PCFG score
float pcfgScore = std::atof(token[j].c_str());
pcfgSum = pcfgScore * count;
}
}
@ -119,7 +124,7 @@ void PhraseAlignment::create( char line[], int lineID )
if (item == 3) {
count = 1.0;
}
if (item < 3 || item > 5) {
if (item < 3 || item > 6) {
cerr << "ERROR: faulty line " << lineID << ": " << line << endl;
}
}

View File

@ -25,6 +25,7 @@ protected:
void createAlignVec(size_t sourceSize, size_t targetSize);
void addNTLength(const std::string &tok);
public:
float pcfgSum;
float count;
std::vector< std::set<size_t> > alignedToT;
std::vector< std::set<size_t> > alignedToS;

View File

@ -45,8 +45,11 @@ public:
bool targetSyntax;
bool duplicateRules;
bool fractionalCounting;
bool pcfgScore;
bool outputNTLengths;
bool gzOutput;
bool unpairedExtractFormat;
bool conditionOnTargetLhs;
RuleExtractionOptions()
: maxSpan(10)
@ -74,8 +77,11 @@ public:
, targetSyntax(false)
, duplicateRules(true)
, fractionalCounting(true)
, pcfgScore(false)
, outputNTLengths(false)
, gzOutput(false)
, unpairedExtractFormat(false)
, conditionOnTargetLhs(false)
{}
};

View File

@ -27,6 +27,8 @@
#include "XmlException.h"
#include "XmlTree.h"
using namespace std;
bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID)
{
if (!m_options.targetSyntax) {

View File

@ -42,11 +42,12 @@ void SyntaxTree::Clear()
m_index.clear();
}
void SyntaxTree::AddNode( int startPos, int endPos, std::string label )
SyntaxNode *SyntaxTree::AddNode( int startPos, int endPos, std::string label )
{
SyntaxNode* newNode = new SyntaxNode( startPos, endPos, label );
m_nodes.push_back( newNode );
m_index[ startPos ][ endPos ].push_back( newNode );
return newNode;
}
ParentNodes SyntaxTree::Parse()

View File

@ -34,12 +34,14 @@ protected:
std::string m_label;
std::vector< SyntaxNode* > m_children;
SyntaxNode* m_parent;
float m_pcfgScore;
public:
SyntaxNode( int startPos, int endPos, std::string label )
:m_start(startPos)
,m_end(endPos)
,m_label(label)
,m_parent(0)
,m_pcfgScore(0.0f)
{}
int GetStart() const {
return m_start;
@ -50,6 +52,12 @@ public:
std::string GetLabel() const {
return m_label;
}
float GetPcfgScore() const {
return m_pcfgScore;
}
void SetPcfgScore(float score) {
m_pcfgScore = score;
}
SyntaxNode *GetParent() {
return m_parent;
}
@ -89,11 +97,12 @@ public:
}
~SyntaxTree();
SyntaxNode *AddNode( int startPos, int endPos, std::string label );
SyntaxNode *GetTop() {
return m_top;
}
void AddNode( int startPos, int endPos, std::string label );
ParentNodes Parse();
bool HasNode( int startPos, int endPos ) const;
const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;

View File

@ -25,7 +25,7 @@
#include <string>
#include <set>
#include <iostream>
#include <stdlib.h>
#include <cstdlib>
#include <sstream>
#include "SyntaxTree.h"
#include "XmlException.h"
@ -128,6 +128,16 @@ string unescape(const string& str)
s += string("<");
} else if (name == "gt") {
s += string(">");
} else if (name == "#91") {
s += string("[");
} else if (name == "#93") {
s += string("]");
} else if (name == "bra") {
s += string("[");
} else if (name == "ket") {
s += string("]");
} else if (name == "bar") {
s += string("|");
} else if (name == "amp") {
s += string("&");
} else if (name == "apos") {
@ -345,13 +355,18 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
string label = ParseXmlTagAttribute(tagContent,"label");
labelCollection.insert( label );
string pcfgString = ParseXmlTagAttribute(tagContent,"pcfg");
float pcfgScore = pcfgString == "" ? 0.0f
: std::atof(pcfgString.c_str());
// report what we have processed so far
if (0) {
cerr << "XML TAG NAME IS: '" << tagName << "'" << endl;
cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
}
tree.AddNode( startPos, endPos-1, label );
SyntaxNode *node = tree.AddNode( startPos, endPos-1, label );
node->SetPcfgScore(pcfgScore);
}
}
}

View File

@ -212,6 +212,10 @@ Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
std::auto_ptr<Node> n(new Node(root->GetLabel(), nodeType));
if (nodeType == TREE) {
n->SetPcfgScore(root->GetPcfgScore());
}
const std::vector<ParseTree *> &children = root->GetChildren();
std::vector<Node *> childNodes;
childNodes.reserve(children.size());

View File

@ -266,6 +266,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
//("help", "print this help message and exit")
("AllowUnary",
"allow fully non-lexical unary rules")
("ConditionOnTargetLHS",
"write target LHS instead of \"X\" as source LHS")
("GlueGrammar",
po::value(&options.glueGrammarFile),
"write glue grammar to named file")
@ -285,6 +287,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
"set maximum allowed scope")
("Minimal",
"extract minimal rules only")
("PCFG",
"include score based on PCFG scores in target corpus")
("UnknownWordLabel",
po::value(&options.unknownWordFile),
"write unknown word labels to named file")
@ -355,12 +359,18 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
if (vm.count("AllowUnary")) {
options.allowUnary = true;
}
if (vm.count("ConditionOnTargetLHS")) {
options.conditionOnTargetLhs = true;
}
if (vm.count("GZOutput")) {
options.gzOutput = true;
}
if (vm.count("Minimal")) {
options.minimal = true;
}
if (vm.count("PCFG")) {
options.pcfg = true;
}
if (vm.count("UnpairedExtractFormat")) {
options.unpairedExtractFormat = true;
}

View File

@ -41,8 +41,7 @@ class Node
Node(const std::string &label, NodeType type)
: m_label(label)
, m_type(type)
, m_children()
, m_parents() {}
, m_pcfgScore(0.0f) {}
~Node();
@ -50,12 +49,14 @@ class Node
NodeType GetType() const { return m_type; }
const std::vector<Node*> &GetChildren() const { return m_children; }
const std::vector<Node*> &GetParents() const { return m_parents; }
float GetPcfgScore() const { return m_pcfgScore; }
const Span &GetSpan() const { return m_span; }
const Span &GetComplementSpan() const { return m_complementSpan; }
const std::vector<const Subgraph*> &GetRules() const { return m_rules; }
void SetChildren(const std::vector<Node*> &c) { m_children = c; }
void SetParents(const std::vector<Node*> &p) { m_parents = p; }
void SetPcfgScore(float s) { m_pcfgScore = s; }
void SetSpan(const Span &s) { m_span = s; }
void SetComplementSpan(const Span &cs) { m_complementSpan = cs; }
@ -92,6 +93,7 @@ class Node
NodeType m_type;
std::vector<Node*> m_children;
std::vector<Node*> m_parents;
float m_pcfgScore;
Span m_span;
Span m_complementSpan;
std::vector<const Subgraph*> m_rules;

View File

@ -30,12 +30,14 @@ struct Options {
public:
Options()
: allowUnary(false)
, conditionOnTargetLhs(false)
, gzOutput(false)
, maxNodes(15)
, maxRuleDepth(3)
, maxRuleSize(3)
, maxScope(3)
, minimal(false)
, pcfg(false)
, unpairedExtractFormat(false) {}
// Positional options
@ -46,6 +48,7 @@ struct Options {
// All other options
bool allowUnary;
bool conditionOnTargetLhs;
std::string glueGrammarFile;
bool gzOutput;
int maxNodes;
@ -53,6 +56,7 @@ struct Options {
int maxRuleSize;
int maxScope;
bool minimal;
bool pcfg;
bool unpairedExtractFormat;
std::string unknownWordFile;
};

View File

@ -32,17 +32,19 @@ class ParseTree
public:
ParseTree(const std::string &label)
: m_label(label)
, m_children()
, m_parent() {}
, m_parent(0)
, m_pcfgScore(0.0) {}
~ParseTree();
const std::string &GetLabel() const { return m_label; }
const std::vector<ParseTree*> &GetChildren() const { return m_children; }
const ParseTree *GetParent() const { return m_parent; }
float GetPcfgScore() const { return m_pcfgScore; }
void SetParent(ParseTree *);
void SetChildren(const std::vector<ParseTree*> &);
void SetPcfgScore(float score) { m_pcfgScore = score; }
void AddChild(ParseTree *);
@ -59,6 +61,7 @@ class ParseTree
std::string m_label;
std::vector<ParseTree*> m_children;
ParseTree *m_parent;
float m_pcfgScore; // log probability
};
template<typename OutputIterator>

View File

@ -30,6 +30,7 @@ namespace GHKM {
ScfgRule::ScfgRule(const Subgraph &fragment)
: m_sourceLHS("X", NonTerminal)
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
, m_pcfgScore(fragment.GetPcfgScore())
{
// Source RHS

View File

@ -57,6 +57,7 @@ class ScfgRule
const std::vector<Symbol> &GetSourceRHS() const { return m_sourceRHS; }
const std::vector<Symbol> &GetTargetRHS() const { return m_targetRHS; }
const Alignment &GetAlignment() const { return m_alignment; }
float GetPcfgScore() const { return m_pcfgScore; }
int Scope() const;
@ -68,6 +69,7 @@ class ScfgRule
std::vector<Symbol> m_sourceRHS;
std::vector<Symbol> m_targetRHS;
Alignment m_alignment;
float m_pcfgScore;
};
} // namespace GHKM

View File

@ -24,6 +24,7 @@
#include "ScfgRule.h"
#include <cassert>
#include <cmath>
#include <ostream>
#include <map>
#include <sstream>
@ -34,14 +35,43 @@ namespace GHKM {
void ScfgRuleWriter::Write(const ScfgRule &rule)
{
std::ostringstream sourceSS;
std::ostringstream targetSS;
if (m_options.unpairedExtractFormat) {
WriteUnpairedFormat(rule);
WriteUnpairedFormat(rule, sourceSS, targetSS);
} else {
WriteStandardFormat(rule);
WriteStandardFormat(rule, sourceSS, targetSS);
}
// Write the rule to the forward and inverse extract files.
m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
const Alignment &alignment = rule.GetAlignment();
for (Alignment::const_iterator p = alignment.begin();
p != alignment.end(); ++p) {
m_fwd << " " << p->first << "-" << p->second;
m_inv << " " << p->second << "-" << p->first;
}
// Write a count of 1 and an empty NT length column to the forward extract
// file.
// TODO Add option to write NT length?
m_fwd << " ||| 1 ||| |||";
if (m_options.pcfg) {
// Write the PCFG score.
m_fwd << " " << std::exp(rule.GetPcfgScore());
}
m_fwd << std::endl;
// Write a count of 1 to the inverse extract file.
m_inv << " ||| 1" << std::endl;
}
void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule,
std::ostream &sourceSS,
std::ostream &targetSS)
{
const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
@ -60,9 +90,6 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
}
}
std::ostringstream sourceSS;
std::ostringstream targetSS;
// Write the source side of the rule to sourceSS.
int i = 0;
for (std::vector<Symbol>::const_iterator p(sourceRHS.begin());
@ -74,7 +101,11 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
}
sourceSS << " ";
}
WriteSymbol(rule.GetSourceLHS(), sourceSS);
if (m_options.conditionOnTargetLhs) {
WriteSymbol(rule.GetTargetLHS(), sourceSS);
} else {
WriteSymbol(rule.GetSourceLHS(), sourceSS);
}
// Write the target side of the rule to targetSS.
i = 0;
@ -88,27 +119,14 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
targetSS << " ";
}
WriteSymbol(rule.GetTargetLHS(), targetSS);
// Write the rule to the forward and inverse extract files.
m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
for (Alignment::const_iterator p(alignment.begin());
p != alignment.end(); ++p) {
m_fwd << " " << p->first << "-" << p->second;
m_inv << " " << p->second << "-" << p->first;
}
m_fwd << " ||| 1" << std::endl;
m_inv << " ||| 1" << std::endl;
}
void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule,
std::ostream &sourceSS,
std::ostream &targetSS)
{
const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
const Alignment &alignment = rule.GetAlignment();
std::ostringstream sourceSS;
std::ostringstream targetSS;
// Write the source side of the rule to sourceSS.
int i = 0;
@ -117,7 +135,11 @@ void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
WriteSymbol(*p, sourceSS);
sourceSS << " ";
}
WriteSymbol(rule.GetSourceLHS(), sourceSS);
if (m_options.conditionOnTargetLhs) {
WriteSymbol(rule.GetTargetLHS(), sourceSS);
} else {
WriteSymbol(rule.GetSourceLHS(), sourceSS);
}
// Write the target side of the rule to targetSS.
i = 0;
@ -127,17 +149,6 @@ void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
targetSS << " ";
}
WriteSymbol(rule.GetTargetLHS(), targetSS);
// Write the rule to the forward and inverse extract files.
m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
for (Alignment::const_iterator p(alignment.begin());
p != alignment.end(); ++p) {
m_fwd << " " << p->first << "-" << p->second;
m_inv << " " << p->second << "-" << p->first;
}
m_fwd << " ||| 1" << std::endl;
m_inv << " ||| 1" << std::endl;
}
void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)

View File

@ -45,8 +45,8 @@ class ScfgRuleWriter
ScfgRuleWriter(const ScfgRuleWriter &);
ScfgRuleWriter &operator=(const ScfgRuleWriter &);
void WriteStandardFormat(const ScfgRule &);
void WriteUnpairedFormat(const ScfgRule &);
void WriteStandardFormat(const ScfgRule &, std::ostream &, std::ostream &);
void WriteUnpairedFormat(const ScfgRule &, std::ostream &, std::ostream &);
void WriteSymbol(const Symbol &, std::ostream &);
std::ostream &m_fwd;

View File

@ -101,5 +101,21 @@ int Subgraph::CalcDepth(const Node *n) const
return maxChildDepth + 1;
}
float Subgraph::CalcPcfgScore() const
{
if (m_root->GetType() != TREE || m_leaves.empty()) {
return 0.0f;
}
float score = m_root->GetPcfgScore();
for (std::set<const Node *>::const_iterator p = m_leaves.begin();
p != m_leaves.end(); ++p) {
const Node *leaf = *p;
if (leaf->GetType() == TREE) {
score -= leaf->GetPcfgScore();
}
}
return score;
}
} // namespace Moses
} // namespace GHKM

View File

@ -38,7 +38,8 @@ class Subgraph
: m_root(root)
, m_depth(0)
, m_size(root->GetType() == TREE ? 1 : 0)
, m_nodeCount(1) {}
, m_nodeCount(1)
, m_pcfgScore(0.0f) {}
Subgraph(const Node *root, const std::set<const Node *> &leaves)
: m_root(root)
@ -46,10 +47,12 @@ class Subgraph
, m_depth(-1)
, m_size(-1)
, m_nodeCount(-1)
, m_pcfgScore(0.0f)
{
m_depth = CalcDepth(m_root);
m_size = CalcSize(m_root);
m_nodeCount = CountNodes(m_root);
m_pcfgScore = CalcPcfgScore();
}
const Node *GetRoot() const { return m_root; }
@ -57,6 +60,7 @@ class Subgraph
int GetDepth() const { return m_depth; }
int GetSize() const { return m_size; }
int GetNodeCount() const { return m_nodeCount; }
float GetPcfgScore() const { return m_pcfgScore; }
bool IsTrivial() const { return m_leaves.empty(); }
@ -66,6 +70,7 @@ class Subgraph
void GetTargetLeaves(const Node *, std::vector<const Node *> &) const;
int CalcDepth(const Node *) const;
int CalcSize(const Node *) const;
float CalcPcfgScore() const;
int CountNodes(const Node *) const;
const Node *m_root;
@ -73,6 +78,7 @@ class Subgraph
int m_depth;
int m_size;
int m_nodeCount;
float m_pcfgScore;
};
} // namespace GHKM

View File

@ -61,6 +61,7 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
const std::vector<std::string> &words)
{
std::auto_ptr<ParseTree> root(new ParseTree(tree.GetLabel()));
root->SetPcfgScore(tree.GetPcfgScore());
const std::vector<SyntaxNode*> &children = tree.GetChildren();
if (children.empty()) {
if (tree.GetStart() != tree.GetEnd()) {

View File

@ -90,7 +90,7 @@ void addHieroRule( int startT, int endT, int startS, int endS
void printHieroPhrase( int startT, int endT, int startS, int endS
, HoleCollection &holeColl, LabelIndex &labelIndex);
string printTargetHieroPhrase( int startT, int endT, int startS, int endS
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex);
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore);
string printSourceHieroPhrase( int startT, int endT, int startS, int endS
, HoleCollection &holeColl, const LabelIndex &labelIndex);
void preprocessSourceHieroPhrase( int startT, int endT, int startS, int endS
@ -140,7 +140,9 @@ int main(int argc, char* argv[])
<< " | --MaxNonTerm[" << options.maxNonTerm << "]"
<< " | --MaxScope[" << options.maxScope << "]"
<< " | --SourceSyntax | --TargetSyntax"
<< " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource | --NoNonTermFirstWord | --NoFractionalCounting ]\n";
<< " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource | --NoNonTermFirstWord | --NoFractionalCounting"
<< " | --UnpairedExtractFormat"
<< " | --ConditionOnTargetLHS ]\n";
exit(1);
}
char* &fileNameT = argv[1];
@ -257,8 +259,14 @@ int main(int argc, char* argv[])
// if an source phrase is paired with two target phrases, then count(t|s) = 0.5
else if (strcmp(argv[i],"--NoFractionalCounting") == 0) {
options.fractionalCounting = false;
} else if (strcmp(argv[i],"--PCFG") == 0) {
options.pcfgScore = true;
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
options.outputNTLengths = true;
} else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
options.unpairedExtractFormat = true;
} else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
options.conditionOnTargetLhs = true;
#ifdef WITH_THREADS
} else if (strcmp(argv[i],"-threads") == 0 ||
strcmp(argv[i],"--threads") == 0 ||
@ -517,7 +525,7 @@ void ExtractTask::preprocessSourceHieroPhrase( int startT, int endT, int startS,
}
string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, int endS
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex)
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore)
{
HoleList::iterator iterHoleList = holeColl.GetHoles().begin();
assert(iterHoleList != holeColl.GetHoles().end());
@ -543,7 +551,16 @@ string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, in
m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[ labelI ]->GetLabel() : "X";
hole.SetLabel(targetLabel, 1);
out += "[" + sourceLabel + "][" + targetLabel + "] ";
if (m_options.unpairedExtractFormat) {
out += "[" + targetLabel + "] ";
} else {
out += "[" + sourceLabel + "][" + targetLabel + "] ";
}
if (m_options.pcfgScore) {
double score = m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetPcfgScore();
logPCFGScore -= score;
}
currPos = hole.GetEnd(1);
hole.SetPos(outPos, 1);
@ -584,7 +601,11 @@ string ExtractTask::printSourceHieroPhrase( int startT, int endT, int startS, in
assert(targetLabel != "");
const string &sourceLabel = hole.GetLabel(0);
out += "[" + sourceLabel + "][" + targetLabel + "] ";
if (m_options.unpairedExtractFormat) {
out += "[" + sourceLabel + "] ";
} else {
out += "[" + sourceLabel + "][" + targetLabel + "] ";
}
currPos = hole.GetEnd(0);
hole.SetPos(outPos, 0);
@ -652,19 +673,29 @@ void ExtractTask::printHieroPhrase( int startT, int endT, int startS, int endS
m_sentence->targetTree.GetNodes(startT,endT)[ labelIndex[0] ]->GetLabel() : "X";
string sourceLabel = m_options.sourceSyntax ?
m_sentence->sourceTree.GetNodes(startS,endS)[ labelIndex[1] ]->GetLabel() : "X";
//string sourceLabel = "X";
// create non-terms on the source side
preprocessSourceHieroPhrase(startT, endT, startS, endS, indexS, holeColl, labelIndex);
// target
rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex)
if (m_options.pcfgScore) {
double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[labelIndex[0]]->GetPcfgScore();
rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
+ " [" + targetLabel + "]";
rule.pcfgScore = std::exp(logPCFGScore);
} else {
double logPCFGScore = 0.0f;
rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
+ " [" + targetLabel + "]";
}
// source
// holeColl.SortSourceHoles();
rule.source = printSourceHieroPhrase(startT, endT, startS, endS, holeColl, labelIndex)
+ " [" + sourceLabel + "]";
rule.source = printSourceHieroPhrase(startT, endT, startS, endS, holeColl, labelIndex);
if (m_options.conditionOnTargetLhs) {
rule.source += " [" + targetLabel + "]";
} else {
rule.source += " [" + sourceLabel + "]";
}
// alignment
printHieroAlignment(startT, endT, startS, endS, indexS, indexT, holeColl, rule);
@ -860,10 +891,15 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist
// phrase labels
string targetLabel,sourceLabel;
sourceLabel = m_options.sourceSyntax ?
m_sentence->sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
targetLabel = m_options.targetSyntax ?
m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel() : "X";
if (m_options.targetSyntax && m_options.conditionOnTargetLhs) {
sourceLabel = targetLabel = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel();
}
else {
sourceLabel = m_options.sourceSyntax ?
m_sentence->sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
targetLabel = m_options.targetSyntax ?
m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel() : "X";
}
// source
rule.source = "";
@ -877,6 +913,11 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist
rule.target += m_sentence->target[ti] + " ";
rule.target += "[" + targetLabel + "]";
if (m_options.pcfgScore) {
double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetPcfgScore();
rule.pcfgScore = std::exp(logPCFGScore);
}
// alignment
for(int ti=startT; ti<=endT; ti++) {
for(unsigned int i=0; i<m_sentence->alignedToT[ti].size(); i++) {
@ -957,11 +998,13 @@ void ExtractTask::writeRulesToFile()
out << rule->source << " ||| "
<< rule->target << " ||| "
<< rule->alignment << " ||| "
<< rule->count;
<< rule->count << " ||| ";
if (m_options.outputNTLengths) {
out << " ||| ";
rule->OutputNTLengths(out);
}
if (m_options.pcfgScore) {
out << " ||| " << rule->pcfgScore;
}
out << "\n";
if (!m_options.onlyDirectFlag) {

View File

@ -22,6 +22,7 @@
#include "SentenceAlignment.h"
#include "tables-core.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
using namespace std;
@ -82,15 +83,16 @@ bool hierModel = false;
REO_MODEL_TYPE hierType = REO_MSD;
ofstream extractFile;
ofstream extractFileInv;
ofstream extractFileOrientation;
ofstream extractFileSentenceId;
Moses::OutputFileStream extractFile;
Moses::OutputFileStream extractFileInv;
Moses::OutputFileStream extractFileOrientation;
Moses::OutputFileStream extractFileSentenceId;
int maxPhraseLength;
bool orientationFlag = false;
bool translationFlag = true;
bool sentenceIdFlag = false; //create extract file with sentence id
bool onlyOutputSpanInfo = false;
bool gzOutput = false;
int main(int argc, char* argv[])
{
@ -116,6 +118,8 @@ int main(int argc, char* argv[])
translationFlag = false;
} else if (strcmp(argv[i], "--SentenceId") == 0) {
sentenceIdFlag = true;
} else if (strcmp(argv[i], "--GZOutput") == 0) {
gzOutput = true;
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
@ -193,18 +197,18 @@ int main(int argc, char* argv[])
// open output files
if (translationFlag) {
string fileNameExtractInv = fileNameExtract + ".inv";
extractFile.open(fileNameExtract.c_str());
extractFileInv.open(fileNameExtractInv.c_str());
string fileNameExtractInv = fileNameExtract + ".inv" + (gzOutput?".gz":"");
extractFile.Open( (fileNameExtract + (gzOutput?".gz":"")).c_str());
extractFileInv.Open(fileNameExtractInv.c_str());
}
if (orientationFlag) {
string fileNameExtractOrientation = fileNameExtract + ".o";
extractFileOrientation.open(fileNameExtractOrientation.c_str());
string fileNameExtractOrientation = fileNameExtract + ".o" + (gzOutput?".gz":"");
extractFileOrientation.Open(fileNameExtractOrientation.c_str());
}
if (sentenceIdFlag) {
string fileNameExtractSentenceId = fileNameExtract + ".sid";
extractFileSentenceId.open(fileNameExtractSentenceId.c_str());
string fileNameExtractSentenceId = fileNameExtract + ".sid" + (gzOutput?".gz":"");
extractFileSentenceId.Open(fileNameExtractSentenceId.c_str());
}
int i=0;
@ -239,12 +243,12 @@ int main(int argc, char* argv[])
//az: only close if we actually opened it
if (!onlyOutputSpanInfo) {
if (translationFlag) {
extractFile.close();
extractFileInv.close();
extractFile.Close();
extractFileInv.Close();
}
if (orientationFlag) extractFileOrientation.close();
if (orientationFlag) extractFileOrientation.Close();
if (sentenceIdFlag) {
extractFileSentenceId.close();
extractFileSentenceId.Close();
}
}
}

View File

@ -14,22 +14,20 @@
#include <set>
#include <vector>
using namespace std;
// HPhraseVertex represents a point in the alignment matrix
typedef pair <int, int> HPhraseVertex;
typedef std::pair <int, int> HPhraseVertex;
// Phrase represents a bi-phrase; each bi-phrase is defined by two points in the alignment matrix:
// bottom-left and top-right
typedef pair<HPhraseVertex, HPhraseVertex> HPhrase;
typedef std::pair<HPhraseVertex, HPhraseVertex> HPhrase;
// HPhraseVector is a vector of phrases
// HPhraseVector is a std::vector of phrases
// the bool value indicates if the associated phrase is within the length limit or not
typedef vector < HPhrase > HPhraseVector;
typedef std::vector < HPhrase > HPhraseVector;
// SentenceVertices represents all vertices that have the same positioning of all extracted phrases
// The key of the map is the English index and the value is a set of the foreign ones
typedef map <int, set<int> > HSenteceVertices;
// The key of the std::map is the English index and the value is a std::set of the foreign ones
typedef std::map <int, std::set<int> > HSenteceVertices;
#endif /* HIERARCHICAL_H_ */

View File

@ -0,0 +1 @@
lib pcfg_common : [ glob *.cc ] ..//trees ;

View File

@ -0,0 +1,41 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_EXCEPTION_H_
#define PCFG_EXCEPTION_H_
#include <string>
namespace Moses {
namespace PCFG {
// Simple exception type used by the PCFG code: it carries nothing but a
// human-readable message.
class Exception {
 public:
  // Constructs an exception whose message is copied from a C string.
  Exception(const char *text)
      : msg_(text) {}

  // Constructs an exception whose message is copied from a std::string.
  Exception(const std::string &text)
      : msg_(text) {}

  // Returns the message that was supplied at construction time.
  const std::string &msg() const {
    return msg_;
  }

 private:
  std::string msg_;
};
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,109 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_NUMBERED_SET_H_
#define PCFG_NUMBERED_SET_H_
#include "exception.h"
#include <boost/unordered_map.hpp>
#include <limits>
#include <sstream>
#include <vector>
namespace Moses {
namespace PCFG {
// A set of elements of type T in which every element is assigned an integral
// ID of type I.  IDs are handed out contiguously, starting at 0, in insertion
// order.  Elements cannot be removed individually once inserted, but the
// whole set can be emptied with Clear().
template<typename T, typename I=std::size_t>
class NumberedSet {
 private:
  // element -> ID
  typedef boost::unordered_map<T, I> ElementToIdMap;
  // ID -> pointer to the element (the key stored in the ElementToIdMap)
  typedef std::vector<const T *> IdToElementMap;

 public:
  typedef I IdType;
  typedef typename IdToElementMap::const_iterator const_iterator;

  NumberedSet() {}

  // Iteration visits pointers to the stored elements, ordered by ID.
  const_iterator begin() const {
    return id_to_element_.begin();
  }
  const_iterator end() const {
    return id_to_element_.end();
  }

  // Sentinel ID returned by Lookup(const T &) for elements that are absent.
  static I NullId() {
    return std::numeric_limits<I>::max();
  }

  bool Empty() const {
    return id_to_element_.empty();
  }
  std::size_t Size() const {
    return id_to_element_.size();
  }

  // Inserts the given object (if not already present) and returns its ID.
  I Insert(const T &);

  // Returns the ID of the given element, or NullId() if it is absent.
  I Lookup(const T &) const;

  // Returns the element with the given ID; throws Exception if there is none.
  const T &Lookup(I) const;

  // Removes all elements.
  void Clear();

 private:
  ElementToIdMap element_to_id_;
  IdToElementMap id_to_element_;
};
// Returns the ID previously assigned to element s, or NullId() when s is not
// in the set.
template<typename T, typename I>
I NumberedSet<T, I>::Lookup(const T &s) const {
  const typename ElementToIdMap::const_iterator hit = element_to_id_.find(s);
  if (hit != element_to_id_.end()) {
    return hit->second;
  }
  return NullId();
}
// Returns the element that was assigned the given ID.  Throws Exception with
// a "Value not found" message if the ID is outside the range of assigned IDs.
template<typename T, typename I>
const T &NumberedSet<T, I>::Lookup(I id) const {
  const bool in_range = !(id < 0) && id < id_to_element_.size();
  if (in_range) {
    return *(id_to_element_[id]);
  }
  std::ostringstream msg;
  msg << "Value not found: " << id;
  throw Exception(msg.str());
}
// Inserts x if it is not already present and returns its ID.  A new element
// receives the next unused ID (the current number of elements); an existing
// element keeps the ID it was given when first inserted.
template<typename T, typename I>
I NumberedSet<T, I>::Insert(const T &x) {
  typedef typename ElementToIdMap::iterator MapIterator;

  // Tentatively pair x with the next free ID; the map ignores the pair if x
  // is already a key.
  std::pair<T, I> candidate(x, id_to_element_.size());
  std::pair<MapIterator, bool> outcome = element_to_id_.insert(candidate);
  MapIterator entry = outcome.first;

  const bool is_new = outcome.second;
  if (is_new) {
    // Record a pointer to the key object owned by the map so that the
    // element can be retrieved by ID.
    id_to_element_.push_back(&entry->first);
  }
  return entry->second;
}
// Empties the set: both the ID -> element table and the element -> ID map are
// cleared, so subsequently inserted elements are numbered from 0 again.
template<typename T, typename I>
void NumberedSet<T, I>::Clear() {
  // Drop the pointers first, then the map entries they point into.
  id_to_element_.clear();
  element_to_id_.clear();
}
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,106 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "pcfg.h"
#include "exception.h"
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <cassert>
namespace Moses {
namespace PCFG {
void Pcfg::Add(const Key &key, double score) {
rules_[key] = score;
}
bool Pcfg::Lookup(const Key &key, double &score) const {
Map::const_iterator p = rules_.find(key);
if (p == rules_.end()) {
return false;
}
score = p->second;
return true;
}
// Reads rules from input, one per line, in the format
//
//   LHS ||| RHS_1 RHS_2 ... ||| SCORE
//
// and adds each one via Add().  Every LHS / RHS symbol is inserted into vocab
// and the rule key is the resulting sequence of IDs (LHS first).
//
// Throws Exception if a line lacks either "|||" delimiter.  The score is
// parsed with boost::lexical_cast<double>; a failure there is not converted
// to Exception by this function.
void Pcfg::Read(std::istream &input, Vocabulary &vocab) {
  const std::string delimiter("|||");

  std::string line;
  while (std::getline(input, line)) {
    // Left-hand side: everything before the first delimiter.
    const std::size_t first = line.find(delimiter);
    if (first == std::string::npos) {
      throw Exception("missing first delimiter");
    }
    std::string lhs = line.substr(0, first);
    boost::trim(lhs);

    // Right-hand side: whitespace-separated symbols between the delimiters.
    const std::size_t rhs_begin = first + delimiter.size();
    const std::size_t second = line.find(delimiter, rhs_begin);
    if (second == std::string::npos) {
      throw Exception("missing second delimiter");
    }
    std::string rhs_text = line.substr(rhs_begin, second - rhs_begin);
    boost::trim(rhs_text);
    std::vector<std::string> rhs_symbols;
    boost::split(rhs_symbols, rhs_text, boost::algorithm::is_space(),
                 boost::algorithm::token_compress_on);

    // Score: everything after the second delimiter.
    std::string score_text = line.substr(second + delimiter.size());
    boost::trim(score_text);

    // Build the key: LHS ID followed by the RHS IDs, in order.
    Key key;
    key.reserve(rhs_symbols.size() + 1);
    key.push_back(vocab.Insert(lhs));
    for (std::size_t i = 0; i < rhs_symbols.size(); ++i) {
      key.push_back(vocab.Insert(rhs_symbols[i]));
    }

    // The symbols are inserted into vocab before the score is parsed.
    Add(key, boost::lexical_cast<double>(score_text));
  }
}
void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
for (const_iterator p = begin(); p != end(); ++p) {
const Key &key = p->first;
double score = p->second;
std::vector<std::size_t>::const_iterator q = key.begin();
std::vector<std::size_t>::const_iterator end = key.end();
output << vocab.Lookup(*q++) << " |||";
while (q != end) {
output << " " << vocab.Lookup(*q++);
}
output << " ||| " << score << std::endl;
}
}
} // namespace PCFG
} // namespace Moses

View File

@ -0,0 +1,61 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_PCFG_H_
#define PCFG_PCFG_H_
#include "typedef.h"
#include <istream>
#include <map>
#include <ostream>
#include <vector>
namespace Moses {
namespace PCFG {
// A table of scored rules.  Each rule is identified by a Key, a vector of
// symbol IDs; Read() and Write() treat the first ID as the rule's left-hand
// side and the remaining IDs as its right-hand side.
class Pcfg {
 public:
  typedef std::vector<std::size_t> Key;
  typedef std::map<Key, double> Map;
  typedef Map::iterator iterator;
  typedef Map::const_iterator const_iterator;

  Pcfg() {}

  // Iteration over (key, score) pairs in the order of the underlying map.
  iterator begin() {
    return rules_.begin();
  }
  const_iterator begin() const {
    return rules_.begin();
  }
  iterator end() {
    return rules_.end();
  }
  const_iterator end() const {
    return rules_.end();
  }

  // Sets the score of the rule with the given key.
  void Add(const Key &, double);

  // Retrieves the score of the rule with the given key; returns false if
  // there is no such rule.
  bool Lookup(const Key &, double &) const;

  // Reads rules from a stream, inserting their symbols into the vocabulary.
  void Read(std::istream &, Vocabulary &);

  // Writes all rules to a stream, using the vocabulary to map IDs to symbols.
  void Write(const Vocabulary &, std::ostream &) const;

 private:
  Map rules_;
};
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,77 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_PCFG_TREE_H_
#define PCFG_PCFG_TREE_H_
#include "syntax_tree.h"
#include "xml_tree_writer.h"
#include <string>
namespace Moses {
namespace PCFG {
// Syntax tree node with a std::string label and an associated score.
// DerivedType is the concrete node class and is forwarded to SyntaxTreeBase.
template<typename DerivedType>
class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType> {
 public:
  typedef std::string LabelType;
  typedef SyntaxTreeBase<LabelType, DerivedType> BaseType;

  // Creates a node with the given label and a score of 0.
  PcfgTreeBase(const LabelType &label)
      : BaseType(label)
      , score_(0.0) {}

  double score() const {
    return score_;
  }

  void set_score(double s) {
    score_ = s;
  }

 private:
  double score_;
};
// Concrete tree type: a PcfgTreeBase whose children and parent are themselves
// PcfgTree nodes.
class PcfgTree : public PcfgTreeBase<PcfgTree> {
 public:
  typedef PcfgTreeBase<PcfgTree> BaseType;

  // Creates a node with the given label.
  PcfgTree(const BaseType::LabelType &label)
      : BaseType(label) {}
};
// Specialisation of XmlOutputHandler for PcfgTree: supplies the label and the
// attributes of a tree node.
template<>
class XmlOutputHandler<PcfgTree> {
 public:
  typedef std::map<std::string, std::string> AttributeMap;

  // Copies the node's label into label.
  void GetLabel(const PcfgTree &tree, std::string &label) const {
    label = tree.label();
  }

  // Replaces the contents of attribute_map with the node's attributes.  The
  // only attribute produced is "pcfg", holding the node's score, and it is
  // emitted only when the score is non-zero.
  void GetAttributes(const PcfgTree &tree, AttributeMap &attribute_map) const {
    attribute_map.clear();
    const double score = tree.score();
    if (score == 0.0) {
      return;
    }
    std::ostringstream out;
    out << score;
    attribute_map["pcfg"] = out.str();
  }
};
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,91 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_SYNTAX_TREE_H_
#define PCFG_SYNTAX_TREE_H_
#include <cassert>
#include <vector>
namespace Moses {
namespace PCFG {
// Base class for SyntaxTree, AgreementTree, and friends.
//
// A node has a label of type T, a list of child pointers and a parent
// pointer.  DerivedType is the concrete node class.  A node owns its
// children: they are deleted by the destructor.
//
// NOTE(review): no copy constructor / assignment operator is declared, so the
// implicitly generated ones copy the child pointers; destroying both copies
// would delete the children twice.  Avoid copying nodes.
template<typename T, typename DerivedType>
class SyntaxTreeBase {
 public:
  // Creates a node with the given label, no children and no parent.
  SyntaxTreeBase(const T &label)
      : label_(label), children_(), parent_(0) {}

  // Creates a node with the given label and children and no parent.  The
  // children's parent pointers are not modified.
  SyntaxTreeBase(const T &label, const std::vector<DerivedType *> &children)
      : label_(label), children_(children), parent_(0) {}

  // Deletes all children.
  virtual ~SyntaxTreeBase();

  // Accessors.
  const T &label() const {
    return label_;
  }
  const DerivedType *parent() const {
    return parent_;
  }
  DerivedType *parent() {
    return parent_;
  }
  const std::vector<DerivedType *> &children() const {
    return children_;
  }
  std::vector<DerivedType *> &children() {
    return children_;
  }

  // Mutators.  set_children() replaces the child list without deleting the
  // previous children.
  void set_label(const T &label) {
    label_ = label;
  }
  void set_parent(DerivedType *parent) {
    parent_ = parent;
  }
  void set_children(const std::vector<DerivedType *> &c) {
    children_ = c;
  }

  // A leaf is a node without children.
  bool IsLeaf() const {
    return children_.empty();
  }

  // A preterminal is a node whose only child is a leaf.
  bool IsPreterminal() const {
    if (children_.size() != 1) {
      return false;
    }
    return children_.front()->IsLeaf();
  }

  // Appends child to the child list.  The child's parent pointer is not
  // modified.
  void AddChild(DerivedType *child) {
    children_.push_back(child);
  }

 private:
  T label_;
  std::vector<DerivedType *> children_;
  DerivedType *parent_;
};

template<typename T, typename DerivedType>
SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase() {
  typedef typename std::vector<DerivedType *>::iterator ChildIterator;
  for (ChildIterator it = children_.begin(); it != children_.end(); ++it) {
    delete *it;
  }
}

// Plain syntax tree whose nodes carry a label of type T and nothing else.
template<typename T>
class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> > {
 public:
  typedef SyntaxTreeBase<T, SyntaxTree<T> > BaseType;

  SyntaxTree(const T &label)
      : BaseType(label) {}

  SyntaxTree(const T &label, const std::vector<SyntaxTree *> &children)
      : BaseType(label, children) {}
};
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,80 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "tool.h"
#include <sstream>
namespace Moses {
namespace PCFG {
std::istream &Tool::OpenInputOrDie(const std::string &filename) {
// TODO Check that function is only called once?
if (filename.empty() || filename == "-") {
input_ptr_ = &(std::cin);
} else {
input_file_stream_.open(filename.c_str());
if (!input_file_stream_) {
std::ostringstream msg;
msg << "failed to open input file: " << filename;
Error(msg.str());
}
input_ptr_ = &input_file_stream_;
}
return *input_ptr_;
}
std::ostream &Tool::OpenOutputOrDie(const std::string &filename) {
// TODO Check that function is only called once?
if (filename.empty() || filename == "-") {
output_ptr_ = &(std::cout);
} else {
output_file_stream_.open(filename.c_str());
if (!output_file_stream_) {
std::ostringstream msg;
msg << "failed to open output file: " << filename;
Error(msg.str());
}
output_ptr_ = &output_file_stream_;
}
return *output_ptr_;
}
void Tool::OpenNamedInputOrDie(const std::string &filename,
std::ifstream &stream) {
stream.open(filename.c_str());
if (!stream) {
std::ostringstream msg;
msg << "failed to open input file: " << filename;
Error(msg.str());
}
}
void Tool::OpenNamedOutputOrDie(const std::string &filename,
std::ofstream &stream) {
stream.open(filename.c_str());
if (!stream) {
std::ostringstream msg;
msg << "failed to open output file: " << filename;
Error(msg.str());
}
}
} // namespace PCFG
} // namespace Moses

View File

@ -0,0 +1,91 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_TOOL_H_
#define PCFG_TOOL_H_
#include <boost/program_options/cmdline.hpp>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
namespace Moses {
namespace PCFG {
// Abstract base class for the PCFG command-line tools.  It stores the tool's
// name and provides helpers for reporting warnings / errors and for opening
// input and output streams.
class Tool {
 public:
  virtual ~Tool() {}

  // The tool's name, used as the prefix of warning and error messages.
  const std::string &name() const { return name_; }

  // Runs the tool with the given command-line arguments.  Implemented by each
  // concrete tool.
  virtual int Main(int argc, char *argv[]) = 0;

 protected:
  // The main input / output stream pointers start out null (they used to be
  // left uninitialised); they are set by OpenInputOrDie() / OpenOutputOrDie().
  Tool(const std::string &name)
      : name_(name)
      , input_ptr_(0)
      , output_ptr_(0) {}

  // Returns the boost::program_options style that should be used by all tools,
  // i.e. the default style with the allow_guessing flag cleared.
  static int CommonOptionStyle() {
    namespace cls = boost::program_options::command_line_style;
    return cls::default_style & (~cls::allow_guessing);
  }

  // Prints "<name>: warning: <msg>" to standard error.
  void Warn(const std::string &msg) const {
    std::cerr << name_ << ": warning: " << msg << std::endl;
  }

  // Prints "<name>: error: <msg>" to standard error and terminates the
  // process with exit status 1.  Does not return.
  void Error(const std::string &msg) const {
    std::cerr << name_ << ": error: " << msg << std::endl;
    std::exit(1);
  }

  // Initialises the tool's main input stream and returns a reference that is
  // valid for the remainder of the tool's lifetime.  If filename is empty or
  // "-" then input is standard input; otherwise it is the named file.  Calls
  // Error() if the file cannot be opened for reading.
  std::istream &OpenInputOrDie(const std::string &filename);

  // Initialises the tool's main output stream and returns a reference that is
  // valid for the remainder of the tool's lifetime.  If filename is empty or
  // "-" then output is standard output; otherwise it is the named file.  Calls
  // Error() if the file cannot be opened for writing.
  std::ostream &OpenOutputOrDie(const std::string &filename);

  // Opens the named input file using the supplied ifstream.  Calls Error() if
  // the file cannot be opened for reading.
  void OpenNamedInputOrDie(const std::string &, std::ifstream &);

  // Opens the named output file using the supplied ofstream.  Calls Error() if
  // the file cannot be opened for writing.
  void OpenNamedOutputOrDie(const std::string &, std::ofstream &);

 private:
  std::string name_;
  // Main input: points at std::cin or at input_file_stream_ once opened.
  std::istream *input_ptr_;
  std::ifstream input_file_stream_;
  // Main output: points at std::cout or at output_file_stream_ once opened.
  std::ostream *output_ptr_;
  std::ofstream output_file_stream_;
};
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,37 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_TYPEDEF_H_
#define PCFG_TYPEDEF_H_
#include "numbered_set.h"
#include "syntax_tree.h"
#include <string>
namespace Moses {
namespace PCFG {
// Vocabulary assigns integral IDs to symbol strings.  The ID type is
// NumberedSet's default, std::size_t.
typedef NumberedSet<std::string> Vocabulary;
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,88 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "xml_tree_parser.h"
#include "exception.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include <cassert>
#include <vector>
namespace Moses {
namespace PCFG {
// Default constructor: all members are default-initialised.
XmlTreeParser::XmlTreeParser() {}
// Parses one line and returns the corresponding PcfgTree, or a null auto_ptr
// if the line yields no top node.
//
// Throws Exception (with an empty message) if ProcessAndStripXMLTags()
// returns false, and Exception carrying the XmlException's message if it
// throws.
//
// NOTE(review): presumably ProcessAndStripXMLTags() removes the markup from
// m_line and fills m_tree with the annotated spans -- confirm against
// XmlTree.h.
std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
{
  m_line = line;
  m_tree.Clear();

  bool processed = false;
  try {
    processed = ProcessAndStripXMLTags(m_line, m_tree, m_labelSet,
                                       m_topLabelSet);
  } catch (const XmlException &e) {
    throw Exception(e.getMsg());
  }
  if (!processed) {
    throw Exception("");
  }

  m_tree.ConnectNodes();
  SyntaxNode *root = m_tree.GetTop();
  if (root) {
    // Tokenize the (processed) line so that leaf nodes can be labelled with
    // their words.
    m_words = tokenize(m_line.c_str());
    return ConvertTree(*root, m_words);
  }
  // There is no XML tree.
  return std::auto_ptr<PcfgTree>();
}
// Converts a SyntaxNode tree to a Moses::PCFG::PcfgTree.
std::auto_ptr<PcfgTree> XmlTreeParser::ConvertTree(
const SyntaxNode &tree,
const std::vector<std::string> &words)
{
std::auto_ptr<PcfgTree> root(new PcfgTree(tree.GetLabel()));
const std::vector<SyntaxNode*> &children = tree.GetChildren();
if (children.empty()) {
if (tree.GetStart() != tree.GetEnd()) {
std::ostringstream msg;
msg << "leaf node covers multiple words (" << tree.GetStart()
<< "-" << tree.GetEnd() << "): this is currently unsupported";
throw Exception(msg.str());
}
std::auto_ptr<PcfgTree> leaf(new PcfgTree(words[tree.GetStart()]));
leaf->set_parent(root.get());
root->AddChild(leaf.release());
} else {
for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
p != children.end(); ++p) {
assert(*p);
std::auto_ptr<PcfgTree> child = ConvertTree(**p, words);
child->set_parent(root.get());
root->AddChild(child.release());
}
}
return root;
}
} // namespace PCFG
} // namespace Moses

View File

@ -0,0 +1,56 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_XML_TREE_PARSER_H_
#define PCFG_XML_TREE_PARSER_H_
#include "pcfg_tree.h"
#include "SyntaxTree.h"
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
namespace Moses {
namespace PCFG {
// Parses a string in Moses' XML parse tree format and returns a PcfgTree
// object.
//
// Note: the interface uses std::auto_ptr to return ownership of the tree to
// the caller.
class XmlTreeParser {
public:
XmlTreeParser();
// Parses one input string and returns the resulting tree; the caller owns
// the returned object.
std::auto_ptr<PcfgTree> Parse(const std::string &);
private:
// Recursively converts a SyntaxNode (and its descendants) into a PcfgTree.
// The vector supplies the sentence's words, which are looked up by position
// for childless nodes.  Throws if a childless node spans more than one word.
std::auto_ptr<PcfgTree> ConvertTree(const SyntaxNode &,
const std::vector<std::string> &);
// NOTE(review): how the two label containers below are filled is not visible
// from this header -- presumably by the XML tag-processing code; confirm.
std::set<std::string> m_labelSet;
std::map<std::string, int> m_topLabelSet;
// Text of the current line; Parse tokenises it into m_words.
std::string m_line;
// Intermediate syntax tree (global-namespace SyntaxTree type).
::SyntaxTree m_tree;
// Tokens of m_line, passed to ConvertTree as the word list.
std::vector<std::string> m_words;
};
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1,133 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#ifndef PCFG_XML_TREE_WRITER_H_
#define PCFG_XML_TREE_WRITER_H_
#include "syntax_tree.h"
#include "XmlTree.h"
#include <cassert>
#include <map>
#include <memory>
#include <ostream>
#include <vector>
#include <string>
namespace Moses {
namespace PCFG {
// Customisation point used by XmlTreeWriter to obtain the label and the XML
// attributes of a tree node of type InputTree.
//
// Only declarations are given here; the member functions have no generic
// definition in this header, so presumably each InputTree type supplies its
// own specialisation elsewhere -- confirm.
template<typename InputTree>
class XmlOutputHandler {
public:
// Attribute name -> attribute value, iterated in key order when written.
typedef std::map<std::string, std::string> AttributeMap;
// Stores the label of the given node in the string argument (out-parameter).
void GetLabel(const InputTree &, std::string &) const;
// Stores the node's attributes in the map argument (out-parameter).
void GetAttributes(const InputTree &, AttributeMap &) const;
};
// Writes a tree of type InputTree to an output stream as nested <tree>
// elements.  Labels and attributes are obtained through the inherited
// XmlOutputHandler<InputTree> interface.
template<typename InputTree>
class XmlTreeWriter : public XmlOutputHandler<InputTree> {
public:
// Shorthand for the handler base class (needed to name its dependent
// members inside the template).
typedef XmlOutputHandler<InputTree> Base;
// Writes the given (non-leaf) tree to the stream.
void Write(const InputTree &, std::ostream &) const;
private:
// Returns a copy of the string with XML special characters replaced by
// entity references.
std::string Escape(const std::string &) const;
};
// Writes |tree| to |out| as a <tree label="..."> element.
//
// The opening tag carries the escaped node label followed by every attribute
// reported by Base::GetAttributes.  Each child is then emitted, preceded by a
// single space: leaf children as their escaped label, non-leaf children
// through a recursive call.  After the closing tag a line terminator
// (std::endl, so the stream is flushed) is written only for a node whose
// parent() is 0.
//
// Precondition (asserted): |tree| is not a leaf.
//
// NOTE(review): attribute names and values are written verbatim, without
// going through Escape -- confirm that handlers never yield values containing
// quotes or markup characters.
template<typename InputTree>
void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
                                     std::ostream &out) const {
  assert(!tree.IsLeaf());

  // Opening tag: label first, then the attributes in map order.
  std::string label;
  Base::GetLabel(tree, label);
  out << "<tree label=\"" << Escape(label) << "\"";

  typename Base::AttributeMap attributes;
  Base::GetAttributes(tree, attributes);
  typename Base::AttributeMap::const_iterator attr = attributes.begin();
  while (attr != attributes.end()) {
    out << " " << attr->first << "=\"" << attr->second << "\"";
    ++attr;
  }
  out << ">";

  // Children, in order.  The same |label| buffer is reused for leaf labels.
  const std::vector<InputTree *> &kids = tree.children();
  typedef typename std::vector<InputTree *>::size_type Index;
  for (Index i = 0; i < kids.size(); ++i) {
    InputTree &kid = *kids[i];
    if (!kid.IsLeaf()) {
      out << " ";
      Write(kid, out);
      continue;
    }
    Base::GetLabel(kid, label);
    out << " " << Escape(label);
  }

  // Closing tag; only a parentless node terminates the line.
  out << " </tree>";
  if (tree.parent() == 0) {
    out << std::endl;
  }
}
// Escapes XML special characters.
//
// Returns a copy of |s| in which the following characters are replaced:
//   <  ->  &lt;      >  ->  &gt;
//   [  ->  &#91;     ]  ->  &#93;
//   |  ->  &bar;     &  ->  &amp;
//   '  ->  &apos;    "  ->  &quot;
// All other characters are copied through unchanged.
template<typename InputTree>
std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
  std::string escaped;
  // The result is at least as long as the input.
  escaped.reserve(s.size());
  for (std::string::const_iterator c = s.begin(); c != s.end(); ++c) {
    switch (*c) {
      case '<':
        escaped += "&lt;";
        break;
      case '>':
        escaped += "&gt;";
        break;
      case '[':
        escaped += "&#91;";
        break;
      case ']':
        escaped += "&#93;";
        break;
      case '|':
        escaped += "&bar;";
        break;
      case '&':
        escaped += "&amp;";
        break;
      case '\'':
        escaped += "&apos;";
        break;
      case '"':
        escaped += "&quot;";
        break;
      default:
        escaped += *c;
        break;
    }
  }
  return escaped;
}
} // namespace PCFG
} // namespace Moses
#endif

View File

@ -0,0 +1 @@
# Builds the pcfg-extract executable from every .cc file in this directory,
# using the pcfg-common target from the parent directory's project and the
# boost_program_options target from the project four directories up.
exe pcfg-extract : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ;

View File

@ -0,0 +1,25 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2012 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "pcfg_extract.h"
int main(int argc, char *argv[]) {
Moses::PCFG::PcfgExtract tool;
return tool.Main(argc, argv);
}

Some files were not shown because too many files have changed in this diff Show More