mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 13:23:25 +03:00
Merge branch 'master' into miramerge
Conflicts: Jamroot mert/FeatureStats.cpp moses-cmd/src/IOWrapper.h scripts/training/mert-moses.pl scripts/training/train-model.perl.missing_bin_dir
This commit is contained in:
commit
3c44d04baf
1
.gitignore
vendored
1
.gitignore
vendored
@ -17,6 +17,7 @@ mert/extractor
|
||||
mert/mert
|
||||
mert/megam_i686.opt
|
||||
mert/pro
|
||||
mert/kbmira
|
||||
misc/processLexicalTable
|
||||
misc/processPhraseTable
|
||||
misc/queryLexicalTable
|
||||
|
3
Jamroot
3
Jamroot
@ -115,8 +115,7 @@ build-project scripts ;
|
||||
#Regression tests (only does anything if --with-regtest is passed)
|
||||
build-project regression-testing ;
|
||||
|
||||
|
||||
alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDisk mert//programs contrib/server//mosesserver misc//programs mira//programs ;
|
||||
alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDisk OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs mira//programs ;
|
||||
|
||||
install-bin-libs programs ;
|
||||
install-headers headers-base : [ glob-tree *.h *.hh : jam-files dist kenlm moses ] : . ;
|
||||
|
@ -1,2 +1,5 @@
|
||||
lib OnDiskPt : OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp ../moses/src//headers ;
|
||||
|
||||
exe CreateOnDisk : Main.cpp ../moses/src//moses OnDiskPt ;
|
||||
exe queryOnDiskPt : queryOnDiskPt.cpp ../moses/src//moses OnDiskPt ;
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "util.h"
|
||||
#include "Util.h"
|
||||
#include "OnDiskWrapper.h"
|
||||
#include "SourcePhrase.h"
|
||||
|
131
contrib/other-builds/OnDiskPt/.cproject
Normal file
131
contrib/other-builds/OnDiskPt/.cproject
Normal file
@ -0,0 +1,131 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings>
|
||||
<externalSetting>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/OnDiskPt"/>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/OnDiskPt/Debug"/>
|
||||
<entry flags="RESOLVED" kind="libraryFile" name="OnDiskPt"/>
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.725420545" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1586272140" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/OnDiskPt/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1909553559" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.30521110" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.478334849" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1328561226" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.108239817" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1825070846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.901309550" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
|
||||
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.576529322" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.426851981" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1925590121" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.726316251" name="Main.h" rcbsApplicability="disable" resourcePath="Main.h" toolsToInvoke=""/>
|
||||
<sourceEntries>
|
||||
<entry excluding="Main.h|Main.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.701931933">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.701931933" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.701931933." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.5036266" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.396818757" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/OnDiskPt/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1081186575" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.894082374" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.640159085" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1673993744" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.596082362" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.851420859" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.385722535" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.21058138" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1704184753" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.171488636" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.843129626" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1014721928" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="OnDiskPt.cdt.managedbuild.target.macosx.exe.542902806" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384;cdt.managedbuild.tool.gnu.c.compiler.input.1014721928">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634;cdt.managedbuild.tool.gnu.c.compiler.input.1925590121">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511;cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413;cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope" versionNumber="1">
|
||||
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
|
||||
</storageModule>
|
||||
</cproject>
|
185
contrib/other-builds/OnDiskPt/.project
Normal file
185
contrib/other-builds/OnDiskPt/.project
Normal file
@ -0,0 +1,185 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>OnDiskPt</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
<dictionary>
|
||||
<key>?name?</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.append_environment</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildArguments</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildCommand</key>
|
||||
<value>make</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildLocation</key>
|
||||
<value>${workspace_loc:/OnDiskPt/Debug}</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
|
||||
<value>clean</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.contents</key>
|
||||
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
|
||||
<value>false</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.stopOnError</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>Jamfile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Jamfile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>OnDiskWrapper.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/OnDiskWrapper.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>OnDiskWrapper.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/OnDiskWrapper.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Phrase.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Phrase.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Phrase.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Phrase.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>PhraseNode.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/PhraseNode.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>PhraseNode.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/PhraseNode.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SourcePhrase.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/SourcePhrase.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SourcePhrase.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/SourcePhrase.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TargetPhrase.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhrase.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TargetPhrase.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhrase.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TargetPhraseCollection.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhraseCollection.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TargetPhraseCollection.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/TargetPhraseCollection.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Vocab.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Vocab.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Vocab.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Vocab.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Word.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Word.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Word.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Word.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>queryOnDiskPt.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/queryOnDiskPt.cpp</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
125
contrib/other-builds/lm/.cproject
Normal file
125
contrib/other-builds/lm/.cproject
Normal file
@ -0,0 +1,125 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings>
|
||||
<externalSetting>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/lm"/>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/lm/Debug"/>
|
||||
<entry flags="RESOLVED" kind="libraryFile" name="lm"/>
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.640882096" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.793478365" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/lm/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.36011795" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1252826468" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1024598065" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.139111896" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.62265891" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.588438623" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.775866405" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
|
||||
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.586969644" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.934764060" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.2078705375" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1028526865" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.203229648">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.203229648" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.203229648" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.203229648." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1942852701" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.2107180060" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/lm/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.127652112" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1668850519" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.934899611" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.794276660" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.362272521" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.370659018" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2103660404" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1671568858" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.230723898" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1934130159" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1848737807" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1294441742" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="lm.cdt.managedbuild.target.macosx.exe.1399596076" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140;cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990;cdt.managedbuild.tool.gnu.c.compiler.input.1294441742">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722;cdt.managedbuild.tool.gnu.c.compiler.input.1028526865">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795;cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope"/>
|
||||
</cproject>
|
360
contrib/other-builds/lm/.project
Normal file
360
contrib/other-builds/lm/.project
Normal file
@ -0,0 +1,360 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>lm</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
<dictionary>
|
||||
<key>?name?</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.append_environment</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildArguments</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildCommand</key>
|
||||
<value>make</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildLocation</key>
|
||||
<value>${workspace_loc:/lm/Debug}</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
|
||||
<value>clean</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.contents</key>
|
||||
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
|
||||
<value>false</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.stopOnError</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>.DS_Store</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/.DS_Store</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>COPYING</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/COPYING</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>COPYING.LESSER</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/COPYING.LESSER</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Jamfile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/Jamfile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LICENSE</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/LICENSE</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>README</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/README</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bhiksha.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/bhiksha.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bhiksha.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/bhiksha.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>binary_format.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/binary_format.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>binary_format.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/binary_format.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>blank.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/blank.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>build_binary</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/build_binary</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>build_binary.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/build_binary.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>clean.sh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/clean.sh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>compile.sh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/compile.sh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>config.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/config.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>config.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/config.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>enumerate_vocab.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/enumerate_vocab.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>facade.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/facade.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>left.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/left.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>left_test.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/left_test.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>libkenlm.dylib</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/libkenlm.dylib</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>libkenutil.dylib</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/libkenutil.dylib</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>lm_exception.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/lm_exception.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>lm_exception.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/lm_exception.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>max_order.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/max_order.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>model.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/model.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>model.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/model.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>model_test.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/model_test.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>model_type.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/model_type.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ngram_query.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/ngram_query.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ngram_query.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/ngram_query.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>quantize.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/quantize.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>quantize.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/quantize.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>query</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/query</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>read_arpa.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/read_arpa.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>read_arpa.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/read_arpa.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>return.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/return.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>search_hashed.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/search_hashed.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>search_hashed.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/search_hashed.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>search_trie.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/search_trie.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>search_trie.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/search_trie.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>test.arpa</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/test.arpa</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>test.sh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/test.sh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>test_nounk.arpa</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/test_nounk.arpa</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>trie.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/trie.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>trie.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/trie.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>trie_sort.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>trie_sort.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>virtual_interface.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/virtual_interface.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>virtual_interface.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/virtual_interface.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>vocab.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/vocab.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>vocab.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/vocab.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>weights.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/weights.hh</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>word_index.hh</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/lm/word_index.hh</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
@ -307,6 +307,7 @@
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
@ -316,6 +317,7 @@
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lboost_thread-mt",
|
||||
);
|
||||
PRODUCT_NAME = "moses-chart-cmd";
|
||||
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
|
||||
@ -338,6 +340,7 @@
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
@ -347,6 +350,7 @@
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lboost_thread-mt",
|
||||
);
|
||||
PRODUCT_NAME = "moses-chart-cmd";
|
||||
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
|
||||
@ -359,7 +363,10 @@
|
||||
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = TRACE_ENABLE;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
TRACE_ENABLE,
|
||||
WITH_THREADS,
|
||||
);
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
HEADER_SEARCH_PATHS = (
|
||||
@ -378,7 +385,10 @@
|
||||
buildSettings = {
|
||||
ARCHS = "$(ARCHS_STANDARD_32_64_BIT)";
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = TRACE_ENABLE;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
TRACE_ENABLE,
|
||||
WITH_THREADS,
|
||||
);
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
HEADER_SEARCH_PATHS = (
|
||||
|
@ -311,6 +311,7 @@
|
||||
LM_SRI,
|
||||
LM_IRST,
|
||||
TRACE_ENABLE,
|
||||
WITH_THREADS,
|
||||
);
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
@ -324,6 +325,7 @@
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lflm",
|
||||
@ -332,6 +334,7 @@
|
||||
"-ldstruct",
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lboost_thread-mt",
|
||||
);
|
||||
PREBINDING = NO;
|
||||
PRODUCT_NAME = "moses-cmd";
|
||||
@ -348,9 +351,10 @@
|
||||
GCC_MODEL_TUNING = G5;
|
||||
GCC_OPTIMIZATION_LEVEL = 3;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
LM_IRST,
|
||||
LM_SRI,
|
||||
LM_IRST,
|
||||
TRACE_ENABLE,
|
||||
WITH_THREADS,
|
||||
);
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
@ -364,6 +368,7 @@
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lflm",
|
||||
@ -372,6 +377,7 @@
|
||||
"-ldstruct",
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lboost_thread-mt",
|
||||
);
|
||||
PREBINDING = NO;
|
||||
PRODUCT_NAME = "moses-cmd";
|
||||
@ -384,6 +390,12 @@
|
||||
buildSettings = {
|
||||
GCC_GENERATE_DEBUGGING_SYMBOLS = NO;
|
||||
GCC_MODEL_TUNING = G5;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
LM_SRI,
|
||||
LM_IRST,
|
||||
TRACE_ENABLE,
|
||||
WITH_THREADS,
|
||||
);
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
HEADER_SEARCH_PATHS = (
|
||||
@ -396,6 +408,7 @@
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lflm",
|
||||
@ -404,6 +417,7 @@
|
||||
"-ldstruct",
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lboost_thread-mt",
|
||||
);
|
||||
PREBINDING = NO;
|
||||
PRODUCT_NAME = "moses-cmd";
|
||||
|
140
contrib/other-builds/moses-cmd/.cproject
Normal file
140
contrib/other-builds/moses-cmd/.cproject
Normal file
@ -0,0 +1,140 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1679946908" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.451172468" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/moses-cmd/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1382407954" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.2118670613" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.84059290" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
|
||||
<option id="macosx.cpp.link.option.libs.1641794848" name="Libraries (-l)" superClass="macosx.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="moses"/>
|
||||
<listOptionValue builtIn="false" value="OnDiskPt"/>
|
||||
<listOptionValue builtIn="false" value="lm"/>
|
||||
<listOptionValue builtIn="false" value="util"/>
|
||||
<listOptionValue builtIn="false" value="irstlm"/>
|
||||
</option>
|
||||
<option id="macosx.cpp.link.option.paths.1615268628" name="Library search path (-L)" superClass="macosx.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/moses/Debug"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/lm/Debug"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/util/Debug"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/irstlm/lib"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.412058804" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.896987906" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.187427846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2033983602" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
|
||||
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.2018824611" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1176009559" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1024398579" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.748558048" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1014626120" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2031799877" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="LatticeMBRGrid.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.1916112479">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.1916112479" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.1916112479" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.1916112479." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1528572752" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.1976002706" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/moses-cmd/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1470455063" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.335066624" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1173017253" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.675070011" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.174060449" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1018665338" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.440711813" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1940339824" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1648308879" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.2105388501" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1692046412" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1452105399" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="moses-cmd.cdt.managedbuild.target.macosx.exe.1016275955" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope" versionNumber="1">
|
||||
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865;cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1808603697;cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
</cproject>
|
199
contrib/other-builds/moses-cmd/.project
Normal file
199
contrib/other-builds/moses-cmd/.project
Normal file
@ -0,0 +1,199 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>moses-cmd</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
<project>lm</project>
|
||||
<project>moses</project>
|
||||
<project>OnDiskPt</project>
|
||||
<project>util</project>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
<dictionary>
|
||||
<key>?name?</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.append_environment</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildArguments</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildCommand</key>
|
||||
<value>make</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildLocation</key>
|
||||
<value>${workspace_loc:/moses-cmd/Debug}</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
|
||||
<value>clean</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.contents</key>
|
||||
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
|
||||
<value>false</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.stopOnError</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>IOWrapper.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>IOWrapper.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>IOWrapper.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/IOWrapper.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Jamfile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Jamfile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LatticeMBR.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LatticeMBR.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LatticeMBR.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBR.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LatticeMBRGrid.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>LatticeMBRGrid.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/LatticeMBRGrid.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/Main.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationAnalysis.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationAnalysis.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationAnalysis.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/TranslationAnalysis.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>libkenlm.dylib</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenlm.dylib</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>libkenutil.dylib</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/libkenutil.dylib</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>lmbrgrid</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/lmbrgrid</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>mbr.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>mbr.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>mbr.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/mbr.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>moses</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses-cmd/src/moses</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
@ -1357,6 +1357,7 @@
|
||||
LM_IRST,
|
||||
"_FILE_OFFSET_BITS=64",
|
||||
_LARGE_FILES,
|
||||
WITH_THREADS,
|
||||
);
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../..,
|
||||
@ -1399,6 +1400,7 @@
|
||||
LM_IRST,
|
||||
"_FILE_OFFSET_BITS=64",
|
||||
_LARGE_FILES,
|
||||
WITH_THREADS,
|
||||
);
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../..,
|
||||
|
164
contrib/other-builds/moses/.cproject
Normal file
164
contrib/other-builds/moses/.cproject
Normal file
@ -0,0 +1,164 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings>
|
||||
<externalSetting>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses/Debug"/>
|
||||
<entry flags="RESOLVED" kind="libraryFile" name="moses"/>
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.497902212" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1820609450" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1998579330" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1330311562" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1226580551" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.102127808" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool command="as" commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.1556759720" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.897776351" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1820797229" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
|
||||
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.1898625650" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.806998992" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1819917957" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.1569452418" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="LM_SRI"/>
|
||||
<listOptionValue builtIn="false" value="LM_IRST"/>
|
||||
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.753046525" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1396911098" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1919272901" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1722029461" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1432960145" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1906856645" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.460380900" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1692203139" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.538301588" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.854427429" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
|
||||
<sourceEntries>
|
||||
<entry excluding="SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.cpp|LM/LDHT.h|LM/Remote.h|LM/Remote.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.722580523">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.722580523" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.722580523" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.722580523." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.2070671582" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.503591386" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.108117223" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1203406445" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1539915639" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1333560300" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1693865756" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.2000339940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.505919286" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1036481202" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.484015287" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.1089615214" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="LM_SRI"/>
|
||||
<listOptionValue builtIn="false" value="LM_IRST"/>
|
||||
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1722702487" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1487222992" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1171203697" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1172147378" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1831545277" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1743378025" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1490362543" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
|
||||
<sourceEntries>
|
||||
<entry excluding="LM/LDHT.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="moses.cdt.managedbuild.target.macosx.exe.1209017164" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202;cdt.managedbuild.tool.gnu.c.compiler.input.1919272901">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839;cdt.managedbuild.tool.gnu.c.compiler.input.1172147378">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805;cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925;cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope" versionNumber="1">
|
||||
<resource resourceType="PROJECT" workspacePath="/moses"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
3055
contrib/other-builds/moses/.project
Normal file
3055
contrib/other-builds/moses/.project
Normal file
File diff suppressed because it is too large
Load Diff
133
contrib/other-builds/util/.cproject
Normal file
133
contrib/other-builds/util/.cproject
Normal file
@ -0,0 +1,133 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?>
|
||||
|
||||
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings>
|
||||
<externalSetting>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/util"/>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/util/Debug"/>
|
||||
<entry flags="RESOLVED" kind="libraryFile" name="util"/>
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.1388624938" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1873607607" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/util/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.2045214944" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.589471640" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1543780089" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.635667684" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.726000130" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.592875056" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1252745601" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1018784824" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
|
||||
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.623959371" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.892917290" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1401298824" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.36067607" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.460849578" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.289923594" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="util/bit_packing_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.172239955">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.172239955" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.172239955" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.172239955." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.822279811" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.533470822" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/util/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1705559832" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.476073423" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.384294309" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1583097070" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1872669585" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.453642480" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1010248526" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.549134109" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1741196615" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1171704152" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.883129829" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.685540722" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.279247859" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1371842588" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1581172024" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1632081663" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="util.cdt.managedbuild.target.macosx.exe.2006203724" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.172239955;cdt.managedbuild.config.macosx.exe.release.172239955.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.279247859;cdt.managedbuild.tool.gnu.c.compiler.input.1632081663">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.172239955;cdt.managedbuild.config.macosx.exe.release.172239955.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.549134109;cdt.managedbuild.tool.gnu.cpp.compiler.input.685540722">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447;cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1018784824;cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447;cdt.managedbuild.config.gnu.macosx.exe.debug.1869657447.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901;cdt.managedbuild.tool.gnu.c.compiler.input.289923594">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope" versionNumber="1">
|
||||
<resource resourceType="PROJECT" workspacePath="/util"/>
|
||||
</storageModule>
|
||||
</cproject>
|
90
contrib/other-builds/util/.project
Normal file
90
contrib/other-builds/util/.project
Normal file
@ -0,0 +1,90 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>util</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
<dictionary>
|
||||
<key>?name?</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.append_environment</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.autoBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildArguments</key>
|
||||
<value></value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildCommand</key>
|
||||
<value>make</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.buildLocation</key>
|
||||
<value>${workspace_loc:/util/Debug}</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.cleanBuildTarget</key>
|
||||
<value>clean</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.contents</key>
|
||||
<value>org.eclipse.cdt.make.core.activeConfigSettings</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableAutoBuild</key>
|
||||
<value>false</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableCleanBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.enableFullBuild</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.fullBuildTarget</key>
|
||||
<value>all</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.stopOnError</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
<dictionary>
|
||||
<key>org.eclipse.cdt.make.core.useDefaultBuildCmd</key>
|
||||
<value>true</value>
|
||||
</dictionary>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>util</name>
|
||||
<type>2</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/util</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
@ -1,41 +0,0 @@
|
||||
#If you get compilation errors here, make sure you have xmlrpc-c installed properly, including the abyss server option.
|
||||
|
||||
import option ;
|
||||
import path ;
|
||||
|
||||
with-xmlrpc-c = [ option.get "with-xmlrpc-c" ] ;
|
||||
if $(with-xmlrpc-c) {
|
||||
build-moses-server = true ;
|
||||
xmlrpc-command = $(with-xmlrpc-c)/bin/xmlrpc-c-config ;
|
||||
if ! [ path.exists $(xmlrpc-command) ] {
|
||||
exit Could not find $(xmlrpc-command) : 1 ;
|
||||
}
|
||||
} else {
|
||||
xmlrpc-check = [ _shell "xmlrpc-c-config --features 2>/dev/null" : exit-status ] ;
|
||||
if $(xmlrpc-check[2]) = 0 {
|
||||
if [ MATCH "(abyss-server)" : $(xmlrpc-check[1]) ] {
|
||||
build-moses-server = true ;
|
||||
} else {
|
||||
echo "Found xmlrpc-c but it does not have abyss-server. Skipping mosesserver." ;
|
||||
}
|
||||
}
|
||||
xmlrpc-command = "xmlrpc-c-config" ;
|
||||
}
|
||||
|
||||
# Run a shell command and hand back its captured output.
# Aborts the whole build with status 1 when the command exits non-zero.
rule shell_or_die ( cmd ) {
  # _shell with exit-status yields two elements: [1] output, [2] exit code.
  local result = [ _shell $(cmd) : exit-status ] ;
  if $(result[2]) != 0 {
    exit "Failed to run $(cmd)" : 1 ;
  }
  return $(result[1]) ;
}
|
||||
|
||||
if $(build-moses-server) = true
|
||||
{
|
||||
xmlrpc-linkflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --libs" ] ;
|
||||
xmlrpc-cxxflags = [ shell_or_die "$(xmlrpc-command) c++2 abyss-server --cflags" ] ;
|
||||
|
||||
exe queryOnDiskPt : queryOnDiskPt.cpp ../../moses/src//moses ../../OnDiskPt//OnDiskPt : <linkflags>$(xmlrpc-linkflags) <cxxflags>$(xmlrpc-cxxflags) ;
|
||||
} else {
|
||||
alias queryOnDiskPt ;
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
SRI=/Users/hieuhoang/workspace/srilm
|
||||
IRST=/Users/hieuhoang/workspace/irstlm/trunk
|
||||
|
||||
g++ -o queryOnDiskPt queryOnDiskPt.cpp ../../moses/src/PhraseDictionary.cpp -I../../moses/src/ -I../../ -L../../dist/lib/ -I../../OnDiskPt -lmert_lib -ldynsa -lz -lmoses_internal -lOnDiskPt -lLM -lkenlm -lkenutil -lRuleTable -lCYKPlusParser -lScope3Parser -L$SRI/lib/macosx/ -ldstruct -lflm -llattice -lmisc -loolm -L/opt/local/lib -lboost_thread-mt -L$IRST/lib -lirstlm
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include "lm/bhiksha.hh"
|
||||
#include "lm/config.hh"
|
||||
#include "util/file.hh"
|
||||
#include "util/exception.hh"
|
||||
|
||||
#include <limits>
|
||||
|
||||
|
@ -232,3 +232,44 @@ float sentenceLevelBleuPlusOne(const vector<float>& stats) {
|
||||
}
|
||||
return exp(logbleu);
|
||||
}
|
||||
|
||||
float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg)
|
||||
{
|
||||
// Sum sent and background
|
||||
std::vector<float> stats;
|
||||
CHECK(sent.size()==bg.size());
|
||||
CHECK(sent.size()==kBleuNgramOrder*2+1);
|
||||
for(size_t i=0;i<sent.size();i++)
|
||||
stats.push_back(sent[i]+bg[i]);
|
||||
|
||||
// Calculate BLEU
|
||||
float logbleu = 0.0;
|
||||
for (int j = 0; j < kBleuNgramOrder; j++) {
|
||||
logbleu += log(stats[2 * j]) - log(stats[2 * j + 1]);
|
||||
}
|
||||
logbleu /= kBleuNgramOrder;
|
||||
const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
|
||||
|
||||
if (brevity < 0.0) {
|
||||
logbleu += brevity;
|
||||
}
|
||||
|
||||
// Exponentiate and scale by reference length (as per Chiang et al 08)
|
||||
return exp(logbleu) * stats[kBleuNgramOrder*2];
|
||||
}
|
||||
|
||||
/**
 * Plain (unsmoothed) BLEU from a vector of sufficient statistics.
 *
 * @param stats  exactly kBleuNgramOrder*2+1 values (enforced with CHECK).
 * @return exp of the averaged log ratios, with the brevity term added to the
 *         log score when it is negative.
 */
float unsmoothedBleu(const std::vector<float>& stats) {
  CHECK(stats.size() == kBleuNgramOrder * 2 + 1);

  // Mean over n-gram orders of log(stat[2j]) - log(stat[2j+1]).
  float logbleu = 0.0;
  for (int order = 0; order < kBleuNgramOrder; ++order) {
    const int first = 2 * order;
    logbleu += log(stats[first]) - log(stats[first + 1]);
  }
  logbleu /= kBleuNgramOrder;

  // Only a negative brevity term changes the score.
  const float brevity = 1.0 - stats[(kBleuNgramOrder * 2)] / stats[1];
  if (brevity < 0.0) {
    logbleu += brevity;
  }

  return exp(logbleu);
}
|
||||
|
@ -70,4 +70,14 @@ private:
|
||||
*/
|
||||
float sentenceLevelBleuPlusOne(const std::vector<float>& stats);
|
||||
|
||||
/** Computes sentence-level BLEU score given a background corpus.
|
||||
* This function is used in batch MIRA.
|
||||
*/
|
||||
float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg);
|
||||
|
||||
/**
|
||||
* Computes plain old BLEU from a vector of stats
|
||||
*/
|
||||
float unsmoothedBleu(const std::vector<float>& stats);
|
||||
|
||||
#endif // MERT_BLEU_SCORER_H_
|
||||
|
@ -152,10 +152,10 @@ BOOST_AUTO_TEST_CASE(bleu_count_ngrams) {
|
||||
// "girl with a telescope", "with a telescope ."
|
||||
NgramCounts counts;
|
||||
BOOST_REQUIRE(scorer.CountNgrams(line, counts, kBleuNgramOrder) == 8);
|
||||
BOOST_CHECK_EQUAL(25, counts.size());
|
||||
BOOST_CHECK_EQUAL((std::size_t)25, counts.size());
|
||||
|
||||
mert::Vocabulary* vocab = scorer.GetVocab();
|
||||
BOOST_CHECK_EQUAL(7, vocab->size());
|
||||
BOOST_CHECK_EQUAL((std::size_t)7, vocab->size());
|
||||
|
||||
std::vector<std::string> res;
|
||||
Tokenize(line.c_str(), ' ', &res);
|
||||
@ -203,7 +203,7 @@ BOOST_AUTO_TEST_CASE(bleu_clipped_counts) {
|
||||
ScoreStats entry;
|
||||
scorer.prepareStats(0, line, entry);
|
||||
|
||||
BOOST_CHECK_EQUAL(entry.size(), 2 * kBleuNgramOrder + 1);
|
||||
BOOST_CHECK_EQUAL(entry.size(), (std::size_t)(2 * kBleuNgramOrder + 1));
|
||||
|
||||
// Test hypothesis ngram counts
|
||||
BOOST_CHECK_EQUAL(entry.get(0), 5); // unigram
|
||||
|
@ -33,8 +33,8 @@ BOOST_AUTO_TEST_CASE(shard_basic) {
|
||||
std::vector<Data> shards;
|
||||
data.createShards(2,0,"",shards);
|
||||
|
||||
BOOST_CHECK_EQUAL(shards.size(),2);
|
||||
BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),2);
|
||||
BOOST_CHECK_EQUAL(shards.size(),(std::size_t)2);
|
||||
BOOST_CHECK_EQUAL(shards[1].getFeatureData()->size(),(std::size_t)2);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(init_feature_map_test) {
|
||||
|
@ -18,6 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#include "util/tokenize_piece.hh"
|
||||
|
||||
@ -47,6 +48,16 @@ float ParseFloat(const StringPiece& str) {
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
 * Equality for FeatureDataItem: two items are equal when both their dense
 * and their sparse feature vectors compare equal.
 *
 * This is the equality used alongside hash_value(FeatureDataItem const&)
 * when items are stored in hashed containers, so it must really compare the
 * two operands against each other.
 */
bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2) {
  // Compare item1 with item2. The previous code compared item1 with itself,
  // which made any two items look identical.
  return item1.dense==item2.dense && item1.sparse==item2.sparse;
}
|
||||
|
||||
/**
 * Hash of a FeatureDataItem: the dense vector is folded in first, then the
 * sparse vector, both via boost::hash_combine starting from a zero seed.
 */
size_t hash_value(FeatureDataItem const& item) {
  size_t combined = 0;
  boost::hash_combine(combined, item.dense);   // dense features first
  boost::hash_combine(combined, item.sparse);  // then sparse features
  return combined;
}
|
||||
|
||||
|
||||
FeatureDataIterator::FeatureDataIterator() {}
|
||||
|
@ -61,6 +61,9 @@ class FeatureDataItem
|
||||
SparseVector sparse;
|
||||
};
|
||||
|
||||
bool operator==(FeatureDataItem const& item1, FeatureDataItem const& item2);
|
||||
std::size_t hash_value(FeatureDataItem const& item);
|
||||
|
||||
class FeatureDataIterator :
|
||||
public boost::iterator_facade<FeatureDataIterator,
|
||||
const std::vector<FeatureDataItem>,
|
||||
|
@ -13,7 +13,7 @@ void CheckFeatureMap(const FeatureData* feature_data,
|
||||
std::stringstream ss;
|
||||
ss << str << "_" << i;
|
||||
const std::string& s = ss.str();
|
||||
BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), *cnt);
|
||||
BOOST_CHECK_EQUAL(feature_data->getFeatureIndex(s), (std::size_t)(*cnt));
|
||||
BOOST_CHECK_EQUAL(feature_data->getFeatureName(*cnt).c_str(), s);
|
||||
++(*cnt);
|
||||
}
|
||||
@ -35,6 +35,6 @@ BOOST_AUTO_TEST_CASE(set_feature_map) {
|
||||
CheckFeatureMap(&feature_data, "lm", 2, &cnt);
|
||||
CheckFeatureMap(&feature_data, "tm", 5, &cnt);
|
||||
|
||||
BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), cnt);
|
||||
BOOST_CHECK_EQUAL(feature_data.getFeatureIndex("w_0"), (std::size_t)cnt);
|
||||
BOOST_CHECK_EQUAL(feature_data.getFeatureName(cnt).c_str(), "w_0");
|
||||
}
|
||||
|
@ -11,6 +11,9 @@
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
@ -109,6 +112,42 @@ FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs)
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::size_t> SparseVector::feats() const {
|
||||
std::vector<std::size_t> toRet;
|
||||
for(fvector_t::const_iterator iter = m_fvector.begin();
|
||||
iter!=m_fvector.end();
|
||||
iter++) {
|
||||
toRet.push_back(iter->first);
|
||||
}
|
||||
return toRet;
|
||||
}
|
||||
|
||||
std::size_t SparseVector::encode(const std::string& name) {
|
||||
name2id_t::const_iterator name2id_iter = m_name_to_id.find(name);
|
||||
size_t id = 0;
|
||||
if (name2id_iter == m_name_to_id.end()) {
|
||||
id = m_id_to_name.size();
|
||||
m_id_to_name.push_back(name);
|
||||
m_name_to_id[name] = id;
|
||||
} else {
|
||||
id = name2id_iter->second;
|
||||
}
|
||||
return id;
|
||||
}
|
||||
|
||||
std::string SparseVector::decode(std::size_t id) {
|
||||
return m_id_to_name[id];
|
||||
}
|
||||
|
||||
/**
 * Two SparseVectors are equal when their underlying m_fvector containers
 * compare equal.
 */
bool operator==(SparseVector const& item1, SparseVector const& item2) {
  const bool sameEntries = (item1.m_fvector == item2.m_fvector);
  return sameEntries;
}
|
||||
|
||||
/**
 * Hash of a SparseVector: boost::hash applied to its m_fvector container.
 */
std::size_t hash_value(SparseVector const& item) {
  return boost::hash<SparseVector::fvector_t>()(item.m_fvector);
}
|
||||
|
||||
/**
 * Default constructor: starts with zero entries and a freshly allocated
 * array of kAvailableSize elements.
 */
FeatureStats::FeatureStats()
    : m_available_size(kAvailableSize),
      m_entries(0),
      // NOTE(review): reads m_available_size, so this relies on that member
      // being declared (and thus initialised) before m_array; confirm in the header.
      m_array(new FeatureStatsType[m_available_size])
{
}
|
||||
|
@ -29,12 +29,20 @@ public:
|
||||
void clear();
|
||||
void load(const std::string& file);
|
||||
std::size_t size() const { return m_fvector.size(); }
|
||||
|
||||
|
||||
void write(std::ostream& out, const std::string& sep = " ") const;
|
||||
|
||||
SparseVector& operator-=(const SparseVector& rhs);
|
||||
FeatureStatsType inner_product(const SparseVector& rhs) const;
|
||||
|
||||
// Added by cherryc
|
||||
std::vector<std::size_t> feats() const;
|
||||
friend bool operator==(SparseVector const& item1, SparseVector const& item2);
|
||||
friend std::size_t hash_value(SparseVector const& item);
|
||||
static std::size_t encode(const std::string& feat);
|
||||
static std::string decode(std::size_t feat);
|
||||
// End added by cherryc
|
||||
|
||||
private:
|
||||
static name2id_t m_name_to_id;
|
||||
static id2name_t m_id_to_name;
|
||||
|
189
mert/HypPackEnumerator.cpp
Normal file
189
mert/HypPackEnumerator.cpp
Normal file
@ -0,0 +1,189 @@
|
||||
#include "HypPackEnumerator.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <boost/unordered_set.hpp>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Remembers the feature/score file lists and validates them.
 *
 * Exits with status 0 when either list is empty, and with status 1 when the
 * two lists differ in length. The enumerator starts unprimed; the number of
 * dense features is unknown (-1) until prime() has seen data.
 */
StreamingHypPackEnumerator::StreamingHypPackEnumerator(
    vector<std::string> const& featureFiles,
    vector<std::string> const& scoreFiles)
  : m_featureFiles(featureFiles),
    m_scoreFiles(scoreFiles)
{
  // Nothing to do at all: report it and stop (status 0).
  if (scoreFiles.empty() || featureFiles.empty()) {
    cerr << "No data to process" << endl;
    exit(0);
  }

  // Feature and score files are consumed pairwise, so the counts must agree.
  const bool countsMatch = (featureFiles.size() == scoreFiles.size());
  if (!countsMatch) {
    cerr << "Error: Number of feature files (" << featureFiles.size() <<
         ") does not match number of score files (" << scoreFiles.size() << ")" << endl;
    exit(1);
  }

  m_iNumDense = -1;
  m_primed = false;
  m_num_lists = scoreFiles.size();
}
|
||||
|
||||
size_t StreamingHypPackEnumerator::num_dense() const {
|
||||
if(m_iNumDense<0) {
|
||||
cerr << "Error: Requested num_dense() for an unprimed StreamingHypPackEnumerator" << endl;
|
||||
exit(1);
|
||||
}
|
||||
return (size_t) m_iNumDense;
|
||||
}
|
||||
|
||||
void StreamingHypPackEnumerator::prime(){
|
||||
m_current_indexes.clear();
|
||||
boost::unordered_set<FeatureDataItem> seen;
|
||||
m_primed = true;
|
||||
|
||||
for (size_t i = 0; i < m_num_lists; ++i) {
|
||||
if (m_featureDataIters[i] == FeatureDataIterator::end()) {
|
||||
cerr << "Error: Feature file " << i << " ended prematurely" << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (m_scoreDataIters[i] == ScoreDataIterator::end()) {
|
||||
cerr << "Error: Score file " << i << " ended prematurely" << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (m_featureDataIters[i]->size() != m_scoreDataIters[i]->size()) {
|
||||
cerr << "Error: For sentence " << m_sentenceId << " features and scores have different size" << endl;
|
||||
exit(1);
|
||||
}
|
||||
for (size_t j = 0; j < m_featureDataIters[i]->size(); ++j) {
|
||||
FeatureDataItem item = m_featureDataIters[i]->operator[](j);
|
||||
// Dedup
|
||||
if(seen.find(item)==seen.end()) {
|
||||
seen.insert(item);
|
||||
// Confirm dense features are always the same
|
||||
int iDense = item.dense.size();
|
||||
if(m_iNumDense != iDense) {
|
||||
if(m_iNumDense==-1) m_iNumDense = iDense;
|
||||
else {
|
||||
cerr << "Error: expecting constant number of dense features: "
|
||||
<< m_iNumDense << " != " << iDense << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// Store item for retrieval
|
||||
m_current_indexes.push_back(pair<size_t,size_t>(i,j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void StreamingHypPackEnumerator::reset(){
|
||||
m_featureDataIters.clear();
|
||||
m_scoreDataIters.clear();
|
||||
for (size_t i = 0; i < m_num_lists; ++i) {
|
||||
m_featureDataIters.push_back(FeatureDataIterator(m_featureFiles[i]));
|
||||
m_scoreDataIters.push_back(ScoreDataIterator(m_scoreFiles[i]));
|
||||
}
|
||||
m_sentenceId=0;
|
||||
prime();
|
||||
}
|
||||
|
||||
bool StreamingHypPackEnumerator::finished(){
|
||||
return m_featureDataIters[0]==FeatureDataIterator::end();
|
||||
}
|
||||
|
||||
void StreamingHypPackEnumerator::next(){
|
||||
if(!m_primed) {
|
||||
cerr << "Enumerating an unprimed HypPackEnumerator" << endl;
|
||||
exit(1);
|
||||
}
|
||||
for (size_t i = 0; i < m_num_lists; ++i) {
|
||||
++m_featureDataIters[i];
|
||||
++m_scoreDataIters[i];
|
||||
}
|
||||
m_sentenceId++;
|
||||
if(!finished()) prime();
|
||||
}
|
||||
|
||||
size_t StreamingHypPackEnumerator::cur_size(){
|
||||
if(!m_primed) {
|
||||
cerr << "Querying size from an unprimed HypPackEnumerator" << endl;
|
||||
exit(1);
|
||||
}
|
||||
return m_current_indexes.size();
|
||||
}
|
||||
|
||||
/**
 * Features of the index-th recorded hypothesis of the current sentence.
 * m_current_indexes[index] holds (list, position) and is used to look the
 * item up in the matching feature iterator. Exits with status 1 when the
 * enumerator has not been primed.
 */
const FeatureDataItem& StreamingHypPackEnumerator::featuresAt(size_t index){
  if(!m_primed) {
    cerr << "Querying features from an unprimed HypPackEnumerator" << endl;
    exit(1);
  }
  const pair<size_t,size_t>& where = m_current_indexes[index];
  const size_t list = where.first;
  const size_t position = where.second;
  return (*m_featureDataIters[list])[position];
}
|
||||
|
||||
// Score statistics of the index-th hypothesis of the current pack, looked up
// through the same (list, position) pair used by featuresAt.
// Exits with status 1 if not primed.
const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index)
{
  if (m_primed == false) {
    cerr << "Querying scores from an unprimed HypPackEnumerator" << endl;
    exit(1);
  }
  const pair<size_t,size_t>& where = m_current_indexes[index];
  const size_t list = where.first;
  const size_t position = where.second;
  return m_scoreDataIters[list]->operator[](position);
}
|
||||
|
||||
/* --------- RandomAccessHypPackEnumerator ------------- */
|
||||
|
||||
// Loads every hypothesis pack into memory by draining a temporary
// StreamingHypPackEnumerator over the same files. Pack k is stored in
// m_features[k] / m_scores[k], and m_indexes starts as 0, 1, 2, ...
// The dense feature count is taken from the streaming enumerator once it
// has been read to the end.
RandomAccessHypPackEnumerator::RandomAccessHypPackEnumerator(vector<string> const& featureFiles,
                                                             vector<string> const& scoreFiles,
                                                             bool no_shuffle)
{
  StreamingHypPackEnumerator stream(featureFiles, scoreFiles);

  stream.reset();
  while (!stream.finished()) {
    m_features.push_back(vector<FeatureDataItem>());
    m_scores.push_back(vector<ScoreDataItem>());
    vector<FeatureDataItem>& packFeatures = m_features.back();
    vector<ScoreDataItem>& packScores = m_scores.back();

    for (size_t hyp = 0; hyp < stream.cur_size(); ++hyp) {
      packFeatures.push_back(stream.featuresAt(hyp));
      packScores.push_back(stream.scoresAt(hyp));
    }

    // One index per pack, in load order.
    m_indexes.push_back(m_indexes.size());
    stream.next();
  }

  m_cur_index = 0;
  m_no_shuffle = no_shuffle;
  m_num_dense = stream.num_dense();
}
|
||||
|
||||
size_t RandomAccessHypPackEnumerator::num_dense() const {
|
||||
return m_num_dense;
|
||||
}
|
||||
|
||||
void RandomAccessHypPackEnumerator::reset() {
|
||||
m_cur_index = 0;
|
||||
if(!m_no_shuffle) random_shuffle(m_indexes.begin(),m_indexes.end());
|
||||
}
|
||||
bool RandomAccessHypPackEnumerator::finished() {
|
||||
return m_cur_index >= m_indexes.size();
|
||||
}
|
||||
// Moves the cursor to the next slot of m_indexes.
void RandomAccessHypPackEnumerator::next()
{
  ++m_cur_index;
}
|
||||
|
||||
// Number of hypotheses in the pack the cursor currently points at.
// Asserts that the feature and score lists of that pack have equal length.
size_t RandomAccessHypPackEnumerator::cur_size()
{
  assert(m_features[m_indexes[m_cur_index]].size()==m_scores[m_indexes[m_cur_index]].size());
  const size_t pack = m_indexes[m_cur_index];
  return m_features[pack].size();
}
|
||||
// Features of hypothesis i within the pack the cursor currently points at.
const FeatureDataItem& RandomAccessHypPackEnumerator::featuresAt(size_t i)
{
  const size_t pack = m_indexes[m_cur_index];
  return m_features[pack][i];
}
|
||||
// Score statistics of hypothesis i within the pack the cursor points at.
const ScoreDataItem& RandomAccessHypPackEnumerator::scoresAt(size_t i)
{
  const size_t pack = m_indexes[m_cur_index];
  return m_scores[pack][i];
}
|
||||
|
||||
|
||||
// --Emacs trickery--
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-basic-offset:2
|
||||
// End:
|
101
mert/HypPackEnumerator.h
Normal file
101
mert/HypPackEnumerator.h
Normal file
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* HypPackEnumerator.h
|
||||
* kbmira - k-best Batch MIRA
|
||||
*
|
||||
* Abstracts away the mess of iterating through multiple
|
||||
* collections of k-best lists, as well as deduping
|
||||
*/
|
||||
|
||||
#ifndef MERT_HYP_PACK_COLLECTION_H
|
||||
#define MERT_HYP_PACK_COLLECTION_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <stddef.h>
|
||||
|
||||
#include "FeatureDataIterator.h"
|
||||
#include "ScoreDataIterator.h"
|
||||
|
||||
// Start with these abstract classes
|
||||
|
||||
class HypPackEnumerator {
|
||||
public:
|
||||
virtual ~HypPackEnumerator() {}
|
||||
|
||||
virtual void reset() = 0;
|
||||
virtual bool finished() = 0;
|
||||
virtual void next() = 0;
|
||||
|
||||
virtual std::size_t cur_size() = 0;
|
||||
virtual std::size_t num_dense() const = 0;
|
||||
virtual const FeatureDataItem& featuresAt(std::size_t i) = 0;
|
||||
virtual const ScoreDataItem& scoresAt(std::size_t i) = 0;
|
||||
};
|
||||
|
||||
// Instantiation that streams from disk
|
||||
// Low-memory, low-speed, sequential access
|
||||
class StreamingHypPackEnumerator : public HypPackEnumerator {
|
||||
public:
|
||||
StreamingHypPackEnumerator(std::vector<std::string> const& featureFiles,
|
||||
std::vector<std::string> const& scoreFiles);
|
||||
|
||||
virtual std::size_t num_dense() const;
|
||||
|
||||
virtual void reset();
|
||||
virtual bool finished();
|
||||
virtual void next();
|
||||
|
||||
virtual std::size_t cur_size();
|
||||
virtual const FeatureDataItem& featuresAt(std::size_t i);
|
||||
virtual const ScoreDataItem& scoresAt(std::size_t i);
|
||||
|
||||
private:
|
||||
void prime();
|
||||
std::size_t m_num_lists;
|
||||
std::size_t m_sentenceId;
|
||||
std::vector<std::string> m_featureFiles;
|
||||
std::vector<std::string> m_scoreFiles;
|
||||
|
||||
bool m_primed;
|
||||
int m_iNumDense;
|
||||
std::vector<FeatureDataIterator> m_featureDataIters;
|
||||
std::vector<ScoreDataIterator> m_scoreDataIters;
|
||||
std::vector<std::pair<std::size_t,std::size_t> > m_current_indexes;
|
||||
};
|
||||
|
||||
// Instantiation that reads into memory
|
||||
// High-memory, high-speed, random access
|
||||
// (Actually randomizes with each call to reset)
|
||||
class RandomAccessHypPackEnumerator : public HypPackEnumerator {
|
||||
public:
|
||||
RandomAccessHypPackEnumerator(std::vector<std::string> const& featureFiles,
|
||||
std::vector<std::string> const& scoreFiles,
|
||||
bool no_shuffle);
|
||||
|
||||
virtual std::size_t num_dense() const;
|
||||
|
||||
virtual void reset();
|
||||
virtual bool finished();
|
||||
virtual void next();
|
||||
|
||||
virtual std::size_t cur_size();
|
||||
virtual const FeatureDataItem& featuresAt(std::size_t i);
|
||||
virtual const ScoreDataItem& scoresAt(std::size_t i);
|
||||
|
||||
private:
|
||||
bool m_no_shuffle;
|
||||
std::size_t m_cur_index;
|
||||
std::size_t m_num_dense;
|
||||
std::vector<std::size_t> m_indexes;
|
||||
std::vector<std::vector<FeatureDataItem> > m_features;
|
||||
std::vector<std::vector<ScoreDataItem> > m_scores;
|
||||
};
|
||||
|
||||
#endif // MERT_HYP_PACK_COLLECTION_H
|
||||
|
||||
// --Emacs trickery--
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-basic-offset:2
|
||||
// End:
|
@ -15,6 +15,9 @@ FeatureStats.cpp
|
||||
FeatureArray.cpp
|
||||
FeatureData.cpp
|
||||
FeatureDataIterator.cpp
|
||||
MiraFeatureVector.cpp
|
||||
MiraWeightVector.cpp
|
||||
HypPackEnumerator.cpp
|
||||
Data.cpp
|
||||
BleuScorer.cpp
|
||||
SemposScorer.cpp
|
||||
@ -52,7 +55,9 @@ exe evaluator : evaluator.cpp mert_lib ;
|
||||
|
||||
exe pro : pro.cpp mert_lib ..//boost_program_options ;
|
||||
|
||||
alias programs : mert extractor evaluator pro ;
|
||||
exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ;
|
||||
|
||||
alias programs : mert extractor evaluator pro kbmira ;
|
||||
|
||||
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;
|
||||
|
146
mert/MiraFeatureVector.cpp
Normal file
146
mert/MiraFeatureVector.cpp
Normal file
@ -0,0 +1,146 @@
|
||||
#include <cmath>
|
||||
|
||||
#include "MiraFeatureVector.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Builds a feature vector from a FeatureDataItem. The dense values are
// copied as-is; every sparse feature id is shifted up by the number of dense
// features so that dense and sparse ids share one index space.
// Exits with status 1 if the shifted sparse ids are not strictly ascending.
MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
  : m_dense(vec.dense)
{
  vector<size_t> rawIds = vec.sparse.feats();

  for (size_t k = 0; k < rawIds.size(); ++k) {
    const size_t shifted = m_dense.size() + rawIds[k];
    m_sparseFeats.push_back(shifted);
    m_sparseVals.push_back(vec.sparse.get(rawIds[k]));

    // Ordering check: from the second feature on, each id must be larger
    // than the one stored just before it.
    if (k > 0 && m_sparseFeats[k - 1] >= shifted) {
      cerr << "Error: Feature indeces must be strictly ascending coming out of SparseVector" << endl;
      exit(1);
    }
  }
}
|
||||
|
||||
// Copy constructor. Exits with status 1 if the copied sparse id and value
// arrays differ in length.
MiraFeatureVector::MiraFeatureVector(const MiraFeatureVector& other)
  : m_dense(other.m_dense),
    m_sparseFeats(other.m_sparseFeats),
    m_sparseVals(other.m_sparseVals)
{
  const bool sameLength = (m_sparseVals.size() == m_sparseFeats.size());
  if (sameLength) {
    return;
  }
  cerr << "Error: mismatching sparse feat and val sizes" << endl;
  exit(1);
}
|
||||
|
||||
// Builds a feature vector directly from its three component arrays.
// Exits with status 1 if sparseFeats and sparseVals differ in length.
MiraFeatureVector::MiraFeatureVector(const vector<ValType>& dense,
                                     const vector<size_t>& sparseFeats,
                                     const vector<ValType>& sparseVals)
  : m_dense(dense),
    m_sparseFeats(sparseFeats),
    m_sparseVals(sparseVals)
{
  const bool sameLength = (m_sparseVals.size() == m_sparseFeats.size());
  if (sameLength) {
    return;
  }
  cerr << "Error: mismatching sparse feat and val sizes" << endl;
  exit(1);
}
|
||||
|
||||
// Value stored at position `index`, where positions run over
// [0, size()): first the dense values, then the sparse values.
//
// Positions past the dense block must be rebased by the dense count before
// indexing m_sparseVals. Without the rebase the lookup returns the wrong
// entry, or reads past the end of the array, as soon as there is at least
// one dense feature.
ValType MiraFeatureVector::val(size_t index) const {
  const size_t numDense = m_dense.size();
  if (index < numDense)
    return m_dense[index];
  return m_sparseVals[index - numDense];
}
|
||||
|
||||
size_t MiraFeatureVector::feat(size_t index) const {
|
||||
if(index < m_dense.size())
|
||||
return index;
|
||||
else
|
||||
return m_sparseFeats[index];
|
||||
}
|
||||
|
||||
size_t MiraFeatureVector::size() const {
|
||||
return m_dense.size() + m_sparseVals.size();
|
||||
}
|
||||
|
||||
// Squared Euclidean norm: the sum of squares of all dense values followed
// by all sparse values.
ValType MiraFeatureVector::sqrNorm() const
{
  ValType sumSquares = 0.0;

  for (vector<ValType>::const_iterator it = m_dense.begin(); it != m_dense.end(); ++it) {
    sumSquares += (*it) * (*it);
  }
  for (vector<ValType>::const_iterator it = m_sparseVals.begin(); it != m_sparseVals.end(); ++it) {
    sumSquares += (*it) * (*it);
  }
  return sumSquares;
}
|
||||
|
||||
// Element-wise difference a - b.
// The dense parts must have the same length (otherwise the program exits
// with status 1) and are subtracted position by position. The sparse parts
// are combined with a two-pointer merge over their ascending feature ids:
// ids present on one side only are carried over (negated when they come
// from b), and ids present on both sides are kept only when the difference
// exceeds 1e-6 in magnitude.
MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector& b)
{
  // Dense part
  if (a.m_dense.size() != b.m_dense.size()) {
    cerr << "Mismatching dense vectors passed to MiraFeatureVector subtraction" << endl;
    exit(1);
  }
  vector<ValType> dense;
  for (size_t d = 0; d < a.m_dense.size(); ++d) {
    dense.push_back(a.m_dense[d] - b.m_dense[d]);
  }

  // Sparse part
  vector<ValType> sparseVals;
  vector<size_t> sparseFeats;
  const size_t aCount = a.m_sparseFeats.size();
  const size_t bCount = b.m_sparseFeats.size();
  size_t ai = 0;
  size_t bi = 0;

  while (ai < aCount || bi < bCount) {
    // Decide which side supplies the next (smallest) feature id.
    const bool onlyA = (bi >= bCount);
    const bool onlyB = (ai >= aCount);
    const bool takeA = onlyA || (!onlyB && a.m_sparseFeats[ai] < b.m_sparseFeats[bi]);
    const bool takeB = !takeA && (onlyB || b.m_sparseFeats[bi] < a.m_sparseFeats[ai]);

    if (takeA) {
      sparseFeats.push_back(a.m_sparseFeats[ai]);
      sparseVals.push_back(a.m_sparseVals[ai]);
      ++ai;
    } else if (takeB) {
      sparseFeats.push_back(b.m_sparseFeats[bi]);
      sparseVals.push_back(-b.m_sparseVals[bi]);
      ++bi;
    } else {
      // Same id on both sides: drop the entry when the values cancel out.
      ValType newVal = a.m_sparseVals[ai] - b.m_sparseVals[bi];
      if(abs(newVal)>1e-6) {
        sparseFeats.push_back(a.m_sparseFeats[ai]);
        sparseVals.push_back(newVal);
      }
      ++ai;
      ++bi;
    }
  }

  return MiraFeatureVector(dense, sparseFeats, sparseVals);
}
|
||||
|
||||
// --Emacs trickery--
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-basic-offset:2
|
||||
// End:
|
49
mert/MiraFeatureVector.h
Normal file
49
mert/MiraFeatureVector.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* MiraFeatureVector.h
|
||||
* kbmira - k-best Batch MIRA
|
||||
*
|
||||
* An alternative to the existing SparseVector
|
||||
* and FeatureDataItem combo. Should be as memory
|
||||
* efficient, and a little more time efficient,
|
||||
* and should save me from constantly hacking
|
||||
* SparseVector
|
||||
*/
|
||||
|
||||
#ifndef MERT_MIRA_FEATURE_VECTOR_H
|
||||
#define MERT_MIRA_FEATURE_VECTOR_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "FeatureDataIterator.h"
|
||||
|
||||
typedef FeatureStatsType ValType;
|
||||
|
||||
class MiraFeatureVector {
|
||||
public:
|
||||
MiraFeatureVector(const FeatureDataItem& vec);
|
||||
MiraFeatureVector(const MiraFeatureVector& other);
|
||||
MiraFeatureVector(const std::vector<ValType>& dense,
|
||||
const std::vector<std::size_t>& sparseFeats,
|
||||
const std::vector<ValType>& sparseVals);
|
||||
|
||||
ValType val(std::size_t index) const;
|
||||
std::size_t feat(std::size_t index) const;
|
||||
std::size_t size() const;
|
||||
ValType sqrNorm() const;
|
||||
|
||||
friend MiraFeatureVector operator-(const MiraFeatureVector& a,
|
||||
const MiraFeatureVector& b);
|
||||
|
||||
private:
|
||||
std::vector<ValType> m_dense;
|
||||
std::vector<std::size_t> m_sparseFeats;
|
||||
std::vector<ValType> m_sparseVals;
|
||||
};
|
||||
|
||||
#endif // MERT_MIRA_FEATURE_VECTOR_H
|
||||
|
||||
// --Emacs trickery--
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-basic-offset:2
|
||||
// End:
|
145
mert/MiraWeightVector.cpp
Normal file
145
mert/MiraWeightVector.cpp
Normal file
@ -0,0 +1,145 @@
|
||||
#include "MiraWeightVector.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
|
||||
* Constructor, initializes to the zero vector
|
||||
*/
|
||||
MiraWeightVector::MiraWeightVector()
|
||||
: m_weights(),
|
||||
m_totals(),
|
||||
m_lastUpdated()
|
||||
{
|
||||
m_numUpdates = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor with provided initial vector
|
||||
* \param init Initial feature values
|
||||
*/
|
||||
MiraWeightVector::MiraWeightVector(const vector<ValType>& init)
|
||||
: m_weights(init),
|
||||
m_totals(init),
|
||||
m_lastUpdated(init.size(), 0)
|
||||
{
|
||||
m_numUpdates = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a the model
|
||||
* \param fv Feature vector to be added to the weights
|
||||
* \param tau FV will be scaled by this value before update
|
||||
*/
|
||||
void MiraWeightVector::update(const MiraFeatureVector& fv, float tau) {
|
||||
m_numUpdates++;
|
||||
for(size_t i=0;i<fv.size();i++) {
|
||||
update(fv.feat(i), fv.val(i)*tau);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform an empty update (affects averaging)
|
||||
*/
|
||||
void MiraWeightVector::tick() {
|
||||
m_numUpdates++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Score a feature vector according to the model
|
||||
* \param fv Feature vector to be scored
|
||||
*/
|
||||
ValType MiraWeightVector::score(const MiraFeatureVector& fv) const {
|
||||
ValType toRet = 0.0;
|
||||
for(size_t i=0; i<fv.size(); i++) {
|
||||
toRet += weight(fv.feat(i)) * fv.val(i);
|
||||
}
|
||||
return toRet;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an averaged view of this weight vector
|
||||
*/
|
||||
AvgWeightVector MiraWeightVector::avg() {
|
||||
this->fixTotals();
|
||||
return AvgWeightVector(*this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates a weight and lazily updates its total
|
||||
*/
|
||||
void MiraWeightVector::update(size_t index, ValType delta) {
|
||||
|
||||
// Handle previously unseen weights
|
||||
while(index>=m_weights.size()) {
|
||||
m_weights.push_back(0.0);
|
||||
m_totals.push_back(0.0);
|
||||
m_lastUpdated.push_back(0);
|
||||
}
|
||||
|
||||
// Book keeping for w = w + delta
|
||||
m_totals[index] += (m_numUpdates - m_lastUpdated[index]) * m_weights[index] + delta;
|
||||
m_weights[index] += delta;
|
||||
m_lastUpdated[index] = m_numUpdates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make sure everyone's total is up-to-date
|
||||
*/
|
||||
void MiraWeightVector::fixTotals() {
|
||||
for(size_t i=0; i<m_weights.size(); i++) update(i,0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to handle out of range weights
|
||||
*/
|
||||
ValType MiraWeightVector::weight(size_t index) const {
|
||||
if(index < m_weights.size()) {
|
||||
return m_weights[index];
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Squared Euclidean norm of the current weights.
ValType MiraWeightVector::sqrNorm() const
{
  ValType sumSquares = 0;
  for (vector<ValType>::const_iterator it = m_weights.begin(); it != m_weights.end(); ++it) {
    sumSquares += (*it) * (*it);
  }
  return sumSquares;
}
|
||||
|
||||
// Wraps wv by reference; no data is copied.
AvgWeightVector::AvgWeightVector(const MiraWeightVector& wv)
  : m_wv(wv)
{
}
|
||||
|
||||
// Averaged weight for one feature: the running total divided by the number
// of updates. Before any update the plain weight is returned, and indexes
// past the end of the totals read as zero.
ValType AvgWeightVector::weight(size_t index) const
{
  if (m_wv.m_numUpdates == 0) {
    return m_wv.weight(index);
  }
  if (index >= m_wv.m_totals.size()) {
    return 0;
  }
  return m_wv.m_totals[index] / m_wv.m_numUpdates;
}
|
||||
|
||||
// Dot product of the averaged weights with a feature vector.
ValType AvgWeightVector::score(const MiraFeatureVector& fv) const
{
  ValType total = 0.0;
  size_t pos = 0;
  while (pos < fv.size()) {
    total += weight(fv.feat(pos)) * fv.val(pos);
    ++pos;
  }
  return total;
}
|
||||
|
||||
size_t AvgWeightVector::size() const {
|
||||
return m_wv.m_weights.size();
|
||||
}
|
||||
|
||||
// --Emacs trickery--
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-basic-offset:2
|
||||
// End:
|
104
mert/MiraWeightVector.h
Normal file
104
mert/MiraWeightVector.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* MiraWeightVector.h
|
||||
* kbmira - k-best Batch MIRA
|
||||
*
|
||||
* A self-averaging weight-vector. Good for
|
||||
* perceptron learning as well.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MERT_MIRA_WEIGHT_VECTOR_H
|
||||
#define MERT_MIRA_WEIGHT_VECTOR_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "MiraFeatureVector.h"
|
||||
|
||||
class AvgWeightVector;
|
||||
|
||||
class MiraWeightVector {
|
||||
public:
|
||||
/**
|
||||
* Constructor, initializes to the zero vector
|
||||
*/
|
||||
MiraWeightVector();
|
||||
|
||||
/**
|
||||
* Constructor with provided initial vector
|
||||
* \param init Initial feature values
|
||||
*/
|
||||
MiraWeightVector(const std::vector<ValType>& init);
|
||||
|
||||
/**
|
||||
* Update a the model
|
||||
* \param fv Feature vector to be added to the weights
|
||||
* \param tau FV will be scaled by this value before update
|
||||
*/
|
||||
void update(const MiraFeatureVector& fv, float tau);
|
||||
|
||||
/**
|
||||
* Perform an empty update (affects averaging)
|
||||
*/
|
||||
void tick();
|
||||
|
||||
/**
|
||||
* Score a feature vector according to the model
|
||||
* \param fv Feature vector to be scored
|
||||
*/
|
||||
ValType score(const MiraFeatureVector& fv) const;
|
||||
|
||||
/**
|
||||
* Squared norm of the weight vector
|
||||
*/
|
||||
ValType sqrNorm() const;
|
||||
|
||||
/**
|
||||
* Return an averaged view of this weight vector
|
||||
*/
|
||||
AvgWeightVector avg();
|
||||
|
||||
friend class AvgWeightVector;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Updates a weight and lazily updates its total
|
||||
*/
|
||||
void update(std::size_t index, ValType delta);
|
||||
|
||||
/**
|
||||
* Make sure everyone's total is up-to-date
|
||||
*/
|
||||
void fixTotals();
|
||||
|
||||
/**
|
||||
* Helper to handle out-of-range weights
|
||||
*/
|
||||
ValType weight(std::size_t index) const;
|
||||
|
||||
std::vector<ValType> m_weights;
|
||||
std::vector<ValType> m_totals;
|
||||
std::vector<std::size_t> m_lastUpdated;
|
||||
std::size_t m_numUpdates;
|
||||
};
|
||||
|
||||
/**
|
||||
* Averaged view of a weight vector
|
||||
*/
|
||||
class AvgWeightVector {
|
||||
public:
|
||||
AvgWeightVector(const MiraWeightVector& wv);
|
||||
ValType score(const MiraFeatureVector& fv) const;
|
||||
ValType weight(std::size_t index) const;
|
||||
std::size_t size() const;
|
||||
private:
|
||||
const MiraWeightVector& m_wv;
|
||||
};
|
||||
|
||||
|
||||
#endif // MERT_MIRA_WEIGHT_VECTOR_H
|
||||
|
||||
// --Emacs trickery--
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-basic-offset:2
|
||||
// End:
|
298
mert/kbmira.cpp
Normal file
298
mert/kbmira.cpp
Normal file
@ -0,0 +1,298 @@
|
||||
// $Id$
|
||||
// vim:tabstop=2
|
||||
/***********************************************************************
|
||||
|
||||
***********************************************************************/
|
||||
|
||||
/**
|
||||
* k-best Batch Mira, as described in:
|
||||
*
|
||||
* Colin Cherry and George Foster
|
||||
* Batch Tuning Strategies for Statistical Machine Translation
|
||||
* NAACL 2012
|
||||
*
|
||||
* Implemented by colin.cherry@nrc-cnrc.gc.ca
|
||||
*
|
||||
* To license implementations of any of the other tuners in that paper,
|
||||
* please get in touch with any member of NRC Canada's Portage project
|
||||
*
|
||||
* Input is a set of n-best lists, encoded as feature and score files.
|
||||
*
|
||||
* Output is a weight file that results from running MIRA on these
|
||||
* n-best lists for J iterations. Will return the set that maximizes
|
||||
* training BLEU.
|
||||
**/
|
||||
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
#include "BleuScorer.h"
|
||||
#include "HypPackEnumerator.h"
|
||||
#include "MiraFeatureVector.h"
|
||||
#include "MiraWeightVector.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
// Corpus-level evaluation of a weight vector: for every pack the
// highest-scoring hypothesis under wv is selected, its score statistics are
// accumulated, and the unsmoothed BLEU of the accumulated statistics is
// returned.
ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv)
{
  vector<ValType> stats(kBleuNgramOrder*2+1,0);

  train->reset();
  while (!train->finished()) {
    // Model-best hypothesis of this pack (first one wins ties).
    size_t bestIndex = 0;
    ValType bestScore = 0;
    for (size_t hyp = 0; hyp < train->cur_size(); ++hyp) {
      MiraFeatureVector features(train->featuresAt(hyp));
      const ValType modelScore = wv.score(features);
      const bool improves = (hyp == 0) || (modelScore > bestScore);
      if (!improves) {
        continue;
      }
      bestIndex = hyp;
      bestScore = modelScore;
    }

    // Accumulate the statistics of the selected hypothesis.
    const vector<float>& chosen = train->scoresAt(bestIndex);
    for (size_t k = 0; k < chosen.size(); ++k) {
      stats[k] += chosen[k];
    }

    train->next();
  }

  return unsmoothedBleu(stats);
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
bool help;
|
||||
string denseInitFile;
|
||||
string sparseInitFile;
|
||||
vector<string> scoreFiles;
|
||||
vector<string> featureFiles;
|
||||
int seed;
|
||||
string outputFile;
|
||||
float c = 0.01; // Step-size cap C
|
||||
float decay = 0.999; // Pseudo-corpus decay \gamma
|
||||
int n_iters = 60; // Max epochs J
|
||||
bool streaming = false; // Stream all k-best lists?
|
||||
bool no_shuffle = false; // Don't shuffle, even for in memory version
|
||||
bool model_bg = false; // Use model for background corpus
|
||||
|
||||
// Command-line processing follows pro.cpp
|
||||
po::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
|
||||
("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
|
||||
("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
|
||||
("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
|
||||
("output-file,o", po::value<string>(&outputFile), "Output file")
|
||||
("cparam,C", po::value<float>(&c), "MIRA C-parameter, lower for more regularization (default 0.01)")
|
||||
("decay,D", po::value<float>(&decay), "BLEU background corpus decay rate (default 0.999)")
|
||||
("iters,J", po::value<int>(&n_iters), "Number of MIRA iterations to run (default 60)")
|
||||
("dense-init,d", po::value<string>(&denseInitFile), "Weight file for dense features")
|
||||
("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
|
||||
("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
|
||||
("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
|
||||
("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background");
|
||||
;
|
||||
|
||||
po::options_description cmdline_options;
|
||||
cmdline_options.add(desc);
|
||||
po::variables_map vm;
|
||||
po::store(po::command_line_parser(argc,argv).
|
||||
options(cmdline_options).run(), vm);
|
||||
po::notify(vm);
|
||||
if (help) {
|
||||
cout << "Usage: " + string(argv[0]) + " [options]" << endl;
|
||||
cout << desc << endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (vm.count("random-seed")) {
|
||||
cerr << "Initialising random seed to " << seed << endl;
|
||||
srand(seed);
|
||||
} else {
|
||||
cerr << "Initialising random seed from system clock" << endl;
|
||||
srand(time(NULL));
|
||||
}
|
||||
|
||||
// Initialize weights
|
||||
///
|
||||
// Dense
|
||||
vector<parameter_t> initParams;
|
||||
if(!denseInitFile.empty()) {
|
||||
ifstream opt(denseInitFile.c_str());
|
||||
string buffer; istringstream strstrm(buffer);
|
||||
if (opt.fail()) {
|
||||
cerr << "could not open dense initfile: " << denseInitFile << endl;
|
||||
exit(3);
|
||||
}
|
||||
parameter_t val;
|
||||
getline(opt,buffer);
|
||||
while(strstrm >> val) initParams.push_back(val);
|
||||
opt.close();
|
||||
}
|
||||
size_t initDenseSize = initParams.size();
|
||||
// Sparse
|
||||
if(!sparseInitFile.empty()) {
|
||||
if(initDenseSize==0) {
|
||||
cerr << "sparse initialization requires dense initialization" << endl;
|
||||
exit(3);
|
||||
}
|
||||
ifstream opt(sparseInitFile.c_str());
|
||||
if(opt.fail()) {
|
||||
cerr << "could not open sparse initfile: " << sparseInitFile << endl;
|
||||
exit(3);
|
||||
}
|
||||
int sparseCount=0;
|
||||
parameter_t val; std::string name;
|
||||
while(opt >> name >> val) {
|
||||
size_t id = SparseVector::encode(name) + initDenseSize;
|
||||
while(initParams.size()<=id) initParams.push_back(0.0);
|
||||
initParams[id] = val;
|
||||
sparseCount++;
|
||||
}
|
||||
cerr << "Found " << sparseCount << " initial sparse features" << endl;
|
||||
opt.close();
|
||||
}
|
||||
|
||||
MiraWeightVector wv(initParams);
|
||||
|
||||
// Initialize background corpus
|
||||
vector<ValType> bg;
|
||||
for(int j=0;j<kBleuNgramOrder;j++){
|
||||
bg.push_back(kBleuNgramOrder-j);
|
||||
bg.push_back(kBleuNgramOrder-j);
|
||||
}
|
||||
bg.push_back(kBleuNgramOrder);
|
||||
|
||||
// Training loop
|
||||
boost::scoped_ptr<HypPackEnumerator> train;
|
||||
if(streaming)
|
||||
train.reset(new StreamingHypPackEnumerator(featureFiles, scoreFiles));
|
||||
else
|
||||
train.reset(new RandomAccessHypPackEnumerator(featureFiles, scoreFiles, no_shuffle));
|
||||
cerr << "Initial BLEU = " << evaluate(train.get(), wv.avg()) << endl;
|
||||
ValType bestBleu = 0;
|
||||
for(int j=0;j<n_iters;j++)
|
||||
{
|
||||
// MIRA train for one epoch
|
||||
int iNumHyps = 0;
|
||||
int iNumExamples = 0;
|
||||
int iNumUpdates = 0;
|
||||
ValType totalLoss = 0.0;
|
||||
for(train->reset(); !train->finished(); train->next()) {
|
||||
|
||||
// Hope / fear decode
|
||||
size_t hope_index=0, fear_index=0, model_index=0;
|
||||
ValType hope_score=0, fear_score=0, model_score=0;
|
||||
for(size_t i=0; i< train->cur_size(); i++) {
|
||||
MiraFeatureVector vec(train->featuresAt(i));
|
||||
ValType score = wv.score(vec);
|
||||
ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
|
||||
// Hope
|
||||
if(i==0 || (score + bleu) > hope_score) {
|
||||
hope_score = score + bleu;
|
||||
hope_index = i;
|
||||
}
|
||||
// Fear
|
||||
if(i==0 || (score - bleu) > fear_score) {
|
||||
fear_score = score - bleu;
|
||||
fear_index = i;
|
||||
}
|
||||
// Model
|
||||
if(i==0 || score > model_score) {
|
||||
model_score = score;
|
||||
model_index = i;
|
||||
}
|
||||
iNumHyps++;
|
||||
}
|
||||
// Update weights
|
||||
if(hope_index!=fear_index) {
|
||||
// Vector difference
|
||||
MiraFeatureVector hope(train->featuresAt(hope_index));
|
||||
MiraFeatureVector fear(train->featuresAt(fear_index));
|
||||
MiraFeatureVector diff = hope - fear;
|
||||
// Bleu difference
|
||||
const vector<float>& hope_stats = train->scoresAt(hope_index);
|
||||
ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
|
||||
const vector<float>& fear_stats = train->scoresAt(fear_index);
|
||||
ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
|
||||
assert(hopeBleu > fearBleu);
|
||||
ValType delta = hopeBleu - fearBleu;
|
||||
// Loss and update
|
||||
ValType diff_score = wv.score(diff);
|
||||
ValType loss = delta - diff_score;
|
||||
if(loss > 0) {
|
||||
ValType eta = min(c, loss / diff.sqrNorm());
|
||||
wv.update(diff,eta);
|
||||
totalLoss+=loss;
|
||||
iNumUpdates++;
|
||||
}
|
||||
// Update BLEU statistics
|
||||
const vector<float>& model_stats = train->scoresAt(model_index);
|
||||
for(size_t k=0;k<bg.size();k++) {
|
||||
bg[k]*=decay;
|
||||
if(model_bg)
|
||||
bg[k]+=model_stats[k];
|
||||
else
|
||||
bg[k]+=hope_stats[k];
|
||||
}
|
||||
}
|
||||
iNumExamples++;
|
||||
}
|
||||
// Training Epoch summary
|
||||
cerr << iNumUpdates << "/" << iNumExamples << " updates"
|
||||
<< ", avg loss = " << (totalLoss / iNumExamples);
|
||||
|
||||
|
||||
// Evaluate current average weights
|
||||
AvgWeightVector avg = wv.avg();
|
||||
ValType bleu = evaluate(train.get(), avg);
|
||||
cerr << ", BLEU = " << bleu << endl;
|
||||
if(bleu > bestBleu) {
|
||||
size_t num_dense = train->num_dense();
|
||||
if(initDenseSize>0 && initDenseSize!=num_dense) {
|
||||
cerr << "Error: Initial dense feature count and dense feature count from n-best do not match: "
|
||||
<< initDenseSize << "!=" << num_dense << endl;
|
||||
exit(1);
|
||||
}
|
||||
// Write to a file
|
||||
ostream* out;
|
||||
ofstream outFile;
|
||||
if (!outputFile.empty() ) {
|
||||
outFile.open(outputFile.c_str());
|
||||
if (!(outFile)) {
|
||||
cerr << "Error: Failed to open " << outputFile << endl;
|
||||
exit(1);
|
||||
}
|
||||
out = &outFile;
|
||||
} else {
|
||||
out = &cout;
|
||||
}
|
||||
for(size_t i=0;i<avg.size();i++) {
|
||||
if(i<num_dense)
|
||||
*out << "F" << i << " " << avg.weight(i) << endl;
|
||||
else {
|
||||
if(abs(avg.weight(i))>1e-8)
|
||||
*out << SparseVector::decode(i-num_dense) << " " << avg.weight(i) << endl;
|
||||
}
|
||||
}
|
||||
outFile.close();
|
||||
bestBleu = bleu;
|
||||
}
|
||||
}
|
||||
cerr << "Best BLEU = " << bestBleu << endl;
|
||||
}
|
||||
// --Emacs trickery--
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-basic-offset:2
|
||||
// End:
|
@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef moses_cmd_IOWrapper_h
|
||||
#define moses_cmd_IOWrapper_h
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
@ -121,15 +122,15 @@ IOWrapper *GetIODevice(const Moses::StaticData &staticData);
|
||||
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
|
||||
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors);
|
||||
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
|
||||
const TranslationSystem* system, long translationId, bool reportSegmentation);
|
||||
void OutputAllFeatureScores(std::ostream& out, const TranslationSystem* system, const TrellisPath &path);
|
||||
void OutputFeatureScores(std::ostream& out, const TrellisPath &path, const FeatureFunction *ff, std::string &lastName);
|
||||
const Moses::TranslationSystem* system, long translationId, bool reportSegmentation);
|
||||
void OutputAllFeatureScores(std::ostream& out, const Moses::TranslationSystem* system, const Moses::TrellisPath &path);
|
||||
void OutputFeatureScores(std::ostream& out, const Moses::TrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName);
|
||||
void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
|
||||
void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
|
||||
bool reportSegmentation, bool reportAllFactors, std::ostream& out);
|
||||
void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out);
|
||||
void OutputInput(std::ostream& os, const Hypothesis* hypo);
|
||||
void OutputAlignment(OutputCollector* collector, size_t lineNo, const Hypothesis *hypo);
|
||||
void OutputAlignment(OutputCollector* collector, size_t lineNo, const TrellisPath &path);
|
||||
void OutputInput(std::ostream& os, const Moses::Hypothesis* hypo);
|
||||
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::Hypothesis *hypo);
|
||||
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::TrellisPath &path);
|
||||
|
||||
#endif
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <set>
|
||||
|
||||
using namespace std;
|
||||
using namespace Moses;
|
||||
|
||||
size_t bleu_order = 4;
|
||||
float UNKNGRAMLOGPROB = -20;
|
||||
|
@ -17,35 +17,33 @@
|
||||
#include "Manager.h"
|
||||
#include "TrellisPathList.h"
|
||||
|
||||
using namespace Moses;
|
||||
|
||||
|
||||
|
||||
class Edge;
|
||||
|
||||
typedef std::vector< const Hypothesis *> Lattice;
|
||||
typedef std::vector< const Moses::Hypothesis *> Lattice;
|
||||
typedef std::vector<const Edge*> Path;
|
||||
typedef std::map<Path, size_t> PathCounts;
|
||||
typedef std::map<Phrase, PathCounts > NgramHistory;
|
||||
typedef std::map<Moses::Phrase, PathCounts > NgramHistory;
|
||||
|
||||
class Edge
|
||||
{
|
||||
const Hypothesis* m_tailNode;
|
||||
const Hypothesis* m_headNode;
|
||||
const Moses::Hypothesis* m_tailNode;
|
||||
const Moses::Hypothesis* m_headNode;
|
||||
float m_score;
|
||||
TargetPhrase m_targetPhrase;
|
||||
Moses::TargetPhrase m_targetPhrase;
|
||||
NgramHistory m_ngrams;
|
||||
|
||||
public:
|
||||
Edge(const Hypothesis* from, const Hypothesis* to, float score, const TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
|
||||
Edge(const Moses::Hypothesis* from, const Moses::Hypothesis* to, float score, const Moses::TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
|
||||
//cout << "Creating new edge from Node " << from->GetId() << ", to Node : " << to->GetId() << ", score: " << score << " phrase: " << targetPhrase << endl;
|
||||
}
|
||||
|
||||
const Hypothesis* GetHeadNode() const {
|
||||
const Moses::Hypothesis* GetHeadNode() const {
|
||||
return m_headNode;
|
||||
}
|
||||
|
||||
const Hypothesis* GetTailNode() const {
|
||||
const Moses::Hypothesis* GetTailNode() const {
|
||||
return m_tailNode;
|
||||
}
|
||||
|
||||
@ -57,19 +55,19 @@ public:
|
||||
return m_targetPhrase.GetSize();
|
||||
}
|
||||
|
||||
const Phrase& GetWords() const {
|
||||
const Moses::Phrase& GetWords() const {
|
||||
return m_targetPhrase;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<< (std::ostream& out, const Edge& edge);
|
||||
|
||||
const NgramHistory& GetNgrams( std::map<const Hypothesis*, std::vector<Edge> > & incomingEdges) ;
|
||||
const NgramHistory& GetNgrams( std::map<const Moses::Hypothesis*, std::vector<Edge> > & incomingEdges) ;
|
||||
|
||||
bool operator < (const Edge & compare) const;
|
||||
|
||||
void GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const;
|
||||
void GetPhraseSuffix(const Moses::Phrase& origPhrase, size_t lastN, Moses::Phrase& targetPhrase) const;
|
||||
|
||||
void storeNgramHistory(const Phrase& phrase, Path & path, size_t count = 1) {
|
||||
void storeNgramHistory(const Moses::Phrase& phrase, Path & path, size_t count = 1) {
|
||||
m_ngrams[phrase][path]+= count;
|
||||
}
|
||||
|
||||
@ -84,16 +82,16 @@ public:
|
||||
NgramScores() {}
|
||||
|
||||
/** logsum this score to the existing score */
|
||||
void addScore(const Hypothesis* node, const Phrase& ngram, float score);
|
||||
void addScore(const Moses::Hypothesis* node, const Moses::Phrase& ngram, float score);
|
||||
|
||||
/** Iterate through ngrams for selected node */
|
||||
typedef std::map<const Phrase*, float>::const_iterator NodeScoreIterator;
|
||||
NodeScoreIterator nodeBegin(const Hypothesis* node);
|
||||
NodeScoreIterator nodeEnd(const Hypothesis* node);
|
||||
typedef std::map<const Moses::Phrase*, float>::const_iterator NodeScoreIterator;
|
||||
NodeScoreIterator nodeBegin(const Moses::Hypothesis* node);
|
||||
NodeScoreIterator nodeEnd(const Moses::Hypothesis* node);
|
||||
|
||||
private:
|
||||
std::set<Phrase> m_ngrams;
|
||||
std::map<const Hypothesis*, std::map<const Phrase*, float> > m_scores;
|
||||
std::set<Moses::Phrase> m_ngrams;
|
||||
std::map<const Moses::Hypothesis*, std::map<const Moses::Phrase*, float> > m_scores;
|
||||
};
|
||||
|
||||
|
||||
@ -102,11 +100,11 @@ class LatticeMBRSolution
|
||||
{
|
||||
public:
|
||||
/** Read the words from the path */
|
||||
LatticeMBRSolution(const TrellisPath& path, bool isMap);
|
||||
LatticeMBRSolution(const Moses::TrellisPath& path, bool isMap);
|
||||
const std::vector<float>& GetNgramScores() const {
|
||||
return m_ngramScores;
|
||||
}
|
||||
const std::vector<Word>& GetWords() const {
|
||||
const std::vector<Moses::Word>& GetWords() const {
|
||||
return m_words;
|
||||
}
|
||||
float GetMapScore() const {
|
||||
@ -117,10 +115,10 @@ public:
|
||||
}
|
||||
|
||||
/** Initialise ngram scores */
|
||||
void CalcScore(std::map<Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
|
||||
void CalcScore(std::map<Moses::Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
|
||||
|
||||
private:
|
||||
std::vector<Word> m_words;
|
||||
std::vector<Moses::Word> m_words;
|
||||
float m_mapScore;
|
||||
std::vector<float> m_ngramScores;
|
||||
float m_score;
|
||||
@ -132,18 +130,18 @@ struct LatticeMBRSolutionComparator {
|
||||
}
|
||||
};
|
||||
|
||||
void pruneLatticeFB(Lattice & connectedHyp, std::map < const Hypothesis*, std::set <const Hypothesis* > > & outgoingHyps, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges,
|
||||
const std::vector< float> & estimatedScores, const Hypothesis*, size_t edgeDensity,float scale);
|
||||
void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*, std::set <const Moses::Hypothesis* > > & outgoingHyps, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges,
|
||||
const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);
|
||||
|
||||
//Use the ngram scores to rerank the nbest list, return at most n solutions
|
||||
void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
|
||||
void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
|
||||
//calculate expected ngram counts, clipping at 1 (i.e. calculating posteriors) if posteriors==true.
|
||||
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Phrase,
|
||||
void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
|
||||
float>& finalNgramScores, bool posteriors);
|
||||
void GetOutputFactors(const TrellisPath &path, std::vector <Word> &translation);
|
||||
void extract_ngrams(const std::vector<Word >& sentence, std::map < Phrase, int > & allngrams);
|
||||
bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b);
|
||||
std::vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList);
|
||||
const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
|
||||
//std::vector<Word> doConsensusDecoding(Manager& manager, TrellisPathList& nBestList);
|
||||
void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
|
||||
void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int > & allngrams);
|
||||
bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
|
||||
std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
|
||||
const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
|
||||
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
|
||||
#endif
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include "Factor.h"
|
||||
#include "Util.h"
|
||||
#include "LM/SingleFactor.h"
|
||||
#include "onlineRLM.h"
|
||||
#include "DynSAInclude/onlineRLM.h"
|
||||
//#include "multiOnlineRLM.h"
|
||||
#include "DynSAInclude/file.h"
|
||||
#include "DynSAInclude/vocab.h"
|
||||
|
@ -13,6 +13,8 @@
|
||||
#include "RuleTable/Loader.h"
|
||||
#include "RuleTable/LoaderFactory.h"
|
||||
#include "TypeDef.h"
|
||||
#include "StaticData.h"
|
||||
#include "UserMessage.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -27,6 +29,13 @@ bool PhraseDictionaryALSuffixArray::Load(const std::vector<FactorType> &input
|
||||
, const LMList &languageModels
|
||||
, const WordPenaltyProducer* wpProducer)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (staticData.ThreadCount() > 1)
|
||||
{
|
||||
UserMessage::Add("Suffix array implementation is not threadsafe");
|
||||
return false;
|
||||
}
|
||||
|
||||
// file path is the directory of the rules for each, NOT the file of all the rules
|
||||
SetFilePath(filePath);
|
||||
m_tableLimit = tableLimit;
|
||||
|
@ -42,6 +42,8 @@ if $(location) {
|
||||
install compactify : training/compact-rule-table//compactify : <location>$(location)/training/compact-rule-table/tools ;
|
||||
|
||||
install phrase-extract : training/phrase-extract//programs : <location>$(location)/training/phrase-extract ;
|
||||
install pcfg-extract : training/phrase-extract/pcfg-extract//pcfg-extract : <location>$(location)/training/phrase-extract/pcfg-extract ;
|
||||
install pcfg-score : training/phrase-extract/pcfg-score//pcfg-score : <location>$(location)/training/phrase-extract/pcfg-score ;
|
||||
install lexical-reordering : training/lexical-reordering//score : <location>$(location)/training/lexical-reordering ;
|
||||
install symal : training/symal//symal : <location>$(location)/training/symal ;
|
||||
|
||||
|
@ -260,7 +260,8 @@ script = $moses-script-dir/training/train-model.perl
|
||||
### general options
|
||||
# these are options that are passed on to train-model.perl, for instance
|
||||
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
|
||||
# * "-sort-buffer-size 8G" to reduce on-disk sorting
|
||||
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
|
||||
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
|
||||
#
|
||||
#training-options = ""
|
||||
|
||||
|
@ -280,7 +280,8 @@ script = $moses-script-dir/training/train-model.perl
|
||||
### general options
|
||||
# these are options that are passed on to train-model.perl, for instance
|
||||
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
|
||||
# * "-sort-buffer-size 8G" to reduce on-disk sorting
|
||||
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
|
||||
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
|
||||
#
|
||||
#training-options = ""
|
||||
|
||||
|
@ -260,7 +260,8 @@ script = $moses-script-dir/training/train-model.perl
|
||||
### general options
|
||||
# these are options that are passed on to train-model.perl, for instance
|
||||
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
|
||||
# * "-sort-buffer-size 8G" to reduce on-disk sorting
|
||||
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
|
||||
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
|
||||
#
|
||||
#training-options = ""
|
||||
|
||||
|
@ -264,7 +264,8 @@ script = $moses-script-dir/training/train-model.perl
|
||||
### general options
|
||||
# these are options that are passed on to train-model.perl, for instance
|
||||
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
|
||||
# * "-sort-buffer-size 8G" to reduce on-disk sorting
|
||||
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
|
||||
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
|
||||
#
|
||||
#training-options = ""
|
||||
|
||||
|
@ -244,7 +244,8 @@ script = $moses-script-dir/training/train-model.perl
|
||||
### general options
|
||||
# these are options that are passed on to train-model.perl, for instance
|
||||
# * "-mgiza -mgiza-cpus 8" to use mgiza instead of giza
|
||||
# * "-sort-buffer-size 8G" to reduce on-disk sorting
|
||||
# * "-sort-buffer-size 8G -sort-compress gzip" to reduce on-disk sorting
|
||||
# * "-sort-parallel 8 -cores 8" to speed up phrase table building
|
||||
#
|
||||
#training-options = ""
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
cluster: townhill seville hermes lion seville sannox lutzow frontend
|
||||
multicore-8: tyr thor odin crom saxnot vali vili freyja bragi hoenir
|
||||
multicore-8: tyr thor odin crom
|
||||
multicore-16: saxnot vali vili freyja bragi hoenir
|
||||
multicore-24: syn hel skaol saga
|
||||
|
@ -344,8 +344,21 @@ parse-relax
|
||||
pass-unless: input-parse-relaxer output-parse-relaxer
|
||||
template-if: input-parse-relaxer IN.$input-extension OUT.$input-extension
|
||||
template-if: output-parse-relaxer IN.$output-extension OUT.$output-extension
|
||||
pcfg-extract
|
||||
in: parse-relaxed-corpus
|
||||
out: pcfg
|
||||
default-name: model/pcfg
|
||||
ignore-unless: use-pcfg-feature
|
||||
rerun-on-change: use-pcfg-feature
|
||||
template: $moses-script-dir/training/phrase-extract/pcfg-extract/pcfg-extract < IN.$output-extension > OUT.$output-extension
|
||||
pcfg-score
|
||||
in: parse-relaxed-corpus pcfg
|
||||
out: scored-corpus
|
||||
default-name: model/scored-corpus
|
||||
pass-unless: use-pcfg-feature
|
||||
template: ln -s IN.$input-extension OUT.$input-extension ; $moses-script-dir/training/phrase-extract/pcfg-score/pcfg-score IN1.$output-extension < IN.$output-extension > OUT.$output-extension
|
||||
extract-phrases
|
||||
in: word-alignment parse-relaxed-corpus
|
||||
in: word-alignment scored-corpus
|
||||
out: extracted-phrases
|
||||
rerun-on-change: max-phrase-length translation-factors reordering-factors hierarchical-rule-set extract-settings training-options script use-ghkm
|
||||
default-name: model/extract
|
||||
@ -756,6 +769,20 @@ ibm-bleu-c
|
||||
ignore-unless: ibm-bleu-c
|
||||
rerun-on-change: ibm-bleu-c
|
||||
template: $ibm-bleu-c -s $input-sgm -r IN1 -t IN > OUT
|
||||
bolt-bleu
|
||||
in: detokenized-output
|
||||
out: bolt-bleu-score
|
||||
default-name: evaluation/bolt-bleu
|
||||
ignore-unless: bolt-bleu
|
||||
rerun-on-change: bolt-bleu
|
||||
template: $bolt-bleu IN > OUT
|
||||
bolt-bleu-c
|
||||
in: detokenized-output
|
||||
out: bolt-bleu-c-score
|
||||
default-name: evaluation/bolt-bleu-c
|
||||
ignore-unless: bolt-bleu-c
|
||||
rerun-on-change: bolt-bleu-c
|
||||
template: $bolt-bleu-c IN > OUT
|
||||
multi-bleu
|
||||
in: cleaned-output reference
|
||||
out: multi-bleu-score
|
||||
@ -811,6 +838,6 @@ analysis-precision
|
||||
|
||||
[REPORTING] single
|
||||
report
|
||||
in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model
|
||||
in: EVALUATION:nist-bleu-score EVALUATION:nist-bleu-c-score EVALUATION:bolt-bleu-score EVALUATION:bolt-bleu-c-score EVALUATION:multi-bleu-score EVALUATION:multi-bleu-c-score EVALUATION:meteor-score EVALUATION:ter-score EVALUATION:wer-score EVALUATION:ibm-bleu-score EVALUATION:ibm-bleu-c-score EVALUATION:analysis EVALUATION:analysis-coverage EVALUATION:analysis-prec TRAINING:biconcor-model
|
||||
out: report
|
||||
default-name: evaluation/report
|
||||
|
@ -1020,7 +1020,7 @@ sub execute_steps {
|
||||
}
|
||||
}
|
||||
|
||||
print "number of steps doable or running: ".(scalar keys %DO)."\n";
|
||||
print "number of steps doable or running: ".(scalar keys %DO)." at ".`date`;
|
||||
foreach my $step (keys %DO) { print "\t".($DO{$step}==2?"running: ":"doable: ").$DO_STEP[$step]."\n"; }
|
||||
return unless scalar keys %DO;
|
||||
|
||||
@ -2018,6 +2018,7 @@ sub get_training_setting {
|
||||
my $target_syntax = &get("GENERAL:output-parser");
|
||||
my $score_settings = &get("TRAINING:score-settings");
|
||||
my $parallel = &get("TRAINING:parallel");
|
||||
my $pcfg = &get("TRAINING:use-pcfg-feature");
|
||||
|
||||
my $xml = $source_syntax || $target_syntax;
|
||||
|
||||
@ -2040,6 +2041,7 @@ sub get_training_setting {
|
||||
$cmd .= "-glue-grammar " if $hierarchical;
|
||||
$cmd .= "-score-options '".$score_settings."' " if $score_settings;
|
||||
$cmd .= "-parallel " if $parallel;
|
||||
$cmd .= "-pcfg " if $pcfg;
|
||||
|
||||
# factored training
|
||||
if (&backoff_and_get("TRAINING:input-factors")) {
|
||||
|
@ -3,6 +3,7 @@
|
||||
use strict;
|
||||
|
||||
my $cores = 8;
|
||||
my $serial = 1;
|
||||
my ($infile,$outfile,$cmd,$tmpdir);
|
||||
my $parent = $$;
|
||||
|
||||
@ -12,6 +13,7 @@ GetOptions('cores=i' => \$cores,
|
||||
'in=s' => \$infile,
|
||||
'out=s' => \$outfile,
|
||||
'cmd=s' => \$cmd,
|
||||
'serial=i' => \$serial
|
||||
) or exit(1);
|
||||
|
||||
die("ERROR: specify command with -cmd") unless $cmd;
|
||||
@ -24,8 +26,9 @@ die("ERROR: you need to specify a tempdir with -tmpdir") unless $tmpdir;
|
||||
|
||||
# create split input files
|
||||
my $sentenceN = `cat $infile | wc -l`;
|
||||
my $splitN = int(($sentenceN+$cores-0.5) / $cores);
|
||||
`split -a 2 -l $splitN $infile $tmpdir/in-$parent-`;
|
||||
my $splitN = int(($sentenceN+($cores*$serial)-0.5) / ($cores*$serial));
|
||||
# log the split command exactly as it is executed (4-character suffixes)
print STDERR "split -a 4 -l $splitN $infile $tmpdir/in-$parent-\n";
|
||||
`split -a 4 -l $splitN $infile $tmpdir/in-$parent-`;
|
||||
|
||||
# find out the names of the processes
|
||||
my @CORE=`ls $tmpdir/in-$parent-*`;
|
||||
@ -33,17 +36,23 @@ chomp(@CORE);
|
||||
grep(s/.+in\-\d+\-([a-z]+)$/$1/e,@CORE);
|
||||
|
||||
# create core scripts
|
||||
foreach my $core (@CORE){
|
||||
for(my $i=0;$i<scalar(@CORE);$i++) {
|
||||
my $core = $CORE[$i];
|
||||
open(BASH,">$tmpdir/core-$parent-$core.bash") or die "Cannot open: $!";
|
||||
print BASH "#bash\n\n";
|
||||
# print BASH "export PATH=$ENV{PATH}\n\n";
|
||||
printf BASH $cmd."\n", "$tmpdir/in-$parent-$core", "$tmpdir/out-$parent-$core";
|
||||
for(my $j=2;$j<=$serial;$j++) {
|
||||
$core = $CORE[++$i];
|
||||
printf BASH $cmd."\n", "$tmpdir/in-$parent-$core", "$tmpdir/out-$parent-$core";
|
||||
}
|
||||
close(BASH);
|
||||
}
|
||||
|
||||
# fork processes
|
||||
my (@CHILDREN);
|
||||
foreach my $core (@CORE){
|
||||
next unless -e "$tmpdir/core-$parent-$core.bash";
|
||||
my $child = fork();
|
||||
if (! $child) { # I am child
|
||||
print STDERR "running child $core\n";
|
||||
|
@ -14,6 +14,10 @@ $TYPE{"multi-bleu-c"}= "BLEU-c";
|
||||
$TYPE{"ibm-bleu"} = "IBM";
|
||||
$TYPE{"ibm-bleu-c"} = "IBM-c";
|
||||
$TYPE{"meteor"} = "METEOR";
|
||||
$TYPE{"bolt-bleu"} = "BLEU";
|
||||
$TYPE{"bolt-bleu-c"} = "BLEU-c";
|
||||
$TYPE{"bolt-ter"} = "TER";
|
||||
$TYPE{"bolt-ter-c"} = "TER-c";
|
||||
|
||||
my %SCORE;
|
||||
my %AVERAGE;
|
||||
@ -60,6 +64,9 @@ sub process {
|
||||
elsif ($type eq 'meteor') {
|
||||
$SCORE{$set} .= &extract_meteor($file,$type)." ";
|
||||
}
|
||||
elsif ($type =~ /^bolt-(.+)$/) {
|
||||
$SCORE{$set} .= &extract_bolt($file,$1)." ";
|
||||
}
|
||||
}
|
||||
|
||||
sub extract_nist_bleu {
|
||||
@ -115,6 +122,19 @@ sub extract_multi_bleu {
|
||||
return $output.$TYPE{$type};
|
||||
}
|
||||
|
||||
# Extract one BOLT score from a scorer output file.
#   $file - path of the scorer output (read through `cat`)
#   $type - one of 'bleu', 'bleu-c', 'ter', 'ter-c'
# Returns the score scaled by 100, formatted with two decimals, followed by
# the display label stored in $TYPE{"bolt-$type"}. As a side effect the
# scaled score is added to $AVERAGE{"bolt-$type"}.
sub extract_bolt {
|
||||
my ($file,$type) = @_;
|
||||
my $score;
|
||||
# scan every line; if several lines match, the last match wins
foreach (`cat $file`) {
|
||||
$score = $1 if $type eq 'bleu' && /Lowercase BLEU\s+([\d\.]+)/;
|
||||
$score = $1 if $type eq 'bleu-c' && /Cased BLEU\s+([\d\.]+)/;
|
||||
$score = $1 if $type eq 'ter' && /Lowercase TER\s+([\d\.]+)/;
|
||||
$score = $1 if $type eq 'ter-c' && /Cased TER\s+([\d\.]+)/;
|
||||
}
|
||||
# NOTE(review): $score is still undef here when no line matched (or $type
# is not one of the four handled values) - confirm that is acceptable
my $output = sprintf("%.02f ",$score*100);
|
||||
$AVERAGE{"bolt-".$type} += $score*100;
|
||||
return $output.$TYPE{"bolt-".$type};
|
||||
}
|
||||
sub extract_meteor {
|
||||
my ($file,$type) = @_;
|
||||
my ($meteor, $precision);
|
||||
|
@ -8,15 +8,23 @@ my $FILLER = ":s:es";
|
||||
my $MIN_SIZE = 3;
|
||||
my $MIN_COUNT = 5;
|
||||
my $MAX_COUNT = 5;
|
||||
my $FACTORED = 0;
|
||||
my $SYNTAX = 0;
|
||||
my $MARK_SPLIT = 0;
|
||||
my $BINARIZE = 0;
|
||||
$HELP = 1
|
||||
unless &GetOptions('corpus=s' => \$CORPUS,
|
||||
'model=s' => \$MODEL,
|
||||
'filler=s' => \$FILLER,
|
||||
'factored' => \$FACTORED,
|
||||
'min-size=i' => \$MIN_SIZE,
|
||||
'min-count=i' => \$MIN_COUNT,
|
||||
'max-count=i' => \$MAX_COUNT,
|
||||
'help' => \$HELP,
|
||||
'verbose' => \$VERBOSE,
|
||||
'syntax' => \$SYNTAX,
|
||||
'binarize' => \$BINARIZE,
|
||||
'mark-split' => \$MARK_SPLIT,
|
||||
'train' => \$TRAIN);
|
||||
|
||||
if ($HELP ||
|
||||
@ -29,59 +37,152 @@ if ($HELP ||
|
||||
print "options: -min-size: minimum word size (default $MIN_SIZE)\n";
|
||||
print " -min-count: minimum word count (default $MIN_COUNT)\n";
|
||||
print " -filler: filler letters between words (default $FILLER)\n";
|
||||
print " -factor: factored data, assuming factor 0 as surface (default $FACTORED)\n";
|
||||
print " -syntax: syntactically parsed data (default $SYNTAX)\n";
|
||||
print " -mark-split: mark non-terminal label of split words (default $MARK_SPLIT)\n";
|
||||
print " -binarize: binarize subtree for split word (default $BINARIZE)\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
if ($TRAIN) {
|
||||
&train;
|
||||
if ($SYNTAX) { &train_syntax(); }
|
||||
elsif ($FACTORED) { &train_factored(); }
|
||||
else { &train(); }
|
||||
}
|
||||
else {
|
||||
&apply;
|
||||
&apply();
|
||||
}
|
||||
|
||||
sub train {
|
||||
my %WORD;
|
||||
my %COUNT;
|
||||
open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
|
||||
while(<CORPUS>) {
|
||||
chop; s/\s+/ /g; s/^ //; s/ $//;
|
||||
foreach (split) {
|
||||
$WORD{$_}++;
|
||||
$COUNT{$_}++;
|
||||
}
|
||||
}
|
||||
close($CORPUS);
|
||||
close(CORPUS);
|
||||
&save_trained_model(\%COUNT);
|
||||
}
|
||||
|
||||
sub save_trained_model {
|
||||
my ($COUNT) = @_;
|
||||
my $id = 0;
|
||||
open(MODEL,">".$MODEL);
|
||||
foreach my $word (keys %WORD) {
|
||||
print MODEL "".(++$id)."\t".$word."\t".$WORD{$word}."\n";
|
||||
foreach my $word (keys %$COUNT) {
|
||||
print MODEL "".(++$id)."\t".$word."\t".$$COUNT{$word}."\n";
|
||||
}
|
||||
close(MODEL);
|
||||
print STDERR "written model file with ".(scalar keys %WORD)." words.\n";
|
||||
print STDERR "written model file with ".(scalar keys %$COUNT)." words.\n";
|
||||
}
|
||||
|
||||
# Train the model from a factored corpus ($CORPUS).
# Tokens are grouped by their surface form (the text before the first '|').
# For each surface form only the most frequent full factored token is kept,
# and it is assigned the summed count of all factored variants of that
# surface form. The resulting counts are passed to save_trained_model().
sub train_factored {
|
||||
my (%COUNT,%FACTORED_COUNT);
|
||||
# collect counts for interpretations for each surface word
|
||||
open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
|
||||
while(<CORPUS>) {
|
||||
# drop the last character and normalise whitespace before tokenising
chop; s/\s+/ /g; s/^ //; s/ $//;
|
||||
foreach my $factored_word (split) {
|
||||
my $word = $factored_word;
|
||||
$word =~ s/\|.+//g; # just first factor
|
||||
# surface form -> full factored token -> occurrence count
$FACTORED_COUNT{$word}{$factored_word}++;
|
||||
}
|
||||
}
|
||||
close(CORPUS);
|
||||
# only preserve most frequent interpretation, assign sum of counts
|
||||
foreach my $word (keys %FACTORED_COUNT) {
|
||||
my ($max,$best,$total) = (0,"",0);
|
||||
foreach my $factored_word (keys %{$FACTORED_COUNT{$word}}) {
|
||||
my $count = $FACTORED_COUNT{$word}{$factored_word};
|
||||
$total += $count;
|
||||
# strict '>' means that on a tie the variant seen first in hash order is kept
if ($count > $max) {
|
||||
$max = $count;
|
||||
$best = $factored_word;
|
||||
}
|
||||
}
|
||||
# the model entry is keyed by the full factored token, not the bare surface form
$COUNT{$best} = $total;
|
||||
}
|
||||
&save_trained_model(\%COUNT);
|
||||
}
|
||||
|
||||
# Train the model from a syntactically annotated corpus ($CORPUS).
# Tokens of the form label="X" set the current label; every following token
# that does not start with '<' is counted as a word carrying that label.
# For each word only the most frequent label is kept, and the model entry
# "word label" is assigned the summed count over all labels of that word.
# The resulting counts are passed to save_trained_model().
sub train_syntax {
|
||||
my (%COUNT,%LABELED_COUNT);
|
||||
# collect counts of labels for each surface word
|
||||
open(CORPUS,$CORPUS) || die("ERROR: could not open corpus '$CORPUS'");
|
||||
while(<CORPUS>) {
|
||||
# drop the last character and normalise whitespace before tokenising
chop; s/\s+/ /g; s/^ //; s/ $//;
|
||||
# most recently seen label on this line; reset for every line
my $label;
|
||||
foreach (split) {
|
||||
if (/^label="([^\"]+)"/) {
|
||||
$label = $1;
|
||||
}
|
||||
elsif (! /^</) {
|
||||
# NOTE(review): $label is undef for a word that precedes any label="..."
# token on its line - confirm the input format rules this out
$LABELED_COUNT{$_}{$label}++;
|
||||
}
|
||||
}
|
||||
}
|
||||
close(CORPUS);
|
||||
|
||||
# only preserve most frequent label, assign sum of counts
|
||||
foreach my $word (keys %LABELED_COUNT) {
|
||||
my ($max,$best,$total) = (0,"",0);
|
||||
foreach my $label (keys %{$LABELED_COUNT{$word}}) {
|
||||
my $count = $LABELED_COUNT{$word}{$label};
|
||||
$total += $count;
|
||||
if ($count > $max) {
|
||||
$max = $count;
|
||||
# word and label are joined by a single space to form the model key
$best = "$word $label";
|
||||
}
|
||||
}
|
||||
$COUNT{$best} = $total;
|
||||
}
|
||||
&save_trained_model(\%COUNT);
|
||||
}
|
||||
|
||||
sub apply {
|
||||
my (%WORD,%TRUECASE);
|
||||
my (%COUNT,%TRUECASE,%LABEL);
|
||||
open(MODEL,$MODEL) || die("ERROR: could not open model '$MODEL'");
|
||||
while(<MODEL>) {
|
||||
chomp;
|
||||
my ($id,$word,$count) = split(/\t/);
|
||||
my ($id,$factored_word,$count) = split(/\t/);
|
||||
my $label;
|
||||
($factored_word,$label) = split(/ /,$factored_word);
|
||||
my $word = $factored_word;
|
||||
$word =~ s/\|.+//g; # just first factor
|
||||
my $lc = lc($word);
|
||||
# if word exists with multiple casings, only record most frequent
|
||||
next if defined($WORD{$lc}) && $WORD{$lc} > $count;
|
||||
$WORD{$lc} = $count;
|
||||
$TRUECASE{$lc} = $word;
|
||||
next if defined($COUNT{$lc}) && $COUNT{$lc} > $count;
|
||||
$COUNT{$lc} = $count;
|
||||
$TRUECASE{$lc} = $factored_word;
|
||||
$LABEL{$lc} = $label if $SYNTAX;
|
||||
}
|
||||
close(MODEL);
|
||||
|
||||
while(<STDIN>) {
|
||||
my $first = 1;
|
||||
chop; s/\s+/ /g; s/^ //; s/ $//;
|
||||
foreach my $word (split) {
|
||||
my @BUFFER; # for xml tags
|
||||
foreach my $factored_word (split) {
|
||||
print " " unless $first;
|
||||
$first = 0;
|
||||
|
||||
# syntax: don't split xml
|
||||
if ($SYNTAX && ($factored_word =~ /^</ || $factored_word =~ />$/)) {
|
||||
push @BUFFER,$factored_word;
|
||||
$first = 1;
|
||||
next;
|
||||
}
|
||||
|
||||
# get case class
|
||||
my $word = $factored_word;
|
||||
$word =~ s/\|.+//g; # just first factor
|
||||
my $lc = lc($word);
|
||||
|
||||
# don't split frequent words
|
||||
if (defined($WORD{$word}) && $WORD{$word}>=$MAX_COUNT) {
|
||||
print $word;
|
||||
if (defined($COUNT{$lc}) && $COUNT{$lc}>=$MAX_COUNT) {
|
||||
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
|
||||
print $factored_word;
|
||||
next;
|
||||
}
|
||||
|
||||
@ -100,17 +201,18 @@ sub apply {
|
||||
my $subword = lc(substr($word,
|
||||
$start+length($filler),
|
||||
$end-$start+1-length($filler)));
|
||||
next unless defined($WORD{$subword});
|
||||
next unless $WORD{$subword} >= $MIN_COUNT;
|
||||
print STDERR "\tmatching word $start .. $end ($filler)$subword $WORD{$subword}\n" if $VERBOSE;
|
||||
push @{$REACHABLE{$end}},"$start $TRUECASE{$subword} $WORD{$subword}";
|
||||
next unless defined($COUNT{$subword});
|
||||
next unless $COUNT{$subword} >= $MIN_COUNT;
|
||||
print STDERR "\tmatching word $start .. $end ($filler)$subword $COUNT{$subword}\n" if $VERBOSE;
|
||||
push @{$REACHABLE{$end}},"$start $TRUECASE{$subword} $COUNT{$subword}";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# no matches at all?
|
||||
if (!defined($REACHABLE{$final})) {
|
||||
print $word;
|
||||
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
|
||||
print $factored_word;
|
||||
next;
|
||||
}
|
||||
|
||||
@ -152,9 +254,35 @@ sub apply {
|
||||
last unless scalar @{$REACHABLE{$final}} > $ITERATOR{$final};
|
||||
for(my $i=0;$i<$increase;$i++) { $ITERATOR{$i}=0; }
|
||||
}
|
||||
$best_split = $word unless $best_split =~ / /; # do not change case for unsplit words
|
||||
print $best_split;
|
||||
if ($best_split !~ / /) {
|
||||
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
|
||||
print $word; # do not change case for unsplit words
|
||||
next;
|
||||
}
|
||||
if (!$SYNTAX) {
|
||||
print $best_split;
|
||||
}
|
||||
else {
|
||||
$BUFFER[$#BUFFER] =~ s/label=\"/label=\"SPLIT-/ if $MARK_SPLIT;
|
||||
$BUFFER[$#BUFFER] =~ /label=\"([^\"]+)\"/ || die("ERROR: $BUFFER[$#BUFFER]\n");
|
||||
my $pos = $1;
|
||||
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
|
||||
|
||||
my @SPLIT = split(/ /,$best_split);
|
||||
my @OUT = ();
|
||||
if ($BINARIZE) {
|
||||
for(my $w=0;$w<scalar(@SPLIT)-2;$w++) {
|
||||
push @OUT,"<tree label=\"\@$pos\">";
|
||||
}
|
||||
}
|
||||
for(my $w=0;$w<scalar(@SPLIT);$w++) {
|
||||
if ($BINARIZE && $w>=2) { push @OUT, "</tree>"; }
|
||||
push @OUT,"<tree label=\"".$LABEL{lc($SPLIT[$w])}."\"> $SPLIT[$w] </tree>";
|
||||
}
|
||||
print join(" ",@OUT);
|
||||
}
|
||||
}
|
||||
print " ".join(" ",@BUFFER) if scalar(@BUFFER); @BUFFER = (); # clear buffer
|
||||
print "\n";
|
||||
}
|
||||
}
|
||||
|
@ -6,11 +6,15 @@
|
||||
use strict;
|
||||
use File::Basename;
|
||||
|
||||
sub RunFork($);
|
||||
sub systemCheck($);
|
||||
sub NumStr($);
|
||||
|
||||
print "Started ".localtime() ."\n";
|
||||
|
||||
my $numParallel= $ARGV[0];
|
||||
$numParallel = 1 if $numParallel < 1;
|
||||
|
||||
my $splitCmd= $ARGV[1];
|
||||
my $sortCmd= $ARGV[2];
|
||||
my $extractCmd= $ARGV[3];
|
||||
@ -29,25 +33,34 @@ for (my $i = 8; $i < $#ARGV + 1; ++$i)
|
||||
my $TMPDIR=dirname($extract) ."/tmp.$$";
|
||||
mkdir $TMPDIR;
|
||||
|
||||
my $totalLines = int(`wc -l $align`);
|
||||
my $totalLines = int(`cat $align | wc -l`);
|
||||
my $linesPerSplit = int($totalLines / $numParallel) + 1;
|
||||
|
||||
print "total=$totalLines line-per-split=$linesPerSplit \n";
|
||||
|
||||
my @children;
|
||||
my $pid;
|
||||
my $cmd;
|
||||
|
||||
if ($numParallel > 1)
|
||||
{
|
||||
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $target $TMPDIR/target.";
|
||||
print STDERR "Executing: $cmd \n";
|
||||
`$cmd`;
|
||||
$pid = RunFork($cmd);
|
||||
push(@children, $pid);
|
||||
|
||||
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $source $TMPDIR/source.";
|
||||
print STDERR "Executing: $cmd \n";
|
||||
`$cmd`;
|
||||
$pid = RunFork($cmd);
|
||||
push(@children, $pid);
|
||||
|
||||
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $align $TMPDIR/align.";
|
||||
print STDERR "Executing: $cmd \n";
|
||||
`$cmd`;
|
||||
$pid = RunFork($cmd);
|
||||
push(@children, $pid);
|
||||
|
||||
# wait until everything is finished
|
||||
foreach (@children) {
|
||||
waitpid($_, 0);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -67,15 +80,13 @@ else
|
||||
}
|
||||
|
||||
# run extract
|
||||
my $isParent = 1;
|
||||
my @childs;
|
||||
@children = ();
|
||||
for (my $i = 0; $i < $numParallel; ++$i)
|
||||
{
|
||||
my $pid = fork();
|
||||
|
||||
if ($pid == 0)
|
||||
{ # child
|
||||
$isParent = 0;
|
||||
my $numStr = NumStr($i);
|
||||
my $cmd = "$extractCmd $TMPDIR/target.$numStr $TMPDIR/source.$numStr $TMPDIR/align.$numStr $TMPDIR/extract.$numStr $otherExtractArgs \n";
|
||||
print STDERR $cmd;
|
||||
@ -85,20 +96,13 @@ for (my $i = 0; $i < $numParallel; ++$i)
|
||||
}
|
||||
else
|
||||
{ # parent
|
||||
push(@childs, $pid);
|
||||
push(@children, $pid);
|
||||
}
|
||||
}
|
||||
|
||||
# wait until everything is finished
|
||||
if ($isParent)
|
||||
{
|
||||
foreach (@childs) {
|
||||
waitpid($_, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
die "shouldn't be here";
|
||||
foreach (@children) {
|
||||
waitpid($_, 0);
|
||||
}
|
||||
|
||||
# merge
|
||||
@ -116,20 +120,28 @@ for (my $i = 0; $i < $numParallel; ++$i)
|
||||
$catCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.sorted.gz \n";
|
||||
$catInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.inv.sorted.gz \n";
|
||||
$catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.o.sorted.gz \n";
|
||||
print STDERR $catCmd;
|
||||
print STDERR $catInvCmd;
|
||||
print STDERR $catOCmd;
|
||||
|
||||
systemCheck($catCmd);
|
||||
systemCheck($catInvCmd);
|
||||
|
||||
@children = ();
|
||||
$pid = RunFork($catCmd);
|
||||
push(@children, $pid);
|
||||
|
||||
$pid = RunFork($catInvCmd);
|
||||
push(@children, $pid);
|
||||
|
||||
my $numStr = NumStr(0);
|
||||
if (-e "$TMPDIR/extract.$numStr.o.gz")
|
||||
{
|
||||
systemCheck($catOCmd);
|
||||
$pid = RunFork($catOCmd);
|
||||
push(@children, $pid);
|
||||
}
|
||||
|
||||
# wait for all sorting to finish
|
||||
foreach (@children) {
|
||||
waitpid($_, 0);
|
||||
}
|
||||
|
||||
# delete temporary files
|
||||
$cmd = "rm -rf $TMPDIR \n";
|
||||
print STDERR $cmd;
|
||||
`$cmd`;
|
||||
@ -139,6 +151,21 @@ print STDERR "Finished ".localtime() ."\n";
|
||||
# -----------------------------------------
|
||||
# -----------------------------------------
|
||||
|
||||
# Fork a child process that echoes $cmd to STDERR, runs it through
# systemCheck() and then exits.
#   $cmd    - shell command line to run in the child
#   returns - PID of the child (in the parent); the caller should waitpid() on it
#   dies    - if the process cannot be forked
sub RunFork($)
{
  my $cmd = shift;

  my $pid = fork();

  # fork() returns undef on failure. Without this check the parent would take
  # the child branch below (undef == 0 numerically), run the command itself
  # and then exit, silently ending the whole script.
  die "cannot fork to run '$cmd': $!" unless defined $pid;

  if ($pid == 0)
  { # child
    print STDERR $cmd;
    systemCheck($cmd);
    exit();
  }
  return $pid;
}
|
||||
|
||||
sub systemCheck($)
|
||||
{
|
||||
my $cmd = shift;
|
||||
@ -171,4 +198,3 @@ sub NumStr($)
|
||||
return $numStr;
|
||||
}
|
||||
|
||||
|
||||
|
280
scripts/generic/score-parallel.perl
Executable file
280
scripts/generic/score-parallel.perl
Executable file
@ -0,0 +1,280 @@
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# example
|
||||
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.sorted.gz ./lex.2.f2e ./phrase-table.2.half.f2e --GoodTuring ./phrase-table.2.coc 0
|
||||
# ./score-parallel.perl 8 "gsort --batch-size=253" ./score ./extract.2.inv.sorted.gz ./lex.2.e2f ./phrase-table.2.half.e2f --Inverse 1
|
||||
|
||||
use strict;
|
||||
use File::Basename;
|
||||
|
||||
sub RunFork($);
|
||||
sub systemCheck($);
|
||||
sub GetSourcePhrase($);
|
||||
sub NumStr($);
|
||||
|
||||
#my $EXTRACT_SPLIT_LINES = 5000000;
|
||||
my $EXTRACT_SPLIT_LINES = 1000000;
|
||||
|
||||
print "Started ".localtime() ."\n";

# Positional command-line arguments:
#   0        number of parallel jobs (values below 1 are treated as 1)
#   1        sort command
#   2        score command
#   3        extract file (1st argument of the score command)
#   4        lex file passed to the score command
#   5        output file (half phrase table)
#   6..N-1   extra arguments handed through to the score command
#   N        (last) flag: sort the merged output if true
my ($numParallel, $sortCmd, $scoreCmd, $extractFile, $lexFile, $ptHalf) = @ARGV[0 .. 5];
$numParallel = 1 if $numParallel < 1;

# Everything between the fixed arguments and the final sort flag is passed
# through verbatim; each argument is followed by a single space.
my $otherExtractArgs= "";
for (my $i = 6; $i < $#ARGV; ++$i)
{
  $otherExtractArgs .= $ARGV[$i] ." ";
}
#$scoreCmd $extractFile $lexFile $ptHalf $otherExtractArgs

my $doSort = $ARGV[$#ARGV]; # last arg

# Working directory for intermediate files, placed next to the output file and
# made unique with the process id.
my $TMPDIR=dirname($ptHalf) ."/tmp.$$";
mkdir($TMPDIR) or die "cannot create temporary directory $TMPDIR: $!";
|
||||
|
||||
my $cmd;

# Number of mini-extract files available in $TMPDIR
# (extract.0.gz, extract.1.gz, ...).
my $fileCount = 0;
if ($numParallel <= 1)
{ # don't do parallel. Just link the extract file into place
  # A symlink target is resolved relative to the directory that contains the
  # link, so a relative $extractFile has to be made absolute or the link
  # created inside $TMPDIR would dangle.
  require File::Spec;
  my $absExtractFile = File::Spec->rel2abs($extractFile);

  $cmd = "ln -s $absExtractFile $TMPDIR/extract.0.gz";
  print STDERR "$cmd \n";
  systemCheck($cmd);

  $fileCount = 1;
}
else
{ # cut up extract file into smaller mini-extract files.
  my $in;
  if ($extractFile =~ /\.gz$/) {
    open($in, "gunzip -c $extractFile |") || die "can't open pipe to $extractFile";
  }
  else {
    open($in, "<", $extractFile) || die "can't open $extractFile";
  }

  my $filePath = "$TMPDIR/extract.$fileCount.gz";
  open (my $out, "| gzip -c > $filePath") or die "error starting gzip $!";

  my $lineCount = 0;
  my $line;
  my $prevSourcePhrase = "";
  while ($line=<$in>)
  {
    chomp($line);
    ++$lineCount;

    if ($lineCount > $EXTRACT_SPLIT_LINES)
    { # over line limit. Cut off at next source phrase change, so that all
      # lines sharing a source phrase stay in the same mini-extract file
      my $sourcePhrase = GetSourcePhrase($line);

      if ($prevSourcePhrase eq "")
      { # start comparing
        $prevSourcePhrase = $sourcePhrase;
      }
      elsif ($sourcePhrase ne $prevSourcePhrase)
      { # cut off, open next mini-extract file & write to that instead
        # close() on a pipe waits for gzip and reports its failure
        close($out) or die "error finishing gzip for $filePath: $! (status $?)";

        $prevSourcePhrase = "";
        $lineCount = 0;
        ++$fileCount;
        $filePath = "$TMPDIR/extract.$fileCount.gz";
        open ($out, "| gzip -c > $filePath") or die "error starting gzip $!";
      }
      # otherwise: same source phrase, can't cut off yet
    }

    # keep on writing to the current mini-extract file
    print $out "$line\n";
  }

  close($out) or die "error finishing gzip for $filePath: $! (status $?)";
  close($in) or warn "problem closing input $extractFile: $! (status $?)\n";
  ++$fileCount;
}
|
||||
|
||||
|
||||
# Create one run script per parallel job: $TMPDIR/run.0.sh, run.1.sh, ...
# @runFiles holds the open write handle of each script, indexed by job number.
my @runFiles;
for (my $job = 0; $job < $numParallel; ++$job)
{
  my $scriptPath = "$TMPDIR/run.$job.sh";
  open(my $script, ">", $scriptPath) or die "cannot open $scriptPath: $!";
  $runFiles[$job] = $script;
}

# Distribute the scoring commands over the run scripts round-robin:
# mini-extract number k goes to script (k mod $numParallel).
for (my $part = 0; $part < $fileCount; ++$part)
{
  my $padded = NumStr($part);
  my $script = $runFiles[$part % $numParallel];
  print $script "$scoreCmd $TMPDIR/extract.$part.gz $lexFile $TMPDIR/phrase-table.half.$padded.gz $otherExtractArgs\n";
}

# Finish each run script and make it executable.
for (my $job = 0; $job < $numParallel; ++$job)
{
  close($runFiles[$job]);
  systemCheck("chmod +x $TMPDIR/run.$job.sh");
}
|
||||
|
||||
# run each score script in parallel
my @children;
for (my $i = 0; $i < $numParallel; ++$i)
{
  my $cmd = "$TMPDIR/run.$i.sh";
  my $pid = RunFork($cmd);
  push(@children, $pid);
}

# Wait for every scoring job to finish and check how it ended. A child
# started by RunFork exits non-zero when its command fails; carrying on
# regardless would merge incomplete phrase tables without any error.
my $numFailed = 0;
foreach my $pid (@children) {
  # only trust $? when waitpid() actually reaped this child
  if (waitpid($pid, 0) == $pid && $? != 0) {
    print STDERR "ERROR: scoring job (pid $pid) failed with wait status $?\n";
    ++$numFailed;
  }
}
die "$numFailed scoring job(s) failed, not merging partial results\n" if $numFailed > 0;
|
||||
|
||||
# merge & sort
# Build the command that produces the final output file $ptHalf. Both
# branches assign $cmd, so no placeholder value is needed beforehand.
if ($fileCount == 1 && !$doSort)
{ # a single, unsorted part: simply move it into place
  my $numStr = NumStr(0);
  $cmd = "mv $TMPDIR/phrase-table.half.$numStr.gz $ptHalf";
}
else
{ # concatenate all parts, optionally sort, and re-compress
  $cmd = "zcat $TMPDIR/phrase-table.half.*.gz";

  if ($doSort) {
    $cmd .= "| LC_ALL=C $sortCmd -T $TMPDIR ";
  }

  $cmd .= " | gzip -c > $ptHalf";
}
# terminate the logged command so the next log line starts on its own line
print STDERR "$cmd\n";
systemCheck($cmd);
|
||||
|
||||
# Merge the per-part .coc files, if the first part produced one, by summing
# them line by line into "$ptHalf.coc".
my $numStr = NumStr(0);
my $cocPath = "$TMPDIR/phrase-table.half.$numStr.gz.coc";

if (-e $cocPath)
{
  # $arrayCOC[k] is the sum over all parts of the integer on line k+1
  my @arrayCOC;

  # Adds the integers of one .coc file, line by line, onto @arrayCOC.
  my $accumulate = sub {
    my $path = shift;
    open(FHCOC, $path) || die "can't open pipe to $path";
    my $row = 0;
    while (my $entry = <FHCOC>)
    {
      $arrayCOC[$row] += int($entry);
      ++$row;
    }
    close(FHCOC);
  };

  # first part, then all remaining parts
  $accumulate->($cocPath);
  for (my $part = 1; $part < $fileCount; ++$part)
  {
    $numStr = NumStr($part);
    $cocPath = "$TMPDIR/phrase-table.half.$numStr.gz.coc";
    $accumulate->($cocPath);
  }

  # write the summed counts, one per line
  $cocPath = "$ptHalf.coc";
  open(FHCOC, ">", $cocPath) or die "cannot open $cocPath: $!";
  foreach my $total (@arrayCOC)
  {
    print FHCOC $total."\n";
  }
  close(FHCOC);
}

# remove the temporary working directory
$cmd = "rm -rf $TMPDIR \n";
print STDERR $cmd;
systemCheck($cmd);

print STDERR "Finished ".localtime() ."\n";
|
||||
|
||||
# -----------------------------------------
|
||||
# -----------------------------------------
|
||||
|
||||
# Fork a child process that echoes $cmd to STDERR, runs it through
# systemCheck() and then exits.
#   $cmd    - shell command line to run in the child
#   returns - PID of the child (in the parent); the caller should waitpid() on it
#   dies    - if the process cannot be forked
sub RunFork($)
{
  my $cmd = shift;

  my $pid = fork();

  # fork() returns undef on failure. Without this check the parent would take
  # the child branch below (undef == 0 numerically), run the command itself
  # and then exit, silently ending the whole script.
  die "cannot fork to run '$cmd': $!" unless defined $pid;

  if ($pid == 0)
  { # child
    print STDERR $cmd;
    systemCheck($cmd);
    exit();
  }
  return $pid;
}
|
||||
# Run $cmd with system() and terminate the current process with exit status 1
# if it does not succeed. A diagnostic naming the failed command is written to
# STDERR first, so a failure is not silent.
#   $cmd - command line to execute
sub systemCheck($)
{
  my $cmd = shift;
  my $retVal = system($cmd);
  if ($retVal != 0)
  {
    if ($retVal == -1)
    { # system() returns -1 when the command could not be started at all
      print STDERR "ERROR: could not execute command ($!): $cmd\n";
    }
    else
    {
      print STDERR "ERROR: command failed with wait status $retVal: $cmd\n";
    }
    exit(1);
  }
}
|
||||
|
||||
# Return the part of an extract line that precedes the first "|||" delimiter.
#   $line   - one line of the extract file
#   returns - the text before the first "|||"; the whole line if the
#             delimiter does not occur
sub GetSourcePhrase($)
{
  my $line = shift;
  my $pos = index($line, "|||");

  # index() yields -1 when the delimiter is missing; substr() with a negative
  # length would then silently drop the last character of the line.
  return $line if $pos < 0;

  return substr($line, 0, $pos);
}
|
||||
|
||||
|
||||
# Format a part index as a decimal string zero-padded to at least five
# digits (e.g. 7 -> "00007"); larger numbers are returned unpadded.
#   $i      - non-negative integer index
#   returns - the padded string
sub NumStr($)
{
  my $i = shift;
  return sprintf("%05d", $i);
}
|
||||
|
||||
|
@ -3,11 +3,15 @@
|
||||
use strict;
|
||||
|
||||
while(<STDIN>) {
|
||||
s/\&bar;/\|/g;
|
||||
s/\</\</g;
|
||||
s/\>/\>/g;
|
||||
s/\&bra;/\[/g;
|
||||
s/\&ket;/\]/g;
|
||||
s/\&/\&/g;
|
||||
s/\&bar;/\|/g; # factor separator
|
||||
s/\</\</g; # xml
|
||||
s/\>/\>/g; # xml
|
||||
s/\&bra;/\[/g; # syntax non-terminal (legacy)
|
||||
s/\&ket;/\]/g; # syntax non-terminal (legacy)
|
||||
s/\"/\"/g; # xml
|
||||
s/\'/\'/g; # xml
|
||||
s/\[/\[/g; # syntax non-terminal
|
||||
s/\]/\]/g; # syntax non-terminal
|
||||
s/\&/\&/g; # escape escape
|
||||
print $_;
|
||||
}
|
||||
|
@ -33,8 +33,9 @@ if ($HELP) {
|
||||
exit;
|
||||
}
|
||||
|
||||
die "No built-in rules for language $language, claim en for default behaviour."
|
||||
if $language !~ /^(cs|en|fr|it)$/;
|
||||
if ($language !~ /^(cs|en|fr|it)$/) {
|
||||
print STDERR "Warning: No built-in rules for language $language.\n"
|
||||
}
|
||||
|
||||
if (!$QUIET) {
|
||||
print STDERR "Detokenizer Version ".'$Revision: 4134 $'."\n";
|
||||
@ -65,12 +66,16 @@ sub detokenize {
|
||||
$text = " $text ";
|
||||
$text =~ s/ \@\-\@ /-/g;
|
||||
# de-escape special chars
|
||||
$text =~ s/\&bar;/\|/g;
|
||||
$text =~ s/\</\</g;
|
||||
$text =~ s/\>/\>/g;
|
||||
$text =~ s/\&bra;/\[/g;
|
||||
$text =~ s/\&ket;/\]/g;
|
||||
$text =~ s/\&/\&/g;
|
||||
$text =~ s/\&bar;/\|/g; # factor separator
|
||||
$text =~ s/\</\</g; # xml
|
||||
$text =~ s/\>/\>/g; # xml
|
||||
$text =~ s/\&bra;/\[/g; # syntax non-terminal (legacy)
|
||||
$text =~ s/\&ket;/\]/g; # syntax non-terminal (legacy)
|
||||
$text =~ s/\"/\"/g; # xml
|
||||
$text =~ s/\'/\'/g; # xml
|
||||
$text =~ s/\[/\[/g; # syntax non-terminal
|
||||
$text =~ s/\]/\]/g; # syntax non-terminal
|
||||
$text =~ s/\&/\&/g; # escape escape
|
||||
|
||||
my $word;
|
||||
my $i;
|
||||
|
@ -6,18 +6,22 @@ while(<STDIN>) {
|
||||
chop;
|
||||
|
||||
# avoid general madness
|
||||
s/[\000-\037]//g;
|
||||
s/\s+/ /g;
|
||||
s/^ //g;
|
||||
s/ $//g;
|
||||
s/[\000-\037]//g;
|
||||
|
||||
# special characters in moses
|
||||
s/\&/\&/g;
|
||||
s/\|/\&bar;/g;
|
||||
s/\</\</g;
|
||||
s/\>/\>/g;
|
||||
s/\[/\&bra;/g;
|
||||
s/\]/\&ket;/g;
|
||||
s/\&/\&/g; # escape escape
|
||||
s/\|/\&bar;/g; # factor separator
|
||||
s/\</\</g; # xml
|
||||
s/\>/\>/g; # xml
|
||||
s/\'/\'/g; # xml
|
||||
s/\"/\"/g; # xml
|
||||
s/\[/\[/g; # syntax non-terminal
|
||||
s/\]/\]/g; # syntax non-terminal
|
||||
|
||||
# restore xml instructions
|
||||
s/\<(\S+) translation="([^\"]+)"> (.+?) <\/(\S+)>/\<$1 translation=\"$2\"> $3 <\/$4>/g;
|
||||
print $_."\n";
|
||||
}
|
||||
|
@ -149,12 +149,14 @@ sub tokenize {
|
||||
$text =~ s/DOTMULTI/./g;
|
||||
|
||||
#escape special chars
|
||||
$text =~ s/\&/\&/g;
|
||||
$text =~ s/\|/\&bar;/g;
|
||||
$text =~ s/\</\</g;
|
||||
$text =~ s/\>/\>/g;
|
||||
$text =~ s/\[/\&bra;/g;
|
||||
$text =~ s/\]/\&ket;/g;
|
||||
$text =~ s/\&/\&/g; # escape escape
|
||||
$text =~ s/\|/\&bar;/g; # factor separator
|
||||
$text =~ s/\</\</g; # xml
|
||||
$text =~ s/\>/\>/g; # xml
|
||||
$text =~ s/\'/\'/g; # xml
|
||||
$text =~ s/\"/\"/g; # xml
|
||||
$text =~ s/\[/\[/g; # syntax non-terminal
|
||||
$text =~ s/\]/\]/g; # syntax non-terminal
|
||||
|
||||
#ensure final line break
|
||||
$text .= "\n" unless $text =~ /\n$/;
|
||||
|
@ -117,6 +117,9 @@ my $___HISTORIC_INTERPOLATION = 0; # interpolate optimize weights with previous
|
||||
# TODO: Should we also add these values to options of this script?
|
||||
my $megam_default_options = "-fvals -maxi 30 -nobias binary";
|
||||
|
||||
# Flags related to Batch MIRA (Cherry & Foster, 2012)
|
||||
my $___BATCH_MIRA = 0; # flg to enable batch MIRA
|
||||
|
||||
my $__THREADS = 0;
|
||||
|
||||
# Parameter for effective reference length when computing BLEU score
|
||||
@ -206,6 +209,7 @@ GetOptions(
|
||||
"pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
|
||||
"pro-starting-point" => \$___PRO_STARTING_POINT,
|
||||
"historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
|
||||
"batch-mira" => \$___BATCH_MIRA,
|
||||
"threads=i" => \$__THREADS
|
||||
) or exit(1);
|
||||
|
||||
@ -324,10 +328,12 @@ if (!defined $mertdir) {
|
||||
my $mert_extract_cmd = File::Spec->catfile($mertdir, "extractor");
|
||||
my $mert_mert_cmd = File::Spec->catfile($mertdir, "mert");
|
||||
my $mert_pro_cmd = File::Spec->catfile($mertdir, "pro");
|
||||
my $mert_mira_cmd = File::Spec->catfile($mertdir, "kbmira");
|
||||
|
||||
die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
|
||||
die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
|
||||
die "Not executable: $mert_pro_cmd" if ! -x $mert_pro_cmd;
|
||||
die "Not executable: $mert_mira_cmd" if ! -x $mert_mira_cmd;
|
||||
|
||||
my $pro_optimizer = File::Spec->catfile($mertdir, "megam_i686.opt"); # or set to your installation
|
||||
|
||||
@ -727,6 +733,11 @@ while (1) {
|
||||
$scfiles = "$score_file";
|
||||
}
|
||||
|
||||
my $mira_settings = "";
|
||||
$mira_settings .= " --dense-init run$run.$weights_in_file";
|
||||
if (-e "run$run.sparse-weights") {
|
||||
$mira_settings .= " --sparse-init run$run.sparse-weights";
|
||||
}
|
||||
my $file_settings = " --ffile $ffiles --scfile $scfiles";
|
||||
my $pro_file_settings = "--ffile " . join(" --ffile ", split(/,/, $ffiles)) .
|
||||
" --scfile " . join(" --scfile ", split(/,/, $scfiles));
|
||||
@ -774,11 +785,14 @@ while (1) {
|
||||
$cmd = $cmd." --sparse-weights run$run.merge-weights";
|
||||
|
||||
# ... and run mert
|
||||
&submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
|
||||
}
|
||||
# just mert
|
||||
else {
|
||||
&submit_or_exec($cmd.$mert_settings,$mert_outfile,$mert_logfile);
|
||||
$cmd =~ s/(--ifile \S+)/$1,run$run.init.pro/;
|
||||
&submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
|
||||
} elsif ($___BATCH_MIRA) { # batch MIRA optimization
|
||||
safesystem("echo 'not used' > $weights_out_file") or die;
|
||||
$cmd = "$mert_mira_cmd $mira_settings $seed_settings $pro_file_settings -o $mert_outfile";
|
||||
&submit_or_exec($cmd, "run$run.mira.out", $mert_logfile);
|
||||
} else { # just mert
|
||||
&submit_or_exec($cmd . $mert_settings, $mert_outfile, $mert_logfile);
|
||||
}
|
||||
|
||||
die "Optimization failed, file $weights_out_file does not exist or is empty"
|
||||
@ -932,7 +946,7 @@ chdir($cwd);
|
||||
sub get_weights_from_mert {
|
||||
my ($outfile, $logfile, $weight_count, $sparse_weights) = @_;
|
||||
my ($bestpoint, $devbleu);
|
||||
if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/)) {
|
||||
if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/) || $___BATCH_MIRA) {
|
||||
open my $fh, '<', $outfile or die "Can't open $outfile: $!";
|
||||
my (@WEIGHT, $sum);
|
||||
for (my $i = 0; $i < $weight_count; $i++) { push @WEIGHT, 0; }
|
||||
@ -949,6 +963,14 @@ sub get_weights_from_mert {
|
||||
foreach (keys %{$sparse_weights}) { $$sparse_weights{$_} /= $sum; }
|
||||
$bestpoint = join(" ", @WEIGHT);
|
||||
close $fh;
|
||||
if($___BATCH_MIRA) {
|
||||
open my $fh2, '<', $logfile or die "Can't open $logfile: $!";
|
||||
while(<$fh2>) {
|
||||
if(/Best BLEU = ([\-\d\.]+)/) {
|
||||
$devbleu = $1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
open my $fh, '<', $logfile or die "Can't open $logfile: $!";
|
||||
while (<$fh>) {
|
||||
@ -1115,7 +1137,7 @@ sub get_order_of_scores_from_nbestlist {
|
||||
# return the score labels in order
|
||||
my $fname_or_source = shift;
|
||||
# print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";
|
||||
open my $fh, '<', $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source': $!";
|
||||
open my $fh, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source': $!";
|
||||
my $line = <$fh>;
|
||||
close $fh;
|
||||
die "Line empty in nbestlist '$fname_or_source'" if !defined $line;
|
||||
@ -1195,7 +1217,7 @@ sub create_config {
|
||||
}
|
||||
|
||||
if (defined($sparse_weights_file)) {
|
||||
push @{$P{"weights-file"}}, File::Spec->catfile($___WORKING_DIR, $sparse_weights_file);
|
||||
push @{$P{"weight-file"}}, File::Spec->catfile($___WORKING_DIR, $sparse_weights_file);
|
||||
}
|
||||
|
||||
# create new moses.ini decoder config file by cloning and overriding the original one
|
||||
|
@ -43,6 +43,7 @@ public:
|
||||
int startS;
|
||||
int endS;
|
||||
float count;
|
||||
double pcfgScore;
|
||||
|
||||
std::map<size_t, std::pair<size_t, size_t> > m_ntLengths;
|
||||
|
||||
@ -58,6 +59,7 @@ public:
|
||||
, startS(sS)
|
||||
, endS(eS)
|
||||
, count(0)
|
||||
, pcfgScore(0.0)
|
||||
{}
|
||||
|
||||
void SetSpanLength(size_t sourcePos, size_t sourceLength, size_t targetLength)
|
||||
|
@ -10,13 +10,13 @@ obj XmlTree.o : XmlTree.cpp : <include>. ;
|
||||
alias filestreams : InputFileStream.cpp OutputFileStream.cpp : : : <include>. ;
|
||||
alias trees : SyntaxTree.cpp tables-core.o XmlTree.o : : : <include>. ;
|
||||
|
||||
exe extract : tables-core.o SentenceAlignment.o extract.cpp InputFileStream ../../..//boost_iostreams ;
|
||||
exe extract : tables-core.o SentenceAlignment.o extract.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
|
||||
|
||||
exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o SentenceAlignmentWithSyntax.cpp HoleCollection.cpp extract-rules.cpp ExtractedRule.cpp OutputFileStream.cpp InputFileStream ../../../moses/src//ThreadPool ../../..//boost_iostreams ;
|
||||
|
||||
exe extract-lex : extract-lex.cpp InputFileStream ;
|
||||
|
||||
exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp InputFileStream ../../..//boost_iostreams ;
|
||||
exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
|
||||
|
||||
exe consolidate : consolidate.cpp tables-core.o OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
|
||||
|
||||
@ -33,3 +33,5 @@ alias programs : extract extract-rules extract-lex score consolidate consolidate
|
||||
install legacy : programs : <location>. <install-type>EXE ;
|
||||
|
||||
build-project extract-ghkm ;
|
||||
build-project pcfg-extract ;
|
||||
build-project pcfg-score ;
|
||||
|
@ -13,6 +13,8 @@
|
||||
#include "tables-core.h"
|
||||
#include "score.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern Vocabulary vcbT;
|
||||
@ -111,6 +113,9 @@ void PhraseAlignment::create( char line[], int lineID )
|
||||
}
|
||||
else if (item == 5) { // non-term lengths
|
||||
addNTLength(token[j]);
|
||||
} else if (item == 6) { // target syntax PCFG score
|
||||
float pcfgScore = std::atof(token[j].c_str());
|
||||
pcfgSum = pcfgScore * count;
|
||||
}
|
||||
}
|
||||
|
||||
@ -119,7 +124,7 @@ void PhraseAlignment::create( char line[], int lineID )
|
||||
if (item == 3) {
|
||||
count = 1.0;
|
||||
}
|
||||
if (item < 3 || item > 5) {
|
||||
if (item < 3 || item > 6) {
|
||||
cerr << "ERROR: faulty line " << lineID << ": " << line << endl;
|
||||
}
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ protected:
|
||||
void createAlignVec(size_t sourceSize, size_t targetSize);
|
||||
void addNTLength(const std::string &tok);
|
||||
public:
|
||||
float pcfgSum;
|
||||
float count;
|
||||
std::vector< std::set<size_t> > alignedToT;
|
||||
std::vector< std::set<size_t> > alignedToS;
|
||||
|
@ -45,8 +45,11 @@ public:
|
||||
bool targetSyntax;
|
||||
bool duplicateRules;
|
||||
bool fractionalCounting;
|
||||
bool pcfgScore;
|
||||
bool outputNTLengths;
|
||||
bool gzOutput;
|
||||
bool unpairedExtractFormat;
|
||||
bool conditionOnTargetLhs;
|
||||
|
||||
RuleExtractionOptions()
|
||||
: maxSpan(10)
|
||||
@ -74,8 +77,11 @@ public:
|
||||
, targetSyntax(false)
|
||||
, duplicateRules(true)
|
||||
, fractionalCounting(true)
|
||||
, pcfgScore(false)
|
||||
, outputNTLengths(false)
|
||||
, gzOutput(false)
|
||||
, unpairedExtractFormat(false)
|
||||
, conditionOnTargetLhs(false)
|
||||
{}
|
||||
};
|
||||
|
||||
|
@ -27,6 +27,8 @@
|
||||
#include "XmlException.h"
|
||||
#include "XmlTree.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID)
|
||||
{
|
||||
if (!m_options.targetSyntax) {
|
||||
|
@ -42,11 +42,12 @@ void SyntaxTree::Clear()
|
||||
m_index.clear();
|
||||
}
|
||||
|
||||
void SyntaxTree::AddNode( int startPos, int endPos, std::string label )
|
||||
SyntaxNode *SyntaxTree::AddNode( int startPos, int endPos, std::string label )
|
||||
{
|
||||
SyntaxNode* newNode = new SyntaxNode( startPos, endPos, label );
|
||||
m_nodes.push_back( newNode );
|
||||
m_index[ startPos ][ endPos ].push_back( newNode );
|
||||
return newNode;
|
||||
}
|
||||
|
||||
ParentNodes SyntaxTree::Parse()
|
||||
|
@ -34,12 +34,14 @@ protected:
|
||||
std::string m_label;
|
||||
std::vector< SyntaxNode* > m_children;
|
||||
SyntaxNode* m_parent;
|
||||
float m_pcfgScore;
|
||||
public:
|
||||
SyntaxNode( int startPos, int endPos, std::string label )
|
||||
:m_start(startPos)
|
||||
,m_end(endPos)
|
||||
,m_label(label)
|
||||
,m_parent(0)
|
||||
,m_pcfgScore(0.0f)
|
||||
{}
|
||||
int GetStart() const {
|
||||
return m_start;
|
||||
@ -50,6 +52,12 @@ public:
|
||||
std::string GetLabel() const {
|
||||
return m_label;
|
||||
}
|
||||
float GetPcfgScore() const {
|
||||
return m_pcfgScore;
|
||||
}
|
||||
void SetPcfgScore(float score) {
|
||||
m_pcfgScore = score;
|
||||
}
|
||||
SyntaxNode *GetParent() {
|
||||
return m_parent;
|
||||
}
|
||||
@ -89,11 +97,12 @@ public:
|
||||
}
|
||||
~SyntaxTree();
|
||||
|
||||
SyntaxNode *AddNode( int startPos, int endPos, std::string label );
|
||||
|
||||
SyntaxNode *GetTop() {
|
||||
return m_top;
|
||||
}
|
||||
|
||||
void AddNode( int startPos, int endPos, std::string label );
|
||||
ParentNodes Parse();
|
||||
bool HasNode( int startPos, int endPos ) const;
|
||||
const std::vector< SyntaxNode* >& GetNodes( int startPos, int endPos ) const;
|
||||
|
@ -25,7 +25,7 @@
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
#include "SyntaxTree.h"
|
||||
#include "XmlException.h"
|
||||
@ -128,6 +128,16 @@ string unescape(const string& str)
|
||||
s += string("<");
|
||||
} else if (name == "gt") {
|
||||
s += string(">");
|
||||
} else if (name == "#91") {
|
||||
s += string("[");
|
||||
} else if (name == "#93") {
|
||||
s += string("]");
|
||||
} else if (name == "bra") {
|
||||
s += string("[");
|
||||
} else if (name == "ket") {
|
||||
s += string("]");
|
||||
} else if (name == "bar") {
|
||||
s += string("|");
|
||||
} else if (name == "amp") {
|
||||
s += string("&");
|
||||
} else if (name == "apos") {
|
||||
@ -345,13 +355,18 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
|
||||
string label = ParseXmlTagAttribute(tagContent,"label");
|
||||
labelCollection.insert( label );
|
||||
|
||||
string pcfgString = ParseXmlTagAttribute(tagContent,"pcfg");
|
||||
float pcfgScore = pcfgString == "" ? 0.0f
|
||||
: std::atof(pcfgString.c_str());
|
||||
|
||||
// report what we have processed so far
|
||||
if (0) {
|
||||
cerr << "XML TAG NAME IS: '" << tagName << "'" << endl;
|
||||
cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
|
||||
cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
|
||||
}
|
||||
tree.AddNode( startPos, endPos-1, label );
|
||||
SyntaxNode *node = tree.AddNode( startPos, endPos-1, label );
|
||||
node->SetPcfgScore(pcfgScore);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -212,6 +212,10 @@ Node *AlignmentGraph::CopyParseTree(const ParseTree *root)
|
||||
|
||||
std::auto_ptr<Node> n(new Node(root->GetLabel(), nodeType));
|
||||
|
||||
if (nodeType == TREE) {
|
||||
n->SetPcfgScore(root->GetPcfgScore());
|
||||
}
|
||||
|
||||
const std::vector<ParseTree *> &children = root->GetChildren();
|
||||
std::vector<Node *> childNodes;
|
||||
childNodes.reserve(children.size());
|
||||
|
@ -266,6 +266,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
|
||||
//("help", "print this help message and exit")
|
||||
("AllowUnary",
|
||||
"allow fully non-lexical unary rules")
|
||||
("ConditionOnTargetLHS",
|
||||
"write target LHS instead of \"X\" as source LHS")
|
||||
("GlueGrammar",
|
||||
po::value(&options.glueGrammarFile),
|
||||
"write glue grammar to named file")
|
||||
@ -285,6 +287,8 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
|
||||
"set maximum allowed scope")
|
||||
("Minimal",
|
||||
"extract minimal rules only")
|
||||
("PCFG",
|
||||
"include score based on PCFG scores in target corpus")
|
||||
("UnknownWordLabel",
|
||||
po::value(&options.unknownWordFile),
|
||||
"write unknown word labels to named file")
|
||||
@ -355,12 +359,18 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
|
||||
if (vm.count("AllowUnary")) {
|
||||
options.allowUnary = true;
|
||||
}
|
||||
if (vm.count("ConditionOnTargetLHS")) {
|
||||
options.conditionOnTargetLhs = true;
|
||||
}
|
||||
if (vm.count("GZOutput")) {
|
||||
options.gzOutput = true;
|
||||
}
|
||||
if (vm.count("Minimal")) {
|
||||
options.minimal = true;
|
||||
}
|
||||
if (vm.count("PCFG")) {
|
||||
options.pcfg = true;
|
||||
}
|
||||
if (vm.count("UnpairedExtractFormat")) {
|
||||
options.unpairedExtractFormat = true;
|
||||
}
|
||||
|
@ -41,8 +41,7 @@ class Node
|
||||
Node(const std::string &label, NodeType type)
|
||||
: m_label(label)
|
||||
, m_type(type)
|
||||
, m_children()
|
||||
, m_parents() {}
|
||||
, m_pcfgScore(0.0f) {}
|
||||
|
||||
~Node();
|
||||
|
||||
@ -50,12 +49,14 @@ class Node
|
||||
NodeType GetType() const { return m_type; }
|
||||
const std::vector<Node*> &GetChildren() const { return m_children; }
|
||||
const std::vector<Node*> &GetParents() const { return m_parents; }
|
||||
float GetPcfgScore() const { return m_pcfgScore; }
|
||||
const Span &GetSpan() const { return m_span; }
|
||||
const Span &GetComplementSpan() const { return m_complementSpan; }
|
||||
const std::vector<const Subgraph*> &GetRules() const { return m_rules; }
|
||||
|
||||
void SetChildren(const std::vector<Node*> &c) { m_children = c; }
|
||||
void SetParents(const std::vector<Node*> &p) { m_parents = p; }
|
||||
void SetPcfgScore(float s) { m_pcfgScore = s; }
|
||||
void SetSpan(const Span &s) { m_span = s; }
|
||||
void SetComplementSpan(const Span &cs) { m_complementSpan = cs; }
|
||||
|
||||
@ -92,6 +93,7 @@ class Node
|
||||
NodeType m_type;
|
||||
std::vector<Node*> m_children;
|
||||
std::vector<Node*> m_parents;
|
||||
float m_pcfgScore;
|
||||
Span m_span;
|
||||
Span m_complementSpan;
|
||||
std::vector<const Subgraph*> m_rules;
|
||||
|
@ -30,12 +30,14 @@ struct Options {
|
||||
public:
|
||||
Options()
|
||||
: allowUnary(false)
|
||||
, conditionOnTargetLhs(false)
|
||||
, gzOutput(false)
|
||||
, maxNodes(15)
|
||||
, maxRuleDepth(3)
|
||||
, maxRuleSize(3)
|
||||
, maxScope(3)
|
||||
, minimal(false)
|
||||
, pcfg(false)
|
||||
, unpairedExtractFormat(false) {}
|
||||
|
||||
// Positional options
|
||||
@ -46,6 +48,7 @@ struct Options {
|
||||
|
||||
// All other options
|
||||
bool allowUnary;
|
||||
bool conditionOnTargetLhs;
|
||||
std::string glueGrammarFile;
|
||||
bool gzOutput;
|
||||
int maxNodes;
|
||||
@ -53,6 +56,7 @@ struct Options {
|
||||
int maxRuleSize;
|
||||
int maxScope;
|
||||
bool minimal;
|
||||
bool pcfg;
|
||||
bool unpairedExtractFormat;
|
||||
std::string unknownWordFile;
|
||||
};
|
||||
|
@ -32,17 +32,19 @@ class ParseTree
|
||||
public:
|
||||
ParseTree(const std::string &label)
|
||||
: m_label(label)
|
||||
, m_children()
|
||||
, m_parent() {}
|
||||
, m_parent(0)
|
||||
, m_pcfgScore(0.0) {}
|
||||
|
||||
~ParseTree();
|
||||
|
||||
const std::string &GetLabel() const { return m_label; }
|
||||
const std::vector<ParseTree*> &GetChildren() const { return m_children; }
|
||||
const ParseTree *GetParent() const { return m_parent; }
|
||||
float GetPcfgScore() const { return m_pcfgScore; }
|
||||
|
||||
void SetParent(ParseTree *);
|
||||
void SetChildren(const std::vector<ParseTree*> &);
|
||||
void SetPcfgScore(float score) { m_pcfgScore = score; }
|
||||
|
||||
void AddChild(ParseTree *);
|
||||
|
||||
@ -59,6 +61,7 @@ class ParseTree
|
||||
std::string m_label;
|
||||
std::vector<ParseTree*> m_children;
|
||||
ParseTree *m_parent;
|
||||
float m_pcfgScore; // log probability
|
||||
};
|
||||
|
||||
template<typename OutputIterator>
|
||||
|
@ -30,6 +30,7 @@ namespace GHKM {
|
||||
ScfgRule::ScfgRule(const Subgraph &fragment)
|
||||
: m_sourceLHS("X", NonTerminal)
|
||||
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
|
||||
, m_pcfgScore(fragment.GetPcfgScore())
|
||||
{
|
||||
// Source RHS
|
||||
|
||||
|
@ -57,6 +57,7 @@ class ScfgRule
|
||||
const std::vector<Symbol> &GetSourceRHS() const { return m_sourceRHS; }
|
||||
const std::vector<Symbol> &GetTargetRHS() const { return m_targetRHS; }
|
||||
const Alignment &GetAlignment() const { return m_alignment; }
|
||||
float GetPcfgScore() const { return m_pcfgScore; }
|
||||
|
||||
int Scope() const;
|
||||
|
||||
@ -68,6 +69,7 @@ class ScfgRule
|
||||
std::vector<Symbol> m_sourceRHS;
|
||||
std::vector<Symbol> m_targetRHS;
|
||||
Alignment m_alignment;
|
||||
float m_pcfgScore;
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "ScfgRule.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <ostream>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
@ -34,14 +35,43 @@ namespace GHKM {
|
||||
|
||||
void ScfgRuleWriter::Write(const ScfgRule &rule)
|
||||
{
|
||||
std::ostringstream sourceSS;
|
||||
std::ostringstream targetSS;
|
||||
|
||||
if (m_options.unpairedExtractFormat) {
|
||||
WriteUnpairedFormat(rule);
|
||||
WriteUnpairedFormat(rule, sourceSS, targetSS);
|
||||
} else {
|
||||
WriteStandardFormat(rule);
|
||||
WriteStandardFormat(rule, sourceSS, targetSS);
|
||||
}
|
||||
|
||||
// Write the rule to the forward and inverse extract files.
|
||||
m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
|
||||
m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
|
||||
|
||||
const Alignment &alignment = rule.GetAlignment();
|
||||
for (Alignment::const_iterator p = alignment.begin();
|
||||
p != alignment.end(); ++p) {
|
||||
m_fwd << " " << p->first << "-" << p->second;
|
||||
m_inv << " " << p->second << "-" << p->first;
|
||||
}
|
||||
|
||||
// Write a count of 1 and an empty NT length column to the forward extract
|
||||
// file.
|
||||
// TODO Add option to write NT length?
|
||||
m_fwd << " ||| 1 ||| |||";
|
||||
if (m_options.pcfg) {
|
||||
// Write the PCFG score.
|
||||
m_fwd << " " << std::exp(rule.GetPcfgScore());
|
||||
}
|
||||
m_fwd << std::endl;
|
||||
|
||||
// Write a count of 1 to the inverse extract file.
|
||||
m_inv << " ||| 1" << std::endl;
|
||||
}
|
||||
|
||||
void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
|
||||
void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule,
|
||||
std::ostream &sourceSS,
|
||||
std::ostream &targetSS)
|
||||
{
|
||||
const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
|
||||
const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
|
||||
@ -60,9 +90,6 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
|
||||
}
|
||||
}
|
||||
|
||||
std::ostringstream sourceSS;
|
||||
std::ostringstream targetSS;
|
||||
|
||||
// Write the source side of the rule to sourceSS.
|
||||
int i = 0;
|
||||
for (std::vector<Symbol>::const_iterator p(sourceRHS.begin());
|
||||
@ -74,7 +101,11 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
|
||||
}
|
||||
sourceSS << " ";
|
||||
}
|
||||
WriteSymbol(rule.GetSourceLHS(), sourceSS);
|
||||
if (m_options.conditionOnTargetLhs) {
|
||||
WriteSymbol(rule.GetTargetLHS(), sourceSS);
|
||||
} else {
|
||||
WriteSymbol(rule.GetSourceLHS(), sourceSS);
|
||||
}
|
||||
|
||||
// Write the target side of the rule to targetSS.
|
||||
i = 0;
|
||||
@ -88,27 +119,14 @@ void ScfgRuleWriter::WriteStandardFormat(const ScfgRule &rule)
|
||||
targetSS << " ";
|
||||
}
|
||||
WriteSymbol(rule.GetTargetLHS(), targetSS);
|
||||
|
||||
// Write the rule to the forward and inverse extract files.
|
||||
m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
|
||||
m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
|
||||
for (Alignment::const_iterator p(alignment.begin());
|
||||
p != alignment.end(); ++p) {
|
||||
m_fwd << " " << p->first << "-" << p->second;
|
||||
m_inv << " " << p->second << "-" << p->first;
|
||||
}
|
||||
m_fwd << " ||| 1" << std::endl;
|
||||
m_inv << " ||| 1" << std::endl;
|
||||
}
|
||||
|
||||
void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
|
||||
void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule,
|
||||
std::ostream &sourceSS,
|
||||
std::ostream &targetSS)
|
||||
{
|
||||
const std::vector<Symbol> &sourceRHS = rule.GetSourceRHS();
|
||||
const std::vector<Symbol> &targetRHS = rule.GetTargetRHS();
|
||||
const Alignment &alignment = rule.GetAlignment();
|
||||
|
||||
std::ostringstream sourceSS;
|
||||
std::ostringstream targetSS;
|
||||
|
||||
// Write the source side of the rule to sourceSS.
|
||||
int i = 0;
|
||||
@ -117,7 +135,11 @@ void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
|
||||
WriteSymbol(*p, sourceSS);
|
||||
sourceSS << " ";
|
||||
}
|
||||
WriteSymbol(rule.GetSourceLHS(), sourceSS);
|
||||
if (m_options.conditionOnTargetLhs) {
|
||||
WriteSymbol(rule.GetTargetLHS(), sourceSS);
|
||||
} else {
|
||||
WriteSymbol(rule.GetSourceLHS(), sourceSS);
|
||||
}
|
||||
|
||||
// Write the target side of the rule to targetSS.
|
||||
i = 0;
|
||||
@ -127,17 +149,6 @@ void ScfgRuleWriter::WriteUnpairedFormat(const ScfgRule &rule)
|
||||
targetSS << " ";
|
||||
}
|
||||
WriteSymbol(rule.GetTargetLHS(), targetSS);
|
||||
|
||||
// Write the rule to the forward and inverse extract files.
|
||||
m_fwd << sourceSS.str() << " ||| " << targetSS.str() << " |||";
|
||||
m_inv << targetSS.str() << " ||| " << sourceSS.str() << " |||";
|
||||
for (Alignment::const_iterator p(alignment.begin());
|
||||
p != alignment.end(); ++p) {
|
||||
m_fwd << " " << p->first << "-" << p->second;
|
||||
m_inv << " " << p->second << "-" << p->first;
|
||||
}
|
||||
m_fwd << " ||| 1" << std::endl;
|
||||
m_inv << " ||| 1" << std::endl;
|
||||
}
|
||||
|
||||
void ScfgRuleWriter::WriteSymbol(const Symbol &symbol, std::ostream &out)
|
||||
|
@ -45,8 +45,8 @@ class ScfgRuleWriter
|
||||
ScfgRuleWriter(const ScfgRuleWriter &);
|
||||
ScfgRuleWriter &operator=(const ScfgRuleWriter &);
|
||||
|
||||
void WriteStandardFormat(const ScfgRule &);
|
||||
void WriteUnpairedFormat(const ScfgRule &);
|
||||
void WriteStandardFormat(const ScfgRule &, std::ostream &, std::ostream &);
|
||||
void WriteUnpairedFormat(const ScfgRule &, std::ostream &, std::ostream &);
|
||||
void WriteSymbol(const Symbol &, std::ostream &);
|
||||
|
||||
std::ostream &m_fwd;
|
||||
|
@ -101,5 +101,21 @@ int Subgraph::CalcDepth(const Node *n) const
|
||||
return maxChildDepth + 1;
|
||||
}
|
||||
|
||||
float Subgraph::CalcPcfgScore() const
|
||||
{
|
||||
if (m_root->GetType() != TREE || m_leaves.empty()) {
|
||||
return 0.0f;
|
||||
}
|
||||
float score = m_root->GetPcfgScore();
|
||||
for (std::set<const Node *>::const_iterator p = m_leaves.begin();
|
||||
p != m_leaves.end(); ++p) {
|
||||
const Node *leaf = *p;
|
||||
if (leaf->GetType() == TREE) {
|
||||
score -= leaf->GetPcfgScore();
|
||||
}
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
} // namespace Moses
|
||||
} // namespace GHKM
|
||||
|
@ -38,7 +38,8 @@ class Subgraph
|
||||
: m_root(root)
|
||||
, m_depth(0)
|
||||
, m_size(root->GetType() == TREE ? 1 : 0)
|
||||
, m_nodeCount(1) {}
|
||||
, m_nodeCount(1)
|
||||
, m_pcfgScore(0.0f) {}
|
||||
|
||||
Subgraph(const Node *root, const std::set<const Node *> &leaves)
|
||||
: m_root(root)
|
||||
@ -46,10 +47,12 @@ class Subgraph
|
||||
, m_depth(-1)
|
||||
, m_size(-1)
|
||||
, m_nodeCount(-1)
|
||||
, m_pcfgScore(0.0f)
|
||||
{
|
||||
m_depth = CalcDepth(m_root);
|
||||
m_size = CalcSize(m_root);
|
||||
m_nodeCount = CountNodes(m_root);
|
||||
m_pcfgScore = CalcPcfgScore();
|
||||
}
|
||||
|
||||
const Node *GetRoot() const { return m_root; }
|
||||
@ -57,6 +60,7 @@ class Subgraph
|
||||
int GetDepth() const { return m_depth; }
|
||||
int GetSize() const { return m_size; }
|
||||
int GetNodeCount() const { return m_nodeCount; }
|
||||
float GetPcfgScore() const { return m_pcfgScore; }
|
||||
|
||||
bool IsTrivial() const { return m_leaves.empty(); }
|
||||
|
||||
@ -66,6 +70,7 @@ class Subgraph
|
||||
void GetTargetLeaves(const Node *, std::vector<const Node *> &) const;
|
||||
int CalcDepth(const Node *) const;
|
||||
int CalcSize(const Node *) const;
|
||||
float CalcPcfgScore() const;
|
||||
int CountNodes(const Node *) const;
|
||||
|
||||
const Node *m_root;
|
||||
@ -73,6 +78,7 @@ class Subgraph
|
||||
int m_depth;
|
||||
int m_size;
|
||||
int m_nodeCount;
|
||||
float m_pcfgScore;
|
||||
};
|
||||
|
||||
} // namespace GHKM
|
||||
|
@ -61,6 +61,7 @@ std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree(
|
||||
const std::vector<std::string> &words)
|
||||
{
|
||||
std::auto_ptr<ParseTree> root(new ParseTree(tree.GetLabel()));
|
||||
root->SetPcfgScore(tree.GetPcfgScore());
|
||||
const std::vector<SyntaxNode*> &children = tree.GetChildren();
|
||||
if (children.empty()) {
|
||||
if (tree.GetStart() != tree.GetEnd()) {
|
||||
|
@ -90,7 +90,7 @@ void addHieroRule( int startT, int endT, int startS, int endS
|
||||
void printHieroPhrase( int startT, int endT, int startS, int endS
|
||||
, HoleCollection &holeColl, LabelIndex &labelIndex);
|
||||
string printTargetHieroPhrase( int startT, int endT, int startS, int endS
|
||||
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex);
|
||||
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore);
|
||||
string printSourceHieroPhrase( int startT, int endT, int startS, int endS
|
||||
, HoleCollection &holeColl, const LabelIndex &labelIndex);
|
||||
void preprocessSourceHieroPhrase( int startT, int endT, int startS, int endS
|
||||
@ -140,7 +140,9 @@ int main(int argc, char* argv[])
|
||||
<< " | --MaxNonTerm[" << options.maxNonTerm << "]"
|
||||
<< " | --MaxScope[" << options.maxScope << "]"
|
||||
<< " | --SourceSyntax | --TargetSyntax"
|
||||
<< " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource | --NoNonTermFirstWord | --NoFractionalCounting ]\n";
|
||||
<< " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource | --NoNonTermFirstWord | --NoFractionalCounting"
|
||||
<< " | --UnpairedExtractFormat"
|
||||
<< " | --ConditionOnTargetLHS ]\n";
|
||||
exit(1);
|
||||
}
|
||||
char* &fileNameT = argv[1];
|
||||
@ -257,8 +259,14 @@ int main(int argc, char* argv[])
|
||||
// if a source phrase is paired with two target phrases, then count(t|s) = 0.5
|
||||
else if (strcmp(argv[i],"--NoFractionalCounting") == 0) {
|
||||
options.fractionalCounting = false;
|
||||
} else if (strcmp(argv[i],"--PCFG") == 0) {
|
||||
options.pcfgScore = true;
|
||||
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
|
||||
options.outputNTLengths = true;
|
||||
} else if (strcmp(argv[i],"--UnpairedExtractFormat") == 0) {
|
||||
options.unpairedExtractFormat = true;
|
||||
} else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
|
||||
options.conditionOnTargetLhs = true;
|
||||
#ifdef WITH_THREADS
|
||||
} else if (strcmp(argv[i],"-threads") == 0 ||
|
||||
strcmp(argv[i],"--threads") == 0 ||
|
||||
@ -517,7 +525,7 @@ void ExtractTask::preprocessSourceHieroPhrase( int startT, int endT, int startS,
|
||||
}
|
||||
|
||||
string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, int endS
|
||||
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex)
|
||||
, WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore)
|
||||
{
|
||||
HoleList::iterator iterHoleList = holeColl.GetHoles().begin();
|
||||
assert(iterHoleList != holeColl.GetHoles().end());
|
||||
@ -543,7 +551,16 @@ string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, in
|
||||
m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[ labelI ]->GetLabel() : "X";
|
||||
hole.SetLabel(targetLabel, 1);
|
||||
|
||||
out += "[" + sourceLabel + "][" + targetLabel + "] ";
|
||||
if (m_options.unpairedExtractFormat) {
|
||||
out += "[" + targetLabel + "] ";
|
||||
} else {
|
||||
out += "[" + sourceLabel + "][" + targetLabel + "] ";
|
||||
}
|
||||
|
||||
if (m_options.pcfgScore) {
|
||||
double score = m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetPcfgScore();
|
||||
logPCFGScore -= score;
|
||||
}
|
||||
|
||||
currPos = hole.GetEnd(1);
|
||||
hole.SetPos(outPos, 1);
|
||||
@ -584,7 +601,11 @@ string ExtractTask::printSourceHieroPhrase( int startT, int endT, int startS, in
|
||||
assert(targetLabel != "");
|
||||
|
||||
const string &sourceLabel = hole.GetLabel(0);
|
||||
out += "[" + sourceLabel + "][" + targetLabel + "] ";
|
||||
if (m_options.unpairedExtractFormat) {
|
||||
out += "[" + sourceLabel + "] ";
|
||||
} else {
|
||||
out += "[" + sourceLabel + "][" + targetLabel + "] ";
|
||||
}
|
||||
|
||||
currPos = hole.GetEnd(0);
|
||||
hole.SetPos(outPos, 0);
|
||||
@ -652,19 +673,29 @@ void ExtractTask::printHieroPhrase( int startT, int endT, int startS, int endS
|
||||
m_sentence->targetTree.GetNodes(startT,endT)[ labelIndex[0] ]->GetLabel() : "X";
|
||||
string sourceLabel = m_options.sourceSyntax ?
|
||||
m_sentence->sourceTree.GetNodes(startS,endS)[ labelIndex[1] ]->GetLabel() : "X";
|
||||
//string sourceLabel = "X";
|
||||
|
||||
// create non-terms on the source side
|
||||
preprocessSourceHieroPhrase(startT, endT, startS, endS, indexS, holeColl, labelIndex);
|
||||
|
||||
// target
|
||||
rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex)
|
||||
if (m_options.pcfgScore) {
|
||||
double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[labelIndex[0]]->GetPcfgScore();
|
||||
rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
|
||||
+ " [" + targetLabel + "]";
|
||||
rule.pcfgScore = std::exp(logPCFGScore);
|
||||
} else {
|
||||
double logPCFGScore = 0.0f;
|
||||
rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
|
||||
+ " [" + targetLabel + "]";
|
||||
}
|
||||
|
||||
// source
|
||||
// holeColl.SortSourceHoles();
|
||||
rule.source = printSourceHieroPhrase(startT, endT, startS, endS, holeColl, labelIndex)
|
||||
+ " [" + sourceLabel + "]";
|
||||
rule.source = printSourceHieroPhrase(startT, endT, startS, endS, holeColl, labelIndex);
|
||||
if (m_options.conditionOnTargetLhs) {
|
||||
rule.source += " [" + targetLabel + "]";
|
||||
} else {
|
||||
rule.source += " [" + sourceLabel + "]";
|
||||
}
|
||||
|
||||
// alignment
|
||||
printHieroAlignment(startT, endT, startS, endS, indexS, indexT, holeColl, rule);
|
||||
@ -860,10 +891,15 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist
|
||||
|
||||
// phrase labels
|
||||
string targetLabel,sourceLabel;
|
||||
sourceLabel = m_options.sourceSyntax ?
|
||||
m_sentence->sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
|
||||
targetLabel = m_options.targetSyntax ?
|
||||
m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel() : "X";
|
||||
if (m_options.targetSyntax && m_options.conditionOnTargetLhs) {
|
||||
sourceLabel = targetLabel = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel();
|
||||
}
|
||||
else {
|
||||
sourceLabel = m_options.sourceSyntax ?
|
||||
m_sentence->sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
|
||||
targetLabel = m_options.targetSyntax ?
|
||||
m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel() : "X";
|
||||
}
|
||||
|
||||
// source
|
||||
rule.source = "";
|
||||
@ -877,6 +913,11 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist
|
||||
rule.target += m_sentence->target[ti] + " ";
|
||||
rule.target += "[" + targetLabel + "]";
|
||||
|
||||
if (m_options.pcfgScore) {
|
||||
double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetPcfgScore();
|
||||
rule.pcfgScore = std::exp(logPCFGScore);
|
||||
}
|
||||
|
||||
// alignment
|
||||
for(int ti=startT; ti<=endT; ti++) {
|
||||
for(unsigned int i=0; i<m_sentence->alignedToT[ti].size(); i++) {
|
||||
@ -957,11 +998,13 @@ void ExtractTask::writeRulesToFile()
|
||||
out << rule->source << " ||| "
|
||||
<< rule->target << " ||| "
|
||||
<< rule->alignment << " ||| "
|
||||
<< rule->count;
|
||||
<< rule->count << " ||| ";
|
||||
if (m_options.outputNTLengths) {
|
||||
out << " ||| ";
|
||||
rule->OutputNTLengths(out);
|
||||
}
|
||||
if (m_options.pcfgScore) {
|
||||
out << " ||| " << rule->pcfgScore;
|
||||
}
|
||||
out << "\n";
|
||||
|
||||
if (!m_options.onlyDirectFlag) {
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include "SentenceAlignment.h"
|
||||
#include "tables-core.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "OutputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -82,15 +83,16 @@ bool hierModel = false;
|
||||
REO_MODEL_TYPE hierType = REO_MSD;
|
||||
|
||||
|
||||
ofstream extractFile;
|
||||
ofstream extractFileInv;
|
||||
ofstream extractFileOrientation;
|
||||
ofstream extractFileSentenceId;
|
||||
Moses::OutputFileStream extractFile;
|
||||
Moses::OutputFileStream extractFileInv;
|
||||
Moses::OutputFileStream extractFileOrientation;
|
||||
Moses::OutputFileStream extractFileSentenceId;
|
||||
int maxPhraseLength;
|
||||
bool orientationFlag = false;
|
||||
bool translationFlag = true;
|
||||
bool sentenceIdFlag = false; //create extract file with sentence id
|
||||
bool onlyOutputSpanInfo = false;
|
||||
bool gzOutput = false;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
@ -116,6 +118,8 @@ int main(int argc, char* argv[])
|
||||
translationFlag = false;
|
||||
} else if (strcmp(argv[i], "--SentenceId") == 0) {
|
||||
sentenceIdFlag = true;
|
||||
} else if (strcmp(argv[i], "--GZOutput") == 0) {
|
||||
gzOutput = true;
|
||||
} else if(strcmp(argv[i],"--model") == 0) {
|
||||
if (i+1 >= argc) {
|
||||
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
|
||||
@ -193,18 +197,18 @@ int main(int argc, char* argv[])
|
||||
|
||||
// open output files
|
||||
if (translationFlag) {
|
||||
string fileNameExtractInv = fileNameExtract + ".inv";
|
||||
extractFile.open(fileNameExtract.c_str());
|
||||
extractFileInv.open(fileNameExtractInv.c_str());
|
||||
string fileNameExtractInv = fileNameExtract + ".inv" + (gzOutput?".gz":"");
|
||||
extractFile.Open( (fileNameExtract + (gzOutput?".gz":"")).c_str());
|
||||
extractFileInv.Open(fileNameExtractInv.c_str());
|
||||
}
|
||||
if (orientationFlag) {
|
||||
string fileNameExtractOrientation = fileNameExtract + ".o";
|
||||
extractFileOrientation.open(fileNameExtractOrientation.c_str());
|
||||
string fileNameExtractOrientation = fileNameExtract + ".o" + (gzOutput?".gz":"");
|
||||
extractFileOrientation.Open(fileNameExtractOrientation.c_str());
|
||||
}
|
||||
|
||||
if (sentenceIdFlag) {
|
||||
string fileNameExtractSentenceId = fileNameExtract + ".sid";
|
||||
extractFileSentenceId.open(fileNameExtractSentenceId.c_str());
|
||||
string fileNameExtractSentenceId = fileNameExtract + ".sid" + (gzOutput?".gz":"");
|
||||
extractFileSentenceId.Open(fileNameExtractSentenceId.c_str());
|
||||
}
|
||||
|
||||
int i=0;
|
||||
@ -239,12 +243,12 @@ int main(int argc, char* argv[])
|
||||
//az: only close if we actually opened it
|
||||
if (!onlyOutputSpanInfo) {
|
||||
if (translationFlag) {
|
||||
extractFile.close();
|
||||
extractFileInv.close();
|
||||
extractFile.Close();
|
||||
extractFileInv.Close();
|
||||
}
|
||||
if (orientationFlag) extractFileOrientation.close();
|
||||
if (orientationFlag) extractFileOrientation.Close();
|
||||
if (sentenceIdFlag) {
|
||||
extractFileSentenceId.close();
|
||||
extractFileSentenceId.Close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -14,22 +14,20 @@
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// HPhraseVertex represents a point in the alignment matrix
|
||||
typedef pair <int, int> HPhraseVertex;
|
||||
typedef std::pair <int, int> HPhraseVertex;
|
||||
|
||||
// Phrase represents a bi-phrase; each bi-phrase is defined by two points in the alignment matrix:
|
||||
// bottom-left and top-right
|
||||
typedef pair<HPhraseVertex, HPhraseVertex> HPhrase;
|
||||
typedef std::pair<HPhraseVertex, HPhraseVertex> HPhrase;
|
||||
|
||||
// HPhraseVector is a vector of phrases
|
||||
// HPhraseVector is a std::vector of phrases
|
||||
// the bool value indicates if the associated phrase is within the length limit or not
|
||||
typedef vector < HPhrase > HPhraseVector;
|
||||
typedef std::vector < HPhrase > HPhraseVector;
|
||||
|
||||
// SentenceVertices represents all vertices that have the same positioning of all extracted phrases
|
||||
// The key of the map is the English index and the value is a set of the foreign ones
|
||||
typedef map <int, set<int> > HSenteceVertices;
|
||||
// The key of the std::map is the English index and the value is a std::set of the foreign ones
|
||||
typedef std::map <int, std::set<int> > HSenteceVertices;
|
||||
|
||||
|
||||
#endif /* HIERARCHICAL_H_ */
|
||||
|
1
scripts/training/phrase-extract/pcfg-common/Jamfile
Normal file
1
scripts/training/phrase-extract/pcfg-common/Jamfile
Normal file
@ -0,0 +1 @@
|
||||
lib pcfg_common : [ glob *.cc ] ..//trees ;
|
41
scripts/training/phrase-extract/pcfg-common/exception.h
Normal file
41
scripts/training/phrase-extract/pcfg-common/exception.h
Normal file
@ -0,0 +1,41 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_EXCEPTION_H_
|
||||
#define PCFG_EXCEPTION_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
// Minimal exception type for the PCFG tools: carries a human-readable
// message that can be retrieved with msg().
class Exception {
 public:
  // Constructs from a C string. A null pointer is treated as an empty
  // message, since constructing a std::string from a null pointer is
  // undefined behaviour.
  Exception(const char *msg) : msg_(msg ? msg : "") {}

  // Constructs from a std::string (the message is copied).
  Exception(const std::string &msg) : msg_(msg) {}

  // Returns the message supplied at construction.
  const std::string &msg() const { return msg_; }

 private:
  std::string msg_;
};
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
109
scripts/training/phrase-extract/pcfg-common/numbered_set.h
Normal file
109
scripts/training/phrase-extract/pcfg-common/numbered_set.h
Normal file
@ -0,0 +1,109 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_NUMBERED_SET_H_
|
||||
#define PCFG_NUMBERED_SET_H_
|
||||
|
||||
#include "exception.h"
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
|
||||
#include <limits>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
// Stores a set of elements of type T, each of which is allocated an integral
|
||||
// ID of type I. IDs are contiguous starting at 0. Individual elements cannot
|
||||
// be removed once inserted (but the whole set can be cleared).
|
||||
template<typename T, typename I=std::size_t>
|
||||
class NumberedSet {
|
||||
private:
|
||||
typedef boost::unordered_map<T, I> ElementToIdMap;
|
||||
typedef std::vector<const T *> IdToElementMap;
|
||||
|
||||
public:
|
||||
typedef I IdType;
|
||||
typedef typename IdToElementMap::const_iterator const_iterator;
|
||||
|
||||
NumberedSet() {}
|
||||
|
||||
const_iterator begin() const { return id_to_element_.begin(); }
|
||||
const_iterator end() const { return id_to_element_.end(); }
|
||||
|
||||
// Static value
|
||||
static I NullId() { return std::numeric_limits<I>::max(); }
|
||||
|
||||
bool Empty() const { return id_to_element_.empty(); }
|
||||
std::size_t Size() const { return id_to_element_.size(); }
|
||||
|
||||
// Insert the given object and return its ID.
|
||||
I Insert(const T &);
|
||||
|
||||
I Lookup(const T &) const;
|
||||
const T &Lookup(I) const;
|
||||
|
||||
void Clear();
|
||||
|
||||
private:
|
||||
ElementToIdMap element_to_id_;
|
||||
IdToElementMap id_to_element_;
|
||||
};
|
||||
|
||||
template<typename T, typename I>
|
||||
I NumberedSet<T, I>::Lookup(const T &s) const {
|
||||
typename ElementToIdMap::const_iterator p = element_to_id_.find(s);
|
||||
return (p == element_to_id_.end()) ? NullId() : p->second;
|
||||
}
|
||||
|
||||
template<typename T, typename I>
|
||||
const T &NumberedSet<T, I>::Lookup(I id) const {
|
||||
if (id < 0 || id >= id_to_element_.size()) {
|
||||
std::ostringstream msg;
|
||||
msg << "Value not found: " << id;
|
||||
throw Exception(msg.str());
|
||||
}
|
||||
return *(id_to_element_[id]);
|
||||
}
|
||||
|
||||
template<typename T, typename I>
|
||||
I NumberedSet<T, I>::Insert(const T &x) {
|
||||
std::pair<T, I> value(x, id_to_element_.size());
|
||||
std::pair<typename ElementToIdMap::iterator, bool> result =
|
||||
element_to_id_.insert(value);
|
||||
if (result.second) {
|
||||
// x is a new element.
|
||||
id_to_element_.push_back(&result.first->first);
|
||||
}
|
||||
return result.first->second;
|
||||
}
|
||||
|
||||
template<typename T, typename I>
|
||||
void NumberedSet<T, I>::Clear() {
|
||||
element_to_id_.clear();
|
||||
id_to_element_.clear();
|
||||
}
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
106
scripts/training/phrase-extract/pcfg-common/pcfg.cc
Normal file
106
scripts/training/phrase-extract/pcfg-common/pcfg.cc
Normal file
@ -0,0 +1,106 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "pcfg.h"
|
||||
|
||||
#include "exception.h"
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
#include <cassert>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
void Pcfg::Add(const Key &key, double score) {
|
||||
rules_[key] = score;
|
||||
}
|
||||
|
||||
bool Pcfg::Lookup(const Key &key, double &score) const {
|
||||
Map::const_iterator p = rules_.find(key);
|
||||
if (p == rules_.end()) {
|
||||
return false;
|
||||
}
|
||||
score = p->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
void Pcfg::Read(std::istream &input, Vocabulary &vocab) {
|
||||
std::string line;
|
||||
std::string lhs_string;
|
||||
std::vector<std::string> rhs_strings;
|
||||
std::string score_string;
|
||||
Key key;
|
||||
while (std::getline(input, line)) {
|
||||
// Read LHS.
|
||||
std::size_t pos = line.find("|||");
|
||||
if (pos == std::string::npos) {
|
||||
throw Exception("missing first delimiter");
|
||||
}
|
||||
lhs_string = line.substr(0, pos);
|
||||
boost::trim(lhs_string);
|
||||
|
||||
// Read RHS.
|
||||
std::size_t begin = pos+3;
|
||||
pos = line.find("|||", begin);
|
||||
if (pos == std::string::npos) {
|
||||
throw Exception("missing second delimiter");
|
||||
}
|
||||
std::string rhs_text = line.substr(begin, pos-begin);
|
||||
boost::trim(rhs_text);
|
||||
rhs_strings.clear();
|
||||
boost::split(rhs_strings, rhs_text, boost::algorithm::is_space(),
|
||||
boost::algorithm::token_compress_on);
|
||||
|
||||
// Read score.
|
||||
score_string = line.substr(pos+3);
|
||||
boost::trim(score_string);
|
||||
|
||||
// Construct key.
|
||||
key.clear();
|
||||
key.reserve(rhs_strings.size()+1);
|
||||
key.push_back(vocab.Insert(lhs_string));
|
||||
for (std::vector<std::string>::const_iterator p = rhs_strings.begin();
|
||||
p != rhs_strings.end(); ++p) {
|
||||
key.push_back(vocab.Insert(*p));
|
||||
}
|
||||
|
||||
// Add rule.
|
||||
double score = boost::lexical_cast<double>(score_string);
|
||||
Add(key, score);
|
||||
}
|
||||
}
|
||||
|
||||
void Pcfg::Write(const Vocabulary &vocab, std::ostream &output) const {
|
||||
for (const_iterator p = begin(); p != end(); ++p) {
|
||||
const Key &key = p->first;
|
||||
double score = p->second;
|
||||
std::vector<std::size_t>::const_iterator q = key.begin();
|
||||
std::vector<std::size_t>::const_iterator end = key.end();
|
||||
output << vocab.Lookup(*q++) << " |||";
|
||||
while (q != end) {
|
||||
output << " " << vocab.Lookup(*q++);
|
||||
}
|
||||
output << " ||| " << score << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
61
scripts/training/phrase-extract/pcfg-common/pcfg.h
Normal file
61
scripts/training/phrase-extract/pcfg-common/pcfg.h
Normal file
@ -0,0 +1,61 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_PCFG_H_
|
||||
#define PCFG_PCFG_H_
|
||||
|
||||
#include "typedef.h"
|
||||
|
||||
#include <istream>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
class Pcfg {
|
||||
public:
|
||||
typedef std::vector<std::size_t> Key;
|
||||
typedef std::map<Key, double> Map;
|
||||
typedef Map::iterator iterator;
|
||||
typedef Map::const_iterator const_iterator;
|
||||
|
||||
Pcfg() {}
|
||||
|
||||
iterator begin() { return rules_.begin(); }
|
||||
const_iterator begin() const { return rules_.begin(); }
|
||||
|
||||
iterator end() { return rules_.end(); }
|
||||
const_iterator end() const { return rules_.end(); }
|
||||
|
||||
void Add(const Key &, double);
|
||||
bool Lookup(const Key &, double &) const;
|
||||
void Read(std::istream &, Vocabulary &);
|
||||
void Write(const Vocabulary &, std::ostream &) const;
|
||||
|
||||
private:
|
||||
Map rules_;
|
||||
};
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
77
scripts/training/phrase-extract/pcfg-common/pcfg_tree.h
Normal file
77
scripts/training/phrase-extract/pcfg-common/pcfg_tree.h
Normal file
@ -0,0 +1,77 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_PCFG_TREE_H_
|
||||
#define PCFG_PCFG_TREE_H_
|
||||
|
||||
#include "syntax_tree.h"
|
||||
#include "xml_tree_writer.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
template<typename DerivedType>
|
||||
class PcfgTreeBase : public SyntaxTreeBase<std::string, DerivedType> {
|
||||
public:
|
||||
typedef std::string LabelType;
|
||||
typedef SyntaxTreeBase<LabelType, DerivedType> BaseType;
|
||||
|
||||
PcfgTreeBase(const LabelType &label) : BaseType(label), score_(0.0) {}
|
||||
|
||||
double score() const { return score_; }
|
||||
void set_score(double s) { score_ = s; }
|
||||
|
||||
private:
|
||||
double score_;
|
||||
};
|
||||
|
||||
// Concrete PCFG tree node: PcfgTreeBase instantiated with itself as the
// derived type.  Adds no state of its own.
class PcfgTree : public PcfgTreeBase<PcfgTree> {
 public:
  typedef PcfgTreeBase<PcfgTree> BaseType;

  // Creates a node with the given label (the score is set by BaseType).
  PcfgTree(const BaseType::LabelType &label)
      : BaseType(label) {}
};
|
||||
|
||||
// Specialise XmlOutputHandler for PcfgTree.
|
||||
template<>
|
||||
class XmlOutputHandler<PcfgTree> {
|
||||
public:
|
||||
typedef std::map<std::string, std::string> AttributeMap;
|
||||
|
||||
void GetLabel(const PcfgTree &tree, std::string &label) const {
|
||||
label = tree.label();
|
||||
}
|
||||
|
||||
void GetAttributes(const PcfgTree &tree, AttributeMap &attribute_map) const {
|
||||
attribute_map.clear();
|
||||
double score = tree.score();
|
||||
if (score != 0.0) {
|
||||
std::ostringstream out;
|
||||
out << tree.score();
|
||||
attribute_map["pcfg"] = out.str();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
91
scripts/training/phrase-extract/pcfg-common/syntax_tree.h
Normal file
91
scripts/training/phrase-extract/pcfg-common/syntax_tree.h
Normal file
@ -0,0 +1,91 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_SYNTAX_TREE_H_
|
||||
#define PCFG_SYNTAX_TREE_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
// Base class for SyntaxTree, AgreementTree, and friends.
//
// A node stores a label of type T, a pointer to its parent and an ordered
// list of pointers to its children.  DerivedType is the concrete node class
// (the class that inherits from this template), so parent and child pointers
// are typed as the concrete class.
//
// Ownership: the destructor deletes every pointer held in the child list, so
// children must be heap-allocated and must not be owned by anything else.
// The parent pointer is never deleted.  Because of this ownership, copying a
// node would leave two nodes deleting the same children; copy construction
// and copy assignment are therefore disabled.
template<typename T, typename DerivedType>
class SyntaxTreeBase {
 public:
  // Constructors

  // Creates a node with no children and no parent.
  SyntaxTreeBase(const T &label)
      : label_(label)
      , children_()
      , parent_(0) {}

  // Creates a parentless node that takes ownership of the given children.
  // The children's own parent pointers are not modified.
  SyntaxTreeBase(const T &label, const std::vector<DerivedType *> &children)
      : label_(label)
      , children_(children)
      , parent_(0) {}

  // Destructor
  virtual ~SyntaxTreeBase();

  const T &label() const { return label_; }
  const DerivedType *parent() const { return parent_; }
  DerivedType *parent() { return parent_; }
  const std::vector<DerivedType *> &children() const { return children_; }
  std::vector<DerivedType *> &children() { return children_; }

  void set_label(const T &label) { label_ = label; }
  void set_parent(DerivedType *parent) { parent_ = parent; }

  // Replaces the child list.  The previous children are not deleted; the
  // caller remains responsible for them.
  void set_children(const std::vector<DerivedType *> &c) { children_ = c; }

  // A leaf is a node without children.
  bool IsLeaf() const { return children_.empty(); }

  // A preterminal is a node whose single child is a leaf.
  bool IsPreterminal() const {
    return children_.size() == 1 && children_[0]->IsLeaf();
  }

  // Appends child to the child list and takes ownership of it.  The child's
  // parent pointer is not updated; callers do that via set_parent().
  void AddChild(DerivedType *child) { children_.push_back(child); }

 private:
  // Copying is disabled (declared private and intentionally left undefined):
  // the implicit versions would copy the child pointers and lead to the same
  // children being deleted twice.
  SyntaxTreeBase(const SyntaxTreeBase &);
  SyntaxTreeBase &operator=(const SyntaxTreeBase &);

  T label_;
  std::vector<DerivedType *> children_;
  DerivedType *parent_;
};
|
||||
|
||||
template<typename T>
|
||||
class SyntaxTree : public SyntaxTreeBase<T, SyntaxTree<T> > {
|
||||
public:
|
||||
typedef SyntaxTreeBase<T, SyntaxTree<T> > BaseType;
|
||||
SyntaxTree(const T &label) : BaseType(label) {}
|
||||
SyntaxTree(const T &label, const std::vector<SyntaxTree *> &children)
|
||||
: BaseType(label, children) {}
|
||||
};
|
||||
|
||||
template<typename T, typename DerivedType>
|
||||
SyntaxTreeBase<T, DerivedType>::~SyntaxTreeBase() {
|
||||
for (std::size_t i = 0; i < children_.size(); ++i) {
|
||||
delete children_[i];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
80
scripts/training/phrase-extract/pcfg-common/tool.cc
Normal file
80
scripts/training/phrase-extract/pcfg-common/tool.cc
Normal file
@ -0,0 +1,80 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "tool.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
std::istream &Tool::OpenInputOrDie(const std::string &filename) {
|
||||
// TODO Check that function is only called once?
|
||||
if (filename.empty() || filename == "-") {
|
||||
input_ptr_ = &(std::cin);
|
||||
} else {
|
||||
input_file_stream_.open(filename.c_str());
|
||||
if (!input_file_stream_) {
|
||||
std::ostringstream msg;
|
||||
msg << "failed to open input file: " << filename;
|
||||
Error(msg.str());
|
||||
}
|
||||
input_ptr_ = &input_file_stream_;
|
||||
}
|
||||
return *input_ptr_;
|
||||
}
|
||||
|
||||
std::ostream &Tool::OpenOutputOrDie(const std::string &filename) {
|
||||
// TODO Check that function is only called once?
|
||||
if (filename.empty() || filename == "-") {
|
||||
output_ptr_ = &(std::cout);
|
||||
} else {
|
||||
output_file_stream_.open(filename.c_str());
|
||||
if (!output_file_stream_) {
|
||||
std::ostringstream msg;
|
||||
msg << "failed to open output file: " << filename;
|
||||
Error(msg.str());
|
||||
}
|
||||
output_ptr_ = &output_file_stream_;
|
||||
}
|
||||
return *output_ptr_;
|
||||
}
|
||||
|
||||
void Tool::OpenNamedInputOrDie(const std::string &filename,
|
||||
std::ifstream &stream) {
|
||||
stream.open(filename.c_str());
|
||||
if (!stream) {
|
||||
std::ostringstream msg;
|
||||
msg << "failed to open input file: " << filename;
|
||||
Error(msg.str());
|
||||
}
|
||||
}
|
||||
|
||||
void Tool::OpenNamedOutputOrDie(const std::string &filename,
|
||||
std::ofstream &stream) {
|
||||
stream.open(filename.c_str());
|
||||
if (!stream) {
|
||||
std::ostringstream msg;
|
||||
msg << "failed to open output file: " << filename;
|
||||
Error(msg.str());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
91
scripts/training/phrase-extract/pcfg-common/tool.h
Normal file
91
scripts/training/phrase-extract/pcfg-common/tool.h
Normal file
@ -0,0 +1,91 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_TOOL_H_
|
||||
#define PCFG_TOOL_H_
|
||||
|
||||
#include <boost/program_options/cmdline.hpp>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
class Tool {
|
||||
public:
|
||||
virtual ~Tool() {}
|
||||
|
||||
const std::string &name() const { return name_; }
|
||||
|
||||
virtual int Main(int argc, char *argv[]) = 0;
|
||||
|
||||
protected:
|
||||
Tool(const std::string &name) : name_(name) {}
|
||||
|
||||
// Returns the boost::program_options style that should be used by all tools.
|
||||
static int CommonOptionStyle() {
|
||||
namespace cls = boost::program_options::command_line_style;
|
||||
return cls::default_style & (~cls::allow_guessing);
|
||||
}
|
||||
|
||||
void Warn(const std::string &msg) const {
|
||||
std::cerr << name_ << ": warning: " << msg << std::endl;
|
||||
}
|
||||
|
||||
void Error(const std::string &msg) const {
|
||||
std::cerr << name_ << ": error: " << msg << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
// Initialises the tool's main input stream and returns a reference that is
|
||||
// valid for the remainder of the tool's lifetime. If filename is empty or
|
||||
// "-" then input is standard input; otherwise it is the named file. Calls
|
||||
// Error() if the file cannot be opened for reading.
|
||||
std::istream &OpenInputOrDie(const std::string &filename);
|
||||
|
||||
// Initialises the tool's main output stream and returns a reference that is
|
||||
// valid for the remainder of the tool's lifetime. If filename is empty or
|
||||
// "-" then output is standard output; otherwise it is the named file. Calls
|
||||
// Error() if the file cannot be opened for writing.
|
||||
std::ostream &OpenOutputOrDie(const std::string &filename);
|
||||
|
||||
// Opens the named input file using the supplied ifstream. Calls Error() if
|
||||
// the file cannot be opened for reading.
|
||||
void OpenNamedInputOrDie(const std::string &, std::ifstream &);
|
||||
|
||||
// Opens the named output file using the supplied ofstream. Calls Error() if
|
||||
// the file cannot be opened for writing.
|
||||
void OpenNamedOutputOrDie(const std::string &, std::ofstream &);
|
||||
|
||||
private:
|
||||
std::string name_;
|
||||
std::istream *input_ptr_;
|
||||
std::ifstream input_file_stream_;
|
||||
std::ostream *output_ptr_;
|
||||
std::ofstream output_file_stream_;
|
||||
};
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
37
scripts/training/phrase-extract/pcfg-common/typedef.h
Normal file
37
scripts/training/phrase-extract/pcfg-common/typedef.h
Normal file
@ -0,0 +1,37 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_TYPEDEF_H_
|
||||
#define PCFG_TYPEDEF_H_
|
||||
|
||||
#include "numbered_set.h"
|
||||
#include "syntax_tree.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
// Vocabulary type shared by the PCFG tools: a NumberedSet of strings.
// NOTE(review): presumably assigns an integer ID to each distinct symbol --
// confirm against numbered_set.h.
typedef NumberedSet<std::string> Vocabulary;
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
@ -0,0 +1,88 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "xml_tree_parser.h"
|
||||
|
||||
#include "exception.h"
|
||||
#include "tables-core.h"
|
||||
#include "XmlException.h"
|
||||
#include "XmlTree.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
// Nothing to set up explicitly: every member is default-constructed.
XmlTreeParser::XmlTreeParser() {}
|
||||
|
||||
std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line)
|
||||
{
|
||||
m_line = line;
|
||||
m_tree.Clear();
|
||||
try {
|
||||
if (!ProcessAndStripXMLTags(m_line, m_tree, m_labelSet, m_topLabelSet)) {
|
||||
throw Exception("");
|
||||
}
|
||||
} catch (const XmlException &e) {
|
||||
throw Exception(e.getMsg());
|
||||
}
|
||||
m_tree.ConnectNodes();
|
||||
SyntaxNode *root = m_tree.GetTop();
|
||||
if (!root) {
|
||||
// There is no XML tree.
|
||||
return std::auto_ptr<PcfgTree>();
|
||||
}
|
||||
m_words = tokenize(m_line.c_str());
|
||||
return ConvertTree(*root, m_words);
|
||||
}
|
||||
|
||||
// Converts a SyntaxNode tree to a Moses::PCFG::PcfgTree.
|
||||
std::auto_ptr<PcfgTree> XmlTreeParser::ConvertTree(
|
||||
const SyntaxNode &tree,
|
||||
const std::vector<std::string> &words)
|
||||
{
|
||||
std::auto_ptr<PcfgTree> root(new PcfgTree(tree.GetLabel()));
|
||||
const std::vector<SyntaxNode*> &children = tree.GetChildren();
|
||||
if (children.empty()) {
|
||||
if (tree.GetStart() != tree.GetEnd()) {
|
||||
std::ostringstream msg;
|
||||
msg << "leaf node covers multiple words (" << tree.GetStart()
|
||||
<< "-" << tree.GetEnd() << "): this is currently unsupported";
|
||||
throw Exception(msg.str());
|
||||
}
|
||||
std::auto_ptr<PcfgTree> leaf(new PcfgTree(words[tree.GetStart()]));
|
||||
leaf->set_parent(root.get());
|
||||
root->AddChild(leaf.release());
|
||||
} else {
|
||||
for (std::vector<SyntaxNode*>::const_iterator p = children.begin();
|
||||
p != children.end(); ++p) {
|
||||
assert(*p);
|
||||
std::auto_ptr<PcfgTree> child = ConvertTree(**p, words);
|
||||
child->set_parent(root.get());
|
||||
root->AddChild(child.release());
|
||||
}
|
||||
}
|
||||
return root;
|
||||
}
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
@ -0,0 +1,56 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_XML_TREE_PARSER_H_
|
||||
#define PCFG_XML_TREE_PARSER_H_
|
||||
|
||||
#include "pcfg_tree.h"
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
// Parses a string in Moses' XML parse tree format and returns a PcfgTree
|
||||
// object.
|
||||
class XmlTreeParser {
|
||||
public:
|
||||
XmlTreeParser();
|
||||
std::auto_ptr<PcfgTree> Parse(const std::string &);
|
||||
private:
|
||||
std::auto_ptr<PcfgTree> ConvertTree(const SyntaxNode &,
|
||||
const std::vector<std::string> &);
|
||||
|
||||
std::set<std::string> m_labelSet;
|
||||
std::map<std::string, int> m_topLabelSet;
|
||||
std::string m_line;
|
||||
::SyntaxTree m_tree;
|
||||
std::vector<std::string> m_words;
|
||||
};
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
133
scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
Normal file
133
scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
Normal file
@ -0,0 +1,133 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef PCFG_XML_TREE_WRITER_H_
|
||||
#define PCFG_XML_TREE_WRITER_H_
|
||||
|
||||
#include "syntax_tree.h"
|
||||
|
||||
#include "XmlTree.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <ostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace Moses {
|
||||
namespace PCFG {
|
||||
|
||||
// Customisation point used by XmlTreeWriter.  The primary template only
// declares the interface; a specialisation for the concrete tree type is
// expected to supply the two member functions.
template<typename InputTree>
class XmlOutputHandler {
 public:
  typedef std::map<std::string, std::string> AttributeMap;

  // Stores the label of the given tree node in label.
  void GetLabel(const InputTree &tree, std::string &label) const;

  // Stores the XML attributes (name -> value) of the given tree node in
  // attribute_map.
  void GetAttributes(const InputTree &tree, AttributeMap &attribute_map) const;
};
|
||||
|
||||
template<typename InputTree>
|
||||
class XmlTreeWriter : public XmlOutputHandler<InputTree> {
|
||||
public:
|
||||
typedef XmlOutputHandler<InputTree> Base;
|
||||
void Write(const InputTree &, std::ostream &) const;
|
||||
private:
|
||||
std::string Escape(const std::string &) const;
|
||||
};
|
||||
|
||||
template<typename InputTree>
|
||||
void XmlTreeWriter<InputTree>::Write(const InputTree &tree,
|
||||
std::ostream &out) const {
|
||||
assert(!tree.IsLeaf());
|
||||
|
||||
// Opening tag
|
||||
|
||||
std::string label;
|
||||
Base::GetLabel(tree, label);
|
||||
out << "<tree label=\"" << Escape(label) << "\"";
|
||||
|
||||
typename Base::AttributeMap attribute_map;
|
||||
Base::GetAttributes(tree, attribute_map);
|
||||
|
||||
for (typename Base::AttributeMap::const_iterator p = attribute_map.begin();
|
||||
p != attribute_map.end(); ++p) {
|
||||
out << " " << p->first << "=\"" << p->second << "\"";
|
||||
}
|
||||
|
||||
out << ">";
|
||||
|
||||
// Children
|
||||
|
||||
const std::vector<InputTree *> &children = tree.children();
|
||||
for (typename std::vector<InputTree *>::const_iterator p = children.begin();
|
||||
p != children.end(); ++p) {
|
||||
InputTree &child = **p;
|
||||
if (child.IsLeaf()) {
|
||||
Base::GetLabel(child, label);
|
||||
out << " " << Escape(label);
|
||||
} else {
|
||||
out << " ";
|
||||
Write(**p, out);
|
||||
}
|
||||
}
|
||||
|
||||
// Closing tag
|
||||
out << " </tree>";
|
||||
|
||||
if (tree.parent() == 0) {
|
||||
out << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Escapes XML special characters.
|
||||
template<typename InputTree>
|
||||
std::string XmlTreeWriter<InputTree>::Escape(const std::string &s) const {
|
||||
std::string t;
|
||||
std::size_t len = s.size();
|
||||
t.reserve(len);
|
||||
for (std::size_t i = 0; i < len; ++i) {
|
||||
if (s[i] == '<') {
|
||||
t += "<";
|
||||
} else if (s[i] == '>') {
|
||||
t += ">";
|
||||
} else if (s[i] == '[') {
|
||||
t += "[";
|
||||
} else if (s[i] == ']') {
|
||||
t += "]";
|
||||
} else if (s[i] == '|') {
|
||||
t += "&bar;";
|
||||
} else if (s[i] == '&') {
|
||||
t += "&";
|
||||
} else if (s[i] == '\'') {
|
||||
t += "'";
|
||||
} else if (s[i] == '"') {
|
||||
t += """;
|
||||
} else {
|
||||
t += s[i];
|
||||
}
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
} // namespace PCFG
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
1
scripts/training/phrase-extract/pcfg-extract/Jamfile
Normal file
1
scripts/training/phrase-extract/pcfg-extract/Jamfile
Normal file
@ -0,0 +1 @@
|
||||
# Builds the pcfg-extract executable from every .cc file in this directory,
# using the pcfg-common target from the parent directory and the
# boost_program_options target from the project root.
exe pcfg-extract : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ;
|
25
scripts/training/phrase-extract/pcfg-extract/main.cc
Normal file
25
scripts/training/phrase-extract/pcfg-extract/main.cc
Normal file
@ -0,0 +1,25 @@
|
||||
/***********************************************************************
|
||||
Moses - statistical machine translation system
|
||||
Copyright (C) 2006-2012 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "pcfg_extract.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
Moses::PCFG::PcfgExtract tool;
|
||||
return tool.Main(argc, argv);
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user