Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Barry Haddow 2015-07-02 09:31:21 +00:00
commit 01e6b3f0b3
410 changed files with 5599 additions and 4360 deletions

View File

@ -18,6 +18,8 @@ irstlm
jam-files
lm
mingw/MosesGUI/icons_rc.py
mingw/MosesGUI/Ui_credits.py
mingw/MosesGUI/Ui_mainWindow.py
moses/TranslationModel/UG
phrase-extract/pcfg-common
phrase-extract/syntax-common

View File

@ -89,7 +89,7 @@ if [ path.exists $(home)/moses-environment.jam ]
include $(TOP)/jam-files/check-environment.jam ; # get resource locations
# from environment variables
include $(TOP)/jam-files/xmlrpc-c.jam ; # xmlrpc-c stuff for the server
include $(TOP)/jam-files/curlpp.jam ; # curlpp stuff for bias lookup (MMT only)
# include $(TOP)/jam-files/curlpp.jam ; # curlpp stuff for bias lookup (MMT only)
# exit "done" : 0 ;
@ -108,7 +108,7 @@ external-lib z ;
#lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
#requirements += <library>dl ;
#requirements += <cxxflags>-std=c++0x ;
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
if [ option.get "full-tcmalloc" : : "yes" ] {

View File

@ -62,7 +62,7 @@ void Vocabulary::Save(const string& fileName ) const
vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc);
if (!vcbFile) {
cerr << "Failed to open " << vcbFile << endl;
cerr << "Failed to open " << fileName << endl;
exit(1);
}
@ -81,7 +81,7 @@ void Vocabulary::Load(const string& fileName )
vcbFile.open(fileName.c_str());
if (!vcbFile) {
cerr << "no such file or directory: " << vcbFile << endl;
cerr << "no such file or directory: " << fileName << endl;
exit(1);
}

View File

@ -96,4 +96,4 @@ reset-lm:
-rm -rf lm
reset-all: reset-lm reset-aln
-rm -rf $(wildcard crp/trn/*/[ct]* crp/dev/[ct]* crp/tst/[ct]*)
-rm -rf aux
-rm -rf auxiliary

View File

@ -8,7 +8,7 @@ m4mdir := $(patsubst %modules/,%,\
# $(info M4MDIR is ${m4mdir})
# m4m modules to be included
M4M_MODULES := aux init
M4M_MODULES := auxiliary init
M4M_MODULES += tools moses-parameters prepare-corpus
M4M_MODULES += mgiza fastalign mmbitext phrase-table moses-ini
M4M_MODULES += tune-moses eval-system kenlm

View File

@ -40,8 +40,8 @@ endef
define truecase
$2/cased/%.$3.gz: caser = ${run-truecaser}
$2/cased/%.$3.gz: caser += -model ${WDIR}/aux/truecasing-model.$1
$2/cased/%.$3.gz: | $2/tok/%.$3.gz ${WDIR}/aux/truecasing-model.$1
$2/cased/%.$3.gz: caser += -model ${WDIR}/auxiliary/truecasing-model.$1
$2/cased/%.$3.gz: | $2/tok/%.$3.gz ${WDIR}/auxiliary/truecasing-model.$1
$$(lock)
zcat $$(word 1, $$|) | ${parallel} --pipe -k $${caser} | gzip > $$@_
mv $$@_ $$@
@ -127,8 +127,8 @@ endef
# .SECONDARY: $(call trn.tok-mno,${L1}) $(call trn.tok-pll,${L1})
# .SECONDARY: $(call trn.tok-mno,${L2}) $(call trn.tok-pll,${L2})
#${WDIR}/aux/truecasing-model.${L1}: | $(call trn.tok-mno,${L1}) $(call trn.tok-pll,${L1})
${WDIR}/aux/truecasing-model.${L1}: | $(call trn.tok-mno,${L1})
#${WDIR}/auxiliary/truecasing-model.${L1}: | $(call trn.tok-mno,${L1}) $(call trn.tok-pll,${L1})
${WDIR}/auxiliary/truecasing-model.${L1}: | $(call trn.tok-mno,${L1})
$(lock)
$(if $|,,$(error Can't find training data for $@!))#'
${train-truecaser} -model $@_ -corpus <(echo $| | xargs zcat -f)
@ -136,8 +136,8 @@ ${WDIR}/aux/truecasing-model.${L1}: | $(call trn.tok-mno,${L1})
mv $@_ $@
$(unlock)
#${WDIR}/aux/truecasing-model.${L2}: | $(call trn.tok-mno,${L2}) $(call trn.tok-pll,${L2})
${WDIR}/aux/truecasing-model.${L2}: | $(call trn.tok-mno,${L2})
#${WDIR}/auxiliary/truecasing-model.${L2}: | $(call trn.tok-mno,${L2}) $(call trn.tok-pll,${L2})
${WDIR}/auxiliary/truecasing-model.${L2}: | $(call trn.tok-mno,${L2})
$(lock)
$(if $|,,$(error Can't find training data for $@!))#'
${train-truecaser} -model $@_ -corpus <(echo $| | xargs zcat -f)

View File

@ -11,12 +11,12 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -72,13 +72,13 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="OnDiskPt" InternalType="Library">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src"/>
@ -27,6 +44,8 @@
<File Name="../../../OnDiskPt/Word.cpp"/>
<File Name="../../../OnDiskPt/Word.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -40,9 +59,9 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
</Compiler>
<Linker Options="" Required="yes"/>
@ -72,7 +91,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -110,7 +129,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -118,6 +137,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -1,16 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Workspace Name="all" Database="all.tags">
<Project Name="manual-label" Path="manual-label/manual-label.project" Active="No"/>
<Project Name="extract" Path="extract/extract.project" Active="No"/>
<Project Name="util" Path="util/util.project" Active="No"/>
<Project Name="extract-mixed-syntax" Path="extract-mixed-syntax/extract-mixed-syntax.project" Active="No"/>
<Project Name="lm" Path="lm/lm.project" Active="No"/>
<Project Name="OnDiskPt" Path="OnDiskPt/OnDiskPt.project" Active="No"/>
<Project Name="search" Path="search/search.project" Active="No"/>
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="Yes"/>
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="No"/>
<Project Name="score" Path="score/score.project" Active="No"/>
<Project Name="consolidate" Path="consolidate/consolidate.project" Active="No"/>
<Project Name="moses" Path="moses/moses.project" Active="No"/>
<Project Name="pruneGeneration" Path="pruneGeneration/pruneGeneration.project" Active="Yes"/>
<BuildMatrix>
<WorkspaceConfiguration Name="Debug" Selected="yes">
<Project Name="manual-label" ConfigName="Debug"/>
@ -24,6 +24,7 @@
<Project Name="score" ConfigName="Debug"/>
<Project Name="consolidate" ConfigName="Debug"/>
<Project Name="moses" ConfigName="Debug"/>
<Project Name="pruneGeneration" ConfigName="Debug"/>
</WorkspaceConfiguration>
<WorkspaceConfiguration Name="Release" Selected="yes">
<Project Name="manual-label" ConfigName="Release"/>
@ -37,6 +38,7 @@
<Project Name="score" ConfigName="Release"/>
<Project Name="consolidate" ConfigName="Release"/>
<Project Name="moses" ConfigName="Release"/>
<Project Name="pruneGeneration" ConfigName="Release"/>
</WorkspaceConfiguration>
</BuildMatrix>
</CodeLite_Workspace>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="extract-mixed-syntax" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src"/>
@ -43,6 +60,10 @@
<File Name="../../../phrase-extract/OutputFileStream.cpp"/>
<File Name="../../../phrase-extract/OutputFileStream.h"/>
</VirtualDirectory>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -56,13 +77,14 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../../"/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
<LibraryPath Value="Debug"/>
<Library Value="util"/>
<Library Value="boost_iostreams"/>
<Library Value="boost_program_options"/>
@ -94,7 +116,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -133,7 +155,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -141,8 +163,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -26,6 +26,7 @@
<option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>

View File

@ -5,16 +5,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2119725657." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1708444053" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.645190133" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
@ -25,6 +25,7 @@
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
@ -61,16 +62,16 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1230189043" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1230189043." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.280378247" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1881910636" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="extract" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src">
@ -13,6 +30,8 @@
<File Name="../../../phrase-extract/tables-core.cpp"/>
<File Name="../../../phrase-extract/tables-core.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -26,11 +45,11 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../../"/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="../../../boost/lib64"/>
<Library Value="boost_iostreams"/>
<Library Value="z"/>
</Linker>
@ -60,7 +79,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -99,7 +118,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -107,6 +126,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="lm" InternalType="Library">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src"/>
@ -27,6 +44,8 @@
<File Name="../../../lm/virtual_interface.cc"/>
<File Name="../../../lm/vocab.cc"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -40,9 +59,9 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
</Compiler>
<Linker Options="" Required="yes"/>
@ -72,7 +91,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -110,7 +129,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -118,6 +137,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -1,132 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2107801703">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2107801703." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.502948364" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1431969079" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/manual-label}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.2101075234" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1118840081" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.400985496" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1160903812" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.404589863" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="${workspace_loc:}/../.."/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2033266575" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1568929819" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676866714" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.254144861" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.319879082" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.132164474" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.1017214824" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_program_options"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1672776758" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1104732611" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.372096550" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.649050588">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.649050588" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.649050588" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.649050588." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1107402972" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1038954684" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/manual-label}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.100518450" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2005888378" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.968169340" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.977676916" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1914416581" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.826081780" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2048171432" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.940327646" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.369758737" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1186766936" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.266174128" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.558116084" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="manual-label.cdt.managedbuild.target.gnu.exe.1701243340" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968;cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295;cdt.managedbuild.tool.gnu.c.compiler.input.2048171432">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673;cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964;cdt.managedbuild.tool.gnu.c.compiler.input.676866714">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/manual-label"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/manual-label"/>
</configuration>
</storageModule>
</cproject>

View File

@ -1,27 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>manual-label</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
</projectDescription>

View File

@ -1,46 +0,0 @@
#include <list>
#include "DeEn.h"
#include "Main.h"
#include "moses/Util.h"
using namespace std;
extern bool g_debug;
// Returns true when at least one word in the inclusive position range
// [start, end] of `source` has its `factor`-th factor equal to one of the
// space-separated alternatives in `str` (the matching itself is done by IsA).
bool Contains(const Phrase &source, int start, int end, int factor, const string &str)
{
  int idx = start;
  while (idx <= end) {
    if (IsA(source, idx, 0, factor, str)) {
      return true;
    }
    ++idx;
  }
  return false;
}
// Labels reordering spans in a German source sentence and writes the sentence
// with <tree label="reorder-label"> markup to `out`.
//
// A span [start, end] is labelled when either
//   (a) the word before it is tagged VAFIN, the word after it is tagged
//       VVINF or VVPP, and no word inside is tagged VAFIN/VVINF/VVPP/VVFIN; or
//   (b) it starts the sentence or follows a "$," tag, is followed by the
//       surface word "zu" and then a VVINF tag, and contains no "$," tag.
// Factor 0 is compared against surface forms and factor 1 against tags.
void LabelDeEn(const Phrase &source, ostream &out)
{
  Ranges ranges;

  // enumerate every candidate span
  for (int start = 0; start < source.size(); ++start) {
    for (int end = start; end < source.size(); ++end) {
      const bool label =
        (IsA(source, start, -1, 1, "VAFIN")
         && IsA(source, end, +1, 1, "VVINF VVPP")
         && !Contains(source, start, end, 1, "VAFIN VVINF VVPP VVFIN"))
        ||
        ((start == 0 || IsA(source, start, -1, 1, "$,"))
         && IsA(source, end, +1, 0, "zu")
         && IsA(source, end, +2, 1, "VVINF")
         && !Contains(source, start, end, 1, "$,"));

      if (label) {
        ranges.push_back(Range(start, end, "reorder-label"));
      }
    }
  }

  OutputWithLabels(source, ranges, out);
}

View File

@ -1,5 +0,0 @@
#pragma once
#include "Main.h"
// Finds reordering spans in the tokenised sentence `source` and writes the
// sentence to `out` with each span wrapped in <tree label="reorder-label">.
void LabelDeEn(const Phrase &source, std::ostream &out);

View File

@ -1,202 +0,0 @@
/*
* EnApacheChunker.cpp
*
* Created on: 28 Feb 2014
* Author: hieu
*/
#include <cstdlib>
#include <cstdio>
#include <algorithm>
#include <fstream>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/filesystem.hpp>
#include "EnOpenNLPChunker.h"
#include "moses/Util.h"
using namespace std;
using namespace boost::algorithm;
// Remembers the OpenNLP installation directory; nothing else is set up here.
EnOpenNLPChunker::EnOpenNLPChunker(const std::string &openNLPPath)
  : m_openNLPPath(openNLPPath)
{
}
// The only member is a std::string, so there is nothing to release by hand.
EnOpenNLPChunker::~EnOpenNLPChunker()
{
}
void EnOpenNLPChunker::Process(std::istream &in, std::ostream &out, const vector<string> &filterList)
{
const boost::filesystem::path
inPath = boost::filesystem::unique_path(),
outPath = boost::filesystem::unique_path();
// read all input to a temp file
ofstream inFile(inPath.c_str());
string line;
while (getline(in, line)) {
Unescape(line);
inFile << line << endl;
}
inFile.close();
// execute chunker
string cmd = "cat " + inPath.native() + " | "
+ m_openNLPPath + "/bin/opennlp POSTagger "
+ m_openNLPPath + "/models/en-pos-maxent.bin | "
+ m_openNLPPath + "/bin/opennlp ChunkerME "
+ m_openNLPPath + "/models/en-chunker.bin > "
+ outPath.native();
//g << "Executing:" << cmd << endl;
int ret = system(cmd.c_str());
// read result of chunker and output as Moses xml trees
ifstream outFile(outPath.c_str());
size_t lineNum = 0;
while (getline(outFile, line)) {
//cerr << line << endl;
MosesReformat(line, out, filterList);
out << endl;
++lineNum;
}
outFile.close();
// clean up temporary files
remove(inPath.c_str());
remove(outPath.c_str());
}
void EnOpenNLPChunker::MosesReformat(const string &line, std::ostream &out, const vector<string> &filterList)
{
//cerr << "REFORMATING:" << line << endl;
bool inLabel = false;
vector<string> toks;
Moses::Tokenize(toks, line);
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
if (tok.substr(0, 1) == "[" && tok.substr(1,1) != "_") {
// start of chunk
string label = tok.substr(1);
if (UseLabel(label, filterList)) {
out << "<tree label=\"" << label << "\">";
inLabel = true;
}
}
else if (ends_with(tok, "]")) {
// end of chunk
if (tok.size() > 1) {
if (tok.substr(1,1) == "_") {
// just a word that happens to be ]
vector<string> factors;
Moses::Tokenize(factors, tok, "_");
assert(factors.size() == 2);
Escape(factors[0]);
out << factors[0] << " ";
}
else {
// a word and end of tree
string word = tok.substr(0, tok.size()-1);
vector<string> factors;
Moses::Tokenize(factors, word, "_");
assert(factors.size() == 2);
Escape(factors[0]);
out << factors[0] << " ";
}
if (inLabel) {
out << "</tree> ";
inLabel = false;
}
}
else {
if (inLabel) {
out << "</tree> ";
inLabel = false;
}
}
}
else {
// lexical item
vector<string> factors;
Moses::Tokenize(factors, tok, "_");
if (factors.size() == 2) {
Escape(factors[0]);
out << factors[0] << " ";
}
else if (factors.size() == 1) {
// word is _
assert(tok.substr(0, 2) == "__");
out << "_ ";
}
else {
throw "Unknown format:" + tok;
}
}
}
}
// Returns a copy of `original` in which every non-overlapping occurrence of
// `before`, scanning left to right, is replaced by `after`.
//
// An empty `before` is treated as "nothing to replace" and `original` is
// returned unchanged: an empty pattern matches at every position without
// consuming any input, so the scan could otherwise never advance.
std::string
replaceAll( std::string const& original,
            std::string const& before,
            std::string const& after )
{
  if (before.empty()) {
    return original;
  }

  std::string retval;
  retval.reserve(original.size());

  std::string::size_type current = 0;
  std::string::size_type next = original.find(before, current);
  while (next != std::string::npos) {
    retval.append(original, current, next - current);
    retval.append(after);
    current = next + before.size();
    next = original.find(before, current);
  }
  // copy whatever follows the last match
  retval.append(original, current, std::string::npos);
  return retval;
}
// Replaces the characters & | < > ' " [ ] in `line` with their entity forms,
// in place. '&' is handled first so that the ampersands introduced by the
// later replacements are not escaped a second time.
void EnOpenNLPChunker::Escape(string &line)
{
  static const char *const kEntities[][2] = {
    {"&", "&amp;"},
    {"|", "&#124;"},
    {"<", "&lt;"},
    {">", "&gt;"},
    {"'", "&apos;"},
    {"\"", "&quot;"},
    {"[", "&#91;"},
    {"]", "&#93;"}
  };
  const size_t numEntities = sizeof(kEntities) / sizeof(kEntities[0]);

  for (size_t k = 0; k < numEntities; ++k) {
    line = replaceAll(line, kEntities[k][0], kEntities[k][1]);
  }
}
// Reverses Escape in place: turns the entity forms back into the characters
// | < > " ' [ ] &. "&amp;" is handled last so that a decoded ampersand cannot
// combine with following text into another entity.
void EnOpenNLPChunker::Unescape(string &line)
{
  static const char *const kEntities[][2] = {
    {"&#124;", "|"},
    {"&lt;", "<"},
    {"&gt;", ">"},
    {"&quot;", "\""},
    {"&apos;", "'"},
    {"&#91;", "["},
    {"&#93;", "]"},
    {"&amp;", "&"}
  };
  const size_t numEntities = sizeof(kEntities) / sizeof(kEntities[0]);

  for (size_t k = 0; k < numEntities; ++k) {
    line = replaceAll(line, kEntities[k][0], kEntities[k][1]);
  }
}
// Decides whether a chunk label should be emitted: every label passes when
// `filterList` is empty, otherwise only labels that appear in it.
bool EnOpenNLPChunker::UseLabel(const std::string &label, const std::vector<std::string> &filterList) const
{
  if (filterList.empty()) {
    return true;
  }
  return std::find(filterList.begin(), filterList.end(), label) != filterList.end();
}

View File

@ -1,29 +0,0 @@
/*
* EnApacheChunker.h
*
* Created on: 28 Feb 2014
* Author: hieu
*/
#pragma once
#include <vector>
#include <string>
#include <iostream>
// Chunks English text by running the Apache OpenNLP command-line tools and
// rewrites their bracketed output as Moses <tree label="..."> markup.
class EnOpenNLPChunker {
public:
// openNLPPath: OpenNLP directory; Process expects bin/opennlp and the
// models/ sub-directory underneath it.
EnOpenNLPChunker(const std::string &openNLPPath);
virtual ~EnOpenNLPChunker();
// Reads every line of `in`, chunks it, and writes the marked-up result to
// `out`. Only labels listed in `filterList` are emitted; an empty list keeps
// all labels.
void Process(std::istream &in, std::ostream &out, const std::vector<std::string> &filterList);
protected:
// Directory prefix used to build the opennlp command line.
const std::string m_openNLPPath;
// Replaces & | < > ' " [ ] in `line` with entity forms, in place.
void Escape(std::string &line);
// Inverse of Escape, in place.
void Unescape(std::string &line);
// Converts one line of chunker output into Moses markup written to `out`.
void MosesReformat(const std::string &line, std::ostream &out, const std::vector<std::string> &filterList);
// True when `filterList` is empty or contains `label`.
bool UseLabel(const std::string &label, const std::vector<std::string> &filterList) const;
};

View File

@ -1,226 +0,0 @@
#include <iostream>
#include <list>
#include <limits>
#include <algorithm>
#include "EnPhrasalVerb.h"
#include "moses/Util.h"
using namespace std;
namespace
{

// One phrasal-verb rule: if the surface form of a word is one of `verbForms`
// (space-separated), look ahead for one of `particles` (space-separated).
struct PhrasalVerbRule {
  const char *verbForms;
  const char *particles;
};

// Rules are tried in order and the first rule whose verb matches wins.
// NOTE(review): several lists lack irregular forms ("sent", "tore", "paid");
// left as they were because adding them changes the labelling output.
const PhrasalVerbRule kPhrasalVerbRules[] = {
  {"ask asked asking", "out"},
  {"back backed backing", "up"},
  {"blow blown blew", "up"},
  {"break broke broken", "down up in"},
  {"bring brought bringing", "down up in"},
  {"call called calling", "back up off"},
  {"check checked checking", "out in"},
  {"cheer cheered cheering", "up"},
  {"clean cleaned cleaning", "up"},
  {"cross crossed crossing", "out"},
  {"cut cutting", "down off out"},
  {"do did done", "over up"},
  {"drop dropped dropping", "off"},
  {"figure figured figuring", "out"},
  {"fill filled filling", "in out up"},
  {"find found finding", "out"},
  {"get got getting gotten", "across over back"},
  {"give given gave giving", "away back out up"},
  {"hand handed handing", "down in over"},
  {"hold held holding", "back up"},
  {"keep kept keeping", "from up"},
  {"let letting", "down in"},
  {"look looked looking", "over up"},
  {"make made making", "up"},
  {"mix mixed mixing", "up"},
  {"pass passed passing", "out up"},
  {"pay payed paying", "back"},
  {"pick picked picking", "out"},
  {"point pointed pointing", "out"},
  {"put putting", "down off out together on"},
  {"send sending", "back"},
  {"set setting", "up"},
  {"sort sorted sorting", "out"},
  {"switch switched switching", "off on"},
  {"take took taking", "apart back off out"},
  {"tear torn tearing", "up"},
  {"think thought thinking", "over"},
  // was "thrown threw thrown throwing": the base form "throw" never matched
  {"throw threw thrown throwing", "away"},
  {"turn turned turning", "down off on"},
  {"try tried trying", "on out"},
  {"use used using", "up"},
  {"warm warmed warming", "up"},
  {"work worked working", "out"}
};

const size_t kNumPhrasalVerbRules =
  sizeof(kPhrasalVerbRules) / sizeof(kPhrasalVerbRules[0]);

} // namespace

// Labels the words that sit between a phrasal verb and its particle and
// writes the sentence with <tree label="reorder-label"> markup to `out`.
//
// For each word whose surface form (factor 0) matches a rule's verb list,
// Found() searches ahead for one of the rule's particles. When the particle
// is at least two positions after the verb, the words strictly between them
// become a labelled range.
//
// revision: when 1, a range is dropped if any word inside it carries a verb
//           tag (factor 1 is one of VB VBD VBG VBN VBP VBZ); other values
//           keep every range.
void EnPhrasalVerb(const Phrase &source, int revision, ostream &out)
{
  const size_t notFound = std::numeric_limits<size_t>::max();
  const int numWords = static_cast<int>(source.size());

  Ranges ranges;

  // find ranges to label
  for (int start = 0; start < numWords; ++start) {
    size_t end = notFound;
    for (size_t r = 0; r < kNumPhrasalVerbRules; ++r) {
      const PhrasalVerbRule &rule = kPhrasalVerbRules[r];
      if (IsA(source, start, 0, 0, rule.verbForms)) {
        end = Found(source, start, 0, rule.particles);
        break;
      }
    }

    // need a particle, and at least one word between verb and particle
    if (end == notFound || end <= static_cast<size_t>(start) + 1) {
      continue;
    }

    const int first = start + 1;
    const int last = static_cast<int>(end) - 1;

    // there's a verb in between
    const bool verbInBetween =
      revision == 1 && Exist(source, first, last, 1, "VB VBD VBG VBN VBP VBZ");

    if (!verbInBetween) {
      ranges.push_back(Range(first, last, "reorder-label"));
    }
  }

  OutputWithLabels(source, ranges, out);
}
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str)
{
vector<string> soughts = Moses::Tokenize(str, " ");
for (size_t i = start; i <= end; ++i) {
const Word &word = source[i];
bool found = Found(word, factor, soughts);
if (found) {
return true;
}
}
return false;
}
size_t Found(const Phrase &source, int pos, int factor, const std::string &str)
{
const size_t MAX_RANGE = 10;
vector<string> soughts = Moses::Tokenize(str, " ");
vector<string> puncts = Moses::Tokenize(". : , ;", " ");
size_t maxEnd = std::min(source.size(), (size_t) pos + MAX_RANGE);
for (size_t i = pos + 1; i < maxEnd; ++i) {
const Word &word = source[i];
bool found;
found = Found(word, factor, puncts);
if (found) {
return std::numeric_limits<size_t>::max();
}
found = Found(word, factor, soughts);
if (found) {
return i;
}
}
return std::numeric_limits<size_t>::max();
}
bool Found(const Word &word, int factor, const vector<string> &soughts)
{
const string &element = word[factor];
for (size_t i = 0; i < soughts.size(); ++i) {
const string &sought = soughts[i];
bool found = (element == sought);
if (found) {
return true;
}
}
return false;
}

View File

@ -1,11 +0,0 @@
#pragma once
#include "Main.h"
// roll your own identification of phrasal verbs
// Writes `source` to `out`, labelling the words between a phrasal verb and its
// particle. With revision == 1, spans that contain a verb tag are not labelled.
void EnPhrasalVerb(const Phrase &source, int revision, std::ostream &out);
// True when any word at positions start..end (inclusive) has its `factor`-th
// factor equal to one of the space-separated strings in `str`.
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str);
// Position of the first word after `pos` whose `factor`-th factor is one of
// the space-separated strings in `str`, or numeric_limits<size_t>::max() when
// there is none within the search window or punctuation comes first.
size_t Found(const Phrase &source, int pos, int factor, const std::string &str);
// True when the `factor`-th factor of `word` equals any element of `soughts`.
bool Found(const Word &word, int factor, const std::vector<std::string> &soughts);

View File

@ -1,29 +0,0 @@
#include "LabelByInitialLetter.h"
#include "Main.h"
using namespace std;
void LabelByInitialLetter(const Phrase &source, std::ostream &out)
{
Ranges ranges;
for (int start = 0; start < source.size(); ++start) {
const string &startWord = source[start][0];
string startChar = startWord.substr(0,1);
for (int end = start + 1; end < source.size(); ++end) {
const string &endWord = source[end][0];
string endChar = endWord.substr(0,1);
if (startChar == endChar) {
Range range(start, end, startChar + "-label");
ranges.push_back(range);
}
}
}
OutputWithLabels(source, ranges, out);
}

View File

@ -1,6 +0,0 @@
#pragma once
#include "Main.h"
// Writes `source` to `out`, labelling every span whose first and last words
// start with the same character.
void LabelByInitialLetter(const Phrase &source, std::ostream &out);

View File

@ -1,195 +0,0 @@
#include <iostream>
#include <cstdlib>
#include <boost/program_options.hpp>
#include "moses/Util.h"
#include "Main.h"
#include "DeEn.h"
#include "EnPhrasalVerb.h"
#include "EnOpenNLPChunker.h"
#include "LabelByInitialLetter.h"
using namespace std;
bool g_debug = false;
Phrase Tokenize(const string &line);
int main(int argc, char** argv)
{
cerr << "Starting" << endl;
namespace po = boost::program_options;
po::options_description desc("Options");
desc.add_options()
("help", "Print help messages")
("input,i", po::value<string>(), "Input file. Otherwise it will read from standard in")
("output,o", po::value<string>(), "Output file. Otherwise it will print from standard out")
("source-language,s", po::value<string>()->required(), "Source Language")
("target-language,t", po::value<string>()->required(), "Target Language")
("revision,r", po::value<int>()->default_value(0), "Revision")
("filter", po::value<string>(), "Only use labels from this comma-separated list")
("opennlp", po::value<string>()->default_value(""), "Path to Apache OpenNLP toolkit")
;
po::variables_map vm;
try
{
po::store(po::parse_command_line(argc, argv, desc),
vm); // can throw
/** --help option
*/
if ( vm.count("help") )
{
std::cout << "Basic Command Line Parameter App" << std::endl
<< desc << std::endl;
return EXIT_SUCCESS;
}
po::notify(vm); // throws on error, so do after help in case
// there are any problems
}
catch(po::error& e)
{
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return EXIT_FAILURE;
}
istream *inStrm = &cin;
if (vm.count("input")) {
string inStr = vm["input"].as<string>();
cerr << "inStr=" << inStr << endl;
ifstream *inFile = new ifstream(inStr.c_str());
inStrm = inFile;
}
ostream *outStrm = &cout;
if (vm.count("output")) {
string outStr = vm["output"].as<string>();
cerr << "outStr=" << outStr << endl;
ostream *outFile = new ofstream(outStr.c_str());
outStrm = outFile;
}
vector<string> filterList;
if (vm.count("filter")) {
string filter = vm["filter"].as<string>();
Moses::Tokenize(filterList, filter, ",");
}
string sourceLang = vm["source-language"].as<string>();
string targetLang = vm["target-language"].as<string>();
int revision = vm["revision"].as<int>();
cerr << sourceLang << " " << targetLang << " " << revision << endl;
if (sourceLang == "en" && revision == 2) {
if (vm.count("opennlp") == 0) {
throw "Need path to openNLP toolkit";
}
string openNLPPath = vm["opennlp"].as<string>();
EnOpenNLPChunker chunker(openNLPPath);
chunker.Process(*inStrm, *outStrm, filterList);
}
else {
// process line-by-line
string line;
size_t lineNum = 1;
while (getline(*inStrm, line)) {
//cerr << lineNum << ":" << line << endl;
if (lineNum % 1000 == 0) {
cerr << lineNum << " ";
}
Phrase source = Tokenize(line);
if (revision == 600 ) {
LabelByInitialLetter(source, *outStrm);
}
else if (sourceLang == "de" && targetLang == "en") {
LabelDeEn(source, *outStrm);
}
else if (sourceLang == "en") {
if (revision == 0 || revision == 1) {
EnPhrasalVerb(source, revision, *outStrm);
}
else if (revision == 2) {
string openNLPPath = vm["opennlp-path"].as<string>();
EnOpenNLPChunker chunker(openNLPPath);
}
}
++lineNum;
}
}
cerr << "Finished" << endl;
return EXIT_SUCCESS;
}
// Splits `line` into tokens with Moses::Tokenize (default delimiter) and each
// token into its '|'-separated factors, giving one Word per token.
Phrase Tokenize(const string &line)
{
  const vector<string> tokens = Moses::Tokenize(line);

  Phrase phrase;
  for (vector<string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok) {
    Word factors = Moses::Tokenize(*tok, "|");
    phrase.push_back(factors);
  }
  return phrase;
}
bool IsA(const Phrase &source, int pos, int offset, int factor, const string &str)
{
pos += offset;
if (pos >= source.size() || pos < 0) {
return false;
}
const string &word = source[pos][factor];
vector<string> soughts = Moses::Tokenize(str, " ");
for (int i = 0; i < soughts.size(); ++i) {
string &sought = soughts[i];
bool found = (word == sought);
if (found) {
return true;
}
}
return false;
}
void OutputWithLabels(const Phrase &source, const Ranges ranges, ostream &out)
{
// output sentence, with labels
for (int pos = 0; pos < source.size(); ++pos) {
// output beginning of label
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
const Range &range = *iter;
if (range.range.first == pos) {
out << "<tree label=\"" + range.label + "\"> ";
}
}
const Word &word = source[pos];
out << word[0] << " ";
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
const Range &range = *iter;
if (range.range.second == pos) {
out << "</tree> ";
}
}
}
out << endl;
}

View File

@ -1,27 +0,0 @@
#pragma once
#include <iostream>
#include <vector>
#include <string>
#include <list>
// A word is the list of its factors; code in this tool reads factor 0 as the
// surface form and factor 1 as a tag.
typedef std::vector<std::string> Word;
// A sentence: one Word per token.
typedef std::vector<Word> Phrase;
// A labelled span of word positions.
struct Range
{
// start/end are stored as range.first/range.second; l is the label text.
Range(int start,int end, const std::string &l)
:range(start, end)
,label(l)
{}
// (first, second) = position of the first and last word of the span
std::pair<int,int> range;
// text placed in the label attribute of the emitted <tree> tag
std::string label;
};
// Collection of spans to mark up in one sentence.
typedef std::list<Range> Ranges;
// True when the word at pos + offset exists and its `factor`-th factor equals
// one of the space-separated strings in `str`.
bool IsA(const Phrase &source, int pos, int offset, int factor, const std::string &str);
// Writes the sentence to `out` with <tree label="..."> ... </tree> markup
// around every span in `ranges`.
void OutputWithLabels(const Phrase &source, const Ranges ranges, std::ostream &out);

View File

@ -1,14 +0,0 @@
# Default target: build the manual-label executable.
all: manual-label
# Remove object files and the executable.
clean:
rm -f *.o manual-label
# Suffix rule: compile each .cpp into a .o, with Boost headers and the
# repository root (three levels up) on the include path.
.cpp.o:
g++ -I../../../boost/include -I../../../ -O3 -g -c $<
# Object files that make up the tool.
OBJECTS = DeEn.o EnOpenNLPChunker.o EnPhrasalVerb.o Main.o LabelByInitialLetter.o
# Link step: needs zlib and the multi-threaded Boost.Program_options library.
manual-label: $(OBJECTS)
g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_program_options-mt -o manual-label

View File

@ -46,20 +46,20 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
<Preprocessor Value="TRACE_ENABLE=1"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
<Library Value="util"/>
<Library Value="moses"/>
<Library Value="search"/>

View File

@ -11,11 +11,11 @@
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -79,12 +79,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">

View File

@ -220,6 +220,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/ConfusionNet.h</locationURI>
</link>
<link>
<name>ContextParameters.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.cpp</locationURI>
</link>
<link>
<name>ContextParameters.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
</link>
<link>
<name>DecodeGraph.cpp</name>
<type>1</type>

View File

@ -1,6 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="moses" InternalType="Library">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
@ -13,9 +16,6 @@
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<VirtualDirectory Name="TranslationModel">
<VirtualDirectory Name="UG">
@ -531,8 +531,6 @@
<File Name="../../../moses/FF/RuleScope.h"/>
<File Name="../../../moses/FF/SetSourcePhrase.cpp"/>
<File Name="../../../moses/FF/SetSourcePhrase.h"/>
<File Name="../../../moses/FF/SkeletonChangeInput.cpp"/>
<File Name="../../../moses/FF/SkeletonChangeInput.h"/>
<File Name="../../../moses/FF/SkeletonStatefulFF.cpp"/>
<File Name="../../../moses/FF/SkeletonStatefulFF.h"/>
<File Name="../../../moses/FF/SkeletonStatelessFF.cpp"/>
@ -777,6 +775,8 @@
<File Name="../../../moses/WordsRange.h"/>
<File Name="../../../moses/XmlOption.cpp"/>
<File Name="../../../moses/XmlOption.h"/>
<File Name="../../../moses/OutputFileStream.cpp"/>
<File Name="../../../moses/OutputFileStream.h"/>
</VirtualDirectory>
<VirtualDirectory Name="PP">
<File Name="../../../moses/PP/CountsPhraseProperty.cpp"/>
@ -795,11 +795,13 @@
<File Name="../../../moses/PP/SpanLengthPhraseProperty.h"/>
<File Name="../../../moses/PP/TreeStructurePhraseProperty.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<VirtualDirectory Name="parameters">
<File Name="../../../moses/parameters/ContextParameters.cpp"/>
<File Name="../../../moses/parameters/ContextParameters.h"/>
<File Name="../../../moses/parameters/BookkeepingOptions.cpp"/>
<File Name="../../../moses/parameters/BookkeepingOptions.h"/>
<File Name="../../../moses/parameters/NBestOptions.cpp"/>
<File Name="../../../moses/parameters/NBestOptions.h"/>
</VirtualDirectory>
<Settings Type="Static Library">
<GlobalSettings>
@ -812,11 +814,11 @@
<ResourceCompiler Options=""/>
</GlobalSettings>
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<Compiler Options="-g -std=c++0x" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../../"/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
<Preprocessor Value="WITH_THREADS"/>
@ -895,4 +897,6 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -1,9 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="manual-label" InternalType="Console">
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
<Plugins>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
@ -16,20 +13,15 @@
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="manual-label">
<File Name="DeEn.cpp"/>
<File Name="DeEn.h"/>
<File Name="EnOpenNLPChunker.cpp"/>
<File Name="EnOpenNLPChunker.h"/>
<File Name="EnPhrasalVerb.cpp"/>
<File Name="EnPhrasalVerb.h"/>
<File Name="LabelByInitialLetter.cpp"/>
<File Name="LabelByInitialLetter.h"/>
<File Name="Main.cpp"/>
<File Name="Main.h"/>
<VirtualDirectory Name="src">
<File Name="../../../misc/pruneGeneration.cpp"/>
<File Name="../../../misc/pruneGeneration.h"/>
</VirtualDirectory>
<Settings Type="Executable">
<GlobalSettings>
@ -41,24 +33,26 @@
</Linker>
<ResourceCompiler Options=""/>
</GlobalSettings>
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Configuration Name="Debug" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<Library Value="boost_program_options"/>
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<Library Value="boost_filesystem"/>
<Library Value="boost_system"/>
<Library Value="moses"/>
<Library Value="z"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
<![CDATA[]]>
</Environment>
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="no">
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="yes">
<DebuggerSearchPaths/>
<PostConnectCommands/>
<StartupCommands/>
@ -79,14 +73,14 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
<SearchPaths/>
</Completion>
</Configuration>
<Configuration Name="Release" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Configuration Name="Release" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-O2;-Wall" C_Options="-O2;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<Preprocessor Value="NDEBUG"/>
@ -118,7 +112,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -126,6 +120,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -59,7 +59,6 @@
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>

View File

@ -19,6 +19,10 @@
<File Name="../../../phrase-extract/tables-core.cpp"/>
<File Name="../../../phrase-extract/tables-core.h"/>
</VirtualDirectory>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -32,17 +36,17 @@
<Configuration Name="Debug" CompilerType="clang( based on LLVM 3.5svn )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
<Library Value="moses"/>
<Library Value="search"/>
<Library Value="OnDiskPt"/>
@ -86,7 +90,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -125,7 +129,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -133,8 +137,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug">
<Project Name="util"/>
</Dependencies>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -10,6 +10,8 @@
<File Name="../../../search/rule.cc"/>
<File Name="../../../search/vertex.cc"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -23,9 +25,9 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../phrase-extract"/>
<IncludePath Value="../../../boost/include"/>
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
</Compiler>
<Linker Options="" Required="yes"/>
@ -55,7 +57,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -93,7 +95,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -101,6 +103,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -75,7 +75,6 @@
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
@ -159,10 +158,10 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/server"/>
</configuration>
<configuration configurationName="Debug">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/server"/>
</configuration>
</storageModule>

View File

@ -62,6 +62,8 @@
<File Name="../../../util/stream/sort_test.cc" ExcludeProjConfig="Debug"/>
<File Name="../../../util/stream/stream_test.cc" ExcludeProjConfig="Debug"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Static Library">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -75,8 +77,8 @@
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes"/>
<ResourceCompiler Options="" Required="no"/>
@ -105,7 +107,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -143,7 +145,7 @@
<CustomPostBuild/>
<CustomPreBuild/>
</AdditionalRules>
<Completion EnableCpp11="no">
<Completion EnableCpp11="no" EnableCpp14="no">
<ClangCmpFlagsC/>
<ClangCmpFlags/>
<ClangPP/>
@ -151,6 +153,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -17,6 +17,15 @@ configname=$(basename $configf | sed 's/\.config$//')
source "$configf"
# beautifier
git clone git@github.com:moses-smt/mosesdecoder.git /tmp/moses
cd /tmp/moses
./scripts/other/beautify.py --format --skip-perltidy
git commit -am "daily automatic beautifier"
git push
rm -rf /tmp/moses
cd -
[ -z "$MCC_SCAN_BRANCHES" ] \
&& die "Bad config $configf; does not define MCC_SCAN_BRANCHES"
@ -107,7 +116,6 @@ function run_single_test () {
#regtest_dir=$PWD/$(basename $regtest_file .tgz)
cd ..
echo "## ./bjam clean" >> $longlog
./bjam clean $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
@ -155,6 +163,7 @@ function run_single_test () {
if [ -z "$err" ]; then
status="OK"
else
git reset --hard HEAD
status="FAIL:$err"
fi
echo "## Status: $status" >> $longlog
@ -187,7 +196,7 @@ done
#### Main loop over all commits
for i in $MCC_SCAN_BRANCHES; do
warn "On brach $i"
warn "On branch $i"
git rev-list $i \
| while read commit; do
first_char=$(echo $commit | grep -o '^.')

View File

@ -67,7 +67,9 @@ private:
protected:
/// For child classes only: retrieve filebuf.
__gnu_cxx::stdio_filebuf<char> *get_filebuf() { return _filebuf; }
__gnu_cxx::stdio_filebuf<char> *get_filebuf() {
return _filebuf;
}
};
class ifdstream : public _fdstream

View File

@ -4,14 +4,17 @@
Module implementing Dialog.
"""
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtGui import (
QDialog,
QFileDialog,
)
from PyQt4.QtCore import pyqtSignature
import datetime
import os
from Ui_addMTModel import Ui_Dialog
from util import *
from util import doAlert
class AddMTModelDialog(QDialog, Ui_Dialog):
@ -88,7 +91,7 @@ class AddMTModelDialog(QDialog, Ui_Dialog):
def checkEmpty(mystr):
return len(str(mystr).strip()) <= 0
#check everything
# Check everything.
self.modelName = self.editName.text()
if checkEmpty(self.modelName):
doAlert("Please provide non-empty Model Name")

View File

@ -4,11 +4,18 @@
Module implementing ChooseMTModelDialog.
"""
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtSql import *
import sys
from PyQt4.QtCore import (
pyqtSignature,
QObject,
SIGNAL,
)
from PyQt4.QtGui import QDialog
from PyQt4.QtSql import QSqlQueryModel
from Ui_chooseMTModel import Ui_Dialog
from util import doAlert
class ChooseMTModelDialog(QDialog, Ui_Dialog):
@ -28,14 +35,20 @@ class ChooseMTModelDialog(QDialog, Ui_Dialog):
self.selTableView.hideColumn(0)
self.selTableView.hideColumn(5)
self.selTableView.hideColumn(6)
#change status and keep the column
QObject.connect(datamodel, SIGNAL("modelInstalled()"), self.on_datamodel_modelInstalled)
# Change status and keep the column.
QObject.connect(
datamodel, SIGNAL("modelInstalled()"),
self.on_datamodel_modelInstalled)
def updateModel(self):
self.model.setQuery('SELECT ID, name, srclang, trglang, status, path, mosesini FROM models WHERE status = "READY" AND deleted != "True"', self.database)
self.model.setQuery(
'SELECT ID, name, srclang, trglang, status, path, mosesini '
'FROM models '
'WHERE status = "READY" AND deleted != "True"',
self.database)
def on_datamodel_recordUpdated(self, bRecord):
#deal with the selection changed problem
"""Deal with the selection changed problem."""
try:
if bRecord:
current = self.selTableView.currentIndex()
@ -44,9 +57,9 @@ class ChooseMTModelDialog(QDialog, Ui_Dialog):
else:
self.curSelection = None
else:
if not self.curSelection is None:
if self.curSelection is not None:
self.selTableView.selectRow(self.curSelection)
except Exception, e:
except Exception as e:
print >> sys.stderr, str(e)
def on_datamodel_modelInstalled(self):

View File

@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtGui import QApplication
import os
import sys
@ -9,7 +8,6 @@ import sys
from mainWindow import MainWindow
from datamodel import DataModel
from moses import Moses
from util import *
if __name__ == "__main__":
app = QApplication(sys.argv)

View File

@ -4,10 +4,19 @@
Module implementing MainWindow.
"""
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtSql import *
from PyQt4.QtCore import (
pyqtSignature,
QObject,
Qt,
SIGNAL,
)
from PyQt4.QtGui import (
QMainWindow,
QMessageBox,
QProgressDialog,
)
import sys
import threading
from Ui_mainWindow import Ui_MainWindow
@ -15,7 +24,7 @@ from addMTModel import AddMTModelDialog
from chooseMTModel import ChooseMTModelDialog
from engine import Engine
from credits import DlgCredits
from util import *
from util import doAlert
class MainWindow(QMainWindow, Ui_MainWindow):
@ -54,18 +63,27 @@ class MainWindow(QMainWindow, Ui_MainWindow):
Slot documentation goes here.
"""
current = self.tableView.currentIndex()
if current and current.row() >= 0:
if self.engine and self.datamodel.getRowID(current.row()) == self.engine.model['ID']:
text = '''The model is still in use, do you want to stop and delete it?
It might take a while...'''
reply = QMessageBox.question(None, 'Message', text, QMessageBox.Yes, QMessageBox.No)
if reply == QMessageBox.No:
return
t = self.stopEngine(self.engine)
t.join()
self.engine = None
self.clearPanel()
self.datamodel.delModel(current.row())
if not current or current.row() < 0:
return
model_in_use = (
self.engine and
self.datamodel.getRowID(current.row()) == self.engine.model['ID']
)
if model_in_use:
text = (
"The model is still in use, do you want to "
"stop and delete it?\n"
"It might take a while..."
)
reply = QMessageBox.question(
None, 'Message', text, QMessageBox.Yes, QMessageBox.No)
if reply == QMessageBox.No:
return
t = self.stopEngine(self.engine)
t.join()
self.engine = None
self.clearPanel()
self.datamodel.delModel(current.row())
@pyqtSignature("")
def on_newModelBtn_clicked(self):
@ -153,17 +171,24 @@ It might take a while...'''
if self.progress:
self.progress.close()
self.progress = None
self.progress = QProgressDialog("Model: %s" % model['name'], "Cancel", 0, self.engine.countSteps(), self)
self.progress = QProgressDialog(
"Model: %s" % model['name'], "Cancel", 0,
self.engine.countSteps(), self)
self.progress.setAutoReset(True)
self.progress.setAutoClose(True)
self.progress.setWindowModality(Qt.WindowModal)
self.progress.setWindowTitle('Loading Model...')
QObject.connect(self.progress, SIGNAL("canceled()"), self.progressCancelled)
QObject.connect(
self.progress, SIGNAL("canceled()"), self.progressCancelled)
self.progress.show()
#connect engine signal
QObject.connect(self.engine, SIGNAL("stepFinished(int)"), self.engineStepFinished)
QObject.connect(self.engine, SIGNAL("loaded(bool, QString)"), self.engineLoaded)
# Connect engine signal.
QObject.connect(
self.engine, SIGNAL("stepFinished(int)"),
self.engineStepFinished)
QObject.connect(
self.engine, SIGNAL("loaded(bool, QString)"),
self.engineLoaded)
def startEngineThread():
self.engine.start()
@ -225,7 +250,9 @@ It might take a while...'''
if text.strip() == "":
trans.append(text)
else:
trans.append(self.engine.translate(text.replace('\r', ' ').strip()).decode('utf8'))
trans.append(
self.engine.translate(
text.replace('\r', ' ').strip()).decode('utf8'))
self.editTrg.setText('\n'.join(trans))
except Exception, e:
print >> sys.stderr, str(e)

View File

@ -14,6 +14,8 @@ exe 1-1-Extraction : 1-1-Extraction.cpp ..//boost_filesystem ../moses//moses ;
exe prunePhraseTable : prunePhraseTable.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;
exe pruneGeneration : pruneGeneration.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;
local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
exe processPhraseTableMin : processPhraseTableMin.cpp ..//boost_filesystem ../moses//moses ;
@ -46,6 +48,6 @@ $(TOP)//boost_iostreams
$(TOP)//boost_program_options
;
alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable ;
alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable pruneGeneration ;
#processPhraseTable queryPhraseTable

View File

@ -54,11 +54,11 @@ int main(int argc, char** argv)
bool multipleScoreTrees = true;
size_t quantize = 0;
size_t threads =
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
size_t threads =
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
if(1 >= argc) {
printHelp(argv);

View File

@ -67,11 +67,11 @@ int main(int argc, char **argv)
bool sortScoreIndexSet = false;
size_t sortScoreIndex = 2;
bool warnMe = true;
size_t threads =
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
size_t threads =
#ifdef WITH_THREADS
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
#endif
1;
if(1 >= argc) {
printHelp(argv);

98
misc/pruneGeneration.cpp Normal file
View File

@ -0,0 +1,98 @@
#include <stdio.h>
#include <stdlib.h>
#include <cassert>
#include <algorithm>
#include <functional>
#include <boost/filesystem.hpp>
#include "pruneGeneration.h"
#include "moses/InputFileStream.h"
#include "moses/OutputFileStream.h"
using namespace std;
int main(int argc, char **argv)
{
cerr << "Starting" << endl;
int limit = atoi(argv[1]);
string inPathStem = argv[2];
string outPathStem = argv[3];
namespace fs = boost::filesystem;
//cerr << "inPathStem=" << inPathStem << endl;
fs::path p(inPathStem);
fs::path dir = p.parent_path();
//cerr << "dir=" << dir << endl;
fs::path fileStem = p.filename();
string fileStemStr = fileStem.native();
size_t fileStemStrSize = fileStemStr.size();
//cerr << "fileStem=" << fileStemStr << endl;
// loop thru each file in directory
fs::directory_iterator end_iter;
for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
if (fs::is_regular_file(dir_iter->status())) {
fs::path currPath = *dir_iter;
string currPathStr = currPath.native();
//cerr << "currPathStr=" << currPathStr << endl;
fs::path currFile = currPath.filename();
string currFileStr = currFile.native();
if (currFileStr.find(fileStemStr) == 0) {
// found gen table we need
//cerr << "found=" << currPathStr << endl;
string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
string outPath = outPathStem + suffix;
cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;
Moses::InputFileStream inStrme(currPathStr);
Moses::OutputFileStream outStrme(outPath);
Process(limit, inStrme, outStrme);
}
}
}
cerr << "Finished" << endl;
}
void Process(int limit, istream &inStrme, ostream &outStrme)
{
vector<Rec> records;
string prevInWord;
string line;
while (getline(inStrme, line)) {
vector<string> toks;
Tokenize(toks, line);
assert(toks.size() == 4);
if (prevInWord != toks[0]) {
Output(outStrme, records, limit);
records.clear();
}
// add new record
float prob = atof(toks[2].c_str());
records.push_back(Rec(prob, line));
prevInWord = toks[0];
}
// last
Output(outStrme, records, limit);
records.clear();
}
/**
 * Write the highest-probability records of one group to outStrme.
 *
 * `records` is sorted in place, largest prob first, and then the first
 * min(limit, records.size()) lines are written, one per line.
 *
 * `limit` is converted to size_t before the comparison, so a negative
 * value wraps to a very large number and every record is written.
 */
void Output(ostream &outStrme, vector<Rec> &records, int limit)
{
  // Sorting through reverse iterators with Rec::operator< gives descending order.
  std::sort(records.rbegin(), records.rend());

  const size_t cap = static_cast<size_t>(limit);
  const size_t numToWrite = records.size() < cap ? records.size() : cap;

  vector<Rec>::const_iterator stop = records.begin() + numToWrite;
  for (vector<Rec>::const_iterator it = records.begin(); it != stop; ++it) {
    outStrme << it->line << endl;
  }
}

46
misc/pruneGeneration.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <vector>
#include <string>
#include <iostream>
/**
 * One table line together with the probability it is ranked by.
 * Ordering (operator<) looks at prob only; line is never compared.
 */
class Rec
{
public:
  Rec(float aprob, const std::string &aline)
    : prob(aprob), line(aline) {
  }

  // True when this record's probability is strictly smaller than compare's.
  bool operator< (const Rec &compare) const {
    return compare.prob > prob;
  }

  float prob;        // ranking key
  std::string line;  // the text stored alongside the key
};
////////////////////////////////////////////////////////////
void Process(int limit, std::istream &inStrme, std::ostream &outStrme);
void Output(std::ostream &outStrme, std::vector<Rec> &records, int limit);
////////////////////////////////////////////////////////////
/**
 * Split str on any character contained in delimiters and append the
 * resulting tokens to output (existing elements of output are kept).
 *
 * Runs of delimiters count as a single separator and leading/trailing
 * delimiters are ignored, so no empty token is ever produced.
 */
inline void Tokenize(std::vector<std::string> &output
                     , const std::string& str
                     , const std::string& delimiters = " \t")
{
  typedef std::string::size_type Pos;

  // tokBegin: first character of the current token (npos once none is left).
  Pos tokBegin = str.find_first_not_of(delimiters, 0);
  while (tokBegin != std::string::npos) {
    // tokEnd: the delimiter that terminates the token, or npos at end of string.
    const Pos tokEnd = str.find_first_of(delimiters, tokBegin);
    // With tokEnd == npos the length is oversized and substr clamps it.
    output.push_back(str.substr(tokBegin, tokEnd - tokBegin));
    // Step over the separator run to the start of the next token.
    tokBegin = str.find_first_not_of(delimiters, tokEnd);
  }
}

View File

@ -151,9 +151,6 @@ int main(int argc, char** argv)
ResetUserTime();
}
InputType* foo = source.get();
FeatureFunction::CallChangeSource(foo);
// set up task of training one sentence
boost::shared_ptr<TrainingTask> task;
task = TrainingTask::create(source, ioWrapper);

View File

@ -52,11 +52,7 @@ public:
// shouldn't be mixing hypos with different lhs
assert(hypoA->GetTargetLHS() == hypoB->GetTargetLHS());
int ret = hypoA->RecombineCompare(*hypoB);
if (ret != 0)
return (ret < 0);
return false;
return (hypoA->RecombineCompare(*hypoB) < 0);
}
};

View File

@ -118,8 +118,6 @@ string SimpleTranslationInterface::translate(const string &inputString)
ResetUserTime();
}
FeatureFunction::CallChangeSource(&*source);
// set up task of translating one sentence
boost::shared_ptr<TranslationTask> task
= TranslationTask::create(source, ioWrapper);
@ -223,8 +221,6 @@ batch_run()
while ((source = ioWrapper->ReadInput()) != NULL) {
IFVERBOSE(1) ResetUserTime();
FeatureFunction::CallChangeSource(source.get());
// set up task of translating one sentence
boost::shared_ptr<TranslationTask>
task = TranslationTask::create(source, ioWrapper);

View File

@ -62,7 +62,6 @@
#include "moses/LM/SkeletonLM.h"
#include "moses/FF/SkeletonTranslationOptionListFeature.h"
#include "moses/LM/BilingualLM.h"
#include "SkeletonChangeInput.h"
#include "moses/TranslationModel/SkeletonPT.h"
#include "moses/Syntax/InputWeightFF.h"
#include "moses/Syntax/RuleTableFF.h"
@ -268,7 +267,6 @@ FeatureRegistry::FeatureRegistry()
MOSES_FNAME(SkeletonStatelessFF);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonLM);
MOSES_FNAME(SkeletonChangeInput);
MOSES_FNAME(SkeletonTranslationOptionListFeature);
MOSES_FNAME(SkeletonPT);

View File

@ -38,20 +38,6 @@ void FeatureFunction::Destroy()
RemoveAllInColl(s_staticColl);
}
// The original declaration as
// void FeatureFunction::CallChangeSource(InputType *&input)
// had me a bit perplexed. Would you really want to allow
// any feature function to replace the InputType behind the
// back of the others? And change what the vector is pointing to?
void FeatureFunction::CallChangeSource(InputType * const&input)
{
for (size_t i = 0; i < s_staticColl.size(); ++i) {
const FeatureFunction &ff = *s_staticColl[i];
ff.ChangeSource(input);
}
}
void FeatureFunction::SetupAll(TranslationTask const& ttask)
{
BOOST_FOREACH(FeatureFunction* ff, s_staticColl)

View File

@ -62,9 +62,6 @@ public:
static FeatureFunction &FindFeatureFunction(const std::string& name);
static void Destroy();
static void CallChangeSource(InputType * const&input);
// see my note in FeatureFunction.cpp --- UG
FeatureFunction(const std::string &line, bool initializeNow);
FeatureFunction(size_t numScoreComponents, const std::string &line);
virtual bool IsStateless() const = 0;
@ -156,9 +153,6 @@ public:
ScoreComponentCollection& scoreBreakdown,
ScoreComponentCollection& estimatedFutureScore) const = 0;
// override this method if you want to change the input before decoding
virtual void ChangeSource(InputType * const&input) const { }
// for context-dependent processing
static void SetupAll(TranslationTask const& task);
virtual void Setup(TranslationTask const& task) const { };

View File

@ -111,8 +111,8 @@ void GlobalLexicalModel::Load()
void GlobalLexicalModel::InitializeForInput(ttasksptr const& ttask)
{
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
"GlobalLexicalModel works only with sentence input.");
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
"GlobalLexicalModel works only with sentence input.");
Sentence const* s = reinterpret_cast<Sentence const*>(ttask->GetSource().get());
m_local.reset(new ThreadLocalStorage);
m_local->input = s;

View File

@ -107,8 +107,8 @@ bool GlobalLexicalModelUnlimited::Load(const std::string &filePathSource,
void GlobalLexicalModelUnlimited::InitializeForInput(ttasksptr const& ttask)
{
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
"GlobalLexicalModel works only with sentence input.");
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
"GlobalLexicalModel works only with sentence input.");
Sentence const* s = reinterpret_cast<Sentence const*>(ttask->GetSource().get());
m_local.reset(new ThreadLocalStorage);
m_local->input = s;

View File

@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
// Initialize phrase orientation scoring object
Moses::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// LEFT-TO-RIGHT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous
l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else {
reoClassData->firstNonTerminalIsBoundary = true;
}
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
// RIGHT-TO-LEFT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
// discontinuous
r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
} else {
reoClassData->lastNonTerminalIsBoundary = true;
}
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// LEFT-TO-RIGHT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "l2rOrientation ");
switch (l2rOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break;
default:
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else {
if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
// if sub-derivation has left-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
// if sub-derivation has left-boundary non-terminal:
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
// RIGHT-TO-LEFT DIRECTION
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
IFFEATUREVERBOSE(2) {
FEATUREVERBOSE(2, "r2lOrientation ");
switch (r2lOrientation) {
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
FEATUREVERBOSE2(2, "mono" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
FEATUREVERBOSE2(2, "swap" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
FEATUREVERBOSE2(2, "dleft" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
FEATUREVERBOSE2(2, "dright" << std::endl);
break;
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
break;
default:
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
} else {
if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
} else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
// if sub-derivation has right-boundary non-terminal:
// add recursive actual score of boundary non-terminal from subderivation
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
} else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
// if sub-derivation has right-boundary non-terminal:
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
}
const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
{
if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
return &MORIENT;
} else if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
} else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
return &SORIENT;
} else if ( ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
} else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
return &DORIENT;
} else {

View File

@ -302,8 +302,8 @@ public:
struct ReoClassData {
public:
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
bool firstNonTerminalIsBoundary;
bool firstNonTerminalPreviousSourceSpanIsAligned;
bool firstNonTerminalFollowingSourceSpanIsAligned;
@ -401,7 +401,7 @@ protected:
ScoreComponentCollection* scoreBreakdown,
const std::string* o) const;
const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const;
const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
static const std::string MORIENT;
static const std::string SORIENT;

View File

@ -1,96 +0,0 @@
#include <vector>
#include "SkeletonChangeInput.h"
#include "moses/ScoreComponentCollection.h"
#include "moses/TargetPhrase.h"
#include "moses/Sentence.h"
#include "moses/FactorCollection.h"
#include "util/exception.hh"
using namespace std;
namespace Moses
{
// Builds the feature from its configuration line. The base class receives
// the literal 2 (presumably the number of dense score components, matching
// the two entries filled in EvaluateInIsolation() -- TODO confirm) together
// with the raw line; ReadParameters() is then called.
SkeletonChangeInput::SkeletonChangeInput(const std::string &line)
:StatelessFeatureFunction(2, line)
{
ReadParameters();
}
void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const
{
// dense scores
vector<float> newScores(m_numScoreComponents);
newScores[0] = 1.5;
newScores[1] = 0.3;
scoreBreakdown.PlusEquals(this, newScores);
// sparse scores
scoreBreakdown.PlusEquals(this, "sparse-name", 2.4);
}
void SkeletonChangeInput::EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore) const
{
if (targetPhrase.GetNumNonTerminals()) {
vector<float> newScores(m_numScoreComponents);
newScores[0] = - std::numeric_limits<float>::infinity();
scoreBreakdown.PlusEquals(this, newScores);
}
}
// No-op: the body is empty, so nothing is scored at this hook.
void SkeletonChangeInput::EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const
{}
// No-op overload taking a Hypothesis: the body is empty and the accumulator
// is left unchanged.
void SkeletonChangeInput::EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const
{}
// No-op overload taking a ChartHypothesis: the body is empty and the
// accumulator is left unchanged.
void SkeletonChangeInput::EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const
{}
// Rewrites the input sentence in place: for every word, factor 1 is set to a
// factor built from at most the first 4 characters of factor 0's string.
// Throws (via UTIL_THROW_IF2) when the input is not a Sentence.
void SkeletonChangeInput::ChangeSource(InputType* const& input) const
{
  Sentence *sentence = dynamic_cast<Sentence*>(input);
  UTIL_THROW_IF2(sentence == NULL, "Not a sentence input");

  FactorCollection &factors = FactorCollection::Instance();

  const size_t numWords = sentence->GetSize();
  for (size_t pos = 0; pos != numWords; ++pos) {
    Word &word = sentence->Phrase::GetWord(pos);

    // Take the surface string and cut it down to a 4-character prefix;
    // shorter strings are kept whole.
    const Factor *surface = word[0];
    std::string prefix = surface->GetString().as_string();
    if (prefix.length() > 4) {
      prefix.resize(4);
    }

    const Factor *prefixFactor = factors.AddFactor(prefix);
    word.SetFactor(1, prefixFactor);
  }
}
// Handles one key/value pair from the configuration. The key "arg" is
// recognised but currently does nothing; every other key is forwarded to
// StatelessFeatureFunction::SetParameter().
void SkeletonChangeInput::SetParameter(const std::string& key, const std::string& value)
{
  if (key != "arg") {
    StatelessFeatureFunction::SetParameter(key, value);
    return;
  }
  // set value here
}
}

View File

@ -1,45 +0,0 @@
#pragma once
#include <string>
#include "StatelessFeatureFunction.h"
namespace Moses
{
// Skeleton stateless feature function that also modifies the input
// (see ChangeSource()). The scoring hooks either add fixed placeholder
// values or are empty in the accompanying .cpp.
class SkeletonChangeInput : public StatelessFeatureFunction
{
public:
// Constructs the feature from its configuration line.
SkeletonChangeInput(const std::string &line);
// Always returns true, regardless of the factor mask.
bool IsUseable(const FactorMask &mask) const {
return true;
}
// Adds fixed placeholder dense and sparse scores to scoreBreakdown.
void EvaluateInIsolation(const Phrase &source
, const TargetPhrase &targetPhrase
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection &estimatedFutureScore) const;
// Sets factor 1 of every word of a Sentence input to a factor made from
// at most the first 4 characters of factor 0; throws for non-Sentence input.
void ChangeSource(InputType* const&input) const;
// Adds negative infinity in dense slot 0 when the target phrase has
// non-terminals; otherwise leaves scoreBreakdown unchanged.
void EvaluateWithSourceContext(const InputType &input
, const InputPath &inputPath
, const TargetPhrase &targetPhrase
, const StackVec *stackVec
, ScoreComponentCollection &scoreBreakdown
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
// Empty implementation.
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
, const TranslationOptionList &translationOptionList) const;
// Empty implementation (Hypothesis overload).
void EvaluateWhenApplied(const Hypothesis& hypo,
ScoreComponentCollection* accumulator) const;
// Empty implementation (ChartHypothesis overload).
void EvaluateWhenApplied(const ChartHypothesis &hypo,
ScoreComponentCollection* accumulator) const;
// Accepts the key "arg" (currently ignored); other keys go to the base class.
void SetParameter(const std::string& key, const std::string& value);
};
}

View File

@ -296,7 +296,8 @@ GetBufferedInput()
}
boost::shared_ptr<InputType>
IOWrapper::ReadInput()
IOWrapper::
ReadInput()
{
#ifdef WITH_THREADS
boost::lock_guard<boost::mutex> lock(m_lock);
@ -304,7 +305,8 @@ IOWrapper::ReadInput()
boost::shared_ptr<InputType> source = GetBufferedInput();
if (source) {
source->SetTranslationId(m_currentLine++);
this->set_context_for(*source);
if (m_look_ahead || m_look_back)
this->set_context_for(*source);
}
m_past_input.push_back(source);
return source;
@ -344,7 +346,7 @@ set_context_for(InputType& source)
}
}
// cerr << string(80,'=') << endl;
source.SetContext(context);
if (context->size()) source.SetContext(context);
}

View File

@ -61,8 +61,6 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/ChartKBestExtractor.h"
#include "moses/Syntax/KBestExtractor.h"
#include "search/applied.hh"
#include <boost/format.hpp>
namespace Moses

View File

@ -37,7 +37,7 @@ protected:
std::streambuf *m_streambuf;
public:
InputFileStream(const std::string &filePath);
explicit InputFileStream(const std::string &filePath);
~InputFileStream();
void Close();

View File

@ -0,0 +1,90 @@
// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include "OutputFileStream.h"
#include "gzfilebuf.h"
using namespace std;
using namespace boost::algorithm;
namespace Moses
{
// Creates an unopened stream: no file object is held and m_open is false,
// so Close() (and therefore the destructor) has nothing to do until Open()
// succeeds.
OutputFileStream::OutputFileStream()
:boost::iostreams::filtering_ostream()
,m_outFile(NULL)
,m_open(false)
{
}
// Creates the stream and immediately calls Open(filePath).
// NOTE: the boolean result of Open() is not checked here, so when Open()
// returns false the object is left in the unopened state without any
// error being reported to the caller.
OutputFileStream::OutputFileStream(const std::string &filePath)
:m_outFile(NULL)
,m_open(false)
{
Open(filePath);
}
// Closes the stream via Close(), which is a no-op if it is not open.
OutputFileStream::~OutputFileStream()
{
Close();
}
bool OutputFileStream::Open(const std::string &filePath)
{
assert(!m_open);
if (filePath == std::string("-")) {
// Write to standard output. Leave m_outFile null.
this->push(std::cout);
} else {
m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
if (m_outFile->fail()) {
return false;
}
if (ends_with(filePath, ".gz")) {
this->push(boost::iostreams::gzip_compressor());
}
this->push(*m_outFile);
}
m_open = true;
return true;
}
// Flushes the stream and, when a file is held, detaches it from the filter
// chain, closes it and frees it. Does nothing if the stream is not open.
void OutputFileStream::Close()
{
  if (m_open) {
    this->flush();

    if (m_outFile != NULL) {
      this->pop(); // file

      m_outFile->close();
      delete m_outFile;
      m_outFile = NULL;
    }

    m_open = false;
  }
}
}

81
moses/OutputFileStream.h Normal file
View File

@ -0,0 +1,81 @@
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <cstdlib>
#include <fstream>
#include <string>
#include <iostream>
#include <boost/iostreams/filtering_stream.hpp>
namespace Moses
{
/** Version of std::ostream with transparent compression.
*
* Transparently compresses output when writing to a file whose name ends in
* ".gz". Or, writes to stdout instead of a file when given a filename
* consisting of just a dash ("-").
*/
class OutputFileStream : public boost::iostreams::filtering_ostream
{
private:
/** File that needs flushing & closing when we close this stream.
*
* Is NULL when no file is opened, e.g. when writing to standard output.
*
* Owned by this object: allocated with new in Open() and deleted in
* Close().
*/
std::ofstream *m_outFile;
/// Is this stream open? Set by a successful Open(), cleared by Close().
bool m_open;
public:
/** Create an unopened OutputFileStream.
*
* Until it's been opened, nothing can be done with this stream.
*/
OutputFileStream();
/** Create an OutputFileStream, and open it by calling Open().
*
* The return value of Open() is not checked by this constructor, so a
* failed open that returns false leaves the stream unopened without
* reporting an error.
*/
OutputFileStream(const std::string &filePath);
/// Closes the stream by calling Close().
virtual ~OutputFileStream();
// TODO: Can we please just always throw an exception when this fails?
/** Open stream.
*
* If filePath is "-" (just a dash), this opens the stream for writing to
* standard output. Otherwise, it opens the given file. If the filename
* has the ".gz" suffix, output will be transparently compressed.
*
* The stream must not already be open; the implementation asserts this.
*
* Call Close() to close the file.
*
* Returns whether opening the file was successful. It may also throw an
* exception on failure.
*/
bool Open(const std::string &filePath);
/// Flush and close stream. After this, the stream can be opened again.
void Close();
};
}

View File

@ -143,6 +143,7 @@ Parameter::Parameter()
AddParam(oov_opts,"mark-unknown", "mu", "mark unknown words in output");
AddParam(oov_opts,"lmodel-oov-feature", "add language model oov feature, one per model");
AddParam(oov_opts,"output-unknowns", "Output the unknown (OOV) words to the given file, one line per sentence");
AddParam(oov_opts,"always-create-direct-transopt", "Always create a translation that translates the source word ad-verbatim");
///////////////////////////////////////////////////////////////////////////////////////
// input options

View File

@ -63,8 +63,8 @@ StaticData::StaticData()
: m_sourceStartPosMattersForRecombination(false)
, m_requireSortingAfterSourceContext(false)
, m_inputType(SentenceInput)
, m_onlyDistinctNBest(false)
, m_needAlignmentInfo(false)
// , m_onlyDistinctNBest(false)
// , m_needAlignmentInfo(false)
, m_lmEnableOOVFeature(false)
, m_isAlwaysCreateDirectTranslationOption(false)
, m_currentWeightSetting("default")
@ -203,25 +203,26 @@ StaticData
//word-to-word alignment
// alignments
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );
if (m_PrintAlignmentInfo) {
m_needAlignmentInfo = true;
}
// if (m_PrintAlignmentInfo) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
if (m_PrintAlignmentInfoNbest) {
m_needAlignmentInfo = true;
}
// if (m_PrintAlignmentInfoNbest) { // => now in BookkeepingOptions::init()
// m_needAlignmentInfo = true;
// }
params = m_parameter->GetParam("alignment-output-file");
if (params && params->size()) {
m_alignmentOutputFile = Scan<std::string>(params->at(0));
m_needAlignmentInfo = true;
// m_needAlignmentInfo = true; // => now in BookkeepingOptions::init()
}
m_parameter->SetParameter( m_PrintID, "print-id", false );
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false );
// m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false ); // => now in BookkeepingOptions::init()
// word graph
params = m_parameter->GetParam("output-word-graph");
@ -327,41 +328,7 @@ bool
StaticData
::ini_nbest_options()
{
const PARAM_VEC *params;
// n-best
params = m_parameter->GetParam("n-best-list");
if (params) {
if (params->size() >= 2) {
m_nBestFilePath = params->at(0);
m_nBestSize = Scan<size_t>( params->at(1) );
m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
} else {
std::cerr << "wrong format for switch -n-best-list file size [disinct]";
return false;
}
} else {
m_nBestSize = 0;
}
m_parameter->SetParameter<size_t>(m_nBestFactor, "n-best-factor", 20);
m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
"print-alignment-info-in-n-best", false );
// include feature names in the n-best list
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
// include word alignment in the n-best list
m_parameter->SetParameter(m_nBestIncludesSegmentation,
"include-segmentation-in-n-best", false );
// print all factors of output translations
m_parameter->SetParameter(m_reportAllFactorsNBest,
"report-all-factors-in-n-best", false );
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
return true;
return m_nbest_options.init(*m_parameter);
}
void
@ -477,6 +444,7 @@ StaticData
//source word deletion
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
m_parameter->SetParameter(m_isAlwaysCreateDirectTranslationOption, "always-create-direct-transopt", false );
}
void
@ -625,8 +593,9 @@ bool StaticData::LoadData(Parameter *parameter)
// input, output
ini_factor_maps();
ini_input_options();
m_bookkeeping_options.init(*parameter);
m_nbest_options.init(*parameter); // if (!ini_nbest_options()) return false;
if (!ini_output_options()) return false;
if (!ini_nbest_options()) return false;
// threading etc.
if (!ini_performance_options()) return false;
@ -647,6 +616,16 @@ bool StaticData::LoadData(Parameter *parameter)
ini_mira_options();
// set m_nbest_options.enabled = true if necessary:
if (m_mbr || m_useLatticeMBR || m_outputSearchGraph || m_outputSearchGraphSLF
|| m_mira || m_outputSearchGraphHypergraph || m_useConsensusDecoding
#ifdef HAVE_PROTOBUF
|| m_outputSearchGraphPB
#endif
|| m_latticeSamplesFilePath.size()) {
m_nbest_options.enabled = true;
}
// S2T decoder
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
RecursiveCYKPlus);
@ -1392,4 +1371,3 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
}
} // namespace

View File

@ -45,6 +45,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/PP/Factory.h"
#include "moses/parameters/ContextParameters.h"
#include "moses/parameters/NBestOptions.h"
#include "moses/parameters/BookkeepingOptions.h"
namespace Moses
{
@ -95,18 +97,21 @@ protected:
// 0 = no distortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints
bool m_useEarlyDistortionCost;
size_t
m_maxHypoStackSize //! hypothesis-stack size that triggers pruning
, m_minHypoStackDiversity //! minimum number of hypothesis in stack for each source word coverage
, m_nBestSize
, m_latticeSamplesSize
, m_nBestFactor
, m_maxNoTransOptPerCoverage
, m_maxNoPartTransOpt
, m_maxPhraseLength;
size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
NBestOptions m_nbest_options;
BookkeepingOptions m_bookkeeping_options;
// size_t m_nBestSize;
// size_t m_nBestFactor;
std::string m_nBestFilePath, m_latticeSamplesFilePath;
bool m_labeledNBestList,m_nBestIncludesSegmentation;
size_t m_latticeSamplesSize;
size_t m_maxNoTransOptPerCoverage;
size_t m_maxNoPartTransOpt;
size_t m_maxPhraseLength;
// std::string m_nBestFilePath;
std::string m_latticeSamplesFilePath;
// bool m_labeledNBestList,m_nBestIncludesSegmentation;
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
bool m_markUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = mark and (ignore) them
bool m_wordDeletionEnabled;
@ -128,21 +133,21 @@ protected:
bool m_reportSegmentation;
bool m_reportSegmentationEnriched;
bool m_reportAllFactors;
bool m_reportAllFactorsNBest;
// bool m_reportAllFactorsNBest;
std::string m_detailedTranslationReportingFilePath;
std::string m_detailedTreeFragmentsTranslationReportingFilePath;
//DIMw
std::string m_detailedAllTranslationReportingFilePath;
bool m_onlyDistinctNBest;
// bool m_onlyDistinctNBest;
bool m_PrintAlignmentInfo;
bool m_needAlignmentInfo;
bool m_PrintAlignmentInfoNbest;
// bool m_needAlignmentInfo; // => BookkeepingOptions
// bool m_PrintAlignmentInfoNbest;
bool m_PrintID;
bool m_PrintPassthroughInformation;
bool m_PrintPassthroughInformationInNBest;
// bool m_PrintPassthroughInformationInNBest;
std::string m_alignmentOutputFile;
@ -214,7 +219,7 @@ protected:
bool m_useLegacyPT;
bool m_defaultNonTermOnlyForEmptyRange;
S2TParsingAlgorithm m_s2tParsingAlgorithm;
bool m_printNBestTrees;
// bool m_printNBestTrees;
FeatureRegistry m_registry;
PhrasePropertyFactory m_phrasePropertyFactory;
@ -361,7 +366,8 @@ public:
return m_PrintPassthroughInformation;
}
bool IsPassthroughInNBestEnabled() const {
return m_PrintPassthroughInformationInNBest;
return m_nbest_options.include_passthrough;
// return m_PrintPassthroughInformationInNBest;
}
int GetMaxDistortion() const {
return m_maxDistortion;
@ -410,7 +416,8 @@ public:
return m_reportAllFactors;
}
bool GetReportAllFactorsNBest() const {
return m_reportAllFactorsNBest;
return m_nbest_options.include_all_factors;
// return m_reportAllFactorsNBest;
}
bool IsDetailedTranslationReportingEnabled() const {
return !m_detailedTranslationReportingFilePath.empty();
@ -430,7 +437,8 @@ public:
return m_detailedTreeFragmentsTranslationReportingFilePath;
}
bool IsLabeledNBestList() const {
return m_labeledNBestList;
return m_nbest_options.include_feature_labels;
// return m_labeledNBestList;
}
bool UseMinphrInMemory() const {
@ -443,21 +451,24 @@ public:
// for mert
size_t GetNBestSize() const {
return m_nBestSize;
return m_nbest_options.nbest_size;
// return m_nBestSize;
}
const std::string &GetNBestFilePath() const {
return m_nBestFilePath;
return m_nbest_options.output_file_path;
// return m_nBestFilePath;
}
bool IsNBestEnabled() const {
return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
m_outputSearchGraph || m_outputSearchGraphSLF ||
m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
#ifdef HAVE_PROTOBUF
m_outputSearchGraphPB ||
#endif
!m_latticeSamplesFilePath.empty());
return m_nbest_options.enabled;
// return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
// m_outputSearchGraph || m_outputSearchGraphSLF ||
// m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
// #ifdef HAVE_PROTOBUF
// m_outputSearchGraphPB ||
// #endif
// !m_latticeSamplesFilePath.empty());
}
size_t GetLatticeSamplesSize() const {
@ -469,7 +480,8 @@ public:
}
size_t GetNBestFactor() const {
return m_nBestFactor;
return m_nbest_options.factor;
// return m_nBestFactor;
}
bool GetOutputWordGraph() const {
return m_outputWordGraph;
@ -527,7 +539,8 @@ public:
void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
bool GetDistinctNBest() const {
return m_onlyDistinctNBest;
return m_nbest_options.only_distinct;
// return m_onlyDistinctNBest;
}
const std::string& GetFactorDelimiter() const {
return m_factorDelimiter;
@ -692,7 +705,8 @@ public:
const std::string &GetBinDirectory() const;
bool NeedAlignmentInfo() const {
return m_needAlignmentInfo;
return m_bookkeeping_options.need_alignment_info;
// return m_needAlignmentInfo;
}
const std::string &GetAlignmentOutputFile() const {
return m_alignmentOutputFile;
@ -701,14 +715,16 @@ public:
return m_PrintAlignmentInfo;
}
bool PrintAlignmentInfoInNbest() const {
return m_PrintAlignmentInfoNbest;
return m_nbest_options.include_alignment_info;
// return m_PrintAlignmentInfoNbest;
}
WordAlignmentSort GetWordAlignmentSort() const {
return m_wordAlignmentSort;
}
bool NBestIncludesSegmentation() const {
return m_nBestIncludesSegmentation;
return m_nbest_options.include_segmentation;
// return m_nBestIncludesSegmentation;
}
bool GetHasAlternateWeightSettings() const {
@ -849,7 +865,8 @@ public:
}
bool PrintNBestTrees() const {
return m_printNBestTrees;
return m_nbest_options.print_trees;
// return m_printNBestTrees;
}
bool RequireSortingAfterSourceContext() const {

View File

@ -18,8 +18,6 @@
***********************************************************************/
#pragma once
#ifndef moses_ChartRuleLookupManagerMemory_h
#define moses_ChartRuleLookupManagerMemory_h
#include <vector>
@ -97,4 +95,3 @@ private:
} // namespace Moses
#endif

View File

@ -34,7 +34,7 @@ namespace Moses
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
size_t threadsNum)
: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
m_fileHandle(0), m_fileHandleStart(0), m_size(0),
m_fileHandle(0), m_fileHandleStart(0), m_landmarks(true), m_size(0),
m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0),
m_threadPool(threadsNum)
{

View File

@ -78,9 +78,9 @@ GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
if(m_hash.GetSize() != index) {
std::string scoresString;
if(m_inMemory)
scoresString = m_scoresMemory[index];
scoresString = m_scoresMemory[index].str();
else
scoresString = m_scoresMapped[index];
scoresString = m_scoresMapped[index].str();
BitWrapper<> bitStream(scoresString);
for(size_t i = 0; i < m_numScoreComponent; i++)

View File

@ -53,12 +53,11 @@ LexicalReorderingTableCreator::LexicalReorderingTableCreator(
std::cerr << "Pass 1/2: Creating phrase index + Counting scores" << std::endl;
m_hash.BeginSave(m_outFile);
if(tempfilePath.size()) {
MmapAllocator<unsigned char> allocEncoded(util::FMakeTemp(tempfilePath));
m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocEncoded);
} else {
m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>();
m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(true);
}
EncodeScores();
@ -73,7 +72,7 @@ LexicalReorderingTableCreator::LexicalReorderingTableCreator(
MmapAllocator<unsigned char> allocCompressed(util::FMakeTemp(tempfilePath));
m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocCompressed);
} else {
m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>();
m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(true);
}
CompressScores();

View File

@ -151,11 +151,12 @@ public:
if(!m_fixed) {
util::UnmapOrThrow(p, num * sizeof(T));
} else {
size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
size_t relative_offset = m_data_offset - map_offset;
util::UnmapOrThrow((pointer)((char*)p - relative_offset), num * sizeof(T));
}
const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
const size_t relative_offset = m_data_offset - map_offset;
const size_t adjusted_map_size = m_map_size + relative_offset;
util::UnmapOrThrow((pointer)((char*)p - relative_offset), adjusted_map_size);
}
}
void construct (pointer p, const T& value) {

View File

@ -224,9 +224,9 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
// Retrieve compressed and encoded target phrase collection
std::string encodedPhraseCollection;
if(m_phraseDictionary.m_inMemory)
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId];
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId].str();
else
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId];
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId].str();
BitWrapper<> encodedBitStream(encodedPhraseCollection);
if(m_coding == PREnc && bitsLeft)

View File

@ -130,7 +130,7 @@ PhraseTableCreator::PhraseTableCreator(std::string inPath,
MmapAllocator<unsigned char> allocCompressed(util::FMakeTemp(tempfilePath));
m_compressedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocCompressed);
} else {
m_compressedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>();
m_compressedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>(true);
}
CompressTargetPhrases();
@ -203,7 +203,7 @@ void PhraseTableCreator::Save()
= m_sourceSymbolsMap.begin(); it != m_sourceSymbolsMap.end(); it++)
temp1[it->second] = it->first;
std::sort(temp1.begin(), temp1.end());
StringVector<unsigned char, unsigned, std::allocator> sourceSymbols;
StringVector<unsigned char, unsigned, std::allocator> sourceSymbols(true);
for(std::vector<std::string>::iterator it = temp1.begin();
it != temp1.end(); it++)
sourceSymbols.push_back(*it);
@ -224,7 +224,7 @@ void PhraseTableCreator::Save()
for(boost::unordered_map<std::string, unsigned>::iterator it
= m_targetSymbolsMap.begin(); it != m_targetSymbolsMap.end(); it++)
temp2[it->second] = it->first;
StringVector<unsigned char, unsigned, std::allocator> targetSymbols;
StringVector<unsigned char, unsigned, std::allocator> targetSymbols(true);
for(std::vector<std::string>::iterator it = temp2.begin();
it != temp2.end(); it++)
targetSymbols.push_back(*it);

View File

@ -147,8 +147,8 @@ public:
typedef RangeIterator iterator;
typedef StringIterator string_iterator;
StringVector();
StringVector(Allocator<ValueT> alloc);
StringVector(bool allocate = false);
StringVector(Allocator<ValueT>& alloc);
virtual ~StringVector() {
delete m_charArray;
@ -203,13 +203,13 @@ public:
m_memoryMapped = memoryMapped;
size += std::fread(&m_sorted, sizeof(bool), 1, in) * sizeof(bool);
size += m_positions.load(in, m_memoryMapped);
size += m_positions.load(in, false);
size += loadCharArray(*m_charArray, in, m_memoryMapped);
size += loadCharArray(m_charArray, in, m_memoryMapped);
return size;
}
size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >& c,
size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >*& c,
std::FILE* in, bool map = false) {
// Can only be read into memory. Mapping not possible with std:allocator.
assert(map == false);
@ -219,13 +219,13 @@ public:
size_t valSize;
byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
c.resize(valSize, 0);
byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
c = new std::vector<ValueT, std::allocator<ValueT> >(valSize, 0);
byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
return byteSize;
}
size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >& c,
size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >*& c,
std::FILE* in, bool map = false) {
size_t byteSize = 0;
@ -235,18 +235,16 @@ public:
if(map == false) {
// Read data into temporary file (default constructor of MmapAllocator)
// and map memory onto temporary file. Can be resized.
c.resize(valSize, 0);
byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
c = new std::vector<ValueT, MmapAllocator<ValueT> >(valSize, 0);
byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
} else {
// Map it directly on specified region of file "in" starting at valPos
// with length valSize * sizeof(ValueT). Mapped region cannot be resized.
size_t valPos = std::ftell(in);
Allocator<ValueT> alloc(in, valPos);
std::vector<ValueT, Allocator<ValueT> > charArrayTemp(alloc);
charArrayTemp.resize(valSize, 0);
c.swap(charArrayTemp);
c = new std::vector<ValueT, Allocator<ValueT> >(alloc);
c->resize(valSize, 0);
byteSize += valSize * sizeof(ValueT);
}
@ -369,11 +367,12 @@ OStream& operator<<(OStream &os, ValueIteratorRange<ValueIteratorT> cr)
// StringVector
template<typename ValueT, typename PosT, template <typename> class Allocator>
StringVector<ValueT, PosT, Allocator>::StringVector()
: m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >()) { }
StringVector<ValueT, PosT, Allocator>::StringVector(bool allocate)
: m_sorted(true), m_memoryMapped(false),
m_charArray(allocate ? new std::vector<ValueT, Allocator<ValueT> >() : 0) { }
template<typename ValueT, typename PosT, template <typename> class Allocator>
StringVector<ValueT, PosT, Allocator>::StringVector(Allocator<ValueT> alloc)
StringVector<ValueT, PosT, Allocator>::StringVector(Allocator<ValueT> &alloc)
: m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >(alloc)) { }
template<typename ValueT, typename PosT, template <typename> class Allocator>

View File

@ -40,9 +40,8 @@ std::auto_ptr<RuleTableLoader> RuleTableLoaderFactory::Create(
{
InputFileStream input(path);
std::string line;
bool cont = std::getline(input, line);
if (cont) {
if (std::getline(input, line)) {
std::vector<std::string> tokens;
Tokenize(tokens, line);
if (tokens.size() == 1) {

View File

@ -1,116 +1,126 @@
# Some systems apparently distinguish between shell
# variables and environment variables. The latter are
# visible to the make utility, the former apparently not,
# so we need to set them if they are not defined yet
SHELL = bash
MAKEFLAGS += --warn-undefined-variables
.DEFAULT_GOAL = all
.SUFFIXES:
# ===============================================================================
# LOCAL ENVIRONMENT
# ===============================================================================
# shell script snippet:
define find_moses_root
d=$$(pwd);
while [[ ! -e $$d/Jamroot && $$d != "/" ]] ; do
d=$$(dirname $$d);
done;
echo $$d
endef
MOSES_ROOT := $(shell $(find_moses_root))
# ===============================================================================
# COMPILATION PREFERENCES
# ===============================================================================
# CCACHE: if set to ccache, use ccache to speed up compilation
# OPTI: optimization level
# PROF: profiler switches
# PROF: profiler switches
CCACHE = ccache
OPTI = 3
EXE_TAG = exe
PROF =
# PROF = -g -pg
# compiler command
compiler ?= g++
variant ?= runtime
link ?= static
# ===============================================================================
CXX = $(shell which ccache) $(compiler)
CXXFLAGS += -DMAX_NUM_FACTORS=4
CXXFLAGS += -DKENLM_MAX_ORDER=5
CXXFLAGS += -DWITH_THREADS
CXXFLAGS += -DNO_MOSES
CXXFLAGS += -I${MOSES_ROOT} -I.
SHELL = bash
MAKEFLAGS += --warn-undefined-variables
.DEFAULT_GOAL = all
.SUFFIXES:
ifeq ($(variant),debug)
CXXFLAGS += -ggdb -O0
else ifeq ($(variant),profile)
CXXFLAGS += -g -pg -O3
else ifeq ($(variant),syntax)
CXXFLAGS += -fsyntax-only
endif
# LDFLAGS = -L${MOSES_ROOT}/lib -L ./lib/
# WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL}
WDIR = build/$(variant)
# ===============================================================================
# COMPILATION 'LOCALIZATION'
HOST ?= $(shell hostname)
HOSTTYPE ?= $(shell uname -m)
KERNEL = $(shell uname -r)
HOST ?= $(shell hostname)
HOSTTYPE ?= $(shell uname -m)
KERNEL = $(shell uname -r)
MOSES_ROOT = ${HOME}/code/mosesdecoder
WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI}
VPATH = ${HOME}/code/mosesdecoder/
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}
CXXFLAGS += -DMAX_NUM_FACTORS=4
CXXFLAGS += -DKENLM_MAX_ORDER=5
modirs := $(addprefix -I,$(shell find ${MOSES_ROOT}/moses ${MOSES_ROOT}/contrib -type d))
CXXFLAGS += -I${MOSES_ROOT}
INCLUDES =
BZLIB =
BOOSTLIBTAG =
nil:
lzma = lzma
#lzma =
REQLIBS = m z pthread dl ${lzma} ${BZLIB} \
boost_thread${BOOSTLIBTAG} \
boost_program_options${BOOSTLIBTAG} \
boost_system${BOOSTLIBTAG} \
boost_filesystem${BOOSTLIBTAG} \
boost_iostreams${BOOSTLIBTAG} z bz2
# libraries required
# icuuc icuio icui18n \
LIBS = $(addprefix -l, moses ${REQLIBS})
LIBS = $(addprefix -l, ${REQLIBS})
LIBDIRS = -L${HOME}/code/mosesdecoder/lib
LIBDIRS += -L${HOME}/lib
PREFIX ?= .
BINDIR ?= ${PREFIX}/bin
ifeq "$(OPTI)" "0"
BINPREF = debug.
else
BINPREF =
LIBS = m z bz2 pthread dl ${BOOSTLIBS}
BOOSTLIBS := thread system filesystem program_options iostreams
BOOSTLIBS := $(addprefix boost_,${BOOSTLIBS})
ifdef ($(BOOSTLIBTAG),"")
BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS})
endif
cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\
$(patsubst .%,$(WDIR)%,$(basename $1))))
cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1)))
cc2trg = $(basename $(notdir $1))
OBJ2 :=
define compile
define compile
DEP += ${WDIR}/$(basename $(notdir $1)).d
${WDIR}/$(basename $(notdir $1)).o : $1 $(wildcard $(basename $1).h)
DEP += $(basename $(call cc2obj,$1)).d
$(call cc2obj,$1): $1
@echo -e "COMPILING $1"
@mkdir -p $$(@D)
${CXX} ${CXXFLAGS} -MD -MP -c $$(abspath $$<) -o $$@
@${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
endef
testprogs = test-dynamic-im-tsa try-align
programs = mtt-build mtt-dump symal2mam custom-pt mmlex-build ${testprogs}
programs += mtt-count-words
define build
all: $(addprefix ${BINDIR}/${BINPREF}, $(programs))
@echo $^
clean:
rm -f ${WDIR}/*.o ${WDIR}/*.d
$(call cc2trg,$1): $(call cc2exe,$1)
$(call cc2exe,$1): $(call cc2obj,$1) $(LIBOBJ)
ifneq ($(variant),syntax)
@echo -e "LINKING $$@"
@mkdir -p $${@D}
@${CXX} ${CXXFLAGS} -o $$@ $(LIBOBJ) $(addprefix -l,${LIBS}) $$<
endif
custom-pt: ${BINDIR}/${BINPREF}custom-pt
echo $^
endef
INMOGEN = $(wildcard ${MOSES_ROOT}/moses/TranslationModel/UG/generic/*/*.cpp)
#INMOMM = $(wildcard ${MOSES_ROOT}/moses/TranslationModel/UG/mm/*.cc)
#INMOMM += $(wildcard ${MOSES_ROOT}/moses/TranslationModel/UG/mm/*.cpp)
OBJ = $(patsubst %.cc,%.o,$(wildcard $(patsubst %.h,%.cc,$(wildcard *.h))))
OBJ += $(patsubst %.cpp,%.o,${INMOGEN})
#OBJ += $(patsubst %.cpp,%.o,${INMOMM})
#OBJ += $(patsubst %.cc,%.o,${INMOMM})
EXE = $(patsubst %.cc,%.o,$(filter-out $(patsubst %.h,%.cc,$(wildcard *.h)),$(wildcard *.cc)))
# list files here that you want explicitly excluded from compilation
skip = sim-pe.cc
skip += mtt.count.cc
skip += try-align2.cc
skip += spe-check-coverage3.cc
skip += mmsapt.cpp
skip += ug_stringdist.cc
skip += ug_splice_arglist.cc
skip += ug_lexical_reordering.cc
skip += ug_sampling_bias.cc
$(foreach cpp,${INMOGEN},$(eval $(call compile,${cpp})))
$(foreach cpp,$(wildcard *.cc),$(eval $(call compile,${cpp})))
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): $(addprefix ${WDIR}/,$(notdir ${OBJ}))
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): ${MOSES_ROOT}/lib/libmoses.a
${BINDIR}/${BINPREF}%: ${WDIR}/%.o ${WDIR}/mmsapt_align.o
@mkdir -p ${BINDIR}
echo PREREQS: $^
$(CXX) $(CXXFLAGS) -o $@ $^ ${LIBDIRS} ${LIBS}
# objects from elsewhere in the moses tree that are needed
extra = ${MOSES_ROOT}/util/exception.cc
#try-align: ${WDIR}/try-align.o ${WDIR}/tpt_tokenindex.o
# $(CXX) $(CXXFLAGS) -o $@ $^ ${LIBDIRS}
$(foreach f,$(skip),$(eval broken+=$(shell find -name $f)))
.SECONDARY:
$(info SCANNING DIRECTORY TREE FOR FILES)
find_cfiles = find -name '*.cc' -or -name '*.cpp'
CFILES = $(filter-out $(broken), $(shell $(find_cfiles)))
PROGRAMS := $(shell $(find_cfiles) | xargs grep -lP '^(int +)?main')
PROGRAMS := $(filter-out $(broken),$(PROGRAMS))
ALLOBJ = $(call cc2obj,$(CFILES) $(extra))
LIBOBJ = $(call cc2obj,$(filter-out $(PROGRAMS),$(CFILES) $(extra)))
$(foreach f,$(CFILES) $(extra),$(eval $(call compile,$f)))
$(foreach p,$(PROGRAMS),$(eval $(call build,$p)))
-include $(DEP)

View File

@ -16,24 +16,28 @@
namespace Moses
{
using namespace std;
// using namespace std;
using std::greater;
template<typename VAL,
typename COMP = greater<VAL>,
typename IDX_T=size_t>
class
VectorIndexSorter
: public binary_function<IDX_T const&, IDX_T const&, bool>
: public std::binary_function<IDX_T const&, IDX_T const&, bool>
{
vector<VAL> const& m_vecref;
std::vector<VAL> const& m_vecref;
boost::shared_ptr<COMP> m_comp;
public:
COMP const& Compare;
VectorIndexSorter(vector<VAL> const& v, COMP const& comp)
VectorIndexSorter(std::vector<VAL> const& v, COMP const& comp)
: m_vecref(v), Compare(comp) {
}
VectorIndexSorter(vector<VAL> const& v)
VectorIndexSorter(std::vector<VAL> const& v)
: m_vecref(v), m_comp(new COMP()), Compare(*m_comp) {
}
@ -43,20 +47,20 @@ namespace Moses
return (fwd == bwd ? a < b : fwd);
}
boost::shared_ptr<vector<IDX_T> >
boost::shared_ptr<std::vector<IDX_T> >
GetOrder() const;
void
GetOrder(vector<IDX_T> & order) const;
GetOrder(std::vector<IDX_T> & order) const;
};
template<typename VAL, typename COMP, typename IDX_T>
boost::shared_ptr<vector<IDX_T> >
boost::shared_ptr<std::vector<IDX_T> >
VectorIndexSorter<VAL,COMP,IDX_T>::
GetOrder() const
{
boost::shared_ptr<vector<IDX_T> > ret(new vector<IDX_T>(m_vecref.size()));
boost::shared_ptr<std::vector<IDX_T> > ret(new std::vector<IDX_T>(m_vecref.size()));
get_order(*ret);
return ret;
}
@ -64,7 +68,7 @@ namespace Moses
template<typename VAL, typename COMP, typename IDX_T>
void
VectorIndexSorter<VAL,COMP,IDX_T>::
GetOrder(vector<IDX_T> & order) const
GetOrder(std::vector<IDX_T> & order) const
{
order.resize(m_vecref.size());
for (IDX_T i = 0; i < IDX_T(m_vecref.size()); ++i) order[i] = i;

View File

@ -0,0 +1,27 @@
// -*- c++ -*-
#include "ug_http_client.h"
// Minimal command-line driver for Moses::http_client.
// Expects exactly one argument (the URL), runs the asio event loop until the
// request has completed, and prints the collected response body to stdout.
// Returns 1 on a usage error or if an exception is thrown, 0 otherwise.
int main(int argc, char* argv[])
{
  if (argc != 2)
    {
      std::cout << "Usage: async_client <url>\n";
      std::cout << "Example:\n";
      std::cout << " async_client www.boost.org/LICENSE_1_0.txt\n";
      return 1;
    }
  try
    {
      boost::asio::io_service io_service;
      Moses::http_client c(io_service, argv[1]);
      // The constructor only queues the asynchronous resolve; the actual
      // work happens inside run().
      io_service.run();
      std::cout << c.content() << std::endl;
    }
  catch (std::exception const& e)
    {
      // Report on stderr so diagnostics do not get mixed into the content
      // written to stdout, and signal the failure through the exit code.
      std::cerr << "Exception: " << e.what() << "\n";
      return 1;
    }
  return 0;
}

View File

@ -0,0 +1,13 @@
#include <iostream>
#include <string>
#include <iomanip>
#include "ug_http_client.h"
using namespace std;
// Filter: reads stdin line by line and writes the URI-encoded form of each
// line to stdout (one output line per input line, flushed after each line).
int main()
{
  for (string line; getline(cin, line); )
    {
      cout << Moses::uri_encode(line) << endl;
    }
  return 0;
}

View File

@ -16,7 +16,8 @@ namespace ugdiss
TokenIndex::
TokenIndex(string unkToken)
: ridx(0),unkLabel(unkToken),unkId(1),numTokens(0)
: ridx(0), unkLabel(unkToken), unkId(1), numTokens(0)
, startIdx(0), endIdx(0)
{
lock.reset(new boost::mutex());
};
@ -94,15 +95,25 @@ namespace ugdiss
TokenIndex::
operator[](char const* p) const
{
if (startIdx==endIdx && !dynamic) return strcmp(p,"NULL") && unkId;
Entry const* bla = lower_bound(startIdx,endIdx,p,comp);
if (bla != endIdx && !strcmp(comp.base+bla->offset,p))
return bla->id;
if (!dynamic) return unkId;
if (startIdx != endIdx)
{
Entry const* bla = lower_bound(startIdx,endIdx,p,comp);
if (bla != endIdx && !strcmp(comp.base+bla->offset,p))
return bla->id;
if (!dynamic) return unkId;
}
else if (!dynamic) return strcmp(p,"NULL") && unkId;
boost::lock_guard<boost::mutex> lk(*this->lock);
// stuff below is new as of 2011-01-30, for dynamic adding of unknown items
// IMPORTANT: numTokens is not currently not changed, it is the number of
// PRE-EXISING TOKENS, not including dynamically added Items
// stuff below is new as of 2011-01-30, for dynamic adding of
// unknown items. IMPORTANT: numTokens is currently not
// changed; it is the number of PRE-EXISTING TOKENS, not including
// dynamically added items
// if (!str2idExtra)
// {
// this->str2idExtra.reset(new map<string,id_type>());
// this->newWords.reset(new vector<string>());
// }
map<string,id_type>::value_type newItem(p,str2idExtra->size()+numTokens);
pair<map<string,id_type>::iterator,bool> foo = str2idExtra->insert(newItem);
if (foo.second) // it actually is a new item
@ -144,10 +155,13 @@ namespace ugdiss
if (!ridx.size())
{
boost::lock_guard<boost::mutex> lk(*this->lock);
// Someone else (multi-threading!) may have created the
// reverse index in the meantime, so let's check again
if (!ridx.size()) ridx = reverseIndex();
}
if (id < ridx.size())
return ridx[id];
boost::lock_guard<boost::mutex> lk(*this->lock);
if (dynamic && id < ridx.size()+newWords->size())
return (*newWords)[id-ridx.size()].c_str();

View File

@ -35,12 +35,18 @@
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
#include "moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h"
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
// #include "moses/FF/LexicalReordering/LexicalReorderingState.h"
#include "moses/Util.h"
// #include "moses/StaticData.h"
#ifndef NO_MOSES
// #pragma message "COMPILING WITH MOSES SUPPORT!"
#include "moses/StaticData.h"
#include "moses/thread_safe_container.h"
#include "moses/ContextScope.h"
#include "moses/TranslationTask.h"
#else
// #pragma message "COMPILING WITHOUT MOSES SUPPORT!"
#endif
#include "util/exception.hh"
// #include "util/check.hh"
@ -70,6 +76,7 @@ namespace Moses {
float lbop(size_t const tries, size_t const succ, float const confidence);
void write_bitvector(bitvector const& v, ostream& out);
#ifndef NO_MOSES
struct
ContextForQuery
{
@ -82,7 +89,7 @@ namespace Moses {
ostream* bias_log;
ContextForQuery() : bias_log(NULL) { }
};
#endif
template<typename TKN>
class Bitext
@ -140,8 +147,13 @@ namespace Moses {
// prep2 launches sampling and returns immediately.
// lookup (below) waits for the job to finish before it returns
sptr<pstats>
prep2(iter const& phrase, int max_sample = -1) const;
#ifndef NO_MOSES
sptr<pstats>
prep2(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
#endif
public:
Bitext(size_t const max_sample = 1000, size_t const xnum_workers = 16);
@ -157,9 +169,15 @@ namespace Moses {
open(string const base, string const L1, string const L2) = 0;
sptr<pstats>
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
lookup(iter const& phrase, int max_sample = -1) const;
void prep(iter const& phrase) const;
#ifndef NO_MOSES
sptr<pstats>
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
void prep(ttasksptr const& ttask, iter const& phrase) const;
#endif
void setDefaultSampleSize(size_t const max_samples);
size_t getDefaultSampleSize() const;
@ -181,16 +199,7 @@ namespace Moses {
void
write_yawat_alignment
( id_type const sid, iter const* m1, iter const* m2, ostream& out ) const;
#if 0
// needs to be adapted to the new API
void
lookup(std::vector<Token> const& snt, TSA<Token>& idx,
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
std::vector<std::vector<uint64_t> >* pidmap = NULL,
typename PhrasePair<Token>::Scorer* scorer=NULL,
sptr<SamplingBias const> const bias,
bool multithread=true) const;
#endif
string docname(id_type const sid) const;
};
@ -427,11 +436,13 @@ namespace Moses {
template<typename Token>
void
Bitext<Token>::
prep(ttasksptr const& ttask, iter const& phrase) const
prep(iter const& phrase) const
{
prep2(ttask, phrase, m_default_sample_size);
prep2(phrase, m_default_sample_size);
}
// prep2 schedules a phrase for sampling, and returns immediately
// the member function lookup retrieves the respective pstats instance
// and waits until the sampling is finished before it returns.
@ -440,26 +451,20 @@ namespace Moses {
sptr<pstats>
Bitext<Token>
::prep2
( ttasksptr const& ttask, iter const& phrase, int max_sample) const
(iter const& phrase, int max_sample) const
{
if (max_sample < 0) max_sample = m_default_sample_size;
sptr<ContextScope> scope = ttask->GetScope();
sptr<ContextForQuery> context = scope->get<ContextForQuery>(this);
sptr<SamplingBias> bias;
if (context) bias = context->bias;
sptr<pstats::cache_t> cache;
// - no caching for rare phrases and special requests (max_sample)
// (still need to test what a good caching threshold is ...)
// - use the task-specific cache when there is a sampling bias
if (max_sample == int(m_default_sample_size)
&& phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
{
cache = (phrase.root == I1.get()
? (bias ? context->cache1 : m_cache1)
: (bias ? context->cache2 : m_cache2));
// if (bias) cerr << "Using bias." << endl;
cache = (phrase.root == I1.get() ? m_cache1 : m_cache2);
}
sptr<pstats> ret;
sptr<pstats> const* cached;
@ -472,9 +477,6 @@ namespace Moses {
if (m_num_workers > 1)
ag->add_workers(m_num_workers);
}
// cerr << "NEW FREQUENT PHRASE: "
// << phrase.str(V1.get()) << " " << phrase.approxOccurrenceCount()
// << " at " << __FILE__ << ":" << __LINE__ << endl;
ret = ag->add_job(this, phrase, max_sample, bias);
if (cache) cache->set(phrase.getPid(),ret);
UTIL_THROW_IF2(ret == NULL, "Couldn't schedule sampling job.");
@ -545,87 +547,6 @@ namespace Moses {
}
};
#if 0
template<typename Token>
void
Bitext<Token>::
lookup(std::vector<Token> const& snt, TSA<Token>& idx,
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
std::vector<std::vector<uint64_t> >* pidmap,
typename PhrasePair<Token>::Scorer* scorer,
sptr<SamplingBias const> const& bias, bool multithread) const
{
// typedef std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > > ret_t;
dest.clear();
dest.resize(snt.size());
if (pidmap) { pidmap->clear(); pidmap->resize(snt.size()); }
// collect statistics in parallel, then build PT entries as
// the sampling finishes
bool fwd = &idx == I1.get();
std::vector<boost::thread*> workers; // background threads doing the lookup
pplist_cache_t& C = (fwd ? m_pplist_cache1 : m_pplist_cache2);
if (C.capacity() < 100000) C.reserve(100000);
for (size_t i = 0; i < snt.size(); ++i)
{
dest[i].reserve(snt.size()-i);
typename TSA<Token>::tree_iterator m(&idx);
for (size_t k = i; k < snt.size() && m.extend(snt[k].id()); ++k)
{
uint64_t key = m.getPid();
if (pidmap) (*pidmap)[i].push_back(key);
sptr<std::vector<PhrasePair<Token> > > pp = C.get(key);
if (pp)
dest[i].push_back(pp);
else
{
pp.reset(new std::vector<PhrasePair<Token> >());
C.set(key,pp);
dest[i].push_back(pp);
sptr<pstats> x = prep2(m, this->default_sample_size,bias);
pstats2pplist<Token> w(m,*(fwd?T2:T1),x,*pp,scorer);
if (multithread)
{
boost::thread* t = new boost::thread(w);
workers.push_back(t);
}
else w();
}
}
}
for (size_t w = 0; w < workers.size(); ++w)
{
workers[w]->join();
delete workers[w];
}
}
#endif
template<typename Token>
sptr<pstats>
Bitext<Token>::
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
sptr<pstats> ret = prep2(ttask, phrase, max_sample);
UTIL_THROW_IF2(!ret, "Got NULL pointer where I expected a valid pointer.");
// Why were we locking here?
if (m_num_workers <= 1)
{
boost::unique_lock<boost::shared_mutex> guard(m_lock);
typename agenda::worker(*this->ag)();
}
else
{
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
ret->ready.wait(lock);
}
return ret;
}
template<typename Token>
void
Bitext<Token>
@ -729,27 +650,6 @@ namespace Moses {
}
}
#if 0
template<typename Token>
sptr<pstats>
Bitext<Token>::
lookup(siter const& phrase, size_t const max_sample,
sptr<SamplingBias const> const& bias) const
{
sptr<pstats> ret = prep2(phrase, max_sample);
boost::unique_lock<boost::shared_mutex> guard(m_lock);
if (this->num_workers <= 1)
typename agenda::worker(*this->ag)();
else
{
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
ret->ready.wait(lock);
}
return ret;
}
#endif
template<typename Token>
void
expand(typename Bitext<Token>::iter const& m,
@ -773,33 +673,9 @@ namespace Moses {
}
}
#if 0
template<typename Token>
class
PStatsCache
{
typedef boost::unordered_map<uint64_t, sptr<pstats> > my_cache_t;
boost::shared_mutex m_lock;
my_cache_t m_cache;
public:
sptr<pstats> get(Bitext<Token>::iter const& phrase) const;
sptr<pstats>
add(Bitext<Token>::iter const& phrase) const
{
uint64_t pid = phrase.getPid();
std::pair<my_cache_t::iterator,bool>
}
};
#endif
} // end of namespace bitext
} // end of namespace moses
#include "ug_im_bitext.h"
#include "ug_mm_bitext.h"
#include "ug_bitext_moses.h"

View File

@ -137,7 +137,10 @@ int Bitext<Token>::agenda::job
float p = (*m_bias)[sid];
id_type docid = m_bias->GetClass(sid);
uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
// uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
std::map<uint32_t,uint32_t>::const_iterator m = stats->indoc.find(docid);
uint32_t k = m != stats->indoc.end() ? m->second : 0 ;
// always consider candidates from dominating documents and
// from documents that have not been considered at all yet
@ -159,11 +162,17 @@ int Bitext<Token>::agenda::job
e = root->getCorpus()->sntEnd(sid);
*log << docid << ":" << sid << " " << size_t(k) << "/" << N
<< " @" << p << " => " << d << " [";
for (size_t i = 0; i < stats->indoc.size(); ++i)
for (std::map<uint32_t, uint32_t>::const_iterator m = stats->indoc.begin();
m != stats->indoc.end(); ++m)
{
if (i) *log << " ";
*log << stats->indoc[i];
if (m != stats->indoc.begin()) *log << " ";
*log << m->first << ":" << m->second;
}
// for (size_t i = 0; i < stats->indoc.size(); ++i)
// {
// if (i) *log << " ";
// *log << stats->indoc[i];
// }
*log << "] ";
for (; x < e; ++x) *log << (*m_bitext->V1)[x->id()] << " ";
if (!ret) *log << "SKIP";

View File

@ -76,7 +76,7 @@ namespace Moses
++obwd[bwd_orient];
if (docid >= 0)
{
while (int(indoc.size()) <= docid) indoc.push_back(0);
// while (int(indoc.size()) <= docid) indoc.push_back(0);
++indoc[docid];
}
}

View File

@ -27,7 +27,8 @@ namespace Moses
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
public:
vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
std::map<uint32_t,uint32_t> indoc;
// vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
jstats();
jstats(jstats const& other);
uint32_t rcnt() const; // raw joint counts

View File

@ -0,0 +1,88 @@
// -*- mode: c++; cc-style: moses-cc-style -*-
#pragma once
#ifndef NO_MOSES
namespace Moses {
namespace bitext {
// Returns the pstats instance for /phrase/ and does not return before the
// corresponding sampling job is done.
// - ttask:      translation task, passed through to prep2()
// - phrase:     the phrase to look up
// - max_sample: passed through to prep2(), which substitutes
//               m_default_sample_size for negative values
// Throws (via UTIL_THROW_IF2) if prep2() hands back a NULL pointer.
template<typename Token>
sptr<pstats>
Bitext<Token>::
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
// schedule the job (or fetch a cached result)
sptr<pstats> ret = prep2(ttask, phrase, max_sample);
UTIL_THROW_IF2(!ret, "Got NULL pointer where I expected a valid pointer.");
// Why were we locking here?
if (m_num_workers <= 1)
{
// prep2() adds no background workers in this configuration, so an
// agenda worker is run right here in the calling thread, with m_lock
// held exclusively.
boost::unique_lock<boost::shared_mutex> guard(m_lock);
typename agenda::worker(*this->ag)();
}
else
{
// Wait until the job is no longer flagged as in progress.
// (ret->ready is presumably a condition variable paired with
// ret->lock -- confirm against the pstats declaration.)
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
ret->ready.wait(lock);
}
return ret;
}
// Schedules /phrase/ for sampling with the default sample size.
// The pstats pointer returned by prep2() is discarded; use lookup() to
// obtain the statistics.
template<typename Token>
void
Bitext<Token>::
prep(ttasksptr const& ttask, iter const& phrase) const
{
  this->prep2(ttask, phrase, m_default_sample_size);
}
// prep2 schedules a phrase for sampling and returns immediately.
// The member function lookup retrieves the respective pstats instance
// and waits until the sampling is finished before it returns.
// This allows sampling in the background.
// - ttask:      translation task; its scope is queried for a ContextForQuery
//               keyed on this bitext, which may carry a sampling bias and
//               task-specific caches
// - phrase:     the phrase to sample
// - max_sample: number of samples; negative means m_default_sample_size
// Returns the (possibly cached) pstats instance for the phrase; throws via
// UTIL_THROW_IF2 if no job could be scheduled.
template<typename Token>
sptr<pstats>
Bitext<Token>
::prep2
( ttasksptr const& ttask, iter const& phrase, int max_sample) const
{
if (max_sample < 0) max_sample = m_default_sample_size;
sptr<SamplingBias> bias;
sptr<ContextScope> scope = ttask->GetScope();
sptr<ContextForQuery> context = scope->get<ContextForQuery>(this);
// bias stays NULL when the task has no context for this bitext
if (context) bias = context->bias;
sptr<pstats::cache_t> cache;
// - no caching for rare phrases and special requests (max_sample)
// (still need to test what a good caching threshold is ...)
// - use the task-specific cache when there is a sampling bias
if (max_sample == int(m_default_sample_size)
&& phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
{
// bias is only ever set from a non-NULL context (see above), so
// dereferencing context in the biased branches is safe
cache = (phrase.root == I1.get()
? (bias ? context->cache1 : m_cache1)
: (bias ? context->cache2 : m_cache2));
}
sptr<pstats> ret;
sptr<pstats> const* cached;
// cache hit: hand back the existing entry without taking m_lock
if (cache && (cached = cache->get(phrase.getPid(), ret)) && *cached)
return *cached;
// cache miss: the agenda is created and the job added under m_lock
boost::unique_lock<boost::shared_mutex> guard(m_lock);
if (!ag)
{
// the agenda is created on first use; background workers are added
// only if more than one worker was requested
ag.reset(new agenda(*this));
if (m_num_workers > 1)
ag->add_workers(m_num_workers);
}
ret = ag->add_job(this, phrase, max_sample, bias);
// NOTE(review): the cache entry is written before the NULL check below,
// so a failed add_job stores a NULL entry; the hit test above ignores
// NULL entries (the "&& *cached" part).
if (cache) cache->set(phrase.getPid(),ret);
UTIL_THROW_IF2(ret == NULL, "Couldn't schedule sampling job.");
return ret;
}
}
}
#endif

View File

@ -58,7 +58,7 @@ namespace Moses
++obwd[po_bwd];
if (docid >= 0)
{
while (int(indoc.size()) <= docid) indoc.push_back(0);
// while (int(indoc.size()) <= docid) indoc.push_back(0);
++indoc[docid];
}
}

View File

@ -33,8 +33,8 @@ namespace Moses
uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
std::vector<uint32_t> indoc; // distribution over where samples came from
// std::vector<uint32_t> indoc; // distribution over where samples came from
std::map<uint32_t,uint32_t> indoc;
typedef std::map<uint64_t, jstats> trg_map_t;
trg_map_t trg;
pstats();

View File

@ -0,0 +1,219 @@
#include "ug_http_client.h"
#include "moses/Util.h"
namespace Moses
{
using boost::asio::ip::tcp;
// Returns a copy of the response body collected so far.
std::string
http_client::
content() const
{
  return m_content.str();
}
// Constructs a client for an already split-up request target and queues
// the request on /io_service/ (see init()).
// - server: host name to contact
// - port:   service name or port number, as a string
// - path:   request path sent in the GET line
http_client::
http_client(boost::asio::io_service& io_service,
            std::string const& server,
            std::string const& port,
            std::string const& path)
  : resolver_(io_service)
  , socket_(io_service)
{
  init(server, port, path);
}
// Constructs a client from a URL of the form [scheme://]host[:port][/path]
// and queues the request on /io_service/ (see init()).
// - The scheme, if present, is used as the service name; the default
//   service is "http".
// - An explicit ":port" after the host overrides the service name.
// - The path defaults to "/" when the URL contains none.
http_client::
http_client(boost::asio::io_service& io_service, std::string url)
  : resolver_(io_service), socket_(io_service)
{
  std::string service = "http";

  // Strip a leading "scheme://". find() returns npos when there is no
  // match, and npos is never smaller than size().
  size_t const scheme_end = url.find("://");
  if (scheme_end < url.size())
    {
      service = url.substr(0, scheme_end);
      url.erase(0, scheme_end + 3);
    }

  // host_end:   first ':' or '/' (or end of string)
  // path_start: first '/' (or end of string)
  size_t const host_end   = std::min(url.find_first_of(":/"), url.size());
  size_t const path_start = std::min(url.find("/"), url.size());

  if (host_end < url.size() && url[host_end] == ':')
    service = url.substr(host_end + 1, path_start - host_end - 1);

  std::string const host = url.substr(0, host_end);

  std::string path = "/";
  if (path_start < url.size())
    path = url.substr(path_start);

  init(host, service, path);
}
// Writes the HTTP/1.0 GET request into request_ and starts the
// asynchronous name resolution; handle_resolve() is invoked on completion.
void
http_client::
init(std::string const& server, std::string const& port, std::string const& path)
{
  // "Connection: close" makes the server close the socket once the
  // response has been sent, so everything up to EOF can be treated as
  // content.
  std::ostream out(&request_);
  out << "GET " << path << " HTTP/1.0\r\n"
      << "Host: " << server << "\r\n"
      << "Accept: */*\r\n"
      << "Connection: close\r\n\r\n";

  // Translate server and service names into a list of endpoints.
  tcp::resolver::query query(server, port.c_str());
  resolver_.async_resolve
    (query, boost::bind(&http_client::handle_resolve, this,
                        boost::asio::placeholders::error,
                        boost::asio::placeholders::iterator));
}
// Completion handler for the resolve started in init().
// On success, starts connecting to the first endpoint; handle_connect()
// is given the iterator advanced to the next candidate. On failure, the
// error message is appended to m_error.
void
http_client::
handle_resolve(const boost::system::error_code& err,
               tcp::resolver::iterator endpoint_iterator)
{
  if (err)
    {
      m_error << "Error: " << err.message() << "\n";
      return;
    }

  // Each endpoint in the list is tried in turn until one connects.
  tcp::endpoint const first = *endpoint_iterator;
  ++endpoint_iterator;
  socket_.async_connect
    (first, boost::bind(&http_client::handle_connect, this,
                        boost::asio::placeholders::error, endpoint_iterator));
}
// Completion handler for a connection attempt.
// - success: sends the prepared request; handle_write_request() follows.
// - failure with endpoints left: closes the socket and tries the next one.
// - failure with no endpoints left: appends the error message to m_error.
void
http_client::
handle_connect(const boost::system::error_code& err,
               tcp::resolver::iterator endpoint_iterator)
{
  if (!err)
    {
      boost::asio::async_write
        (socket_, request_,
         boost::bind(&http_client::handle_write_request, this,
                     boost::asio::placeholders::error));
      return;
    }

  // A default-constructed iterator marks the end of the endpoint list.
  if (endpoint_iterator == tcp::resolver::iterator())
    {
      m_error << "Error: " << err.message() << "\n";
      return;
    }

  socket_.close();
  tcp::endpoint const next = *endpoint_iterator;
  ++endpoint_iterator;
  socket_.async_connect
    (next, boost::bind(&http_client::handle_connect, this,
                       boost::asio::placeholders::error, endpoint_iterator));
}
// Completion handler for sending the request.
// On success, starts reading the status line (everything up to the first
// "\r\n"); handle_read_status_line() follows. On failure, the error
// message is appended to m_error.
void
http_client::
handle_write_request(const boost::system::error_code& err)
{
  if (err)
    {
      m_error << "Error: " << err.message() << "\n";
    }
  else
    {
      // response_ grows as needed to hold the entire line.
      boost::asio::async_read_until
        (socket_, response_, "\r\n",
         boost::bind(&http_client::handle_read_status_line, this,
                     boost::asio::placeholders::error));
    }
}
// Completion handler for reading the HTTP status line.
// Parses version, status code and status message into the corresponding
// members. Anything other than a well-formed "HTTP/..." line with status
// 200 is recorded in m_error and ends the request; otherwise the headers
// are read next (up to the blank line), followed by handle_read_headers().
void
http_client::
handle_read_status_line(const boost::system::error_code& err)
{
  // Log the human-readable message, as the resolve/connect/write handlers do.
  if (err) { m_error << "Error: " << err.message() << "\n"; return; }

  using namespace boost::asio;
  // Check that response is OK.
  std::istream response_stream(&response_);
  response_stream >> m_http_version >> m_status_code;
  std::getline(response_stream, m_status_message);
  if (!response_stream || m_http_version.substr(0, 5) != "HTTP/")
    m_error << "Invalid response\n";
  else if (m_status_code != 200)
    m_error << "Response returned with status code " << m_status_code << "\n";
  else // Read the response headers, which are terminated by a blank line.
    async_read_until(socket_, response_, "\r\n\r\n",
                     boost::bind(&http_client::handle_read_headers, this,
                                 placeholders::error));
}
// Completion handler for reading the response headers.
// Stores each header line in m_header, moves any body data that was read
// along with the headers into m_content, and then keeps reading the body;
// handle_read_content() follows.
void
http_client::
handle_read_headers(const boost::system::error_code& err)
{
  // Log the human-readable message, as the resolve/connect/write handlers do.
  if (err) { m_error << "Error: " << err.message() << "\n"; return; }

  // Process the response headers. Lines are split at '\n', so the blank
  // line that ends the header section shows up as a lone "\r".
  std::istream response_stream(&response_);
  std::string line;
  while (std::getline(response_stream, line) && line != "\r")
    m_header.push_back(line);

  // Write whatever content we already have to output.
  if (response_.size() > 0)
    m_content << &response_;

  using namespace boost::asio;
  // Start reading remaining data until EOF.
  async_read(socket_, response_, transfer_at_least(1),
             boost::bind(&http_client::handle_read_content, this,
                         placeholders::error));
}
// Completion handler for reading (part of) the response body.
// Appends what has been read to m_content and re-arms itself until the
// read fails. End-of-file is the regular termination and is not reported;
// any other error is appended to m_error.
void
http_client::
handle_read_content(const boost::system::error_code& err)
{
  using namespace boost::asio;
  if (!err)
    {
      // Write all of the data that has been read so far.
      // Then continue reading remaining data until EOF.
      m_content << &response_;
      async_read(socket_, response_, transfer_at_least(1),
                 boost::bind(&http_client::handle_read_content, this,
                             placeholders::error));
    }
  else if (err != boost::asio::error::eof)
    {
      // Log the human-readable message, as the resolve/connect/write
      // handlers do.
      m_error << "Error: " << err.message() << "\n";
    }
}
// Percent-encodes /in/ for use in a URI.
// - ASCII letters, digits and the characters . ~ _ - are copied unchanged.
// - A space becomes '+'.
// - Every other byte becomes '%' followed by exactly two lower-case hex
//   digits (e.g. TAB -> "%09"); a single-digit escape would be invalid.
// The whole string is processed, including bytes after an embedded NUL.
std::string
uri_encode(std::string const& in)
{
  static char const hex[] = "0123456789abcdef";
  std::string out;
  out.reserve(3 * in.size()); // worst case: every byte is escaped
  for (std::string::size_type i = 0; i < in.size(); ++i)
    {
      // Work on the unsigned value so that bytes >= 0x80 index hex[] safely.
      unsigned char const c = static_cast<unsigned char>(in[i]);
      bool const unreserved
        = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z')
        || (c >= 'a' && c <= 'z')
        || c == '.' || c == '~' || c == '_' || c == '-';
      if (c == ' ')
        out += '+';
      else if (unreserved)
        out += static_cast<char>(c);
      else
        {
          out += '%';
          out += hex[c >> 4];
          out += hex[c & 0x0F];
        }
    }
  return out;
}
}

View File

@ -0,0 +1,63 @@
// -*- c++ -*-
// Adapted by Ulrich Germann from:
// async_client.cpp
// ~~~~~~~~~~~~~~~~
//
// Copyright (c) 2003-2011 Christopher M. Kohlhoff (chris at kohlhoff dot com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
#pragma once
#include <iostream>
#include <istream>
#include <ostream>
#include <string>
#include <sstream>
#include <boost/asio.hpp>
#include <boost/bind.hpp>
namespace Moses
{
using boost::asio::ip::tcp;
std::string uri_encode(std::string const& in);
// Asynchronous HTTP/1.0 GET client on top of boost::asio.
// Constructing an instance queues the request on the given io_service; the
// request is carried out by the chain of handle_* callbacks while the
// io_service runs. Afterwards, content() returns the response body and
// error() returns whatever error messages were recorded along the way.
class http_client
{
  std::ostringstream m_content;      // response body
  std::vector<std::string> m_header; // response header lines, as read
  std::string m_http_version;        // first token of the status line
  unsigned int m_status_code;        // set when the status line is parsed
  std::string m_status_message;      // rest of the status line
  std::ostringstream m_error;        // error messages, one per line

public:
  // url: [scheme://]host[:port][/path]
  http_client(boost::asio::io_service& io_service, std::string url);
  http_client(boost::asio::io_service& io_service,
              std::string const& server,
              std::string const& port,
              std::string const& path);
private:
  // Builds the request and starts the asynchronous resolve.
  void init(std::string const& server,
            std::string const& port,
            std::string const& path);
  // Completion handlers, in the order in which they are chained.
  void handle_resolve(const boost::system::error_code& err,
                      tcp::resolver::iterator endpoint_iterator);
  void handle_connect(const boost::system::error_code& err,
                      tcp::resolver::iterator endpoint_iterator);
  void handle_write_request(const boost::system::error_code& err);
  void handle_read_status_line(const boost::system::error_code& err);
  void handle_read_headers(const boost::system::error_code& err);
  void handle_read_content(const boost::system::error_code& err);

  tcp::resolver resolver_;
  tcp::socket socket_;
  boost::asio::streambuf request_;
  boost::asio::streambuf response_;
public:
  // Response body collected so far.
  std::string content() const;
  // Error messages recorded by the handlers; empty if none was recorded.
  // Without this accessor a failed request is indistinguishable from an
  // empty response.
  std::string error() const { return m_error.str(); }
};
}

View File

@ -57,7 +57,7 @@ namespace ugdiss
public:
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL);
imTtrack(istream& in, TokenIndex& V, ostream* log = NULL);
imTtrack(size_t reserve = 0);
// imTtrack(istream& in, Vocab& V);
@ -131,24 +131,30 @@ namespace ugdiss
template<typename Token>
imTtrack<Token>::
imTtrack(istream& in, TokenIndex const& V, ostream* log)
imTtrack(istream& in, TokenIndex& V, ostream* log)
: numToks(0)
{
myData.reset(new vector<vector<Token> >());
string line,w;
size_t linectr=0;
boost::unordered_map<string,id_type> H;
for (id_type i = 0; i < V.knownVocabSize(); ++i)
H[V[i]] = i;
// for (id_type i = 0; i < V.knownVocabSize(); ++i)
// H[V[i]] = i;
while (getline(in,line))
{
// cout << line << endl;
myData->push_back(vector<Token>());
if (log && ++linectr%1000000==0)
*log << linectr/1000000 << "M lines of input processed" << endl;
istringstream buf(line);
// cout << line << endl;
while (buf>>w)
myData->back().push_back(Token(H[w]));
myData->back().resize(myData.back().size());
{
myData->back().push_back(Token(V[w]));
// cout << w << " " << myData->back().back().id() << " "
// << V[w] << endl;
}
// myData->back().resize(myData->back().size(), Token(0));
numToks += myData->back().size();
}
}

View File

@ -1,9 +1,35 @@
// -*- c++ -*-
#pragma once
#include <vector>
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
namespace Moses { namespace bitext {
#ifndef NO_MOSES
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
#endif
namespace Moses {
#ifdef NO_MOSES
// Stand-in definitions used when NO_MOSES is defined, i.e. when
// moses/FF/LexicalReordering/LexicalReorderingState.h is not included.
// NOTE(review): presumably these must stay in sync with the enums declared
// in that header -- confirm before changing any value.
namespace LRModel{
enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
enum Direction { Forward, Backward, Bidirectional };
// Orientation labels. Several enumerators intentionally share a numeric
// value (e.g. M/R = 0, NM/S/L = 1, D/DL = 2); presumably which names apply
// depends on the ModelType in use -- confirm.
enum ReorderingType {
M = 0, // monotonic
NM = 1, // non-monotonic
S = 1, // swap
D = 2, // discontinuous
DL = 2, // discontinuous, left
DR = 3, // discontinuous, right
R = 0, // right
L = 1, // left
MAX = 3, // largest possible
NONE = 4 // one past MAX; presumably means "no orientation" -- confirm
};
}
#endif
namespace bitext {
typedef Moses::LRModel::ReorderingType PhraseOrientation;

View File

@ -45,8 +45,9 @@ namespace Moses
this->m_docname2docid[docname] = docid;
this->m_docname.push_back(docname);
line >> b;
VERBOSE(1, "DOCUMENT MAP " << docname
<< " " << a << "-" << b+a << endl);
#ifndef NO_MOSES
VERBOSE(1, "DOCUMENT MAP " << docname << " " << a << "-" << b+a << endl);
#endif
for (b += a; a < b; ++a)
(*this->m_sid2docid)[a] = docid;
}

View File

@ -3,7 +3,9 @@
#include <vector>
#include "ug_typedefs.h"
#include "ug_bitext_pstats.h"
#ifndef NO_MOSES
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
#endif
#include "boost/format.hpp"
#include "tpt_tokenindex.h"
namespace Moses
@ -28,7 +30,8 @@ namespace Moses
std::vector<uchar> aln;
float score;
bool inverse;
std::vector<uint32_t> indoc;
// std::vector<uint32_t> indoc;
std::map<uint32_t,uint32_t> indoc;
PhrasePair() { };
PhrasePair(PhrasePair const& o);
@ -52,9 +55,11 @@ namespace Moses
fill_lr_vec(LRModel::Direction const& dir,
LRModel::ModelType const& mdl,
vector<float>& v) const;
#ifndef NO_MOSES
void
print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
LRModel const& LR) const;
#endif
class SortByTargetIdSeq
{
@ -292,6 +297,7 @@ namespace Moses
}
#ifndef NO_MOSES
template<typename Token>
void
PhrasePair<Token>
@ -301,10 +307,12 @@ namespace Moses
out << toString (V1, this->start1, this->len1) << " ::: "
<< toString (V2, this->start2, this->len2) << " "
<< this->joint << " [";
for (size_t i = 0; i < this->indoc.size(); ++i)
// for (size_t i = 0; i < this->indoc.size(); ++i)
for (std::map<uint32_t,uint32_t>::const_iterator m = indoc.begin();
m != indoc.end(); ++m)
{
if (i) out << " ";
out << this->indoc[i];
if (m != indoc.begin()) out << " ";
out << m->first << ":" << m->second;
}
out << "] [";
vector<float> lrscores;
@ -331,5 +339,6 @@ namespace Moses
}
#endif
}
#endif
} // namespace bitext
} // namespace Moses

View File

@ -2,12 +2,16 @@
#include <iostream>
#include <boost/foreach.hpp>
#include "moses/Timer.h"
// #include <curl/curl.h>
// #ifdef HAVE_CURLPP
// #include <curlpp/Options.hpp>
// #include <curlpp/cURLpp.hpp>
// #include <curlpp/Easy.hpp>
// #endif
#ifdef HAVE_CURLPP
#include <curlpp/Options.hpp>
#include <curlpp/cURLpp.hpp>
#include <curlpp/Easy.hpp>
#endif
// #ifdef WITH_MMT_BIAS_CLIENT
#include "ug_http_client.h"
// #endif
namespace Moses
{
@ -15,23 +19,77 @@ namespace Moses
{
using ugdiss::id_type;
#ifdef HAVE_CURLPP
std::string
query_bias_server(std::string const& url, std::string const& text)
size_t ca_write_callback(void *ptr, size_t size, size_t nmemb,
std::string* response)
{
// communicate with the bias server; resuts will be in ...
std::ostringstream os;
curlpp::Easy myRequest;
std::string query = url+curlpp::escape(text);
myRequest.setOpt(new curlpp::options::Url(query));
curlpp::options::WriteStream ws(&os);
myRequest.setOpt(ws); // Give it to your request
myRequest.perform(); // This will output to os
return os.str();
char const* c = reinterpret_cast<char const*>(ptr);
*response += std::string(c, size * nmemb);
return size * nmemb;
}
#endif
DocumentBias
// Query the bias server and return its raw response.
//
// The request URL is `server` immediately followed by the URI-encoded
// `context` (no separator is inserted, so `server` must already end with
// whatever prefix the query string needs).
//
// @param server   URL prefix of the bias server.
// @param context  Text to send; it is passed through uri_encode().
// @return         The content delivered by the HTTP client.
//
// The active (#else) branch performs the request with Moses::http_client on
// a local boost::asio::io_service. The disabled (#if 0) branch is an
// alternative libcurl implementation that is kept but not compiled.
// The response is also echoed to std::cerr.
std::string
query_bias_server(std::string const& server, std::string const& context)
{
#if 0
  std::string query = server + uri_encode(context);
  std::string response;
  CURL* curl = curl_easy_init();
  UTIL_THROW_IF2(!curl, "Could not init curl.");
  curl_easy_setopt(curl, CURLOPT_URL, query.c_str());
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ca_write_callback);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
  CURLcode res = curl_easy_perform(curl);
  curl_easy_cleanup(curl);
  return response;
#else
  std::string query = server+uri_encode(context);
  boost::asio::io_service io_service;
  Moses::http_client c(io_service, query);
  // Drive the client's handlers; the content is read only after run() returns.
  io_service.run();
  std::string response = c.content();
  std::cerr << "SERVER RESPONSE: " << response << std::endl;
  // Return the copy we already hold instead of calling c.content() again,
  // which would build the string a second time.
  return response;
#endif
}
// // #ifdef WITH_MMT_BIAS_CLIENT
// std::string
// query_bias_server(std::string const& url, std::string const& text)
// {
// #if 1
// std::string query = url+uri_encode(text);
// boost::asio::io_service io_service;
// Moses::http_client c(io_service, query);
// io_service.run();
// std::string response = c.content();
// std::cerr << "SERVER RESPONSE: " << response << std::endl;
// return c.content();
// #else
// return "";
// #endif
// }
// // #endif
// std::string
// query_bias_server(std::string const& url, int const port,
// std::string const& context,
// std::string const& src_lang)
// {
// char* response
// = ca_get_context(url.c_str(), port, context.c_str(), src_lang.c_str());
// UTIL_THROW_IF2(!response, "No response from server");
// std::string json = response;
// free(response);
// return json;
// }
DocumentBias
::DocumentBias
( std::vector<id_type> const& sid2doc,
std::map<std::string,id_type> const& docname2docid,
@ -40,13 +98,14 @@ namespace Moses
: m_sid2docid(sid2doc)
, m_bias(docname2docid.size(), 0)
{
#ifdef HAVE_CURLPP
// #ifdef HAVE_CURLPP
Timer timer;
if (log) timer.start(NULL);
std::string json = query_bias_server(server_url, text);
std::cerr << "SERVER RESPONSE " << json << std::endl;
init_from_json(json, docname2docid, log);
if (log) *log << "Bias query took " << timer << " seconds." << std::endl;
#endif
// #endif
}
void

View File

@ -37,6 +37,7 @@ namespace Moses
{
std::vector<id_type> const& m_sid2docid;
std::vector<float> m_bias;
// std::map<int,float> m_bias;
public:

View File

@ -1,8 +1,8 @@
#ifdef HAVE_CURLPP
#include <curlpp/Options.hpp>
#include <curlpp/cURLpp.hpp>
#include <curlpp/Easy.hpp>
#endif
// #ifdef HAVE_CURLPP
// #include <curlpp/Options.hpp>
// #include <curlpp/cURLpp.hpp>
// #include <curlpp/Easy.hpp>
// #endif
#include "mmsapt.h"
#include <boost/foreach.hpp>

View File

@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(initialise)
bitvec[2] = true;
bitvec[3] = true;
bitvec[7] = true;
WordsBitmap wbm2(7,bitvec);
BOOST_CHECK_EQUAL(wbm2.GetSize(),7);
for (size_t i = 0; i < 7; ++i) {

Some files were not shown because too many files have changed in this diff Show More