mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
Merge branch 'master' of github.com:moses-smt/mosesdecoder
This commit is contained in:
commit
01e6b3f0b3
@ -18,6 +18,8 @@ irstlm
|
||||
jam-files
|
||||
lm
|
||||
mingw/MosesGUI/icons_rc.py
|
||||
mingw/MosesGUI/Ui_credits.py
|
||||
mingw/MosesGUI/Ui_mainWindow.py
|
||||
moses/TranslationModel/UG
|
||||
phrase-extract/pcfg-common
|
||||
phrase-extract/syntax-common
|
||||
|
4
Jamroot
4
Jamroot
@ -89,7 +89,7 @@ if [ path.exists $(home)/moses-environment.jam ]
|
||||
include $(TOP)/jam-files/check-environment.jam ; # get resource locations
|
||||
# from environment variables
|
||||
include $(TOP)/jam-files/xmlrpc-c.jam ; # xmlrpc-c stuff for the server
|
||||
include $(TOP)/jam-files/curlpp.jam ; # curlpp stuff for bias lookup (MMT only)
|
||||
# include $(TOP)/jam-files/curlpp.jam ; # curlpp stuff for bias lookup (MMT only)
|
||||
|
||||
# exit "done" : 0 ;
|
||||
|
||||
@ -108,7 +108,7 @@ external-lib z ;
|
||||
|
||||
#lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
|
||||
#requirements += <library>dl ;
|
||||
|
||||
#requirements += <cxxflags>-std=c++0x ;
|
||||
|
||||
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
|
||||
if [ option.get "full-tcmalloc" : : "yes" ] {
|
||||
|
@ -62,7 +62,7 @@ void Vocabulary::Save(const string& fileName ) const
|
||||
vcbFile.open( fileName.c_str(), ios::out | ios::ate | ios::trunc);
|
||||
|
||||
if (!vcbFile) {
|
||||
cerr << "Failed to open " << vcbFile << endl;
|
||||
cerr << "Failed to open " << fileName << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ void Vocabulary::Load(const string& fileName )
|
||||
vcbFile.open(fileName.c_str());
|
||||
|
||||
if (!vcbFile) {
|
||||
cerr << "no such file or directory: " << vcbFile << endl;
|
||||
cerr << "no such file or directory: " << fileName << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
@ -96,4 +96,4 @@ reset-lm:
|
||||
-rm -rf lm
|
||||
reset-all: reset-lm reset-aln
|
||||
-rm -rf $(wildcard crp/trn/*/[ct]* crp/dev/[ct]* crp/tst/[ct]*)
|
||||
-rm -rf aux
|
||||
-rm -rf auxiliary
|
||||
|
@ -8,7 +8,7 @@ m4mdir := $(patsubst %modules/,%,\
|
||||
# $(info M4MDIR is ${m4mdir})
|
||||
|
||||
# m4m modules to be included
|
||||
M4M_MODULES := aux init
|
||||
M4M_MODULES := auxiliary init
|
||||
M4M_MODULES += tools moses-parameters prepare-corpus
|
||||
M4M_MODULES += mgiza fastalign mmbitext phrase-table moses-ini
|
||||
M4M_MODULES += tune-moses eval-system kenlm
|
||||
|
@ -40,8 +40,8 @@ endef
|
||||
define truecase
|
||||
|
||||
$2/cased/%.$3.gz: caser = ${run-truecaser}
|
||||
$2/cased/%.$3.gz: caser += -model ${WDIR}/aux/truecasing-model.$1
|
||||
$2/cased/%.$3.gz: | $2/tok/%.$3.gz ${WDIR}/aux/truecasing-model.$1
|
||||
$2/cased/%.$3.gz: caser += -model ${WDIR}/auxiliary/truecasing-model.$1
|
||||
$2/cased/%.$3.gz: | $2/tok/%.$3.gz ${WDIR}/auxiliary/truecasing-model.$1
|
||||
$$(lock)
|
||||
zcat $$(word 1, $$|) | ${parallel} --pipe -k $${caser} | gzip > $$@_
|
||||
mv $$@_ $$@
|
||||
@ -127,8 +127,8 @@ endef
|
||||
# .SECONDARY: $(call trn.tok-mno,${L1}) $(call trn.tok-pll,${L1})
|
||||
# .SECONDARY: $(call trn.tok-mno,${L2}) $(call trn.tok-pll,${L2})
|
||||
|
||||
#${WDIR}/aux/truecasing-model.${L1}: | $(call trn.tok-mno,${L1}) $(call trn.tok-pll,${L1})
|
||||
${WDIR}/aux/truecasing-model.${L1}: | $(call trn.tok-mno,${L1})
|
||||
#${WDIR}/auxiliary/truecasing-model.${L1}: | $(call trn.tok-mno,${L1}) $(call trn.tok-pll,${L1})
|
||||
${WDIR}/auxiliary/truecasing-model.${L1}: | $(call trn.tok-mno,${L1})
|
||||
$(lock)
|
||||
$(if $|,,$(error Can't find training data for $@!))#'
|
||||
${train-truecaser} -model $@_ -corpus <(echo $| | xargs zcat -f)
|
||||
@ -136,8 +136,8 @@ ${WDIR}/aux/truecasing-model.${L1}: | $(call trn.tok-mno,${L1})
|
||||
mv $@_ $@
|
||||
$(unlock)
|
||||
|
||||
#${WDIR}/aux/truecasing-model.${L2}: | $(call trn.tok-mno,${L2}) $(call trn.tok-pll,${L2})
|
||||
${WDIR}/aux/truecasing-model.${L2}: | $(call trn.tok-mno,${L2})
|
||||
#${WDIR}/auxiliary/truecasing-model.${L2}: | $(call trn.tok-mno,${L2}) $(call trn.tok-pll,${L2})
|
||||
${WDIR}/auxiliary/truecasing-model.${L2}: | $(call trn.tok-mno,${L2})
|
||||
$(lock)
|
||||
$(if $|,,$(error Can't find training data for $@!))#'
|
||||
${train-truecaser} -model $@_ -corpus <(echo $| | xargs zcat -f)
|
||||
|
@ -11,12 +11,12 @@
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
@ -72,13 +72,13 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
|
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="OnDiskPt" InternalType="Library">
|
||||
<Plugins>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src"/>
|
||||
@ -27,6 +44,8 @@
|
||||
<File Name="../../../OnDiskPt/Word.cpp"/>
|
||||
<File Name="../../../OnDiskPt/Word.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -40,9 +59,9 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
@ -72,7 +91,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -110,7 +129,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -118,6 +137,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -1,16 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Workspace Name="all" Database="all.tags">
|
||||
<Project Name="manual-label" Path="manual-label/manual-label.project" Active="No"/>
|
||||
<Project Name="extract" Path="extract/extract.project" Active="No"/>
|
||||
<Project Name="util" Path="util/util.project" Active="No"/>
|
||||
<Project Name="extract-mixed-syntax" Path="extract-mixed-syntax/extract-mixed-syntax.project" Active="No"/>
|
||||
<Project Name="lm" Path="lm/lm.project" Active="No"/>
|
||||
<Project Name="OnDiskPt" Path="OnDiskPt/OnDiskPt.project" Active="No"/>
|
||||
<Project Name="search" Path="search/search.project" Active="No"/>
|
||||
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="Yes"/>
|
||||
<Project Name="moses-cmd" Path="moses-cmd/moses-cmd.project" Active="No"/>
|
||||
<Project Name="score" Path="score/score.project" Active="No"/>
|
||||
<Project Name="consolidate" Path="consolidate/consolidate.project" Active="No"/>
|
||||
<Project Name="moses" Path="moses/moses.project" Active="No"/>
|
||||
<Project Name="pruneGeneration" Path="pruneGeneration/pruneGeneration.project" Active="Yes"/>
|
||||
<BuildMatrix>
|
||||
<WorkspaceConfiguration Name="Debug" Selected="yes">
|
||||
<Project Name="manual-label" ConfigName="Debug"/>
|
||||
@ -24,6 +24,7 @@
|
||||
<Project Name="score" ConfigName="Debug"/>
|
||||
<Project Name="consolidate" ConfigName="Debug"/>
|
||||
<Project Name="moses" ConfigName="Debug"/>
|
||||
<Project Name="pruneGeneration" ConfigName="Debug"/>
|
||||
</WorkspaceConfiguration>
|
||||
<WorkspaceConfiguration Name="Release" Selected="yes">
|
||||
<Project Name="manual-label" ConfigName="Release"/>
|
||||
@ -37,6 +38,7 @@
|
||||
<Project Name="score" ConfigName="Release"/>
|
||||
<Project Name="consolidate" ConfigName="Release"/>
|
||||
<Project Name="moses" ConfigName="Release"/>
|
||||
<Project Name="pruneGeneration" ConfigName="Release"/>
|
||||
</WorkspaceConfiguration>
|
||||
</BuildMatrix>
|
||||
</CodeLite_Workspace>
|
||||
|
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="extract-mixed-syntax" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src"/>
|
||||
@ -43,6 +60,10 @@
|
||||
<File Name="../../../phrase-extract/OutputFileStream.cpp"/>
|
||||
<File Name="../../../phrase-extract/OutputFileStream.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug">
|
||||
<Project Name="util"/>
|
||||
</Dependencies>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -56,13 +77,14 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../../"/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
|
||||
<LibraryPath Value="Debug"/>
|
||||
<Library Value="util"/>
|
||||
<Library Value="boost_iostreams"/>
|
||||
<Library Value="boost_program_options"/>
|
||||
@ -94,7 +116,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -133,7 +155,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -141,8 +163,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug">
|
||||
<Project Name="util"/>
|
||||
</Dependencies>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -26,6 +26,7 @@
|
||||
<option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../..""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/include""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../..""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
|
@ -5,16 +5,16 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2119725657." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1708444053" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.645190133" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
|
||||
@ -25,6 +25,7 @@
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../..""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
@ -61,16 +62,16 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1230189043" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1230189043." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.280378247" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1881910636" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
|
||||
|
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="extract" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src">
|
||||
@ -13,6 +30,8 @@
|
||||
<File Name="../../../phrase-extract/tables-core.cpp"/>
|
||||
<File Name="../../../phrase-extract/tables-core.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -26,11 +45,11 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../../"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<Library Value="boost_iostreams"/>
|
||||
<Library Value="z"/>
|
||||
</Linker>
|
||||
@ -60,7 +79,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -99,7 +118,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -107,6 +126,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="lm" InternalType="Library">
|
||||
<Plugins>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src"/>
|
||||
@ -27,6 +44,8 @@
|
||||
<File Name="../../../lm/virtual_interface.cc"/>
|
||||
<File Name="../../../lm/vocab.cc"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -40,9 +59,9 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
@ -72,7 +91,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -110,7 +129,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -118,6 +137,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -1,132 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2107801703">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2107801703." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.502948364" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1431969079" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/manual-label}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.2101075234" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1118840081" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.400985496" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1160903812" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.404589863" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="${workspace_loc:}/../.."/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost&quot;"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2033266575" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1568929819" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676866714" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.254144861" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.319879082" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
|
||||
<option id="gnu.cpp.link.option.paths.132164474" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.1017214824" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="boost_program_options"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1672776758" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1104732611" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.372096550" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.649050588">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.649050588" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.649050588" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.649050588." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1107402972" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1038954684" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/manual-label}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.100518450" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2005888378" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
|
||||
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.968169340" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.977676916" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1914416581" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.release.option.debugging.level.826081780" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2048171432" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.940327646" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.369758737" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1186766936" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.266174128" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.558116084" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="manual-label.cdt.managedbuild.target.gnu.exe.1701243340" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968;cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295;cdt.managedbuild.tool.gnu.c.compiler.input.2048171432">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673;cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964;cdt.managedbuild.tool.gnu.c.compiler.input.676866714">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/manual-label"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/manual-label"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
</cproject>
|
@ -1,27 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>manual-label</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
@ -1,46 +0,0 @@
|
||||
#include <list>
|
||||
#include "DeEn.h"
|
||||
#include "Main.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
bool Contains(const Phrase &source, int start, int end, int factor, const string &str)
|
||||
{
|
||||
for (int pos = start; pos <= end; ++pos) {
|
||||
bool found = IsA(source, pos, 0, factor, str);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void LabelDeEn(const Phrase &source, ostream &out)
|
||||
{
|
||||
Ranges ranges;
|
||||
|
||||
// find ranges to label
|
||||
for (int start = 0; start < source.size(); ++start) {
|
||||
for (int end = start; end < source.size(); ++end) {
|
||||
if (IsA(source, start, -1, 1, "VAFIN")
|
||||
&& IsA(source, end, +1, 1, "VVINF VVPP")
|
||||
&& !Contains(source, start, end, 1, "VAFIN VVINF VVPP VVFIN")) {
|
||||
Range range(start, end, "reorder-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
else if ((start == 0 || IsA(source, start, -1, 1, "$,"))
|
||||
&& IsA(source, end, +1, 0, "zu")
|
||||
&& IsA(source, end, +2, 1, "VVINF")
|
||||
&& !Contains(source, start, end, 1, "$,")) {
|
||||
Range range(start, end, "reorder-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OutputWithLabels(source, ranges, out);
|
||||
}
|
||||
|
@ -1,5 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Main.h"
|
||||
|
||||
// Collects the spans of `source` that match the tag patterns coded in the
// implementation, labels them "reorder-label", and writes the labelled phrase
// to `out` via OutputWithLabels().
void LabelDeEn(const Phrase &source, std::ostream &out);
|
@ -1,202 +0,0 @@
|
||||
/*
|
||||
* EnApacheChunker.cpp
|
||||
*
|
||||
* Created on: 28 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include "EnOpenNLPChunker.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::algorithm;
|
||||
|
||||
// Remembers the root directory of the OpenNLP installation. Process() appends
// "/bin/opennlp" and "/models/..." to this path when it builds its command.
EnOpenNLPChunker::EnOpenNLPChunker(const std::string &openNLPPath)
  : m_openNLPPath(openNLPPath)
{
}
|
||||
|
||||
// Intentionally empty: the destructor body performs no work of its own.
EnOpenNLPChunker::~EnOpenNLPChunker()
{
}
|
||||
|
||||
void EnOpenNLPChunker::Process(std::istream &in, std::ostream &out, const vector<string> &filterList)
|
||||
{
|
||||
const boost::filesystem::path
|
||||
inPath = boost::filesystem::unique_path(),
|
||||
outPath = boost::filesystem::unique_path();
|
||||
// read all input to a temp file
|
||||
ofstream inFile(inPath.c_str());
|
||||
|
||||
string line;
|
||||
while (getline(in, line)) {
|
||||
Unescape(line);
|
||||
inFile << line << endl;
|
||||
}
|
||||
inFile.close();
|
||||
|
||||
// execute chunker
|
||||
string cmd = "cat " + inPath.native() + " | "
|
||||
+ m_openNLPPath + "/bin/opennlp POSTagger "
|
||||
+ m_openNLPPath + "/models/en-pos-maxent.bin | "
|
||||
+ m_openNLPPath + "/bin/opennlp ChunkerME "
|
||||
+ m_openNLPPath + "/models/en-chunker.bin > "
|
||||
+ outPath.native();
|
||||
//g << "Executing:" << cmd << endl;
|
||||
int ret = system(cmd.c_str());
|
||||
|
||||
// read result of chunker and output as Moses xml trees
|
||||
ifstream outFile(outPath.c_str());
|
||||
|
||||
size_t lineNum = 0;
|
||||
while (getline(outFile, line)) {
|
||||
//cerr << line << endl;
|
||||
MosesReformat(line, out, filterList);
|
||||
out << endl;
|
||||
++lineNum;
|
||||
}
|
||||
outFile.close();
|
||||
|
||||
// clean up temporary files
|
||||
remove(inPath.c_str());
|
||||
remove(outPath.c_str());
|
||||
}
|
||||
|
||||
void EnOpenNLPChunker::MosesReformat(const string &line, std::ostream &out, const vector<string> &filterList)
|
||||
{
|
||||
//cerr << "REFORMATING:" << line << endl;
|
||||
bool inLabel = false;
|
||||
vector<string> toks;
|
||||
Moses::Tokenize(toks, line);
|
||||
for (size_t i = 0; i < toks.size(); ++i) {
|
||||
const string &tok = toks[i];
|
||||
|
||||
if (tok.substr(0, 1) == "[" && tok.substr(1,1) != "_") {
|
||||
// start of chunk
|
||||
string label = tok.substr(1);
|
||||
if (UseLabel(label, filterList)) {
|
||||
out << "<tree label=\"" << label << "\">";
|
||||
inLabel = true;
|
||||
}
|
||||
}
|
||||
else if (ends_with(tok, "]")) {
|
||||
// end of chunk
|
||||
if (tok.size() > 1) {
|
||||
if (tok.substr(1,1) == "_") {
|
||||
// just a word that happens to be ]
|
||||
vector<string> factors;
|
||||
Moses::Tokenize(factors, tok, "_");
|
||||
assert(factors.size() == 2);
|
||||
|
||||
Escape(factors[0]);
|
||||
out << factors[0] << " ";
|
||||
}
|
||||
else {
|
||||
// a word and end of tree
|
||||
string word = tok.substr(0, tok.size()-1);
|
||||
|
||||
vector<string> factors;
|
||||
Moses::Tokenize(factors, word, "_");
|
||||
assert(factors.size() == 2);
|
||||
|
||||
Escape(factors[0]);
|
||||
out << factors[0] << " ";
|
||||
}
|
||||
|
||||
if (inLabel) {
|
||||
out << "</tree> ";
|
||||
inLabel = false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (inLabel) {
|
||||
out << "</tree> ";
|
||||
inLabel = false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
// lexical item
|
||||
vector<string> factors;
|
||||
Moses::Tokenize(factors, tok, "_");
|
||||
if (factors.size() == 2) {
|
||||
Escape(factors[0]);
|
||||
out << factors[0] << " ";
|
||||
}
|
||||
else if (factors.size() == 1) {
|
||||
// word is _
|
||||
assert(tok.substr(0, 2) == "__");
|
||||
out << "_ ";
|
||||
}
|
||||
else {
|
||||
throw "Unknown format:" + tok;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of `original` in which every non-overlapping occurrence of
// `before`, scanning left to right, is replaced by `after`.
//
// An empty `before` matches nowhere and yields `original` unchanged; without
// this guard the search would match at the same position forever.
std::string
replaceAll( std::string const& original,
            std::string const& before,
            std::string const& after )
{
  if ( before.empty() ) {
    return original;
  }

  std::string retval;
  retval.reserve( original.size() );

  std::string::size_type current = 0;
  std::string::size_type next = original.find( before, current );
  while ( next != std::string::npos ) {
    retval.append( original, current, next - current );
    retval.append( after );
    // Resume after the match so the replacement text is never rescanned.
    current = next + before.size();
    next = original.find( before, current );
  }
  retval.append( original, current, std::string::npos );
  return retval;
}
|
||||
|
||||
// Replaces, in place, the characters & | < > ' " [ ] in `line` with XML
// entity / character references.
void EnOpenNLPChunker::Escape(string &line)
{
  // '&' has to be handled first; otherwise the '&' of the references
  // inserted below would itself be escaped.
  line = replaceAll(line, "&", "&amp;");
  line = replaceAll(line, "|", "&#124;");
  line = replaceAll(line, "<", "&lt;");
  line = replaceAll(line, ">", "&gt;");
  line = replaceAll(line, "'", "&apos;");
  line = replaceAll(line, "\"", "&quot;");
  line = replaceAll(line, "[", "&#91;");
  line = replaceAll(line, "]", "&#93;");
}
|
||||
|
||||
// Inverse of Escape(): replaces, in place, the entity / character references
// for | < > " ' [ ] & in `line` with the literal characters.
void EnOpenNLPChunker::Unescape(string &line)
{
  line = replaceAll(line, "&#124;", "|");
  line = replaceAll(line, "&lt;", "<");
  line = replaceAll(line, "&gt;", ">");
  line = replaceAll(line, "&quot;", "\"");
  line = replaceAll(line, "&apos;", "'");
  line = replaceAll(line, "&#91;", "[");
  line = replaceAll(line, "&#93;", "]");
  // "&amp;" goes last so that an escaped ampersand followed by e.g. "lt;"
  // is not mistaken for an entity of its own.
  line = replaceAll(line, "&amp;", "&");
}
|
||||
|
||||
bool EnOpenNLPChunker::UseLabel(const std::string &label, const std::vector<std::string> &filterList) const
|
||||
{
|
||||
if (filterList.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < filterList.size(); ++i) {
|
||||
if (label == filterList[i]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* EnApacheChunker.h
|
||||
*
|
||||
* Created on: 28 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
// Front end for a chunker run: reads text from a stream, and writes the
// chunks found in it to another stream as tree markup.
class EnOpenNLPChunker
{
public:
  // `openNLPPath` is stored in m_openNLPPath.
  EnOpenNLPChunker(const std::string &openNLPPath);
  virtual ~EnOpenNLPChunker();

  // Processes all of `in` and writes the result to `out`. `filterList`
  // restricts which chunk labels are emitted (see UseLabel()).
  void Process(std::istream &in, std::ostream &out, const std::vector<std::string> &filterList);

protected:
  // In-place conversion between literal special characters and their
  // escaped form.
  void Escape(std::string &line);
  void Unescape(std::string &line);

  // Rewrites one line of chunker output onto `out`.
  void MosesReformat(const std::string &line, std::ostream &out, const std::vector<std::string> &filterList);

  // True when `label` should be emitted under the given filter.
  bool UseLabel(const std::string &label, const std::vector<std::string> &filterList) const;

  // Path given to the constructor.
  const std::string m_openNLPPath;
};
|
||||
|
@ -1,226 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include "EnPhrasalVerb.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void EnPhrasalVerb(const Phrase &source, int revision, ostream &out)
|
||||
{
|
||||
Ranges ranges;
|
||||
|
||||
// find ranges to label
|
||||
for (int start = 0; start < source.size(); ++start) {
|
||||
size_t end = std::numeric_limits<size_t>::max();
|
||||
|
||||
if (IsA(source, start, 0, 0, "ask asked asking")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "back backed backing")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "blow blown blew")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "break broke broken")) {
|
||||
end = Found(source, start, 0, "down up in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "bring brought bringing")) {
|
||||
end = Found(source, start, 0, "down up in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "call called calling")) {
|
||||
end = Found(source, start, 0, "back up off");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "check checked checking")) {
|
||||
end = Found(source, start, 0, "out in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "cheer cheered cheering")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "clean cleaned cleaning")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "cross crossed crossing")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "cut cutting")) {
|
||||
end = Found(source, start, 0, "down off out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "do did done")) {
|
||||
end = Found(source, start, 0, "over up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "drop dropped dropping")) {
|
||||
end = Found(source, start, 0, "off");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "figure figured figuring")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "fill filled filling")) {
|
||||
end = Found(source, start, 0, "in out up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "find found finding")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "get got getting gotten")) {
|
||||
end = Found(source, start, 0, "across over back");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "give given gave giving")) {
|
||||
end = Found(source, start, 0, "away back out up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "hand handed handing")) {
|
||||
end = Found(source, start, 0, "down in over");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "hold held holding")) {
|
||||
end = Found(source, start, 0, "back up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "keep kept keeping")) {
|
||||
end = Found(source, start, 0, "from up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "let letting")) {
|
||||
end = Found(source, start, 0, "down in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "look looked looking")) {
|
||||
end = Found(source, start, 0, "over up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "make made making")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "mix mixed mixing")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "pass passed passing")) {
|
||||
end = Found(source, start, 0, "out up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "pay payed paying")) {
|
||||
end = Found(source, start, 0, "back");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "pick picked picking")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "point pointed pointing")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "put putting")) {
|
||||
end = Found(source, start, 0, "down off out together on");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "send sending")) {
|
||||
end = Found(source, start, 0, "back");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "set setting")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "sort sorted sorting")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "switch switched switching")) {
|
||||
end = Found(source, start, 0, "off on");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "take took taking")) {
|
||||
end = Found(source, start, 0, "apart back off out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "tear torn tearing")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "think thought thinking")) {
|
||||
end = Found(source, start, 0, "over");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "thrown threw thrown throwing")) {
|
||||
end = Found(source, start, 0, "away");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "turn turned turning")) {
|
||||
end = Found(source, start, 0, "down off on");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "try tried trying")) {
|
||||
end = Found(source, start, 0, "on out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "use used using")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "warm warmed warming")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "work worked working")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
|
||||
// found range to label
|
||||
if (end != std::numeric_limits<size_t>::max() &&
|
||||
end > start + 1) {
|
||||
bool add = true;
|
||||
if (revision == 1 && Exist(source,
|
||||
start + 1,
|
||||
end - 1,
|
||||
1,
|
||||
"VB VBD VBG VBN VBP VBZ")) {
|
||||
// there's a verb in between
|
||||
add = false;
|
||||
}
|
||||
|
||||
if (add) {
|
||||
Range range(start + 1, end - 1, "reorder-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OutputWithLabels(source, ranges, out);
|
||||
}
|
||||
|
||||
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str)
|
||||
{
|
||||
vector<string> soughts = Moses::Tokenize(str, " ");
|
||||
for (size_t i = start; i <= end; ++i) {
|
||||
const Word &word = source[i];
|
||||
bool found = Found(word, factor, soughts);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t Found(const Phrase &source, int pos, int factor, const std::string &str)
|
||||
{
|
||||
const size_t MAX_RANGE = 10;
|
||||
|
||||
vector<string> soughts = Moses::Tokenize(str, " ");
|
||||
vector<string> puncts = Moses::Tokenize(". : , ;", " ");
|
||||
|
||||
|
||||
size_t maxEnd = std::min(source.size(), (size_t) pos + MAX_RANGE);
|
||||
for (size_t i = pos + 1; i < maxEnd; ++i) {
|
||||
const Word &word = source[i];
|
||||
bool found;
|
||||
|
||||
found = Found(word, factor, puncts);
|
||||
if (found) {
|
||||
return std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
found = Found(word, factor, soughts);
|
||||
if (found) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
|
||||
bool Found(const Word &word, int factor, const vector<string> &soughts)
|
||||
{
|
||||
const string &element = word[factor];
|
||||
for (size_t i = 0; i < soughts.size(); ++i) {
|
||||
const string &sought = soughts[i];
|
||||
bool found = (element == sought);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1,11 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Main.h"
|
||||
|
||||
// roll your own identification of phrasal verbs
|
||||
// Labels the words between a known phrasal verb and its particle as
// "reorder-label" and writes the labelled phrase to `out`. With
// revision == 1, spans containing a VB* tag on factor 1 are not labelled.
void EnPhrasalVerb(const Phrase &source, int revision, std::ostream &out);
|
||||
|
||||
// True when some word in the inclusive range [start, end] of `source` has its
// `factor` equal to one of the space-separated entries of `str`.
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str);
|
||||
// Searches a bounded window after `pos` for a word whose `factor` is one of
// the space-separated entries of `str`, stopping at punctuation. Returns the
// index found, or std::numeric_limits<size_t>::max() when there is none.
size_t Found(const Phrase &source, int pos, int factor, const std::string &str);
|
||||
// True when the `factor`-th element of `word` equals any entry of `soughts`.
bool Found(const Word &word, int factor, const std::vector<std::string> &soughts);
|
||||
|
@ -1,29 +0,0 @@
|
||||
#include "LabelByInitialLetter.h"
|
||||
#include "Main.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void LabelByInitialLetter(const Phrase &source, std::ostream &out)
|
||||
{
|
||||
Ranges ranges;
|
||||
|
||||
for (int start = 0; start < source.size(); ++start) {
|
||||
const string &startWord = source[start][0];
|
||||
string startChar = startWord.substr(0,1);
|
||||
|
||||
for (int end = start + 1; end < source.size(); ++end) {
|
||||
const string &endWord = source[end][0];
|
||||
string endChar = endWord.substr(0,1);
|
||||
|
||||
if (startChar == endChar) {
|
||||
Range range(start, end, startChar + "-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OutputWithLabels(source, ranges, out);
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,6 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Main.h"
|
||||
|
||||
void LabelByInitialLetter(const Phrase &source, std::ostream &out);
|
||||
|
@ -1,195 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <boost/program_options.hpp>
|
||||
#include "moses/Util.h"
|
||||
#include "Main.h"
|
||||
#include "DeEn.h"
|
||||
#include "EnPhrasalVerb.h"
|
||||
#include "EnOpenNLPChunker.h"
|
||||
#include "LabelByInitialLetter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool g_debug = false;
|
||||
|
||||
Phrase Tokenize(const string &line);
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cerr << "Starting" << endl;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
po::options_description desc("Options");
|
||||
desc.add_options()
|
||||
("help", "Print help messages")
|
||||
|
||||
("input,i", po::value<string>(), "Input file. Otherwise it will read from standard in")
|
||||
("output,o", po::value<string>(), "Output file. Otherwise it will print from standard out")
|
||||
|
||||
("source-language,s", po::value<string>()->required(), "Source Language")
|
||||
("target-language,t", po::value<string>()->required(), "Target Language")
|
||||
("revision,r", po::value<int>()->default_value(0), "Revision")
|
||||
("filter", po::value<string>(), "Only use labels from this comma-separated list")
|
||||
|
||||
("opennlp", po::value<string>()->default_value(""), "Path to Apache OpenNLP toolkit")
|
||||
|
||||
;
|
||||
|
||||
po::variables_map vm;
|
||||
try
|
||||
{
|
||||
po::store(po::parse_command_line(argc, argv, desc),
|
||||
vm); // can throw
|
||||
|
||||
/** --help option
|
||||
*/
|
||||
if ( vm.count("help") )
|
||||
{
|
||||
std::cout << "Basic Command Line Parameter App" << std::endl
|
||||
<< desc << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
po::notify(vm); // throws on error, so do after help in case
|
||||
// there are any problems
|
||||
}
|
||||
catch(po::error& e)
|
||||
{
|
||||
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
|
||||
std::cerr << desc << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
istream *inStrm = &cin;
|
||||
if (vm.count("input")) {
|
||||
string inStr = vm["input"].as<string>();
|
||||
cerr << "inStr=" << inStr << endl;
|
||||
ifstream *inFile = new ifstream(inStr.c_str());
|
||||
inStrm = inFile;
|
||||
}
|
||||
|
||||
ostream *outStrm = &cout;
|
||||
if (vm.count("output")) {
|
||||
string outStr = vm["output"].as<string>();
|
||||
cerr << "outStr=" << outStr << endl;
|
||||
ostream *outFile = new ofstream(outStr.c_str());
|
||||
outStrm = outFile;
|
||||
}
|
||||
|
||||
vector<string> filterList;
|
||||
if (vm.count("filter")) {
|
||||
string filter = vm["filter"].as<string>();
|
||||
Moses::Tokenize(filterList, filter, ",");
|
||||
}
|
||||
|
||||
string sourceLang = vm["source-language"].as<string>();
|
||||
string targetLang = vm["target-language"].as<string>();
|
||||
int revision = vm["revision"].as<int>();
|
||||
|
||||
cerr << sourceLang << " " << targetLang << " " << revision << endl;
|
||||
|
||||
if (sourceLang == "en" && revision == 2) {
|
||||
if (vm.count("opennlp") == 0) {
|
||||
throw "Need path to openNLP toolkit";
|
||||
}
|
||||
|
||||
string openNLPPath = vm["opennlp"].as<string>();
|
||||
EnOpenNLPChunker chunker(openNLPPath);
|
||||
chunker.Process(*inStrm, *outStrm, filterList);
|
||||
}
|
||||
else {
|
||||
// process line-by-line
|
||||
string line;
|
||||
size_t lineNum = 1;
|
||||
|
||||
while (getline(*inStrm, line)) {
|
||||
//cerr << lineNum << ":" << line << endl;
|
||||
if (lineNum % 1000 == 0) {
|
||||
cerr << lineNum << " ";
|
||||
}
|
||||
|
||||
Phrase source = Tokenize(line);
|
||||
|
||||
if (revision == 600 ) {
|
||||
LabelByInitialLetter(source, *outStrm);
|
||||
}
|
||||
else if (sourceLang == "de" && targetLang == "en") {
|
||||
LabelDeEn(source, *outStrm);
|
||||
}
|
||||
else if (sourceLang == "en") {
|
||||
if (revision == 0 || revision == 1) {
|
||||
EnPhrasalVerb(source, revision, *outStrm);
|
||||
}
|
||||
else if (revision == 2) {
|
||||
string openNLPPath = vm["opennlp-path"].as<string>();
|
||||
EnOpenNLPChunker chunker(openNLPPath);
|
||||
}
|
||||
}
|
||||
|
||||
++lineNum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cerr << "Finished" << endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Convert one input line into a Phrase.
 *
 * The line is first split into tokens by Moses::Tokenize with its default
 * delimiters; each token is then split on '|' into its factors, giving one
 * Word (vector of factor strings) per token.
 *
 * @param line raw input line
 * @return the words of the line, in input order
 */
Phrase Tokenize(const string &line)
{
  const vector<string> tokens = Moses::Tokenize(line);

  Phrase phrase;
  phrase.reserve(tokens.size());
  for (vector<string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok) {
    phrase.push_back(Moses::Tokenize(*tok, "|"));
  }
  return phrase;
}
|
||||
|
||||
bool IsA(const Phrase &source, int pos, int offset, int factor, const string &str)
|
||||
{
|
||||
pos += offset;
|
||||
if (pos >= source.size() || pos < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const string &word = source[pos][factor];
|
||||
vector<string> soughts = Moses::Tokenize(str, " ");
|
||||
for (int i = 0; i < soughts.size(); ++i) {
|
||||
string &sought = soughts[i];
|
||||
bool found = (word == sought);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void OutputWithLabels(const Phrase &source, const Ranges ranges, ostream &out)
|
||||
{
|
||||
// output sentence, with labels
|
||||
for (int pos = 0; pos < source.size(); ++pos) {
|
||||
// output beginning of label
|
||||
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
|
||||
const Range &range = *iter;
|
||||
if (range.range.first == pos) {
|
||||
out << "<tree label=\"" + range.label + "\"> ";
|
||||
}
|
||||
}
|
||||
|
||||
const Word &word = source[pos];
|
||||
out << word[0] << " ";
|
||||
|
||||
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
|
||||
const Range &range = *iter;
|
||||
if (range.range.second == pos) {
|
||||
out << "</tree> ";
|
||||
}
|
||||
}
|
||||
}
|
||||
out << endl;
|
||||
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
typedef std::vector<std::string> Word;
|
||||
typedef std::vector<Word> Phrase;
|
||||
|
||||
/**
 * A labelled span of a sentence: a pair of word indices plus the label
 * text attached to that span.
 */
struct Range {
  // (start, end) indices exactly as passed to the constructor.
  std::pair<int, int> range;
  // Label text attached to the span.
  std::string label;

  /**
   * @param start index stored as range.first
   * @param end   index stored as range.second
   * @param l     label text, copied into `label`
   */
  Range(int start, int end, const std::string &l)
    : range(std::make_pair(start, end)), label(l) {}
};
|
||||
|
||||
typedef std::list<Range> Ranges;
|
||||
|
||||
bool IsA(const Phrase &source, int pos, int offset, int factor, const std::string &str);
|
||||
void OutputWithLabels(const Phrase &source, const Ranges ranges, std::ostream &out);
|
||||
|
||||
|
@ -1,14 +0,0 @@
|
||||
all: manual-label
|
||||
|
||||
clean:
|
||||
rm -f *.o manual-label
|
||||
|
||||
.cpp.o:
|
||||
g++ -I../../../boost/include -I../../../ -O3 -g -c $<
|
||||
|
||||
OBJECTS = DeEn.o EnOpenNLPChunker.o EnPhrasalVerb.o Main.o LabelByInitialLetter.o
|
||||
|
||||
manual-label: $(OBJECTS)
|
||||
g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_program_options-mt -o manual-label
|
||||
|
||||
|
@ -46,20 +46,20 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
|
||||
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
|
||||
<Preprocessor Value="TRACE_ENABLE=1"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
|
||||
<Library Value="util"/>
|
||||
<Library Value="moses"/>
|
||||
<Library Value="search"/>
|
||||
|
@ -11,11 +11,11 @@
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
@ -79,12 +79,12 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
|
@ -220,6 +220,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/ConfusionNet.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ContextParameters.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ContextParameters.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>DecodeGraph.cpp</name>
|
||||
<type>1</type>
|
||||
|
@ -1,6 +1,9 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="moses" InternalType="Library">
|
||||
<Plugins>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
@ -13,9 +16,6 @@
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<VirtualDirectory Name="TranslationModel">
|
||||
<VirtualDirectory Name="UG">
|
||||
@ -531,8 +531,6 @@
|
||||
<File Name="../../../moses/FF/RuleScope.h"/>
|
||||
<File Name="../../../moses/FF/SetSourcePhrase.cpp"/>
|
||||
<File Name="../../../moses/FF/SetSourcePhrase.h"/>
|
||||
<File Name="../../../moses/FF/SkeletonChangeInput.cpp"/>
|
||||
<File Name="../../../moses/FF/SkeletonChangeInput.h"/>
|
||||
<File Name="../../../moses/FF/SkeletonStatefulFF.cpp"/>
|
||||
<File Name="../../../moses/FF/SkeletonStatefulFF.h"/>
|
||||
<File Name="../../../moses/FF/SkeletonStatelessFF.cpp"/>
|
||||
@ -777,6 +775,8 @@
|
||||
<File Name="../../../moses/WordsRange.h"/>
|
||||
<File Name="../../../moses/XmlOption.cpp"/>
|
||||
<File Name="../../../moses/XmlOption.h"/>
|
||||
<File Name="../../../moses/OutputFileStream.cpp"/>
|
||||
<File Name="../../../moses/OutputFileStream.h"/>
|
||||
</VirtualDirectory>
|
||||
<VirtualDirectory Name="PP">
|
||||
<File Name="../../../moses/PP/CountsPhraseProperty.cpp"/>
|
||||
@ -795,11 +795,13 @@
|
||||
<File Name="../../../moses/PP/SpanLengthPhraseProperty.h"/>
|
||||
<File Name="../../../moses/PP/TreeStructurePhraseProperty.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<VirtualDirectory Name="parameters">
|
||||
<File Name="../../../moses/parameters/ContextParameters.cpp"/>
|
||||
<File Name="../../../moses/parameters/ContextParameters.h"/>
|
||||
<File Name="../../../moses/parameters/BookkeepingOptions.cpp"/>
|
||||
<File Name="../../../moses/parameters/BookkeepingOptions.h"/>
|
||||
<File Name="../../../moses/parameters/NBestOptions.cpp"/>
|
||||
<File Name="../../../moses/parameters/NBestOptions.h"/>
|
||||
</VirtualDirectory>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
@ -812,11 +814,11 @@
|
||||
<ResourceCompiler Options=""/>
|
||||
</GlobalSettings>
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<Compiler Options="-g -std=c++0x" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../../"/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="MAX_NUM_FACTORS=4"/>
|
||||
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
|
||||
<Preprocessor Value="WITH_THREADS"/>
|
||||
@ -895,4 +897,6 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -1,9 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="manual-label" InternalType="Console">
|
||||
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
@ -16,20 +13,15 @@
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="manual-label">
|
||||
<File Name="DeEn.cpp"/>
|
||||
<File Name="DeEn.h"/>
|
||||
<File Name="EnOpenNLPChunker.cpp"/>
|
||||
<File Name="EnOpenNLPChunker.h"/>
|
||||
<File Name="EnPhrasalVerb.cpp"/>
|
||||
<File Name="EnPhrasalVerb.h"/>
|
||||
<File Name="LabelByInitialLetter.cpp"/>
|
||||
<File Name="LabelByInitialLetter.h"/>
|
||||
<File Name="Main.cpp"/>
|
||||
<File Name="Main.h"/>
|
||||
<VirtualDirectory Name="src">
|
||||
<File Name="../../../misc/pruneGeneration.cpp"/>
|
||||
<File Name="../../../misc/pruneGeneration.h"/>
|
||||
</VirtualDirectory>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
@ -41,24 +33,26 @@
|
||||
</Linker>
|
||||
<ResourceCompiler Options=""/>
|
||||
</GlobalSettings>
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Configuration Name="Debug" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<Library Value="boost_program_options"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
|
||||
<Library Value="boost_filesystem"/>
|
||||
<Library Value="boost_system"/>
|
||||
<Library Value="moses"/>
|
||||
<Library Value="z"/>
|
||||
</Linker>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
|
||||
<Environment EnvVarSetName="<Use Defaults>" DbgSetName="<Use Defaults>">
|
||||
<![CDATA[]]>
|
||||
</Environment>
|
||||
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="no">
|
||||
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="yes">
|
||||
<DebuggerSearchPaths/>
|
||||
<PostConnectCommands/>
|
||||
<StartupCommands/>
|
||||
@ -79,14 +73,14 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
<SearchPaths/>
|
||||
</Completion>
|
||||
</Configuration>
|
||||
<Configuration Name="Release" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Configuration Name="Release" CompilerType="GCC ( XCode )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-O2;-Wall" C_Options="-O2;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<Preprocessor Value="NDEBUG"/>
|
||||
@ -118,7 +112,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -126,6 +120,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
@ -59,7 +59,6 @@
|
||||
<listOptionValue builtIn="false" value="boost_program_options"/>
|
||||
<listOptionValue builtIn="false" value="pthread"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="dl"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
|
@ -19,6 +19,10 @@
|
||||
<File Name="../../../phrase-extract/tables-core.cpp"/>
|
||||
<File Name="../../../phrase-extract/tables-core.h"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug">
|
||||
<Project Name="util"/>
|
||||
</Dependencies>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -32,17 +36,17 @@
|
||||
<Configuration Name="Debug" CompilerType="clang( based on LLVM 3.5svn )" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/contrib/other-builds/util/Debug"/>
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/lm/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/OnDiskPt/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/search/Debug"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/util/Debug"/>
|
||||
<Library Value="moses"/>
|
||||
<Library Value="search"/>
|
||||
<Library Value="OnDiskPt"/>
|
||||
@ -86,7 +90,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -125,7 +129,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -133,8 +137,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug">
|
||||
<Project Name="util"/>
|
||||
</Dependencies>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -10,6 +10,8 @@
|
||||
<File Name="../../../search/rule.cc"/>
|
||||
<File Name="../../../search/vertex.cc"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -23,9 +25,9 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/phrase-extract"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../phrase-extract"/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
<Preprocessor Value="KENLM_MAX_ORDER=7"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
@ -55,7 +57,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -93,7 +95,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -101,6 +103,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -75,7 +75,6 @@
|
||||
<listOptionValue builtIn="false" value="boost_filesystem"/>
|
||||
<listOptionValue builtIn="false" value="boost_program_options"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="dl"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
@ -159,10 +158,10 @@
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/server"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/server"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
|
@ -62,6 +62,8 @@
|
||||
<File Name="../../../util/stream/sort_test.cc" ExcludeProjConfig="Debug"/>
|
||||
<File Name="../../../util/stream/stream_test.cc" ExcludeProjConfig="Debug"/>
|
||||
</VirtualDirectory>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
<Settings Type="Static Library">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
@ -75,8 +77,8 @@
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Static Library" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g" C_Options="-g" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
@ -105,7 +107,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -143,7 +145,7 @@
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<Completion EnableCpp11="no" EnableCpp14="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
@ -151,6 +153,4 @@
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
||||
|
@ -17,6 +17,15 @@ configname=$(basename $configf | sed 's/\.config$//')
|
||||
|
||||
source "$configf"
|
||||
|
||||
# beautifier
|
||||
git clone git@github.com:moses-smt/mosesdecoder.git /tmp/moses
|
||||
cd /tmp/moses
|
||||
./scripts/other/beautify.py --format --skip-perltidy
|
||||
git commit -am "daily automatic beautifier"
|
||||
git push
|
||||
rm -rf /tmp/moses
|
||||
cd -
|
||||
|
||||
[ -z "$MCC_SCAN_BRANCHES" ] \
|
||||
&& die "Bad config $configf; does not define MCC_SCAN_BRANCHES"
|
||||
|
||||
@ -107,7 +116,6 @@ function run_single_test () {
|
||||
#regtest_dir=$PWD/$(basename $regtest_file .tgz)
|
||||
cd ..
|
||||
|
||||
|
||||
echo "## ./bjam clean" >> $longlog
|
||||
./bjam clean $MCC_CONFIGURE_ARGS --with-regtest=$regtest_dir >> $longlog 2>&1 || warn "bjam clean failed, suspicious"
|
||||
|
||||
@ -155,6 +163,7 @@ function run_single_test () {
|
||||
if [ -z "$err" ]; then
|
||||
status="OK"
|
||||
else
|
||||
git reset --hard HEAD
|
||||
status="FAIL:$err"
|
||||
fi
|
||||
echo "## Status: $status" >> $longlog
|
||||
@ -187,7 +196,7 @@ done
|
||||
|
||||
#### Main loop over all commits
|
||||
for i in $MCC_SCAN_BRANCHES; do
|
||||
warn "On brach $i"
|
||||
warn "On branch $i"
|
||||
git rev-list $i \
|
||||
| while read commit; do
|
||||
first_char=$(echo $commit | grep -o '^.')
|
||||
|
@ -67,7 +67,9 @@ private:
|
||||
|
||||
protected:
|
||||
/// For child classes only: retrieve filebuf.
|
||||
__gnu_cxx::stdio_filebuf<char> *get_filebuf() { return _filebuf; }
|
||||
__gnu_cxx::stdio_filebuf<char> *get_filebuf() {
|
||||
return _filebuf;
|
||||
}
|
||||
};
|
||||
|
||||
class ifdstream : public _fdstream
|
||||
|
@ -4,14 +4,17 @@
|
||||
Module implementing Dialog.
|
||||
"""
|
||||
|
||||
from PyQt4.QtGui import *
|
||||
from PyQt4.QtCore import *
|
||||
from PyQt4.QtGui import (
|
||||
QDialog,
|
||||
QFileDialog,
|
||||
)
|
||||
from PyQt4.QtCore import pyqtSignature
|
||||
|
||||
import datetime
|
||||
import os
|
||||
|
||||
from Ui_addMTModel import Ui_Dialog
|
||||
from util import *
|
||||
from util import doAlert
|
||||
|
||||
|
||||
class AddMTModelDialog(QDialog, Ui_Dialog):
|
||||
@ -88,7 +91,7 @@ class AddMTModelDialog(QDialog, Ui_Dialog):
|
||||
def checkEmpty(mystr):
|
||||
return len(str(mystr).strip()) <= 0
|
||||
|
||||
#check everything
|
||||
# Check everything.
|
||||
self.modelName = self.editName.text()
|
||||
if checkEmpty(self.modelName):
|
||||
doAlert("Please provide non-empty Model Name")
|
||||
|
@ -4,11 +4,18 @@
|
||||
Module implementing ChooseMTModelDialog.
|
||||
"""
|
||||
|
||||
from PyQt4.QtCore import *
|
||||
from PyQt4.QtGui import *
|
||||
from PyQt4.QtSql import *
|
||||
import sys
|
||||
|
||||
from PyQt4.QtCore import (
|
||||
pyqtSignature,
|
||||
QObject,
|
||||
SIGNAL,
|
||||
)
|
||||
from PyQt4.QtGui import QDialog
|
||||
from PyQt4.QtSql import QSqlQueryModel
|
||||
|
||||
from Ui_chooseMTModel import Ui_Dialog
|
||||
from util import doAlert
|
||||
|
||||
|
||||
class ChooseMTModelDialog(QDialog, Ui_Dialog):
|
||||
@ -28,14 +35,20 @@ class ChooseMTModelDialog(QDialog, Ui_Dialog):
|
||||
self.selTableView.hideColumn(0)
|
||||
self.selTableView.hideColumn(5)
|
||||
self.selTableView.hideColumn(6)
|
||||
#change status and keep the column
|
||||
QObject.connect(datamodel, SIGNAL("modelInstalled()"), self.on_datamodel_modelInstalled)
|
||||
# Change status and keep the column.
|
||||
QObject.connect(
|
||||
datamodel, SIGNAL("modelInstalled()"),
|
||||
self.on_datamodel_modelInstalled)
|
||||
|
||||
def updateModel(self):
|
||||
self.model.setQuery('SELECT ID, name, srclang, trglang, status, path, mosesini FROM models WHERE status = "READY" AND deleted != "True"', self.database)
|
||||
self.model.setQuery(
|
||||
'SELECT ID, name, srclang, trglang, status, path, mosesini '
|
||||
'FROM models '
|
||||
'WHERE status = "READY" AND deleted != "True"',
|
||||
self.database)
|
||||
|
||||
def on_datamodel_recordUpdated(self, bRecord):
|
||||
#deal with the selection changed problem
|
||||
"""Deal with the selection changed problem."""
|
||||
try:
|
||||
if bRecord:
|
||||
current = self.selTableView.currentIndex()
|
||||
@ -44,9 +57,9 @@ class ChooseMTModelDialog(QDialog, Ui_Dialog):
|
||||
else:
|
||||
self.curSelection = None
|
||||
else:
|
||||
if not self.curSelection is None:
|
||||
if self.curSelection is not None:
|
||||
self.selTableView.selectRow(self.curSelection)
|
||||
except Exception, e:
|
||||
except Exception as e:
|
||||
print >> sys.stderr, str(e)
|
||||
|
||||
def on_datamodel_modelInstalled(self):
|
||||
|
@ -1,7 +1,6 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from PyQt4.QtCore import *
|
||||
from PyQt4.QtGui import *
|
||||
from PyQt4.QtGui import QApplication
|
||||
|
||||
import os
|
||||
import sys
|
||||
@ -9,7 +8,6 @@ import sys
|
||||
from mainWindow import MainWindow
|
||||
from datamodel import DataModel
|
||||
from moses import Moses
|
||||
from util import *
|
||||
|
||||
if __name__ == "__main__":
|
||||
app = QApplication(sys.argv)
|
||||
|
@ -4,10 +4,19 @@
|
||||
Module implementing MainWindow.
|
||||
"""
|
||||
|
||||
from PyQt4.QtCore import *
|
||||
from PyQt4.QtGui import *
|
||||
from PyQt4.QtSql import *
|
||||
from PyQt4.QtCore import (
|
||||
pyqtSignature,
|
||||
QObject,
|
||||
Qt,
|
||||
SIGNAL,
|
||||
)
|
||||
from PyQt4.QtGui import (
|
||||
QMainWindow,
|
||||
QMessageBox,
|
||||
QProgressDialog,
|
||||
)
|
||||
|
||||
import sys
|
||||
import threading
|
||||
|
||||
from Ui_mainWindow import Ui_MainWindow
|
||||
@ -15,7 +24,7 @@ from addMTModel import AddMTModelDialog
|
||||
from chooseMTModel import ChooseMTModelDialog
|
||||
from engine import Engine
|
||||
from credits import DlgCredits
|
||||
from util import *
|
||||
from util import doAlert
|
||||
|
||||
|
||||
class MainWindow(QMainWindow, Ui_MainWindow):
|
||||
@ -54,18 +63,27 @@ class MainWindow(QMainWindow, Ui_MainWindow):
|
||||
Slot documentation goes here.
|
||||
"""
|
||||
current = self.tableView.currentIndex()
|
||||
if current and current.row() >= 0:
|
||||
if self.engine and self.datamodel.getRowID(current.row()) == self.engine.model['ID']:
|
||||
text = '''The model is still in use, do you want to stop and delete it?
|
||||
It might take a while...'''
|
||||
reply = QMessageBox.question(None, 'Message', text, QMessageBox.Yes, QMessageBox.No)
|
||||
if reply == QMessageBox.No:
|
||||
return
|
||||
t = self.stopEngine(self.engine)
|
||||
t.join()
|
||||
self.engine = None
|
||||
self.clearPanel()
|
||||
self.datamodel.delModel(current.row())
|
||||
if not current or current.row() < 0:
|
||||
return
|
||||
model_in_use = (
|
||||
self.engine and
|
||||
self.datamodel.getRowID(current.row()) == self.engine.model['ID']
|
||||
)
|
||||
if model_in_use:
|
||||
text = (
|
||||
"The model is still in use, do you want to "
|
||||
"stop and delete it?\n"
|
||||
"It might take a while..."
|
||||
)
|
||||
reply = QMessageBox.question(
|
||||
None, 'Message', text, QMessageBox.Yes, QMessageBox.No)
|
||||
if reply == QMessageBox.No:
|
||||
return
|
||||
t = self.stopEngine(self.engine)
|
||||
t.join()
|
||||
self.engine = None
|
||||
self.clearPanel()
|
||||
self.datamodel.delModel(current.row())
|
||||
|
||||
@pyqtSignature("")
|
||||
def on_newModelBtn_clicked(self):
|
||||
@ -153,17 +171,24 @@ It might take a while...'''
|
||||
if self.progress:
|
||||
self.progress.close()
|
||||
self.progress = None
|
||||
self.progress = QProgressDialog("Model: %s" % model['name'], "Cancel", 0, self.engine.countSteps(), self)
|
||||
self.progress = QProgressDialog(
|
||||
"Model: %s" % model['name'], "Cancel", 0,
|
||||
self.engine.countSteps(), self)
|
||||
self.progress.setAutoReset(True)
|
||||
self.progress.setAutoClose(True)
|
||||
self.progress.setWindowModality(Qt.WindowModal)
|
||||
self.progress.setWindowTitle('Loading Model...')
|
||||
QObject.connect(self.progress, SIGNAL("canceled()"), self.progressCancelled)
|
||||
QObject.connect(
|
||||
self.progress, SIGNAL("canceled()"), self.progressCancelled)
|
||||
self.progress.show()
|
||||
|
||||
#connect engine signal
|
||||
QObject.connect(self.engine, SIGNAL("stepFinished(int)"), self.engineStepFinished)
|
||||
QObject.connect(self.engine, SIGNAL("loaded(bool, QString)"), self.engineLoaded)
|
||||
# Connect engine signal.
|
||||
QObject.connect(
|
||||
self.engine, SIGNAL("stepFinished(int)"),
|
||||
self.engineStepFinished)
|
||||
QObject.connect(
|
||||
self.engine, SIGNAL("loaded(bool, QString)"),
|
||||
self.engineLoaded)
|
||||
|
||||
def startEngineThread():
|
||||
self.engine.start()
|
||||
@ -225,7 +250,9 @@ It might take a while...'''
|
||||
if text.strip() == "":
|
||||
trans.append(text)
|
||||
else:
|
||||
trans.append(self.engine.translate(text.replace('\r', ' ').strip()).decode('utf8'))
|
||||
trans.append(
|
||||
self.engine.translate(
|
||||
text.replace('\r', ' ').strip()).decode('utf8'))
|
||||
self.editTrg.setText('\n'.join(trans))
|
||||
except Exception, e:
|
||||
print >> sys.stderr, str(e)
|
||||
|
@ -14,6 +14,8 @@ exe 1-1-Extraction : 1-1-Extraction.cpp ..//boost_filesystem ../moses//moses ;
|
||||
|
||||
exe prunePhraseTable : prunePhraseTable.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;
|
||||
|
||||
exe pruneGeneration : pruneGeneration.cpp ..//boost_filesystem ../moses//moses ..//boost_program_options ;
|
||||
|
||||
local with-cmph = [ option.get "with-cmph" ] ;
|
||||
if $(with-cmph) {
|
||||
exe processPhraseTableMin : processPhraseTableMin.cpp ..//boost_filesystem ../moses//moses ;
|
||||
@ -46,6 +48,6 @@ $(TOP)//boost_iostreams
|
||||
$(TOP)//boost_program_options
|
||||
;
|
||||
|
||||
alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable ;
|
||||
alias programs : 1-1-Extraction TMining generateSequences processLexicalTable queryLexicalTable programsMin programsProbing merge-sorted prunePhraseTable pruneGeneration ;
|
||||
#processPhraseTable queryPhraseTable
|
||||
|
||||
|
@ -54,11 +54,11 @@ int main(int argc, char** argv)
|
||||
bool multipleScoreTrees = true;
|
||||
size_t quantize = 0;
|
||||
|
||||
size_t threads =
|
||||
#ifdef WITH_THREADS
|
||||
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
|
||||
#endif
|
||||
1;
|
||||
size_t threads =
|
||||
#ifdef WITH_THREADS
|
||||
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
|
||||
#endif
|
||||
1;
|
||||
|
||||
if(1 >= argc) {
|
||||
printHelp(argv);
|
||||
|
@ -67,11 +67,11 @@ int main(int argc, char **argv)
|
||||
bool sortScoreIndexSet = false;
|
||||
size_t sortScoreIndex = 2;
|
||||
bool warnMe = true;
|
||||
size_t threads =
|
||||
#ifdef WITH_THREADS
|
||||
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
|
||||
#endif
|
||||
1;
|
||||
size_t threads =
|
||||
#ifdef WITH_THREADS
|
||||
boost::thread::hardware_concurrency() ? boost::thread::hardware_concurrency() :
|
||||
#endif
|
||||
1;
|
||||
|
||||
if(1 >= argc) {
|
||||
printHelp(argv);
|
||||
|
98
misc/pruneGeneration.cpp
Normal file
98
misc/pruneGeneration.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include "pruneGeneration.h"
|
||||
#include "moses/InputFileStream.h"
|
||||
#include "moses/OutputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
cerr << "Starting" << endl;
|
||||
int limit = atoi(argv[1]);
|
||||
string inPathStem = argv[2];
|
||||
string outPathStem = argv[3];
|
||||
|
||||
namespace fs = boost::filesystem;
|
||||
|
||||
//cerr << "inPathStem=" << inPathStem << endl;
|
||||
fs::path p(inPathStem);
|
||||
fs::path dir = p.parent_path();
|
||||
//cerr << "dir=" << dir << endl;
|
||||
|
||||
fs::path fileStem = p.filename();
|
||||
string fileStemStr = fileStem.native();
|
||||
size_t fileStemStrSize = fileStemStr.size();
|
||||
//cerr << "fileStem=" << fileStemStr << endl;
|
||||
|
||||
// loop thru each file in directory
|
||||
fs::directory_iterator end_iter;
|
||||
for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
|
||||
if (fs::is_regular_file(dir_iter->status())) {
|
||||
fs::path currPath = *dir_iter;
|
||||
string currPathStr = currPath.native();
|
||||
//cerr << "currPathStr=" << currPathStr << endl;
|
||||
|
||||
fs::path currFile = currPath.filename();
|
||||
string currFileStr = currFile.native();
|
||||
|
||||
if (currFileStr.find(fileStemStr) == 0) {
|
||||
// found gen table we need
|
||||
//cerr << "found=" << currPathStr << endl;
|
||||
string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
|
||||
string outPath = outPathStem + suffix;
|
||||
cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;
|
||||
|
||||
Moses::InputFileStream inStrme(currPathStr);
|
||||
Moses::OutputFileStream outStrme(outPath);
|
||||
Process(limit, inStrme, outStrme);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cerr << "Finished" << endl;
|
||||
}
|
||||
|
||||
void Process(int limit, istream &inStrme, ostream &outStrme)
|
||||
{
|
||||
vector<Rec> records;
|
||||
string prevInWord;
|
||||
string line;
|
||||
while (getline(inStrme, line)) {
|
||||
vector<string> toks;
|
||||
Tokenize(toks, line);
|
||||
assert(toks.size() == 4);
|
||||
|
||||
if (prevInWord != toks[0]) {
|
||||
Output(outStrme, records, limit);
|
||||
records.clear();
|
||||
}
|
||||
|
||||
// add new record
|
||||
float prob = atof(toks[2].c_str());
|
||||
records.push_back(Rec(prob, line));
|
||||
|
||||
prevInWord = toks[0];
|
||||
}
|
||||
|
||||
// last
|
||||
Output(outStrme, records, limit);
|
||||
records.clear();
|
||||
|
||||
}
|
||||
|
||||
/**
 * Write the highest-ranked records of one group to the output stream.
 *
 * The records are sorted in place into descending order according to
 * Rec::operator<, then at most `limit` of them are written, one line each.
 *
 * @param outStrme  stream to write to
 * @param records   group to rank; reordered by this call
 * @param limit     maximum number of lines to write. A negative value writes
 *                  every record, which is what the implicit signed-to-unsigned
 *                  conversion in the loop condition used to do silently.
 */
void Output(ostream &outStrme, vector<Rec> &records, int limit)
{
  // Sorting the reversed range ascending leaves the vector descending.
  std::sort(records.rbegin(), records.rend());

  // Resolve the limit once, in an unsigned type, so the loop does not
  // compare size_t against int.
  size_t numToWrite = records.size();
  if (limit >= 0 && static_cast<size_t>(limit) < numToWrite) {
    numToWrite = static_cast<size_t>(limit);
  }

  for (size_t i = 0; i < numToWrite; ++i) {
    // '\n' rather than endl: no need to flush the stream after every record.
    outStrme << records[i].line << '\n';
  }
}
|
||||
|
46
misc/pruneGeneration.h
Normal file
46
misc/pruneGeneration.h
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
/**
 * One line of a table together with the probability it is ranked by.
 */
class Rec
{
public:
  float prob;        // ranking key
  std::string line;  // the complete line this record was built from

  Rec(float probability, const std::string &text)
    : prob(probability), line(text)
  {
  }

  // Records are ordered by probability alone; the line text is ignored.
  bool operator<(const Rec &other) const
  {
    return prob < other.prob;
  }
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
void Process(int limit, std::istream &inStrme, std::ostream &outStrme);
|
||||
void Output(std::ostream &outStrme, std::vector<Rec> &records, int limit);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
/**
 * Split a string on any of the given delimiter characters.
 *
 * Runs of delimiters count as a single separator, and leading or trailing
 * delimiters produce no empty tokens. Tokens are appended to `output`;
 * anything already in it is kept.
 *
 * @param output      vector the tokens are appended to
 * @param str         text to split
 * @param delimiters  set of separator characters (space and tab by default)
 */
inline void Tokenize(std::vector<std::string> &output
                     , const std::string& str
                     , const std::string& delimiters = " \t")
{
  // Start of the first token: the first character that is not a delimiter.
  std::string::size_type tokenStart = str.find_first_not_of(delimiters);

  while (tokenStart != std::string::npos) {
    // The token runs up to the next delimiter, or to the end of the string
    // when none is left (tokenEnd is npos and substr clamps the length).
    const std::string::size_type tokenEnd =
      str.find_first_of(delimiters, tokenStart);
    output.push_back(str.substr(tokenStart, tokenEnd - tokenStart));

    // Skip the delimiter run; this yields npos once the input is exhausted.
    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
  }
}
|
||||
|
@ -151,9 +151,6 @@ int main(int argc, char** argv)
|
||||
ResetUserTime();
|
||||
}
|
||||
|
||||
InputType* foo = source.get();
|
||||
FeatureFunction::CallChangeSource(foo);
|
||||
|
||||
// set up task of training one sentence
|
||||
boost::shared_ptr<TrainingTask> task;
|
||||
task = TrainingTask::create(source, ioWrapper);
|
||||
|
@ -52,11 +52,7 @@ public:
|
||||
// shouldn't be mixing hypos with different lhs
|
||||
assert(hypoA->GetTargetLHS() == hypoB->GetTargetLHS());
|
||||
|
||||
int ret = hypoA->RecombineCompare(*hypoB);
|
||||
if (ret != 0)
|
||||
return (ret < 0);
|
||||
|
||||
return false;
|
||||
return (hypoA->RecombineCompare(*hypoB) < 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -118,8 +118,6 @@ string SimpleTranslationInterface::translate(const string &inputString)
|
||||
ResetUserTime();
|
||||
}
|
||||
|
||||
FeatureFunction::CallChangeSource(&*source);
|
||||
|
||||
// set up task of translating one sentence
|
||||
boost::shared_ptr<TranslationTask> task
|
||||
= TranslationTask::create(source, ioWrapper);
|
||||
@ -223,8 +221,6 @@ batch_run()
|
||||
while ((source = ioWrapper->ReadInput()) != NULL) {
|
||||
IFVERBOSE(1) ResetUserTime();
|
||||
|
||||
FeatureFunction::CallChangeSource(source.get());
|
||||
|
||||
// set up task of translating one sentence
|
||||
boost::shared_ptr<TranslationTask>
|
||||
task = TranslationTask::create(source, ioWrapper);
|
||||
|
@ -62,7 +62,6 @@
|
||||
#include "moses/LM/SkeletonLM.h"
|
||||
#include "moses/FF/SkeletonTranslationOptionListFeature.h"
|
||||
#include "moses/LM/BilingualLM.h"
|
||||
#include "SkeletonChangeInput.h"
|
||||
#include "moses/TranslationModel/SkeletonPT.h"
|
||||
#include "moses/Syntax/InputWeightFF.h"
|
||||
#include "moses/Syntax/RuleTableFF.h"
|
||||
@ -268,7 +267,6 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
MOSES_FNAME(SkeletonLM);
|
||||
MOSES_FNAME(SkeletonChangeInput);
|
||||
MOSES_FNAME(SkeletonTranslationOptionListFeature);
|
||||
MOSES_FNAME(SkeletonPT);
|
||||
|
||||
|
@ -38,20 +38,6 @@ void FeatureFunction::Destroy()
|
||||
RemoveAllInColl(s_staticColl);
|
||||
}
|
||||
|
||||
// The original declaration as
|
||||
// void FeatureFunction::CallChangeSource(InputType *&input)
|
||||
// had me a bit perplexed. Would you really want to allow
|
||||
// any feature function to replace the InputType behind the
|
||||
// back of the others? And change what the vector is pointing to?
|
||||
|
||||
void FeatureFunction::CallChangeSource(InputType * const&input)
|
||||
{
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
const FeatureFunction &ff = *s_staticColl[i];
|
||||
ff.ChangeSource(input);
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureFunction::SetupAll(TranslationTask const& ttask)
|
||||
{
|
||||
BOOST_FOREACH(FeatureFunction* ff, s_staticColl)
|
||||
|
@ -62,9 +62,6 @@ public:
|
||||
static FeatureFunction &FindFeatureFunction(const std::string& name);
|
||||
static void Destroy();
|
||||
|
||||
static void CallChangeSource(InputType * const&input);
|
||||
// see my note in FeatureFunction.cpp --- UG
|
||||
|
||||
FeatureFunction(const std::string &line, bool initializeNow);
|
||||
FeatureFunction(size_t numScoreComponents, const std::string &line);
|
||||
virtual bool IsStateless() const = 0;
|
||||
@ -156,9 +153,6 @@ public:
|
||||
ScoreComponentCollection& scoreBreakdown,
|
||||
ScoreComponentCollection& estimatedFutureScore) const = 0;
|
||||
|
||||
// override this method if you want to change the input before decoding
|
||||
virtual void ChangeSource(InputType * const&input) const { }
|
||||
|
||||
// for context-dependent processing
|
||||
static void SetupAll(TranslationTask const& task);
|
||||
virtual void Setup(TranslationTask const& task) const { };
|
||||
|
@ -111,8 +111,8 @@ void GlobalLexicalModel::Load()
|
||||
|
||||
void GlobalLexicalModel::InitializeForInput(ttasksptr const& ttask)
|
||||
{
|
||||
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
|
||||
"GlobalLexicalModel works only with sentence input.");
|
||||
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
|
||||
"GlobalLexicalModel works only with sentence input.");
|
||||
Sentence const* s = reinterpret_cast<Sentence const*>(ttask->GetSource().get());
|
||||
m_local.reset(new ThreadLocalStorage);
|
||||
m_local->input = s;
|
||||
|
@ -107,8 +107,8 @@ bool GlobalLexicalModelUnlimited::Load(const std::string &filePathSource,
|
||||
|
||||
void GlobalLexicalModelUnlimited::InitializeForInput(ttasksptr const& ttask)
|
||||
{
|
||||
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
|
||||
"GlobalLexicalModel works only with sentence input.");
|
||||
UTIL_THROW_IF2(ttask->GetSource()->GetType() != SentenceInput,
|
||||
"GlobalLexicalModel works only with sentence input.");
|
||||
Sentence const* s = reinterpret_cast<Sentence const*>(ttask->GetSource().get());
|
||||
m_local.reset(new ThreadLocalStorage);
|
||||
m_local->input = s;
|
||||
|
@ -134,7 +134,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (targetPhrase.GetAlignNonTerm().GetSize() != 0) {
|
||||
|
||||
// Initialize phrase orientation scoring object
|
||||
Moses::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
||||
MosesTraining::GHKM::PhraseOrientation phraseOrientation(source.GetSize(), targetPhrase.GetSize(),
|
||||
targetPhrase.GetAlignTerm(), targetPhrase.GetAlignNonTerm());
|
||||
|
||||
PhraseOrientationFeature::ReoClassData* reoClassData = new PhraseOrientationFeature::ReoClassData();
|
||||
@ -150,7 +150,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
|
||||
// LEFT-TO-RIGHT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_L2R);
|
||||
|
||||
if ( ((targetIndex == 0) || !phraseOrientation.TargetSpanIsAligned(0,targetIndex)) // boundary non-terminal in rule-initial position (left boundary)
|
||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||
@ -170,7 +170,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (reoClassData->firstNonTerminalPreviousSourceSpanIsAligned &&
|
||||
reoClassData->firstNonTerminalFollowingSourceSpanIsAligned) {
|
||||
// discontinuous
|
||||
l2rOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
l2rOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
} else {
|
||||
reoClassData->firstNonTerminalIsBoundary = true;
|
||||
}
|
||||
@ -180,7 +180,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
|
||||
// RIGHT-TO-LEFT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,Moses::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = phraseOrientation.GetOrientationInfo(sourceIndex,sourceIndex,MosesTraining::GHKM::PhraseOrientation::REO_DIR_R2L);
|
||||
|
||||
if ( ((targetIndex == targetPhrase.GetSize()-1) || !phraseOrientation.TargetSpanIsAligned(targetIndex,targetPhrase.GetSize()-1)) // boundary non-terminal in rule-final position (right boundary)
|
||||
&& (targetPhraseLHS != m_glueTargetLHS) ) { // and not glue rule
|
||||
@ -200,7 +200,7 @@ void PhraseOrientationFeature::EvaluateInIsolation(const Phrase &source,
|
||||
if (reoClassData->lastNonTerminalPreviousSourceSpanIsAligned &&
|
||||
reoClassData->lastNonTerminalFollowingSourceSpanIsAligned) {
|
||||
// discontinuous
|
||||
r2lOrientation = Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
r2lOrientation = MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT;
|
||||
} else {
|
||||
reoClassData->lastNonTerminalIsBoundary = true;
|
||||
}
|
||||
@ -335,25 +335,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
// LEFT-TO-RIGHT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS l2rOrientation = reoClassData->nonTerminalReoClassL2R[nNT];
|
||||
|
||||
IFFEATUREVERBOSE(2) {
|
||||
FEATUREVERBOSE(2, "l2rOrientation ");
|
||||
switch (l2rOrientation) {
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
|
||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||
break;
|
||||
default:
|
||||
@ -396,23 +396,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
} else {
|
||||
|
||||
if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
|
||||
newScores[0] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityMono());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||
|
||||
} else if ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
|
||||
newScores[1] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilitySwap());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
LeftBoundaryL2RScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||
|
||||
} else if ( ( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( l2rOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( l2rOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
|
||||
newScores[2] += TransformScore(orientationPhraseProperty->GetLeftToRightProbabilityDiscontinuous());
|
||||
// if sub-derivation has left-boundary non-terminal:
|
||||
@ -437,25 +437,25 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
// RIGHT-TO-LEFT DIRECTION
|
||||
|
||||
Moses::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
||||
MosesTraining::GHKM::PhraseOrientation::REO_CLASS r2lOrientation = reoClassData->nonTerminalReoClassR2L[nNT];
|
||||
|
||||
IFFEATUREVERBOSE(2) {
|
||||
FEATUREVERBOSE(2, "r2lOrientation ");
|
||||
switch (r2lOrientation) {
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT:
|
||||
FEATUREVERBOSE2(2, "mono" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT:
|
||||
FEATUREVERBOSE2(2, "swap" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT:
|
||||
FEATUREVERBOSE2(2, "dleft" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT:
|
||||
FEATUREVERBOSE2(2, "dright" << std::endl);
|
||||
break;
|
||||
case Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == Moses::GHKM::PhraseOrientation::REO_MSLR
|
||||
case MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN:
|
||||
// modelType == MosesTraining::GHKM::PhraseOrientation::REO_MSLR
|
||||
FEATUREVERBOSE2(2, "unknown->dleft" << std::endl);
|
||||
break;
|
||||
default:
|
||||
@ -498,23 +498,23 @@ FFState* PhraseOrientationFeature::EvaluateWhenApplied(
|
||||
|
||||
} else {
|
||||
|
||||
if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
|
||||
newScores[m_offsetR2LScores+0] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityMono());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x1, newScores, accumulator);
|
||||
|
||||
} else if ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
|
||||
newScores[m_offsetR2LScores+1] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilitySwap());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
// add recursive actual score of boundary non-terminal from subderivation
|
||||
RightBoundaryR2LScoreRecursive(featureID, prevState, 0x2, newScores, accumulator);
|
||||
|
||||
} else if ( ( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( r2lOrientation == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( r2lOrientation == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
|
||||
newScores[m_offsetR2LScores+2] += TransformScore(orientationPhraseProperty->GetRightToLeftProbabilityDiscontinuous());
|
||||
// if sub-derivation has right-boundary non-terminal:
|
||||
@ -862,17 +862,17 @@ void PhraseOrientationFeature::SparseNonTerminalR2LScore(const Factor* nonTermin
|
||||
}
|
||||
|
||||
|
||||
const std::string* PhraseOrientationFeature::ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const
|
||||
const std::string* PhraseOrientationFeature::ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const
|
||||
{
|
||||
if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_LEFT ) {
|
||||
return &MORIENT;
|
||||
|
||||
} else if ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
} else if ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_RIGHT ) {
|
||||
return &SORIENT;
|
||||
|
||||
} else if ( ( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == Moses::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
} else if ( ( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DLEFT ) ||
|
||||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_DRIGHT ) ||
|
||||
( o == MosesTraining::GHKM::PhraseOrientation::REO_CLASS_UNKNOWN ) ) {
|
||||
return &DORIENT;
|
||||
|
||||
} else {
|
||||
|
@ -302,8 +302,8 @@ public:
|
||||
|
||||
struct ReoClassData {
|
||||
public:
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<Moses::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassL2R;
|
||||
std::vector<MosesTraining::GHKM::PhraseOrientation::REO_CLASS> nonTerminalReoClassR2L;
|
||||
bool firstNonTerminalIsBoundary;
|
||||
bool firstNonTerminalPreviousSourceSpanIsAligned;
|
||||
bool firstNonTerminalFollowingSourceSpanIsAligned;
|
||||
@ -401,7 +401,7 @@ protected:
|
||||
ScoreComponentCollection* scoreBreakdown,
|
||||
const std::string* o) const;
|
||||
|
||||
const std::string* ToString(const Moses::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||
const std::string* ToString(const MosesTraining::GHKM::PhraseOrientation::REO_CLASS o) const;
|
||||
|
||||
static const std::string MORIENT;
|
||||
static const std::string SORIENT;
|
||||
|
@ -1,96 +0,0 @@
|
||||
#include <vector>
|
||||
#include "SkeletonChangeInput.h"
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "moses/TargetPhrase.h"
|
||||
#include "moses/Sentence.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
SkeletonChangeInput::SkeletonChangeInput(const std::string &line)
|
||||
:StatelessFeatureFunction(2, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
// dense scores
|
||||
vector<float> newScores(m_numScoreComponents);
|
||||
newScores[0] = 1.5;
|
||||
newScores[1] = 0.3;
|
||||
scoreBreakdown.PlusEquals(this, newScores);
|
||||
|
||||
// sparse scores
|
||||
scoreBreakdown.PlusEquals(this, "sparse-name", 2.4);
|
||||
|
||||
}
|
||||
|
||||
void SkeletonChangeInput::EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
if (targetPhrase.GetNumNonTerminals()) {
|
||||
vector<float> newScores(m_numScoreComponents);
|
||||
newScores[0] = - std::numeric_limits<float>::infinity();
|
||||
scoreBreakdown.PlusEquals(this, newScores);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void SkeletonChangeInput::EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const
|
||||
{}
|
||||
|
||||
void SkeletonChangeInput::EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
void SkeletonChangeInput::EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
/// Rewrites the input sentence in place: every word gets factor 1 set to a
/// factor built from (at most) the first four characters of its factor 0.
///
/// Throws via UTIL_THROW_IF2 when `input` is not a Sentence.
void SkeletonChangeInput::ChangeSource(InputType* const& input) const
{
  Sentence *sent = dynamic_cast<Sentence*>(input);
  UTIL_THROW_IF2(sent == NULL, "Not a sentence input");

  FactorCollection &factors = FactorCollection::Instance();

  const size_t numWords = sent->GetSize();
  for (size_t pos = 0; pos != numWords; ++pos) {
    Word &current = sent->Phrase::GetWord(pos);
    const Factor *surface = current[0];

    // substr() clamps the count to the string length, so strings of four
    // characters or fewer are carried over unchanged.
    std::string prefix = surface->GetString().as_string().substr(0, 4);

    current.SetFactor(1, factors.AddFactor(prefix));
  }
}
|
||||
|
||||
void SkeletonChangeInput::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "arg") {
|
||||
// set value here
|
||||
} else {
|
||||
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,45 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
class SkeletonChangeInput : public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
SkeletonChangeInput(const std::string &line);
|
||||
|
||||
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
|
||||
void ChangeSource(InputType* const&input) const;
|
||||
|
||||
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
|
||||
|
||||
void EvaluateTranslationOptionListWithSourceContext(const InputType &input
|
||||
, const TranslationOptionList &translationOptionList) const;
|
||||
|
||||
void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -296,7 +296,8 @@ GetBufferedInput()
|
||||
}
|
||||
|
||||
boost::shared_ptr<InputType>
|
||||
IOWrapper::ReadInput()
|
||||
IOWrapper::
|
||||
ReadInput()
|
||||
{
|
||||
#ifdef WITH_THREADS
|
||||
boost::lock_guard<boost::mutex> lock(m_lock);
|
||||
@ -304,7 +305,8 @@ IOWrapper::ReadInput()
|
||||
boost::shared_ptr<InputType> source = GetBufferedInput();
|
||||
if (source) {
|
||||
source->SetTranslationId(m_currentLine++);
|
||||
this->set_context_for(*source);
|
||||
if (m_look_ahead || m_look_back)
|
||||
this->set_context_for(*source);
|
||||
}
|
||||
m_past_input.push_back(source);
|
||||
return source;
|
||||
@ -344,7 +346,7 @@ set_context_for(InputType& source)
|
||||
}
|
||||
}
|
||||
// cerr << string(80,'=') << endl;
|
||||
source.SetContext(context);
|
||||
if (context->size()) source.SetContext(context);
|
||||
}
|
||||
|
||||
|
||||
|
@ -61,8 +61,6 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "moses/ChartKBestExtractor.h"
|
||||
#include "moses/Syntax/KBestExtractor.h"
|
||||
|
||||
#include "search/applied.hh"
|
||||
|
||||
#include <boost/format.hpp>
|
||||
|
||||
namespace Moses
|
||||
|
@ -37,7 +37,7 @@ protected:
|
||||
std::streambuf *m_streambuf;
|
||||
public:
|
||||
|
||||
InputFileStream(const std::string &filePath);
|
||||
explicit InputFileStream(const std::string &filePath);
|
||||
~InputFileStream();
|
||||
|
||||
void Close();
|
||||
|
90
moses/OutputFileStream.cpp
Normal file
90
moses/OutputFileStream.cpp
Normal file
@ -0,0 +1,90 @@
|
||||
// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/iostreams/filter/gzip.hpp>
|
||||
#include "OutputFileStream.h"
|
||||
#include "gzfilebuf.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::algorithm;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
/// Creates a stream in the closed state: no file object, nothing pushed.
OutputFileStream::OutputFileStream()
  : boost::iostreams::filtering_ostream()
  , m_outFile(NULL)
  , m_open(false)
{
  // nothing else to set up until Open() is called
}
|
||||
|
||||
/// Creates the stream in the closed state and immediately calls Open(filePath).
///
/// The boolean result of Open() is discarded here, so a failed open is not
/// reported to the caller of this constructor.
OutputFileStream::OutputFileStream(const std::string &filePath)
  : m_outFile(NULL)
  , m_open(false)
{
  Open(filePath);
}
|
||||
|
||||
/// Closes the stream on destruction; Close() returns immediately if the
/// stream is not open.
OutputFileStream::~OutputFileStream()
{
  Close();
}
|
||||
|
||||
bool OutputFileStream::Open(const std::string &filePath)
|
||||
{
|
||||
assert(!m_open);
|
||||
if (filePath == std::string("-")) {
|
||||
// Write to standard output. Leave m_outFile null.
|
||||
this->push(std::cout);
|
||||
} else {
|
||||
m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
|
||||
if (m_outFile->fail()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ends_with(filePath, ".gz")) {
|
||||
this->push(boost::iostreams::gzip_compressor());
|
||||
}
|
||||
this->push(*m_outFile);
|
||||
}
|
||||
|
||||
m_open = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void OutputFileStream::Close()
|
||||
{
|
||||
if (!m_open) return;
|
||||
this->flush();
|
||||
if (m_outFile) {
|
||||
this->pop(); // file
|
||||
|
||||
m_outFile->close();
|
||||
delete m_outFile;
|
||||
m_outFile = NULL;
|
||||
}
|
||||
m_open = false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
81
moses/OutputFileStream.h
Normal file
81
moses/OutputFileStream.h
Normal file
@ -0,0 +1,81 @@
|
||||
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <boost/iostreams/filtering_stream.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Version of std::ostream with transparent compression.
|
||||
*
|
||||
* Transparently compresses output when writing to a file whose name ends in
|
||||
* ".gz". Or, writes to stdout instead of a file when given a filename
|
||||
* consisting of just a dash ("-").
|
||||
*/
|
||||
class OutputFileStream : public boost::iostreams::filtering_ostream
|
||||
{
|
||||
private:
|
||||
/** File that needs flushing & closing when we close this stream.
|
||||
*
|
||||
* Is NULL when no file is opened, e.g. when writing to standard output.
|
||||
*/
|
||||
std::ofstream *m_outFile;
|
||||
|
||||
/// Is this stream open?
|
||||
bool m_open;
|
||||
|
||||
public:
|
||||
/** Create an unopened OutputFileStream.
|
||||
*
|
||||
* Until it's been opened, nothing can be done with this stream.
|
||||
*/
|
||||
OutputFileStream();
|
||||
|
||||
/// Create an OutputFileStream, and open it by calling Open().
|
||||
OutputFileStream(const std::string &filePath);
|
||||
virtual ~OutputFileStream();
|
||||
|
||||
// TODO: Can we please just always throw an exception when this fails?
|
||||
/** Open stream.
|
||||
*
|
||||
* If filePath is "-" (just a dash), this opens the stream for writing to
|
||||
* standard output. Otherwise, it opens the given file. If the filename
|
||||
* has the ".gz" suffix, output will be transparently compressed.
|
||||
*
|
||||
* Call Close() to close the file.
|
||||
*
|
||||
* Returns whether opening the file was successful. It may also throw an
|
||||
* exception on failure.
|
||||
*/
|
||||
bool Open(const std::string &filePath);
|
||||
|
||||
/// Flush and close stream. After this, the stream can be opened again.
|
||||
void Close();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -143,6 +143,7 @@ Parameter::Parameter()
|
||||
AddParam(oov_opts,"mark-unknown", "mu", "mark unknown words in output");
|
||||
AddParam(oov_opts,"lmodel-oov-feature", "add language model oov feature, one per model");
|
||||
AddParam(oov_opts,"output-unknowns", "Output the unknown (OOV) words to the given file, one line per sentence");
|
||||
AddParam(oov_opts,"always-create-direct-transopt", "Always create a translation that translates the source word ad-verbatim");
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////
|
||||
// input options
|
||||
|
@ -63,8 +63,8 @@ StaticData::StaticData()
|
||||
: m_sourceStartPosMattersForRecombination(false)
|
||||
, m_requireSortingAfterSourceContext(false)
|
||||
, m_inputType(SentenceInput)
|
||||
, m_onlyDistinctNBest(false)
|
||||
, m_needAlignmentInfo(false)
|
||||
// , m_onlyDistinctNBest(false)
|
||||
// , m_needAlignmentInfo(false)
|
||||
, m_lmEnableOOVFeature(false)
|
||||
, m_isAlwaysCreateDirectTranslationOption(false)
|
||||
, m_currentWeightSetting("default")
|
||||
@ -203,25 +203,26 @@ StaticData
|
||||
//word-to-word alignment
|
||||
// alignments
|
||||
m_parameter->SetParameter(m_PrintAlignmentInfo, "print-alignment-info", false );
|
||||
if (m_PrintAlignmentInfo) {
|
||||
m_needAlignmentInfo = true;
|
||||
}
|
||||
|
||||
// if (m_PrintAlignmentInfo) { // => now in BookkeepingOptions::init()
|
||||
// m_needAlignmentInfo = true;
|
||||
// }
|
||||
|
||||
m_parameter->SetParameter(m_wordAlignmentSort, "sort-word-alignment", NoSort);
|
||||
|
||||
if (m_PrintAlignmentInfoNbest) {
|
||||
m_needAlignmentInfo = true;
|
||||
}
|
||||
// if (m_PrintAlignmentInfoNbest) { // => now in BookkeepingOptions::init()
|
||||
// m_needAlignmentInfo = true;
|
||||
// }
|
||||
|
||||
params = m_parameter->GetParam("alignment-output-file");
|
||||
if (params && params->size()) {
|
||||
m_alignmentOutputFile = Scan<std::string>(params->at(0));
|
||||
m_needAlignmentInfo = true;
|
||||
// m_needAlignmentInfo = true; // => now in BookkeepingOptions::init()
|
||||
}
|
||||
|
||||
m_parameter->SetParameter( m_PrintID, "print-id", false );
|
||||
m_parameter->SetParameter( m_PrintPassthroughInformation, "print-passthrough", false );
|
||||
m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false );
|
||||
// m_parameter->SetParameter( m_PrintPassthroughInformationInNBest, "print-passthrough-in-n-best", false ); // => now in BookkeepingOptions::init()
|
||||
|
||||
// word graph
|
||||
params = m_parameter->GetParam("output-word-graph");
|
||||
@ -327,41 +328,7 @@ bool
|
||||
StaticData
|
||||
::ini_nbest_options()
|
||||
{
|
||||
const PARAM_VEC *params;
|
||||
// n-best
|
||||
params = m_parameter->GetParam("n-best-list");
|
||||
if (params) {
|
||||
if (params->size() >= 2) {
|
||||
m_nBestFilePath = params->at(0);
|
||||
m_nBestSize = Scan<size_t>( params->at(1) );
|
||||
m_onlyDistinctNBest=(params->size()>2 && params->at(2)=="distinct");
|
||||
} else {
|
||||
std::cerr << "wrong format for switch -n-best-list file size [disinct]";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
m_nBestSize = 0;
|
||||
}
|
||||
|
||||
m_parameter->SetParameter<size_t>(m_nBestFactor, "n-best-factor", 20);
|
||||
|
||||
|
||||
m_parameter->SetParameter(m_PrintAlignmentInfoNbest,
|
||||
"print-alignment-info-in-n-best", false );
|
||||
|
||||
// include feature names in the n-best list
|
||||
m_parameter->SetParameter(m_labeledNBestList, "labeled-n-best-list", true );
|
||||
|
||||
// include word alignment in the n-best list
|
||||
m_parameter->SetParameter(m_nBestIncludesSegmentation,
|
||||
"include-segmentation-in-n-best", false );
|
||||
|
||||
// print all factors of output translations
|
||||
m_parameter->SetParameter(m_reportAllFactorsNBest,
|
||||
"report-all-factors-in-n-best", false );
|
||||
|
||||
m_parameter->SetParameter(m_printNBestTrees, "n-best-trees", false );
|
||||
return true;
|
||||
return m_nbest_options.init(*m_parameter);
|
||||
}
|
||||
|
||||
void
|
||||
@ -477,6 +444,7 @@ StaticData
|
||||
//source word deletion
|
||||
m_parameter->SetParameter(m_wordDeletionEnabled, "phrase-drop-allowed", false );
|
||||
|
||||
m_parameter->SetParameter(m_isAlwaysCreateDirectTranslationOption, "always-create-direct-transopt", false );
|
||||
}
|
||||
|
||||
void
|
||||
@ -625,8 +593,9 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
// input, output
|
||||
ini_factor_maps();
|
||||
ini_input_options();
|
||||
m_bookkeeping_options.init(*parameter);
|
||||
m_nbest_options.init(*parameter); // if (!ini_nbest_options()) return false;
|
||||
if (!ini_output_options()) return false;
|
||||
if (!ini_nbest_options()) return false;
|
||||
|
||||
// threading etc.
|
||||
if (!ini_performance_options()) return false;
|
||||
@ -647,6 +616,16 @@ bool StaticData::LoadData(Parameter *parameter)
|
||||
|
||||
ini_mira_options();
|
||||
|
||||
// set m_nbest_options.enabled = true if necessary:
|
||||
if (m_mbr || m_useLatticeMBR || m_outputSearchGraph || m_outputSearchGraphSLF
|
||||
|| m_mira || m_outputSearchGraphHypergraph || m_useConsensusDecoding
|
||||
#ifdef HAVE_PROTOBUF
|
||||
|| m_outputSearchGraphPB
|
||||
#endif
|
||||
|| m_latticeSamplesFilePath.size()) {
|
||||
m_nbest_options.enabled = true;
|
||||
}
|
||||
|
||||
// S2T decoder
|
||||
m_parameter->SetParameter(m_s2tParsingAlgorithm, "s2t-parsing-algorithm",
|
||||
RecursiveCYKPlus);
|
||||
@ -1392,4 +1371,3 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -45,6 +45,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/PP/Factory.h"
|
||||
|
||||
#include "moses/parameters/ContextParameters.h"
|
||||
#include "moses/parameters/NBestOptions.h"
|
||||
#include "moses/parameters/BookkeepingOptions.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -95,18 +97,21 @@ protected:
|
||||
// 0 = no distortion (monotone in old pharaoh)
|
||||
bool m_reorderingConstraint; //! use additional reordering constraints
|
||||
bool m_useEarlyDistortionCost;
|
||||
size_t
|
||||
m_maxHypoStackSize //! hypothesis-stack size that triggers pruning
|
||||
, m_minHypoStackDiversity //! minimum number of hypothesis in stack for each source word coverage
|
||||
, m_nBestSize
|
||||
, m_latticeSamplesSize
|
||||
, m_nBestFactor
|
||||
, m_maxNoTransOptPerCoverage
|
||||
, m_maxNoPartTransOpt
|
||||
, m_maxPhraseLength;
|
||||
size_t m_maxHypoStackSize; //! hypothesis-stack size that triggers pruning
|
||||
size_t m_minHypoStackDiversity; //! minimum number of hypothesis in stack for each source word coverage;
|
||||
NBestOptions m_nbest_options;
|
||||
BookkeepingOptions m_bookkeeping_options;
|
||||
// size_t m_nBestSize;
|
||||
// size_t m_nBestFactor;
|
||||
|
||||
std::string m_nBestFilePath, m_latticeSamplesFilePath;
|
||||
bool m_labeledNBestList,m_nBestIncludesSegmentation;
|
||||
size_t m_latticeSamplesSize;
|
||||
size_t m_maxNoTransOptPerCoverage;
|
||||
size_t m_maxNoPartTransOpt;
|
||||
size_t m_maxPhraseLength;
|
||||
|
||||
// std::string m_nBestFilePath;
|
||||
std::string m_latticeSamplesFilePath;
|
||||
// bool m_labeledNBestList,m_nBestIncludesSegmentation;
|
||||
bool m_dropUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = drop (ignore) them
|
||||
bool m_markUnknown; //! false = treat unknown words as unknowns, and translate them as themselves; true = mark and (ignore) them
|
||||
bool m_wordDeletionEnabled;
|
||||
@ -128,21 +133,21 @@ protected:
|
||||
bool m_reportSegmentation;
|
||||
bool m_reportSegmentationEnriched;
|
||||
bool m_reportAllFactors;
|
||||
bool m_reportAllFactorsNBest;
|
||||
// bool m_reportAllFactorsNBest;
|
||||
std::string m_detailedTranslationReportingFilePath;
|
||||
std::string m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
|
||||
//DIMw
|
||||
std::string m_detailedAllTranslationReportingFilePath;
|
||||
|
||||
bool m_onlyDistinctNBest;
|
||||
// bool m_onlyDistinctNBest;
|
||||
bool m_PrintAlignmentInfo;
|
||||
bool m_needAlignmentInfo;
|
||||
bool m_PrintAlignmentInfoNbest;
|
||||
// bool m_needAlignmentInfo; // => BookkeepingOptions
|
||||
// bool m_PrintAlignmentInfoNbest;
|
||||
|
||||
bool m_PrintID;
|
||||
bool m_PrintPassthroughInformation;
|
||||
bool m_PrintPassthroughInformationInNBest;
|
||||
// bool m_PrintPassthroughInformationInNBest;
|
||||
|
||||
std::string m_alignmentOutputFile;
|
||||
|
||||
@ -214,7 +219,7 @@ protected:
|
||||
bool m_useLegacyPT;
|
||||
bool m_defaultNonTermOnlyForEmptyRange;
|
||||
S2TParsingAlgorithm m_s2tParsingAlgorithm;
|
||||
bool m_printNBestTrees;
|
||||
// bool m_printNBestTrees;
|
||||
|
||||
FeatureRegistry m_registry;
|
||||
PhrasePropertyFactory m_phrasePropertyFactory;
|
||||
@ -361,7 +366,8 @@ public:
|
||||
return m_PrintPassthroughInformation;
|
||||
}
|
||||
bool IsPassthroughInNBestEnabled() const {
|
||||
return m_PrintPassthroughInformationInNBest;
|
||||
return m_nbest_options.include_passthrough;
|
||||
// return m_PrintPassthroughInformationInNBest;
|
||||
}
|
||||
int GetMaxDistortion() const {
|
||||
return m_maxDistortion;
|
||||
@ -410,7 +416,8 @@ public:
|
||||
return m_reportAllFactors;
|
||||
}
|
||||
bool GetReportAllFactorsNBest() const {
|
||||
return m_reportAllFactorsNBest;
|
||||
return m_nbest_options.include_all_factors;
|
||||
// return m_reportAllFactorsNBest;
|
||||
}
|
||||
bool IsDetailedTranslationReportingEnabled() const {
|
||||
return !m_detailedTranslationReportingFilePath.empty();
|
||||
@ -430,7 +437,8 @@ public:
|
||||
return m_detailedTreeFragmentsTranslationReportingFilePath;
|
||||
}
|
||||
bool IsLabeledNBestList() const {
|
||||
return m_labeledNBestList;
|
||||
return m_nbest_options.include_feature_labels;
|
||||
// return m_labeledNBestList;
|
||||
}
|
||||
|
||||
bool UseMinphrInMemory() const {
|
||||
@ -443,21 +451,24 @@ public:
|
||||
|
||||
// for mert
|
||||
size_t GetNBestSize() const {
|
||||
return m_nBestSize;
|
||||
return m_nbest_options.nbest_size;
|
||||
// return m_nBestSize;
|
||||
}
|
||||
|
||||
const std::string &GetNBestFilePath() const {
|
||||
return m_nBestFilePath;
|
||||
return m_nbest_options.output_file_path;
|
||||
// return m_nBestFilePath;
|
||||
}
|
||||
|
||||
bool IsNBestEnabled() const {
|
||||
return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
|
||||
m_outputSearchGraph || m_outputSearchGraphSLF ||
|
||||
m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
|
||||
#ifdef HAVE_PROTOBUF
|
||||
m_outputSearchGraphPB ||
|
||||
#endif
|
||||
!m_latticeSamplesFilePath.empty());
|
||||
return m_nbest_options.enabled;
|
||||
// return (!m_nBestFilePath.empty() || m_mbr || m_useLatticeMBR || m_mira ||
|
||||
// m_outputSearchGraph || m_outputSearchGraphSLF ||
|
||||
// m_outputSearchGraphHypergraph || m_useConsensusDecoding ||
|
||||
// #ifdef HAVE_PROTOBUF
|
||||
// m_outputSearchGraphPB ||
|
||||
// #endif
|
||||
// !m_latticeSamplesFilePath.empty());
|
||||
}
|
||||
|
||||
size_t GetLatticeSamplesSize() const {
|
||||
@ -469,7 +480,8 @@ public:
|
||||
}
|
||||
|
||||
size_t GetNBestFactor() const {
|
||||
return m_nBestFactor;
|
||||
return m_nbest_options.factor;
|
||||
// return m_nBestFactor;
|
||||
}
|
||||
bool GetOutputWordGraph() const {
|
||||
return m_outputWordGraph;
|
||||
@ -527,7 +539,8 @@ public:
|
||||
void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
|
||||
|
||||
bool GetDistinctNBest() const {
|
||||
return m_onlyDistinctNBest;
|
||||
return m_nbest_options.only_distinct;
|
||||
// return m_onlyDistinctNBest;
|
||||
}
|
||||
const std::string& GetFactorDelimiter() const {
|
||||
return m_factorDelimiter;
|
||||
@ -692,7 +705,8 @@ public:
|
||||
const std::string &GetBinDirectory() const;
|
||||
|
||||
bool NeedAlignmentInfo() const {
|
||||
return m_needAlignmentInfo;
|
||||
return m_bookkeeping_options.need_alignment_info;
|
||||
// return m_needAlignmentInfo;
|
||||
}
|
||||
const std::string &GetAlignmentOutputFile() const {
|
||||
return m_alignmentOutputFile;
|
||||
@ -701,14 +715,16 @@ public:
|
||||
return m_PrintAlignmentInfo;
|
||||
}
|
||||
bool PrintAlignmentInfoInNbest() const {
|
||||
return m_PrintAlignmentInfoNbest;
|
||||
return m_nbest_options.include_alignment_info;
|
||||
// return m_PrintAlignmentInfoNbest;
|
||||
}
|
||||
WordAlignmentSort GetWordAlignmentSort() const {
|
||||
return m_wordAlignmentSort;
|
||||
}
|
||||
|
||||
bool NBestIncludesSegmentation() const {
|
||||
return m_nBestIncludesSegmentation;
|
||||
return m_nbest_options.include_segmentation;
|
||||
// return m_nBestIncludesSegmentation;
|
||||
}
|
||||
|
||||
bool GetHasAlternateWeightSettings() const {
|
||||
@ -849,7 +865,8 @@ public:
|
||||
}
|
||||
|
||||
bool PrintNBestTrees() const {
|
||||
return m_printNBestTrees;
|
||||
return m_nbest_options.print_trees;
|
||||
// return m_printNBestTrees;
|
||||
}
|
||||
|
||||
bool RequireSortingAfterSourceContext() const {
|
||||
|
@ -18,8 +18,6 @@
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
#ifndef moses_ChartRuleLookupManagerMemory_h
|
||||
#define moses_ChartRuleLookupManagerMemory_h
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -97,4 +95,3 @@ private:
|
||||
|
||||
} // namespace Moses
|
||||
|
||||
#endif
|
||||
|
@ -34,7 +34,7 @@ namespace Moses
|
||||
BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
|
||||
size_t threadsNum)
|
||||
: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
|
||||
m_fileHandle(0), m_fileHandleStart(0), m_size(0),
|
||||
m_fileHandle(0), m_fileHandleStart(0), m_landmarks(true), m_size(0),
|
||||
m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0),
|
||||
m_threadPool(threadsNum)
|
||||
{
|
||||
|
@ -78,9 +78,9 @@ GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
|
||||
if(m_hash.GetSize() != index) {
|
||||
std::string scoresString;
|
||||
if(m_inMemory)
|
||||
scoresString = m_scoresMemory[index];
|
||||
scoresString = m_scoresMemory[index].str();
|
||||
else
|
||||
scoresString = m_scoresMapped[index];
|
||||
scoresString = m_scoresMapped[index].str();
|
||||
|
||||
BitWrapper<> bitStream(scoresString);
|
||||
for(size_t i = 0; i < m_numScoreComponent; i++)
|
||||
|
@ -53,12 +53,11 @@ LexicalReorderingTableCreator::LexicalReorderingTableCreator(
|
||||
std::cerr << "Pass 1/2: Creating phrase index + Counting scores" << std::endl;
|
||||
m_hash.BeginSave(m_outFile);
|
||||
|
||||
|
||||
if(tempfilePath.size()) {
|
||||
MmapAllocator<unsigned char> allocEncoded(util::FMakeTemp(tempfilePath));
|
||||
m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocEncoded);
|
||||
} else {
|
||||
m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>();
|
||||
m_encodedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(true);
|
||||
}
|
||||
|
||||
EncodeScores();
|
||||
@ -73,7 +72,7 @@ LexicalReorderingTableCreator::LexicalReorderingTableCreator(
|
||||
MmapAllocator<unsigned char> allocCompressed(util::FMakeTemp(tempfilePath));
|
||||
m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocCompressed);
|
||||
} else {
|
||||
m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>();
|
||||
m_compressedScores = new StringVector<unsigned char, unsigned long, MmapAllocator>(true);
|
||||
}
|
||||
CompressScores();
|
||||
|
||||
|
@ -151,11 +151,12 @@ public:
|
||||
if(!m_fixed) {
|
||||
util::UnmapOrThrow(p, num * sizeof(T));
|
||||
} else {
|
||||
size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
|
||||
size_t relative_offset = m_data_offset - map_offset;
|
||||
util::UnmapOrThrow((pointer)((char*)p - relative_offset), num * sizeof(T));
|
||||
}
|
||||
const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
|
||||
const size_t relative_offset = m_data_offset - map_offset;
|
||||
const size_t adjusted_map_size = m_map_size + relative_offset;
|
||||
|
||||
util::UnmapOrThrow((pointer)((char*)p - relative_offset), adjusted_map_size);
|
||||
}
|
||||
}
|
||||
|
||||
void construct (pointer p, const T& value) {
|
||||
|
@ -224,9 +224,9 @@ TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &
|
||||
// Retrieve compressed and encoded target phrase collection
|
||||
std::string encodedPhraseCollection;
|
||||
if(m_phraseDictionary.m_inMemory)
|
||||
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId];
|
||||
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId].str();
|
||||
else
|
||||
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId];
|
||||
encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId].str();
|
||||
|
||||
BitWrapper<> encodedBitStream(encodedPhraseCollection);
|
||||
if(m_coding == PREnc && bitsLeft)
|
||||
|
@ -130,7 +130,7 @@ PhraseTableCreator::PhraseTableCreator(std::string inPath,
|
||||
MmapAllocator<unsigned char> allocCompressed(util::FMakeTemp(tempfilePath));
|
||||
m_compressedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>(allocCompressed);
|
||||
} else {
|
||||
m_compressedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>();
|
||||
m_compressedTargetPhrases = new StringVector<unsigned char, unsigned long, MmapAllocator>(true);
|
||||
}
|
||||
CompressTargetPhrases();
|
||||
|
||||
@ -203,7 +203,7 @@ void PhraseTableCreator::Save()
|
||||
= m_sourceSymbolsMap.begin(); it != m_sourceSymbolsMap.end(); it++)
|
||||
temp1[it->second] = it->first;
|
||||
std::sort(temp1.begin(), temp1.end());
|
||||
StringVector<unsigned char, unsigned, std::allocator> sourceSymbols;
|
||||
StringVector<unsigned char, unsigned, std::allocator> sourceSymbols(true);
|
||||
for(std::vector<std::string>::iterator it = temp1.begin();
|
||||
it != temp1.end(); it++)
|
||||
sourceSymbols.push_back(*it);
|
||||
@ -224,7 +224,7 @@ void PhraseTableCreator::Save()
|
||||
for(boost::unordered_map<std::string, unsigned>::iterator it
|
||||
= m_targetSymbolsMap.begin(); it != m_targetSymbolsMap.end(); it++)
|
||||
temp2[it->second] = it->first;
|
||||
StringVector<unsigned char, unsigned, std::allocator> targetSymbols;
|
||||
StringVector<unsigned char, unsigned, std::allocator> targetSymbols(true);
|
||||
for(std::vector<std::string>::iterator it = temp2.begin();
|
||||
it != temp2.end(); it++)
|
||||
targetSymbols.push_back(*it);
|
||||
|
@ -147,8 +147,8 @@ public:
|
||||
typedef RangeIterator iterator;
|
||||
typedef StringIterator string_iterator;
|
||||
|
||||
StringVector();
|
||||
StringVector(Allocator<ValueT> alloc);
|
||||
StringVector(bool allocate = false);
|
||||
StringVector(Allocator<ValueT>& alloc);
|
||||
|
||||
virtual ~StringVector() {
|
||||
delete m_charArray;
|
||||
@ -203,13 +203,13 @@ public:
|
||||
m_memoryMapped = memoryMapped;
|
||||
|
||||
size += std::fread(&m_sorted, sizeof(bool), 1, in) * sizeof(bool);
|
||||
size += m_positions.load(in, m_memoryMapped);
|
||||
size += m_positions.load(in, false);
|
||||
|
||||
size += loadCharArray(*m_charArray, in, m_memoryMapped);
|
||||
size += loadCharArray(m_charArray, in, m_memoryMapped);
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >& c,
|
||||
size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >*& c,
|
||||
std::FILE* in, bool map = false) {
|
||||
// Can only be read into memory. Mapping is not possible with std::allocator.
|
||||
assert(map == false);
|
||||
@ -219,13 +219,13 @@ public:
|
||||
size_t valSize;
|
||||
byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
|
||||
|
||||
c.resize(valSize, 0);
|
||||
byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
|
||||
c = new std::vector<ValueT, std::allocator<ValueT> >(valSize, 0);
|
||||
byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
|
||||
|
||||
return byteSize;
|
||||
}
|
||||
|
||||
size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >& c,
|
||||
size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >*& c,
|
||||
std::FILE* in, bool map = false) {
|
||||
size_t byteSize = 0;
|
||||
|
||||
@ -235,18 +235,16 @@ public:
|
||||
if(map == false) {
|
||||
// Read data into temporary file (default constructor of MmapAllocator)
|
||||
// and map memory onto temporary file. Can be resized.
|
||||
|
||||
c.resize(valSize, 0);
|
||||
byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
|
||||
c = new std::vector<ValueT, MmapAllocator<ValueT> >(valSize, 0);
|
||||
byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
|
||||
} else {
|
||||
// Map it directly on specified region of file "in" starting at valPos
|
||||
// with length valSize * sizeof(ValueT). Mapped region cannot be resized.
|
||||
|
||||
size_t valPos = std::ftell(in);
|
||||
Allocator<ValueT> alloc(in, valPos);
|
||||
std::vector<ValueT, Allocator<ValueT> > charArrayTemp(alloc);
|
||||
charArrayTemp.resize(valSize, 0);
|
||||
c.swap(charArrayTemp);
|
||||
c = new std::vector<ValueT, Allocator<ValueT> >(alloc);
|
||||
c->resize(valSize, 0);
|
||||
|
||||
byteSize += valSize * sizeof(ValueT);
|
||||
}
|
||||
@ -369,11 +367,12 @@ OStream& operator<<(OStream &os, ValueIteratorRange<ValueIteratorT> cr)
|
||||
// StringVector
|
||||
|
||||
template<typename ValueT, typename PosT, template <typename> class Allocator>
|
||||
StringVector<ValueT, PosT, Allocator>::StringVector()
|
||||
: m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >()) { }
|
||||
StringVector<ValueT, PosT, Allocator>::StringVector(bool allocate)
|
||||
: m_sorted(true), m_memoryMapped(false),
|
||||
m_charArray(allocate ? new std::vector<ValueT, Allocator<ValueT> >() : 0) { }
|
||||
|
||||
template<typename ValueT, typename PosT, template <typename> class Allocator>
|
||||
StringVector<ValueT, PosT, Allocator>::StringVector(Allocator<ValueT> alloc)
|
||||
StringVector<ValueT, PosT, Allocator>::StringVector(Allocator<ValueT> &alloc)
|
||||
: m_sorted(true), m_memoryMapped(false), m_charArray(new std::vector<ValueT, Allocator<ValueT> >(alloc)) { }
|
||||
|
||||
template<typename ValueT, typename PosT, template <typename> class Allocator>
|
||||
|
@ -40,9 +40,8 @@ std::auto_ptr<RuleTableLoader> RuleTableLoaderFactory::Create(
|
||||
{
|
||||
InputFileStream input(path);
|
||||
std::string line;
|
||||
bool cont = std::getline(input, line);
|
||||
|
||||
if (cont) {
|
||||
if (std::getline(input, line)) {
|
||||
std::vector<std::string> tokens;
|
||||
Tokenize(tokens, line);
|
||||
if (tokens.size() == 1) {
|
||||
|
@ -1,116 +1,126 @@
|
||||
# Some systems apparently distinguish between shell
|
||||
# variables and environment variables. The latter are
|
||||
# visible to the make utility, the former apparently not,
|
||||
# so we need to set them if they are not defined yet
|
||||
SHELL = bash
|
||||
MAKEFLAGS += --warn-undefined-variables
|
||||
.DEFAULT_GOAL = all
|
||||
.SUFFIXES:
|
||||
|
||||
# ===============================================================================
|
||||
# LOCAL ENVIRONMENT
|
||||
# ===============================================================================
|
||||
|
||||
# shell script snippet:
|
||||
define find_moses_root
|
||||
d=$$(pwd);
|
||||
while [[ ! -e $$d/Jamroot && $$d != "/" ]] ; do
|
||||
d=$$(dirname $$d);
|
||||
done;
|
||||
echo $$d
|
||||
endef
|
||||
|
||||
MOSES_ROOT := $(shell $(find_moses_root))
|
||||
|
||||
# ===============================================================================
|
||||
# COMPILATION PREFERENCES
|
||||
# ===============================================================================
|
||||
# CCACHE: if set to ccache, use ccache to speed up compilation
|
||||
# OPTI: optimization level
|
||||
# PROF: profiler switches
|
||||
# PROF: profiler switches
|
||||
|
||||
CCACHE = ccache
|
||||
OPTI = 3
|
||||
EXE_TAG = exe
|
||||
PROF =
|
||||
# PROF = -g -pg
|
||||
# compiler command
|
||||
compiler ?= g++
|
||||
variant ?= runtime
|
||||
link ?= static
|
||||
|
||||
# ===============================================================================
|
||||
CXX = $(shell which ccache) $(compiler)
|
||||
CXXFLAGS += -DMAX_NUM_FACTORS=4
|
||||
CXXFLAGS += -DKENLM_MAX_ORDER=5
|
||||
CXXFLAGS += -DWITH_THREADS
|
||||
CXXFLAGS += -DNO_MOSES
|
||||
CXXFLAGS += -I${MOSES_ROOT} -I.
|
||||
|
||||
SHELL = bash
|
||||
MAKEFLAGS += --warn-undefined-variables
|
||||
.DEFAULT_GOAL = all
|
||||
.SUFFIXES:
|
||||
ifeq ($(variant),debug)
|
||||
CXXFLAGS += -ggdb -O0
|
||||
else ifeq ($(variant),profile)
|
||||
CXXFLAGS += -g -pg -O3
|
||||
else ifeq ($(variant),syntax)
|
||||
CXXFLAGS += -fsyntax-only
|
||||
endif
|
||||
|
||||
# LDFLAGS = -L${MOSES_ROOT}/lib -L ./lib/
|
||||
|
||||
# WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL}
|
||||
WDIR = build/$(variant)
|
||||
|
||||
# ===============================================================================
|
||||
# COMPILATION 'LOCALIZATION'
|
||||
HOST ?= $(shell hostname)
|
||||
HOSTTYPE ?= $(shell uname -m)
|
||||
KERNEL = $(shell uname -r)
|
||||
HOST ?= $(shell hostname)
|
||||
HOSTTYPE ?= $(shell uname -m)
|
||||
KERNEL = $(shell uname -r)
|
||||
|
||||
MOSES_ROOT = ${HOME}/code/mosesdecoder
|
||||
WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI}
|
||||
VPATH = ${HOME}/code/mosesdecoder/
|
||||
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}
|
||||
CXXFLAGS += -DMAX_NUM_FACTORS=4
|
||||
CXXFLAGS += -DKENLM_MAX_ORDER=5
|
||||
modirs := $(addprefix -I,$(shell find ${MOSES_ROOT}/moses ${MOSES_ROOT}/contrib -type d))
|
||||
CXXFLAGS += -I${MOSES_ROOT}
|
||||
INCLUDES =
|
||||
BZLIB =
|
||||
BOOSTLIBTAG =
|
||||
nil:
|
||||
|
||||
lzma = lzma
|
||||
#lzma =
|
||||
REQLIBS = m z pthread dl ${lzma} ${BZLIB} \
|
||||
boost_thread${BOOSTLIBTAG} \
|
||||
boost_program_options${BOOSTLIBTAG} \
|
||||
boost_system${BOOSTLIBTAG} \
|
||||
boost_filesystem${BOOSTLIBTAG} \
|
||||
boost_iostreams${BOOSTLIBTAG} z bz2
|
||||
# libraries required
|
||||
|
||||
# icuuc icuio icui18n \
|
||||
|
||||
LIBS = $(addprefix -l, moses ${REQLIBS})
|
||||
LIBS = $(addprefix -l, ${REQLIBS})
|
||||
LIBDIRS = -L${HOME}/code/mosesdecoder/lib
|
||||
LIBDIRS += -L${HOME}/lib
|
||||
PREFIX ?= .
|
||||
BINDIR ?= ${PREFIX}/bin
|
||||
ifeq "$(OPTI)" "0"
|
||||
BINPREF = debug.
|
||||
else
|
||||
BINPREF =
|
||||
LIBS = m z bz2 pthread dl ${BOOSTLIBS}
|
||||
BOOSTLIBS := thread system filesystem program_options iostreams
|
||||
BOOSTLIBS := $(addprefix boost_,${BOOSTLIBS})
|
||||
# Append the optional Boost library tag (BOOSTLIBTAG) to every Boost
# library name, but only when a non-empty tag has been set.
# 'ifdef' expects a variable *name*; the former argument expanded to a
# parenthesised expression that never names a defined variable, so the
# suffix was never applied. 'ifneq' against the empty string performs the
# intended "tag is not empty" test.
ifneq ($(strip $(BOOSTLIBTAG)),)
BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS})
endif
|
||||
|
||||
cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\
|
||||
$(patsubst .%,$(WDIR)%,$(basename $1))))
|
||||
cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1)))
|
||||
cc2trg = $(basename $(notdir $1))
|
||||
|
||||
OBJ2 :=
|
||||
define compile
|
||||
|
||||
define compile
|
||||
|
||||
DEP += ${WDIR}/$(basename $(notdir $1)).d
|
||||
${WDIR}/$(basename $(notdir $1)).o : $1 $(wildcard $(basename $1).h)
|
||||
DEP += $(basename $(call cc2obj,$1)).d
|
||||
$(call cc2obj,$1): $1
|
||||
@echo -e "COMPILING $1"
|
||||
@mkdir -p $$(@D)
|
||||
${CXX} ${CXXFLAGS} -MD -MP -c $$(abspath $$<) -o $$@
|
||||
@${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
|
||||
|
||||
endef
|
||||
|
||||
testprogs = test-dynamic-im-tsa try-align
|
||||
programs = mtt-build mtt-dump symal2mam custom-pt mmlex-build ${testprogs}
|
||||
programs += mtt-count-words
|
||||
define build
|
||||
|
||||
all: $(addprefix ${BINDIR}/${BINPREF}, $(programs))
|
||||
@echo $^
|
||||
clean:
|
||||
rm -f ${WDIR}/*.o ${WDIR}/*.d
|
||||
$(call cc2trg,$1): $(call cc2exe,$1)
|
||||
$(call cc2exe,$1): $(call cc2obj,$1) $(LIBOBJ)
|
||||
ifneq ($(variant),syntax)
|
||||
@echo -e "LINKING $$@"
|
||||
@mkdir -p $${@D}
|
||||
@${CXX} ${CXXFLAGS} -o $$@ $(LIBOBJ) $(addprefix -l,${LIBS}) $$<
|
||||
endif
|
||||
|
||||
custom-pt: ${BINDIR}/${BINPREF}custom-pt
|
||||
echo $^
|
||||
endef
|
||||
|
||||
INMOGEN = $(wildcard ${MOSES_ROOT}/moses/TranslationModel/UG/generic/*/*.cpp)
|
||||
#INMOMM = $(wildcard ${MOSES_ROOT}/moses/TranslationModel/UG/mm/*.cc)
|
||||
#INMOMM += $(wildcard ${MOSES_ROOT}/moses/TranslationModel/UG/mm/*.cpp)
|
||||
OBJ = $(patsubst %.cc,%.o,$(wildcard $(patsubst %.h,%.cc,$(wildcard *.h))))
|
||||
OBJ += $(patsubst %.cpp,%.o,${INMOGEN})
|
||||
#OBJ += $(patsubst %.cpp,%.o,${INMOMM})
|
||||
#OBJ += $(patsubst %.cc,%.o,${INMOMM})
|
||||
EXE = $(patsubst %.cc,%.o,$(filter-out $(patsubst %.h,%.cc,$(wildcard *.h)),$(wildcard *.cc)))
|
||||
# list files here that you want explicitly excluded from compilation
|
||||
skip = sim-pe.cc
|
||||
skip += mtt.count.cc
|
||||
skip += try-align2.cc
|
||||
skip += spe-check-coverage3.cc
|
||||
skip += mmsapt.cpp
|
||||
skip += ug_stringdist.cc
|
||||
skip += ug_splice_arglist.cc
|
||||
skip += ug_lexical_reordering.cc
|
||||
skip += ug_sampling_bias.cc
|
||||
|
||||
$(foreach cpp,${INMOGEN},$(eval $(call compile,${cpp})))
|
||||
$(foreach cpp,$(wildcard *.cc),$(eval $(call compile,${cpp})))
|
||||
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): $(addprefix ${WDIR}/,$(notdir ${OBJ}))
|
||||
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): ${MOSES_ROOT}/lib/libmoses.a
|
||||
${BINDIR}/${BINPREF}%: ${WDIR}/%.o ${WDIR}/mmsapt_align.o
|
||||
@mkdir -p ${BINDIR}
|
||||
echo PREREQS: $^
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ ${LIBDIRS} ${LIBS}
|
||||
# objects from elsewhere in the moses tree that are needed
|
||||
extra = ${MOSES_ROOT}/util/exception.cc
|
||||
|
||||
#try-align: ${WDIR}/try-align.o ${WDIR}/tpt_tokenindex.o
|
||||
# $(CXX) $(CXXFLAGS) -o $@ $^ ${LIBDIRS}
|
||||
$(foreach f,$(skip),$(eval broken+=$(shell find -name $f)))
|
||||
|
||||
.SECONDARY:
|
||||
$(info SCANNING DIRECTORY TREE FOR FILES)
|
||||
find_cfiles = find -name '*.cc' -or -name '*.cpp'
|
||||
CFILES = $(filter-out $(broken), $(shell $(find_cfiles)))
|
||||
PROGRAMS := $(shell $(find_cfiles) | xargs grep -lP '^(int +)?main')
|
||||
PROGRAMS := $(filter-out $(broken),$(PROGRAMS))
|
||||
|
||||
ALLOBJ = $(call cc2obj,$(CFILES) $(extra))
|
||||
LIBOBJ = $(call cc2obj,$(filter-out $(PROGRAMS),$(CFILES) $(extra)))
|
||||
|
||||
$(foreach f,$(CFILES) $(extra),$(eval $(call compile,$f)))
|
||||
$(foreach p,$(PROGRAMS),$(eval $(call build,$p)))
|
||||
|
||||
-include $(DEP)
|
||||
|
||||
|
@ -16,24 +16,28 @@
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
using namespace std;
|
||||
// using namespace std;
|
||||
|
||||
|
||||
using std::greater;
|
||||
|
||||
template<typename VAL,
|
||||
typename COMP = greater<VAL>,
|
||||
typename IDX_T=size_t>
|
||||
class
|
||||
VectorIndexSorter
|
||||
: public binary_function<IDX_T const&, IDX_T const&, bool>
|
||||
: public std::binary_function<IDX_T const&, IDX_T const&, bool>
|
||||
{
|
||||
vector<VAL> const& m_vecref;
|
||||
std::vector<VAL> const& m_vecref;
|
||||
boost::shared_ptr<COMP> m_comp;
|
||||
public:
|
||||
|
||||
COMP const& Compare;
|
||||
VectorIndexSorter(vector<VAL> const& v, COMP const& comp)
|
||||
VectorIndexSorter(std::vector<VAL> const& v, COMP const& comp)
|
||||
: m_vecref(v), Compare(comp) {
|
||||
}
|
||||
|
||||
VectorIndexSorter(vector<VAL> const& v)
|
||||
VectorIndexSorter(std::vector<VAL> const& v)
|
||||
: m_vecref(v), m_comp(new COMP()), Compare(*m_comp) {
|
||||
}
|
||||
|
||||
@ -43,20 +47,20 @@ namespace Moses
|
||||
return (fwd == bwd ? a < b : fwd);
|
||||
}
|
||||
|
||||
boost::shared_ptr<vector<IDX_T> >
|
||||
boost::shared_ptr<std::vector<IDX_T> >
|
||||
GetOrder() const;
|
||||
|
||||
void
|
||||
GetOrder(vector<IDX_T> & order) const;
|
||||
GetOrder(std::vector<IDX_T> & order) const;
|
||||
|
||||
};
|
||||
|
||||
template<typename VAL, typename COMP, typename IDX_T>
|
||||
boost::shared_ptr<vector<IDX_T> >
|
||||
boost::shared_ptr<std::vector<IDX_T> >
|
||||
VectorIndexSorter<VAL,COMP,IDX_T>::
|
||||
GetOrder() const
|
||||
{
|
||||
boost::shared_ptr<vector<IDX_T> > ret(new vector<IDX_T>(m_vecref.size()));
|
||||
boost::shared_ptr<std::vector<IDX_T> > ret(new std::vector<IDX_T>(m_vecref.size()));
|
||||
get_order(*ret);
|
||||
return ret;
|
||||
}
|
||||
@ -64,7 +68,7 @@ namespace Moses
|
||||
template<typename VAL, typename COMP, typename IDX_T>
|
||||
void
|
||||
VectorIndexSorter<VAL,COMP,IDX_T>::
|
||||
GetOrder(vector<IDX_T> & order) const
|
||||
GetOrder(std::vector<IDX_T> & order) const
|
||||
{
|
||||
order.resize(m_vecref.size());
|
||||
for (IDX_T i = 0; i < IDX_T(m_vecref.size()); ++i) order[i] = i;
|
||||
|
27
moses/TranslationModel/UG/mm/test-http-client.cc
Normal file
27
moses/TranslationModel/UG/mm/test-http-client.cc
Normal file
@ -0,0 +1,27 @@
|
||||
// -*- c++ -*-
|
||||
#include "ug_http_client.h"
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
try
|
||||
{
|
||||
if (argc != 2)
|
||||
{
|
||||
std::cout << "Usage: async_client <url>\n";
|
||||
std::cout << "Example:\n";
|
||||
std::cout << " async_client www.boost.org/LICENSE_1_0.txt\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
boost::asio::io_service io_service;
|
||||
Moses::http_client c(io_service, argv[1]);
|
||||
io_service.run();
|
||||
std::cout << c.content() << std::endl;
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
std::cout << "Exception: " << e.what() << "\n";
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
13
moses/TranslationModel/UG/mm/test-xml-escaping.cc
Normal file
13
moses/TranslationModel/UG/mm/test-xml-escaping.cc
Normal file
@ -0,0 +1,13 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <iomanip>
|
||||
#include "ug_http_client.h"
|
||||
|
||||
using namespace std;
|
||||
int main()
|
||||
{
|
||||
string line;
|
||||
while (getline(cin,line))
|
||||
cout << Moses::uri_encode(line) << endl;
|
||||
}
|
||||
|
@ -16,7 +16,8 @@ namespace ugdiss
|
||||
|
||||
TokenIndex::
|
||||
TokenIndex(string unkToken)
|
||||
: ridx(0),unkLabel(unkToken),unkId(1),numTokens(0)
|
||||
: ridx(0), unkLabel(unkToken), unkId(1), numTokens(0)
|
||||
, startIdx(0), endIdx(0)
|
||||
{
|
||||
lock.reset(new boost::mutex());
|
||||
};
|
||||
@ -94,15 +95,25 @@ namespace ugdiss
|
||||
TokenIndex::
|
||||
operator[](char const* p) const
|
||||
{
|
||||
if (startIdx==endIdx && !dynamic) return strcmp(p,"NULL") && unkId;
|
||||
Entry const* bla = lower_bound(startIdx,endIdx,p,comp);
|
||||
if (bla != endIdx && !strcmp(comp.base+bla->offset,p))
|
||||
return bla->id;
|
||||
if (!dynamic) return unkId;
|
||||
if (startIdx != endIdx)
|
||||
{
|
||||
Entry const* bla = lower_bound(startIdx,endIdx,p,comp);
|
||||
if (bla != endIdx && !strcmp(comp.base+bla->offset,p))
|
||||
return bla->id;
|
||||
if (!dynamic) return unkId;
|
||||
}
|
||||
else if (!dynamic) return strcmp(p,"NULL") && unkId;
|
||||
|
||||
boost::lock_guard<boost::mutex> lk(*this->lock);
|
||||
// stuff below is new as of 2011-01-30, for dynamic adding of unknown items
|
||||
// IMPORTANT: numTokens is not currently not changed, it is the number of
|
||||
// PRE-EXISING TOKENS, not including dynamically added Items
|
||||
// stuff below is new as of 2011-01-30, for dynamic adding of
|
||||
// unknown items. IMPORTANT: numTokens is currently not
|
||||
// changed; it is the number of PRE-EXISTING TOKENS, not including
|
||||
// dynamically added items.
|
||||
// if (!str2idExtra)
|
||||
// {
|
||||
// this->str2idExtra.reset(new map<string,id_type>());
|
||||
// this->newWords.reset(new vector<string>());
|
||||
// }
|
||||
map<string,id_type>::value_type newItem(p,str2idExtra->size()+numTokens);
|
||||
pair<map<string,id_type>::iterator,bool> foo = str2idExtra->insert(newItem);
|
||||
if (foo.second) // it actually is a new item
|
||||
@ -144,10 +155,13 @@ namespace ugdiss
|
||||
if (!ridx.size())
|
||||
{
|
||||
boost::lock_guard<boost::mutex> lk(*this->lock);
|
||||
// Someone else (multi-threading!) may have created the
|
||||
// reverse index in the meantime, so let's check again
|
||||
if (!ridx.size()) ridx = reverseIndex();
|
||||
}
|
||||
if (id < ridx.size())
|
||||
return ridx[id];
|
||||
|
||||
boost::lock_guard<boost::mutex> lk(*this->lock);
|
||||
if (dynamic && id < ridx.size()+newWords->size())
|
||||
return (*newWords)[id-ridx.size()].c_str();
|
||||
|
@ -35,12 +35,18 @@
|
||||
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
|
||||
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
|
||||
#include "moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h"
|
||||
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
|
||||
// #include "moses/FF/LexicalReordering/LexicalReorderingState.h"
|
||||
#include "moses/Util.h"
|
||||
// #include "moses/StaticData.h"
|
||||
|
||||
#ifndef NO_MOSES
|
||||
// #pragma message "COMPILING WITH MOSES SUPPORT!"
|
||||
#include "moses/StaticData.h"
|
||||
#include "moses/thread_safe_container.h"
|
||||
#include "moses/ContextScope.h"
|
||||
#include "moses/TranslationTask.h"
|
||||
#else
|
||||
// #pragma message "COMPILING WITHOUT MOSES SUPPORT!"
|
||||
#endif
|
||||
|
||||
#include "util/exception.hh"
|
||||
// #include "util/check.hh"
|
||||
@ -70,6 +76,7 @@ namespace Moses {
|
||||
float lbop(size_t const tries, size_t const succ, float const confidence);
|
||||
void write_bitvector(bitvector const& v, ostream& out);
|
||||
|
||||
#ifndef NO_MOSES
|
||||
struct
|
||||
ContextForQuery
|
||||
{
|
||||
@ -82,7 +89,7 @@ namespace Moses {
|
||||
ostream* bias_log;
|
||||
ContextForQuery() : bias_log(NULL) { }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
template<typename TKN>
|
||||
class Bitext
|
||||
@ -140,8 +147,13 @@ namespace Moses {
|
||||
|
||||
// prep2 launches sampling and returns immediately.
|
||||
// lookup (below) waits for the job to finish before it returns
|
||||
sptr<pstats>
|
||||
prep2(iter const& phrase, int max_sample = -1) const;
|
||||
|
||||
#ifndef NO_MOSES
|
||||
sptr<pstats>
|
||||
prep2(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
|
||||
#endif
|
||||
|
||||
public:
|
||||
Bitext(size_t const max_sample = 1000, size_t const xnum_workers = 16);
|
||||
@ -157,9 +169,15 @@ namespace Moses {
|
||||
open(string const base, string const L1, string const L2) = 0;
|
||||
|
||||
sptr<pstats>
|
||||
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
|
||||
lookup(iter const& phrase, int max_sample = -1) const;
|
||||
void prep(iter const& phrase) const;
|
||||
|
||||
#ifndef NO_MOSES
|
||||
sptr<pstats>
|
||||
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample = -1) const;
|
||||
void prep(ttasksptr const& ttask, iter const& phrase) const;
|
||||
#endif
|
||||
|
||||
|
||||
void setDefaultSampleSize(size_t const max_samples);
|
||||
size_t getDefaultSampleSize() const;
|
||||
@ -181,16 +199,7 @@ namespace Moses {
|
||||
void
|
||||
write_yawat_alignment
|
||||
( id_type const sid, iter const* m1, iter const* m2, ostream& out ) const;
|
||||
#if 0
|
||||
// needs to be adapted to the new API
|
||||
void
|
||||
lookup(std::vector<Token> const& snt, TSA<Token>& idx,
|
||||
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
|
||||
std::vector<std::vector<uint64_t> >* pidmap = NULL,
|
||||
typename PhrasePair<Token>::Scorer* scorer=NULL,
|
||||
sptr<SamplingBias const> const bias,
|
||||
bool multithread=true) const;
|
||||
#endif
|
||||
|
||||
string docname(id_type const sid) const;
|
||||
|
||||
};
|
||||
@ -427,11 +436,13 @@ namespace Moses {
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>::
|
||||
prep(ttasksptr const& ttask, iter const& phrase) const
|
||||
prep(iter const& phrase) const
|
||||
{
|
||||
prep2(ttask, phrase, m_default_sample_size);
|
||||
prep2(phrase, m_default_sample_size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// prep2 schedules a phrase for sampling, and returns immediately
|
||||
// the member function lookup retrieves the respective pstats instance
|
||||
// and waits until the sampling is finished before it returns.
|
||||
@ -440,26 +451,20 @@ namespace Moses {
|
||||
sptr<pstats>
|
||||
Bitext<Token>
|
||||
::prep2
|
||||
( ttasksptr const& ttask, iter const& phrase, int max_sample) const
|
||||
(iter const& phrase, int max_sample) const
|
||||
{
|
||||
if (max_sample < 0) max_sample = m_default_sample_size;
|
||||
sptr<ContextScope> scope = ttask->GetScope();
|
||||
sptr<ContextForQuery> context = scope->get<ContextForQuery>(this);
|
||||
sptr<SamplingBias> bias;
|
||||
if (context) bias = context->bias;
|
||||
sptr<pstats::cache_t> cache;
|
||||
|
||||
// - no caching for rare phrases and special requests (max_sample)
|
||||
// (still need to test what a good caching threshold is ...)
|
||||
// - use the task-specific cache when there is a sampling bias
|
||||
if (max_sample == int(m_default_sample_size)
|
||||
&& phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
|
||||
{
|
||||
cache = (phrase.root == I1.get()
|
||||
? (bias ? context->cache1 : m_cache1)
|
||||
: (bias ? context->cache2 : m_cache2));
|
||||
// if (bias) cerr << "Using bias." << endl;
|
||||
cache = (phrase.root == I1.get() ? m_cache1 : m_cache2);
|
||||
}
|
||||
|
||||
sptr<pstats> ret;
|
||||
sptr<pstats> const* cached;
|
||||
|
||||
@ -472,9 +477,6 @@ namespace Moses {
|
||||
if (m_num_workers > 1)
|
||||
ag->add_workers(m_num_workers);
|
||||
}
|
||||
// cerr << "NEW FREQUENT PHRASE: "
|
||||
// << phrase.str(V1.get()) << " " << phrase.approxOccurrenceCount()
|
||||
// << " at " << __FILE__ << ":" << __LINE__ << endl;
|
||||
ret = ag->add_job(this, phrase, max_sample, bias);
|
||||
if (cache) cache->set(phrase.getPid(),ret);
|
||||
UTIL_THROW_IF2(ret == NULL, "Couldn't schedule sampling job.");
|
||||
@ -545,87 +547,6 @@ namespace Moses {
|
||||
}
|
||||
};
|
||||
|
||||
#if 0
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>::
|
||||
lookup(std::vector<Token> const& snt, TSA<Token>& idx,
|
||||
std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > >& dest,
|
||||
std::vector<std::vector<uint64_t> >* pidmap,
|
||||
typename PhrasePair<Token>::Scorer* scorer,
|
||||
sptr<SamplingBias const> const& bias, bool multithread) const
|
||||
{
|
||||
// typedef std::vector<std::vector<sptr<std::vector<PhrasePair<Token> > > > > ret_t;
|
||||
|
||||
dest.clear();
|
||||
dest.resize(snt.size());
|
||||
if (pidmap) { pidmap->clear(); pidmap->resize(snt.size()); }
|
||||
|
||||
// collect statistics in parallel, then build PT entries as
|
||||
// the sampling finishes
|
||||
bool fwd = &idx == I1.get();
|
||||
std::vector<boost::thread*> workers; // background threads doing the lookup
|
||||
pplist_cache_t& C = (fwd ? m_pplist_cache1 : m_pplist_cache2);
|
||||
if (C.capacity() < 100000) C.reserve(100000);
|
||||
for (size_t i = 0; i < snt.size(); ++i)
|
||||
{
|
||||
dest[i].reserve(snt.size()-i);
|
||||
typename TSA<Token>::tree_iterator m(&idx);
|
||||
for (size_t k = i; k < snt.size() && m.extend(snt[k].id()); ++k)
|
||||
{
|
||||
uint64_t key = m.getPid();
|
||||
if (pidmap) (*pidmap)[i].push_back(key);
|
||||
sptr<std::vector<PhrasePair<Token> > > pp = C.get(key);
|
||||
if (pp)
|
||||
dest[i].push_back(pp);
|
||||
else
|
||||
{
|
||||
pp.reset(new std::vector<PhrasePair<Token> >());
|
||||
C.set(key,pp);
|
||||
dest[i].push_back(pp);
|
||||
sptr<pstats> x = prep2(m, this->default_sample_size,bias);
|
||||
pstats2pplist<Token> w(m,*(fwd?T2:T1),x,*pp,scorer);
|
||||
if (multithread)
|
||||
{
|
||||
boost::thread* t = new boost::thread(w);
|
||||
workers.push_back(t);
|
||||
}
|
||||
else w();
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t w = 0; w < workers.size(); ++w)
|
||||
{
|
||||
workers[w]->join();
|
||||
delete workers[w];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>::
|
||||
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample) const
|
||||
{
|
||||
sptr<pstats> ret = prep2(ttask, phrase, max_sample);
|
||||
|
||||
UTIL_THROW_IF2(!ret, "Got NULL pointer where I expected a valid pointer.");
|
||||
|
||||
// Why were we locking here?
|
||||
if (m_num_workers <= 1)
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> guard(m_lock);
|
||||
typename agenda::worker(*this->ag)();
|
||||
}
|
||||
else
|
||||
{
|
||||
boost::unique_lock<boost::mutex> lock(ret->lock);
|
||||
while (ret->in_progress)
|
||||
ret->ready.wait(lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>
|
||||
@ -729,27 +650,6 @@ namespace Moses {
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>::
|
||||
lookup(siter const& phrase, size_t const max_sample,
|
||||
sptr<SamplingBias const> const& bias) const
|
||||
{
|
||||
sptr<pstats> ret = prep2(phrase, max_sample);
|
||||
boost::unique_lock<boost::shared_mutex> guard(m_lock);
|
||||
if (this->num_workers <= 1)
|
||||
typename agenda::worker(*this->ag)();
|
||||
else
|
||||
{
|
||||
boost::unique_lock<boost::mutex> lock(ret->lock);
|
||||
while (ret->in_progress)
|
||||
ret->ready.wait(lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename Token>
|
||||
void
|
||||
expand(typename Bitext<Token>::iter const& m,
|
||||
@ -773,33 +673,9 @@ namespace Moses {
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
template<typename Token>
|
||||
class
|
||||
PStatsCache
|
||||
{
|
||||
typedef boost::unordered_map<uint64_t, sptr<pstats> > my_cache_t;
|
||||
boost::shared_mutex m_lock;
|
||||
my_cache_t m_cache;
|
||||
|
||||
public:
|
||||
sptr<pstats> get(Bitext<Token>::iter const& phrase) const;
|
||||
|
||||
sptr<pstats>
|
||||
add(Bitext<Token>::iter const& phrase) const
|
||||
{
|
||||
uint64_t pid = phrase.getPid();
|
||||
std::pair<my_cache_t::iterator,bool>
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
#endif
|
||||
} // end of namespace bitext
|
||||
} // end of namespace moses
|
||||
|
||||
#include "ug_im_bitext.h"
|
||||
#include "ug_mm_bitext.h"
|
||||
|
||||
|
||||
|
||||
#include "ug_bitext_moses.h"
|
||||
|
@ -137,7 +137,10 @@ int Bitext<Token>::agenda::job
|
||||
|
||||
float p = (*m_bias)[sid];
|
||||
id_type docid = m_bias->GetClass(sid);
|
||||
uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
|
||||
|
||||
// uint32_t k = docid < stats->indoc.size() ? stats->indoc[docid] : 0;
|
||||
std::map<uint32_t,uint32_t>::const_iterator m = stats->indoc.find(docid);
|
||||
uint32_t k = m != stats->indoc.end() ? m->second : 0 ;
|
||||
|
||||
// always consider candidates from dominating documents and
|
||||
// from documents that have not been considered at all yet
|
||||
@ -159,11 +162,17 @@ int Bitext<Token>::agenda::job
|
||||
e = root->getCorpus()->sntEnd(sid);
|
||||
*log << docid << ":" << sid << " " << size_t(k) << "/" << N
|
||||
<< " @" << p << " => " << d << " [";
|
||||
for (size_t i = 0; i < stats->indoc.size(); ++i)
|
||||
for (std::map<uint32_t, uint32_t>::const_iterator m = stats->indoc.begin();
|
||||
m != stats->indoc.end(); ++m)
|
||||
{
|
||||
if (i) *log << " ";
|
||||
*log << stats->indoc[i];
|
||||
if (m != stats->indoc.begin()) *log << " ";
|
||||
*log << m->first << ":" << m->second;
|
||||
}
|
||||
// for (size_t i = 0; i < stats->indoc.size(); ++i)
|
||||
// {
|
||||
// if (i) *log << " ";
|
||||
// *log << stats->indoc[i];
|
||||
// }
|
||||
*log << "] ";
|
||||
for (; x < e; ++x) *log << (*m_bitext->V1)[x->id()] << " ";
|
||||
if (!ret) *log << "SKIP";
|
||||
|
@ -76,7 +76,7 @@ namespace Moses
|
||||
++obwd[bwd_orient];
|
||||
if (docid >= 0)
|
||||
{
|
||||
while (int(indoc.size()) <= docid) indoc.push_back(0);
|
||||
// while (int(indoc.size()) <= docid) indoc.push_back(0);
|
||||
++indoc[docid];
|
||||
}
|
||||
}
|
||||
|
@ -27,7 +27,8 @@ namespace Moses
|
||||
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
|
||||
|
||||
public:
|
||||
vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
|
||||
std::map<uint32_t,uint32_t> indoc;
|
||||
// vector<uint32_t> indoc; // counts origin of samples (for biased sampling)
|
||||
jstats();
|
||||
jstats(jstats const& other);
|
||||
uint32_t rcnt() const; // raw joint counts
|
||||
|
88
moses/TranslationModel/UG/mm/ug_bitext_moses.h
Normal file
88
moses/TranslationModel/UG/mm/ug_bitext_moses.h
Normal file
@ -0,0 +1,88 @@
|
||||
// -*- mode: c++; cc-style: moses-cc-style -*-
|
||||
#pragma once
|
||||
#ifndef NO_MOSES
|
||||
namespace Moses {
|
||||
namespace bitext {
|
||||
|
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>::
|
||||
lookup(ttasksptr const& ttask, iter const& phrase, int max_sample) const
|
||||
{
|
||||
sptr<pstats> ret = prep2(ttask, phrase, max_sample);
|
||||
UTIL_THROW_IF2(!ret, "Got NULL pointer where I expected a valid pointer.");
|
||||
|
||||
// Why were we locking here?
|
||||
if (m_num_workers <= 1)
|
||||
{
|
||||
boost::unique_lock<boost::shared_mutex> guard(m_lock);
|
||||
typename agenda::worker(*this->ag)();
|
||||
}
|
||||
else
|
||||
{
|
||||
boost::unique_lock<boost::mutex> lock(ret->lock);
|
||||
while (ret->in_progress)
|
||||
ret->ready.wait(lock);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>::
|
||||
prep(ttasksptr const& ttask, iter const& phrase) const
|
||||
{
|
||||
prep2(ttask, phrase, m_default_sample_size);
|
||||
}
|
||||
|
||||
|
||||
// prep2 schedules a phrase for sampling, and returns immediately
|
||||
// the member function lookup retrieves the respective pstats instance
|
||||
// and waits until the sampling is finished before it returns.
|
||||
// This allows sampling in the background
|
||||
//
// Parameters:
//   ttask      - translation task; its scope is queried for a
//                ContextForQuery registered for this bitext
//   phrase     - the phrase to be sampled
//   max_sample - sample size; a negative value selects
//                m_default_sample_size
// Returns the pstats handle produced by the agenda's add_job(), or a
// previously cached, non-NULL handle for the same phrase id.
// Throws (UTIL_THROW_IF2) if add_job() returns NULL.
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>
|
||||
::prep2
|
||||
( ttasksptr const& ttask, iter const& phrase, int max_sample) const
|
||||
{
|
||||
if (max_sample < 0) max_sample = m_default_sample_size;
|
||||
sptr<SamplingBias> bias;
|
||||
sptr<ContextScope> scope = ttask->GetScope();
|
||||
// Query-specific context stored in the task's scope under this bitext.
sptr<ContextForQuery> context = scope->get<ContextForQuery>(this);
|
||||
// bias stays NULL when no context has been registered for this bitext.
if (context) bias = context->bias;
|
||||
sptr<pstats::cache_t> cache;
|
||||
// - no caching for rare phrases and special requests (max_sample)
|
||||
// (still need to test what a good caching threshold is ...)
|
||||
// - use the task-specific cache when there is a sampling bias
|
||||
if (max_sample == int(m_default_sample_size)
|
||||
&& phrase.approxOccurrenceCount() > m_pstats_cache_threshold)
|
||||
{
|
||||
// cache1 is used for phrases rooted in I1, cache2 otherwise; a non-NULL
// bias implies a non-NULL context (bias is only ever set from it above).
cache = (phrase.root == I1.get()
|
||||
? (bias ? context->cache1 : m_cache1)
|
||||
: (bias ? context->cache2 : m_cache2));
|
||||
}
|
||||
sptr<pstats> ret;
|
||||
sptr<pstats> const* cached;
|
||||
|
||||
// Fast path: return a cached, non-NULL entry without taking m_lock.
// NOTE(review): ret is still NULL here; presumably the second argument of
// get() is a default value for a missing key -- confirm against
// pstats::cache_t.
if (cache && (cached = cache->get(phrase.getPid(), ret)) && *cached)
|
||||
return *cached;
|
||||
// NOTE(review): the cache is not consulted again once m_lock is held, so
// two threads missing the cache at the same time would presumably both
// schedule a job for the same phrase -- confirm whether that is intended.
boost::unique_lock<boost::shared_mutex> guard(m_lock);
|
||||
// Create the agenda on first use; worker threads are only added when
// more than one worker is configured.
if (!ag)
|
||||
{
|
||||
ag.reset(new agenda(*this));
|
||||
if (m_num_workers > 1)
|
||||
ag->add_workers(m_num_workers);
|
||||
}
|
||||
ret = ag->add_job(this, phrase, max_sample, bias);
|
||||
// The result is stored in the cache before the NULL check below.
if (cache) cache->set(phrase.getPid(),ret);
|
||||
UTIL_THROW_IF2(ret == NULL, "Couldn't schedule sampling job.");
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
@ -58,7 +58,7 @@ namespace Moses
|
||||
++obwd[po_bwd];
|
||||
if (docid >= 0)
|
||||
{
|
||||
while (int(indoc.size()) <= docid) indoc.push_back(0);
|
||||
// while (int(indoc.size()) <= docid) indoc.push_back(0);
|
||||
++indoc[docid];
|
||||
}
|
||||
}
|
||||
|
@ -33,8 +33,8 @@ namespace Moses
|
||||
uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations
|
||||
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
|
||||
|
||||
std::vector<uint32_t> indoc; // distribution over where samples came from
|
||||
|
||||
// std::vector<uint32_t> indoc; // distribution over where samples came from
|
||||
std::map<uint32_t,uint32_t> indoc;
|
||||
typedef std::map<uint64_t, jstats> trg_map_t;
|
||||
trg_map_t trg;
|
||||
pstats();
|
||||
|
219
moses/TranslationModel/UG/mm/ug_http_client.cc
Normal file
219
moses/TranslationModel/UG/mm/ug_http_client.cc
Normal file
@ -0,0 +1,219 @@
|
||||
#include "ug_http_client.h"
|
||||
#include "moses/Util.h"
|
||||
namespace Moses
|
||||
{
|
||||
using boost::asio::ip::tcp;
|
||||
|
||||
// Returns the current contents of the m_content stream as a string.
std::string
http_client::
content() const
{
  return m_content.str();
}
|
||||
|
||||
// Constructs a client bound to /io_service/ and immediately calls init()
// with the given server, port and path.
http_client::
http_client(boost::asio::io_service& io_service,
            std::string const& server,
            std::string const& port,
            std::string const& path)
  : resolver_(io_service)
  , socket_(io_service)
{
  this->init(server, port, path);
}
|
||||
|
||||
// Constructs a client from a single URL of the form
//   [scheme://]server[:port][/path]
// and calls init() with the extracted parts. Defaults: port "http" and
// path "/". If a scheme is present it is used as the port (service name)
// unless an explicit ":port" follows the server name.
http_client::
http_client(boost::asio::io_service& io_service, std::string url)
  : resolver_(io_service)
  , socket_(io_service)
{
  std::string port = "http";

  // Strip a leading "scheme://" and remember the scheme as the service.
  std::string::size_type const scheme_end = url.find("://");
  if (scheme_end != std::string::npos)
    {
      port = url.substr(0, scheme_end);
      url.erase(0, scheme_end + 3);
    }

  // host_end: first ':' or '/' (or end of string);
  // path_begin: first '/' (or end of string).
  std::string::size_type const len = url.size();
  std::string::size_type const host_end = std::min(url.find_first_of(":/"), len);
  std::string::size_type const path_begin = std::min(url.find("/"), len);

  // An explicit port sits between the ':' and the start of the path.
  bool const has_port = host_end < len && url[host_end] == ':';
  if (has_port)
    port = url.substr(host_end + 1, path_begin - host_end - 1);

  std::string const server = url.substr(0, host_end);
  std::string const path = (path_begin < len
                            ? url.substr(path_begin)
                            : std::string("/"));
#if 0
  std::cerr << HERE << std::endl;
  std::cerr << "SERVER " << server << std::endl;
  std::cerr << "PORT |" << port << "|" << std::endl;
  std::cerr << "PATH " << path << std::endl;
#endif
  init(server, port, path);
}
|
||||
|
||||
void
|
||||
http_client::
|
||||
init(std::string const& server, std::string const& port, std::string const& path)
|
||||
{
|
||||
// Form the request. We specify the "Connection: close" header so
|
||||
// that the server will close the socket after transmitting the
|
||||
// response. This will allow us to treat all data up until the EOF
|
||||
// as the content.
|
||||
|
||||
std::ostream request_stream(&request_);
|
||||
request_stream << "GET " << path << " HTTP/1.0\r\n";
|
||||
request_stream << "Host: " << server << "\r\n";
|
||||
request_stream << "Accept: */*\r\n";
|
||||
request_stream << "Connection: close\r\n\r\n";
|
||||
|
||||
// Start an asynchronous resolve to translate the server and service names
|
||||
// into a list of endpoints.
|
||||
tcp::resolver::query query(server, port.c_str());
|
||||
resolver_.async_resolve(query,
|
||||
boost::bind(&http_client::handle_resolve, this,
|
||||
boost::asio::placeholders::error,
|
||||
boost::asio::placeholders::iterator));
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
http_client::
|
||||
handle_resolve(const boost::system::error_code& err,
|
||||
tcp::resolver::iterator endpoint_iterator)
|
||||
{
|
||||
if (!err)
|
||||
{
|
||||
// Attempt a connection to the first endpoint in the list. Each endpoint
|
||||
// will be tried until we successfully establish a connection.
|
||||
tcp::endpoint endpoint = *endpoint_iterator;
|
||||
socket_.async_connect(endpoint,
|
||||
boost::bind(&http_client::handle_connect, this,
|
||||
boost::asio::placeholders::error, ++endpoint_iterator));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_error << "Error: " << err.message() << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
http_client::
|
||||
handle_connect(const boost::system::error_code& err,
|
||||
tcp::resolver::iterator endpoint_iterator)
|
||||
{
|
||||
if (!err)
|
||||
{
|
||||
// The connection was successful. Send the request.
|
||||
boost::asio::async_write(socket_, request_,
|
||||
boost::bind(&http_client::handle_write_request, this,
|
||||
boost::asio::placeholders::error));
|
||||
}
|
||||
else if (endpoint_iterator != tcp::resolver::iterator())
|
||||
{
|
||||
// The connection failed. Try the next endpoint in the list.
|
||||
socket_.close();
|
||||
tcp::endpoint endpoint = *endpoint_iterator;
|
||||
socket_.async_connect(endpoint,
|
||||
boost::bind(&http_client::handle_connect, this,
|
||||
boost::asio::placeholders::error, ++endpoint_iterator));
|
||||
}
|
||||
else m_error << "Error: " << err.message() << "\n";
|
||||
}
|
||||
|
||||
void
|
||||
http_client::
|
||||
handle_write_request(const boost::system::error_code& err)
|
||||
{
|
||||
using namespace boost::asio;
|
||||
if (err) { m_error << "Error: " << err.message() << "\n"; return; }
|
||||
|
||||
// Read the response status line. The response_ streambuf will
|
||||
// automatically grow to accommodate the entire line. The growth may be
|
||||
// limited by passing a maximum size to the streambuf constructor.
|
||||
async_read_until(socket_, response_, "\r\n",
|
||||
boost::bind(&http_client::handle_read_status_line,
|
||||
this, placeholders::error));
|
||||
}
|
||||
|
||||
void
|
||||
http_client::
|
||||
handle_read_status_line(const boost::system::error_code& err)
|
||||
{
|
||||
if (err) { m_error << "Error: " << err << "\n"; return; }
|
||||
|
||||
using namespace boost::asio;
|
||||
// Check that response is OK.
|
||||
std::istream response_stream(&response_);
|
||||
response_stream >> m_http_version >> m_status_code;
|
||||
std::getline(response_stream, m_status_message);
|
||||
if (!response_stream || m_http_version.substr(0, 5) != "HTTP/")
|
||||
m_error << "Invalid response\n";
|
||||
else if (m_status_code != 200)
|
||||
m_error << "Response returned with status code " << m_status_code << "\n";
|
||||
else // Read the response headers, which are terminated by a blank line.
|
||||
async_read_until(socket_, response_, "\r\n\r\n",
|
||||
boost::bind(&http_client::handle_read_headers, this,
|
||||
placeholders::error));
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
http_client::
|
||||
handle_read_headers(const boost::system::error_code& err)
|
||||
{
|
||||
if (err) { m_error << "Error: " << err << "\n"; return; }
|
||||
|
||||
// Process the response headers.
|
||||
std::istream response_stream(&response_);
|
||||
std::string line;
|
||||
while (std::getline(response_stream, line) && line != "\r")
|
||||
m_header.push_back(line);
|
||||
|
||||
// Write whatever content we already have to output.
|
||||
if (response_.size() > 0)
|
||||
m_content << &response_;
|
||||
|
||||
using namespace boost::asio;
|
||||
// Start reading remaining data until EOF.
|
||||
async_read(socket_, response_, transfer_at_least(1),
|
||||
boost::bind(&http_client::handle_read_content, this,
|
||||
placeholders::error));
|
||||
}
|
||||
|
||||
void
|
||||
http_client::
|
||||
handle_read_content(const boost::system::error_code& err)
|
||||
{
|
||||
using namespace boost::asio;
|
||||
if(!err)
|
||||
{
|
||||
// Write all of the data that has been read so far.
|
||||
// Then continue reading remaining data until EOF.
|
||||
m_content << &response_;
|
||||
async_read(socket_, response_, transfer_at_least(1),
|
||||
boost::bind(&http_client::handle_read_content, this,
|
||||
placeholders::error));
|
||||
}
|
||||
else if (err != boost::asio::error::eof)
|
||||
{
|
||||
m_error << "Error: " << err << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Encodes a string for use in a URL query component.
//
// - ASCII letters, digits and the characters . ~ _ - are copied unchanged.
// - A space becomes '+'.
// - Every other byte becomes '%' followed by exactly two lowercase hex
//   digits, so bytes below 0x10 are zero-padded (e.g. '\n' -> "%0a").
//
// The whole string is processed, including embedded NUL bytes, and the
// result is built in a std::string rather than in a stack array sized by
// the input.
std::string
uri_encode(std::string const& in)
{
  static char const hex_digits[] = "0123456789abcdef";

  std::string out;
  out.reserve(3 * in.size()); // worst case: every byte is escaped
  for (std::string::size_type i = 0; i < in.size(); ++i)
    {
      unsigned char const c = static_cast<unsigned char>(in[i]);
      bool const keep = ((c >= '0' && c <= '9') ||
                         (c >= 'A' && c <= 'Z') ||
                         (c >= 'a' && c <= 'z') ||
                         c == '.' || c == '~' || c == '_' || c == '-');
      if (c == ' ')
        out += '+';
      else if (keep)
        out += static_cast<char>(c);
      else
        {
          out += '%';
          out += hex_digits[c >> 4];
          out += hex_digits[c & 0x0F];
        }
    }
  return out;
}
|
||||
|
||||
}
|
63
moses/TranslationModel/UG/mm/ug_http_client.h
Normal file
63
moses/TranslationModel/UG/mm/ug_http_client.h
Normal file
@ -0,0 +1,63 @@
|
||||
// -*- c++ -*-
|
||||
// Adapted by Ulrich Germann from:
|
||||
// async_client.cpp
|
||||
// ~~~~~~~~~~~~~~~~
|
||||
//
|
||||
// Copyright (c) 2003-2011 Christopher M. Kohlhoff (chris at kohlhoff dot com)
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
//
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <istream>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <boost/asio.hpp>
|
||||
#include <boost/bind.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
using boost::asio::ip::tcp;
|
||||
|
||||
std::string uri_encode(std::string const& in);
|
||||
|
||||
class http_client
|
||||
{
|
||||
std::ostringstream m_content;
|
||||
std::vector<std::string> m_header;
|
||||
std::string m_http_version;
|
||||
unsigned int m_status_code;
|
||||
std::string m_status_message;
|
||||
std::ostringstream m_error;
|
||||
|
||||
public:
|
||||
http_client(boost::asio::io_service& io_service, std::string url);
|
||||
http_client(boost::asio::io_service& io_service,
|
||||
std::string const& server,
|
||||
std::string const& port,
|
||||
std::string const& path);
|
||||
private:
|
||||
|
||||
void init(std::string const& server,
|
||||
std::string const& port,
|
||||
std::string const& path);
|
||||
|
||||
void handle_resolve(const boost::system::error_code& err,
|
||||
tcp::resolver::iterator endpoint_iterator);
|
||||
void handle_connect(const boost::system::error_code& err,
|
||||
tcp::resolver::iterator endpoint_iterator);
|
||||
void handle_write_request(const boost::system::error_code& err);
|
||||
void handle_read_status_line(const boost::system::error_code& err);
|
||||
void handle_read_headers(const boost::system::error_code& err);
|
||||
void handle_read_content(const boost::system::error_code& err);
|
||||
tcp::resolver resolver_;
|
||||
tcp::socket socket_;
|
||||
boost::asio::streambuf request_;
|
||||
boost::asio::streambuf response_;
|
||||
public:
|
||||
std::string content() const;
|
||||
};
|
||||
|
||||
}
|
@ -57,7 +57,7 @@ namespace ugdiss
|
||||
public:
|
||||
|
||||
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
|
||||
imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL);
|
||||
imTtrack(istream& in, TokenIndex& V, ostream* log = NULL);
|
||||
imTtrack(size_t reserve = 0);
|
||||
// imTtrack(istream& in, Vocab& V);
|
||||
|
||||
@ -131,24 +131,30 @@ namespace ugdiss
|
||||
|
||||
template<typename Token>
|
||||
imTtrack<Token>::
|
||||
imTtrack(istream& in, TokenIndex const& V, ostream* log)
|
||||
imTtrack(istream& in, TokenIndex& V, ostream* log)
|
||||
: numToks(0)
|
||||
{
|
||||
myData.reset(new vector<vector<Token> >());
|
||||
string line,w;
|
||||
size_t linectr=0;
|
||||
boost::unordered_map<string,id_type> H;
|
||||
for (id_type i = 0; i < V.knownVocabSize(); ++i)
|
||||
H[V[i]] = i;
|
||||
// for (id_type i = 0; i < V.knownVocabSize(); ++i)
|
||||
// H[V[i]] = i;
|
||||
while (getline(in,line))
|
||||
{
|
||||
// cout << line << endl;
|
||||
myData->push_back(vector<Token>());
|
||||
if (log && ++linectr%1000000==0)
|
||||
*log << linectr/1000000 << "M lines of input processed" << endl;
|
||||
istringstream buf(line);
|
||||
// cout << line << endl;
|
||||
while (buf>>w)
|
||||
myData->back().push_back(Token(H[w]));
|
||||
myData->back().resize(myData.back().size());
|
||||
{
|
||||
myData->back().push_back(Token(V[w]));
|
||||
// cout << w << " " << myData->back().back().id() << " "
|
||||
// << V[w] << endl;
|
||||
}
|
||||
// myData->back().resize(myData->back().size(), Token(0));
|
||||
numToks += myData->back().size();
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,35 @@
|
||||
// -*- c++ -*-
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
|
||||
|
||||
namespace Moses { namespace bitext {
|
||||
#ifndef NO_MOSES
|
||||
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
|
||||
#endif
|
||||
|
||||
namespace Moses {
|
||||
#ifdef NO_MOSES
|
||||
namespace LRModel{
|
||||
|
||||
enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
|
||||
enum Direction { Forward, Backward, Bidirectional };
|
||||
|
||||
enum ReorderingType {
|
||||
M = 0, // monotonic
|
||||
NM = 1, // non-monotonic
|
||||
S = 1, // swap
|
||||
D = 2, // discontinuous
|
||||
DL = 2, // discontinuous, left
|
||||
DR = 3, // discontinuous, right
|
||||
R = 0, // right
|
||||
L = 1, // left
|
||||
MAX = 3, // largest possible
|
||||
NONE = 4 // largest possible
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace bitext {
|
||||
|
||||
typedef Moses::LRModel::ReorderingType PhraseOrientation;
|
||||
|
||||
|
@ -45,8 +45,9 @@ namespace Moses
|
||||
this->m_docname2docid[docname] = docid;
|
||||
this->m_docname.push_back(docname);
|
||||
line >> b;
|
||||
VERBOSE(1, "DOCUMENT MAP " << docname
|
||||
<< " " << a << "-" << b+a << endl);
|
||||
#ifndef NO_MOSES
|
||||
VERBOSE(1, "DOCUMENT MAP " << docname << " " << a << "-" << b+a << endl);
|
||||
#endif
|
||||
for (b += a; a < b; ++a)
|
||||
(*this->m_sid2docid)[a] = docid;
|
||||
}
|
||||
|
@ -3,7 +3,9 @@
|
||||
#include <vector>
|
||||
#include "ug_typedefs.h"
|
||||
#include "ug_bitext_pstats.h"
|
||||
#ifndef NO_MOSES
|
||||
#include "moses/FF/LexicalReordering/LexicalReorderingState.h"
|
||||
#endif
|
||||
#include "boost/format.hpp"
|
||||
#include "tpt_tokenindex.h"
|
||||
namespace Moses
|
||||
@ -28,7 +30,8 @@ namespace Moses
|
||||
std::vector<uchar> aln;
|
||||
float score;
|
||||
bool inverse;
|
||||
std::vector<uint32_t> indoc;
|
||||
// std::vector<uint32_t> indoc;
|
||||
std::map<uint32_t,uint32_t> indoc;
|
||||
PhrasePair() { };
|
||||
PhrasePair(PhrasePair const& o);
|
||||
|
||||
@ -52,9 +55,11 @@ namespace Moses
|
||||
fill_lr_vec(LRModel::Direction const& dir,
|
||||
LRModel::ModelType const& mdl,
|
||||
vector<float>& v) const;
|
||||
#ifndef NO_MOSES
|
||||
void
|
||||
print(ostream& out, TokenIndex const& V1, TokenIndex const& V2,
|
||||
LRModel const& LR) const;
|
||||
#endif
|
||||
|
||||
class SortByTargetIdSeq
|
||||
{
|
||||
@ -292,6 +297,7 @@ namespace Moses
|
||||
}
|
||||
|
||||
|
||||
#ifndef NO_MOSES
|
||||
template<typename Token>
|
||||
void
|
||||
PhrasePair<Token>
|
||||
@ -301,10 +307,12 @@ namespace Moses
|
||||
out << toString (V1, this->start1, this->len1) << " ::: "
|
||||
<< toString (V2, this->start2, this->len2) << " "
|
||||
<< this->joint << " [";
|
||||
for (size_t i = 0; i < this->indoc.size(); ++i)
|
||||
// for (size_t i = 0; i < this->indoc.size(); ++i)
|
||||
for (std::map<uint32_t,uint32_t>::const_iterator m = indoc.begin();
|
||||
m != indoc.end(); ++m)
|
||||
{
|
||||
if (i) out << " ";
|
||||
out << this->indoc[i];
|
||||
if (m != indoc.begin()) out << " ";
|
||||
out << m->first << ":" << m->second;
|
||||
}
|
||||
out << "] [";
|
||||
vector<float> lrscores;
|
||||
@ -331,5 +339,6 @@ namespace Moses
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
} // namespace bitext
|
||||
} // namespace Moses
|
||||
|
@ -2,12 +2,16 @@
|
||||
#include <iostream>
|
||||
#include <boost/foreach.hpp>
|
||||
#include "moses/Timer.h"
|
||||
// #include <curl/curl.h>
|
||||
// #ifdef HAVE_CURLPP
|
||||
// #include <curlpp/Options.hpp>
|
||||
// #include <curlpp/cURLpp.hpp>
|
||||
// #include <curlpp/Easy.hpp>
|
||||
// #endif
|
||||
|
||||
#ifdef HAVE_CURLPP
|
||||
#include <curlpp/Options.hpp>
|
||||
#include <curlpp/cURLpp.hpp>
|
||||
#include <curlpp/Easy.hpp>
|
||||
#endif
|
||||
// #ifdef WITH_MMT_BIAS_CLIENT
|
||||
#include "ug_http_client.h"
|
||||
// #endif
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
@ -15,23 +19,77 @@ namespace Moses
|
||||
{
|
||||
using ugdiss::id_type;
|
||||
|
||||
#ifdef HAVE_CURLPP
|
||||
std::string
|
||||
query_bias_server(std::string const& url, std::string const& text)
|
||||
size_t ca_write_callback(void *ptr, size_t size, size_t nmemb,
|
||||
std::string* response)
|
||||
{
|
||||
// communicate with the bias server; resuts will be in ...
|
||||
std::ostringstream os;
|
||||
curlpp::Easy myRequest;
|
||||
std::string query = url+curlpp::escape(text);
|
||||
myRequest.setOpt(new curlpp::options::Url(query));
|
||||
curlpp::options::WriteStream ws(&os);
|
||||
myRequest.setOpt(ws); // Give it to your request
|
||||
myRequest.perform(); // This will output to os
|
||||
return os.str();
|
||||
char const* c = reinterpret_cast<char const*>(ptr);
|
||||
*response += std::string(c, size * nmemb);
|
||||
return size * nmemb;
|
||||
}
|
||||
#endif
|
||||
|
||||
DocumentBias
|
||||
std::string
|
||||
query_bias_server(std::string const& server, std::string const& context)
|
||||
{
|
||||
#if 0
|
||||
std::string query = server + uri_encode(context);
|
||||
std::string response;
|
||||
|
||||
CURL* curl = curl_easy_init();
|
||||
UTIL_THROW_IF2(!curl, "Could not init curl.");
|
||||
curl_easy_setopt(curl, CURLOPT_URL, query.c_str());
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ca_write_callback);
|
||||
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response);
|
||||
CURLcode res = curl_easy_perform(curl);
|
||||
curl_easy_cleanup(curl);
|
||||
return response;
|
||||
#else
|
||||
std::string query = server+uri_encode(context);
|
||||
boost::asio::io_service io_service;
|
||||
Moses::http_client c(io_service, query);
|
||||
io_service.run();
|
||||
|
||||
std::string response = c.content();
|
||||
std::cerr << "SERVER RESPONSE: " << response << std::endl;
|
||||
|
||||
return c.content();
|
||||
#endif
|
||||
}
|
||||
|
||||
// // #ifdef WITH_MMT_BIAS_CLIENT
|
||||
// std::string
|
||||
// query_bias_server(std::string const& url, std::string const& text)
|
||||
// {
|
||||
// #if 1
|
||||
// std::string query = url+uri_encode(text);
|
||||
// boost::asio::io_service io_service;
|
||||
// Moses::http_client c(io_service, query);
|
||||
// io_service.run();
|
||||
|
||||
// std::string response = c.content();
|
||||
// std::cerr << "SERVER RESPONSE: " << response << std::endl;
|
||||
|
||||
// return c.content();
|
||||
// #else
|
||||
// return "";
|
||||
// #endif
|
||||
// }
|
||||
// // #endif
|
||||
|
||||
|
||||
// std::string
|
||||
// query_bias_server(std::string const& url, int const port,
|
||||
// std::string const& context,
|
||||
// std::string const& src_lang)
|
||||
// {
|
||||
// char* response
|
||||
// = ca_get_context(url.c_str(), port, context.c_str(), src_lang.c_str());
|
||||
// UTIL_THROW_IF2(!response, "No response from server");
|
||||
// std::string json = response;
|
||||
// free(response);
|
||||
// return json;
|
||||
// }
|
||||
|
||||
DocumentBias
|
||||
::DocumentBias
|
||||
( std::vector<id_type> const& sid2doc,
|
||||
std::map<std::string,id_type> const& docname2docid,
|
||||
@ -40,13 +98,14 @@ namespace Moses
|
||||
: m_sid2docid(sid2doc)
|
||||
, m_bias(docname2docid.size(), 0)
|
||||
{
|
||||
#ifdef HAVE_CURLPP
|
||||
// #ifdef HAVE_CURLPP
|
||||
Timer timer;
|
||||
if (log) timer.start(NULL);
|
||||
std::string json = query_bias_server(server_url, text);
|
||||
std::cerr << "SERVER RESPONSE " << json << std::endl;
|
||||
init_from_json(json, docname2docid, log);
|
||||
if (log) *log << "Bias query took " << timer << " seconds." << std::endl;
|
||||
#endif
|
||||
// #endif
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -37,6 +37,7 @@ namespace Moses
|
||||
{
|
||||
std::vector<id_type> const& m_sid2docid;
|
||||
std::vector<float> m_bias;
|
||||
// std::map<int,float> m_bias;
|
||||
|
||||
public:
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
#ifdef HAVE_CURLPP
|
||||
#include <curlpp/Options.hpp>
|
||||
#include <curlpp/cURLpp.hpp>
|
||||
#include <curlpp/Easy.hpp>
|
||||
#endif
|
||||
// #ifdef HAVE_CURLPP
|
||||
// #include <curlpp/Options.hpp>
|
||||
// #include <curlpp/cURLpp.hpp>
|
||||
// #include <curlpp/Easy.hpp>
|
||||
// #endif
|
||||
|
||||
#include "mmsapt.h"
|
||||
#include <boost/foreach.hpp>
|
||||
|
@ -40,7 +40,7 @@ BOOST_AUTO_TEST_CASE(initialise)
|
||||
bitvec[2] = true;
|
||||
bitvec[3] = true;
|
||||
bitvec[7] = true;
|
||||
|
||||
|
||||
WordsBitmap wbm2(7,bitvec);
|
||||
BOOST_CHECK_EQUAL(wbm2.GetSize(),7);
|
||||
for (size_t i = 0; i < 7; ++i) {
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user