Merge branch 'nadir_osm' of github.com:moses-smt/mosesdecoder into nadir_osm

This commit is contained in:
Nadir Durrani 2013-07-04 12:05:00 +01:00
commit 389b7762e8
15 changed files with 704 additions and 16 deletions

@ -1 +1 @@
Subproject commit 6d5d13e1e06a871fbf7adf86dffda5113e315c1f
Subproject commit 1315185203a90b6f80acf2e47b4ea85b420b0d4c

View File

@ -0,0 +1,130 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../phrase-extract&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope"/>
</cproject>

View File

@ -0,0 +1,209 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>extract-ghkm</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>Alignment.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI>
</link>
<link>
<name>Alignment.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI>
</link>
<link>
<name>AlignmentGraph.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI>
</link>
<link>
<name>AlignmentGraph.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI>
</link>
<link>
<name>ComposedRule.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI>
</link>
<link>
<name>ComposedRule.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI>
</link>
<link>
<name>Exception.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI>
</link>
<link>
<name>ExtractGHKM.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI>
</link>
<link>
<name>ExtractGHKM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI>
</link>
<link>
<name>InputFileStream.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp</locationURI>
</link>
<link>
<name>InputFileStream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI>
</link>
<link>
<name>Node.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI>
</link>
<link>
<name>Node.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI>
</link>
<link>
<name>Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI>
</link>
<link>
<name>OutputFileStream.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
</link>
<link>
<name>OutputFileStream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>ParseTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI>
</link>
<link>
<name>ParseTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI>
</link>
<link>
<name>ScfgRule.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI>
</link>
<link>
<name>ScfgRule.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI>
</link>
<link>
<name>ScfgRuleWriter.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI>
</link>
<link>
<name>ScfgRuleWriter.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI>
</link>
<link>
<name>Span.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI>
</link>
<link>
<name>Span.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI>
</link>
<link>
<name>Subgraph.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI>
</link>
<link>
<name>Subgraph.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.cpp</locationURI>
</link>
<link>
<name>SyntaxTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SyntaxTree.h</locationURI>
</link>
<link>
<name>XmlTree.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.cpp</locationURI>
</link>
<link>
<name>XmlTree.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
</link>
<link>
<name>XmlTreeParser.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI>
</link>
<link>
<name>XmlTreeParser.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI>
</link>
<link>
<name>tables-core.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI>
</link>
<link>
<name>tables-core.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -81,6 +81,16 @@
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>BleuDocScorer.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/BleuDocScorer.cpp</locationURI>
</link>
<link>
<name>BleuDocScorer.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mert/BleuDocScorer.h</locationURI>
</link>
<link>
<name>mert</name>
<type>2</type>

View File

@ -1616,16 +1616,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/OpSequenceModel.h</locationURI>
</link>
<link>
<name>FF/OSM-Feature/SRILM-API.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/SRILM-API.cpp</locationURI>
</link>
<link>
<name>FF/OSM-Feature/SRILM-API.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/SRILM-API.h</locationURI>
</link>
<link>
<name>FF/OSM-Feature/osmHyp.cpp</name>
<type>1</type>

View File

@ -67,7 +67,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
FullScoreReturn FullScoreForgotState(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
/* Get the state for a context. Don't use this if you can avoid it. Use
* BeginSentenceState or EmptyContextState and extend from those. If
* BeginSentenceState or NullContextState and extend from those. If
* you're only going to use this state to call FullScore once, use
* FullScoreForgotState.
* To use this function, make an array of WordIndex containing the context

207
mert/BleuDocScorer.cpp Normal file
View File

@ -0,0 +1,207 @@
#include "BleuDocScorer.h"
#include <sys/types.h>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <climits>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include "util/check.hh"
#include "Ngram.h"
#include "Reference.h"
#include "Util.h"
#include "Vocabulary.h"
using namespace std;
namespace
{
// configure regularisation
const char KEY_REFLEN[] = "reflen";
const char REFLEN_AVERAGE[] = "average";
const char REFLEN_SHORTEST[] = "shortest";
const char REFLEN_CLOSEST[] = "closest";
} // namespace
namespace MosesTuning
{
BleuDocScorer::BleuDocScorer(const string& config)
: BleuScorer("BLEUDOC", config),
m_ref_length_type(CLOSEST)
{
const string reflen = getConfig(KEY_REFLEN, REFLEN_CLOSEST);
if (reflen == REFLEN_AVERAGE) {
m_ref_length_type = AVERAGE;
} else if (reflen == REFLEN_SHORTEST) {
m_ref_length_type = SHORTEST;
} else if (reflen == REFLEN_CLOSEST) {
m_ref_length_type = CLOSEST;
} else {
throw runtime_error("Unknown reference length strategy: " + reflen);
}
}
BleuDocScorer::~BleuDocScorer() {}
bool BleuDocScorer::OpenReferenceStream(istream* is, size_t file_id)
{
if (is == NULL) return false;
string line;
size_t doc_id = -1;
size_t sid = 0;
while (getline(*is, line)) {
if (line.find("<doc docid") != std::string::npos) { // new document
doc_id++;
m_references.push_back(new ScopedVector<Reference>());
sid = 0;
}
else if (line.find("<seg") != std::string::npos) { //new sentence
int start = line.find_first_of('>') + 1;
std::string trans = line.substr(start, line.find_last_of('<')-start);
trans = preprocessSentence(trans);
if (file_id == 0) {
Reference* ref = new Reference;
m_references[doc_id]->push_back(ref); // Take ownership of the Reference object.
}
if (m_references[doc_id]->size() <= sid) {
return false;
}
NgramCounts counts;
size_t length = CountNgrams(trans, counts, kBleuNgramOrder);
//for any counts larger than those already there, merge them in
for (NgramCounts::const_iterator ci = counts.begin(); ci != counts.end(); ++ci) {
const NgramCounts::Key& ngram = ci->first;
const NgramCounts::Value newcount = ci->second;
NgramCounts::Value oldcount = 0;
m_references[doc_id]->get().at(sid)->get_counts()->Lookup(ngram, &oldcount);
if (newcount > oldcount) {
m_references[doc_id]->get().at(sid)->get_counts()->operator[](ngram) = newcount;
}
}
//add in the length
m_references[doc_id]->get().at(sid)->push_back(length);
if (sid > 0 && sid % 100 == 0) {
TRACE_ERR(".");
}
++sid;
}
}
return true;
}
void BleuDocScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
if (sid >= m_references.size()) {
stringstream msg;
msg << "Sentence id (" << sid << ") not found in reference set";
throw runtime_error(msg.str());
}
std::vector<std::string> sentences = splitDoc(text);
vector<ScoreStatsType> totStats(kBleuNgramOrder * 2 + 1);
for (uint i=0; i<sentences.size(); ++i) {
NgramCounts testcounts;
// stats for this line
vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
string sentence = preprocessSentence(sentences[i]);
const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder);
//precision on each ngram type
for (NgramCounts::const_iterator testcounts_it = testcounts.begin();
testcounts_it != testcounts.end(); ++testcounts_it) {
const NgramCounts::Value guess = testcounts_it->second;
const size_t len = testcounts_it->first.size();
NgramCounts::Value correct = 0;
NgramCounts::Value v = 0;
if (m_references[sid]->get().at(i)->get_counts()->Lookup(testcounts_it->first, &v)) {
correct = min(v, guess);
}
stats[len * 2 - 2] += correct;
stats[len * 2 - 1] += guess;
}
const int reference_len = CalcReferenceLength(sid, i, length);
stats.push_back(reference_len);
//ADD stats to totStats
std::transform(stats.begin(), stats.end(), totStats.begin(),
totStats.begin(), std::plus<int>());
}
entry.set(totStats);
}
std::vector<std::string> BleuDocScorer::splitDoc(const std::string& text)
{
std::vector<std::string> res;
uint index = 0;
std::string::size_type end;
while ((end = text.find(" \\n ", index)) != std::string::npos) {
res.push_back(text.substr(index,end-index));
index = end + 4;
}
return res;
}
statscore_t BleuDocScorer::calculateScore(const vector<int>& comps) const
{
CHECK(comps.size() == kBleuNgramOrder * 2 + 1);
float logbleu = 0.0;
for (int i = 0; i < kBleuNgramOrder; ++i) {
if (comps[2*i] == 0) {
return 0.0;
}
logbleu += log(comps[2*i]) - log(comps[2*i+1]);
}
logbleu /= kBleuNgramOrder;
// reflength divided by test length
const float brevity = 1.0 - static_cast<float>(comps[kBleuNgramOrder * 2]) / comps[1];
if (brevity < 0.0) {
logbleu += brevity;
}
return exp(logbleu);
}
int BleuDocScorer::CalcReferenceLength(size_t doc_id, size_t sentence_id, size_t length)
{
switch (m_ref_length_type) {
case AVERAGE:
return m_references[doc_id]->get().at(sentence_id)->CalcAverage();
break;
case CLOSEST:
return m_references[doc_id]->get().at(sentence_id)->CalcClosest(length);
break;
case SHORTEST:
return m_references[doc_id]->get().at(sentence_id)->CalcShortest();
break;
default:
cerr << "unknown reference types." << endl;
exit(1);
}
}
}

67
mert/BleuDocScorer.h Normal file
View File

@ -0,0 +1,67 @@
#ifndef MERT_BLEU_DOC_SCORER_H_
#define MERT_BLEU_DOC_SCORER_H_
#include <ostream>
#include <string>
#include <vector>
#include "Types.h"
#include "ScoreData.h"
#include "StatisticsBasedScorer.h"
#include "ScopedVector.h"
#include "BleuScorer.h"
namespace MosesTuning
{
/**
* Bleu document scoring
*
* Needs xml reference files, and nbest lists where sentences are separated by '\n'
*/
class BleuDocScorer : public BleuScorer
{
public:
explicit BleuDocScorer(const std::string& config = "");
~BleuDocScorer();
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
virtual statscore_t calculateScore(const std::vector<int>& comps) const;
int CalcReferenceLength(std::size_t doc_id, std::size_t sentence_id, std::size_t length);
// NOTE: this function is used for unit testing.
virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
private:
ReferenceLengthType m_ref_length_type;
// reference translations.
ScopedVector<ScopedVector<Reference> > m_references;
// no copying allowed
BleuDocScorer(const BleuDocScorer&);
BleuDocScorer& operator=(const BleuDocScorer&);
std::vector<std::string> splitDoc(const std::string& text);
};
/* /\** Computes sentence-level BLEU+1 score. */
/* * This function is used in PRO. */
/* *\/ */
/* float sentenceLevelBleuPlusOne(const std::vector<float>& stats); */
/* /\** Computes sentence-level BLEU score given a background corpus. */
/* * This function is used in batch MIRA. */
/* *\/ */
/* float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vector<float>& bg); */
/* /\** */
/* * Computes plain old BLEU from a vector of stats */
/* *\/ */
/* float unsmoothedBleu(const std::vector<float>& stats); */
}
#endif // MERT_BLEU_DOC_SCORER_H_

View File

@ -65,14 +65,18 @@ public:
bool OpenReference(const char* filename, std::size_t file_id);
// NOTE: this function is used for unit testing.
bool OpenReferenceStream(std::istream* is, std::size_t file_id);
virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
private:
//private:
protected:
ReferenceLengthType m_ref_length_type;
// reference translations.
ScopedVector<Reference> m_references;
// constructor used by subclasses
BleuScorer(const std::string& name, const std::string& config): StatisticsBasedScorer(name,config) {}
// no copying allowed
BleuScorer(const BleuScorer&);
BleuScorer& operator=(const BleuScorer&);

View File

@ -20,6 +20,7 @@ MiraWeightVector.cpp
HypPackEnumerator.cpp
Data.cpp
BleuScorer.cpp
BleuDocScorer.cpp
SemposScorer.cpp
SemposOverlapping.cpp
InterpolatedScorer.cpp

View File

@ -3,6 +3,7 @@
#include <stdexcept>
#include "Scorer.h"
#include "BleuScorer.h"
#include "BleuDocScorer.h"
#include "PerScorer.h"
#include "TerScorer.h"
#include "CderScorer.h"
@ -20,6 +21,7 @@ vector<string> ScorerFactory::getTypes()
{
vector<string> types;
types.push_back(string("BLEU"));
types.push_back(string("BLEUDOC"));
types.push_back(string("PER"));
types.push_back(string("TER"));
types.push_back(string("CDER"));
@ -34,6 +36,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config)
{
if (type == "BLEU") {
return new BleuScorer(config);
} else if (type == "BLEUDOC") {
return new BleuDocScorer(config);
} else if (type == "PER") {
return new PerScorer(config);
} else if (type == "TER") {

View File

@ -7,10 +7,14 @@
#include "moses/TargetPhraseCollection.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "util/check.hh"
#include <boost/thread/tss.hpp>
#include <vector>
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#else
#include <boost/scoped_ptr.hpp>
#endif
namespace Moses
{
@ -26,7 +30,11 @@ class PhraseDictionaryTreeAdaptor : public PhraseDictionary
{
typedef PhraseDictionary MyBase;
#ifdef WITH_THREADS
boost::thread_specific_ptr<PDTAimp> m_implementation;
#else
boost::scoped_ptr<PDTAimp> m_implementation;
#endif
friend class PDTAimp;
PhraseDictionaryTreeAdaptor();

View File

@ -32,6 +32,8 @@
#ifdef WITH_THREADS
#include <boost/thread/tss.hpp>
#else
#include <boost/scoped_ptr.hpp>
#endif
namespace Moses
@ -47,7 +49,11 @@ class PhraseDictionaryOnDisk : public PhraseDictionary
friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryOnDisk&);
protected:
#ifdef WITH_THREADS
boost::thread_specific_ptr<OnDiskPt::OnDiskWrapper> m_implementation;
#else
boost::scoped_ptr<OnDiskPt::OnDiskWrapper> m_implementation;
#endif
OnDiskPt::OnDiskWrapper &GetImplementation();
const OnDiskPt::OnDiskWrapper &GetImplementation() const;

View File

@ -21,6 +21,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <typeinfo>
#include "TranslationOptionCollection.h"
#include "Sentence.h"
#include "DecodeStep.h"

51
scripts/generic/ph_numbers.perl Executable file
View File

@ -0,0 +1,51 @@
#!/usr/bin/perl -w
# Script to recognize and replace numbers in Moses training corpora
# and decoder input
#
# (c) 2013 TAUS
use strict;
use Getopt::Std;
my $debug = $ENV{DEBUG} || 0;
my %opts;
if(!getopts('s:t:cm:hl',\%opts) || $opts{h}) {
print "Usage: perl $0 [-s source_locale][-t target_locale][-c][-h][-l][-m symbol] < in > out\n";
exit;
}
my $sourceLocale = $opts{s} || "";
my $targetLocale = $opts{t} || "";
my $numberSymbol = $opts{m} || '@NUM@';
while(<>) {
# [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?
# while(/\G(.*?)\s*([+-]?\p{Digit}+[+-\p{Digit}\.,eE])/) {
chomp;
my $output = "";
my $remainder = "";
while(/\G(.*?)(\s*)([+-]?\p{Digit}*[\.,]?\p{Digit}+[\p{Digit}\.,+-eE]*)/g) {
print STDERR "Between: x$1x\n" if $debug;
print STDERR "Number: x$3x\n" if $debug;
$output .= $1;
if($opts{c}) {
$output .= $2.$numberSymbol;
}
else {
if($opts{l}) {
$output .= $2."<ne translation=\"$3\">$numberSymbol</ne>";
}
else {
$output .= $2."<ne translation=\"$numberSymbol\" entity=\"$3\">$numberSymbol</ne>";
}
}
$remainder = $';
}
print STDERR "Remainder: x".$remainder."x\n" if $debug;
print STDERR "\n" if $debug;
$output .= $remainder if $remainder;
$output .= "\n";
print $output;
}