mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
Merge branch 'master' of https://github.com/moses-smt/mosesdecoder
This commit is contained in:
commit
35c346378e
@ -257,7 +257,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
|
||||
, const std::vector<float> &weightT
|
||||
, bool isSyntax) const
|
||||
{
|
||||
Moses::TargetPhrase *ret = new Moses::TargetPhrase();
|
||||
Moses::TargetPhrase *ret = new Moses::TargetPhrase(&phraseDict);
|
||||
|
||||
// words
|
||||
size_t phraseSize = GetSize();
|
||||
|
@ -1,140 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.456080129" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.582801917" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-mixed-syntax/Debug}" id="cdt.managedbuild.builder.gnu.cross.1220166455" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.2055012191" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1768196213" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2007889843" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.855436310" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.506549229" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1497326561" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}&quot;"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.606353571" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.740521305" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.libs.1946120010" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.paths.1563475751" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.106010037" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.136661991" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2112208574" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.172930211" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="util/sorted_uniform_test.cc|util/sized_iterator_test.cc|util/read_compressed_test.cc|util/probing_hash_table_test.cc|util/joint_sort_test.cc|util/multi_intersection_test.cc|util/file_piece_test.cc|util/bit_packing_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.715007893">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.715007893." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.99436307" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.801178939" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-mixed-syntax/Release}" id="cdt.managedbuild.builder.gnu.cross.1999547547" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1481537766" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1967527847" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.442342681" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1847950300" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.1130138972" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.158429528" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.2020667840" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1372779734" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.371006952" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1770045040" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.707592414" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="extract-mixed-syntax.cdt.managedbuild.target.gnu.cross.exe.1868010260" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038;cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906;cdt.managedbuild.tool.gnu.c.compiler.input.442342681">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915;cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568;cdt.managedbuild.tool.gnu.c.compiler.input.2007889843">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
@ -1,27 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>extract-mixed-syntax</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
@ -1,189 +0,0 @@
|
||||
/*
|
||||
* AlignedSentence.cpp
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "moses/Util.h"
|
||||
#include "AlignedSentence.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
// Builds a fully populated sentence pair from whitespace-tokenised source and
// target strings plus an alignment string of "src-tgt" index pairs.
AlignedSentence::AlignedSentence(int lineNum,
                                 const std::string &source,
                                 const std::string &target,
                                 const std::string &alignment)
  : m_lineNum(lineNum)
{
  // Both word vectors have to exist before the alignment links are attached.
  PopulateWordVec(m_source, source);
  PopulateWordVec(m_target, target);

  PopulateAlignment(alignment);
}
|
||||
|
||||
// Releases the Word objects held by the source and target phrases.
AlignedSentence::~AlignedSentence()
{
  Moses::RemoveAllInColl(m_source);
  Moses::RemoveAllInColl(m_target);
}
|
||||
|
||||
// Tokenises line and fills vec with one newly allocated Word per token,
// each Word being given its position in the sentence.
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
  std::vector<string> tokens;
  Moses::Tokenize(tokens, line);

  const size_t numTokens = tokens.size();
  vec.resize(numTokens);

  for (size_t pos = 0; pos != numTokens; ++pos) {
    vec[pos] = new Word(pos, tokens[pos]);
  }
}
|
||||
|
||||
void AlignedSentence::PopulateAlignment(const std::string &line)
|
||||
{
|
||||
vector<string> alignStr;
|
||||
Moses::Tokenize(alignStr, line);
|
||||
|
||||
for (size_t i = 0; i < alignStr.size(); ++i) {
|
||||
vector<int> alignPair;
|
||||
Moses::Tokenize(alignPair, alignStr[i], "-");
|
||||
assert(alignPair.size() == 2);
|
||||
|
||||
int sourcePos = alignPair[0];
|
||||
int targetPos = alignPair[1];
|
||||
|
||||
if (sourcePos >= m_source.size()) {
|
||||
cerr << "ERROR1:AlignedSentence=" << Debug() << endl;
|
||||
cerr << "m_source=" << m_source.size() << endl;
|
||||
abort();
|
||||
}
|
||||
assert(sourcePos < m_source.size());
|
||||
assert(targetPos < m_target.size());
|
||||
Word *sourceWord = m_source[sourcePos];
|
||||
Word *targetWord = m_target[targetPos];
|
||||
|
||||
sourceWord->AddAlignment(targetWord);
|
||||
targetWord->AddAlignment(sourceWord);
|
||||
}
|
||||
}
|
||||
|
||||
std::string AlignedSentence::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
out << "m_lineNum:";
|
||||
out << m_lineNum;
|
||||
out << endl;
|
||||
|
||||
out << "m_source:";
|
||||
out << m_source.Debug();
|
||||
out << endl;
|
||||
|
||||
out << "m_target:";
|
||||
out << m_target.Debug();
|
||||
out << endl;
|
||||
|
||||
out << "consistent phrases:" << endl;
|
||||
out << m_consistentPhrases.Debug();
|
||||
out << endl;
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::vector<int> AlignedSentence::GetSourceAlignmentCount() const
|
||||
{
|
||||
vector<int> ret(m_source.size());
|
||||
|
||||
for (size_t i = 0; i < m_source.size(); ++i) {
|
||||
const Word &word = *m_source[i];
|
||||
ret[i] = word.GetAlignmentIndex().size();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void AlignedSentence::Create(const Parameter ¶ms)
|
||||
{
|
||||
CreateConsistentPhrases(params);
|
||||
m_consistentPhrases.AddHieroNonTerms(params);
|
||||
}
|
||||
|
||||
void AlignedSentence::CreateConsistentPhrases(const Parameter ¶ms)
|
||||
{
|
||||
int countT = m_target.size();
|
||||
int countS = m_source.size();
|
||||
|
||||
m_consistentPhrases.Initialize(countS);
|
||||
|
||||
// check alignments for target phrase startT...endT
|
||||
for(int lengthT=1;
|
||||
lengthT <= params.maxSpan && lengthT <= countT;
|
||||
lengthT++) {
|
||||
for(int startT=0; startT < countT-(lengthT-1); startT++) {
|
||||
|
||||
// that's nice to have
|
||||
int endT = startT + lengthT - 1;
|
||||
|
||||
// find find aligned source words
|
||||
// first: find minimum and maximum source word
|
||||
int minS = 9999;
|
||||
int maxS = -1;
|
||||
vector< int > usedS = GetSourceAlignmentCount();
|
||||
for(int ti=startT; ti<=endT; ti++) {
|
||||
const Word &word = *m_target[ti];
|
||||
const std::set<int> &alignment = word.GetAlignmentIndex();
|
||||
|
||||
std::set<int>::const_iterator iterAlign;
|
||||
for(iterAlign = alignment.begin(); iterAlign != alignment.end(); ++iterAlign) {
|
||||
int si = *iterAlign;
|
||||
if (si<minS) {
|
||||
minS = si;
|
||||
}
|
||||
if (si>maxS) {
|
||||
maxS = si;
|
||||
}
|
||||
usedS[ si ]--;
|
||||
}
|
||||
}
|
||||
|
||||
// unaligned phrases are not allowed
|
||||
if( maxS == -1 )
|
||||
continue;
|
||||
|
||||
// source phrase has to be within limits
|
||||
if( maxS-minS >= params.maxSpan )
|
||||
continue;
|
||||
|
||||
// check if source words are aligned to out of bound target words
|
||||
bool out_of_bounds = false;
|
||||
for(int si=minS; si<=maxS && !out_of_bounds; si++)
|
||||
if (usedS[si]>0) {
|
||||
out_of_bounds = true;
|
||||
}
|
||||
|
||||
// if out of bound, you gotta go
|
||||
if (out_of_bounds)
|
||||
continue;
|
||||
|
||||
// done with all the checks, lets go over all consistent phrase pairs
|
||||
// start point of source phrase may retreat over unaligned
|
||||
for(int startS=minS;
|
||||
(startS>=0 &&
|
||||
startS>maxS - params.maxSpan && // within length limit
|
||||
(startS==minS || m_source[startS]->GetAlignment().size()==0)); // unaligned
|
||||
startS--) {
|
||||
// end point of source phrase may advance over unaligned
|
||||
for(int endS=maxS;
|
||||
(endS<countS && endS<startS + params.maxSpan && // within length limit
|
||||
(endS==maxS || m_source[endS]->GetAlignment().size()==0)); // unaligned
|
||||
endS++) {
|
||||
|
||||
// take note that this is a valid phrase alignment
|
||||
m_consistentPhrases.Add(startS, endS, startT, endT, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
* AlignedSentence.h
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "Phrase.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
class Parameter;
|
||||
|
||||
class AlignedSentence {
|
||||
public:
|
||||
AlignedSentence(int lineNum)
|
||||
:m_lineNum(lineNum)
|
||||
{}
|
||||
|
||||
AlignedSentence(int lineNum,
|
||||
const std::string &source,
|
||||
const std::string &target,
|
||||
const std::string &alignment);
|
||||
virtual ~AlignedSentence();
|
||||
virtual void Create(const Parameter ¶ms);
|
||||
|
||||
const Phrase &GetPhrase(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? m_source : m_target; }
|
||||
|
||||
const ConsistentPhrases &GetConsistentPhrases() const
|
||||
{ return m_consistentPhrases; }
|
||||
|
||||
virtual std::string Debug() const;
|
||||
|
||||
int m_lineNum;
|
||||
protected:
|
||||
Phrase m_source, m_target;
|
||||
ConsistentPhrases m_consistentPhrases;
|
||||
|
||||
void CreateConsistentPhrases(const Parameter ¶ms);
|
||||
void PopulateWordVec(Phrase &vec, const std::string &line);
|
||||
|
||||
// m_source and m_target MUST be populated before calling this
|
||||
void PopulateAlignment(const std::string &line);
|
||||
std::vector<int> GetSourceAlignmentCount() const;
|
||||
};
|
||||
|
||||
|
@ -1,183 +0,0 @@
|
||||
/*
|
||||
* AlignedSentenceSyntax.cpp
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "AlignedSentenceSyntax.h"
|
||||
#include "Parameter.h"
|
||||
#include "pugixml.hpp"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Stores the raw source, target and alignment strings; nothing is parsed
// here, the strings are consumed later by Create().
AlignedSentenceSyntax::AlignedSentenceSyntax(int lineNum,
                                             const std::string &source,
                                             const std::string &target,
                                             const std::string &alignment)
  : AlignedSentence(lineNum)
  , m_sourceStr(source)
  , m_targetStr(target)
  , m_alignmentStr(alignment)
{
}
|
||||
|
||||
// Nothing to release in this class itself.
AlignedSentenceSyntax::~AlignedSentenceSyntax()
{
}
|
||||
|
||||
void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const Parameter ¶ms,
|
||||
string line, Phrase &phrase, SyntaxTree &tree)
|
||||
{
|
||||
// parse source and target string
|
||||
if (isSyntax) {
|
||||
line = "<xml><tree label=\"X\">" + line + "</tree></xml>";
|
||||
XMLParse(phrase, tree, line, params);
|
||||
|
||||
if (mixedSyntaxType != 0) {
|
||||
// mixed syntax. Always add [X] where there isn't 1
|
||||
tree.SetHieroLabel(params.hieroNonTerm);
|
||||
if (mixedSyntaxType == 2) {
|
||||
tree.AddToAll(params.hieroNonTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
PopulateWordVec(phrase, line);
|
||||
tree.SetHieroLabel(params.hieroNonTerm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void AlignedSentenceSyntax::Create(const Parameter ¶ms)
|
||||
{
|
||||
Populate(params.sourceSyntax, params.mixedSyntaxType, params, m_sourceStr,
|
||||
m_source, m_sourceTree);
|
||||
Populate(params.targetSyntax, params.mixedSyntaxType, params, m_targetStr,
|
||||
m_target, m_targetTree);
|
||||
|
||||
PopulateAlignment(m_alignmentStr);
|
||||
CreateConsistentPhrases(params);
|
||||
|
||||
// create labels
|
||||
CreateNonTerms();
|
||||
}
|
||||
|
||||
// Replaces, in place, the characters that are reserved in Moses text formats
// with their entity equivalents.
//
// '&' is replaced first so that the ampersands introduced by the later
// replacements are not escaped a second time.
void Escape(string &text)
{
  text = Moses::Replace(text, "&", "&amp;");
  text = Moses::Replace(text, "|", "&#124;");
  text = Moses::Replace(text, "<", "&lt;");
  text = Moses::Replace(text, ">", "&gt;");
  text = Moses::Replace(text, "'", "&apos;");
  text = Moses::Replace(text, "\"", "&quot;");
  text = Moses::Replace(text, "[", "&#91;");
  text = Moses::Replace(text, "]", "&#93;");
}
|
||||
|
||||
// Walks the children of parentNode depth-first, appending one Word per token
// of text content to output and recording every labelled element as a span in
// tree.
//
// output:     phrase being built; words are appended in document order.
// tree:       receives a "[label]" entry for each element with a non-empty
//             "label" attribute, covering the words produced inside it.
// parentNode: node whose children are processed.
// params:     passed through to SyntaxTree::Add.
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const pugi::xml_node &parentNode,
                                     const Parameter &params)
{
  for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
    string nodeName = childNode.name();

    // span label
    string label;
    int startPos = output.size();

    if (!nodeName.empty()) {
      pugi::xml_attribute attribute = childNode.attribute("label");
      label = attribute.as_string();

      // recursively call this function. For proper recursive trees
      XMLParse(output, tree, childNode, params);
    }

    // fill phrase vector
    string text = childNode.value();
    Escape(text);

    std::vector<string> toks;
    Moses::Tokenize(toks, text);

    for (size_t i = 0; i < toks.size(); ++i) {
      const string &tok = toks[i];
      Word *word = new Word(output.size(), tok);
      output.push_back(word);
    }

    // is it a labelled span?
    int endPos = output.size() - 1;

    // fill syntax labels
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }
  }
}
|
||||
|
||||
// Parses input as an XML document and feeds the children of its top-level
// <xml> element to the node-based XMLParse overload.
//
// output / tree: filled by the node-based overload.
// input:         the XML text to parse.
// params:        passed through unchanged.
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const std::string input,
                                     const Parameter &params)
{
  pugi::xml_document doc;
  // NOTE(review): the parse result is not inspected, so malformed input is
  // not reported here - TODO confirm whether that is intended.
  pugi::xml_parse_result result = doc.load(input.c_str(),
                                  pugi::parse_default | pugi::parse_comments);

  pugi::xml_node topNode = doc.child("xml");
  XMLParse(output, tree, topNode, params);
}
|
||||
|
||||
void AlignedSentenceSyntax::CreateNonTerms()
|
||||
{
|
||||
for (int sourceStart = 0; sourceStart < m_source.size(); ++sourceStart) {
|
||||
for (int sourceEnd = sourceStart; sourceEnd < m_source.size(); ++sourceEnd) {
|
||||
ConsistentPhrases::Coll &coll = m_consistentPhrases.GetColl(sourceStart, sourceEnd);
|
||||
const SyntaxTree::Labels &sourceLabels = m_sourceTree.Find(sourceStart, sourceEnd);
|
||||
|
||||
ConsistentPhrases::Coll::iterator iter;
|
||||
for (iter = coll.begin(); iter != coll.end(); ++iter) {
|
||||
ConsistentPhrase &cp = **iter;
|
||||
|
||||
int targetStart = cp.corners[2];
|
||||
int targetEnd = cp.corners[3];
|
||||
const SyntaxTree::Labels &targetLabels = m_targetTree.Find(targetStart, targetEnd);
|
||||
|
||||
CreateNonTerms(cp, sourceLabels, targetLabels);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Adds one non-terminal to cp for every (source label, target label)
// combination.
void AlignedSentenceSyntax::CreateNonTerms(ConsistentPhrase &cp,
                                           const SyntaxTree::Labels &sourceLabels,
                                           const SyntaxTree::Labels &targetLabels)
{
  for (SyntaxTree::Labels::const_iterator src = sourceLabels.begin();
       src != sourceLabels.end(); ++src) {
    for (SyntaxTree::Labels::const_iterator tgt = targetLabels.begin();
         tgt != targetLabels.end(); ++tgt) {
      const string &labelS = *src;
      const string &labelT = *tgt;
      cp.AddNonTerms(labelS, labelT);
    }
  }
}
|
||||
|
||||
|
@ -1,46 +0,0 @@
|
||||
/*
|
||||
* AlignedSentenceSyntax.h
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AlignedSentence.h"
|
||||
#include "SyntaxTree.h"
|
||||
#include "pugixml.hpp"
|
||||
|
||||
class AlignedSentenceSyntax : public AlignedSentence
|
||||
{
|
||||
public:
|
||||
AlignedSentenceSyntax(int lineNum,
|
||||
const std::string &source,
|
||||
const std::string &target,
|
||||
const std::string &alignment);
|
||||
virtual ~AlignedSentenceSyntax();
|
||||
|
||||
void Create(const Parameter ¶ms);
|
||||
|
||||
//virtual std::string Debug() const;
|
||||
protected:
|
||||
std::string m_sourceStr, m_targetStr, m_alignmentStr;
|
||||
SyntaxTree m_sourceTree, m_targetTree;
|
||||
|
||||
void XMLParse(Phrase &output,
|
||||
SyntaxTree &tree,
|
||||
const std::string input,
|
||||
const Parameter ¶ms);
|
||||
void XMLParse(Phrase &output,
|
||||
SyntaxTree &tree,
|
||||
const pugi::xml_node &parentNode,
|
||||
const Parameter ¶ms);
|
||||
void CreateNonTerms();
|
||||
void CreateNonTerms(ConsistentPhrase &cp,
|
||||
const SyntaxTree::Labels &sourceLabels,
|
||||
const SyntaxTree::Labels &targetLabels);
|
||||
void Populate(bool isSyntax, int mixedSyntaxType, const Parameter ¶ms,
|
||||
std::string line, Phrase &phrase, SyntaxTree &tree);
|
||||
|
||||
};
|
||||
|
@ -1,66 +0,0 @@
|
||||
/*
|
||||
* ConsistentPhrase.cpp
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <sstream>
|
||||
#include "ConsistentPhrase.h"
|
||||
#include "Word.h"
|
||||
#include "NonTerm.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Creates a phrase pair covering [sourceStart, sourceEnd] on the source side
// and [targetStart, targetEnd] on the target side.
//
// corners stores the four bounds in the order source start, source end,
// target start, target end. The hiero non-terminal uses params.hieroNonTerm as
// both its source and its target label.
ConsistentPhrase::ConsistentPhrase(
  int sourceStart, int sourceEnd,
  int targetStart, int targetEnd,
  const Parameter &params)
:corners(4)
,m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
{
  corners[0] = sourceStart;
  corners[1] = sourceEnd;
  corners[2] = targetStart;
  corners[3] = targetEnd;
}
|
||||
|
||||
// Nothing to release explicitly.
ConsistentPhrase::~ConsistentPhrase()
{
}
|
||||
|
||||
bool ConsistentPhrase::operator<(const ConsistentPhrase &other) const
|
||||
{
|
||||
return corners < other.corners;
|
||||
}
|
||||
|
||||
void ConsistentPhrase::AddNonTerms(const std::string &source,
|
||||
const std::string &target)
|
||||
{
|
||||
m_nonTerms.push_back(NonTerm(*this, source, target));
|
||||
}
|
||||
|
||||
bool ConsistentPhrase::TargetOverlap(const ConsistentPhrase &other) const
|
||||
{
|
||||
if ( other.corners[3] < corners[2] || other.corners[2] > corners[3])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string ConsistentPhrase::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
out << "[" << corners[0] << "-" << corners[1]
|
||||
<< "][" << corners[2] << "-" << corners[3] << "]";
|
||||
|
||||
out << "NT:";
|
||||
for (size_t i = 0; i < m_nonTerms.size(); ++i) {
|
||||
const NonTerm &nonTerm = m_nonTerms[i];
|
||||
out << nonTerm.GetLabel(Moses::Input) << ":" << nonTerm.GetLabel(Moses::Output);
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
* ConsistentPhrase.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "moses/TypeDef.h"
|
||||
#include "NonTerm.h"
|
||||
|
||||
class ConsistentPhrase
|
||||
{
|
||||
public:
|
||||
typedef std::vector<NonTerm> NonTerms;
|
||||
|
||||
std::vector<int> corners;
|
||||
|
||||
ConsistentPhrase(const ConsistentPhrase ©); // do not implement
|
||||
ConsistentPhrase(int sourceStart, int sourceEnd,
|
||||
int targetStart, int targetEnd,
|
||||
const Parameter ¶ms);
|
||||
|
||||
virtual ~ConsistentPhrase();
|
||||
|
||||
int GetWidth(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1; }
|
||||
|
||||
|
||||
void AddNonTerms(const std::string &source,
|
||||
const std::string &target);
|
||||
const NonTerms &GetNonTerms() const
|
||||
{ return m_nonTerms;}
|
||||
const NonTerm &GetHieroNonTerm() const
|
||||
{ return m_hieroNonTerm;}
|
||||
|
||||
bool TargetOverlap(const ConsistentPhrase &other) const;
|
||||
|
||||
bool operator<(const ConsistentPhrase &other) const;
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
protected:
|
||||
NonTerms m_nonTerms;
|
||||
NonTerm m_hieroNonTerm;
|
||||
};
|
||||
|
@ -1,103 +0,0 @@
|
||||
/*
|
||||
* ConsistentPhrases.cpp
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "NonTerm.h"
|
||||
#include "Parameter.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Default constructor: the collection starts empty; call Initialize() to size it.
ConsistentPhrases::ConsistentPhrases() {}
|
||||
|
||||
// Hands every stored set of ConsistentPhrase pointers to
// Moses::RemoveAllInColl, cell by cell.
ConsistentPhrases::~ConsistentPhrases()
{
  typedef std::vector< std::vector<Coll> >::iterator RowIter;
  typedef std::vector<Coll>::iterator CellIter;

  for (RowIter row = m_coll.begin(); row != m_coll.end(); ++row) {
    for (CellIter cell = row->begin(); cell != row->end(); ++cell) {
      Moses::RemoveAllInColl(*cell);
    }
  }
}
|
||||
|
||||
void ConsistentPhrases::Initialize(size_t size)
|
||||
{
|
||||
m_coll.resize(size);
|
||||
|
||||
for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
|
||||
std::vector<Coll> &allSourceStart = m_coll[sourceStart];
|
||||
allSourceStart.resize(size - sourceStart);
|
||||
}
|
||||
}
|
||||
|
||||
void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
|
||||
int targetStart, int targetEnd,
|
||||
const Parameter ¶ms)
|
||||
{
|
||||
Coll &coll = m_coll[sourceStart][sourceEnd - sourceStart];
|
||||
ConsistentPhrase *cp = new ConsistentPhrase(sourceStart, sourceEnd,
|
||||
targetStart, targetEnd,
|
||||
params);
|
||||
|
||||
pair<Coll::iterator, bool> inserted = coll.insert(cp);
|
||||
assert(inserted.second);
|
||||
}
|
||||
|
||||
// Read-only access to the cell for source span [sourceStart, sourceEnd].
// No bounds check is made.
const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const
{
  const int spanOffset = sourceEnd - sourceStart;
  return m_coll[sourceStart][spanOffset];
}
|
||||
|
||||
// Mutable access to the cell for source span [sourceStart, sourceEnd].
// No bounds check is made.
ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd)
{
  const int spanOffset = sourceEnd - sourceStart;
  return m_coll[sourceStart][spanOffset];
}
|
||||
|
||||
std::string ConsistentPhrases::Debug() const
|
||||
{
|
||||
std::stringstream out;
|
||||
for (int start = 0; start < m_coll.size(); ++start) {
|
||||
const std::vector<Coll> &allSourceStart = m_coll[start];
|
||||
|
||||
for (int size = 0; size < allSourceStart.size(); ++size) {
|
||||
const Coll &coll = allSourceStart[size];
|
||||
|
||||
Coll::const_iterator iter;
|
||||
for (iter = coll.begin(); iter != coll.end(); ++iter) {
|
||||
const ConsistentPhrase &consistentPhrase = **iter;
|
||||
out << consistentPhrase.Debug() << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void ConsistentPhrases::AddHieroNonTerms(const Parameter ¶ms)
|
||||
{
|
||||
// add [X] labels everywhere
|
||||
for (int i = 0; i < m_coll.size(); ++i) {
|
||||
vector<Coll> &inner = m_coll[i];
|
||||
for (int j = 0; j < inner.size(); ++j) {
|
||||
ConsistentPhrases::Coll &coll = inner[j];
|
||||
ConsistentPhrases::Coll::iterator iter;
|
||||
for (iter = coll.begin(); iter != coll.end(); ++iter) {
|
||||
ConsistentPhrase &cp = **iter;
|
||||
cp.AddNonTerms(params.hieroNonTerm, params.hieroNonTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,40 +0,0 @@
|
||||
/*
|
||||
* ConsistentPhrases.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "ConsistentPhrase.h"
|
||||
|
||||
class Word;
|
||||
class Parameter;
|
||||
|
||||
class ConsistentPhrases {
|
||||
public:
|
||||
typedef std::set<ConsistentPhrase*> Coll;
|
||||
|
||||
ConsistentPhrases();
|
||||
virtual ~ConsistentPhrases();
|
||||
|
||||
void Initialize(size_t size);
|
||||
|
||||
void Add(int sourceStart, int sourceEnd,
|
||||
int targetStart, int targetEnd,
|
||||
const Parameter ¶ms);
|
||||
|
||||
void AddHieroNonTerms(const Parameter ¶ms);
|
||||
|
||||
const Coll &GetColl(int sourceStart, int sourceEnd) const;
|
||||
Coll &GetColl(int sourceStart, int sourceEnd);
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
protected:
|
||||
std::vector< std::vector<Coll> > m_coll;
|
||||
};
|
||||
|
@ -1,62 +0,0 @@
|
||||
// $Id: InputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "InputFileStream.h"
|
||||
#include "gzfilebuf.h"
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
/**
 * Opens filePath for reading.
 *
 * A path longer than three characters that ends in ".gz" is read through a
 * gzfilebuf; any other path is read through a std::filebuf. If the plain file
 * cannot be opened, a message is written to stderr and the process exits with
 * status 1. The gzfilebuf branch makes no such check here.
 */
InputFileStream::InputFileStream(const std::string &filePath)
  : std::istream(NULL)
  , m_streambuf(NULL)
{
  if (filePath.size() > 3 &&
      filePath.substr(filePath.size() - 3, 3) == ".gz") {
    m_streambuf = new gzfilebuf(filePath.c_str());
  } else {
    std::filebuf* fb = new std::filebuf();
    // open() returns NULL on failure; test the result without overwriting fb
    // so the buffer can still be released before exiting.
    if (fb->open(filePath.c_str(), std::ios::in) == NULL) {
      cerr << "Can't read " << filePath.c_str() << endl;
      delete fb;
      exit(1);
    }
    m_streambuf = fb;
  }
  this->init(m_streambuf);
}
|
||||
|
||||
// Releases the stream buffer allocated by the constructor and clears the pointer.
InputFileStream::~InputFileStream()
{
  std::streambuf *owned = m_streambuf;
  m_streambuf = NULL;
  delete owned;
}
|
||||
|
||||
// Currently a no-op: the stream buffer is only released by the destructor.
void InputFileStream::Close() {}
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,48 +0,0 @@
|
||||
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef moses_InputFileStream_h
|
||||
#define moses_InputFileStream_h
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Used in place of std::istream, can read zipped files if it ends in .gz
|
||||
*/
|
||||
// std::istream whose buffer is chosen from the file name by the constructor.
class InputFileStream : public std::istream
{
protected:
  // Buffer installed into the istream; deleted by the destructor.
  std::streambuf *m_streambuf;

public:
  // Opens filePath for reading.
  InputFileStream(const std::string &filePath);

  ~InputFileStream();

  void Close();
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,174 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include "Main.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "OutputFileStream.h"
|
||||
#include "AlignedSentence.h"
|
||||
#include "AlignedSentenceSyntax.h"
|
||||
#include "Parameter.h"
|
||||
#include "Rules.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Global debug switch, off by default. NOTE(review): not referenced again in
// Main.cpp as shown; presumably read by other translation units -- confirm.
bool g_debug = false;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cerr << "Starting" << endl;
|
||||
|
||||
Parameter params;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
po::options_description desc("Options");
|
||||
desc.add_options()
|
||||
("help", "Print help messages")
|
||||
("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
|
||||
("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
|
||||
("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
|
||||
("GZOutput", "Compress extract files")
|
||||
("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
|
||||
("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
|
||||
("MinHoleSource", po::value<int>()->default_value(params.minHoleSource), "Minimum source span for a non-term.")
|
||||
|
||||
("SourceSyntax", "Source sentence is a parse tree")
|
||||
("TargetSyntax", "Target sentence is a parse tree")
|
||||
("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere")
|
||||
("MultiLabel", po::value<int>()->default_value(params.multiLabel), "What to do with multiple labels on the same span. 0(default)=keep them all, 1=keep only top-most, 2=keep only bottom-most")
|
||||
("HieroSourceLHS", "Always use Hiero source LHS? Default = 0")
|
||||
("MaxSpanFreeNonTermSource", po::value<int>()->default_value(params.maxSpanFreeNonTermSource), "Max number of words covered by beginning/end NT. Default = 0 (no limit)")
|
||||
("NoNieceTerminal", "Don't extract rule if 1 of the non-term covers the same word as 1 of the terminals")
|
||||
("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
|
||||
("SpanLength", "Property - span length of RHS each non-term")
|
||||
("NonTermContext", "Property - left and right, inside and outside words of each non-term");
|
||||
|
||||
po::variables_map vm;
|
||||
try
|
||||
{
|
||||
po::store(po::parse_command_line(argc, argv, desc),
|
||||
vm); // can throw
|
||||
|
||||
/** --help option
|
||||
*/
|
||||
if ( vm.count("help") || argc < 5 )
|
||||
{
|
||||
std::cout << argv[0] << " target source alignment [options...]" << std::endl
|
||||
<< desc << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
po::notify(vm); // throws on error, so do after help in case
|
||||
// there are any problems
|
||||
}
|
||||
catch(po::error& e)
|
||||
{
|
||||
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
|
||||
std::cerr << desc << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vm.count("MaxSpan")) params.maxSpan = vm["MaxSpan"].as<int>();
|
||||
if (vm.count("GZOutput")) params.gzOutput = true;
|
||||
if (vm.count("GlueGrammar")) params.gluePath = vm["GlueGrammar"].as<string>();
|
||||
if (vm.count("SentenceOffset")) params.sentenceOffset = vm["SentenceOffset"].as<long>();
|
||||
if (vm.count("MaxNonTerm")) params.maxNonTerm = vm["MaxNonTerm"].as<int>();
|
||||
if (vm.count("MaxHieroNonTerm")) params.maxHieroNonTerm = vm["MaxHieroNonTerm"].as<int>();
|
||||
if (vm.count("MinHoleSource")) params.minHoleSource = vm["MinHoleSource"].as<int>();
|
||||
|
||||
if (vm.count("SourceSyntax")) params.sourceSyntax = true;
|
||||
if (vm.count("TargetSyntax")) params.targetSyntax = true;
|
||||
if (vm.count("MixedSyntaxType")) params.mixedSyntaxType = vm["MixedSyntaxType"].as<int>();
|
||||
if (vm.count("MultiLabel")) params.multiLabel = vm["MultiLabel"].as<int>();
|
||||
if (vm.count("HieroSourceLHS")) params.hieroSourceLHS = true;
|
||||
if (vm.count("MaxSpanFreeNonTermSource")) params.maxSpanFreeNonTermSource = vm["MaxSpanFreeNonTermSource"].as<int>();
|
||||
if (vm.count("NoNieceTerminal")) params.nieceTerminal = false;
|
||||
if (vm.count("MaxScope")) params.maxScope = vm["MaxScope"].as<int>();
|
||||
|
||||
// properties
|
||||
if (vm.count("SpanLength")) params.spanLength = true;
|
||||
if (vm.count("NonTermContext")) params.nonTermContext = true;
|
||||
|
||||
// input files;
|
||||
string pathTarget = argv[1];
|
||||
string pathSource = argv[2];
|
||||
string pathAlignment = argv[3];
|
||||
|
||||
string pathExtract = argv[4];
|
||||
string pathExtractInv = pathExtract + ".inv";
|
||||
if (params.gzOutput) {
|
||||
pathExtract += ".gz";
|
||||
pathExtractInv += ".gz";
|
||||
}
|
||||
|
||||
Moses::InputFileStream strmTarget(pathTarget);
|
||||
Moses::InputFileStream strmSource(pathSource);
|
||||
Moses::InputFileStream strmAlignment(pathAlignment);
|
||||
Moses::OutputFileStream extractFile(pathExtract);
|
||||
Moses::OutputFileStream extractInvFile(pathExtractInv);
|
||||
|
||||
|
||||
// MAIN LOOP
|
||||
int lineNum = 1;
|
||||
string lineTarget, lineSource, lineAlignment;
|
||||
while (getline(strmTarget, lineTarget)) {
|
||||
if (lineNum % 10000 == 0) {
|
||||
cerr << lineNum << " ";
|
||||
}
|
||||
|
||||
bool success;
|
||||
success = getline(strmSource, lineSource);
|
||||
if (!success) {
|
||||
throw "Couldn't read source";
|
||||
}
|
||||
success = getline(strmAlignment, lineAlignment);
|
||||
if (!success) {
|
||||
throw "Couldn't read alignment";
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "lineTarget=" << lineTarget << endl;
|
||||
cerr << "lineSource=" << lineSource << endl;
|
||||
cerr << "lineAlignment=" << lineAlignment << endl;
|
||||
*/
|
||||
|
||||
AlignedSentence *alignedSentence;
|
||||
|
||||
if (params.sourceSyntax || params.targetSyntax) {
|
||||
alignedSentence = new AlignedSentenceSyntax(lineNum, lineSource, lineTarget, lineAlignment);
|
||||
}
|
||||
else {
|
||||
alignedSentence = new AlignedSentence(lineNum, lineSource, lineTarget, lineAlignment);
|
||||
}
|
||||
|
||||
alignedSentence->Create(params);
|
||||
//cerr << alignedSentence->Debug();
|
||||
|
||||
Rules rules(*alignedSentence);
|
||||
rules.Extend(params);
|
||||
rules.Consolidate(params);
|
||||
//cerr << rules.Debug();
|
||||
|
||||
rules.Output(extractFile, true, params);
|
||||
rules.Output(extractInvFile, false, params);
|
||||
|
||||
delete alignedSentence;
|
||||
|
||||
++lineNum;
|
||||
}
|
||||
|
||||
if (!params.gluePath.empty()) {
|
||||
Moses::OutputFileStream glueFile(params.gluePath);
|
||||
CreateGlueGrammar(glueFile);
|
||||
}
|
||||
|
||||
cerr << "Finished" << endl;
|
||||
}
|
||||
|
||||
// Writes the three fixed glue-grammar rules to glueFile, one per line:
// sentence start, sentence end, and concatenation of [S] with [X].
void CreateGlueGrammar(Moses::OutputFileStream &glueFile)
{
  glueFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl;
  glueFile << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl;
  glueFile << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
}
|
@ -1,12 +0,0 @@
|
||||
/*
|
||||
* Main.h
|
||||
*
|
||||
* Created on: 28 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "OutputFileStream.h"
|
||||
|
||||
// Writes the fixed glue-grammar rules to glueFile (defined in Main.cpp).
void CreateGlueGrammar(Moses::OutputFileStream &glueFile);
|
||||
|
@ -1,17 +0,0 @@
|
||||
# Builds the extract-mixed-syntax binary from the sources in this directory.
# `all` and `clean` name actions, not files, so they are declared phony.
.PHONY: all clean

all: extract-mixed-syntax

clean:
	rm -f *.o extract-mixed-syntax

# Suffix rule: compile each .cpp into the matching .o.
.cpp.o:
	g++ -O4 -g -c -I../../../boost/include -I../../../ $<

OBJECTS = AlignedSentence.o ConsistentPhrase.o ConsistentPhrases.o InputFileStream.o \
	Main.o OutputFileStream.o Parameter.o Phrase.o Rule.o Rules.o RuleSymbol.o \
	SyntaxTree.o Word.o NonTerm.o RulePhrase.o AlignedSentenceSyntax.o pugixml.o

extract-mixed-syntax: $(OBJECTS)
	g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_iostreams-mt -lboost_program_options-mt -o extract-mixed-syntax
|
||||
|
||||
|
@ -1,65 +0,0 @@
|
||||
/*
|
||||
* NonTerm.cpp
|
||||
*
|
||||
* Created on: 22 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "NonTerm.h"
|
||||
#include "Word.h"
|
||||
#include "ConsistentPhrase.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Creates a non-terminal with the given source and target labels.
// Only the address of consistentPhrase is kept, so that phrase must outlive
// this object.
NonTerm::NonTerm(const ConsistentPhrase &consistentPhrase,
                 const std::string &source,
                 const std::string &target)
  : m_consistentPhrase(&consistentPhrase), m_source(source), m_target(target)
{
}
|
||||
|
||||
// Destructor: the body is empty; the referenced phrase is not deleted here.
NonTerm::~NonTerm()
{
}
|
||||
|
||||
std::string NonTerm::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
out << m_source << m_target;
|
||||
out << m_consistentPhrase->Debug();
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void NonTerm::Output(std::ostream &out) const
|
||||
{
|
||||
out << m_source << m_target;
|
||||
}
|
||||
|
||||
void NonTerm::Output(std::ostream &out, Moses::FactorDirection direction) const
|
||||
{
|
||||
out << GetLabel(direction);
|
||||
}
|
||||
|
||||
// Returns the source label for Moses::Input and the target label for any
// other direction.
const std::string &NonTerm::GetLabel(Moses::FactorDirection direction) const
{
  if (direction == Moses::Input) {
    return m_source;
  }
  return m_target;
}
|
||||
|
||||
// True when the label on the requested side equals params.hieroNonTerm.
bool NonTerm::IsHiero(Moses::FactorDirection direction, const Parameter &params) const
{
  const std::string &label = NonTerm::GetLabel(direction);
  return label == params.hieroNonTerm;
}
|
||||
|
||||
bool NonTerm::IsHiero(const Parameter ¶ms) const
|
||||
{
|
||||
return IsHiero(Moses::Input, params) && IsHiero(Moses::Output, params);
|
||||
}
|
||||
// Width of the owning consistent phrase on the requested side.
int NonTerm::GetWidth(Moses::FactorDirection direction) const
{
  const ConsistentPhrase &owner = GetConsistentPhrase();
  return owner.GetWidth(direction);
}
|
@ -1,47 +0,0 @@
|
||||
/*
|
||||
* NonTerm.h
|
||||
*
|
||||
* Created on: 22 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "RuleSymbol.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
class ConsistentPhrase;
|
||||
class Parameter;
|
||||
|
||||
class NonTerm : public RuleSymbol
|
||||
{
|
||||
public:
|
||||
|
||||
NonTerm(const ConsistentPhrase &consistentPhrase,
|
||||
const std::string &source,
|
||||
const std::string &target);
|
||||
virtual ~NonTerm();
|
||||
|
||||
const ConsistentPhrase &GetConsistentPhrase() const
|
||||
{ return *m_consistentPhrase; }
|
||||
|
||||
int GetWidth(Moses::FactorDirection direction) const;
|
||||
|
||||
virtual bool IsNonTerm() const
|
||||
{ return true; }
|
||||
|
||||
std::string GetString() const
|
||||
{ return m_source + m_target; }
|
||||
|
||||
virtual std::string Debug() const;
|
||||
virtual void Output(std::ostream &out) const;
|
||||
void Output(std::ostream &out, Moses::FactorDirection direction) const;
|
||||
|
||||
const std::string &GetLabel(Moses::FactorDirection direction) const;
|
||||
bool IsHiero(Moses::FactorDirection direction, const Parameter ¶ms) const;
|
||||
bool IsHiero(const Parameter ¶ms) const;
|
||||
|
||||
protected:
|
||||
const ConsistentPhrase *m_consistentPhrase;
|
||||
std::string m_source, m_target;
|
||||
};
|
||||
|
@ -1,79 +0,0 @@
|
||||
// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <boost/iostreams/filter/gzip.hpp>
|
||||
#include "OutputFileStream.h"
|
||||
#include "gzfilebuf.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Creates a stream with no file attached; call Open() before writing.
OutputFileStream::OutputFileStream()
  : boost::iostreams::filtering_ostream(), m_outFile(NULL)
{
}
|
||||
|
||||
// Creates the stream and immediately opens filePath.
// The result of Open() is not checked here.
OutputFileStream::OutputFileStream(const std::string &filePath)
  : m_outFile(NULL)
{
  this->Open(filePath);
}
|
||||
|
||||
// Flushes and closes the underlying file, if any, via Close().
OutputFileStream::~OutputFileStream()
{
  this->Close();
}
|
||||
|
||||
bool OutputFileStream::Open(const std::string &filePath)
|
||||
{
|
||||
m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
|
||||
if (m_outFile->fail()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (filePath.size() > 3 && filePath.substr(filePath.size() - 3, 3) == ".gz") {
|
||||
this->push(boost::iostreams::gzip_compressor());
|
||||
}
|
||||
this->push(*m_outFile);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void OutputFileStream::Close()
|
||||
{
|
||||
if (m_outFile == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
this->flush();
|
||||
this->pop(); // file
|
||||
|
||||
m_outFile->close();
|
||||
delete m_outFile;
|
||||
m_outFile = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,50 +0,0 @@
|
||||
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <boost/iostreams/filtering_stream.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Used in place of std::ostream, writes gzip-compressed output if the file path ends in .gz
|
||||
*/
|
||||
class OutputFileStream : public boost::iostreams::filtering_ostream
|
||||
{
|
||||
protected:
|
||||
std::ofstream *m_outFile;
|
||||
public:
|
||||
OutputFileStream();
|
||||
|
||||
OutputFileStream(const std::string &filePath);
|
||||
virtual ~OutputFileStream();
|
||||
|
||||
bool Open(const std::string &filePath);
|
||||
void Close();
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
* Parameter.cpp
|
||||
*
|
||||
* Created on: 17 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include "Parameter.h"
|
||||
|
||||
// Sets every extraction option to its default value.
// gluePath is not listed and is therefore default-constructed (empty).
Parameter::Parameter()
  // span and symbol limits
  : maxSpan(10), maxNonTerm(2), maxHieroNonTerm(999)
  , maxSymbolsTarget(999), maxSymbolsSource(5), minHoleSource(2)
  , sentenceOffset(0)
  // general switches
  , nonTermConsecSource(false), requireAlignedWord(true)
  , fractionalCounting(true), gzOutput(false)
  // labels and syntax
  , hieroNonTerm("[X]")
  , sourceSyntax(false), targetSyntax(false)
  // mixed-syntax options
  , mixedSyntaxType(0), multiLabel(0)
  , nonTermConsecSourceMixed(true), hieroSourceLHS(false)
  , maxSpanFreeNonTermSource(0), nieceTerminal(true)
  , maxScope(UNDEFINED)
  // properties
  , spanLength(false), nonTermContext(false)
{
}
|
||||
|
||||
// Destructor: the body is empty.
Parameter::~Parameter()
{
}
|
||||
|
@ -1,51 +0,0 @@
|
||||
/*
|
||||
* Parameter.h
|
||||
*
|
||||
* Created on: 17 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <limits>
|
||||
|
||||
#define UNDEFINED std::numeric_limits<int>::max()
|
||||
|
||||
// Plain bag of extraction options. Defaults are set in the constructor.
class Parameter
{
public:
  Parameter();
  virtual ~Parameter();

  // limits
  int maxSpan;
  int maxNonTerm;
  int maxHieroNonTerm;
  int maxSymbolsTarget;
  int maxSymbolsSource;
  int minHoleSource;

  long sentenceOffset;

  // general switches
  bool nonTermConsecSource;
  bool requireAlignedWord;
  bool fractionalCounting;
  bool gzOutput;

  // label used for Hiero non-terminals, and where to write the glue grammar
  std::string hieroNonTerm;
  std::string gluePath;

  // whether each side is a parse tree
  bool sourceSyntax;
  bool targetSyntax;

  // mixed-syntax options
  int mixedSyntaxType;
  int multiLabel;
  bool nonTermConsecSourceMixed;
  bool hieroSourceLHS;
  int maxSpanFreeNonTermSource;
  bool nieceTerminal;
  int maxScope;

  // properties
  bool spanLength;
  bool nonTermContext;

};
|
||||
|
@ -1,14 +0,0 @@
|
||||
#include <sstream>
|
||||
#include "Phrase.h"
|
||||
|
||||
std::string Phrase::Debug() const
|
||||
{
|
||||
std::stringstream out;
|
||||
|
||||
for (size_t i = 0; i < size(); ++i) {
|
||||
Word &word = *at(i);
|
||||
out << word.Debug() << " ";
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
@ -1,19 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "Word.h"
|
||||
|
||||
// a vector of terminals
|
||||
class Phrase : public std::vector<Word*>
|
||||
{
|
||||
public:
|
||||
Phrase()
|
||||
{}
|
||||
|
||||
Phrase(size_t size)
|
||||
:std::vector<Word*>(size)
|
||||
{}
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
};
|
@ -1,540 +0,0 @@
|
||||
/*
|
||||
* Rule.cpp
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "Rule.h"
|
||||
#include "AlignedSentence.h"
|
||||
#include "ConsistentPhrase.h"
|
||||
#include "NonTerm.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Creates a rule with the given left-hand side and no non-terminals on the
// right-hand side, then builds its source side from the aligned sentence.
Rule::Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence)
  : m_lhs(lhsNonTerm), m_alignedSentence(alignedSentence)
  , m_isValid(true), m_canRecurse(true)
{
  this->CreateSource();
}
|
||||
|
||||
/**
 * Creates a rule that extends `copy` with one more non-terminal.
 *
 * The left-hand side, aligned sentence and existing non-terminal list are
 * taken from `copy`; the validity and recursion flags are reset to true.
 * Only the address of nonTerm is stored, so it must outlive the rule. The
 * source side is then rebuilt with CreateSource().
 */
Rule::Rule(const Rule &copy, const NonTerm &nonTerm)
  :m_lhs(copy.m_lhs)
  ,m_alignedSentence(copy.m_alignedSentence)
  ,m_isValid(true)
  ,m_canRecurse(true)
  ,m_nonterms(copy.m_nonterms)
{
  m_nonterms.push_back(&nonTerm);
  CreateSource();

}
|
||||
|
||||
// Destructor: the body is empty.
Rule::~Rule()
{
}
|
||||
|
||||
// The consistent phrase behind the rule's left-hand-side non-terminal.
const ConsistentPhrase &Rule::GetConsistentPhrase() const
{
  return m_lhs.GetConsistentPhrase();
}
|
||||
|
||||
void Rule::CreateSource()
|
||||
{
|
||||
const NonTerm *cp = NULL;
|
||||
size_t nonTermInd = 0;
|
||||
if (nonTermInd < m_nonterms.size()) {
|
||||
cp = m_nonterms[nonTermInd];
|
||||
}
|
||||
|
||||
for (int sourcePos = m_lhs.GetConsistentPhrase().corners[0];
|
||||
sourcePos <= m_lhs.GetConsistentPhrase().corners[1];
|
||||
++sourcePos) {
|
||||
|
||||
const RuleSymbol *ruleSymbol;
|
||||
if (cp && cp->GetConsistentPhrase().corners[0] <= sourcePos && sourcePos <= cp->GetConsistentPhrase().corners[1]) {
|
||||
// replace words with non-term
|
||||
ruleSymbol = cp;
|
||||
sourcePos = cp->GetConsistentPhrase().corners[1];
|
||||
if (m_nonterms.size()) {
|
||||
cp = m_nonterms[nonTermInd];
|
||||
}
|
||||
|
||||
// move to next non-term
|
||||
++nonTermInd;
|
||||
cp = (nonTermInd < m_nonterms.size()) ? m_nonterms[nonTermInd] : NULL;
|
||||
}
|
||||
else {
|
||||
// terminal
|
||||
ruleSymbol = m_alignedSentence.GetPhrase(Moses::Input)[sourcePos];
|
||||
}
|
||||
|
||||
m_source.Add(ruleSymbol);
|
||||
}
|
||||
}
|
||||
|
||||
int Rule::GetNextSourcePosForNonTerm() const
|
||||
{
|
||||
if (m_nonterms.empty()) {
|
||||
// no non-terms so far. Can start next non-term on left corner
|
||||
return m_lhs.GetConsistentPhrase().corners[0];
|
||||
}
|
||||
else {
|
||||
// next non-term can start just left of previous
|
||||
const ConsistentPhrase &cp = m_nonterms.back()->GetConsistentPhrase();
|
||||
int nextPos = cp.corners[1] + 1;
|
||||
return nextPos;
|
||||
}
|
||||
}
|
||||
|
||||
std::string Rule::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
|
||||
// source
|
||||
for (size_t i = 0; i < m_source.GetSize(); ++i) {
|
||||
const RuleSymbol &symbol = *m_source[i];
|
||||
out << symbol.Debug() << " ";
|
||||
}
|
||||
|
||||
// target
|
||||
out << "||| ";
|
||||
for (size_t i = 0; i < m_target.GetSize(); ++i) {
|
||||
const RuleSymbol &symbol = *m_target[i];
|
||||
out << symbol.Debug() << " ";
|
||||
}
|
||||
|
||||
out << "||| ";
|
||||
Alignments::const_iterator iterAlign;
|
||||
for (iterAlign = m_alignments.begin(); iterAlign != m_alignments.end(); ++iterAlign) {
|
||||
const std::pair<int,int> &alignPair = *iterAlign;
|
||||
out << alignPair.first << "-" << alignPair.second << " ";
|
||||
}
|
||||
|
||||
// overall range
|
||||
out << "||| LHS=" << m_lhs.Debug();
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void Rule::Output(std::ostream &out, bool forward, const Parameter ¶ms) const
|
||||
{
|
||||
if (forward) {
|
||||
// source
|
||||
m_source.Output(out);
|
||||
m_lhs.Output(out, Moses::Input);
|
||||
|
||||
out << " ||| ";
|
||||
|
||||
// target
|
||||
m_target.Output(out);
|
||||
m_lhs.Output(out, Moses::Output);
|
||||
}
|
||||
else {
|
||||
// target
|
||||
m_target.Output(out);
|
||||
m_lhs.Output(out, Moses::Output);
|
||||
|
||||
out << " ||| ";
|
||||
|
||||
// source
|
||||
m_source.Output(out);
|
||||
m_lhs.Output(out, Moses::Input);
|
||||
}
|
||||
|
||||
out << " ||| ";
|
||||
|
||||
// alignment
|
||||
Alignments::const_iterator iterAlign;
|
||||
for (iterAlign = m_alignments.begin(); iterAlign != m_alignments.end(); ++iterAlign) {
|
||||
const std::pair<int,int> &alignPair = *iterAlign;
|
||||
|
||||
if (forward) {
|
||||
out << alignPair.first << "-" << alignPair.second << " ";
|
||||
}
|
||||
else {
|
||||
out << alignPair.second << "-" << alignPair.first << " ";
|
||||
}
|
||||
}
|
||||
|
||||
out << "||| ";
|
||||
|
||||
// count
|
||||
out << m_count;
|
||||
|
||||
out << " ||| ";
|
||||
|
||||
// properties
|
||||
|
||||
// span length
|
||||
if (forward && params.spanLength && m_nonterms.size()) {
|
||||
out << "{{SpanLength ";
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
out << i << "," << cp.GetWidth(Moses::Input) << "," << cp.GetWidth(Moses::Output) << " ";
|
||||
}
|
||||
out << "}} ";
|
||||
}
|
||||
|
||||
// non-term context
|
||||
if (forward && params.nonTermContext && m_nonterms.size()) {
|
||||
out << "{{NonTermContext ";
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size(); ++i) {
|
||||
const NonTerm &nonTerm = *m_nonterms[i];
|
||||
const ConsistentPhrase &cp = nonTerm.GetConsistentPhrase();
|
||||
NonTermContext(i, cp, out);
|
||||
}
|
||||
out << "}} ";
|
||||
}
|
||||
}
|
||||
|
||||
void Rule::NonTermContext(size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const
|
||||
{
|
||||
int startPos = cp.corners[0];
|
||||
int endPos = cp.corners[1];
|
||||
|
||||
const Phrase &source = m_alignedSentence.GetPhrase(Moses::Input);
|
||||
|
||||
if (startPos == 0) {
|
||||
out << "<s> ";
|
||||
}
|
||||
else {
|
||||
out << source[startPos - 1]->GetString() << " ";
|
||||
}
|
||||
|
||||
out << source[startPos]->GetString() << " ";
|
||||
out << source[endPos]->GetString() << " ";
|
||||
|
||||
if (endPos == source.size() - 1) {
|
||||
out << "</s> ";
|
||||
}
|
||||
else {
|
||||
out << source[endPos + 1]->GetString() << " ";
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void Rule::Prevalidate(const Parameter ¶ms)
|
||||
{
|
||||
const ConsistentPhrase &cp = m_lhs.GetConsistentPhrase();
|
||||
|
||||
// check number of source symbols in rule
|
||||
if (m_source.GetSize() > params.maxSymbolsSource) {
|
||||
m_isValid = false;
|
||||
}
|
||||
|
||||
// check that last non-term added isn't too small
|
||||
if (m_nonterms.size()) {
|
||||
const NonTerm &lastNonTerm = *m_nonterms.back();
|
||||
const ConsistentPhrase &cp = lastNonTerm.GetConsistentPhrase();
|
||||
|
||||
int sourceWidth = cp.corners[1] - cp.corners[0] + 1;
|
||||
if (sourceWidth < params.minHoleSource) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// check number of non-terms
|
||||
int numNonTerms = 0;
|
||||
int numHieroNonTerms = 0;
|
||||
for (size_t i = 0; i < m_source.GetSize(); ++i) {
|
||||
const RuleSymbol *arc = m_source[i];
|
||||
if (arc->IsNonTerm()) {
|
||||
++numNonTerms;
|
||||
const NonTerm &nonTerm = *static_cast<const NonTerm*>(arc);
|
||||
bool isHiero = nonTerm.IsHiero(params);
|
||||
if (isHiero) {
|
||||
++numHieroNonTerms;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numNonTerms >= params.maxNonTerm) {
|
||||
m_canRecurse = false;
|
||||
if (numNonTerms > params.maxNonTerm) {
|
||||
m_isValid = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (numHieroNonTerms >= params.maxHieroNonTerm) {
|
||||
m_canRecurse = false;
|
||||
if (numHieroNonTerms > params.maxHieroNonTerm) {
|
||||
m_isValid = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// check if 2 consecutive non-terms in source
|
||||
if (!params.nonTermConsecSource && m_nonterms.size() >= 2) {
|
||||
const NonTerm &lastNonTerm = *m_nonterms.back();
|
||||
const NonTerm &secondLastNonTerm = *m_nonterms[m_nonterms.size() - 2];
|
||||
if (secondLastNonTerm.GetConsistentPhrase().corners[1] + 1 ==
|
||||
lastNonTerm.GetConsistentPhrase().corners[0]) {
|
||||
if (params.mixedSyntaxType == 0) {
|
||||
// ordinary hiero or syntax model
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
else {
|
||||
// Hieu's mixed syntax
|
||||
if (lastNonTerm.IsHiero(Moses::Input, params)
|
||||
&& secondLastNonTerm.IsHiero(Moses::Input, params)) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//check to see if it overlaps with any other non-terms
|
||||
if (m_nonterms.size() >= 2) {
|
||||
const NonTerm &lastNonTerm = *m_nonterms.back();
|
||||
|
||||
for (size_t i = 0; i < m_nonterms.size() - 1; ++i) {
|
||||
const NonTerm &otherNonTerm = *m_nonterms[i];
|
||||
bool overlap = lastNonTerm.GetConsistentPhrase().TargetOverlap(otherNonTerm.GetConsistentPhrase());
|
||||
|
||||
if (overlap) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check that at least 1 word is aligned
|
||||
if (params.requireAlignedWord) {
|
||||
bool ok = false;
|
||||
for (size_t i = 0; i < m_source.GetSize(); ++i) {
|
||||
const RuleSymbol &symbol = *m_source[i];
|
||||
if (!symbol.IsNonTerm()) {
|
||||
const Word &word = static_cast<const Word&>(symbol);
|
||||
if (word.GetAlignment().size()) {
|
||||
ok = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (params.maxSpanFreeNonTermSource) {
|
||||
const NonTerm *front = dynamic_cast<const NonTerm*>(m_source[0]);
|
||||
if (front) {
|
||||
int width = front->GetWidth(Moses::Input);
|
||||
if (width > params.maxSpanFreeNonTermSource) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const NonTerm *back = dynamic_cast<const NonTerm*>(m_source.Back());
|
||||
if (back) {
|
||||
int width = back->GetWidth(Moses::Input);
|
||||
if (width > params.maxSpanFreeNonTermSource) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!params.nieceTerminal) {
|
||||
// collect terminal in a rule
|
||||
std::set<const Word*> terms;
|
||||
for (size_t i = 0; i < m_source.GetSize(); ++i) {
|
||||
const Word *word = dynamic_cast<const Word*>(m_source[i]);
|
||||
if (word) {
|
||||
terms.insert(word);
|
||||
}
|
||||
}
|
||||
|
||||
// look in non-terms
|
||||
for (size_t i = 0; i < m_source.GetSize(); ++i) {
|
||||
const NonTerm *nonTerm = dynamic_cast<const NonTerm*>(m_source[i]);
|
||||
if (nonTerm) {
|
||||
const ConsistentPhrase &cp = nonTerm->GetConsistentPhrase();
|
||||
bool containTerm = ContainTerm(cp, terms);
|
||||
|
||||
if (containTerm) {
|
||||
//cerr << "ruleSource=" << *ruleSource << " ";
|
||||
//cerr << "ntRange=" << ntRange << endl;
|
||||
|
||||
// non-term contains 1 of the terms in the rule.
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (params.maxScope != UNDEFINED) {
|
||||
int scope = CalcScope();
|
||||
if (scope > params.maxScope) {
|
||||
m_isValid = false;
|
||||
m_canRecurse = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int Rule::CalcScope() const
|
||||
{
|
||||
int scope = 0;
|
||||
if (m_source.GetSize() > 1) {
|
||||
const RuleSymbol &front = *m_source.Front();
|
||||
if (front.IsNonTerm()) {
|
||||
++scope;
|
||||
}
|
||||
|
||||
const RuleSymbol &back = *m_source.Back();
|
||||
if (back.IsNonTerm()) {
|
||||
++scope;
|
||||
}
|
||||
}
|
||||
return scope;
|
||||
}
|
||||
|
||||
// Returns true if any element of `coll` has the same string as `sought`,
// i.e. sought->CompareString(*element) == 0. Elements are compared by string
// content, not by pointer identity, so this is a linear scan of the set.
// T must provide `int CompareString(const T&) const`.
template<typename T>
bool Contains(const T *sought, const std::set<const T*> &coll)
{
  typename std::set<const T*>::const_iterator iter;
  for (iter = coll.begin(); iter != coll.end(); ++iter) {
    const T *found = *iter;
    if (sought->CompareString(*found) == 0) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
bool Rule::ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const
|
||||
{
|
||||
const Phrase &sourceSentence = m_alignedSentence.GetPhrase(Moses::Input);
|
||||
|
||||
for (int pos = cp.corners[0]; pos <= cp.corners[1]; ++pos) {
|
||||
const Word *soughtWord = sourceSentence[pos];
|
||||
|
||||
// find same word in set
|
||||
if (Contains(soughtWord, terms)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool CompareTargetNonTerms(const NonTerm *a, const NonTerm *b)
|
||||
{
|
||||
// compare just start target pos
|
||||
return a->GetConsistentPhrase().corners[2] < b->GetConsistentPhrase().corners[2];
|
||||
}
|
||||
|
||||
void Rule::CreateTarget(const Parameter ¶ms)
|
||||
{
|
||||
if (!m_isValid) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<const NonTerm*> targetNonTerm(m_nonterms);
|
||||
std::sort(targetNonTerm.begin(), targetNonTerm.end(), CompareTargetNonTerms);
|
||||
|
||||
const NonTerm *cp = NULL;
|
||||
size_t nonTermInd = 0;
|
||||
if (nonTermInd < targetNonTerm.size()) {
|
||||
cp = targetNonTerm[nonTermInd];
|
||||
}
|
||||
|
||||
for (int targetPos = m_lhs.GetConsistentPhrase().corners[2];
|
||||
targetPos <= m_lhs.GetConsistentPhrase().corners[3];
|
||||
++targetPos) {
|
||||
|
||||
const RuleSymbol *ruleSymbol;
|
||||
if (cp && cp->GetConsistentPhrase().corners[2] <= targetPos && targetPos <= cp->GetConsistentPhrase().corners[3]) {
|
||||
// replace words with non-term
|
||||
ruleSymbol = cp;
|
||||
targetPos = cp->GetConsistentPhrase().corners[3];
|
||||
if (targetNonTerm.size()) {
|
||||
cp = targetNonTerm[nonTermInd];
|
||||
}
|
||||
|
||||
// move to next non-term
|
||||
++nonTermInd;
|
||||
cp = (nonTermInd < targetNonTerm.size()) ? targetNonTerm[nonTermInd] : NULL;
|
||||
}
|
||||
else {
|
||||
// terminal
|
||||
ruleSymbol = m_alignedSentence.GetPhrase(Moses::Output)[targetPos];
|
||||
}
|
||||
|
||||
m_target.Add(ruleSymbol);
|
||||
}
|
||||
|
||||
CreateAlignments();
|
||||
}
|
||||
|
||||
|
||||
void Rule::CreateAlignments()
|
||||
{
|
||||
int sourceStart = GetConsistentPhrase().corners[0];
|
||||
int targetStart = GetConsistentPhrase().corners[2];
|
||||
|
||||
for (size_t sourcePos = 0; sourcePos < m_source.GetSize(); ++sourcePos) {
|
||||
const RuleSymbol *symbol = m_source[sourcePos];
|
||||
if (!symbol->IsNonTerm()) {
|
||||
// terminals
|
||||
const Word &sourceWord = static_cast<const Word&>(*symbol);
|
||||
const std::set<const Word *> &targetWords = sourceWord.GetAlignment();
|
||||
CreateAlignments(sourcePos, targetWords);
|
||||
}
|
||||
else {
|
||||
// non-terms. same object in both source & target
|
||||
CreateAlignments(sourcePos, symbol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rule::CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords)
|
||||
{
|
||||
std::set<const Word *>::const_iterator iterTarget;
|
||||
for (iterTarget = targetWords.begin(); iterTarget != targetWords.end(); ++iterTarget) {
|
||||
const Word *targetWord = *iterTarget;
|
||||
CreateAlignments(sourcePos, targetWord);
|
||||
}
|
||||
}
|
||||
|
||||
void Rule::CreateAlignments(int sourcePos, const RuleSymbol *targetSought)
|
||||
{
|
||||
// should be in target phrase
|
||||
for (size_t targetPos = 0; targetPos < m_target.GetSize(); ++targetPos) {
|
||||
const RuleSymbol *foundSymbol = m_target[targetPos];
|
||||
if (targetSought == foundSymbol) {
|
||||
pair<int, int> alignPoint(sourcePos, targetPos);
|
||||
m_alignments.insert(alignPoint);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
throw "not found";
|
||||
}
|
||||
|
@ -1,87 +0,0 @@
|
||||
/*
|
||||
* Rule.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include "Phrase.h"
|
||||
#include "RulePhrase.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
class ConsistentPhrase;
|
||||
class AlignedSentence;
|
||||
class NonTerm;
|
||||
class Parameter;
|
||||
|
||||
|
||||
class Rule {
|
||||
public:
|
||||
typedef std::set<std::pair<int,int> > Alignments;
|
||||
|
||||
Rule(const Rule ©); // do not implement
|
||||
|
||||
// original rule with no non-term
|
||||
Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence);
|
||||
|
||||
// extend a rule, adding 1 new non-term
|
||||
Rule(const Rule ©, const NonTerm &nonTerm);
|
||||
|
||||
virtual ~Rule();
|
||||
|
||||
bool IsValid() const
|
||||
{ return m_isValid; }
|
||||
|
||||
bool CanRecurse() const
|
||||
{ return m_canRecurse; }
|
||||
|
||||
const NonTerm &GetLHS() const
|
||||
{ return m_lhs; }
|
||||
|
||||
const ConsistentPhrase &GetConsistentPhrase() const;
|
||||
|
||||
int GetNextSourcePosForNonTerm() const;
|
||||
|
||||
void SetCount(float count)
|
||||
{ m_count = count; }
|
||||
float GetCount() const
|
||||
{ return m_count; }
|
||||
|
||||
const Alignments &GetAlignments() const
|
||||
{ return m_alignments; }
|
||||
|
||||
std::string Debug() const;
|
||||
void Output(std::ostream &out, bool forward, const Parameter ¶ms) const;
|
||||
|
||||
void Prevalidate(const Parameter ¶ms);
|
||||
void CreateTarget(const Parameter ¶ms);
|
||||
|
||||
const RulePhrase &GetPhrase(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? m_source : m_target; }
|
||||
|
||||
protected:
|
||||
const NonTerm &m_lhs;
|
||||
const AlignedSentence &m_alignedSentence;
|
||||
RulePhrase m_source, m_target;
|
||||
float m_count;
|
||||
|
||||
Alignments m_alignments;
|
||||
|
||||
// in source order
|
||||
std::vector<const NonTerm*> m_nonterms;
|
||||
|
||||
bool m_isValid, m_canRecurse;
|
||||
|
||||
void CreateSource();
|
||||
void CreateAlignments();
|
||||
void CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords);
|
||||
void CreateAlignments(int sourcePos, const RuleSymbol *targetSought);
|
||||
|
||||
bool ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const;
|
||||
int CalcScope() const; // not yet correctly calculated
|
||||
|
||||
void NonTermContext(size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
|
||||
|
||||
};
|
||||
|
@ -1,50 +0,0 @@
|
||||
/*
|
||||
* RulePhrase.cpp
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "RulePhrase.h"
|
||||
#include "RuleSymbol.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
int RulePhrase::Compare(const RulePhrase &other) const
|
||||
{
|
||||
if (GetSize() != other.GetSize()) {
|
||||
return GetSize() < other.GetSize() ? -1 : +1;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < m_coll.size(); ++i) {
|
||||
const RuleSymbol &symbol = *m_coll[i];
|
||||
const RuleSymbol &otherSymbol = *other.m_coll[i];
|
||||
int compare = symbol.Compare(otherSymbol);
|
||||
|
||||
if (compare) {
|
||||
return compare;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RulePhrase::Output(std::ostream &out) const
|
||||
{
|
||||
for (size_t i = 0; i < m_coll.size(); ++i) {
|
||||
const RuleSymbol &symbol = *m_coll[i];
|
||||
symbol.Output(out);
|
||||
out << " ";
|
||||
}
|
||||
}
|
||||
|
||||
std::string RulePhrase::Debug() const
|
||||
{
|
||||
std::stringstream out;
|
||||
Output(out);
|
||||
return out.str();
|
||||
}
|
||||
|
@ -1,49 +0,0 @@
|
||||
/*
|
||||
* RulePhrase.h
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#ifndef RULEPHRASE_H_
|
||||
#define RULEPHRASE_H_
|
||||
|
||||
#include <vector>
|
||||
#include <cstddef>
|
||||
#include <iostream>
|
||||
|
||||
class RuleSymbol;
|
||||
|
||||
// a phrase of terms and non-terms for 1 side of a rule
|
||||
class RulePhrase
|
||||
{
|
||||
public:
|
||||
typedef std::vector<const RuleSymbol*> Coll;
|
||||
Coll m_coll;
|
||||
|
||||
size_t GetSize() const
|
||||
{ return m_coll.size(); }
|
||||
|
||||
void Add(const RuleSymbol *symbol)
|
||||
{
|
||||
m_coll.push_back(symbol);
|
||||
}
|
||||
|
||||
const RuleSymbol* operator[](size_t index) const {
|
||||
return m_coll[index];
|
||||
}
|
||||
|
||||
const RuleSymbol* Front() const {
|
||||
return m_coll.front();
|
||||
}
|
||||
const RuleSymbol* Back() const {
|
||||
return m_coll.back();
|
||||
}
|
||||
|
||||
int Compare(const RulePhrase &other) const;
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
std::string Debug() const;
|
||||
};
|
||||
|
||||
#endif /* RULEPHRASE_H_ */
|
@ -1,36 +0,0 @@
|
||||
/*
|
||||
* RuleSymbol.cpp
|
||||
*
|
||||
* Created on: 21 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "RuleSymbol.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Nothing to initialise: the base class has no state of its own.
RuleSymbol::RuleSymbol()
{
}
|
||||
|
||||
// Nothing to release; defined out of line for the polymorphic base class.
RuleSymbol::~RuleSymbol()
{
}
|
||||
|
||||
int RuleSymbol::Compare(const RuleSymbol &other) const
|
||||
{
|
||||
if (IsNonTerm() != other.IsNonTerm()) {
|
||||
return IsNonTerm() ? -1 : +1;
|
||||
}
|
||||
|
||||
string str = GetString();
|
||||
string otherStr = other.GetString();
|
||||
|
||||
if (str == otherStr) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
return (str < otherStr) ? -1 : +1;
|
||||
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
/*
|
||||
* RuleSymbol.h
|
||||
*
|
||||
* Created on: 21 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#ifndef RULESYMBOL_H_
|
||||
#define RULESYMBOL_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
// Abstract base for anything that can appear in a rule phrase: a terminal
// or a non-terminal.
class RuleSymbol
{
public:
  RuleSymbol();
  virtual ~RuleSymbol();

  // true for non-terminals, false for terminals
  virtual bool IsNonTerm() const = 0;

  // textual forms, supplied by the concrete symbol
  virtual std::string Debug() const = 0;
  virtual void Output(std::ostream &out) const = 0;
  virtual std::string GetString() const = 0;

  // three-way comparison: kind first, then GetString()
  int Compare(const RuleSymbol &other) const;
};
|
||||
|
||||
#endif /* RULESYMBOL_H_ */
|
@ -1,227 +0,0 @@
|
||||
/*
|
||||
* Rules.cpp
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "Rules.h"
|
||||
#include "ConsistentPhrase.h"
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "AlignedSentence.h"
|
||||
#include "Rule.h"
|
||||
#include "Parameter.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
// Binds the rule collection to the sentence pair it extracts from; the
// sentence is held by reference and no rules are created here.
Rules::Rules(const AlignedSentence &alignedSentence)
  : m_alignedSentence(alignedSentence)
{}
|
||||
|
||||
// Hands m_keepRules to Moses::RemoveAllInColl for clean-up. m_mergeRules is
// not passed to it: it only ever receives pointers taken from m_keepRules.
Rules::~Rules()
{
  Moses::RemoveAllInColl(m_keepRules);
}
|
||||
|
||||
void Rules::CreateRules(const ConsistentPhrase &cp,
|
||||
const Parameter ¶ms)
|
||||
{
|
||||
if (params.hieroSourceLHS) {
|
||||
const NonTerm &nonTerm = cp.GetHieroNonTerm();
|
||||
CreateRule(nonTerm, params);
|
||||
}
|
||||
else {
|
||||
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
|
||||
for (size_t i = 0; i < nonTerms.size(); ++i) {
|
||||
const NonTerm &nonTerm = nonTerms[i];
|
||||
CreateRule(nonTerm, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::CreateRule(const NonTerm &nonTerm,
|
||||
const Parameter ¶ms)
|
||||
{
|
||||
Rule *rule = new Rule(nonTerm, m_alignedSentence);
|
||||
|
||||
rule->Prevalidate(params);
|
||||
rule->CreateTarget(params);
|
||||
|
||||
|
||||
if (rule->CanRecurse()) {
|
||||
Extend(*rule, params);
|
||||
}
|
||||
|
||||
if (rule->IsValid()) {
|
||||
m_keepRules.insert(rule);
|
||||
}
|
||||
else {
|
||||
delete rule;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Rules::Extend(const Parameter ¶ms)
|
||||
{
|
||||
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
|
||||
|
||||
size_t size = m_alignedSentence.GetPhrase(Moses::Input).size();
|
||||
for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
|
||||
for (size_t sourceEnd = sourceStart; sourceEnd < size; ++sourceEnd) {
|
||||
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
|
||||
|
||||
ConsistentPhrases::Coll::const_iterator iter;
|
||||
for (iter = cps.begin(); iter != cps.end(); ++iter) {
|
||||
const ConsistentPhrase &cp = **iter;
|
||||
CreateRules(cp, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::Extend(const Rule &rule, const Parameter ¶ms)
|
||||
{
|
||||
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
|
||||
int sourceMin = rule.GetNextSourcePosForNonTerm();
|
||||
|
||||
int ruleStart = rule.GetConsistentPhrase().corners[0];
|
||||
int ruleEnd = rule.GetConsistentPhrase().corners[1];
|
||||
|
||||
for (int sourceStart = sourceMin; sourceStart <= ruleEnd; ++sourceStart) {
|
||||
for (int sourceEnd = sourceStart; sourceEnd <= ruleEnd; ++sourceEnd) {
|
||||
if (sourceStart == ruleStart && sourceEnd == ruleEnd) {
|
||||
// don't cover whole rule with 1 non-term
|
||||
continue;
|
||||
}
|
||||
|
||||
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
|
||||
Extend(rule, cps, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extends `rule` with every consistent phrase in `cps`, one at a time.
void Rules::Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params)
{
  ConsistentPhrases::Coll::const_iterator iter;
  for (iter = cps.begin(); iter != cps.end(); ++iter) {
    const ConsistentPhrase &cp = **iter;
    Extend(rule, cp, params);
  }
}
|
||||
|
||||
void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter ¶ms)
|
||||
{
|
||||
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
|
||||
for (size_t i = 0; i < nonTerms.size(); ++i) {
|
||||
const NonTerm &nonTerm = nonTerms[i];
|
||||
|
||||
Rule *newRule = new Rule(rule, nonTerm);
|
||||
newRule->Prevalidate(params);
|
||||
newRule->CreateTarget(params);
|
||||
|
||||
if (newRule->CanRecurse()) {
|
||||
// recursively extend
|
||||
Extend(*newRule, params);
|
||||
}
|
||||
|
||||
if (newRule->IsValid()) {
|
||||
m_keepRules.insert(newRule);
|
||||
}
|
||||
else {
|
||||
delete newRule;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string Rules::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
|
||||
std::set<Rule*>::const_iterator iter;
|
||||
out << "m_keepRules:" << endl;
|
||||
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
|
||||
const Rule &rule = **iter;
|
||||
out << rule.Debug() << endl;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void Rules::Output(std::ostream &out, bool forward, const Parameter ¶ms) const
|
||||
{
|
||||
std::set<Rule*, CompareRules>::const_iterator iter;
|
||||
for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
|
||||
const Rule &rule = **iter;
|
||||
rule.Output(out, forward, params);
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::Consolidate(const Parameter ¶ms)
|
||||
{
|
||||
if (params.fractionalCounting) {
|
||||
CalcFractionalCount();
|
||||
}
|
||||
else {
|
||||
std::set<Rule*>::iterator iter;
|
||||
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
|
||||
Rule &rule = **iter;
|
||||
rule.SetCount(1);
|
||||
}
|
||||
}
|
||||
|
||||
MergeRules(params);
|
||||
}
|
||||
|
||||
void Rules::MergeRules(const Parameter ¶ms)
|
||||
{
|
||||
typedef std::set<Rule*, CompareRules> MergeRules;
|
||||
|
||||
std::set<Rule*>::const_iterator iterOrig;
|
||||
for (iterOrig = m_keepRules.begin(); iterOrig != m_keepRules.end(); ++iterOrig) {
|
||||
Rule *origRule = *iterOrig;
|
||||
|
||||
pair<MergeRules::iterator, bool> inserted = m_mergeRules.insert(origRule);
|
||||
if (!inserted.second) {
|
||||
// already there, just add count
|
||||
Rule &rule = **inserted.first;
|
||||
float newCount = rule.GetCount() + origRule->GetCount();
|
||||
rule.SetCount(newCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::CalcFractionalCount()
|
||||
{
|
||||
typedef std::set<Rule*> RuleColl;
|
||||
typedef std::map<const ConsistentPhrase*, RuleColl> RuleByConsistentPhrase;
|
||||
RuleByConsistentPhrase allRules;
|
||||
|
||||
// sort by source AND target ranges
|
||||
std::set<Rule*>::const_iterator iter;
|
||||
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
|
||||
Rule *rule = *iter;
|
||||
const ConsistentPhrase &cp = rule->GetConsistentPhrase();
|
||||
RuleColl &ruleColl = allRules[&cp];
|
||||
ruleColl.insert(rule);
|
||||
}
|
||||
|
||||
// fractional count
|
||||
RuleByConsistentPhrase::iterator iterOuter;
|
||||
for (iterOuter = allRules.begin(); iterOuter != allRules.end(); ++iterOuter) {
|
||||
RuleColl &rules = iterOuter->second;
|
||||
|
||||
RuleColl::iterator iterInner;
|
||||
for (iterInner = rules.begin(); iterInner != rules.end(); ++iterInner) {
|
||||
Rule &rule = **iterInner;
|
||||
rule.SetCount(1.0f / (float) rules.size());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,72 +0,0 @@
|
||||
/*
|
||||
* Rules.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "Rule.h"
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
class AlignedSentence;
|
||||
class Parameter;
|
||||
|
||||
struct CompareRules {
|
||||
bool operator()(const Rule *a, const Rule *b)
|
||||
{
|
||||
int compare;
|
||||
|
||||
compare = a->GetPhrase(Moses::Input).Compare(b->GetPhrase(Moses::Input));
|
||||
if (compare) return compare < 0;
|
||||
|
||||
compare = a->GetPhrase(Moses::Output).Compare(b->GetPhrase(Moses::Output));
|
||||
if (compare) return compare < 0;
|
||||
|
||||
if (a->GetAlignments() != b->GetAlignments()) {
|
||||
return a->GetAlignments() < b->GetAlignments();
|
||||
}
|
||||
|
||||
if (a->GetLHS().GetString() != b->GetLHS().GetString()) {
|
||||
return a->GetLHS().GetString() < b->GetLHS().GetString();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class Rules {
|
||||
public:
|
||||
Rules(const AlignedSentence &alignedSentence);
|
||||
virtual ~Rules();
|
||||
void Extend(const Parameter ¶ms);
|
||||
void Consolidate(const Parameter ¶ms);
|
||||
|
||||
std::string Debug() const;
|
||||
void Output(std::ostream &out, bool forward, const Parameter ¶ms) const;
|
||||
|
||||
protected:
|
||||
const AlignedSentence &m_alignedSentence;
|
||||
std::set<Rule*> m_keepRules;
|
||||
std::set<Rule*, CompareRules> m_mergeRules;
|
||||
|
||||
void Extend(const Rule &rule, const Parameter ¶ms);
|
||||
void Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter ¶ms);
|
||||
void Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter ¶ms);
|
||||
|
||||
// create original rules
|
||||
void CreateRules(const ConsistentPhrase &cp,
|
||||
const Parameter ¶ms);
|
||||
void CreateRule(const NonTerm &nonTerm,
|
||||
const Parameter ¶ms);
|
||||
|
||||
void MergeRules(const Parameter ¶ms);
|
||||
void CalcFractionalCount();
|
||||
|
||||
};
|
||||
|
@ -1,47 +0,0 @@
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include "SyntaxTree.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void SyntaxTree::Add(int startPos, int endPos, const std::string &label, const Parameter ¶ms)
|
||||
{
|
||||
//cerr << "add " << label << " to " << "[" << startPos << "-" << endPos << "]" << endl;
|
||||
|
||||
Range range(startPos, endPos);
|
||||
Labels &labels = m_coll[range];
|
||||
|
||||
bool add = true;
|
||||
if (labels.size()) {
|
||||
if (params.multiLabel == 1) {
|
||||
// delete the label in collection and add new
|
||||
assert(labels.size() == 1);
|
||||
labels.clear();
|
||||
}
|
||||
else if (params.multiLabel == 2) {
|
||||
// ignore this label
|
||||
add = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (add) {
|
||||
labels.push_back(label);
|
||||
}
|
||||
}
|
||||
|
||||
void SyntaxTree::AddToAll(const std::string &label)
|
||||
{
|
||||
Coll::iterator iter;
|
||||
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
|
||||
Labels &labels = iter->second;
|
||||
labels.push_back(label);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the labels stored for the span [startPos, endPos], or the default
// labels when the span has no entry. Does not modify the tree.
const SyntaxTree::Labels &SyntaxTree::Find(int startPos, int endPos) const
{
  const Coll::const_iterator hit = m_coll.find(Range(startPos, endPos));
  if (hit != m_coll.end()) {
    return hit->second;
  }
  return m_defaultLabels;
}
|
@ -1,32 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
class Parameter;
|
||||
|
||||
class SyntaxTree
|
||||
{
|
||||
public:
|
||||
typedef std::pair<int, int> Range;
|
||||
typedef std::vector<std::string> Labels;
|
||||
typedef std::map<Range, Labels> Coll;
|
||||
|
||||
void Add(int startPos, int endPos, const std::string &label, const Parameter ¶ms);
|
||||
void AddToAll(const std::string &label);
|
||||
|
||||
const Labels &Find(int startPos, int endPos) const;
|
||||
|
||||
void SetHieroLabel(const std::string &label) {
|
||||
m_defaultLabels.push_back(label);
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
Coll m_coll;
|
||||
Labels m_defaultLabels;
|
||||
};
|
||||
|
||||
|
@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Word.cpp
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
#include <limits>
|
||||
#include "Word.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Creates a terminal at sentence position `pos` with surface string `str`;
// the alignment set starts out empty.
Word::Word(int pos, const std::string &str)
  : m_pos(pos)
  , m_str(str)
{}
|
||||
|
||||
// Nothing to release: aligned words are referenced, not owned.
Word::~Word()
{
}
|
||||
|
||||
void Word::AddAlignment(const Word *other)
|
||||
{
|
||||
m_alignment.insert(other);
|
||||
}
|
||||
|
||||
std::set<int> Word::GetAlignmentIndex() const
|
||||
{
|
||||
std::set<int> ret;
|
||||
|
||||
std::set<const Word *>::const_iterator iter;
|
||||
for (iter = m_alignment.begin(); iter != m_alignment.end(); ++iter) {
|
||||
const Word &otherWord = **iter;
|
||||
int otherPos = otherWord.GetPos();
|
||||
ret.insert(otherPos);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Word::Output(std::ostream &out) const
|
||||
{
|
||||
out << m_str;
|
||||
}
|
||||
|
||||
std::string Word::Debug() const
|
||||
{
|
||||
return m_str;
|
||||
}
|
||||
|
||||
int Word::CompareString(const Word &other) const
|
||||
{
|
||||
return m_str.compare(other.m_str);
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Word.h
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "RuleSymbol.h"
|
||||
|
||||
// a terminal
|
||||
class Word : public RuleSymbol
|
||||
{
|
||||
public:
|
||||
Word(const Word&); // do not implement
|
||||
Word(int pos, const std::string &str);
|
||||
virtual ~Word();
|
||||
|
||||
virtual bool IsNonTerm() const
|
||||
{ return false; }
|
||||
|
||||
std::string GetString() const
|
||||
{ return m_str; }
|
||||
|
||||
int GetPos() const
|
||||
{ return m_pos; }
|
||||
|
||||
void AddAlignment(const Word *other);
|
||||
|
||||
const std::set<const Word *> &GetAlignment() const
|
||||
{ return m_alignment; }
|
||||
|
||||
std::set<int> GetAlignmentIndex() const;
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
std::string Debug() const;
|
||||
|
||||
int CompareString(const Word &other) const;
|
||||
|
||||
protected:
|
||||
int m_pos; // original position in sentence, NOT in lattice
|
||||
std::string m_str;
|
||||
std::set<const Word *> m_alignment;
|
||||
};
|
||||
|
@ -1,27 +0,0 @@
|
||||
#!/usr/bin/perl

# Reads lines from STDIN and echoes to STDOUT only those whose number of
# space-separated tokens in front of the first "|||" token is at most the
# limit given as the first command-line argument.
#
# Usage: <script> max-num-words < input > output

use strict;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $maxNumWords = $ARGV[0];
die "Usage: $0 max-num-words < input > output\n" unless defined($maxNumWords);

while (my $line = <STDIN>) {
  chomp($line);
  my @toks = split(/ /,$line);

  # Count the tokens before the first "|||". The scan is bounded by the
  # number of tokens so that a line without any "|||" cannot loop forever;
  # for such a line every token is counted.
  my $numSourceWords = 0;
  while ($numSourceWords < scalar(@toks) && $toks[$numSourceWords] ne "|||") {
    ++$numSourceWords;
  }

  if ($numSourceWords <= $maxNumWords) {
    print "$line\n";
  }
}
|
||||
|
||||
|
@ -1,81 +0,0 @@
|
||||
#ifndef moses_gzfile_buf_h
|
||||
#define moses_gzfile_buf_h
|
||||
|
||||
#include <streambuf>
|
||||
#include <zlib.h>
|
||||
#include <cstring>
|
||||
|
||||
class gzfilebuf : public std::streambuf {
|
||||
public:
|
||||
gzfilebuf(const char *filename)
|
||||
{ _gzf = gzopen(filename, "rb");
|
||||
setg (_buff+sizeof(int), // beginning of putback area
|
||||
_buff+sizeof(int), // read position
|
||||
_buff+sizeof(int)); // end position
|
||||
}
|
||||
~gzfilebuf() { gzclose(_gzf); }
|
||||
protected:
|
||||
virtual int_type overflow (int_type c) {
|
||||
throw;
|
||||
}
|
||||
|
||||
// write multiple characters
|
||||
virtual
|
||||
std::streamsize xsputn (const char* s,
|
||||
std::streamsize num) {
|
||||
throw;
|
||||
}
|
||||
|
||||
virtual std::streampos seekpos ( std::streampos sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out ){ throw;
|
||||
}
|
||||
|
||||
//read one character
|
||||
virtual int_type underflow () {
|
||||
// is read position before end of _buff?
|
||||
if (gptr() < egptr()) {
|
||||
return traits_type::to_int_type(*gptr());
|
||||
}
|
||||
|
||||
/* process size of putback area
|
||||
* - use number of characters read
|
||||
* - but at most four
|
||||
*/
|
||||
unsigned int numPutback = gptr() - eback();
|
||||
if (numPutback > sizeof(int)) {
|
||||
numPutback = sizeof(int);
|
||||
}
|
||||
|
||||
/* copy up to four characters previously read into
|
||||
* the putback _buff (area of first four characters)
|
||||
*/
|
||||
std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
|
||||
numPutback);
|
||||
|
||||
// read new characters
|
||||
int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
|
||||
if (num <= 0) {
|
||||
// ERROR or EOF
|
||||
return EOF;
|
||||
}
|
||||
|
||||
// reset _buff pointers
|
||||
setg (_buff+(sizeof(int)-numPutback), // beginning of putback area
|
||||
_buff+sizeof(int), // read position
|
||||
_buff+sizeof(int)+num); // end of buffer
|
||||
|
||||
// return next character
|
||||
return traits_type::to_int_type(*gptr());
|
||||
}
|
||||
|
||||
std::streamsize xsgetn (char* s,
|
||||
std::streamsize num) {
|
||||
return gzread(_gzf,s,num);
|
||||
}
|
||||
|
||||
private:
|
||||
gzFile _gzf;
|
||||
static const unsigned int _buffsize = 1024;
|
||||
char _buff[_buffsize];
|
||||
};
|
||||
|
||||
#endif
|
@ -1,33 +0,0 @@
|
||||
#! /usr/bin/perl -w

# Compares two files line by line and prints the number of line pairs that
# are equal once leading and trailing whitespace has been removed.
#
# Usage: <script> file1 file2

use strict;

sub trim($);

die "Usage: $0 file1 file2\n" unless @ARGV >= 2;

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];

# Three-argument open so that special characters in a file name are never
# interpreted as an open mode; fail loudly instead of counting nothing.
open (FILE1, "<", $file1) or die "Cannot open $file1: $!\n";
open (FILE2, "<", $file2) or die "Cannot open $file2: $!\n";

my $countEqual = 0;
while (my $line1 = <FILE1>) {
  my $line2 = <FILE2>;

  # The second file is exhausted: the remaining lines of the first file have
  # no partner and must not be counted (a blank line would otherwise compare
  # equal to the missing line).
  last unless defined($line2);

  if (trim($line1) eq trim($line2)) {
    ++$countEqual;
  }
}

close (FILE1);
close (FILE2);

print $countEqual ."\n";
|
||||
|
||||
|
||||
######################
|
||||
# Returns a copy of the argument with whitespace stripped from both ends.
sub trim($) {
  my ($text) = @_;
  for ($text) {
    s/^\s+//;
    s/\s+$//;
  }
  return $text;
}
|
||||
|
||||
|
@ -1,29 +0,0 @@
|
||||
#! /usr/bin/perl -w

# Copies selected lines from STDIN to STDOUT. The file named by the first
# argument holds the 1-based numbers of the lines to keep, one per line.
# The numbers are consumed in file order, so they have to be ascending for
# every one of them to be matched.
#
# Usage: <script> line-number-file < input > output

use strict;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $fileLineNum = $ARGV[0];
die "Usage: $0 line-number-file < input > output\n" unless defined($fileLineNum);

open (FILE_LINE_NUM, "<", $fileLineNum) or die "Cannot open $fileLineNum: $!\n";

my $nextLineNum = <FILE_LINE_NUM>;

my $lineNum = 1;
while (my $line = <STDIN>) {
  if (defined($nextLineNum) && $lineNum == $nextLineNum) {
    # matches. output line
    chomp($line);
    print "$line\n";

    # next line number
    $nextLineNum = <FILE_LINE_NUM>;
  }

  ++$lineNum;
}

close (FILE_LINE_NUM);
|
||||
|
||||
|
||||
|
@ -1,108 +0,0 @@
|
||||
#! /usr/bin/perl -w

# For every sentence pair of the parallel corpus in the current directory
# (files "source", "target" and "alignment"):
#   1. writes the pair into its own work directory,
#   2. runs extraction, scoring and consolidation on that single pair to
#      produce a phrase table "pt",
#   3. runs the decoder on the source sentence with that phrase table and a
#      ConstrainedDecoding feature pointing at the target sentence.
#
# Usage: <script> ini-path is-hiero decoder-exec extract-exec tmp-name

use strict;

sub RunCommand;

die "Usage: $0 ini-path is-hiero decoder-exec extract-exec tmp-name\n"
  unless @ARGV >= 5;

my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];

my $WORK_DIR = `pwd`;
chomp($WORK_DIR);

# NOTE: the location of the Moses checkout is hard-coded.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu";

$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";

# Prefer gsplit / gsort when they are installed.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
  $SPLIT_EXEC = 'gsplit';
}
else {
  $SPLIT_EXEC = 'split';
}

my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
  $SORT_EXEC = 'gsort';
}
else {
  $SORT_EXEC = 'sort';
}

my $hieroFlag = "";
if ($isHiero == 1) {
  $hieroFlag = "--Hierarchical";
}

print STDERR "WORK_DIR=$WORK_DIR \n";

my $cmd;

open (SOURCE, "<", "source") or die "Cannot open source: $!\n";
open (TARGET, "<", "target") or die "Cannot open target: $!\n";
open (ALIGNMENT, "<", "alignment") or die "Cannot open alignment: $!\n";

my $lineNum = 0;
my ($source, $target, $alignment);
while ($source = <SOURCE>) {
  chomp($source);

  # The three files are read in lock-step; stop with a clear message when
  # one of them is shorter than "source".
  $target = <TARGET>;
  $alignment = <ALIGNMENT>;
  unless (defined($target) && defined($alignment)) {
    die "target or alignment has fewer lines than source (at line $lineNum)\n";
  }
  chomp($target);
  chomp($alignment);

  #print STDERR "$source ||| $target ||| $alignment \n";

  # write out 1 line
  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
  RunCommand("mkdir -p $tmpDir");

  open (SOURCE1, ">", "$tmpDir/source") or die "Cannot write $tmpDir/source: $!\n";
  open (TARGET1, ">", "$tmpDir/target") or die "Cannot write $tmpDir/target: $!\n";
  open (ALIGNMENT1, ">", "$tmpDir/alignment") or die "Cannot write $tmpDir/alignment: $!\n";

  print SOURCE1 "$source\n";
  print TARGET1 "$target\n";
  print ALIGNMENT1 "$alignment\n";

  close (SOURCE1);
  close (TARGET1);
  close (ALIGNMENT1);

  # train
  if ($isHiero == 1) {
    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract --GZOutput";
  }
  else {
    # pb
    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract 7 --GZOutput";
  }
  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
  print STDERR "Executing: $cmd\n";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
  RunCommand($cmd);

  # decode
  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source -feature-add \"ConstrainedDecoding path=$tmpDir/target\"";
  print STDERR "Executing: $cmd\n";
  RunCommand($cmd);

  # `rm -rf $tmpDir`;

  ++$lineNum;
}

close(SOURCE);
close(TARGET);
close(ALIGNMENT);

# Runs $command through backticks (its standard output is captured and
# discarded) and warns on STDERR when it ends with a non-zero status.
# Processing continues after a failure.
sub RunCommand
{
  my ($command) = @_;

  my $ignoredOutput = `$command`;
  if ($? != 0) {
    warn "Command failed (status $?): $command\n";
  }
}
|
||||
|
@ -1,151 +0,0 @@
|
||||
#! /usr/bin/perl -w

# Leave-one-out experiment over the parallel corpus in the current directory
# (files "source", "target" and "alignment"). For every line number in
# [start-line, end-line):
#   1. writes that sentence pair and the corpus without it into a work
#      directory,
#   2. runs extraction, scoring and consolidation on the held-out corpus to
#      produce a phrase table "pt",
#   3. decodes the held-out source sentence into a distinct n-best list and
#      appends the `wc -l` of that list to the file "out",
#   4. removes the work directory.
#
# Usage: <script> ini-path is-hiero decoder-exec extract-exec tmp-name start-line end-line

use strict;

sub Write1Line;
sub WriteCorpus1Holdout;
sub RunCommand;

die "Usage: $0 ini-path is-hiero decoder-exec extract-exec tmp-name start-line end-line\n"
  unless @ARGV >= 7;

my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];

print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";

my $WORK_DIR = `pwd`;
chomp($WORK_DIR);

# NOTE: the location of the Moses checkout is hard-coded.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";

$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";

# Prefer gsplit / gsort when they are installed.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
  $SPLIT_EXEC = 'gsplit';
}
else {
  $SPLIT_EXEC = 'split';
}

my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
  $SORT_EXEC = 'gsort';
}
else {
  $SORT_EXEC = 'sort';
}

my $hieroFlag = "";
if ($isHiero == 1) {
  $hieroFlag = "--Hierarchical";
}

print STDERR "WORK_DIR=$WORK_DIR \n";

my $cmd;

open (SOURCE, "<", "source") or die "Cannot open source: $!\n";
open (TARGET, "<", "target") or die "Cannot open target: $!\n";
open (ALIGNMENT, "<", "alignment") or die "Cannot open alignment: $!\n";

my $numLines = `cat source | wc -l`;
chomp($numLines);

for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
  # The three files are read in lock-step, also for the skipped lines.
  my $source = <SOURCE>;
  my $target = <TARGET>;
  my $alignment = <ALIGNMENT>;
  unless (defined($source) && defined($target) && defined($alignment)) {
    die "source, target and alignment must all have line $lineNum\n";
  }
  chomp($source);
  chomp($target);
  chomp($alignment);

  if ($lineNum < $startLine || $lineNum >= $endLine) {
    next;
  }

  #print STDERR "$source ||| $target ||| $alignment \n";
  # write out 1 line
  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
  RunCommand("mkdir -p $tmpDir");

  Write1Line($source, $tmpDir, "source.1");
  Write1Line($target, $tmpDir, "target.1");
  Write1Line($alignment, $tmpDir, "alignment.1");

  WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
  WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
  WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");

  # train
  if ($isHiero == 1) {
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
  }
  else {
    # pb
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
  }
  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
  print STDERR "Executing: $cmd\n";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
  RunCommand($cmd);

  # decode
  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -n-best-list $tmpDir/nbest 10000 distinct -v 2";
  print STDERR "Executing: $cmd\n";
  RunCommand($cmd);

  # count the number of translation in nbest list
  $cmd = "wc -l $tmpDir/nbest >> out";
  RunCommand($cmd);

  RunCommand("rm -rf $tmpDir");
}

close(SOURCE);
close(TARGET);
close(ALIGNMENT);

# Runs $command through backticks (its standard output is captured and
# discarded) and warns on STDERR when it ends with a non-zero status.
# Processing continues after a failure.
sub RunCommand
{
  my ($command) = @_;

  my $ignoredOutput = `$command`;
  if ($? != 0) {
    warn "Command failed (status $?): $command\n";
  }
}
|
||||
|
||||
|
||||
######################
|
||||
# Writes $line followed by a newline to the file $tmpDir/$fileName, replacing
# any existing content. Dies when the file cannot be opened or closed.
sub Write1Line
{
  my ($line, $tmpDir, $fileName) = @_;
  my $path = "$tmpDir/$fileName";

  # Lexical handle and checked open: a failure must not be silently ignored.
  open (my $handle, ">", $path) or die "Cannot write $path: $!\n";
  print $handle "$line\n";
  close ($handle) or die "Cannot close $path: $!\n";
}
|
||||
|
||||
# Copies the file $inFilePath to $tmpDir/$outFileName, leaving out the one
# line whose 0-based index equals $holdoutLineNum. Every copied line is
# written with a trailing newline. Dies when either file cannot be opened.
sub WriteCorpus1Holdout
{
  my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
  my $outPath = "$tmpDir/$outFileName";

  # Lexical handles and checked opens: a failure must not be silently ignored.
  open (my $inFile, "<", $inFilePath) or die "Cannot open $inFilePath: $!\n";
  open (my $outFile, ">", $outPath) or die "Cannot write $outPath: $!\n";

  my $lineNum = 0;
  while (my $line = <$inFile>) {
    chomp($line);

    if ($lineNum != $holdoutLineNum) {
      print $outFile "$line\n";
    }

    ++$lineNum;
  }

  close ($outFile) or die "Cannot close $outPath: $!\n";
  close ($inFile);
}
|
||||
|
||||
|
@ -1,147 +0,0 @@
|
||||
#! /usr/bin/perl -w

# Leave-one-out experiment over the parallel corpus in the current directory
# (files "source", "target" and "alignment"). For every line number in
# [start-line, end-line):
#   1. writes that sentence pair and the corpus without it into a work
#      directory,
#   2. runs extraction, scoring and consolidation on the held-out corpus to
#      produce a phrase table "pt",
#   3. decodes the held-out source sentence with a ConstrainedDecoding
#      feature pointing at the held-out target sentence,
#   4. removes the work directory.
#
# Usage: <script> ini-path is-hiero decoder-exec extract-exec tmp-name start-line end-line

use strict;

sub Write1Line;
sub WriteCorpus1Holdout;
sub RunCommand;

die "Usage: $0 ini-path is-hiero decoder-exec extract-exec tmp-name start-line end-line\n"
  unless @ARGV >= 7;

my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];

print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";

my $WORK_DIR = `pwd`;
chomp($WORK_DIR);

# NOTE: the location of the Moses checkout is hard-coded.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";

$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";

# Prefer gsplit / gsort when they are installed.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
  $SPLIT_EXEC = 'gsplit';
}
else {
  $SPLIT_EXEC = 'split';
}

my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
  $SORT_EXEC = 'gsort';
}
else {
  $SORT_EXEC = 'sort';
}

my $hieroFlag = "";
if ($isHiero == 1) {
  $hieroFlag = "--Hierarchical";
}

print STDERR "WORK_DIR=$WORK_DIR \n";

my $cmd;

open (SOURCE, "<", "source") or die "Cannot open source: $!\n";
open (TARGET, "<", "target") or die "Cannot open target: $!\n";
open (ALIGNMENT, "<", "alignment") or die "Cannot open alignment: $!\n";

my $numLines = `cat source | wc -l`;
chomp($numLines);

for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
  # The three files are read in lock-step, also for the skipped lines.
  my $source = <SOURCE>;
  my $target = <TARGET>;
  my $alignment = <ALIGNMENT>;
  unless (defined($source) && defined($target) && defined($alignment)) {
    die "source, target and alignment must all have line $lineNum\n";
  }
  chomp($source);
  chomp($target);
  chomp($alignment);

  if ($lineNum < $startLine || $lineNum >= $endLine) {
    next;
  }

  #print STDERR "$source ||| $target ||| $alignment \n";
  # write out 1 line
  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
  RunCommand("mkdir -p $tmpDir");

  Write1Line($source, $tmpDir, "source.1");
  Write1Line($target, $tmpDir, "target.1");
  Write1Line($alignment, $tmpDir, "alignment.1");

  WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
  WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
  WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");

  # train
  if ($isHiero == 1) {
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
  }
  else {
    # pb
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
  }
  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
  print STDERR "Executing: $cmd\n";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
  RunCommand($cmd);

  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
  RunCommand($cmd);

  # decode
  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -feature-add \"ConstrainedDecoding path=$tmpDir/target.1\" -v 2";
  print STDERR "Executing: $cmd\n";
  RunCommand($cmd);

  RunCommand("rm -rf $tmpDir");
}

close(SOURCE);
close(TARGET);
close(ALIGNMENT);

# Runs $command through backticks (its standard output is captured and
# discarded) and warns on STDERR when it ends with a non-zero status.
# Processing continues after a failure.
sub RunCommand
{
  my ($command) = @_;

  my $ignoredOutput = `$command`;
  if ($? != 0) {
    warn "Command failed (status $?): $command\n";
  }
}
|
||||
|
||||
|
||||
######################
|
||||
# Writes $line followed by a newline to the file $tmpDir/$fileName, replacing
# any existing content. Dies when the file cannot be opened or closed.
sub Write1Line
{
  my ($line, $tmpDir, $fileName) = @_;
  my $path = "$tmpDir/$fileName";

  # Lexical handle and checked open: a failure must not be silently ignored.
  open (my $handle, ">", $path) or die "Cannot write $path: $!\n";
  print $handle "$line\n";
  close ($handle) or die "Cannot close $path: $!\n";
}
|
||||
|
||||
# Copies the file $inFilePath to $tmpDir/$outFileName, leaving out the one
# line whose 0-based index equals $holdoutLineNum. Every copied line is
# written with a trailing newline. Dies when either file cannot be opened.
sub WriteCorpus1Holdout
{
  my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
  my $outPath = "$tmpDir/$outFileName";

  # Lexical handles and checked opens: a failure must not be silently ignored.
  open (my $inFile, "<", $inFilePath) or die "Cannot open $inFilePath: $!\n";
  open (my $outFile, ">", $outPath) or die "Cannot write $outPath: $!\n";

  my $lineNum = 0;
  while (my $line = <$inFile>) {
    chomp($line);

    if ($lineNum != $holdoutLineNum) {
      print $outFile "$line\n";
    }

    ++$lineNum;
  }

  close ($outFile) or die "Cannot close $outPath: $!\n";
  close ($inFile);
}
|
||||
|
||||
|
@ -1,17 +0,0 @@
|
||||
#! /usr/bin/perl -w

# Splits the line range of the file "source" in the current directory into
# chunks of $SPLIT_LINES lines and starts one background reachable.perl job
# per chunk. Each job's output goes to out.reachable.<start-line>.
#
# Usage: <script> ini-path

use strict;

my $iniPath = $ARGV[0];
die "Usage: $0 ini-path\n" unless defined($iniPath);

my $SPLIT_LINES = 200;
my $lineCount = `cat source | wc -l`;
chomp($lineCount);
print STDERR "lineCount=$lineCount \n";

for (my $startLine = 0; $startLine < $lineCount; $startLine += $SPLIT_LINES) {
  my $endLine = $startLine + $SPLIT_LINES;

  # system() runs the command through /bin/sh, where the bash-only "&>"
  # is not a redirection of both streams; spell it out portably.
  my $cmd = "../../scripts/reachable.perl $iniPath 1 moses_chart extract-rules tmp-reachable $startLine $endLine >out.reachable.$startLine 2>&1 &";
  print STDERR "Executing: $cmd \n";
  system($cmd);

}
|
||||
|
@ -1,69 +0,0 @@
|
||||
/**
|
||||
* pugixml parser - version 1.2
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
* of this file.
|
||||
*
|
||||
* This work is based on the pugxml parser, which is:
|
||||
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
|
||||
*/
|
||||
|
||||
#ifndef HEADER_PUGICONFIG_HPP
|
||||
#define HEADER_PUGICONFIG_HPP
|
||||
|
||||
// Uncomment this to enable wchar_t mode
|
||||
// #define PUGIXML_WCHAR_MODE
|
||||
|
||||
// Uncomment this to disable XPath
|
||||
// #define PUGIXML_NO_XPATH
|
||||
|
||||
// Uncomment this to disable STL
|
||||
// #define PUGIXML_NO_STL
|
||||
|
||||
// Uncomment this to disable exceptions
|
||||
// #define PUGIXML_NO_EXCEPTIONS
|
||||
|
||||
// Set this to control attributes for public classes/functions, i.e.:
|
||||
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
|
||||
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
|
||||
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
|
||||
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
|
||||
|
||||
// Uncomment this to switch to header-only version
|
||||
// #define PUGIXML_HEADER_ONLY
|
||||
// #include "pugixml.cpp"
|
||||
|
||||
// Tune these constants to adjust memory-related behavior
|
||||
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
|
||||
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
|
||||
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2012 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -25,6 +25,7 @@
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.285958391" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.966722418" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
|
@ -79,8 +79,18 @@
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.790052015" name="IRST.h" rcbsApplicability="disable" resourcePath="LM/IRST.h" toolsToInvoke=""/>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1211280539" name="DALMWrapper.h" rcbsApplicability="disable" resourcePath="LM/DALMWrapper.h" toolsToInvoke=""/>
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1783697903" name="/" resourcePath="TranslationModel/UG">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.592606983" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug" unusedChildren="">
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1088985490" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base.1976472988"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1662972728" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1970700220" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1604609677" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1168585173"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1002997408" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.2074660557"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1785961519" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug.933467113"/>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
<entry excluding="TranslationModel/UG|LM/oxlm|LM/Rand.h|LM/Rand.cpp|TranslationModel/CompactPT|LM/NeuralLMWrapper.cpp|FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
|
@ -536,6 +536,11 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/PCNTools.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>PDTAimp.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/PDTAimp.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>PDTAimp.h</name>
|
||||
<type>1</type>
|
||||
@ -1261,6 +1266,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SetSourcePhrase.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SkeletonChangeInput.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SkeletonChangeInput.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SkeletonChangeInput.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/SkeletonStatefulFF.cpp</name>
|
||||
<type>1</type>
|
||||
@ -1866,6 +1881,11 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/SkeletonPT.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/WordCoocTable.cpp</name>
|
||||
<type>1</type>
|
||||
@ -1951,6 +1971,11 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/LexicalReordering/SparseReordering.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/OSM-Feature/OSM-Feature</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/OSM-Feature/OpSequenceModel.cpp</name>
|
||||
<type>1</type>
|
||||
@ -2551,6 +2576,136 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/Scope3Parser/VarSpanTrieBuilder.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/Jamfile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/Jamfile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/Makefile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/Makefile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mmsapt.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mmsapt.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mmsapt_align.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt_align.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mmsapt_phrase_scorers.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mmsapt_phrase_scorers.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/ptable-lookup.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/ptable-lookup.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_phrase_key.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_phrase_key.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_phrase_scorers.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_phrase_scorers.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_base.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_base.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_coherence.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_coherence.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_lex1.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_lex1.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_logcnt.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_logcnt.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_pbwd.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_pbwd.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_pfwd.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_pfwd.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_provenance.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_provenance.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_rareness.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_rareness.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sapt_pscore_unaligned.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sapt_pscore_unaligned.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/sim-pe.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/sim-pe.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/spe-check-coverage.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/spe-check-coverage.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/spe-check-coverage2.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/spe-check-coverage2.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/try-align.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/try-align.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/util</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/fuzzy-match/Alignments.cpp</name>
|
||||
<type>1</type>
|
||||
@ -2626,6 +2781,16 @@
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/OSM-Feature/OSM-Feature/KenOSM.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/KenOSM.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>FF/OSM-Feature/OSM-Feature/KenOSM.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/OSM-Feature/KenOSM.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/CompactPT/bin/gcc-4.7</name>
|
||||
<type>2</type>
|
||||
@ -2636,6 +2801,351 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/pt.log</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/Jamfile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/Jamfile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/file_io</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/program_options</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/sampling</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/sorting</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/threading</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/Jamfile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Jamfile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/Makefile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/Makefile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/calc-coverage.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/calc-coverage.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/custom-pt.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/custom-pt.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mam2symal.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mam2symal.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mam_verify.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mam_verify.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mmlex-build.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mmlex-build.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mmlex-lookup.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mmlex-lookup.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mtt-build.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt-build.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mtt-count-words.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt-count-words.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mtt-dump.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt-dump.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/mtt.count.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/mtt.count.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/num_read_write.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/num_read_write.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/obsolete</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/symal2mam.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/symal2mam.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/test-dynamic-im-tsa.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/test-dynamic-im-tsa.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/tpt_pickler.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_pickler.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/tpt_pickler.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_pickler.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/tpt_tightindex.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tightindex.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/tpt_tightindex.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tightindex.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/tpt_tokenindex.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tokenindex.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/tpt_tokenindex.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_tokenindex.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/tpt_typedefs.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/tpt_typedefs.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_bitext.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_bitext.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_bitext.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_conll_bottom_up_token.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_conll_bottom_up_token.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_conll_record.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_conll_record.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_conll_record.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_conll_record.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_corpus_token.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_corpus_token.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_corpus_token.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_corpus_token.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_deptree.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_deptree.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_deptree.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_deptree.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_im_tsa.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_tsa.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_im_ttrack.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_im_ttrack.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer1.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_load_primer.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_load_primer.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_load_primer.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_load_primer.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_mm_2d_table.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_2d_table.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_mm_tsa.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_tsa.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_mm_tsa_tree_iterator.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_tsa_tree_iterator.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_mm_ttrack.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mm_ttrack.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_mmbitext.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_mmbitext.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_mmbitext.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_phrasepair.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_phrasepair.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_phrasepair.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_phrasepair.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_tsa_array_entry.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_array_entry.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_tsa_array_entry.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_array_entry.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_tsa_base.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_base.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_tsa_bitset_cache.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_bitset_cache.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_tsa_tree_iterator.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_tsa_tree_iterator.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_ttrack_base.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_base.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_ttrack_base.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_base.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_ttrack_position.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_position.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_ttrack_position.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_ttrack_position.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/ug_typedefs.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/ug_typedefs.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/util/Makefile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/Makefile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/util/ibm1-align</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/ibm1-align</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/util/ibm1-align.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/ibm1-align.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/util/tokenindex.dump.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/util/tokenindex.dump.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bin/BackwardTest.test/gcc-4.7/release</name>
|
||||
<type>2</type>
|
||||
@ -2651,6 +3161,81 @@
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/file_io/ug_stream.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/file_io/ug_stream.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/file_io/ug_stream.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/file_io/ug_stream.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/program_options/ug_get_options.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_get_options.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/program_options/ug_get_options.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_get_options.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/program_options/ug_splice_arglist.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/program_options/ug_splice_arglist.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/sampling/Sampling.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/sampling/Sampling.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/sorting/NBestList.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/sorting/NBestList.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/sorting/VectorIndexSorter.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/sorting/VectorIndexSorter.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.cc</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/threading/ug_thread_safe_counter.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/obsolete/ug_bitext_base.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/obsolete/ug_bitext_base.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on</name>
|
||||
<type>2</type>
|
||||
@ -2666,6 +3251,21 @@
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on/link-static</name>
|
||||
<type>2</type>
|
||||
@ -2681,6 +3281,21 @@
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on/link-static/threading-multi</name>
|
||||
<type>2</type>
|
||||
@ -3306,6 +3921,21 @@
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bin/BackwardTest.test/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/Backward.o</name>
|
||||
<type>1</type>
|
||||
@ -3496,6 +4126,71 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/CompactPT/bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/ThrowingFwrite.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmsapt_align.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ptable-lookup.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/sim-pe.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/spe-check-coverage2.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/try-align.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi</name>
|
||||
<type>2</type>
|
||||
<locationURI>virtual:/virtual</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/TranslationModel/CYKPlusParser/ChartRuleLookupManagerCYKPlus.o</name>
|
||||
<type>1</type>
|
||||
@ -3651,5 +4346,180 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/bin/gcc-4.7/release/debug-symbols-on/link-static/threading-multi/TranslationModel/fuzzy-match/create_xml.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_get_options.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_splice_arglist.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_stream.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/generic/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_thread_safe_counter.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/calc-coverage.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam2symal.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mam_verify.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-build.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mmlex-lookup.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-build.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-count-words.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/mtt-dump.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/symal2mam.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_pickler.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tightindex.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/tpt_tokenindex.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_bitext.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_conll_record.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_corpus_token.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_deptree.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_load_primer.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_mmbitext.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_phrasepair.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_tsa_array_entry.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_base.o</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/TranslationModel/UG/mm/bin/gcc-4.8/release/debug-symbols-on/link-static/threading-multi/ug_ttrack_position.o</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
||||
|
@ -16,9 +16,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
|
||||
#include <util/string_piece.hh>
|
||||
|
||||
#include "util/string_piece.hh"
|
||||
#include "Types.h"
|
||||
|
||||
namespace MosesTuning
|
||||
|
@ -17,6 +17,7 @@ License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <iterator>
|
||||
|
||||
@ -172,12 +173,14 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
|
||||
static const string kWeights = "weights";
|
||||
fs::directory_iterator dend;
|
||||
size_t fileCount = 0;
|
||||
|
||||
cerr << "Reading hypergraphs" << endl;
|
||||
for (fs::directory_iterator di(hypergraphDir); di != dend; ++di) {
|
||||
if (di->path().filename() == kWeights) continue;
|
||||
const fs::path& hgpath = di->path();
|
||||
if (hgpath.filename() == kWeights) continue;
|
||||
Graph graph(vocab_);
|
||||
size_t id = boost::lexical_cast<size_t>(di->path().stem().string());
|
||||
util::scoped_fd fd(util::OpenReadOrThrow(di->path().string().c_str()));
|
||||
size_t id = boost::lexical_cast<size_t>(hgpath.stem().string());
|
||||
util::scoped_fd fd(util::OpenReadOrThrow(hgpath.string().c_str()));
|
||||
//util::FilePiece file(di->path().string().c_str());
|
||||
util::FilePiece file(fd.release());
|
||||
ReadGraph(file,graph);
|
||||
@ -195,19 +198,24 @@ HypergraphHopeFearDecoder::HypergraphHopeFearDecoder
|
||||
}
|
||||
cerr << endl << "Done" << endl;
|
||||
|
||||
sentenceIds_.resize(graphs_.size());
|
||||
for (size_t i = 0; i < graphs_.size(); ++i) sentenceIds_[i] = i;
|
||||
if (!no_shuffle) {
|
||||
random_shuffle(sentenceIds_.begin(), sentenceIds_.end());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void HypergraphHopeFearDecoder::reset() {
|
||||
graphIter_ = graphs_.begin();
|
||||
sentenceIdIter_ = sentenceIds_.begin();
|
||||
}
|
||||
|
||||
void HypergraphHopeFearDecoder::next() {
|
||||
++graphIter_;
|
||||
sentenceIdIter_++;
|
||||
}
|
||||
|
||||
bool HypergraphHopeFearDecoder::finished() {
|
||||
return graphIter_ == graphs_.end();
|
||||
return sentenceIdIter_ == sentenceIds_.end();
|
||||
}
|
||||
|
||||
void HypergraphHopeFearDecoder::HopeFear(
|
||||
@ -215,10 +223,10 @@ void HypergraphHopeFearDecoder::HopeFear(
|
||||
const MiraWeightVector& wv,
|
||||
HopeFearData* hopeFear
|
||||
) {
|
||||
size_t sentenceId = graphIter_->first;
|
||||
size_t sentenceId = *sentenceIdIter_;
|
||||
SparseVector weights;
|
||||
wv.ToSparse(&weights);
|
||||
const Graph& graph = *(graphIter_->second);
|
||||
const Graph& graph = *(graphs_[sentenceId]);
|
||||
|
||||
ValType hope_scale = 1.0;
|
||||
HgHypothesis hopeHypo, fearHypo, modelHypo;
|
||||
@ -309,11 +317,11 @@ void HypergraphHopeFearDecoder::HopeFear(
|
||||
void HypergraphHopeFearDecoder::MaxModel(const AvgWeightVector& wv, vector<ValType>* stats) {
|
||||
assert(!finished());
|
||||
HgHypothesis bestHypo;
|
||||
size_t sentenceId = graphIter_->first;
|
||||
size_t sentenceId = *sentenceIdIter_;
|
||||
SparseVector weights;
|
||||
wv.ToSparse(&weights);
|
||||
vector<ValType> bg(kBleuNgramOrder*2+1);
|
||||
Viterbi(*(graphIter_->second), weights, 0, references_, sentenceId, bg, &bestHypo);
|
||||
Viterbi(*(graphs_[sentenceId]), weights, 0, references_, sentenceId, bg, &bestHypo);
|
||||
stats->resize(bestHypo.bleuStats.size());
|
||||
/*
|
||||
for (size_t i = 0; i < bestHypo.text.size(); ++i) {
|
||||
|
@ -140,7 +140,8 @@ private:
|
||||
//maps sentence Id to graph ptr
|
||||
typedef std::map<size_t, boost::shared_ptr<Graph> > GraphColl;
|
||||
GraphColl graphs_;
|
||||
GraphColl::const_iterator graphIter_;
|
||||
std::vector<size_t> sentenceIds_;
|
||||
std::vector<size_t>::const_iterator sentenceIdIter_;
|
||||
ReferenceSet references_;
|
||||
Vocab vocab_;
|
||||
};
|
||||
|
@ -301,6 +301,9 @@ int main(int argc, char* argv[])
|
||||
while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
|
||||
IFVERBOSE(1)
|
||||
ResetUserTime();
|
||||
|
||||
FeatureFunction::CallChangeSource(source);
|
||||
|
||||
TranslationTask *task = new TranslationTask(source, *ioWrapper);
|
||||
source = NULL; // task will delete source
|
||||
#ifdef WITH_THREADS
|
||||
|
@ -758,6 +758,9 @@ int main(int argc, char** argv)
|
||||
IFVERBOSE(1) {
|
||||
ResetUserTime();
|
||||
}
|
||||
|
||||
FeatureFunction::CallChangeSource(source);
|
||||
|
||||
// set up task of translating one sentence
|
||||
TranslationTask* task =
|
||||
new TranslationTask(lineCount,source, outputCollector.get(),
|
||||
|
@ -85,7 +85,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
|
||||
UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS");
|
||||
|
||||
// add to dictionary
|
||||
TargetPhrase *targetPhrase = new TargetPhrase();
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(NULL);
|
||||
Word &targetWord = targetPhrase->AddWord();
|
||||
targetWord.CreateUnknownWord(sourceWord);
|
||||
|
||||
@ -108,7 +108,7 @@ void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range
|
||||
// drop source word. create blank trans opt
|
||||
float unknownScore = FloorScore(-numeric_limits<float>::infinity());
|
||||
|
||||
TargetPhrase *targetPhrase = new TargetPhrase();
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(NULL);
|
||||
// loop
|
||||
const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
|
||||
UnknownLHSList::const_iterator iterLHS;
|
||||
|
@ -38,7 +38,7 @@ class DecodeGraph
|
||||
{
|
||||
protected:
|
||||
std::list<const DecodeStep*> m_steps;
|
||||
size_t m_position;
|
||||
size_t m_id; // contiguous unique id, starting from 0
|
||||
size_t m_maxChartSpan;
|
||||
size_t m_backoff;
|
||||
|
||||
@ -46,15 +46,15 @@ public:
|
||||
/**
|
||||
* position: The position of this graph within the decode sequence.
|
||||
**/
|
||||
DecodeGraph(size_t position)
|
||||
: m_position(position)
|
||||
DecodeGraph(size_t id)
|
||||
: m_id(id)
|
||||
, m_maxChartSpan(NOT_FOUND)
|
||||
, m_backoff(0)
|
||||
{}
|
||||
|
||||
// for chart decoding
|
||||
DecodeGraph(size_t position, size_t maxChartSpan)
|
||||
: m_position(position)
|
||||
DecodeGraph(size_t id, size_t maxChartSpan)
|
||||
: m_id(id)
|
||||
, m_maxChartSpan(maxChartSpan) {
|
||||
}
|
||||
|
||||
@ -90,8 +90,8 @@ public:
|
||||
m_backoff = backoff;
|
||||
}
|
||||
|
||||
size_t GetPosition() const {
|
||||
return m_position;
|
||||
size_t GetId() const {
|
||||
return m_id;
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -48,6 +48,7 @@
|
||||
#include "NieceTerminal.h"
|
||||
#include "SpanLength.h"
|
||||
#include "SyntaxRHS.h"
|
||||
#include "SkeletonChangeInput.h"
|
||||
|
||||
#include "moses/FF/SkeletonStatelessFF.h"
|
||||
#include "moses/FF/SkeletonStatefulFF.h"
|
||||
@ -208,6 +209,7 @@ FeatureRegistry::FeatureRegistry()
|
||||
MOSES_FNAME(SparseHieroReorderingFeature);
|
||||
MOSES_FNAME(SpanLength);
|
||||
MOSES_FNAME(SyntaxRHS);
|
||||
MOSES_FNAME(SkeletonChangeInput);
|
||||
|
||||
MOSES_FNAME(SkeletonStatelessFF);
|
||||
MOSES_FNAME(SkeletonStatefulFF);
|
||||
|
@ -34,6 +34,14 @@ void FeatureFunction::Destroy()
|
||||
RemoveAllInColl(s_staticColl);
|
||||
}
|
||||
|
||||
// Invokes the ChangeSource() hook of every feature function held in
// s_staticColl, in index order, handing each one the same 'input'.
// 'input' is a reference to the caller's pointer, so anything a hook does to
// that pointer is visible to the caller once this function returns.
void FeatureFunction::CallChangeSource(InputType *&input)
|
||||
{
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
const FeatureFunction &ff = *s_staticColl[i];
|
||||
// ChangeSource() is called through a const reference, i.e. it is a const member.
ff.ChangeSource(input);
|
||||
}
|
||||
}
|
||||
|
||||
FeatureFunction::
|
||||
FeatureFunction(const std::string& line)
|
||||
: m_tuneable(true)
|
||||
|
@ -48,6 +48,8 @@ public:
|
||||
static FeatureFunction &FindFeatureFunction(const std::string& name);
|
||||
static void Destroy();
|
||||
|
||||
static void CallChangeSource(InputType *&input);
|
||||
|
||||
FeatureFunction(const std::string &line);
|
||||
FeatureFunction(size_t numScoreComponents, const std::string &line);
|
||||
virtual bool IsStateless() const = 0;
|
||||
@ -109,6 +111,10 @@ public:
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const = 0;
|
||||
|
||||
// override this method if you want to change the input before decoding
|
||||
virtual void ChangeSource(InputType *&input) const
|
||||
{}
|
||||
|
||||
// This method is called once all the translation options are retrieved from the phrase table, and
|
||||
// just before search.
|
||||
// 'inputPath' is guaranteed to be the raw substring from the input. No factors were added or taken away
|
||||
|
@ -25,7 +25,6 @@ OpSequenceModel::~OpSequenceModel() {
|
||||
|
||||
void OpSequenceModel :: readLanguageModel(const char *lmFile)
|
||||
{
|
||||
|
||||
string unkOp = "_TRANS_SLF_";
|
||||
OSM = ConstructOSMLM(m_lmPath);
|
||||
|
||||
|
@ -1,11 +1,16 @@
|
||||
|
||||
#include <vector>
|
||||
#include "PhrasePenalty.h"
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "moses/TranslationModel/PhraseDictionary.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Builds the feature from its configuration line.
// The literal 1 is forwarded to the StatelessFeatureFunction constructor
// (presumably the number of dense score components -- confirm against the base
// class). Per-phrase-table mode starts out disabled.
// NOTE(review): ReadParameters() is expected to route a "per-phrase-table"
// option to SetParameter(); verify in the base class.
PhrasePenalty::PhrasePenalty(const std::string &line)
|
||||
: StatelessFeatureFunction(1, line)
|
||||
, m_perPhraseTable(false)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
@ -15,8 +20,34 @@ void PhrasePenalty::EvaluateInIsolation(const Phrase &source
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
if (m_perPhraseTable) {
|
||||
const PhraseDictionary *pt = targetPhrase.GetContainer();
|
||||
if (pt) {
|
||||
size_t ptId = pt->GetId();
|
||||
UTIL_THROW_IF2(ptId >= m_numScoreComponents, "Wrong number of scores");
|
||||
|
||||
vector<float> scores(m_numScoreComponents, 0);
|
||||
scores[ptId] = 1.0f;
|
||||
|
||||
scoreBreakdown.Assign(this, scores);
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
scoreBreakdown.Assign(this, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
// Handles a single key/value option of this feature.
// "per-phrase-table" is parsed as a bool and stored in m_perPhraseTable;
// every other key is delegated to StatelessFeatureFunction::SetParameter().
void PhrasePenalty::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "per-phrase-table") {
|
||||
m_perPhraseTable =Scan<bool>(value);
|
||||
}
|
||||
else {
|
||||
// not an option owned by this class: let the base class deal with it
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
@ -34,6 +34,10 @@ public:
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const
|
||||
{}
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
bool m_perPhraseTable;
|
||||
};
|
||||
|
||||
} //namespace
|
||||
|
92
moses/FF/SkeletonChangeInput.cpp
Normal file
92
moses/FF/SkeletonChangeInput.cpp
Normal file
@ -0,0 +1,92 @@
|
||||
#include <vector>
|
||||
#include "SkeletonChangeInput.h"
|
||||
#include "moses/ScoreComponentCollection.h"
|
||||
#include "moses/TargetPhrase.h"
|
||||
#include "moses/Sentence.h"
|
||||
#include "moses/FactorCollection.h"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Builds the feature from its configuration line.
// The literal 2 is forwarded to the StatelessFeatureFunction constructor
// (presumably the number of dense score components -- confirm against the base
// class), then the options on the line are processed via ReadParameters().
SkeletonChangeInput::SkeletonChangeInput(const std::string &line)
|
||||
:StatelessFeatureFunction(2, line)
|
||||
{
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
// Skeleton scoring: adds fixed placeholder values to 'scoreBreakdown'.
// Two dense scores (1.5 and 0.3) and one sparse score named "sparse-name"
// (2.4) are added for this feature on every call.
// 'source', 'targetPhrase' and 'estimatedFutureScore' are not used here.
void SkeletonChangeInput::EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const
|
||||
{
|
||||
// dense scores
|
||||
// indices 0 and 1 are written below, so m_numScoreComponents must be at least 2
vector<float> newScores(m_numScoreComponents);
|
||||
newScores[0] = 1.5;
|
||||
newScores[1] = 0.3;
|
||||
scoreBreakdown.PlusEquals(this, newScores);
|
||||
|
||||
// sparse scores
|
||||
scoreBreakdown.PlusEquals(this, "sparse-name", 2.4);
|
||||
|
||||
}
|
||||
|
||||
// For a target phrase that reports a non-zero number of non-terminals, adds a
// score vector whose first component is negative infinity (the remaining
// components stay value-initialised to 0). Other phrases are left untouched.
// 'input', 'inputPath', 'stackVec' and 'estimatedFutureScore' are not used here.
// NOTE(review): std::numeric_limits lives in <limits>, which is not among the
// headers this file includes directly; it is presumably pulled in transitively.
void SkeletonChangeInput::EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore) const
|
||||
{
|
||||
if (targetPhrase.GetNumNonTerminals()) {
|
||||
vector<float> newScores(m_numScoreComponents);
|
||||
newScores[0] = - std::numeric_limits<float>::infinity();
|
||||
scoreBreakdown.PlusEquals(this, newScores);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// No-op (Hypothesis overload): the body is empty, so 'accumulator' is never
// modified and 'hypo' is never read.
void SkeletonChangeInput::EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
// No-op (ChartHypothesis overload): the body is empty, so 'accumulator' is
// never modified and 'hypo' is never read.
void SkeletonChangeInput::EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const
|
||||
{}
|
||||
|
||||
// Example input rewriter: sets factor 1 of every word in the sentence to a
// factor built from (at most) the first 4 characters of the word's factor 0.
// Throws via UTIL_THROW_IF2 when 'input' is not a Sentence.
// The pointer 'input' itself is not reassigned; the object it points to is
// modified in place.
void SkeletonChangeInput::ChangeSource(InputType *&input) const
|
||||
{
|
||||
// add factor[1] to each word. Created from the first 4 letters of factor[0]
|
||||
|
||||
Sentence *sentence = dynamic_cast<Sentence*>(input);
|
||||
UTIL_THROW_IF2(sentence == NULL, "Not a sentence input");
|
||||
|
||||
FactorCollection &fc = FactorCollection::Instance();
|
||||
|
||||
size_t size = sentence->GetSize();
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
// explicitly qualified call to Phrase::GetWord; the result is bound to a
// non-const reference so the word can be modified below
Word &word = sentence->Phrase::GetWord(i);
|
||||
// NOTE(review): factor0 is dereferenced without a null check -- assumes
// every input word has factor 0 set; confirm against the input reader.
const Factor *factor0 = word[0];
|
||||
|
||||
std::string str = factor0->GetString().as_string();
|
||||
// strings of 4 characters or fewer are kept whole
if (str.length() > 4) {
|
||||
str = str.substr(0, 4);
|
||||
}
|
||||
|
||||
const Factor *factor1 = fc.AddFactor(str);
|
||||
word.SetFactor(1, factor1);
|
||||
}
|
||||
}
|
||||
|
||||
// Handles a single key/value option of this feature.
// The key "arg" is recognised but its value is currently discarded (placeholder
// branch); every other key is delegated to
// StatelessFeatureFunction::SetParameter().
void SkeletonChangeInput::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "arg") {
|
||||
// set value here
|
||||
} else {
|
||||
StatelessFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
41
moses/FF/SkeletonChangeInput.h
Normal file
41
moses/FF/SkeletonChangeInput.h
Normal file
@ -0,0 +1,41 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Skeleton stateless feature function that, besides the usual scoring
// callbacks, declares a ChangeSource() member for modifying the input.
class SkeletonChangeInput : public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
// 'line' is the feature's configuration line.
SkeletonChangeInput(const std::string &line);
|
||||
|
||||
// Always returns true; 'mask' is not inspected.
bool IsUseable(const FactorMask &mask) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Scoring callback taking only the source phrase and the target phrase.
void EvaluateInIsolation(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection &estimatedFutureScore) const;
|
||||
|
||||
// Hook for altering the input; 'input' is a reference to the caller's pointer.
void ChangeSource(InputType *&input) const;
|
||||
|
||||
// Scoring callback that additionally receives the input and the input path.
// 'estimatedFutureScore' is optional and defaults to NULL.
void EvaluateWithSourceContext(const InputType &input
|
||||
, const InputPath &inputPath
|
||||
, const TargetPhrase &targetPhrase
|
||||
, const StackVec *stackVec
|
||||
, ScoreComponentCollection &scoreBreakdown
|
||||
, ScoreComponentCollection *estimatedFutureScore = NULL) const;
|
||||
// Scoring callback for a Hypothesis.
void EvaluateWhenApplied(const Hypothesis& hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
// Scoring callback for a ChartHypothesis.
void EvaluateWhenApplied(const ChartHypothesis &hypo,
|
||||
ScoreComponentCollection* accumulator) const;
|
||||
|
||||
// Receives one key/value option of the feature.
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -85,14 +85,13 @@ Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &tran
|
||||
, m_wordDeleted(false)
|
||||
, m_totalScore(0.0f)
|
||||
, m_futureScore(0.0f)
|
||||
, m_scoreBreakdown(prevHypo.GetScoreBreakdown())
|
||||
, m_ffStates(prevHypo.m_ffStates.size())
|
||||
, m_arcList(NULL)
|
||||
, m_transOpt(transOpt)
|
||||
, m_manager(prevHypo.GetManager())
|
||||
, m_id(m_manager.GetNextHypoId())
|
||||
{
|
||||
m_scoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
|
||||
m_currScoreBreakdown.PlusEquals(transOpt.GetScoreBreakdown());
|
||||
|
||||
// assert that we are not extending our hypothesis by retranslating something
|
||||
// that this hypothesis has already translated!
|
||||
@ -214,7 +213,7 @@ void Hypothesis::EvaluateWith(const StatefulFeatureFunction &sfff,
|
||||
m_ffStates[state_idx] = sfff.EvaluateWhenApplied(
|
||||
*this,
|
||||
m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
|
||||
&m_scoreBreakdown);
|
||||
&m_currScoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
@ -222,7 +221,7 @@ void Hypothesis::EvaluateWith(const StatelessFeatureFunction& slff)
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
if (! staticData.IsFeatureFunctionIgnored( slff )) {
|
||||
slff.EvaluateWhenApplied(*this, &m_scoreBreakdown);
|
||||
slff.EvaluateWhenApplied(*this, &m_currScoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
@ -256,7 +255,7 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore)
|
||||
if (! staticData.IsFeatureFunctionIgnored(ff)) {
|
||||
m_ffStates[i] = ff.EvaluateWhenApplied(*this,
|
||||
m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
|
||||
&m_scoreBreakdown);
|
||||
&m_currScoreBreakdown);
|
||||
}
|
||||
}
|
||||
|
||||
@ -269,7 +268,8 @@ void Hypothesis::Evaluate(const SquareMatrix &futureScore)
|
||||
m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
|
||||
|
||||
// TOTAL
|
||||
m_totalScore = m_scoreBreakdown.GetWeightedScore() + m_futureScore;
|
||||
m_totalScore = m_currScoreBreakdown.GetWeightedScore() + m_futureScore;
|
||||
if (m_prevHypo) m_totalScore += m_prevHypo->GetScore();
|
||||
|
||||
IFVERBOSE(2) {
|
||||
m_manager.GetSentenceStats().StopTimeEstimateScore();
|
||||
@ -315,7 +315,7 @@ void Hypothesis::PrintHypothesis() const
|
||||
// TRACE_ERR( "\tlanguage model cost "); // <<m_score[ScoreType::LanguageModelScore]<<endl;
|
||||
// TRACE_ERR( "\tword penalty "); // <<(m_score[ScoreType::WordPenalty]*weightWordPenalty)<<endl;
|
||||
TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
|
||||
TRACE_ERR( "\tunweighted feature scores: " << m_scoreBreakdown << endl);
|
||||
TRACE_ERR( "\tunweighted feature scores: " << m_currScoreBreakdown << endl);
|
||||
//PrintLMScores();
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include "Phrase.h"
|
||||
#include "TypeDef.h"
|
||||
@ -77,7 +80,9 @@ protected:
|
||||
bool m_wordDeleted;
|
||||
float m_totalScore; /*! score so far */
|
||||
float m_futureScore; /*! estimated future cost to translate rest of sentence */
|
||||
ScoreComponentCollection m_scoreBreakdown; /*! scores for this hypothesis */
|
||||
/*! sum of scores of this hypothesis, and previous hypotheses. Lazily initialised. */
|
||||
mutable boost::scoped_ptr<ScoreComponentCollection> m_scoreBreakdown;
|
||||
ScoreComponentCollection m_currScoreBreakdown; /*! scores for this hypothesis only */
|
||||
std::vector<const FFState*> m_ffStates;
|
||||
const Hypothesis *m_winningHypo;
|
||||
ArcList *m_arcList; /*! all arcs that end at the same trellis point as this hypothesis */
|
||||
@ -228,7 +233,14 @@ public:
|
||||
return m_arcList;
|
||||
}
|
||||
const ScoreComponentCollection& GetScoreBreakdown() const {
|
||||
return m_scoreBreakdown;
|
||||
if (!m_scoreBreakdown.get()) {
|
||||
m_scoreBreakdown.reset(new ScoreComponentCollection());
|
||||
m_scoreBreakdown->PlusEquals(m_currScoreBreakdown);
|
||||
if (m_prevHypo) {
|
||||
m_scoreBreakdown->PlusEquals(m_prevHypo->GetScoreBreakdown());
|
||||
}
|
||||
}
|
||||
return *(m_scoreBreakdown.get());
|
||||
}
|
||||
float GetTotalScore() const {
|
||||
return m_totalScore;
|
||||
|
@ -62,7 +62,7 @@ MockHypothesisGuard::MockHypothesisGuard(
|
||||
for (; ti != targetSegments.end() && ai != alignments.end(); ++ti,++ai) {
|
||||
Hypothesis* prevHypo = m_hypothesis;
|
||||
WordsRange wordsRange(ai->first,ai->second);
|
||||
m_targetPhrases.push_back(TargetPhrase());
|
||||
m_targetPhrases.push_back(TargetPhrase(NULL));
|
||||
// m_targetPhrases.back().CreateFromString(Input, factors, *ti, "|", NULL);
|
||||
m_targetPhrases.back().CreateFromString(Input, factors, *ti, NULL);
|
||||
m_toptions.push_back(new TranslationOption
|
||||
|
463
moses/PDTAimp.cpp
Normal file
463
moses/PDTAimp.cpp
Normal file
@ -0,0 +1,463 @@
|
||||
#include "PDTAimp.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
// Sets up the implementation object for the adaptor 'p'.
// No dictionary is attached yet (m_dict is 0), caching is switched on and the
// candidate counters start at zero. m_numInputScores stays 0 unless 'p' is the
// first entry of PhraseDictionary::GetColl(), in which case it takes the input
// feature's score-component count.
PDTAimp::PDTAimp(PhraseDictionaryTreeAdaptor *p)
|
||||
: m_dict(0),
|
||||
m_obj(p),
|
||||
useCache(1),
|
||||
totalE(0),
|
||||
distinctE(0) {
|
||||
m_numInputScores = 0;
|
||||
// NOTE(review): 'staticData' is not used anywhere else in this constructor.
const StaticData &staticData = StaticData::Instance();
|
||||
m_inputFeature = &InputFeature::Instance();
|
||||
|
||||
// NOTE(review): m_inputFeature was just assigned from the address of
// Instance()'s result; whether this check can ever fail depends on what
// Instance() returns -- confirm.
if (m_inputFeature) {
|
||||
// assumes at least one phrase dictionary is registered -- TODO confirm
const PhraseDictionary *firstPt = PhraseDictionary::GetColl()[0];
|
||||
if (firstPt == m_obj) {
|
||||
m_numInputScores = m_inputFeature->GetNumScoreComponents();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Releases cached data and the dictionary, then, at verbose level 2 or higher,
// writes target-candidate and path statistics to the trace/error stream.
// NOTE(review): CleanUp() asserts m_dict, so destroying an object on which
// Create() was never called trips that assertion in builds with asserts enabled.
PDTAimp::~PDTAimp() {
|
||||
CleanUp();
|
||||
delete m_dict;
|
||||
|
||||
if (StaticData::Instance().GetVerboseLevel() >= 2) {
|
||||
|
||||
// Percentages are computed as count/(0.01*totalE).
// NOTE(review): when totalE is 0 the divisor is 0.0.
TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
|
||||
<<distinctE<<" ("<<distinctE/(0.01*totalE)<<"); duplicates="
|
||||
<<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE)
|
||||
<<")\n");
|
||||
|
||||
TRACE_ERR("\npath statistics\n");
|
||||
|
||||
// Each of the three vectors below is printed from its second element on
// (begin()+1); the non-empty check makes begin()+1 valid.
if(path1Best.size()) {
|
||||
TRACE_ERR("1-best: ");
|
||||
std::copy(path1Best.begin()+1,path1Best.end(),
|
||||
std::ostream_iterator<size_t>(std::cerr," \t"));
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
if(pathCN.size()) {
|
||||
TRACE_ERR("CN (full): ");
|
||||
// values are passed through Exp before being printed
std::transform(pathCN.begin()+1
|
||||
,pathCN.end()
|
||||
,std::ostream_iterator<double>(std::cerr," \t")
|
||||
,Exp);
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
if(pathExplored.size()) {
|
||||
TRACE_ERR("CN (explored): ");
|
||||
std::copy(pathExplored.begin()+1,pathExplored.end(),
|
||||
std::ostream_iterator<size_t>(std::cerr," \t"));
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Drops all per-query state: asks the dictionary to free its memory, deletes
// every collection recorded in m_tgtColls, and empties the phrase cache, the
// range cache and uniqSrcPhr. Requires m_dict to be set (asserted).
void PDTAimp::CleanUp() {
|
||||
assert(m_dict);
|
||||
m_dict->FreeMemory();
|
||||
// m_tgtColls holds the collections handed out by GetTargetPhraseCollection();
// they are deleted here, so pointers previously returned become invalid.
for(size_t i=0; i<m_tgtColls.size(); ++i) delete m_tgtColls[i];
|
||||
m_tgtColls.clear();
|
||||
m_cache.clear();
|
||||
m_rangeCache.clear();
|
||||
uniqSrcPhr.clear();
|
||||
}
|
||||
|
||||
// Looks up the translation candidates for source phrase 'src'.
// Returns 0 for an empty phrase, when the dictionary has no candidates, or
// when pruning leaves nothing; otherwise returns a collection that is recorded
// in m_tgtColls (and deleted by CleanUp()), so the caller must not delete it.
// With useCache set, results are memoised in m_cache keyed by 'src'; a miss
// is memoised too, because the 0 placeholder inserted up front is left in place.
// Requires m_dict to be set (asserted).
TargetPhraseCollectionWithSourcePhrase const*
|
||||
PDTAimp::GetTargetPhraseCollection(Phrase const &src) const {
|
||||
|
||||
assert(m_dict);
|
||||
if(src.GetSize()==0) return 0;
|
||||
|
||||
std::pair<MapSrc2Tgt::iterator,bool> piter;
|
||||
if(useCache) {
|
||||
// insert a 0 placeholder; if the key already existed, return the stored value
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
|
||||
if(!piter.second) return piter.first->second;
|
||||
} else if (m_cache.size()) {
|
||||
// caching disabled but the cache is non-empty: answer from the cache only
// and never query the dictionary
MapSrc2Tgt::const_iterator i=m_cache.find(src);
|
||||
return (i!=m_cache.end() ? i->second : 0);
|
||||
}
|
||||
|
||||
std::vector<std::string> srcString(src.GetSize());
|
||||
// convert source Phrase into vector of strings
|
||||
for(size_t i=0; i<srcString.size(); ++i) {
|
||||
Factors2String(src.GetWord(i),srcString[i]);
|
||||
}
|
||||
|
||||
// get target phrases in string representation
|
||||
std::vector<StringTgtCand> cands;
|
||||
std::vector<std::string> wacands;
|
||||
m_dict->GetTargetCandidates(srcString,cands,wacands);
|
||||
if(cands.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//TODO: Multiple models broken here
|
||||
// NOTE(review): 'weights' is not referenced again in this function.
std::vector<float> weights = StaticData::Instance().GetWeights(m_obj);
|
||||
|
||||
std::vector<TargetPhrase> tCands;
|
||||
tCands.reserve(cands.size());
|
||||
|
||||
// pairs of (negated future score, index into tCands)
std::vector<std::pair<float,size_t> > costs;
|
||||
costs.reserve(cands.size());
|
||||
|
||||
std::vector<Phrase> sourcePhrases;
|
||||
sourcePhrases.reserve(cands.size());
|
||||
|
||||
|
||||
// convert into TargetPhrases
|
||||
for(size_t i=0; i<cands.size(); ++i) {
|
||||
TargetPhrase targetPhrase(m_obj);
|
||||
|
||||
StringTgtCand::Tokens const& factorStrings=cands[i].tokens;
|
||||
Scores const& probVector=cands[i].scores;
|
||||
|
||||
// apply TransformScore, then FloorScore, to every candidate score
std::vector<float> scoreVector(probVector.size());
|
||||
std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
|
||||
TransformScore);
|
||||
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
|
||||
FloorScore);
|
||||
|
||||
//sparse features.
|
||||
//These are already in log-space
|
||||
for (size_t j = 0; j < cands[i].fnames.size(); ++j) {
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
|
||||
}
|
||||
|
||||
// assumes wacands has an entry for every candidate -- TODO confirm
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector, Scores(0), &wacands[i], &src);
|
||||
|
||||
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
||||
tCands.push_back(targetPhrase);
|
||||
|
||||
// every candidate is paired with the same source phrase
sourcePhrases.push_back(src);
|
||||
}
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase *rv;
|
||||
rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
|
||||
if(rv->IsEmpty()) {
|
||||
delete rv;
|
||||
return 0;
|
||||
} else {
|
||||
// replace the 0 placeholder inserted at the top with the real result
if(useCache) piter.first->second=rv;
|
||||
m_tgtColls.push_back(rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Allocates the binary phrase dictionary and loads it from 'filePath'.
// 'input' and 'output' are stored in m_input and m_output.
// Throws (UTIL_THROW2) when "<filePath>.binphr.idx" does not exist, and
// terminates the process with exit(1) when reading the table fails.
// NOTE(review): 'weight' is not used in this function, and m_dict is
// overwritten without releasing any previous value -- presumably Create() is
// called at most once per object; confirm.
void PDTAimp::Create(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, const std::string &filePath
|
||||
, const std::vector<float> &weight
|
||||
) {
|
||||
|
||||
// set my members
|
||||
m_dict=new PhraseDictionaryTree();
|
||||
m_input=input;
|
||||
m_output=output;
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
m_dict->NeedAlignmentInfo(staticData.NeedAlignmentInfo());
|
||||
|
||||
// the existence of the ".binphr.idx" file is used as the test for a binary table
std::string binFname=filePath+".binphr.idx";
|
||||
if(!FileExists(binFname.c_str())) {
|
||||
UTIL_THROW2( "bin ttable does not exist");
|
||||
//TRACE_ERR( "bin ttable does not exist -> create it\n");
|
||||
//InputFileStream in(filePath);
|
||||
//m_dict->Create(in,filePath);
|
||||
}
|
||||
VERBOSE(1,"reading bin ttable\n");
|
||||
// m_dict->Read(filePath);
|
||||
bool res=m_dict->Read(filePath);
|
||||
if (!res) {
|
||||
std::stringstream strme;
|
||||
strme << "bin ttable was read in a wrong way\n";
|
||||
UserMessage::Add(strme.str());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Pre-compute the target phrase collections for every span of the
 * confusion network `src` and store them in m_rangeCache.
 *
 * The function
 *  1. gathers path-count statistics (kept in log space) for the network
 *     and folds them into the members pathCN / path1Best / pathExplored,
 *  2. walks the network depth-first with an explicit stack, following the
 *     prefix tree of m_dict along each arc (epsilon arcs stay on the same
 *     tree node),
 *  3. for every reached span keeps, per distinct target token sequence,
 *     the best-scoring path (scores, input scores and source phrase),
 *  4. turns those entries into TargetPhrase objects, prunes them with
 *     PruneTargetCandidates and stores non-empty results at
 *     m_rangeCache[range.first][range.second-1]; the collections are also
 *     registered in m_tgtColls.
 *
 * Throws (UTIL_THROW_IF2) when a state ends outside the network, when an
 * arc carries fewer than m_numInputScores dense scores, or when the number
 * of translation scores does not match the number of weights.
 */
void PDTAimp::CacheSource(ConfusionNet const& src)
{
  assert(m_dict);
  const size_t srcSize=src.GetSize();

  // Per-length counters for this network; index 0 is unused.
  std::vector<size_t> exploredPaths(srcSize+1,0);
  std::vector<double> exPathsD(srcSize+1,-1.0); // -1.0 marks "no value yet"

  // collect some statistics
  std::vector<size_t> cnDepths(srcSize,0);
  for(size_t i=0; i<srcSize; ++i) cnDepths[i]=src[i].size();

  // For every span length, accumulate (in log space) the number of paths
  // through each window of that length.
  for(size_t len=1; len<=srcSize; ++len)
    for(size_t i=0; i<=srcSize-len; ++i) {
      double pd=0.0;
      for(size_t k=i; k<i+len; ++k) pd+=log(1.0*cnDepths[k]);
      exPathsD[len]=(exPathsD[len]>=0.0 ? addLogScale(pd,exPathsD[len]) : pd);
    }

  // update global statistics
  if(pathCN.size()<=srcSize) pathCN.resize(srcSize+1,-1.0);
  for(size_t len=1; len<=srcSize; ++len)
    pathCN[len]=pathCN[len]>=0.0 ? addLogScale(pathCN[len],exPathsD[len]) : exPathsD[len];

  if(path1Best.size()<=srcSize) path1Best.resize(srcSize+1,0);
  for(size_t len=1; len<=srcSize; ++len) path1Best[len]+=srcSize-len+1;

  if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size()) {
    TRACE_ERR("path stats for current CN: \nCN (full): ");
    std::transform(exPathsD.begin()+1
                   ,exPathsD.end()
                   ,std::ostream_iterator<double>(std::cerr," ")
                   ,Exp);
    TRACE_ERR("\n");
  }

  // target token sequence -> best scores seen for it on a given span
  typedef std::map<StringTgtCand::Tokens,TScores> E2Costs;

  std::map<Range,E2Costs> cov2cand;

  // Seed the search with one empty state per start column.
  std::vector<State> stack;
  for(Position i=0 ; i < srcSize ; ++i)
    stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));

  std::vector<float> weightTrans = StaticData::Instance().GetWeights(m_obj);
  std::vector<float> weightInput = StaticData::Instance().GetWeights(m_inputFeature);
  float weightWP = StaticData::Instance().GetWeightWordPenalty();

  while(!stack.empty()) {
    State curr(stack.back());
    stack.pop_back();

    UTIL_THROW_IF2(curr.end() >= srcSize, "Error");
    const ConfusionNet::Column &currCol=src[curr.end()];
    // in a given column, loop over all possibilities
    for(size_t colidx=0; colidx<currCol.size(); ++colidx) {
      const Word& w=currCol[colidx].first; // w=the colidx^th possibility in the current column
      std::string s;
      Factors2String(w,s);
      bool isEpsilon=(s=="" || s==EPSILON);

      // every arc must carry at least m_numInputScores dense scores
      UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
                     "Incorrect number of input scores");

      // do not start with epsilon (except at first position)
      if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;

      // At a given node in the prefix tree, look to see if w defines an edge to
      // another node (Extend). Stay at the same node if w==EPSILON
      PPtr nextP = (isEpsilon ? curr.ptr : m_dict->Extend(curr.ptr,s));

      if(nextP) { // w is a word that should be considered
        Range newRange(curr.begin(),curr.end()+src.GetColumnIncrement(curr.end(),colidx));

        //add together the link scores from the current state and the new arc
        float inputScoreSum = 0;
        std::vector<float> newInputScores(m_numInputScores,0.0);
        if (m_numInputScores) {
          // Only the first m_numInputScores entries are combined: the guard
          // above admits arcs with MORE dense scores, and iterating over all
          // of them would write past newInputScores and read past the
          // state's score vector.
          std::transform(currCol[colidx].second.denseScores.begin(),
                         currCol[colidx].second.denseScores.begin()+m_numInputScores,
                         curr.GetScores().begin(),
                         newInputScores.begin(),
                         std::plus<float>());

          //we need to sum up link weights (excluding realWordCount, which isn't in numLinkParams)
          //if the sum is too low, then we won't expand this.
          //TODO: dodgy! shouldn't we consider weights here? what about zero-weight params?
          inputScoreSum = std::accumulate(newInputScores.begin(),newInputScores.begin()+m_numInputScores,0.0);
        }

        Phrase newSrc(curr.src);
        if(!isEpsilon) newSrc.AddWord(w);
        if(newRange.second<srcSize && inputScoreSum>LOWEST_SCORE) {
          // if there is more room to grow, add a new state onto the queue
          // to be explored that represents [begin, curEnd+)
          stack.push_back(State(newRange,nextP,newInputScores));
          stack.back().src=newSrc;
        }

        std::vector<StringTgtCand> tcands;
        // now, look up the target candidates (aprx. TargetPhraseCollection) for
        // the current path through the CN
        m_dict->GetTargetCandidates(nextP,tcands);

        // count this path under its span length, growing the table if needed
        if(newRange.second>=exploredPaths.size()+newRange.first)
          exploredPaths.resize(newRange.second-newRange.first+1,0);
        ++exploredPaths[newRange.second-newRange.first];

        totalE+=tcands.size();

        if(tcands.size()) {
          E2Costs& e2costs=cov2cand[newRange];
          Phrase const* srcPtr=uniqSrcPhr(newSrc);
          for(size_t i=0; i<tcands.size(); ++i) {
            // one slot per score component of this phrase table
            std::vector<float> transcores(m_obj->GetNumScoreComponents());
            UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
                           "Incorrect number of translation scores");

            //put in phrase table scores, logging as we insert
            // NOTE(review): assumes tcands[i].scores holds no more entries
            // than transcores - confirm against the binary table format.
            std::transform(tcands[i].scores.begin()
                           ,tcands[i].scores.end()
                           ,transcores.begin()
                           ,TransformScore);

            //tally up
            float score=std::inner_product(transcores.begin(), transcores.end(), weightTrans.begin(), 0.0f);

            // input feature
            score +=std::inner_product(newInputScores.begin(), newInputScores.end(), weightInput.begin(), 0.0f);

            //count word penalty
            score-=tcands[i].tokens.size() * weightWP;

            std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].tokens,TScores()));

            if(p.second) ++distinctE;

            // keep only the best-scoring path for each target token sequence
            TScores & scores=p.first->second;
            if(p.second || scores.total<score) {
              scores.total=score;
              scores.transScore=transcores;
              scores.inputScores=newInputScores;
              scores.src=srcPtr;
            }
          }
        }
      }
    }
  } // end while(!stack.empty())

  if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size()) {
    TRACE_ERR("CN (explored): ");
    std::copy(exploredPaths.begin()+1,exploredPaths.end(),
              std::ostream_iterator<size_t>(std::cerr," "));
    TRACE_ERR("\n");
  }

  // fold this network's explored-path counts into the running totals
  if(pathExplored.size()<exploredPaths.size())
    pathExplored.resize(exploredPaths.size(),0);
  for(size_t len=1; len<=srcSize; ++len)
    pathExplored[len]+=exploredPaths[len];

  m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));

  // Convert the collected candidates of every span into a pruned collection.
  for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
    assert(i->first.first<m_rangeCache.size());
    assert(i->first.second>0);
    assert(static_cast<size_t>(i->first.second-1)<m_rangeCache[i->first.first].size());
    assert(m_rangeCache[i->first.first][i->first.second-1]==0);

    std::vector<TargetPhrase> tCands;
    tCands.reserve(i->second.size());

    std::vector<std::pair<float,size_t> > costs;
    costs.reserve(i->second.size());

    std::vector<Phrase> sourcePhrases;
    sourcePhrases.reserve(i->second.size());

    for(E2Costs::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
      TScores const & scores=j->second;
      TargetPhrase targetPhrase(m_obj);
      CreateTargetPhrase(targetPhrase
                         , j ->first
                         , scores.transScore
                         , scores.inputScores
                         , NULL
                         , scores.src);
      // cost = negated future score, paired with the candidate's index
      costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
      tCands.push_back(targetPhrase);

      sourcePhrases.push_back(*scores.src);

      //std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
    }

    TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases);

    if(rv->IsEmpty())
      delete rv;
    else {
      m_rangeCache[i->first.first][i->first.second-1]=rv;
      m_tgtColls.push_back(rv);
    }
  }
  // free memory
  m_dict->FreeMemory();
}
|
||||
|
||||
/**
 * Fill `targetPhrase` from the string representation of a phrase-table
 * entry and evaluate it against its source phrase.
 *
 * @param targetPhrase     phrase to append words and scores to
 * @param factorStrings    one entry per target word; each is split on the
 *                         configured factor delimiter and its pieces are
 *                         stored under the factor types listed in m_output
 * @param transVector      scores assigned to this phrase table (m_obj)
 * @param inputVector      scores assigned to the input feature; only used
 *                         when m_numInputScores is non-zero
 * @param alignmentString  optional alignment description; ignored when NULL
 * @param srcPtr           source phrase handed to TargetPhrase::Evaluate;
 *                         must not be NULL
 *
 * Throws (UTIL_THROW_IF2) when `srcPtr` is NULL, since the phrase cannot
 * be evaluated without its source side.
 */
void PDTAimp::CreateTargetPhrase(TargetPhrase& targetPhrase,
                                 StringTgtCand::Tokens const& factorStrings,
                                 Scores const& transVector,
                                 Scores const& inputVector,
                                 const std::string *alignmentString,
                                 Phrase const* srcPtr) const
{
  // srcPtr is dereferenced unconditionally below; fail loudly instead of
  // dereferencing a null pointer.
  UTIL_THROW_IF2(srcPtr == NULL,
                 "Source phrase is required to create a target phrase");

  FactorCollection &factorCollection = FactorCollection::Instance();
  // The delimiter does not change between words, so look it up once.
  const std::string &factorDelimiter = StaticData::Instance().GetFactorDelimiter();

  for(size_t k=0; k<factorStrings.size(); ++k) {
    util::TokenIter<util::MultiCharacter, false> word(*factorStrings[k], factorDelimiter);
    Word& w=targetPhrase.AddWord();
    // NOTE(review): assumes each token has at least m_output.size() factors - confirm.
    for(size_t l=0; l<m_output.size(); ++l, ++word) {
      w[m_output[l]]= factorCollection.AddFactor(*word);
    }
  }

  if (alignmentString) {
    targetPhrase.SetAlignmentInfo(*alignmentString);
  }

  if (m_numInputScores) {
    targetPhrase.GetScoreBreakdown().Assign(m_inputFeature, inputVector);
  }

  targetPhrase.GetScoreBreakdown().Assign(m_obj, transVector);
  targetPhrase.Evaluate(*srcPtr, m_obj->GetFeaturesToApply());
}
|
||||
|
||||
/**
 * Build a heap-allocated collection holding copies of the lowest-cost
 * target candidates together with their source phrases.
 *
 * @param tCands         candidate target phrases
 * @param costs          (cost, index into tCands) pairs; reordered in place
 * @param sourcePhrases  source phrase of each candidate, parallel to tCands
 * @return newly allocated collection; it may be empty
 *
 * At most m_obj->m_tableLimit entries are kept; a limit of 0 means
 * "keep everything". Throws (UTIL_THROW_IF2) when `tCands` and
 * `sourcePhrases` differ in size.
 */
TargetPhraseCollectionWithSourcePhrase* PDTAimp::PruneTargetCandidates
(const std::vector<TargetPhrase> & tCands,
 std::vector<std::pair<float,size_t> >& costs,
 const std::vector<Phrase> &sourcePhrases) const
{
  typedef std::vector<std::pair<float,size_t> >::iterator CostIter;

  UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
                 "Number of target phrases must equal number of source phrases");

  TargetPhraseCollectionWithSourcePhrase *pruned=new TargetPhraseCollectionWithSourcePhrase;

  // Number of entries to keep: the table limit when it is set and smaller
  // than the candidate count, otherwise all candidates.
  size_t keepCount = costs.size();
  if (m_obj->m_tableLimit>0 && m_obj->m_tableLimit < costs.size()) {
    keepCount = m_obj->m_tableLimit;
  }
  CostIter cutoff = costs.begin() + keepCount;

  // Partition so that the entries before `cutoff` are the ones to keep.
  NTH_ELEMENT3(costs.begin(),cutoff ,costs.end());

  // Copy each kept candidate, paired with its source phrase.
  for (CostIter entry = costs.begin(); entry != cutoff; ++entry) {
    const size_t candIdx = entry->second;
    pruned->Add(new TargetPhrase(tCands[candIdx]), sourcePhrases[candIdx]);
  }

  return pruned;
}
|
||||
|
||||
}
|
||||
|
||||
|
443
moses/PDTAimp.h
443
moses/PDTAimp.h
@ -38,23 +38,7 @@ class PDTAimp
|
||||
friend class PhraseDictionaryTreeAdaptor;
|
||||
|
||||
protected:
|
||||
// Construct the implementation object for adaptor `p`. No dictionary is
// attached yet, caching is switched on and the candidate counters start
// at zero. The number of input scores is taken from the InputFeature
// only when `p` is the first phrase dictionary in
// PhraseDictionary::GetColl(); otherwise it stays 0.
PDTAimp(PhraseDictionaryTreeAdaptor *p)
  : m_dict(0),
    m_obj(p),
    useCache(1),
    totalE(0),
    distinctE(0)
{
  const StaticData &staticData = StaticData::Instance();

  m_numInputScores = 0;
  m_inputFeature = &InputFeature::Instance();

  if (m_inputFeature && PhraseDictionary::GetColl()[0] == m_obj) {
    m_numInputScores = m_inputFeature->GetNumScoreComponents();
  }
}
|
||||
PDTAimp(PhraseDictionaryTreeAdaptor *p);
|
||||
|
||||
public:
|
||||
std::vector<FactorType> m_input,m_output;
|
||||
@ -77,172 +61,22 @@ public:
|
||||
std::vector<size_t> path1Best,pathExplored;
|
||||
std::vector<double> pathCN;
|
||||
|
||||
// Release all cached collections and the dictionary. At verbose level 2
// or higher, also print the accumulated candidate and path statistics
// to the error stream.
~PDTAimp()
{
  CleanUp();
  delete m_dict;

  // Statistics are only reported at verbose level 2 and above.
  if (StaticData::Instance().GetVerboseLevel() < 2) {
    return;
  }

  TRACE_ERR("tgt candidates stats: total="<<totalE<<"; distinct="
            <<distinctE<<" ("<<distinctE/(0.01*totalE)<<"); duplicates="
            <<totalE-distinctE<<" ("<<(totalE-distinctE)/(0.01*totalE)
            <<")\n");

  TRACE_ERR("\npath statistics\n");

  // Each table is printed from index 1 onwards (index 0 is skipped).
  if (!path1Best.empty()) {
    TRACE_ERR("1-best: ");
    std::copy(path1Best.begin()+1, path1Best.end(),
              std::ostream_iterator<size_t>(std::cerr," \t"));
    TRACE_ERR("\n");
  }

  if (!pathCN.empty()) {
    TRACE_ERR("CN (full): ");
    std::transform(pathCN.begin()+1, pathCN.end(),
                   std::ostream_iterator<double>(std::cerr," \t"),
                   Exp);
    TRACE_ERR("\n");
  }

  if (!pathExplored.empty()) {
    TRACE_ERR("CN (explored): ");
    std::copy(pathExplored.begin()+1, pathExplored.end(),
              std::ostream_iterator<size_t>(std::cerr," \t"));
    TRACE_ERR("\n");
  }
}
|
||||
~PDTAimp();
|
||||
|
||||
// Store in `s` the string that Word::GetString returns for `w` when given
// the factor types in m_input; any previous contents of `s` are replaced.
// NOTE(review): the meaning of the `false` flag is defined by
// Word::GetString and is not visible here - confirm before changing it.
void Factors2String(Word const& w,std::string& s) const {
|
||||
s=w.GetString(m_input,false);
|
||||
}
|
||||
|
||||
// Drop everything cached so far: ask the dictionary to free its memory,
// delete every collection registered in m_tgtColls, and empty the phrase
// cache, the range cache and the unique-source-phrase store.
// Requires that a dictionary has been created (asserted).
void CleanUp()
{
  assert(m_dict);
  m_dict->FreeMemory();

  // The collections are owned here; the caches below only refer to them.
  const size_t numCollections = m_tgtColls.size();
  for (size_t idx = 0; idx != numCollections; ++idx) {
    delete m_tgtColls[idx];
  }
  m_tgtColls.clear();

  m_cache.clear();
  m_rangeCache.clear();
  uniqSrcPhr.clear();
}
|
||||
void CleanUp();
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase const*
|
||||
GetTargetPhraseCollection(Phrase const &src) const {
|
||||
|
||||
assert(m_dict);
|
||||
if(src.GetSize()==0) return 0;
|
||||
|
||||
std::pair<MapSrc2Tgt::iterator,bool> piter;
|
||||
if(useCache) {
|
||||
piter=m_cache.insert(std::make_pair(src,static_cast<TargetPhraseCollectionWithSourcePhrase const*>(0)));
|
||||
if(!piter.second) return piter.first->second;
|
||||
} else if (m_cache.size()) {
|
||||
MapSrc2Tgt::const_iterator i=m_cache.find(src);
|
||||
return (i!=m_cache.end() ? i->second : 0);
|
||||
}
|
||||
|
||||
std::vector<std::string> srcString(src.GetSize());
|
||||
// convert source Phrase into vector of strings
|
||||
for(size_t i=0; i<srcString.size(); ++i) {
|
||||
Factors2String(src.GetWord(i),srcString[i]);
|
||||
}
|
||||
|
||||
// get target phrases in string representation
|
||||
std::vector<StringTgtCand> cands;
|
||||
std::vector<std::string> wacands;
|
||||
m_dict->GetTargetCandidates(srcString,cands,wacands);
|
||||
if(cands.empty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//TODO: Multiple models broken here
|
||||
std::vector<float> weights = StaticData::Instance().GetWeights(m_obj);
|
||||
|
||||
std::vector<TargetPhrase> tCands;
|
||||
tCands.reserve(cands.size());
|
||||
|
||||
std::vector<std::pair<float,size_t> > costs;
|
||||
costs.reserve(cands.size());
|
||||
|
||||
std::vector<Phrase> sourcePhrases;
|
||||
sourcePhrases.reserve(cands.size());
|
||||
|
||||
|
||||
// convert into TargetPhrases
|
||||
for(size_t i=0; i<cands.size(); ++i) {
|
||||
TargetPhrase targetPhrase;
|
||||
|
||||
StringTgtCand::Tokens const& factorStrings=cands[i].tokens;
|
||||
Scores const& probVector=cands[i].scores;
|
||||
|
||||
std::vector<float> scoreVector(probVector.size());
|
||||
std::transform(probVector.begin(),probVector.end(),scoreVector.begin(),
|
||||
TransformScore);
|
||||
std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),
|
||||
FloorScore);
|
||||
|
||||
//sparse features.
|
||||
//These are already in log-space
|
||||
for (size_t j = 0; j < cands[i].fnames.size(); ++j) {
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, *cands[i].fnames[j], cands[i].fvalues[j]);
|
||||
}
|
||||
|
||||
CreateTargetPhrase(targetPhrase,factorStrings,scoreVector, Scores(0), &wacands[i], &src);
|
||||
|
||||
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
||||
tCands.push_back(targetPhrase);
|
||||
|
||||
sourcePhrases.push_back(src);
|
||||
}
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase *rv;
|
||||
rv=PruneTargetCandidates(tCands,costs, sourcePhrases);
|
||||
if(rv->IsEmpty()) {
|
||||
delete rv;
|
||||
return 0;
|
||||
} else {
|
||||
if(useCache) piter.first->second=rv;
|
||||
m_tgtColls.push_back(rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
GetTargetPhraseCollection(Phrase const &src) const;
|
||||
|
||||
void Create(const std::vector<FactorType> &input
|
||||
, const std::vector<FactorType> &output
|
||||
, const std::string &filePath
|
||||
, const std::vector<float> &weight
|
||||
) {
|
||||
, const std::vector<float> &weight);
|
||||
|
||||
// set my members
|
||||
m_dict=new PhraseDictionaryTree();
|
||||
m_input=input;
|
||||
m_output=output;
|
||||
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
m_dict->NeedAlignmentInfo(staticData.NeedAlignmentInfo());
|
||||
|
||||
std::string binFname=filePath+".binphr.idx";
|
||||
if(!FileExists(binFname.c_str())) {
|
||||
UTIL_THROW2( "bin ttable does not exist");
|
||||
//TRACE_ERR( "bin ttable does not exist -> create it\n");
|
||||
//InputFileStream in(filePath);
|
||||
//m_dict->Create(in,filePath);
|
||||
}
|
||||
VERBOSE(1,"reading bin ttable\n");
|
||||
// m_dict->Read(filePath);
|
||||
bool res=m_dict->Read(filePath);
|
||||
if (!res) {
|
||||
std::stringstream strme;
|
||||
strme << "bin ttable was read in a wrong way\n";
|
||||
UserMessage::Add(strme.str());
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
typedef PhraseDictionaryTree::PrefixPtr PPtr;
|
||||
typedef unsigned short Position;
|
||||
@ -285,61 +119,13 @@ public:
|
||||
Scores const& transVector,
|
||||
Scores const& inputVector,
|
||||
const std::string *alignmentString,
|
||||
Phrase const* srcPtr=0) const {
|
||||
FactorCollection &factorCollection = FactorCollection::Instance();
|
||||
|
||||
for(size_t k=0; k<factorStrings.size(); ++k) {
|
||||
util::TokenIter<util::MultiCharacter, false> word(*factorStrings[k], StaticData::Instance().GetFactorDelimiter());
|
||||
Word& w=targetPhrase.AddWord();
|
||||
for(size_t l=0; l<m_output.size(); ++l, ++word) {
|
||||
w[m_output[l]]= factorCollection.AddFactor(*word);
|
||||
}
|
||||
}
|
||||
|
||||
if (alignmentString) {
|
||||
targetPhrase.SetAlignmentInfo(*alignmentString);
|
||||
}
|
||||
|
||||
if (m_numInputScores) {
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_inputFeature, inputVector);
|
||||
}
|
||||
|
||||
targetPhrase.GetScoreBreakdown().Assign(m_obj, transVector);
|
||||
targetPhrase.Evaluate(*srcPtr, m_obj->GetFeaturesToApply());
|
||||
}
|
||||
Phrase const* srcPtr=0) const;
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase* PruneTargetCandidates
|
||||
(const std::vector<TargetPhrase> & tCands,
|
||||
std::vector<std::pair<float,size_t> >& costs,
|
||||
const std::vector<Phrase> &sourcePhrases) const {
|
||||
// convert into TargetPhraseCollection
|
||||
UTIL_THROW_IF2(tCands.size() != sourcePhrases.size(),
|
||||
"Number of target phrases must equal number of source phrases");
|
||||
const std::vector<Phrase> &sourcePhrases) const;
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase *rv=new TargetPhraseCollectionWithSourcePhrase;
|
||||
|
||||
|
||||
// set limit to tableLimit or actual size, whatever is smaller
|
||||
std::vector<std::pair<float,size_t> >::iterator nth =
|
||||
costs.begin() + ((m_obj->m_tableLimit>0 && // 0 indicates no limit
|
||||
m_obj->m_tableLimit < costs.size()) ?
|
||||
m_obj->m_tableLimit : costs.size());
|
||||
|
||||
// find the nth phrase according to future cost
|
||||
NTH_ELEMENT3(costs.begin(),nth ,costs.end());
|
||||
|
||||
// add n top phrases to the return list
|
||||
for(std::vector<std::pair<float,size_t> >::iterator
|
||||
it = costs.begin(); it != nth; ++it) {
|
||||
size_t ind = it->second;
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(tCands[ind]);
|
||||
const Phrase &sourcePhrase = sourcePhrases[ind];
|
||||
rv->Add(targetPhrase, sourcePhrase);
|
||||
|
||||
}
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
// POD for target phrase scores
|
||||
struct TScores {
|
||||
@ -350,220 +136,7 @@ public:
|
||||
TScores() : total(0.0),src(0) {}
|
||||
};
|
||||
|
||||
void CacheSource(ConfusionNet const& src) {
|
||||
assert(m_dict);
|
||||
const size_t srcSize=src.GetSize();
|
||||
|
||||
std::vector<size_t> exploredPaths(srcSize+1,0);
|
||||
std::vector<double> exPathsD(srcSize+1,-1.0);
|
||||
|
||||
// collect some statistics
|
||||
std::vector<size_t> cnDepths(srcSize,0);
|
||||
for(size_t i=0; i<srcSize; ++i) cnDepths[i]=src[i].size();
|
||||
|
||||
for(size_t len=1; len<=srcSize; ++len)
|
||||
for(size_t i=0; i<=srcSize-len; ++i) {
|
||||
double pd=0.0;
|
||||
for(size_t k=i; k<i+len; ++k) pd+=log(1.0*cnDepths[k]);
|
||||
exPathsD[len]=(exPathsD[len]>=0.0 ? addLogScale(pd,exPathsD[len]) : pd);
|
||||
}
|
||||
|
||||
// update global statistics
|
||||
if(pathCN.size()<=srcSize) pathCN.resize(srcSize+1,-1.0);
|
||||
for(size_t len=1; len<=srcSize; ++len)
|
||||
pathCN[len]=pathCN[len]>=0.0 ? addLogScale(pathCN[len],exPathsD[len]) : exPathsD[len];
|
||||
|
||||
if(path1Best.size()<=srcSize) path1Best.resize(srcSize+1,0);
|
||||
for(size_t len=1; len<=srcSize; ++len) path1Best[len]+=srcSize-len+1;
|
||||
|
||||
|
||||
if (StaticData::Instance().GetVerboseLevel() >= 2 && exPathsD.size()) {
|
||||
TRACE_ERR("path stats for current CN: \nCN (full): ");
|
||||
std::transform(exPathsD.begin()+1
|
||||
,exPathsD.end()
|
||||
,std::ostream_iterator<double>(std::cerr," ")
|
||||
,Exp);
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
|
||||
typedef StringTgtCand::Tokens sPhrase;
|
||||
typedef std::map<StringTgtCand::Tokens,TScores> E2Costs;
|
||||
|
||||
std::map<Range,E2Costs> cov2cand;
|
||||
std::vector<State> stack;
|
||||
for(Position i=0 ; i < srcSize ; ++i)
|
||||
stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));
|
||||
|
||||
std::vector<float> weightTrans = StaticData::Instance().GetWeights(m_obj);
|
||||
std::vector<float> weightInput = StaticData::Instance().GetWeights(m_inputFeature);
|
||||
float weightWP = StaticData::Instance().GetWeightWordPenalty();
|
||||
|
||||
while(!stack.empty()) {
|
||||
State curr(stack.back());
|
||||
stack.pop_back();
|
||||
|
||||
UTIL_THROW_IF2(curr.end() >= srcSize, "Error");
|
||||
const ConfusionNet::Column &currCol=src[curr.end()];
|
||||
// in a given column, loop over all possibilities
|
||||
for(size_t colidx=0; colidx<currCol.size(); ++colidx) {
|
||||
const Word& w=currCol[colidx].first; // w=the i^th possibility in column colidx
|
||||
std::string s;
|
||||
Factors2String(w,s);
|
||||
bool isEpsilon=(s=="" || s==EPSILON);
|
||||
|
||||
//assert that we have the right number of link params in this CN option
|
||||
UTIL_THROW_IF2(currCol[colidx].second.denseScores.size() < m_numInputScores,
|
||||
"Incorrect number of input scores");
|
||||
|
||||
// do not start with epsilon (except at first position)
|
||||
if(isEpsilon && curr.begin()==curr.end() && curr.begin()>0) continue;
|
||||
|
||||
// At a given node in the prefix tree, look to see if w defines an edge to
|
||||
// another node (Extend). Stay at the same node if w==EPSILON
|
||||
PPtr nextP = (isEpsilon ? curr.ptr : m_dict->Extend(curr.ptr,s));
|
||||
|
||||
if(nextP) { // w is a word that should be considered
|
||||
Range newRange(curr.begin(),curr.end()+src.GetColumnIncrement(curr.end(),colidx));
|
||||
|
||||
//add together the link scores from the current state and the new arc
|
||||
float inputScoreSum = 0;
|
||||
std::vector<float> newInputScores(m_numInputScores,0.0);
|
||||
if (m_numInputScores) {
|
||||
std::transform(currCol[colidx].second.denseScores.begin(), currCol[colidx].second.denseScores.end(),
|
||||
curr.GetScores().begin(),
|
||||
newInputScores.begin(),
|
||||
std::plus<float>());
|
||||
|
||||
|
||||
//we need to sum up link weights (excluding realWordCount, which isn't in numLinkParams)
|
||||
//if the sum is too low, then we won't expand this.
|
||||
//TODO: dodgy! shouldn't we consider weights here? what about zero-weight params?
|
||||
inputScoreSum = std::accumulate(newInputScores.begin(),newInputScores.begin()+m_numInputScores,0.0);
|
||||
}
|
||||
|
||||
Phrase newSrc(curr.src);
|
||||
if(!isEpsilon) newSrc.AddWord(w);
|
||||
if(newRange.second<srcSize && inputScoreSum>LOWEST_SCORE) {
|
||||
// if there is more room to grow, add a new state onto the queue
|
||||
// to be explored that represents [begin, curEnd+)
|
||||
stack.push_back(State(newRange,nextP,newInputScores));
|
||||
stack.back().src=newSrc;
|
||||
}
|
||||
|
||||
std::vector<StringTgtCand> tcands;
|
||||
// now, look up the target candidates (aprx. TargetPhraseCollection) for
|
||||
// the current path through the CN
|
||||
m_dict->GetTargetCandidates(nextP,tcands);
|
||||
|
||||
if(newRange.second>=exploredPaths.size()+newRange.first)
|
||||
exploredPaths.resize(newRange.second-newRange.first+1,0);
|
||||
++exploredPaths[newRange.second-newRange.first];
|
||||
|
||||
totalE+=tcands.size();
|
||||
|
||||
if(tcands.size()) {
|
||||
E2Costs& e2costs=cov2cand[newRange];
|
||||
Phrase const* srcPtr=uniqSrcPhr(newSrc);
|
||||
for(size_t i=0; i<tcands.size(); ++i) {
|
||||
//put input scores in first - already logged, just drop in directly
|
||||
std::vector<float> transcores(m_obj->GetNumScoreComponents());
|
||||
UTIL_THROW_IF2(transcores.size() != weightTrans.size(),
|
||||
"Incorrect number of translation scores");
|
||||
|
||||
//put in phrase table scores, logging as we insert
|
||||
std::transform(tcands[i].scores.begin()
|
||||
,tcands[i].scores.end()
|
||||
,transcores.begin()
|
||||
,TransformScore);
|
||||
|
||||
|
||||
//tally up
|
||||
float score=std::inner_product(transcores.begin(), transcores.end(), weightTrans.begin(), 0.0f);
|
||||
|
||||
// input feature
|
||||
score +=std::inner_product(newInputScores.begin(), newInputScores.end(), weightInput.begin(), 0.0f);
|
||||
|
||||
//count word penalty
|
||||
score-=tcands[i].tokens.size() * weightWP;
|
||||
|
||||
std::pair<E2Costs::iterator,bool> p=e2costs.insert(std::make_pair(tcands[i].tokens,TScores()));
|
||||
|
||||
if(p.second) ++distinctE;
|
||||
|
||||
TScores & scores=p.first->second;
|
||||
if(p.second || scores.total<score) {
|
||||
scores.total=score;
|
||||
scores.transScore=transcores;
|
||||
scores.inputScores=newInputScores;
|
||||
scores.src=srcPtr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end while(!stack.empty())
|
||||
|
||||
|
||||
if (StaticData::Instance().GetVerboseLevel() >= 2 && exploredPaths.size()) {
|
||||
TRACE_ERR("CN (explored): ");
|
||||
std::copy(exploredPaths.begin()+1,exploredPaths.end(),
|
||||
std::ostream_iterator<size_t>(std::cerr," "));
|
||||
TRACE_ERR("\n");
|
||||
}
|
||||
|
||||
if(pathExplored.size()<exploredPaths.size())
|
||||
pathExplored.resize(exploredPaths.size(),0);
|
||||
for(size_t len=1; len<=srcSize; ++len)
|
||||
pathExplored[len]+=exploredPaths[len];
|
||||
|
||||
|
||||
m_rangeCache.resize(src.GetSize(),vTPC(src.GetSize(),0));
|
||||
|
||||
for(std::map<Range,E2Costs>::const_iterator i=cov2cand.begin(); i!=cov2cand.end(); ++i) {
|
||||
assert(i->first.first<m_rangeCache.size());
|
||||
assert(i->first.second>0);
|
||||
assert(static_cast<size_t>(i->first.second-1)<m_rangeCache[i->first.first].size());
|
||||
assert(m_rangeCache[i->first.first][i->first.second-1]==0);
|
||||
|
||||
std::vector<TargetPhrase> tCands;
|
||||
tCands.reserve(i->second.size());
|
||||
|
||||
std::vector<std::pair<float,size_t> > costs;
|
||||
costs.reserve(i->second.size());
|
||||
|
||||
std::vector<Phrase> sourcePhrases;
|
||||
sourcePhrases.reserve(i->second.size());
|
||||
|
||||
for(E2Costs::const_iterator j=i->second.begin(); j!=i->second.end(); ++j) {
|
||||
TScores const & scores=j->second;
|
||||
TargetPhrase targetPhrase;
|
||||
CreateTargetPhrase(targetPhrase
|
||||
, j ->first
|
||||
, scores.transScore
|
||||
, scores.inputScores
|
||||
, NULL
|
||||
, scores.src);
|
||||
costs.push_back(std::make_pair(-targetPhrase.GetFutureScore(),tCands.size()));
|
||||
tCands.push_back(targetPhrase);
|
||||
|
||||
sourcePhrases.push_back(*scores.src);
|
||||
|
||||
//std::cerr << i->first.first << "-" << i->first.second << ": " << targetPhrase << std::endl;
|
||||
}
|
||||
|
||||
TargetPhraseCollectionWithSourcePhrase *rv=PruneTargetCandidates(tCands, costs, sourcePhrases);
|
||||
|
||||
if(rv->IsEmpty())
|
||||
delete rv;
|
||||
else {
|
||||
m_rangeCache[i->first.first][i->first.second-1]=rv;
|
||||
m_tgtColls.push_back(rv);
|
||||
}
|
||||
}
|
||||
// free memory
|
||||
m_dict->FreeMemory();
|
||||
}
|
||||
|
||||
void CacheSource(ConfusionNet const& src);
|
||||
|
||||
size_t GetNumInputScores() const {
|
||||
return m_numInputScores;
|
||||
|
@ -38,24 +38,7 @@ using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
TargetPhrase::TargetPhrase( std::string out_string)
|
||||
:Phrase(0)
|
||||
, m_fullScore(0.0)
|
||||
, m_futureScore(0.0)
|
||||
, m_alignTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_lhsTarget(NULL)
|
||||
, m_ruleSource(NULL)
|
||||
{
|
||||
|
||||
//ACAT
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
CreateFromString(Output, staticData.GetInputFactorOrder(), out_string,
|
||||
// staticData.GetFactorDelimiter(), // eliminated [UG]
|
||||
NULL);
|
||||
}
|
||||
|
||||
TargetPhrase::TargetPhrase()
|
||||
TargetPhrase::TargetPhrase(const PhraseDictionary *pt)
|
||||
:Phrase()
|
||||
, m_fullScore(0.0)
|
||||
, m_futureScore(0.0)
|
||||
@ -63,10 +46,11 @@ TargetPhrase::TargetPhrase()
|
||||
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_lhsTarget(NULL)
|
||||
, m_ruleSource(NULL)
|
||||
, m_container(pt)
|
||||
{
|
||||
}
|
||||
|
||||
TargetPhrase::TargetPhrase(const Phrase &phrase)
|
||||
TargetPhrase::TargetPhrase(const Phrase &phrase, const PhraseDictionary *pt)
|
||||
: Phrase(phrase)
|
||||
, m_fullScore(0.0)
|
||||
, m_futureScore(0.0)
|
||||
@ -74,6 +58,7 @@ TargetPhrase::TargetPhrase(const Phrase &phrase)
|
||||
, m_alignNonTerm(&AlignmentInfoCollection::Instance().GetEmptyAlignmentInfo())
|
||||
, m_lhsTarget(NULL)
|
||||
, m_ruleSource(NULL)
|
||||
, m_container(pt)
|
||||
{
|
||||
}
|
||||
|
||||
@ -84,6 +69,7 @@ TargetPhrase::TargetPhrase(const TargetPhrase ©)
|
||||
, m_scoreBreakdown(copy.m_scoreBreakdown)
|
||||
, m_alignTerm(copy.m_alignTerm)
|
||||
, m_alignNonTerm(copy.m_alignNonTerm)
|
||||
, m_container(copy.m_container)
|
||||
{
|
||||
if (copy.m_lhsTarget) {
|
||||
m_lhsTarget = new Word(*copy.m_lhsTarget);
|
||||
|
@ -41,6 +41,8 @@ namespace Moses
|
||||
{
|
||||
class FeatureFunction;
|
||||
class InputPath;
|
||||
class InputPath;
|
||||
class PhraseDictionary;
|
||||
|
||||
/** represents an entry on the target side of a phrase table (scores, translation, alignment)
|
||||
*/
|
||||
@ -60,11 +62,12 @@ private:
|
||||
typedef std::map<std::string, boost::shared_ptr<PhraseProperty> > Properties;
|
||||
Properties m_properties;
|
||||
|
||||
const PhraseDictionary *m_container;
|
||||
|
||||
public:
|
||||
TargetPhrase();
|
||||
TargetPhrase(const PhraseDictionary *pt = NULL);
|
||||
TargetPhrase(const TargetPhrase ©);
|
||||
explicit TargetPhrase(std::string out_string);
|
||||
explicit TargetPhrase(const Phrase &targetPhrase);
|
||||
explicit TargetPhrase(const Phrase &targetPhrase, const PhraseDictionary *pt);
|
||||
~TargetPhrase();
|
||||
|
||||
// 1st evaluate method. Called during loading of phrase table.
|
||||
@ -132,6 +135,9 @@ public:
|
||||
return m_ruleSource;
|
||||
}
|
||||
|
||||
const PhraseDictionary *GetContainer() const
|
||||
{ return m_container; }
|
||||
|
||||
// To be set by the FF that needs it, by default the rule source = NULL
|
||||
// make a copy of the source side of the rule
|
||||
void SetRuleSource(const Phrase &ruleSource) const;
|
||||
|
@ -377,9 +377,9 @@ TrgPhraseFromSntIdx(const PhrasePair& phrasepair) const
|
||||
|
||||
TargetPhrase*
|
||||
BilingualDynSuffixArray::
|
||||
GetMosesFactorIDs(const SAPhrase& phrase, const Phrase& sourcePhrase) const
|
||||
GetMosesFactorIDs(const SAPhrase& phrase, const Phrase& sourcePhrase, const PhraseDictionary *pt) const
|
||||
{
|
||||
TargetPhrase* targetPhrase = new TargetPhrase();
|
||||
TargetPhrase* targetPhrase = new TargetPhrase(pt);
|
||||
for(size_t i=0; i < phrase.words.size(); ++i) { // look up trg words
|
||||
Word& word = m_trgVocab->GetWord( phrase.words[i]);
|
||||
UTIL_THROW_IF2(word == m_trgVocab->GetkOOVWord(),
|
||||
|
@ -128,7 +128,7 @@ public:
|
||||
GatherCands(Phrase const& src, map<SAPhrase, vector<float> >& pstats) const;
|
||||
|
||||
TargetPhrase*
|
||||
GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase) const;
|
||||
GetMosesFactorIDs(const SAPhrase&, const Phrase& sourcePhrase, const PhraseDictionary *pt) const;
|
||||
|
||||
private:
|
||||
|
||||
|
@ -77,7 +77,7 @@ TargetPhrase *ChartRuleLookupManagerSkeleton::CreateTargetPhrase(const Word &sou
|
||||
string str = sourceWord.GetFactor(0)->GetString().as_string();
|
||||
str = "ChartManagerSkeleton:" + str;
|
||||
|
||||
TargetPhrase *tp = new TargetPhrase();
|
||||
TargetPhrase *tp = new TargetPhrase(&m_skeletonPT);
|
||||
Word &word = tp->AddWord();
|
||||
word.CreateFromString(Output, m_skeletonPT.GetOutput(), str, false);
|
||||
|
||||
|
@ -418,7 +418,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
}
|
||||
|
||||
if(eval) {
|
||||
targetPhrase->Evaluate(sourcePhrase);
|
||||
targetPhrase->Evaluate(sourcePhrase, m_phraseDictionary.GetFeaturesToApply());
|
||||
}
|
||||
|
||||
if(m_coding == PREnc) {
|
||||
|
@ -57,6 +57,8 @@ void PhraseDictionaryCompact::Load()
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
|
||||
SetFeaturesToApply();
|
||||
|
||||
m_weight = staticData.GetWeights(this);
|
||||
|
||||
std::string tFilePath = m_filePath;
|
||||
|
@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "moses/InputType.h"
|
||||
#include "moses/TranslationOption.h"
|
||||
#include "moses/UserMessage.h"
|
||||
#include "moses/DecodeStep.h"
|
||||
#include "moses/DecodeGraph.h"
|
||||
#include "moses/InputPath.h"
|
||||
#include "util/exception.hh"
|
||||
@ -48,6 +49,7 @@ PhraseDictionary::PhraseDictionary(const std::string &line)
|
||||
,m_tableLimit(20) // default
|
||||
,m_maxCacheSize(DEFAULT_MAX_TRANS_OPT_CACHE_SIZE)
|
||||
{
|
||||
m_id = s_staticColl.size();
|
||||
s_staticColl.push_back(this);
|
||||
}
|
||||
|
||||
|
@ -87,6 +87,10 @@ public:
|
||||
return m_tableLimit;
|
||||
}
|
||||
|
||||
//! contiguous id for each pt, starting from 0
|
||||
size_t GetId() const
|
||||
{ return m_id; }
|
||||
|
||||
virtual
|
||||
void
|
||||
Release(TargetPhraseCollection const* tpc) const;
|
||||
@ -167,6 +171,7 @@ protected:
|
||||
|
||||
protected:
|
||||
CacheColl &GetCache() const;
|
||||
size_t m_id;
|
||||
|
||||
};
|
||||
|
||||
|
@ -58,7 +58,7 @@ GetTargetPhraseCollectionLEGACY(const Phrase& src) const
|
||||
|
||||
TargetPhraseCollection *ret = new TargetPhraseCollection();
|
||||
BOOST_FOREACH(pstat_entry & e, pstats) {
|
||||
TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src);
|
||||
TargetPhrase* tp = m_biSA->GetMosesFactorIDs(e.first, src, this);
|
||||
tp->GetScoreBreakdown().Assign(this,e.second);
|
||||
tp->Evaluate(src);
|
||||
ret->Add(tp);
|
||||
|
@ -124,7 +124,7 @@ std::vector<TargetPhrase*> PhraseDictionaryTransliteration::CreateTargetPhrases(
|
||||
Tokenize(toks, line, "\t");
|
||||
UTIL_THROW_IF2(toks.size() != 2, "Error in transliteration output file. Expecting word\tscore");
|
||||
|
||||
TargetPhrase *tp = new TargetPhrase();
|
||||
TargetPhrase *tp = new TargetPhrase(this);
|
||||
Word &word = tp->AddWord();
|
||||
word.CreateFromString(Output, m_output, toks[0], false);
|
||||
|
||||
|
@ -153,7 +153,7 @@ TargetPhrase *ProbingPT::CreateTargetPhrase(const Phrase &sourcePhrase, const ta
|
||||
const std::vector<unsigned int> &probingPhrase = probingTargetPhrase.target_phrase;
|
||||
size_t size = probingPhrase.size();
|
||||
|
||||
TargetPhrase *tp = new TargetPhrase();
|
||||
TargetPhrase *tp = new TargetPhrase(this);
|
||||
|
||||
// words
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
|
@ -222,7 +222,7 @@ bool RuleTableLoaderCompact::LoadRuleSection(
|
||||
// The remaining columns are currently ignored.
|
||||
|
||||
// Create and score target phrase.
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase);
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(targetPhrasePhrase, &ruleTable);
|
||||
targetPhrase->SetAlignNonTerm(alignNonTerm);
|
||||
targetPhrase->SetTargetLHS(targetLhs);
|
||||
|
||||
|
@ -222,7 +222,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
|
||||
Word *targetLHS;
|
||||
|
||||
// create target phrase obj
|
||||
TargetPhrase *targetPhrase = new TargetPhrase();
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(&ruleTable);
|
||||
// targetPhrase->CreateFromString(Output, output, targetPhraseString, factorDelimiter, &targetLHS);
|
||||
targetPhrase->CreateFromString(Output, output, targetPhraseString, &targetLHS);
|
||||
// source
|
||||
|
@ -270,7 +270,7 @@ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSenten
|
||||
sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, &sourceLHS);
|
||||
|
||||
// create target phrase obj
|
||||
TargetPhrase *targetPhrase = new TargetPhrase();
|
||||
TargetPhrase *targetPhrase = new TargetPhrase(this);
|
||||
// targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);
|
||||
targetPhrase->CreateFromString(Output, m_output, targetPhraseString, &targetLHS);
|
||||
|
||||
|
@ -53,7 +53,7 @@ TargetPhrase *SkeletonPT::CreateTargetPhrase(const Phrase &sourcePhrase) const
|
||||
string str = sourcePhrase.GetWord(0).GetFactor(0)->GetString().as_string();
|
||||
str = "SkeletonPT:" + str;
|
||||
|
||||
TargetPhrase *tp = new TargetPhrase();
|
||||
TargetPhrase *tp = new TargetPhrase(this);
|
||||
Word &word = tp->AddWord();
|
||||
word.CreateFromString(Output, m_output, str, false);
|
||||
|
||||
|
@ -466,7 +466,7 @@ namespace Moses
|
||||
BOOST_FOREACH(sptr<pscorer> const& ff, m_active_ff_common)
|
||||
(*ff)(*dynbt, pool, &fvals);
|
||||
}
|
||||
TargetPhrase* tp = new TargetPhrase();
|
||||
TargetPhrase* tp = new TargetPhrase(this);
|
||||
Token const* x = fix ? fix->start2 : dyn->start2;
|
||||
uint32_t len = fix ? fix->len2 : dyn->len2;
|
||||
for (uint32_t k = 0; k < len; ++k, x = x->next())
|
||||
|
@ -32,7 +32,7 @@ namespace Moses
|
||||
{
|
||||
|
||||
TranslationOption::TranslationOption()
|
||||
:m_targetPhrase()
|
||||
:m_targetPhrase(NULL)
|
||||
,m_inputPath(NULL)
|
||||
,m_sourceWordsRange(NOT_FOUND, NOT_FOUND)
|
||||
{
|
||||
|
@ -231,7 +231,7 @@ void TranslationOptionCollection::ProcessOneUnknownWord(const InputPath &inputPa
|
||||
// modify the starting bitmap
|
||||
}
|
||||
|
||||
TargetPhrase targetPhrase;
|
||||
TargetPhrase targetPhrase(NULL);
|
||||
|
||||
if (!(staticData.GetDropUnknown() || isEpsilon) || isDigit) {
|
||||
// add to dictionary
|
||||
|
@ -173,7 +173,7 @@ bool TreeInput::ProcessAndStripXMLTags(string &line, std::vector<XMLParseOutput>
|
||||
//TRACE_ERR("number of translations: " << altTexts.size() << endl);
|
||||
for (size_t i=0; i<altTexts.size(); ++i) {
|
||||
// set target phrase
|
||||
TargetPhrase targetPhrase;
|
||||
TargetPhrase targetPhrase(NULL);
|
||||
// targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
|
||||
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);
|
||||
|
||||
|
@ -361,7 +361,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
|
||||
float scoreValue = FloorScore(TransformScore(probValue));
|
||||
|
||||
WordsRange range(startPos + offset,endPos-1 + offset); // span covered by phrase
|
||||
TargetPhrase targetPhrase;
|
||||
TargetPhrase targetPhrase(NULL);
|
||||
// targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i],factorDelimiter, NULL);
|
||||
targetPhrase.CreateFromString(Output, outputFactorOrder,altTexts[i], NULL);
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "PhraseOrientation.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <limits>
|
||||
|
||||
#include <boost/assign/list_of.hpp>
|
||||
|
Loading…
Reference in New Issue
Block a user