Merge branch 'master' of github.com:moses-smt/mosesdecoder

Ian Johnson 2013-07-24 11:52:21 +01:00
commit 68779c66b9
41 changed files with 724 additions and 348 deletions

View File

@@ -1,7 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.162355801">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
@@ -55,6 +53,7 @@
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
@@ -77,6 +76,7 @@
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>

View File

@@ -1,7 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?>
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.461114338">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
@@ -49,6 +47,7 @@
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1546774818" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.523170942" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
@@ -73,6 +72,7 @@
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>

View File

@@ -35,6 +35,7 @@
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../randlm/include/RandLM&quot;"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
@@ -46,6 +47,7 @@
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="LM_IRST"/>
<listOptionValue builtIn="false" value="LM_RAND"/>
<listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
<listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
@@ -68,8 +70,9 @@
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.511477442" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
<entry excluding="FF/PhraseLengthFeatureTest.cpp|PhraseLengthFeatureTest.cpp|LM/BackwardTest.cpp|LM/BackwardLMState.h|LM/BackwardLMState.cpp|LM/Backward.h|LM/Backward.cpp|FeatureVectorTest.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|src/SyntacticLanguageModelState.h|src/SyntacticLanguageModelFiles.h|src/SyntacticLanguageModel.h|src/SyntacticLanguageModel.cpp|src/LM/SRI.h|src/LM/SRI.cpp|src/LM/Rand.h|src/LM/Rand.cpp|src/LM/LDHT.h|src/LM/LDHT.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>

View File

@@ -1061,6 +1061,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ChartBasedFeatureContext.h</locationURI>
</link>
<link>
<name>FF/ControlRecombination.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ControlRecombination.cpp</locationURI>
</link>
<link>
<name>FF/ControlRecombination.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/ControlRecombination.h</locationURI>
</link>
<link>
<name>FF/DistortionScoreProducer.cpp</name>
<type>1</type>
@@ -1081,6 +1091,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/FFState.h</locationURI>
</link>
<link>
<name>FF/Factory.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/Factory.cpp</locationURI>
</link>
<link>
<name>FF/Factory.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/Factory.h</locationURI>
</link>
<link>
<name>FF/FeatureFunction.cpp</name>
<type>1</type>

View File

@@ -0,0 +1,69 @@
#include "ControlRecombination.h"
#include "moses/Hypothesis.h"
#include "util/exception.hh"
using namespace std;
namespace Moses {
ControlRecombination::ControlRecombination(const std::string &line)
:StatefulFeatureFunction("ControlRecombination", 0, line)
,m_type(Output)
{
}
void ControlRecombination::SetParameter(const std::string& key, const std::string& value)
{
if (key == "type") {
m_type = (Type) Scan<size_t>(value);
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
FFState* ControlRecombination::Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const
{
ControlRecombinationState *state = new ControlRecombinationState(&cur_hypo);
return state;
}
FFState* ControlRecombination::EvaluateChart(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const
{
UTIL_THROW(util::Exception, "Not implemented yet");
}
const FFState* ControlRecombination::EmptyHypothesisState(const InputType &input) const
{
ControlRecombinationState *state = new ControlRecombinationState();
return state;
}
ControlRecombinationState::ControlRecombinationState()
:m_hypo(NULL)
{
}
ControlRecombinationState::ControlRecombinationState(const Hypothesis *hypo)
:m_hypo(hypo)
{
}
int ControlRecombinationState::Compare(const FFState& other) const
{
const ControlRecombinationState &other2 = static_cast<const ControlRecombinationState&>(other);
const Hypothesis *otherHypo = other2.m_hypo;
Phrase thisOutputPhrase, otherOutputPhrase;
m_hypo->GetOutputPhrase(thisOutputPhrase);
otherHypo->GetOutputPhrase(otherOutputPhrase);
int ret = thisOutputPhrase.Compare(otherOutputPhrase);
return ret;
}
}

View File

@@ -0,0 +1,58 @@
#pragma once
#include <string>
#include "StatefulFeatureFunction.h"
#include "moses/FF/FFState.h"
namespace Moses {
class ControlRecombinationState;
// force hypotheses NOT to recombine. For forced decoding
class ControlRecombination : public StatefulFeatureFunction
{
public:
enum Type
{
None,
Output,
Segmentation
};
ControlRecombination(const std::string &line);
bool IsUseable(const FactorMask &mask) const {
return true;
}
virtual FFState* Evaluate(
const Hypothesis& cur_hypo,
const FFState* prev_state,
ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateChart(
const ChartHypothesis& /* cur_hypo */,
int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const;
//! return the state associated with the empty hypothesis for a given sentence
virtual const FFState* EmptyHypothesisState(const InputType &input) const;
void SetParameter(const std::string& key, const std::string& value);
protected:
Type m_type;
};
class ControlRecombinationState : public FFState
{
protected:
const Hypothesis *m_hypo;
public:
ControlRecombinationState();
ControlRecombinationState(const Hypothesis *hypo);
int Compare(const FFState& other) const;
};
} // namespace
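As the comment at the top of the class notes, this feature blocks recombination (e.g. for forced decoding): its state compares the complete output phrases of two hypotheses, so only hypotheses with identical output may merge. A minimal sketch of how the feature would be switched on through the FeatureRegistry added in this commit; the feature line is an assumption based on the key=value syntax that SetParameter accepts (type=1 selects Output per the enum above):

#include "moses/FF/Factory.h"

int main() {
  // Hypothetical driver: the decoder proper does this in StaticData::LoadData
  // for every "[feature]" line of moses.ini (see the StaticData.cpp hunk below).
  Moses::FeatureRegistry registry;
  registry.Construct("ControlRecombination", "ControlRecombination type=1");
}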

moses/FF/Factory.cpp (new file, 177 lines)
View File

@@ -0,0 +1,177 @@
#include "moses/FF/Factory.h"
#include "moses/StaticData.h"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/LexicalReordering.h"
#include "moses/FF/BleuScoreFeature.h"
#include "moses/FF/TargetWordInsertionFeature.h"
#include "moses/FF/SourceWordDeletionFeature.h"
#include "moses/FF/GlobalLexicalModel.h"
#include "moses/FF/GlobalLexicalModelUnlimited.h"
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/FF/WordTranslationFeature.h"
#include "moses/FF/TargetBigramFeature.h"
#include "moses/FF/TargetNgramFeature.h"
#include "moses/FF/PhraseBoundaryFeature.h"
#include "moses/FF/PhrasePairFeature.h"
#include "moses/FF/PhraseLengthFeature.h"
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
#include "moses/FF/PhrasePenalty.h"
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
#include "moses/FF/ControlRecombination.h"
#include "moses/LM/Ken.h"
#ifdef LM_IRST
#include "moses/LM/IRST.h"
#endif
#ifdef LM_SRI
#include "moses/LM/SRI.h"
#endif
#ifdef LM_RAND
#include "moses/LM/Rand.h"
#endif
#ifdef HAVE_SYNLM
#include "moses/SyntacticLanguageModel.h"
#endif
#include "util/exception.hh"
#include <vector>
namespace Moses
{
class FeatureFactory
{
public:
virtual ~FeatureFactory() {}
virtual void Create(const std::string &line) = 0;
protected:
template <class F> static void DefaultSetup(F *feature);
FeatureFactory() {}
};
template <class F> void FeatureFactory::DefaultSetup(F *feature)
{
StaticData &static_data = StaticData::InstanceNonConst();
std::vector<float> &weights = static_data.GetParameter()->GetWeights(feature->GetScoreProducerDescription());
if (feature->IsTuneable() || weights.size()) {
// if it's tuneable, ini file MUST have weights
// even if it's not tuneable, people can still set the weights in the ini file
static_data.SetWeights(feature, weights);
} else {
std::vector<float> defaultWeights = feature->DefaultWeights();
static_data.SetWeights(feature, defaultWeights);
}
}
namespace
{
template <class F> class DefaultFeatureFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(new F(line));
}
};
class KenFactory : public FeatureFactory
{
public:
void Create(const std::string &line) {
DefaultSetup(ConstructKenLM(line));
}
};
} // namespace
FeatureRegistry::FeatureRegistry()
{
// Feature with same name as class
#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
// Feature with different name than class.
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
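// For example, MOSES_FNAME(PhrasePenalty) expands to
//   Add("PhrasePenalty", new DefaultFeatureFactory< PhrasePenalty >());
// while MOSES_FNAME2("Distortion", DistortionScoreProducer) registers the
// factory under the shorter name used on moses.ini feature lines.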
MOSES_FNAME(GlobalLexicalModel);
//MOSES_FNAME(GlobalLexicalModelUnlimited); This was commented out in the original
MOSES_FNAME(SourceWordDeletionFeature);
MOSES_FNAME(TargetWordInsertionFeature);
MOSES_FNAME(PhraseBoundaryFeature);
MOSES_FNAME(PhraseLengthFeature);
MOSES_FNAME(WordTranslationFeature);
MOSES_FNAME(TargetBigramFeature);
MOSES_FNAME(TargetNgramFeature);
MOSES_FNAME(PhrasePairFeature);
MOSES_FNAME(LexicalReordering);
MOSES_FNAME2("Generation", GenerationDictionary);
MOSES_FNAME(BleuScoreFeature);
MOSES_FNAME2("Distortion", DistortionScoreProducer);
MOSES_FNAME2("WordPenalty", WordPenaltyProducer);
MOSES_FNAME(InputFeature);
MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
MOSES_FNAME(PhraseDictionaryOnDisk);
MOSES_FNAME(PhraseDictionaryMemory);
MOSES_FNAME(PhraseDictionaryCompact);
MOSES_FNAME(PhraseDictionaryMultiModel);
MOSES_FNAME(PhraseDictionaryMultiModelCounts);
MOSES_FNAME(PhraseDictionaryALSuffixArray);
MOSES_FNAME(PhraseDictionaryDynSuffixArray);
MOSES_FNAME(OpSequenceModel);
MOSES_FNAME(PhrasePenalty);
MOSES_FNAME2("UnknownWordPenalty", UnknownWordPenaltyProducer);
MOSES_FNAME(ControlRecombination);
#ifdef HAVE_SYNLM
MOSES_FNAME(SyntacticLanguageModel);
#endif
#ifdef LM_IRST
MOSES_FNAME2("IRSTLM", LanguageModelIRST);
#endif
#ifdef LM_SRI
MOSES_FNAME2("SRILM", LanguageModelSRI);
#endif
#ifdef LM_RAND
MOSES_FNAME2("RANDLM", LanguageModelRandLM);
#endif
Add("KENLM", new KenFactory());
}
FeatureRegistry::~FeatureRegistry() {}
void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
{
std::pair<std::string, boost::shared_ptr<FeatureFactory> > to_ins(name, boost::shared_ptr<FeatureFactory>(factory));
UTIL_THROW_IF(!registry_.insert(to_ins).second, util::Exception, "Duplicate feature name " << name);
}
namespace
{
class UnknownFeatureException : public util::Exception {};
}
void FeatureRegistry::Construct(const std::string &name, const std::string &line)
{
Map::iterator i = registry_.find(name);
UTIL_THROW_IF(i == registry_.end(), UnknownFeatureException, "Feature name " << name << " is not registered.");
i->second->Create(line);
}
} // namespace Moses

moses/FF/Factory.h (new file, 30 lines)
View File

@@ -0,0 +1,30 @@
#pragma once
#include <string>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
namespace Moses
{
class FeatureFactory;
class FeatureRegistry
{
public:
FeatureRegistry();
~FeatureRegistry();
void Construct(const std::string &name, const std::string &line);
private:
void Add(const std::string &name, FeatureFactory *factory);
typedef boost::unordered_map<std::string, boost::shared_ptr<FeatureFactory> > Map;
Map registry_;
};
} // namespace Moses

View File

@@ -74,7 +74,7 @@ void FeatureFunction::ParseLine(const std::string& description, const std::strin
set<string> keys;
for (size_t i = 1; i < toks.size(); ++i) {
vector<string> args = Tokenize(toks[i], "=");
vector<string> args = TokenizeFirstOnly(toks[i], "=");
CHECK(args.size() == 2);
pair<set<string>::iterator,bool> ret = keys.insert(args[0]);
@@ -109,5 +109,10 @@ void FeatureFunction::ReadParameters()
}
}
std::vector<float> FeatureFunction::DefaultWeights() const
{
UTIL_THROW(util::Exception, "No default weights");
}
}

View File

@@ -78,6 +78,7 @@ public:
virtual bool IsTuneable() const {
return m_tuneable;
}
virtual std::vector<float> DefaultWeights() const;
//! Called before search and collecting of translation options
virtual void InitializeForInput(InputType const& source) {

View File

@@ -13,5 +13,11 @@ UnknownWordPenaltyProducer::UnknownWordPenaltyProducer(const std::string &line)
ReadParameters();
}
std::vector<float> UnknownWordPenaltyProducer::DefaultWeights() const
{
std::vector<float> ret(1, 1.0f);
return ret;
}
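// Together with FeatureFactory::DefaultSetup in moses/FF/Factory.cpp, this
// reproduces the old special case in StaticData::LoadData (removed below),
// which pushed a weight of 1.0 when none was configured for this feature.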
}

View File

@@ -20,6 +20,7 @@ public:
bool IsUseable(const FactorMask &mask) const {
return true;
}
std::vector<float> DefaultWeights() const;
};

View File

@@ -401,6 +401,14 @@ void Hypothesis::CleanupArcList()
}
}
void Hypothesis::GetOutputPhrase(Phrase &out) const
{
if (m_prevHypo != NULL) {
m_prevHypo->GetOutputPhrase(out);
}
out.Append(GetCurrTargetPhrase());
}
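// GetOutputPhrase() follows the m_prevHypo chain back to the start hypothesis
// and appends each step's target phrase, yielding the complete output produced
// so far. ControlRecombinationState::Compare and the simplified ToStream()
// (header hunk below) both rely on it.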
TO_STRING_BODY(Hypothesis)
// friend

View File

@@ -200,11 +200,12 @@ public:
int RecombineCompare(const Hypothesis &compare) const;
void GetOutputPhrase(Phrase &out) const;
void ToStream(std::ostream& out) const {
if (m_prevHypo != NULL) {
m_prevHypo->ToStream(out);
}
out << (Phrase) GetCurrTargetPhrase();
Phrase ret;
GetOutputPhrase(ret);
out << ret;
}
void ToStringStream(std::stringstream& out) const {

View File

@@ -30,6 +30,25 @@ if $(have-clock[2]) = 0 {
alias rt ;
}
#This is a kludge to force rebuilding if different --with options are passed.
#Could have used features like <srilm>on but getting these to apply only to
#linking was ugly and it still didn't trigger an install (since the install
#path doesn't encode features). It stores a file lm.log with the previous
#options and forces a rebuild if the current options differ.
local current = ;
for local i in srilm irstlm randlm {
local optval = [ option.get "with-$(i)" ] ;
if $(optval) {
current += "--with-$(i)=$(optval)" ;
}
}
current = $(current:J=" ") ;
current ?= "" ;
path-constant LM-LOG : bin/lm.log ;
update-if-changed $(LM-LOG) $(current) ;
obj FF_Factory.o : FF/Factory.cpp LM//macros headers ../lm//kenlm : <dependency>$(LM-LOG) ;
lib moses :
[ glob
*.cpp
@@ -45,8 +64,9 @@ lib moses :
ThreadPool.cpp
SyntacticLanguageModel.cpp
*Test.cpp Mock*.cpp
LM/Factory.cpp
]
headers LM//LM TranslationModel/CompactPT//CompactPT synlm ThreadPool rt
headers FF_Factory.o LM//LM TranslationModel/CompactPT//CompactPT synlm ThreadPool rt
..//search ../util/double-conversion//double-conversion ..//z ../OnDiskPt//OnDiskPt ;
alias headers-to-install : [ glob-tree *.h ] ;

View File

@@ -7,26 +7,8 @@
import option path ;
#This is a kludge to force rebuilding if different --with options are passed.
#Could have used features like <srilm>on but getting these to apply only to
#linking was ugly and it still didn't trigger an install (since the install
#path doesn't encode features). It stores a file lm.log with the previous
#options and forces a rebuild if the current options differ.
local current = ;
for local i in srilm irstlm randlm {
local optval = [ option.get "with-$(i)" ] ;
if $(optval) {
current += "--with-$(i)=$(optval)" ;
}
}
current = $(current:J=" ") ;
current ?= "" ;
path-constant LM-LOG : bin/lm.log ;
update-if-changed $(LM-LOG) $(current) ;
local dependencies = ;
local lmmacros = ;
#IRSTLM
local with-irstlm = [ option.get "with-irstlm" ] ;
@@ -35,6 +17,7 @@ if $(with-irstlm) {
obj IRST.o : IRST.cpp ..//headers : <include>$(with-irstlm)/include <include>$(with-irstlm)/include/irstlm ;
alias irst : IRST.o irstlm : : : <define>LM_IRST ;
dependencies += irst ;
lmmacros += LM_IRST ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
echo "!!! You are linking the IRSTLM library; be sure the release is >= 5.70.02 !!!" ;
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" ;
@@ -63,6 +46,7 @@ if $(with-srilm) {
obj ParallelBackoff.o : ParallelBackoff.cpp ..//headers : <include>$(with-srilm)/include <include>$(with-srilm)/include/srilm <warnings>off ;
alias sri : SRI.o ParallelBackoff.o sri-libs : : : <define>LM_SRI ;
dependencies += sri ;
lmmacros += LM_SRI ;
}
#RandLM
@@ -72,6 +56,7 @@ if $(with-randlm) {
obj Rand.o : Rand.cpp RandLM ..//headers : <include>$(with-randlm)/include <include>$(with-randlm)/include/RandLM ;
alias rand : Rand.o RandLM : : : <define>LM_RAND ;
dependencies += rand ;
lmmacros += LM_RAND ;
}
# LDHTLM
@@ -82,6 +67,7 @@ if $(with-ldhtlm) {
obj LDHT.o : LDHT.cpp LDHT ..//headers : <include>$(with-ldhtlm)/include <include>$(with-ldhtlm)/include/LDHT ;
alias ldht : LDHT.o LDHT ticpp : : : <define>LM_LDHT ;
dependencies += ldht ;
lmmacros += LM_LDHT ;
}
#ORLM is always compiled but needs special headers
@@ -92,4 +78,4 @@ obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : :
alias LM : Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
../../lm//kenlm ..//headers $(dependencies) ;
alias macros : : : : <define>$(lmmacros) ;
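# Net effect: the new FF_Factory.o target in moses/Jamfile (hunk above) pulls in
# LM//macros, so FF/Factory.cpp is compiled with exactly the LM_* defines for
# the language models actually configured, matching its #ifdef LM_IRST/LM_SRI/
# LM_RAND blocks.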

View File

@@ -66,7 +66,7 @@ struct KenLMState : public FFState {
template <class Model> class LanguageModelKen : public LanguageModel
{
public:
LanguageModelKen(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy);
LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
const FFState *EmptyHypothesisState(const InputType &/*input*/) const {
KenLMState *ret = new KenLMState();
@@ -137,8 +137,8 @@ private:
std::vector<lm::WordIndex> &m_mapping;
};
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy)
:LanguageModel(description, line)
template <class Model> LanguageModelKen<Model>::LanguageModelKen(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
:LanguageModel("KENLM", line)
,m_factorType(factorType)
{
lm::ngram::Config config;
@@ -351,7 +351,7 @@ bool LanguageModelKen<Model>::IsUseable(const FactorMask &mask) const
} // namespace
LanguageModel *ConstructKenLM(const std::string &description, const std::string &line)
LanguageModel *ConstructKenLM(const std::string &line)
{
FactorType factorType;
string filePath;
@@ -375,10 +375,10 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string
}
}
return ConstructKenLM(description, line, filePath, factorType, lazy);
return ConstructKenLM(line, filePath, factorType, lazy);
}
LanguageModel *ConstructKenLM(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy)
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
{
try {
lm::ngram::ModelType model_type;
@@ -386,23 +386,23 @@ LanguageModel *ConstructKenLM(const std::string &description, const std::string
switch(model_type) {
case lm::ngram::PROBING:
return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
case lm::ngram::REST_PROBING:
return new LanguageModelKen<lm::ngram::RestProbingModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
case lm::ngram::TRIE:
return new LanguageModelKen<lm::ngram::TrieModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::TrieModel>(line, file, factorType, lazy);
case lm::ngram::QUANT_TRIE:
return new LanguageModelKen<lm::ngram::QuantTrieModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
case lm::ngram::ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::ArrayTrieModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
case lm::ngram::QUANT_ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
default:
std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
abort();
}
} else {
return new LanguageModelKen<lm::ngram::ProbingModel>(description, line, file, factorType, lazy);
return new LanguageModelKen<lm::ngram::ProbingModel>(line, file, factorType, lazy);
}
} catch (std::exception &e) {
std::cerr << e.what() << std::endl;

View File

@@ -31,10 +31,10 @@ namespace Moses
class LanguageModel;
LanguageModel *ConstructKenLM(const std::string &description, const std::string &line);
LanguageModel *ConstructKenLM(const std::string &line);
//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructKenLM(const std::string &description, const std::string &line, const std::string &file, FactorType factorType, bool lazy);
LanguageModel *ConstructKenLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy);
} // namespace Moses

View File

@@ -20,11 +20,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <limits>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include "SingleFactor.h"
#include "RandLM.h"
#include "Rand.h"
#include "moses/Factor.h"
#include "moses/Util.h"
@@ -33,62 +29,34 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/InputFileStream.h"
#include "moses/StaticData.h"
#include "util/check.hh"
#include "RandLM.h"
using namespace std;
namespace Moses
{
namespace
LanguageModelRandLM::LanguageModelRandLM(const std::string &line)
:LanguageModelSingleFactor("RandLM", line)
, m_lm(0)
{
using namespace std;
}
class LanguageModelRandLM : public LanguageModelSingleFactor
{
public:
LanguageModelRandLM(const std::string &line)
:LanguageModelSingleFactor("RandLM", line)
, m_lm(0) {
}
bool Load(const std::string &filePath, FactorType factorType, size_t nGramOrder);
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
~LanguageModelRandLM() {
delete m_lm;
}
void InitializeForInput(InputType const& source) {
m_lm->initThreadSpecificData(); // Creates thread specific data iff compiled with multithreading.
}
void CleanUpAfterSentenceProcessing(const InputType& source) {
m_lm->clearCaches(); // clear caches
}
protected:
std::vector<randlm::WordID> m_randlm_ids_vec;
randlm::RandLM* m_lm;
randlm::WordID m_oov_id;
void CreateFactors(FactorCollection &factorCollection);
randlm::WordID GetLmID( const std::string &str ) const;
randlm::WordID GetLmID( const Factor *factor ) const {
size_t factorId = factor->GetId();
return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
};
LanguageModelRandLM::~LanguageModelRandLM() {
delete m_lm;
}
};
bool LanguageModelRandLM::Load(const std::string &filePath, FactorType factorType,
size_t nGramOrder)
void LanguageModelRandLM::Load()
{
cerr << "Loading LanguageModelRandLM..." << endl;
FactorCollection &factorCollection = FactorCollection::Instance();
m_filePath = filePath;
m_factorType = factorType;
m_nGramOrder = nGramOrder;
int cache_MB = 50; // increase cache size
m_lm = randlm::RandLM::initRandLM(filePath, nGramOrder, cache_MB);
m_lm = randlm::RandLM::initRandLM(m_filePath, m_nGramOrder, cache_MB);
CHECK(m_lm != NULL);
// get special word ids
m_oov_id = m_lm->getWordID(m_lm->getOOV());
CreateFactors(factorCollection);
m_lm->initThreadSpecificData();
return true;
}
void LanguageModelRandLM::CreateFactors(FactorCollection &factorCollection) // add factors which have randlm id
@@ -132,6 +100,11 @@ randlm::WordID LanguageModelRandLM::GetLmID( const std::string &str ) const
return m_lm->getWordID(str);
}
randlm::WordID LanguageModelRandLM::GetLmID( const Factor *factor ) const {
size_t factorId = factor->GetId();
return ( factorId >= m_randlm_ids_vec.size()) ? m_oov_id : m_randlm_ids_vec[factorId];
}
LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
State* finalState) const
{
@@ -154,6 +127,11 @@ LMResult LanguageModelRandLM::GetValue(const vector<const Word*> &contextFactor,
return ret;
}
void LanguageModelRandLM::InitializeForInput(InputType const& source) {
m_lm->initThreadSpecificData(); // Creates thread specific data iff compiled with multithreading.
}
void LanguageModelRandLM::CleanUpAfterSentenceProcessing(const InputType& source) {
m_lm->clearCaches(); // clear caches
}
}

View File

@@ -1,3 +1,5 @@
#ifndef moses_LM_Rand_h
#define moses_LM_Rand_h
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@@ -16,14 +18,43 @@ You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <vector>
#include <string>
#include <stdint.h>
#include "SingleFactor.h"
#include "moses/TypeDef.h"
#include "moses/Word.h"
//#include "RandLM.h"
#ifndef moses_LM_Rand_h
#define moses_LM_Rand_h
namespace randlm
{
class RandLM;
}
namespace Moses
{
class LanguageModelPointerState;
LanguageModelPointerState *NewRandLM();
class LanguageModelRandLM : public LanguageModelSingleFactor
{
public:
LanguageModelRandLM(const std::string &line);
~LanguageModelRandLM();
void Load();
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
void InitializeForInput(InputType const& source);
void CleanUpAfterSentenceProcessing(const InputType& source);
protected:
//std::vector<randlm::WordID> m_randlm_ids_vec;
std::vector<uint32_t> m_randlm_ids_vec; // Ken made me do this
randlm::RandLM* m_lm;
uint32_t m_oov_id;
void CreateFactors(FactorCollection &factorCollection);
uint32_t GetLmID( const std::string &str ) const;
uint32_t GetLmID( const Factor *factor ) const;
};
}

View File

@@ -30,6 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "InputFileStream.h"
#include "StaticData.h"
#include "UserMessage.h"
#include "util/exception.hh"
using namespace std;
@@ -38,25 +39,18 @@ namespace Moses
/** define allowed parameters */
Parameter::Parameter()
{
AddParam("mapping", "description of decoding steps");
AddParam("beam-threshold", "b", "threshold for threshold pruning");
AddParam("config", "f", "location of the configuration file");
AddParam("continue-partial-translation", "cpt", "start from nonempty hypothesis");
AddParam("decoding-graph-backoff", "dpb", "only use subsequent decoding paths for unknown spans of given length");
AddParam("dlm-model", "Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
AddParam("drop-unknown", "du", "drop unknown words instead of copying them");
AddParam("disable-discarding", "dd", "disable hypothesis discarding");
AddParam("factor-delimiter", "fd", "specify a different factor delimiter than the default");
AddParam("generation-file", "location and properties of the generation table");
AddParam("global-lexical-file", "gl", "discriminatively trained global lexical translation model file");
AddParam("glm-feature", "discriminatively trained global lexical translation feature, sparse producer");
AddParam("input-factors", "list of factors in the input");
AddParam("input-file", "i", "location of the input file to be translated");
AddParam("inputtype", "text (0), confusion network (1), word lattice (2) (default = 0)");
AddParam("labeled-n-best-list", "print out labels for each weight type in n-best list. default is true");
AddParam("lmodel-file", "location and properties of the language models");
AddParam("lmodel-dub", "dictionary upper bounds of language models");
AddParam("lmodel-oov-feature", "add language model oov feature, one per model");
AddParam("mapping", "description of decoding steps");
AddParam("max-partial-trans-opt", "maximum number of partial translation options per input span (during mapping steps)");
AddParam("max-trans-opt-per-coverage", "maximum number of translation options per input span (after applying mapping steps)");
AddParam("max-phrase-length", "maximum phrase length (default 20)");
@@ -68,16 +62,10 @@ Parameter::Parameter()
AddParam("phrase-drop-allowed", "da", "if present, allow dropping of source words"); //da = drop any (word); see -du for comparison
AddParam("report-all-factors", "report all factors in output, not just first");
AddParam("report-all-factors-in-n-best", "Report all factors in n-best-lists. Default is false");
#ifdef HAVE_SYNLM
AddParam("slmodel-file", "location of the syntactic language model file(s)");
AddParam("slmodel-factor", "factor to use with syntactic language model");
AddParam("slmodel-beam", "beam width to use with syntactic language model's parser");
#endif
AddParam("stack", "s", "maximum stack size for histogram pruning");
AddParam("stack-diversity", "sd", "minimum number of hypothesis of each coverage in stack (default 0)");
AddParam("threads","th", "number of threads to use in decoding (defaults to single-threaded)");
AddParam("translation-details", "T", "for each best hypothesis, report translation details to the given file");
AddParam("ttable-file", "location and properties of the translation tables");
AddParam("translation-option-threshold", "tot", "threshold for translation options relative to best for input phrase");
AddParam("early-discarding-threshold", "edt", "threshold for constructing hypotheses based on estimate cost");
AddParam("verbose", "v", "verbosity level of the logging");
@@ -103,6 +91,7 @@ Parameter::Parameter()
AddParam("lmbr-r", "ngram precision decay value for lattice mbr");
AddParam("lmbr-map-weight", "weight given to map solution when doing lattice MBR (default 0)");
AddParam("lattice-hypo-set", "to use lattice as hypo set during lattice MBR");
AddParam("lmodel-oov-feature", "add language model oov feature, one per model");
AddParam("clean-lm-cache", "clean language model caches after N translations (default N=1)");
AddParam("use-persistent-cache", "cache translation options across sentences (default true)");
AddParam("persistent-cache-size", "maximum size of cache for translation options (default 10,000 input phrases)");
@@ -129,13 +118,6 @@ Parameter::Parameter()
AddParam("source-label-overlap", "What happens if a span already has a label. 0=add more. 1=replace. 2=discard. Default is 0");
AddParam("output-hypo-score", "Output the hypo score to stdout with the output string. For search error analysis. Default is false");
AddParam("unknown-lhs", "file containing target lhs of unknown words. 1 per line: LHS prob");
AddParam("phrase-pair-feature", "Source and target factors for phrase pair feature");
AddParam("phrase-boundary-source-feature", "Source factors for phrase boundary feature");
AddParam("phrase-boundary-target-feature", "Target factors for phrase boundary feature");
AddParam("phrase-length-feature", "Count features for source length, target length, both of each phrase");
AddParam("target-word-insertion-feature", "Count feature for each unaligned target word");
AddParam("source-word-deletion-feature", "Count feature for each unaligned source word");
AddParam("word-translation-feature", "Count feature for word translation according to word alignment");
AddParam("cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
AddParam("parsing-algorithm", "Which parsing algorithm to use. 0=CYK+, 1=scope-3. (default = 0)");
AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing, 4=stack with batched lm requests (default = 0)");
@@ -185,6 +167,27 @@ Parameter::Parameter()
AddParam("text-type", "DEPRECATED. DO NOT USE. should be one of dev/devtest/test, used for domain adaptation features");
AddParam("input-scores", "DEPRECATED. DO NOT USE. 2 numbers on 2 lines - [1] of scores on each edge of a confusion network or lattice input (default=1). [2] Number of 'real' word scores (0 or 1. default=0)");
AddParam("dlm-model", "DEPRECATED. DO NOT USE. Order, factor and vocabulary file for discriminative LM. Use * for filename to indicate unlimited vocabulary.");
AddParam("generation-file", "DEPRECATED. DO NOT USE. location and properties of the generation table");
AddParam("global-lexical-file", "gl", "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation model file");
AddParam("glm-feature", "DEPRECATED. DO NOT USE. discriminatively trained global lexical translation feature, sparse producer");
AddParam("lmodel-file", "DEPRECATED. DO NOT USE. location and properties of the language models");
AddParam("lmodel-dub", "DEPRECATED. DO NOT USE. dictionary upper bounds of language models");
#ifdef HAVE_SYNLM
AddParam("slmodel-file", "DEPRECATED. DO NOT USE. location of the syntactic language model file(s)");
AddParam("slmodel-factor", "DEPRECATED. DO NOT USE. factor to use with syntactic language model");
AddParam("slmodel-beam", "DEPRECATED. DO NOT USE. beam width to use with syntactic language model's parser");
#endif
AddParam("ttable-file", "DEPRECATED. DO NOT USE. location and properties of the translation tables");
AddParam("phrase-pair-feature", "DEPRECATED. DO NOT USE. Source and target factors for phrase pair feature");
AddParam("phrase-boundary-source-feature", "DEPRECATED. DO NOT USE. Source factors for phrase boundary feature");
AddParam("phrase-boundary-target-feature", "DEPRECATED. DO NOT USE. Target factors for phrase boundary feature");
AddParam("phrase-length-feature", "DEPRECATED. DO NOT USE. Count features for source length, target length, both of each phrase");
AddParam("target-word-insertion-feature", "DEPRECATED. DO NOT USE. Count feature for each unaligned target word");
AddParam("source-word-deletion-feature", "DEPRECATED. DO NOT USE. Count feature for each unaligned source word");
AddParam("word-translation-feature", "DEPRECATED. DO NOT USE. Count feature for word translation according to word alignment");
AddParam("weight-file", "wf", "feature weights file. Do *not* put weights for 'core' features in here - they go in moses.ini");
AddParam("weight", "weights for ALL models, 1 per line 'WeightName value'. Weight names can be repeated");
@@ -195,6 +198,8 @@ Parameter::Parameter()
AddParam("print-id", "prefix translations with id. Default if false");
AddParam("alternate-weight-setting", "aws", "alternate set of weights to used per xml specification");
AddParam("placeholder-factor", "Which factor to use to store the original text for placeholders");
}
Parameter::~Parameter()
@@ -305,9 +310,27 @@ bool Parameter::LoadParam(int argc, char* argv[])
}
}
// don't mix old and new format
if ((isParamSpecified("feature") || isParamSpecified("weight"))
&& (isParamSpecified("weight-slm") || isParamSpecified("weight-bl") || isParamSpecified("weight-d") ||
isParamSpecified("weight-dlm") || isParamSpecified("weight-lrl") || isParamSpecified("weight-generation") ||
isParamSpecified("weight-i") || isParamSpecified("weight-l") || isParamSpecified("weight-lex") ||
isParamSpecified("weight-glm") || isParamSpecified("weight-wt") || isParamSpecified("weight-pp") ||
isParamSpecified("weight-pb") || isParamSpecified("weight-t") || isParamSpecified("weight-w") ||
isParamSpecified("weight-u") || isParamSpecified("weight-e") ||
isParamSpecified("dlm-mode") || isParamSpecified("generation-file") || isParamSpecified("global-lexical-file") ||
isParamSpecified("glm-feature") || isParamSpecified("lmodel-file") || isParamSpecified("lmodel-dub") ||
isParamSpecified("slmodel-file") || isParamSpecified("slmodel-factor") ||
isParamSpecified("slmodel-beam") || isParamSpecified("ttable-file") || isParamSpecified("phrase-pair-feature") ||
isParamSpecified("phrase-boundary-source-feature") || isParamSpecified("phrase-boundary-target-feature") || isParamSpecified("phrase-length-feature") ||
isParamSpecified("target-word-insertion-feature") || isParamSpecified("source-word-deletion-feature") || isParamSpecified("word-translation-feature")
)
) {
UTIL_THROW(util::Exception, "Don't mix old and new ini file format");
}
// convert old weights args to new format
// WHAT IS GOING ON HERE??? - UG
if (!isParamSpecified("feature")) // UG
if (!isParamSpecified("feature"))
ConvertWeightArgs();
CreateWeightsMap();
WeightOverwrite();

View File

@@ -121,15 +121,22 @@ int Sentence::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
//const StaticData &staticData = StaticData::Instance();
std::vector<XmlOption*> xmlOptionsList(0);
std::vector< size_t > xmlWalls;
std::vector< std::pair<size_t, std::string> > placeholders;
if (staticData.GetXmlInputType() != XmlPassThrough) {
if (!ProcessAndStripXMLTags(line, xmlOptionsList, m_reorderingConstraint, xmlWalls, staticData.GetXmlBrackets().first, staticData.GetXmlBrackets().second)) {
if (!ProcessAndStripXMLTags(line, xmlOptionsList, m_reorderingConstraint, xmlWalls, placeholders,
staticData.GetXmlBrackets().first, staticData.GetXmlBrackets().second)) {
const string msg("Unable to parse XML in line: " + line);
TRACE_ERR(msg << endl);
throw runtime_error(msg);
}
}
Phrase::CreateFromString(Input, factorOrder, line, factorDelimiter, NULL);
// placeholders
ProcessPlaceholders(placeholders);
if (staticData.IsChart()) {
InitStartEndWord();
}
@@ -194,6 +201,22 @@ void Sentence::InitStartEndWord()
AddWord(endWord);
}
void Sentence::ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders)
{
FactorType factorType = StaticData::Instance().GetPlaceholderFactor();
if (factorType == NOT_FOUND) {
return;
}
for (size_t i = 0; i < placeholders.size(); ++i) {
size_t pos = placeholders[i].first;
const string &str = placeholders[i].second;
const Factor *factor = FactorCollection::Instance().AddFactor(str);
Word &word = Phrase::GetWord(pos);
word[factorType] = factor;
}
}
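// Sketch of the intended flow (hypothetical input, assuming moses.ini sets
// placeholder-factor=1): for a source token marked up as
//   <ne entity="@num@">7</ne>
// ProcessAndStripXMLTags (XmlOption.cpp hunk below) presumably records the
// pair (position-of-"7", "@num@"), and the loop above then stores "@num@"
// as factor 1 of that word.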
TranslationOptionCollection*
Sentence::CreateTranslationOptionCollection() const
{

View File

@@ -57,6 +57,7 @@ private:
NonTerminalSet m_defaultLabelSet;
void InitStartEndWord();
void ProcessPlaceholders(const std::vector< std::pair<size_t, std::string> > &placeholders);
public:

View File

@@ -22,14 +22,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <string>
#include "util/check.hh"
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
#include "moses/TranslationModel/PhraseDictionaryMemory.h"
#include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
#include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
#include "moses/TranslationModel/PhraseDictionaryDynSuffixArray.h"
#include "moses/FF/Factory.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
#include "DecodeStepTranslation.h"
#include "DecodeStepGeneration.h"
@@ -46,37 +43,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "InputFileStream.h"
#include "ScoreComponentCollection.h"
#include "moses/FF/BleuScoreFeature.h"
#include "moses/FF/TargetWordInsertionFeature.h"
#include "moses/FF/SourceWordDeletionFeature.h"
#include "moses/FF/GlobalLexicalModel.h"
#include "moses/FF/GlobalLexicalModelUnlimited.h"
#include "moses/FF/UnknownWordPenaltyProducer.h"
#include "moses/FF/WordTranslationFeature.h"
#include "moses/FF/TargetBigramFeature.h"
#include "moses/FF/TargetNgramFeature.h"
#include "moses/FF/PhraseBoundaryFeature.h"
#include "moses/FF/PhrasePairFeature.h"
#include "moses/FF/PhraseLengthFeature.h"
#include "moses/FF/DistortionScoreProducer.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/FF/InputFeature.h"
#include "moses/FF/PhrasePenalty.h"
#include "moses/FF/OSM-Feature/OpSequenceModel.h"
#include "LM/Ken.h"
#ifdef LM_IRST
#include "LM/IRST.h"
#endif
#ifdef LM_SRI
#include "LM/SRI.h"
#endif
#ifdef HAVE_SYNLM
#include "SyntacticLanguageModel.h"
#endif
#ifdef WITH_THREADS
#include <boost/thread.hpp>
#endif
@@ -556,10 +522,18 @@ bool StaticData::LoadData(Parameter *parameter)
cerr << "XML tags opening and closing brackets for XML input are: " << m_xmlBrackets.first << " and " << m_xmlBrackets.second << endl;
}
if (m_parameter->GetParam("placeholder-factor").size() > 0) {
m_placeHolderFactor = Scan<FactorType>(m_parameter->GetParam("placeholder-factor")[0]);
} else {
m_placeHolderFactor = NOT_FOUND;
}
// all features
map<string, int> featureIndexMap;
const vector<string> &features = m_parameter->GetParam("feature");
FeatureRegistry registry;
for (size_t i = 0; i < features.size(); ++i) {
const string &line = Trim(features[i]);
cerr << "line=" << line << endl;
@@ -569,151 +543,8 @@ bool StaticData::LoadData(Parameter *parameter)
vector<string> toks = Tokenize(line);
const string &feature = toks[0];
//int featureIndex = GetFeatureIndex(featureIndexMap, feature);
if (feature == "GlobalLexicalModel") {
GlobalLexicalModel *model = new GlobalLexicalModel(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "GlobalLexicalModelUnlimited") {
GlobalLexicalModelUnlimited *model = NULL; //new GlobalLexicalModelUnlimited(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "SourceWordDeletionFeature") {
SourceWordDeletionFeature *model = new SourceWordDeletionFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "TargetWordInsertionFeature") {
TargetWordInsertionFeature *model = new TargetWordInsertionFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "PhraseBoundaryFeature") {
PhraseBoundaryFeature *model = new PhraseBoundaryFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "PhraseLengthFeature") {
PhraseLengthFeature *model = new PhraseLengthFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "WordTranslationFeature") {
WordTranslationFeature *model = new WordTranslationFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "TargetBigramFeature") {
TargetBigramFeature *model = new TargetBigramFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "TargetNgramFeature") {
TargetNgramFeature *model = new TargetNgramFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "PhrasePairFeature") {
PhrasePairFeature *model = new PhrasePairFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
//SetWeights(model, weights);
} else if (feature == "LexicalReordering") {
LexicalReordering *model = new LexicalReordering(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "KENLM") {
LanguageModel *model = ConstructKenLM(feature, line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#ifdef LM_IRST
else if (feature == "IRSTLM") {
LanguageModelIRST *model = new LanguageModelIRST(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#endif
#ifdef LM_SRI
else if (feature == "SRILM") {
LanguageModelSRI *model = new LanguageModelSRI(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#endif
else if (feature == "Generation") {
GenerationDictionary *model = new GenerationDictionary(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "BleuScoreFeature") {
BleuScoreFeature *model = new BleuScoreFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "Distortion") {
DistortionScoreProducer *model = new DistortionScoreProducer(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "WordPenalty") {
WordPenaltyProducer *model = new WordPenaltyProducer(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "UnknownWordPenalty") {
UnknownWordPenaltyProducer *model = new UnknownWordPenaltyProducer(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
if (weights.size() == 0)
weights.push_back(1.0f);
SetWeights(model, weights);
} else if (feature == "InputFeature") {
InputFeature *model = new InputFeature(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryBinary") {
PhraseDictionaryTreeAdaptor* model = new PhraseDictionaryTreeAdaptor(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryOnDisk") {
PhraseDictionaryOnDisk* model = new PhraseDictionaryOnDisk(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryMemory") {
PhraseDictionaryMemory* model = new PhraseDictionaryMemory(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryCompact") {
PhraseDictionaryCompact* model = new PhraseDictionaryCompact(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryMultiModel") {
PhraseDictionaryMultiModel* model = new PhraseDictionaryMultiModel(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryMultiModelCounts") {
PhraseDictionaryMultiModelCounts* model = new PhraseDictionaryMultiModelCounts(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryALSuffixArray") {
PhraseDictionaryALSuffixArray* model = new PhraseDictionaryALSuffixArray(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhraseDictionaryDynSuffixArray") {
PhraseDictionaryDynSuffixArray* model = new PhraseDictionaryDynSuffixArray(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "OpSequenceModel") {
OpSequenceModel* model = new OpSequenceModel(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
} else if (feature == "PhrasePenalty") {
PhrasePenalty* model = new PhrasePenalty(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#ifdef HAVE_SYNLM
else if (feature == "SyntacticLanguageModel") {
SyntacticLanguageModel *model = new SyntacticLanguageModel(line);
vector<float> weights = m_parameter->GetWeights(model->GetScoreProducerDescription());
SetWeights(model, weights);
}
#endif
else {
UserMessage::Add("Unknown feature function:" + feature);
return false;
}
registry.Construct(feature, line);
}
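// The ~150 lines of per-feature if/else that used to live here collapse into
// this single registry call; an unrecognised feature name now raises
// UnknownFeatureException inside FeatureRegistry::Construct instead of the
// UserMessage-and-return-false path above.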
OverrideFeatures();
@@ -1317,7 +1148,7 @@ void StaticData::OverrideFeatures()
CHECK(keyVal.size() == 2);
VERBOSE(1, "Override " << ff.GetScoreProducerDescription() << " "
<< keyVal[0] << "=" << keyVal[1] << endl);
<< keyVal[0] << "=" << keyVal[1] << endl);
ff.SetParameter(keyVal[0], keyVal[1]);

View File

@@ -214,6 +214,8 @@ protected:
std::map< std::string, std::set< std::string > > m_weightSettingIgnoreFF; // feature function
std::map< std::string, std::set< size_t > > m_weightSettingIgnoreDP; // decoding path
FactorType m_placeHolderFactor;
StaticData();
void LoadChartDecodingParameters();
@@ -768,6 +770,9 @@ public:
void OverrideFeatures();
FactorType GetPlaceholderFactor() const {
return m_placeHolderFactor;
}
};
}

View File

@@ -85,7 +85,7 @@ public:
void Sort(bool adhereTableLimit, size_t tableLimit);
void Clear() {
RemoveAllInColl(m_collection);
RemoveAllInColl(m_collection);
}
};

View File

@@ -154,12 +154,10 @@ void ChartRuleLookupManagerMemory::GetChartRuleCollection(
const PhraseDictionaryNodeMemory &node = dottedRule.GetLastNode();
// look up target sides
const TargetPhraseCollection *tpc = node.GetTargetPhraseCollection();
const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();
// add the fully expanded rule (with lexical target side)
if (tpc != NULL) {
AddCompletedRule(dottedRule, *tpc, range, outColl);
}
AddCompletedRule(dottedRule, tpc, range, outColl);
}
dottedRuleCol.Clear(relEndPos+1);

View File

@@ -153,12 +153,10 @@ void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
const PhraseDictionaryNodeMemory &node = dottedRule.GetLastNode();
// look up target sides
const TargetPhraseCollection *tpc = node.GetTargetPhraseCollection();
const TargetPhraseCollection &tpc = node.GetTargetPhraseCollection();
// add the fully expanded rule (with lexical target side)
if (tpc != NULL) {
AddCompletedRule(dottedRule, *tpc, range, outColl);
}
AddCompletedRule(dottedRule, tpc, range, outColl);
}
dottedRuleCol.Clear(relEndPos+1);

View File

@@ -51,7 +51,7 @@ TargetPhraseCollection &PhraseDictionaryMemory::GetOrCreateTargetPhraseCollectio
, const Word *sourceLHS)
{
PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(source, target, sourceLHS);
return currNode.GetOrCreateTargetPhraseCollection();
return currNode.GetTargetPhraseCollection();
}
const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(const Phrase& sourceOrig) const
@@ -70,7 +70,7 @@ const TargetPhraseCollection *PhraseDictionaryMemory::GetTargetPhraseCollection(
return NULL;
}
return currNode->GetTargetPhraseCollection();
return &currNode->GetTargetPhraseCollection();
}
PhraseDictionaryNodeMemory &PhraseDictionaryMemory::GetOrCreateNode(const Phrase &source

View File

@@ -223,7 +223,6 @@ void PhraseDictionaryMultiModelCounts::Load()
const TargetPhraseCollection *PhraseDictionaryMultiModelCounts::GetTargetPhraseCollection(const Phrase& src) const
{
vector<vector<float> > multimodelweights;
bool normalize;
normalize = (m_mode == "interpolate") ? true : false;
@@ -346,11 +345,11 @@ float PhraseDictionaryMultiModelCounts::GetTargetCount(const Phrase &target, siz
{
const PhraseDictionary &pd = *m_inverse_pd[modelIndex];
TargetPhraseCollection *ret_raw = (TargetPhraseCollection*) pd.GetTargetPhraseCollection(target);
const TargetPhraseCollection *ret_raw = pd.GetTargetPhraseCollection(target);
// in inverse mode, we want the first score of the first phrase pair (note: if we were to work with truly symmetric models, it would be the third score)
if (ret_raw != NULL) {
TargetPhrase * targetPhrase = *(ret_raw->begin());
if (ret_raw && ret_raw->GetSize() > 0) {
const TargetPhrase * targetPhrase = *(ret_raw->begin());
return UntransformScore(targetPhrase->GetScoreBreakdown().GetScoresForProducer(&pd)[0]);
}

View File

@@ -133,10 +133,10 @@ public:
const PhraseDictionaryNodeMemory *GetChild(const Word &sourceTerm) const;
const PhraseDictionaryNodeMemory *GetChild(const Word &sourceNonTerm, const Word &targetNonTerm) const;
const TargetPhraseCollection *GetTargetPhraseCollection() const {
return &m_targetPhraseCollection;
const TargetPhraseCollection &GetTargetPhraseCollection() const {
return m_targetPhraseCollection;
}
TargetPhraseCollection &GetOrCreateTargetPhraseCollection() {
TargetPhraseCollection &GetTargetPhraseCollection() {
return m_targetPhraseCollection;
}
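
With GetOrCreateTargetPhraseCollection renamed, the two methods become ordinary const/non-const overloads of one name; the compiler picks the right one from the constness of the node. A standalone sketch of that overload pattern (hypothetical types, not the Moses class):

#include <iostream>
#include <string>

class Node {
public:
  // Overload resolution selects the const accessor through const
  // references and the mutable one otherwise; one name serves both.
  const std::string &Get() const { return m_value; }
  std::string &Get() { return m_value; }

private:
  std::string m_value;
};

int main() {
  Node node;
  node.Get() = "rule";                   // non-const overload: mutation allowed
  const Node &view = node;
  std::cout << view.Get() << std::endl;  // const overload: read-only
  return 0;
}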

View File

@@ -263,7 +263,7 @@ TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseColle
     , const Word *sourceLHS)
 {
   PhraseDictionaryNodeMemory &currNode = GetOrCreateNode(rootNode, source, target, sourceLHS);
-  return currNode.GetOrCreateTargetPhraseCollection();
+  return currNode.GetTargetPhraseCollection();
 }
 
 PhraseDictionaryNodeMemory &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDictionaryNodeMemory &rootNode

View File

@@ -233,6 +233,26 @@ inline void TokenizeMultiCharSeparator(std::vector<std::string> &output
     output.push_back(Trim(str.substr(pos, nextPos - pos)));
 }
 
+/** Split on the first delimiter only. Used by class FeatureFunction to parse
+ *  key=value pairs; the value may itself contain '=' characters.
+ */
+inline std::vector<std::string> TokenizeFirstOnly(const std::string& str,
+    const std::string& delimiters = " \t")
+{
+  std::vector<std::string> tokens;
+  std::string::size_type pos = str.find_first_of(delimiters);
+
+  if (std::string::npos != pos) {
+    // Found a delimiter: the key before it, the rest (verbatim) as the value.
+    tokens.push_back(str.substr(0, pos));
+    tokens.push_back(str.substr(pos + 1, str.size() - pos - 1));
+  } else {
+    tokens.push_back(str);
+  }
+
+  return tokens;
+}
+
 /**
  * Convert vector of type T to string
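
A small usage sketch of the new helper, compiled standalone (the function body is copied from the hunk above; the example input is invented):

#include <iostream>
#include <string>
#include <vector>

std::vector<std::string> TokenizeFirstOnly(const std::string& str,
    const std::string& delimiters = " \t")
{
  std::vector<std::string> tokens;
  std::string::size_type pos = str.find_first_of(delimiters);
  if (std::string::npos != pos) {
    tokens.push_back(str.substr(0, pos));
    tokens.push_back(str.substr(pos + 1, str.size() - pos - 1));
  } else {
    tokens.push_back(str);
  }
  return tokens;
}

int main() {
  // Splits once, so '=' inside the value survives: key "path", value "a=b".
  std::vector<std::string> kv = TokenizeFirstOnly("path=a=b", "=");
  std::cout << kv[0] << " -> " << kv[1] << std::endl;  // prints: path -> a=b
  return 0;
}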

View File

@@ -134,6 +134,11 @@ public:
     return Compare(*this, compare) != 0;
   }
 
+  int Compare(const Word &other) const {
+    return Compare(*this, other);
+  }
+
   /* static functions */
 
   /** transitive comparison of 2 word objects. Used by operator<.
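
The new member form simply forwards to the existing static comparator, so a.Compare(b) and Word::Compare(a, b) cannot drift apart. The delegation pattern in isolation (hypothetical Word stand-in, not the Moses class):

#include <iostream>
#include <string>

class Word {
public:
  explicit Word(const std::string &s) : m_s(s) {}

  // Member form delegates to the shared static comparator.
  int Compare(const Word &other) const { return Compare(*this, other); }

  // Transitive three-way comparison, as used by operator<.
  static int Compare(const Word &a, const Word &b) {
    return a.m_s.compare(b.m_s);
  }

private:
  std::string m_s;
};

int main() {
  Word a("cat"), b("dog");
  std::cout << a.Compare(b) << std::endl;  // negative: "cat" sorts before "dog"
  return 0;
}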

View File

@@ -150,10 +150,13 @@ vector<string> TokenizeXml(const string& str, const std::string& lbrackStr, cons
  * \param rbrackStr xml tag's right bracket string, typically ">"
  */
 bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingConstraint &reorderingConstraint, vector< size_t > &walls,
+                            std::vector< std::pair<size_t, std::string> > &placeholders,
                             const std::string& lbrackStr, const std::string& rbrackStr)
 {
+  const StaticData &staticData = StaticData::Instance();
+
   //parse XML markup in translation line
 
   // no xml tag? we're done.
   //if (line.find_first_of('<') == string::npos) {
   if (line.find(lbrackStr) == string::npos) {
@@ -172,8 +175,8 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
   string cleanLine; // return string (text without xml)
   size_t wordPos = 0; // position in sentence (in terms of number of words)
 
-  const vector<FactorType> &outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
-  const string &factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+  const vector<FactorType> &outputFactorOrder = staticData.GetOutputFactorOrder();
+  const string &factorDelimiter = staticData.GetFactorDelimiter();
 
   // loop through the tokens
   for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++) {
@@ -290,6 +293,16 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
       reorderingConstraint.SetZone( startPos, endPos-1 );
     }
 
+    // named-entity placeholder
+    else if (tagName == "ne") {
+      if (startPos != (endPos - 1)) {
+        TRACE_ERR("ERROR: Placeholder must only span 1 word: " << line << endl);
+        return false;
+      }
+      string entity = ParseXmlTagAttribute(tagContent,"entity");
+      placeholders.push_back(std::pair<size_t, std::string>(startPos, entity));
+    }
+
     // default: opening tag that specifies translation options
     else {
       if (startPos >= endPos) {
@@ -329,7 +342,7 @@ bool ProcessAndStripXMLTags(string &line, vector<XmlOption*> &res, ReorderingCon
       }
 
       // store translation options into members
-      if (StaticData::Instance().GetXmlInputType() != XmlIgnore) {
+      if (staticData.GetXmlInputType() != XmlIgnore) {
         // only store options if we aren't ignoring them
         for (size_t i=0; i<altTexts.size(); ++i) {
           Phrase sourcePhrase; // TODO don't know what the source phrase is
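
To make the new code path concrete: a sketch of a placeholder-bearing input line and what ProcessAndStripXMLTags records for it (the sentence and word index are invented for illustration):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // An <ne> tag must span exactly one word; here it wraps word index 2.
  std::string line =
      "there are <ne translation=\"@NUM@\" entity=\"4.5\">@NUM@</ne> cats";

  // After stripping, the decoder would see "there are @NUM@ cats" and the
  // placeholders vector pairs the word position with the original entity.
  std::vector< std::pair<size_t, std::string> > placeholders;
  placeholders.push_back(std::make_pair(size_t(2), std::string("4.5")));

  std::cout << "word " << placeholders[0].first << " -> "
            << placeholders[0].second << std::endl;
  return 0;
}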

View File

@@ -31,6 +31,7 @@ bool isXmlTag(const std::string& tag, const std::string& lbrackStr="<", const st
 std::vector<std::string> TokenizeXml(const std::string& str, const std::string& lbrackStr="<", const std::string& rbrackStr=">");
 
 bool ProcessAndStripXMLTags(std::string &line, std::vector<XmlOption*> &res, ReorderingConstraint &reorderingConstraint, std::vector< size_t > &walls,
+                            std::vector< std::pair<size_t, std::string> > &placeholders,
                             const std::string& lbrackStr="<", const std::string& rbrackStr=">");
 
 }

View File

@@ -388,6 +388,12 @@ void ExtractGHKM::ProcessOptions(int argc, char *argv[],
   if (vm.count("UnpairedExtractFormat")) {
     options.unpairedExtractFormat = true;
   }
 
+  // Workaround for extract-parallel issue: only the first chunk (sentence offset 0) writes the glue grammar and unknown-word files.
+  if (options.sentenceOffset > 0) {
+    options.glueGrammarFile.clear();
+    options.unknownWordFile.clear();
+  }
 }
 
 void ExtractGHKM::Error(const std::string &msg) const

View File

@@ -371,6 +371,23 @@ alignment-symmetrization-method = grow-diag-final-and
 #
 #biconcor = $moses-script-dir/ems/biconcor/biconcor
 
+############################################################
+### use of Operation Sequence Model
+### Durrani, Schmid and Fraser (2011):
+### "A Joint Sequence Translation Model with Integrated Reordering"
+#
+#operation-sequence-model = "yes"
+#operation-sequence-model-order = 5
+#
+### compile Moses with --max-kenlm-order=9 if a higher order is required
+#
+### if OSM training should be skipped,
+### point to an existing OSM model
+#
+# osm-model =
+
 ############################################################
 ### lexicalized reordering: specify orientation type
 # (default: only distance-based reordering model)
 #

View File

@@ -355,6 +355,23 @@ alignment-symmetrization-method = grow-diag-final-and
 #
 #biconcor = $moses-script-dir/ems/biconcor/biconcor
 
+############################################################
+### use of Operation Sequence Model
+### Durrani, Schmid and Fraser (2011):
+### "A Joint Sequence Translation Model with Integrated Reordering"
+#
+#operation-sequence-model = "yes"
+#operation-sequence-model-order = 5
+#
+### compile Moses with --max-kenlm-order=9 if a higher order is required
+#
+### if OSM training should be skipped,
+### point to an existing OSM model
+#
+# osm-model =
+
 ############################################################
 ### lexicalized reordering: specify orientation type
 # (default: only distance-based reordering model)
 #

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/perl -w
+package ph_numbers;
 
 # Script to recognize and replace numbers in Moses training corpora
 # and decoder input
@@ -7,45 +8,65 @@
 use strict;
 
+run() unless caller();
+
 use Getopt::Std;
 
 my $debug = $ENV{DEBUG} || 0;
 
-my %opts;
-if(!getopts('s:t:cm:hl',\%opts) || $opts{h}) {
-  print "Usage: perl $0 [-s source_locale][-t target_locale][-c][-h][-l][-m symbol] < in > out\n";
-  exit;
+sub run {
+  my %opts;
+  if(!getopts('s:t:cm:hl',\%opts) || $opts{h}) {
+    print "Usage: perl $0 [-s source_locale][-t target_locale][-c][-h][-l][-m symbol] < in > out\n";
+    exit;
+  }
+  my $sourceLocale = $opts{s} || "";
+  my $targetLocale = $opts{t} || "";
+  my $numberSymbol = $opts{m} || '@NUM@';
+  while(<>) {
+    chomp;
+    print recognize($_,$opts{c},$opts{l},$numberSymbol),"\n";
+  }
 }
-my $sourceLocale = $opts{s} || "";
-my $targetLocale = $opts{t} || "";
-my $numberSymbol = $opts{m} || '@NUM@';
-while(<>) {
+
+sub recognize {
+  my $line = shift;
+  my $corpusMode = shift;
+  my $legacyMode = shift;
+  my $numberSymbol = shift || '@NUM@';
+
   # [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?
   # while(/\G(.*?)\s*([+-]?\p{Digit}+[+-\p{Digit}\.,eE])/) {
-  chomp;
   my $output = "";
   my $remainder = "";
-  while(/\G(.*?)(\s*)([+-]?\p{Digit}*[\.,]?\p{Digit}+[\p{Digit}\.,+-eE]*)/g) {
-    print STDERR "Between: x$1x\n" if $debug;
-    print STDERR "Number: x$3x\n" if $debug;
-    $output .= $1;
-    if($opts{c}) {
-      $output .= $2.$numberSymbol;
+  while($line =~ /\G(.*?)(\s*)([+-]?\p{Digit}*[\.,]?\p{Digit}+[\p{Digit}\.,+-eE]*)/g) {
+    my $between = $1;
+    my $space = $2;    # saved now: the continuation loop below resets $1/$2
+    my $number = $3;
+    print STDERR "Between: x${between}x\n" if $debug;
+    print STDERR "Number: x${number}x\n" if $debug;
+    # If there are more numbers separated by whitespace, add these
+    my $numberContinuation = "";
+    while($line =~ /\G(\s+)([\p{Digit}\.,+-eE]*)/g) {
+      $numberContinuation .= $1.$2;
+    }
+    $number .= $numberContinuation;
+    $output .= $between;
+    if($corpusMode) {
+      $output .= $space.$numberSymbol;
     }
     else {
-      if($opts{l}) {
-        $output .= $2."<ne translation=\"$3\">$numberSymbol</ne>";
+      if($legacyMode) {
+        $output .= $space."<ne translation=\"$number\">$numberSymbol</ne>";
       }
       else {
-        $output .= $2."<ne translation=\"$numberSymbol\" entity=\"$3\">$numberSymbol</ne>";
+        $output .= $space."<ne translation=\"$numberSymbol\" entity=\"$number\">$numberSymbol</ne>";
      }
    }
    $remainder = $';
  }
-  print STDERR "Remainder: x".$remainder."x\n" if $debug;
-  print STDERR "\n" if $debug;
-  $output .= $remainder if $remainder;
-  $output .= "\n";
-  print $output;
+  print STDERR "Remainder: x".$remainder."x\n" if $debug;
+  print STDERR "\n" if $debug;
+  $output .= $remainder if $remainder;
+  return $output;
 }
+
+1;
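
The package wrapper plus run() unless caller() keeps the script working as a command-line filter while letting other Perl code load it with require and call ph_numbers::recognize($line, $corpusMode, $legacyMode, $symbol) directly; the trailing 1; is the true value a require'd file must return.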

View File

@@ -33,6 +33,9 @@ while(<STDIN>) {
   s/\(/*LRB*/g;
   s/\)/*RRB*/g;
 
+  # handle @ (the parser does something weird with these)
+  s/\@/\\\@/g;
+
   print TMP $_;
 }
 close(TMP);
@@ -42,6 +45,7 @@ print STDERR $cmd."\n";
 open(PARSE,"$cmd|");
 while(<PARSE>) {
+  s/\\\@/\@/g;
   print $_;
 }
 close(PARSE);