Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2014-05-20 19:52:31 +01:00
commit 3f93534624
11 changed files with 303 additions and 49 deletions

View File

@ -48,6 +48,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/FF/TreeStructureFeature.h"
#include "moses/PP/TreeStructurePhraseProperty.h"
#include "util/exception.hh"
using namespace std;
@ -410,17 +411,15 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica
if (hypo != NULL) {
OutputTranslationOption(out, applicationContext, hypo, sentence, translationId);
const std::string key = "Tree";
std::string value;
bool hasProperty;
const TargetPhrase &currTarPhr = hypo->GetCurrTargetPhrase();
currTarPhr.GetProperty(key, value, hasProperty);
boost::shared_ptr<PhraseProperty> property;
out << " ||| ";
if (hasProperty)
out << " " << value;
else
if (currTarPhr.GetProperty("Tree", property)) {
out << " " << property->GetValueString();
} else {
out << " " << "noTreeInfo";
}
out << std::endl;
}
@ -439,17 +438,15 @@ void IOWrapper::OutputTreeFragmentsTranslationOptions(std::ostream &out, Applica
if (applied != NULL) {
OutputTranslationOption(out, applicationContext, applied, sentence, translationId);
const std::string key = "Tree";
std::string value;
bool hasProperty;
const TargetPhrase &currTarPhr = *static_cast<const TargetPhrase*>(applied->GetNote().vp);
currTarPhr.GetProperty(key, value, hasProperty);
boost::shared_ptr<PhraseProperty> property;
out << " ||| ";
if (hasProperty)
out << " " << value;
else
if (currTarPhr.GetProperty("Tree", property)) {
out << " " << property->GetValueString();
} else {
out << " " << "noTreeInfo";
}
out << std::endl;
}

View File

@ -6,6 +6,7 @@
#include "moses/TargetPhrase.h"
#include <boost/shared_ptr.hpp>
#include <vector>
#include "moses/PP/TreeStructurePhraseProperty.h"
using namespace std;
@ -270,10 +271,9 @@ FFState* TreeStructureFeature::EvaluateChart(const ChartHypothesis& cur_hypo
, int featureID /* used to index the state in the previous hypotheses */
, ScoreComponentCollection* accumulator) const
{
std::string tree;
bool found = 0;
cur_hypo.GetCurrTargetPhrase().GetProperty("Tree", tree, found);
if (found) {
boost::shared_ptr<PhraseProperty> property;
if (cur_hypo.GetCurrTargetPhrase().GetProperty("Tree", property)) {
const std::string &tree = property->GetValueString();
TreePointer mytree (new InternalTree(tree));
if (m_labelset) {

View File

@ -65,6 +65,7 @@ lib moses :
FF/*.cpp
FF/OSM-Feature/*.cpp
FF/LexicalReordering/*.cpp
PP/*.cpp
: #exceptions
ThreadPool.cpp
SyntacticLanguageModel.cpp

91
moses/PP/Factory.cpp Normal file
View File

@ -0,0 +1,91 @@
#include "moses/PP/Factory.h"
#include "util/exception.hh"
#include <iostream>
#include <vector>
#include "moses/PP/TreeStructurePhraseProperty.h"
namespace Moses
{
/** Abstract creator interface: turns the value string of a phrase property
 *  into a PhraseProperty object. Concrete creators are stored in the
 *  PhrasePropertyFactory registry.
 */
class PhrasePropertyCreator
{
public:
  virtual ~PhrasePropertyCreator() {}

  /** Build a property object from the given value string. */
  virtual boost::shared_ptr<PhraseProperty> CreateProperty(const std::string &value) = 0;

protected:
  // Only subclasses may instantiate a creator.
  PhrasePropertyCreator() {}

  /** Wrap a raw property pointer in a boost::shared_ptr of the same concrete type. */
  template <class P> boost::shared_ptr<P> Create(P *property) {
    boost::shared_ptr<P> wrapped(property);
    return wrapped;
  }
};
namespace
{

/** Creator that constructs a property of type P directly from the value
 *  string and then lets the new object process that value.
 *
 *  P must be constructible from a const std::string& and provide ProcessValue().
 */
template <class P> class DefaultPhrasePropertyCreator : public PhrasePropertyCreator
{
public:
  /** Build a P from value, run its ProcessValue() and return it as a shared base pointer. */
  boost::shared_ptr<PhraseProperty> CreateProperty(const std::string &value) {
    // Hand the new object to a shared_ptr *before* calling ProcessValue(),
    // so it is released instead of leaked if ProcessValue() throws.
    boost::shared_ptr<P> property = Create(new P(value));
    property->ProcessValue();
    return property;
  }
};

} // namespace
/** Populate the registry with a creator for every known phrase property. */
PhrasePropertyFactory::PhrasePropertyFactory()
{
// Register a property whose key is identical to its class name.
#define MOSES_PNAME(name) Add(#name, new DefaultPhrasePropertyCreator< name >())
// Register a property whose key differs from its class name.
#define MOSES_PNAME2(name, type) Add(name, new DefaultPhrasePropertyCreator< type >())

  MOSES_PNAME2("Tree", TreeStructurePhraseProperty);
}
/** Nothing to release by hand: the registry holds its creators through boost::shared_ptr. */
PhrasePropertyFactory::~PhrasePropertyFactory() {}
void PhrasePropertyFactory::Add(const std::string &name, PhrasePropertyCreator *creator)
{
std::pair<std::string, boost::shared_ptr<PhrasePropertyCreator> > to_ins(name, boost::shared_ptr<PhrasePropertyCreator>(creator));
UTIL_THROW_IF2(!m_registry.insert(to_ins).second, "Phrase property registered twice: " << name);
}
namespace
{

/** Exception type thrown when a property key has no registered creator. */
class UnknownPhrasePropertyException : public util::Exception
{
};

} // namespace
/** Create the property object registered for a key.
 *
 *  @param key   property key to look up in the registry
 *  @param value raw value string handed to the creator
 *  @return the property built by the registered creator
 *
 *  Throws UnknownPhrasePropertyException when no creator is registered for key.
 */
boost::shared_ptr<PhraseProperty> PhrasePropertyFactory::ProduceProperty(const std::string &key, const std::string &value) const
{
  const Registry::const_iterator i = m_registry.find(key);
  UTIL_THROW_IF(i == m_registry.end(), UnknownPhrasePropertyException, "Phrase property is not registered: " << key);

  PhrasePropertyCreator &creator = *i->second;
  return creator.CreateProperty(value);
}
void PhrasePropertyFactory::PrintPP() const
{
std::cerr << "Registered phrase properties:" << std::endl;
Registry::const_iterator iter;
for (iter = m_registry.begin(); iter != m_registry.end(); ++iter) {
const std::string &ppName = iter->first;
std::cerr << ppName << " ";
}
std::cerr << std::endl;
}
} // namespace Moses

33
moses/PP/Factory.h Normal file
View File

@ -0,0 +1,33 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include <string>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
namespace Moses
{
class PhrasePropertyCreator;
/** Registry that maps a phrase property key to the creator able to build
 *  the matching PhraseProperty object.
 */
class PhrasePropertyFactory
{
public:
  PhrasePropertyFactory();

  ~PhrasePropertyFactory();

  /** Build the property registered under key from its raw value string. */
  boost::shared_ptr<PhraseProperty> ProduceProperty(const std::string &key, const std::string &value) const;

  /** Print the keys of all registered properties. */
  void PrintPP() const;

private:
  typedef boost::unordered_map<std::string, boost::shared_ptr<PhrasePropertyCreator> > Registry;

  /** Register creator under name. */
  void Add(const std::string &name, PhrasePropertyCreator *creator);

  Registry m_registry;
};
} // namespace Moses

27
moses/PP/PhraseProperty.h Normal file
View File

@ -0,0 +1,27 @@
#pragma once
#include <string>
#include <iostream>
namespace Moses
{
/** Base class for all phrase properties.
 *
 *  Stores the raw value string of a property. Subclasses can override
 *  ProcessValue() to derive further data from that string.
 */
class PhraseProperty
{
public:
  /** @param value raw property value; a copy is kept and never modified. */
  PhraseProperty(const std::string &value) : m_value(value) {}

  // The class is used polymorphically (it has a virtual member function),
  // so deleting a subclass through a base pointer must be well defined.
  virtual ~PhraseProperty() {}

  /** Hook for subclasses to process the value string; the base class does nothing. */
  virtual void ProcessValue() {}

  /** @return the raw value string. Const so it can be called on const objects and references. */
  const std::string &GetValueString() const { return m_value; }

protected:
  const std::string m_value;
};
} // namespace Moses

View File

@ -0,0 +1,18 @@
#pragma once
#include "moses/PP/PhraseProperty.h"
#include <string>
namespace Moses
{
/** Phrase property that carries its value unchanged: it adds nothing to
 *  PhraseProperty beyond a distinct type.
 */
class TreeStructurePhraseProperty : public PhraseProperty
{
public:
  TreeStructurePhraseProperty(const std::string &value)
    : PhraseProperty(value)
  {
  }
};
} // namespace Moses

View File

@ -42,6 +42,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "SentenceStats.h"
#include "ScoreComponentCollection.h"
#include "moses/FF/Factory.h"
#include "moses/PP/Factory.h"
namespace Moses
{
@ -200,6 +201,7 @@ protected:
bool m_adjacentOnly;
FeatureRegistry m_registry;
PhrasePropertyFactory m_phrasePropertyFactory;
StaticData();
@ -734,6 +736,9 @@ public:
const FeatureRegistry &GetFeatureRegistry() const
{ return m_registry; }
const PhrasePropertyFactory &GetPhrasePropertyFactory() const
{ return m_phrasePropertyFactory; }
/** check whether we should be using the old code to support binary phrase-table.
** eventually, we'll stop supporting the binary phrase-table and delete this legacy code
**/

View File

@ -156,7 +156,6 @@ void TargetPhrase::Evaluate(const InputType &input, const InputPath &inputPath)
void TargetPhrase::SetXMLScore(float score)
{
const StaticData &staticData = StaticData::Instance();
const FeatureFunction* prod = PhraseDictionary::GetColl()[0];
size_t numScores = prod->GetNumScoreComponents();
vector <float> scoreVector(numScores,score/numScores);
@ -240,16 +239,22 @@ void TargetPhrase::SetProperties(const StringPiece &str)
}
}
void TargetPhrase::GetProperty(const std::string &key, std::string &value, bool &found) const
void TargetPhrase::SetProperty(const std::string &key, const std::string &value)
{
std::map<std::string, std::string>::const_iterator iter;
const StaticData &staticData = StaticData::Instance();
const PhrasePropertyFactory& phrasePropertyFactory = staticData.GetPhrasePropertyFactory();
m_properties[key] = phrasePropertyFactory.ProduceProperty(key,value);
}
bool TargetPhrase::GetProperty(const std::string &key, boost::shared_ptr<PhraseProperty> &value) const
{
std::map<std::string, boost::shared_ptr<PhraseProperty> >::const_iterator iter;
iter = m_properties.find(key);
if (iter == m_properties.end()) {
found = false;
} else {
found = true;
if (iter != m_properties.end()) {
value = iter->second;
return true;
}
return false;
}
void TargetPhrase::SetRuleSource(const Phrase &ruleSource) const

View File

@ -28,9 +28,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Phrase.h"
#include "ScoreComponentCollection.h"
#include "AlignmentInfo.h"
#include "moses/PP/PhraseProperty.h"
#include "util/string_piece.hh"
#include <boost/shared_ptr.hpp>
#ifdef HAVE_PROTOBUF
#include "rule.pb.h"
#endif
@ -55,7 +57,8 @@ private:
const Word *m_lhsTarget;
mutable Phrase *m_ruleSource; // to be set by the feature function that needs it.
std::map<std::string, std::string> m_properties;
std::map<std::string, boost::shared_ptr<PhraseProperty> > m_properties;
public:
TargetPhrase();
TargetPhrase(const TargetPhrase &copy);
@ -133,10 +136,8 @@ public:
void SetRuleSource(const Phrase &ruleSource) const;
void SetProperties(const StringPiece &str);
void SetProperty(const std::string &key, const std::string &value) {
m_properties[key] = value;
}
void GetProperty(const std::string &key, std::string &value, bool &found) const;
void SetProperty(const std::string &key, const std::string &value);
bool GetProperty(const std::string &key, boost::shared_ptr<PhraseProperty> &value) const;
void Merge(const TargetPhrase &copy, const std::vector<FactorType>& factorVec);

View File

@ -18,7 +18,7 @@ sub trim($)
my $host = `hostname`; chop($host);
print STDERR "STARTING UP AS PROCESS $$ ON $host AT ".`date`;
my ($CONFIG_FILE,$EXECUTE,$NO_GRAPH,$CONTINUE,$FINAL,$VERBOSE,$IGNORE_TIME,$DELETE_CRASHED);
my ($CONFIG_FILE,$EXECUTE,$NO_GRAPH,$CONTINUE,$FINAL,$VERBOSE,$IGNORE_TIME,$DELETE_CRASHED,$DELETE_VERSION);
my $SLEEP = 2;
my $META = "$RealBin/experiment.meta";
@ -36,6 +36,8 @@ die("experiment.perl -config config-file [-exec] [-no-graph]")
unless &GetOptions('config=s' => \$CONFIG_FILE,
'continue=i' => \$CONTINUE,
'delete-crashed=i' => \$DELETE_CRASHED,
'delete-run=i' => \$DELETE_VERSION,
'delete-version=i' => \$DELETE_VERSION,
'ignore-time' => \$IGNORE_TIME,
'exec' => \$EXECUTE,
'cluster' => \$CLUSTER,
@ -51,9 +53,11 @@ if (! -e "steps") { `mkdir -p steps`; }
die("error: could not find config file")
unless ($CONFIG_FILE && -e $CONFIG_FILE) ||
($CONTINUE && -e &steps_file("config.$CONTINUE",$CONTINUE)) ||
($DELETE_CRASHED && -e &steps_file("config.$DELETE_CRASHED",$DELETE_CRASHED));
($DELETE_CRASHED && -e &steps_file("config.$DELETE_CRASHED",$DELETE_CRASHED)) ||
($DELETE_VERSION && -e &steps_file("config.$DELETE_VERSION",$DELETE_VERSION));
$CONFIG_FILE = &steps_file("config.$CONTINUE",$CONTINUE) if $CONTINUE && !$CONFIG_FILE;
$CONFIG_FILE = &steps_file("config.$DELETE_CRASHED",$DELETE_CRASHED) if $DELETE_CRASHED && !$CONFIG_FILE;
$CONFIG_FILE = &steps_file("config.$DELETE_CRASHED",$DELETE_CRASHED) if $DELETE_CRASHED;
$CONFIG_FILE = &steps_file("config.$DELETE_VERSION",$DELETE_VERSION) if $DELETE_VERSION;
my (@MODULE,
%MODULE_TYPE,
@ -88,8 +92,9 @@ chdir(&check_and_get("GENERAL:working-dir"));
my $VERSION = 0; # experiment number
$VERSION = $CONTINUE if $CONTINUE;
$VERSION = $DELETE_CRASHED if $DELETE_CRASHED;
$VERSION = $DELETE_VERSION if $DELETE_VERSION;
&compute_version_number() if $EXECUTE && !$CONTINUE && !$DELETE_CRASHED;
&compute_version_number() if $EXECUTE && !$CONTINUE && !$DELETE_CRASHED && !$DELETE_VERSION;
`mkdir -p steps/$VERSION`;
&log_config();
@ -113,6 +118,11 @@ if (defined($DELETE_CRASHED)) {
exit;
}
if (defined($DELETE_VERSION)) {
&delete_version($DELETE_VERSION);
exit;
}
print "\nCHECKING IF OLD STEPS ARE RE-USABLE\n";
my @RE_USE; # maps re-usable steps to older versions
my %RECURSIVE_RE_USE; # stores links from .INFO files that record prior re-use
@ -407,7 +417,7 @@ sub log_config {
my $dir = &check_and_get("GENERAL:working-dir");
`mkdir -p $dir/steps`;
my $config_file = &steps_file("config.$VERSION",$VERSION);
`cp $CONFIG_FILE $config_file` unless $CONTINUE || $DELETE_CRASHED;
`cp $CONFIG_FILE $config_file` unless $CONTINUE || $DELETE_CRASHED || $DELETE_VERSION;
open(PARAMETER,">".&steps_file("parameter.$VERSION",$VERSION)) or die "Cannot open: $!";
foreach my $parameter (sort keys %CONFIG) {
print PARAMETER "$parameter =";
@ -689,30 +699,96 @@ sub get_sets {
return @SET;
}
# DELETION OF STEPS AND VERSIONS
# delete step files for steps that have crashed
sub delete_crashed() {
sub delete_crashed {
my $crashed = 0;
for(my $i=0;$i<=$#DO_STEP;$i++) {
my $step_file = &versionize(&step_file($i),$DELETE_CRASHED);
next unless -e $step_file;
next unless &check_if_crashed($i,$DELETE_CRASHED);
# step file
if ($EXECUTE) {
`rm $step_file $step_file.*`;
print "deleted crashed step $step_file\n";
}
print "crashed: $step_file\n";
# output
&delete_output(&get_default_file(&deconstruct_name($DO_STEP[$i])));
&delete_step($DO_STEP[$i],$DELETE_CRASHED);
$crashed++;
}
print "run with -exec to delete steps\n" if $crashed && !$EXECUTE;
print "nothing to do\n" unless $crashed;
}
# delete all step and data files for a version
#
# Works on the version given in $DELETE_VERSION:
#  - steps of this version that other, not yet deleted versions re-use are kept
#  - all other steps of this version are handed to delete_step
#  - steps of already deleted versions that this version re-used are handed
#    to delete_step as well, unless another not-deleted version re-uses them
sub delete_version {
    # check which versions are already deleted
    my %ALREADY_DELETED;
    my $dir = &check_and_get("GENERAL:working-dir");
    open(VERSION,"ls $dir/steps/*/deleted.* 2>/dev/null|");
    while(<VERSION>) {
        # skip lines without a version number; otherwise $1 would still
        # hold the capture of an earlier successful match
        next unless /deleted\.(\d+)/;
        $ALREADY_DELETED{$1}++;
    }
    close(VERSION);

    # check if any of the steps are re-used by other versions
    my (%USED_BY_OTHERS,%DELETABLE,%NOT_DELETABLE);
    open(VERSION,"ls $dir/steps|");
    while(my $version = <VERSION>) {
        chomp($version);
        next if $version !~ /^\d+/ || $version == 0;
        # nothing to read if the re-use file cannot be opened
        open(RE_USE,"steps/$version/re-use.$version") or next;
        while(<RE_USE>) {
            next unless /^(.+) (\d+)$/;
            my ($step,$re_use_version) = ($1,$2);

            # a step in the current version that is used in other versions
            $USED_BY_OTHERS{$step}++ if $re_use_version == $DELETE_VERSION && !defined($ALREADY_DELETED{$version});

            # potentially deletable step in already deleted version that current version uses
            push @{$DELETABLE{$re_use_version}}, $step if $version == $DELETE_VERSION && defined($ALREADY_DELETED{$re_use_version});

            # not deletable step used by not-deleted version
            $NOT_DELETABLE{$re_use_version}{$step}++ if $version != $DELETE_VERSION && !defined($ALREADY_DELETED{$version});
        }
        close(RE_USE);
    }
    close(VERSION);

    # go through all steps for which step files were created
    open(STEPS,"ls $dir/steps/$DELETE_VERSION/[A-Z]*.$DELETE_VERSION|");
    while(my $step_file = <STEPS>) {
        chomp($step_file);
        my $step = &get_step_from_step_file($step_file);
        next if $USED_BY_OTHERS{$step};
        &delete_step($step,$DELETE_VERSION);
    }
    close(STEPS);

    # orphan killing: delete steps in deleted versions, if they were only preserved because this version needed them
    foreach my $version (keys %DELETABLE) {
        foreach my $step (@{$DELETABLE{$version}}) {
            next if defined($NOT_DELETABLE{$version}) && defined($NOT_DELETABLE{$version}{$step});
            &delete_step($step,$version);
        }
    }
}
# turn the path of a step file into a step name:
# drop the directory part and the trailing version number, and use ':' in place of '_'
sub get_step_from_step_file {
    my $name = shift;
    $name =~ s/^.+\///;    # strip everything up to the last slash
    $name =~ s/\.\d+$//;   # strip the trailing ".<number>"
    $name =~ tr/_/:/;      # every underscore becomes a colon
    return $name;
}
# delete one step of one version: its step file (plus files named <step file>.*)
# and its output; files are only removed when running with $EXECUTE set
sub delete_step {
    my ($step_name,$version) = @_;
    my ($module,$set,$step) = &deconstruct_name($step_name);

    # step file
    my $versioned_step_file = &versionize(&step_file2($module,$set,$step),$version);
    print "delete step $versioned_step_file\n";
    if ($EXECUTE) {
        `rm $versioned_step_file $versioned_step_file.*`;
    }

    # output: for steps that belong to a set, the set name is put in front of the file name
    my $output = $STEP_OUTNAME{"$module:$step"};
    if ($set) {
        $output =~ s/^(.+\/)([^\/]+)$/$1$set.$2/g;
    }
    &delete_output(&versionize(&long_file_name($output,$module,$set), $version));
}
sub delete_output {
my ($file) = @_;
if (-d $file) {
@ -732,7 +808,7 @@ sub delete_output {
}
}
# RE-USE
# look for completed step jobs from previous experiments
sub find_re_use {
my $dir = &check_and_get("GENERAL:working-dir");