mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
pro extraction mainline and stub of feature data iterator
This commit is contained in:
parent
4bb9ecb8eb
commit
4cf6e0320a
@ -17,6 +17,7 @@ AC_PROG_LIBTOOL
|
||||
AX_XMLRPC_C
|
||||
BOOST_REQUIRE([1.36.0])
|
||||
BOOST_SMART_PTR
|
||||
BOOST_PROGRAM_OPTIONS
|
||||
|
||||
AC_ARG_WITH(protobuf,
|
||||
[AC_HELP_STRING([--with-protobuf=PATH], [(optional) path to Google protobuf])],
|
||||
|
40
mert/FeatureDataIterator.cpp
Normal file
40
mert/FeatureDataIterator.cpp
Normal file
@ -0,0 +1,40 @@
|
||||
// $Id$
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2011- University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "FeatureDataIterator.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
FeatureDataIterator::FeatureDataIterator(const string filename) {
|
||||
}
|
||||
|
||||
void FeatureDataIterator::increment() {
|
||||
|
||||
}
|
||||
|
||||
bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const {
|
||||
|
||||
}
|
||||
|
||||
const vector<FeatureDataItem>& FeatureDataIterator::dereference() const {
|
||||
|
||||
}
|
94
mert/FeatureDataIterator.h
Normal file
94
mert/FeatureDataIterator.h
Normal file
@ -0,0 +1,94 @@
|
||||
// $Id$
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2011- University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef _FEATURE_DATA_ITERATOR_
|
||||
#define _FEATURE_DATA_ITERATOR_
|
||||
|
||||
/**
|
||||
* For loading from the feature data file.
|
||||
**/
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/iterator/iterator_facade.hpp>
|
||||
|
||||
|
||||
//Minimal sparse vector
|
||||
/**
 * Minimal sparse vector.
 *
 * Each instance owns a map from numeric id to float value (fvector_).
 * The name/id lookup tables (name2id_, id2name_) are static members and
 * are therefore shared by all instances. The member functions are only
 * declared here; their definitions live outside this header.
 **/
class SparseVector
{
public:
  /** Storage for the values, keyed by numeric id. */
  typedef std::map<size_t,float> fvector_t;
  /** Lookup table from name to numeric id. */
  typedef std::map<std::string, size_t> name2id_t;
  /** Lookup table from numeric id to name. */
  typedef std::vector<std::string> id2name_t;

  // Element access, by name or by numeric id.
  float get(std::string name) const;
  float get(size_t id) const;
  void set(std::string name, float value);

  // Whole-vector operations.
  void clear();
  size_t size() const;

  /** Writes the vector to out; sep defaults to a single space. */
  void write(std::ostream& out, const std::string& sep = " ") const;

  /** In-place subtraction of rhs. */
  SparseVector& operator-=(const SparseVector& rhs);

private:
  static name2id_t name2id_;
  static id2name_t id2name_;
  fvector_t fvector_;
};

/** Binary subtraction of two sparse vectors; returns the result by value. */
SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
|
||||
|
||||
class FeatureDataItem {
|
||||
public:
|
||||
std::vector<float> dense;
|
||||
SparseVector sparse;
|
||||
};
|
||||
|
||||
class FeatureDataIterator :
|
||||
public boost::iterator_facade<FeatureDataIterator,
|
||||
const std::vector<FeatureDataItem>,
|
||||
boost::forward_traversal_tag>
|
||||
{
|
||||
public:
|
||||
FeatureDataIterator(const std::string filename);
|
||||
|
||||
static FeatureDataIterator end() {
|
||||
return FeatureDataIterator("");
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
friend class boost::iterator_core_access;
|
||||
|
||||
void increment();
|
||||
bool equal(const FeatureDataIterator& rhs) const;
|
||||
const std::vector<FeatureDataItem>& dereference() const;
|
||||
|
||||
std::ifstream* in_;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
lib_LTLIBRARIES = libmert.la
|
||||
bin_PROGRAMS = mert extractor evaluator
|
||||
bin_PROGRAMS = mert extractor evaluator pro
|
||||
AM_CPPFLAGS = -W -Wall -Wno-unused -ffor-scope -DTRACE_ENABLE $(BOOST_CPPFLAGS)
|
||||
|
||||
libmert_la_SOURCES = \
|
||||
@ -7,6 +7,7 @@ Util.cpp \
|
||||
Timer.cpp \
|
||||
ScoreStats.cpp ScoreArray.cpp ScoreData.cpp \
|
||||
FeatureStats.cpp FeatureArray.cpp FeatureData.cpp \
|
||||
FeatureDataIterator.cpp \
|
||||
Data.cpp \
|
||||
BleuScorer.cpp \
|
||||
Point.cpp \
|
||||
@ -29,9 +30,13 @@ CderScorer.cpp \
|
||||
MergeScorer.cpp
|
||||
|
||||
mert_SOURCES = mert.cpp $(top_builddir)/moses/src/ThreadPool.cpp
|
||||
extractor_SOURCES = extractor.cpp
|
||||
evaluator_SOURCES = evaluator.cpp
|
||||
extractor_SOURCES = extractor.cpp
|
||||
evaluator_SOURCES = evaluator.cpp
|
||||
pro_SOURCES = pro.cpp
|
||||
|
||||
extractor_LDADD = libmert.la -lm -lz
|
||||
mert_LDADD = libmert.la -lm -lz $(BOOST_THREAD_LDFLAGS) $(BOOST_THREAD_LIBS)
|
||||
evaluator_LDADD = libmert.la -lm -lz
|
||||
pro_LDADD = libmert.la @KENLM_LDFLAGS@ $(BOOST_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LDFLAGS) $(BOOST_PROGRAM_OPTIONS_LIBS)
|
||||
pro_DEPENDENCIES = $(top_srcdir)/kenlm/libkenlm.la
|
||||
|
||||
|
85
mert/pro.cpp
85
mert/pro.cpp
@ -0,0 +1,85 @@
|
||||
// $Id$
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2011- University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* This is part of the PRO implementation. It converts the features and scores
|
||||
* files into a form suitable for input into the megam maxent trainer.
|
||||
*
|
||||
* For details of PRO, refer to Hopkins & May (EMNLP 2011)
|
||||
**/
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include "FeatureDataIterator.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
bool help;
|
||||
vector<string> scoreFiles;
|
||||
vector<string> featureFiles;
|
||||
int seed;
|
||||
|
||||
po::options_description desc("Allowed options");
|
||||
desc.add_options()
|
||||
("help,h", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
|
||||
("scfile,S", po::value<vector<string> >(&scoreFiles), "Scorer data files")
|
||||
("ffile,F", po::value<vector<string> > (&featureFiles), "Feature data files")
|
||||
("random-seed,r", po::value<int>(&seed), "Seed for random number generation")
|
||||
;
|
||||
|
||||
po::options_description cmdline_options;
|
||||
cmdline_options.add(desc);
|
||||
po::variables_map vm;
|
||||
po::store(po::command_line_parser(argc,argv).
|
||||
options(cmdline_options).run(), vm);
|
||||
po::notify(vm);
|
||||
if (help) {
|
||||
cout << "Usage: " + string(argv[0]) + " [options]" << endl;
|
||||
cout << desc << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vm.count("random-seed")) {
|
||||
cerr << "Initialising random seed to " << seed << endl;
|
||||
srand(seed);
|
||||
} else {
|
||||
cerr << "Initialising random seed from system clock" << endl;
|
||||
srand(time(NULL));
|
||||
}
|
||||
|
||||
FeatureDataIterator fi(featureFiles[0]);
|
||||
for (; fi != FeatureDataIterator::end(); ++fi) {
|
||||
const vector<FeatureDataItem>& featureData = *fi;
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user