mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 22:14:57 +03:00
iterate through feature file
This commit is contained in:
parent
4cf6e0320a
commit
3a6c0e0680
@ -19,22 +19,77 @@ You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include "util/string_piece.hh"
|
||||
#include "util/tokenize_piece.hh"
|
||||
|
||||
#include "FeatureArray.h"
|
||||
#include "FeatureDataIterator.h"
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace util;
|
||||
|
||||
|
||||
|
||||
// Builds an iterator with no input attached. m_in is left empty, which is
// the state equal() checks for before touching the underlying file.
FeatureDataIterator::FeatureDataIterator()
{
}
|
||||
|
||||
// Opens `filename` through a FilePiece and immediately calls readNext() so
// the first record has been consumed by the time the constructor returns.
FeatureDataIterator::FeatureDataIterator(const string filename)
  : m_in(new FilePiece(filename.c_str()))
{
  readNext();
}
|
||||
|
||||
void FeatureDataIterator::readNext() {
|
||||
try {
|
||||
StringPiece marker = m_in->ReadDelimited();
|
||||
if (marker != StringPiece(FEATURES_TXT_BEGIN)) {
|
||||
throw FileFormatException(m_in->FileName(), marker.as_string());
|
||||
}
|
||||
size_t sentenceId = m_in->ReadULong();
|
||||
size_t count = m_in->ReadULong();
|
||||
cerr << "Expecting " << count << endl;
|
||||
m_in->ReadLine(); //discard rest of line
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
StringPiece line = m_in->ReadLine();
|
||||
for (util::TokenIter<util::AnyCharacter, true> token(line, util::AnyCharacter(" \t")); token; ++token) {
|
||||
//TODO: Create FeatureDataItem
|
||||
char* err_ind;
|
||||
float value = static_cast<float>(strtod(token->data(), &err_ind));
|
||||
if (err_ind == token->data()) {
|
||||
throw FileFormatException(m_in->FileName(), line.as_string());
|
||||
}
|
||||
cerr << value << ",";
|
||||
}
|
||||
cerr << "\n";
|
||||
}
|
||||
StringPiece line = m_in->ReadLine();
|
||||
if (line != StringPiece(FEATURES_TXT_END)) {
|
||||
throw FileFormatException(m_in->FileName(), line.as_string());
|
||||
}
|
||||
} catch (EndOfFileException &e) {
|
||||
m_in.reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Advances the iterator by delegating to readNext().
void FeatureDataIterator::increment()
{
  readNext();
}
|
||||
|
||||
// Compares two iterators.
//
// Iterators without an input (empty m_in) are equal only to each other.
// Otherwise both must refer to the same file name and the same offset.
bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const
{
  const bool lhsEmpty = !m_in;
  const bool rhsEmpty = !rhs.m_in;
  if (lhsEmpty || rhsEmpty) {
    // Equal only when neither side has an input attached.
    return lhsEmpty && rhsEmpty;
  }

  if (!(m_in->FileName() == rhs.m_in->FileName())) {
    return false;
  }
  return m_in->Offset() == rhs.m_in->Offset();
}
|
||||
|
||||
// Exposes the items currently held in m_next by const reference.
const vector<FeatureDataItem>& FeatureDataIterator::dereference() const
{
  return m_next;
}
|
||||
|
@ -29,36 +29,17 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/iterator/iterator_facade.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#include "util/file_piece.hh"
|
||||
|
||||
//Minimal sparse vector
|
||||
// Sparse feature vector: per-instance values in fvector_, plus name/id
// tables declared static and therefore shared by every instance.
class SparseVector {
|
||||
// NOTE(review): this #include sits inside the class body in this view; it
// looks like a line from the other side of the diff - confirm against the
// real header.
#include "FeatureStats.h"
|
||||
|
||||
public:
|
||||
// Maps a size_t key to a float value.
typedef std::map<size_t,float> fvector_t;
|
||||
// Maps a string to a size_t.
typedef std::map<std::string, size_t> name2id_t;
|
||||
// Sequence of strings; presumably indexed by feature id - TODO confirm.
typedef std::vector<std::string> id2name_t;
|
||||
|
||||
// Presumably returns the value stored for the named feature - the
// definition is not visible here.
float get(std::string name) const;
|
||||
// Overload of get() taking a size_t id instead of a name.
float get(size_t id) const;
|
||||
// Presumably stores `value` for the named feature - TODO confirm.
void set(std::string name, float value);
|
||||
void clear();
|
||||
size_t size() const;
|
||||
|
||||
// Writes to `out`; `sep` defaults to a single space.
void write(std::ostream& out, const std::string& sep = " ") const;
|
||||
|
||||
SparseVector& operator-=(const SparseVector& rhs);
|
||||
|
||||
private:
|
||||
// Static: one name -> id table for the whole class.
static name2id_t name2id_;
|
||||
// Static: one id -> name table for the whole class.
static id2name_t id2name_;
|
||||
// Per-instance storage.
fvector_t fvector_;
|
||||
};
|
||||
|
||||
SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
|
||||
|
||||
class FeatureDataItem {
|
||||
public:
|
||||
@ -66,16 +47,24 @@ class FeatureDataItem {
|
||||
SparseVector sparse;
|
||||
};
|
||||
|
||||
// Exception thrown when part of an input file does not have the expected
// format.
class FileFormatException : public util::Exception {
public:
  // filename: name of the file being read.
  // line:     the offending text.
  // Both are embedded in the exception message. Both are taken by const
  // reference so that constructing the exception does not copy either string.
  explicit FileFormatException(const std::string& filename, const std::string& line) {
    *this << "Error in line \"" << line << "\" of " << filename;
  }
};
|
||||
|
||||
class FeatureDataIterator :
|
||||
public boost::iterator_facade<FeatureDataIterator,
|
||||
const std::vector<FeatureDataItem>,
|
||||
boost::forward_traversal_tag>
|
||||
{
|
||||
public:
|
||||
FeatureDataIterator();
|
||||
FeatureDataIterator(const std::string filename);
|
||||
|
||||
static FeatureDataIterator end() {
|
||||
return FeatureDataIterator("");
|
||||
return FeatureDataIterator();
|
||||
}
|
||||
|
||||
|
||||
@ -86,7 +75,10 @@ class FeatureDataIterator :
|
||||
bool equal(const FeatureDataIterator& rhs) const;
|
||||
const std::vector<FeatureDataItem>& dereference() const;
|
||||
|
||||
std::ifstream* in_;
|
||||
void readNext();
|
||||
|
||||
boost::shared_ptr<util::FilePiece> m_in;
|
||||
std::vector<FeatureDataItem> m_next;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -77,6 +77,7 @@ int main(int argc, char** argv)
|
||||
}
|
||||
|
||||
FeatureDataIterator fi(featureFiles[0]);
|
||||
//cerr << featureFiles[0] << endl;
|
||||
for (; fi != FeatureDataIterator::end(); ++fi) {
|
||||
const vector<FeatureDataItem>& featureData = *fi;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user