iterate through feature file

This commit is contained in:
bhaddow 2011-11-14 22:18:22 +00:00
parent 4cf6e0320a
commit 3a6c0e0680
3 changed files with 77 additions and 29 deletions

View File

@ -19,22 +19,77 @@ You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <sstream>
#include "util/string_piece.hh"
#include "util/tokenize_piece.hh"
#include "FeatureArray.h"
#include "FeatureDataIterator.h"
using namespace std;
using namespace util;
// Default constructor: no file is attached (m_in is null) and no items are
// buffered. equal() treats a null m_in as the end-of-iteration state.
FeatureDataIterator::FeatureDataIterator()
  : m_in(), m_next()
{
}
// Opens `filename` through a FilePiece and immediately loads the first record
// with readNext(), so the iterator is positioned as soon as it is constructed.
FeatureDataIterator::FeatureDataIterator(const string filename)
  : m_in(new FilePiece(filename.c_str()))
{
  readNext();
}
void FeatureDataIterator::readNext() {
try {
StringPiece marker = m_in->ReadDelimited();
if (marker != StringPiece(FEATURES_TXT_BEGIN)) {
throw FileFormatException(m_in->FileName(), marker.as_string());
}
size_t sentenceId = m_in->ReadULong();
size_t count = m_in->ReadULong();
cerr << "Expecting " << count << endl;
m_in->ReadLine(); //discard rest of line
for (size_t i = 0; i < count; ++i) {
StringPiece line = m_in->ReadLine();
for (util::TokenIter<util::AnyCharacter, true> token(line, util::AnyCharacter(" \t")); token; ++token) {
//TODO: Create FeatureDataItem
char* err_ind;
float value = static_cast<float>(strtod(token->data(), &err_ind));
if (err_ind == token->data()) {
throw FileFormatException(m_in->FileName(), line.as_string());
}
cerr << value << ",";
}
cerr << "\n";
}
StringPiece line = m_in->ReadLine();
if (line != StringPiece(FEATURES_TXT_END)) {
throw FileFormatException(m_in->FileName(), line.as_string());
}
} catch (EndOfFileException &e) {
m_in.reset();
}
}
void FeatureDataIterator::increment() {
readNext();
}
// Compares two iterators.
//
// An iterator without a reader (null m_in) is "at end": two such iterators are
// equal, and an at-end iterator never equals one that still has a reader.
// Two iterators that both have a reader are equal when they refer to the same
// file name and the same offset.
bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const
{
  const bool thisAtEnd = !m_in;
  const bool otherAtEnd = !rhs.m_in;

  if (thisAtEnd || otherAtEnd) {
    // Equal only if both sides have run out of input.
    return thisAtEnd && otherAtEnd;
  }

  const bool sameFile = m_in->FileName() == rhs.m_in->FileName();
  return sameFile && m_in->Offset() == rhs.m_in->Offset();
}
// Gives read-only access to the items buffered for the current record (m_next).
const vector<FeatureDataItem>& FeatureDataIterator::dereference() const
{
  return this->m_next;
}

View File

@ -29,36 +29,17 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <fstream>
#include <map>
#include <memory>
#include <stdexcept>
#include <vector>
#include <boost/iterator/iterator_facade.hpp>
#include <boost/shared_ptr.hpp>
#include "util/file_piece.hh"
//Minimal sparse vector
// Float values are held per instance in fvector_ (a map from numeric id to
// value). The name<->id tables (name2id_, id2name_) are static members, so
// they are shared by every SparseVector. Member functions are only declared
// here; their definitions are not part of this view.
class SparseVector {
// NOTE(review): this #include appears inside the class body in this view; it
// looks like an artifact of old and new diff lines being shown together —
// confirm against the real header.
#include "FeatureStats.h"
public:
// id -> value storage
typedef std::map<size_t,float> fvector_t;
// feature name -> id lookup
typedef std::map<std::string, size_t> name2id_t;
// id -> feature name lookup (indexed by id, presumably — TODO confirm)
typedef std::vector<std::string> id2name_t;
// Lookup by feature name or by id. Presumably yields the stored value;
// behaviour for a missing entry is not visible here — TODO confirm.
float get(std::string name) const;
float get(size_t id) const;
// Stores `value` under the feature called `name`.
void set(std::string name, float value);
void clear();
size_t size() const;
// Writes the vector to `out`; `sep` defaults to a single space. The exact
// output format is defined in the implementation (not shown here).
void write(std::ostream& out, const std::string& sep = " ") const;
// In-place subtraction of `rhs`.
SparseVector& operator-=(const SparseVector& rhs);
private:
// Shared by all instances (static).
static name2id_t name2id_;
static id2name_t id2name_;
// Per-instance values.
fvector_t fvector_;
};
// Binary subtraction; declared here, defined elsewhere.
SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
class FeatureDataItem {
public:
@ -66,16 +47,24 @@ class FeatureDataItem {
SparseVector sparse;
};
// Exception thrown when a feature file does not have the expected layout.
// The message reads: Error in line "<line>" of <filename>
class FileFormatException : public util::Exception {
public:
  // filename: name of the file being read.
  // line:     the offending text, quoted verbatim in the message.
  // Both arguments are taken by const reference; they are only streamed into
  // the message, so no copy is needed.
  explicit FileFormatException(const std::string& filename, const std::string& line) {
    *this << "Error in line \"" << line << "\" of " << filename;
  }
};
class FeatureDataIterator :
public boost::iterator_facade<FeatureDataIterator,
const std::vector<FeatureDataItem>,
boost::forward_traversal_tag>
{
public:
FeatureDataIterator();
FeatureDataIterator(const std::string filename);
// Returns the past-the-end sentinel.
// A default-constructed iterator has no reader attached (m_in is null), which
// is exactly the state equal() recognises as the end of iteration. Building
// the sentinel from an empty file name instead would go through the filename
// constructor, which opens the file and reads from it.
static FeatureDataIterator end() {
  return FeatureDataIterator();
}
@ -86,7 +75,10 @@ class FeatureDataIterator :
bool equal(const FeatureDataIterator& rhs) const;
const std::vector<FeatureDataItem>& dereference() const;
std::ifstream* in_;
void readNext();
boost::shared_ptr<util::FilePiece> m_in;
std::vector<FeatureDataItem> m_next;
};
#endif

View File

@ -77,6 +77,7 @@ int main(int argc, char** argv)
}
FeatureDataIterator fi(featureFiles[0]);
//cerr << featureFiles[0] << endl;
for (; fi != FeatureDataIterator::end(); ++fi) {
const vector<FeatureDataItem>& featureData = *fi;
}