coz/benchmarks/pbzip2/BZ2StreamScanner.h
Charlie Curtsinger d1c592b097 Moving benchmarks to properly named dir
--HG--
rename : tests/Makefile => benchmarks/Makefile
rename : tests/experiment.py => benchmarks/experiment.py
rename : tests/histogram/Makefile => benchmarks/histogram/Makefile
rename : tests/histogram/histogram-pthread.c => benchmarks/histogram/histogram-pthread.c
rename : tests/histogram/stddefines.h => benchmarks/histogram/stddefines.h
rename : tests/kmeans/Makefile => benchmarks/kmeans/Makefile
rename : tests/kmeans/kmeans-pthread.c => benchmarks/kmeans/kmeans-pthread.c
rename : tests/kmeans/stddefines.h => benchmarks/kmeans/stddefines.h
rename : tests/linear_regression/Makefile => benchmarks/linear_regression/Makefile
rename : tests/linear_regression/linear_regression-pthread.c => benchmarks/linear_regression/linear_regression-pthread.c
rename : tests/linear_regression/stddefines.h => benchmarks/linear_regression/stddefines.h
rename : tests/matrix_multiply/Makefile => benchmarks/matrix_multiply/Makefile
rename : tests/matrix_multiply/map_reduce.h => benchmarks/matrix_multiply/map_reduce.h
rename : tests/matrix_multiply/matrix_multiply-pthread.c => benchmarks/matrix_multiply/matrix_multiply-pthread.c
rename : tests/matrix_multiply/stddefines.h => benchmarks/matrix_multiply/stddefines.h
rename : tests/pbzip2/AUTHORS => benchmarks/pbzip2/AUTHORS
rename : tests/pbzip2/BZ2StreamScanner.cpp => benchmarks/pbzip2/BZ2StreamScanner.cpp
rename : tests/pbzip2/BZ2StreamScanner.h => benchmarks/pbzip2/BZ2StreamScanner.h
rename : tests/pbzip2/COPYING => benchmarks/pbzip2/COPYING
rename : tests/pbzip2/ErrorContext.cpp => benchmarks/pbzip2/ErrorContext.cpp
rename : tests/pbzip2/ErrorContext.h => benchmarks/pbzip2/ErrorContext.h
rename : tests/pbzip2/Makefile => benchmarks/pbzip2/Makefile
rename : tests/pbzip2/README => benchmarks/pbzip2/README
rename : tests/pbzip2/pbzip2.cpp => benchmarks/pbzip2/pbzip2.cpp
rename : tests/pbzip2/pbzip2.h => benchmarks/pbzip2/pbzip2.h
rename : tests/pca/Makefile => benchmarks/pca/Makefile
rename : tests/pca/pca-pthread.c => benchmarks/pca/pca-pthread.c
rename : tests/pca/stddefines.h => benchmarks/pca/stddefines.h
rename : tests/producer_consumer/Makefile => benchmarks/producer_consumer/Makefile
rename : tests/producer_consumer/producer_consumer.cpp => benchmarks/producer_consumer/producer_consumer.cpp
rename : tests/string_match/Makefile => benchmarks/string_match/Makefile
rename : tests/string_match/map_reduce.h => benchmarks/string_match/map_reduce.h
rename : tests/string_match/stddefines.h => benchmarks/string_match/stddefines.h
rename : tests/string_match/string_match-pthread.c => benchmarks/string_match/string_match-pthread.c
rename : tests/unbalanced/Makefile => benchmarks/unbalanced/Makefile
rename : tests/unbalanced/unbalanced.cpp => benchmarks/unbalanced/unbalanced.cpp
rename : tests/word_count/Makefile => benchmarks/word_count/Makefile
rename : tests/word_count/sort-pthread.c => benchmarks/word_count/sort-pthread.c
rename : tests/word_count/sort-pthread.h => benchmarks/word_count/sort-pthread.h
rename : tests/word_count/stddefines.h => benchmarks/word_count/stddefines.h
rename : tests/word_count/word_count-pthread.c => benchmarks/word_count/word_count-pthread.c
rename : tests/work_queue/Makefile => benchmarks/work_queue/Makefile
rename : tests/work_queue/work_queue.cpp => benchmarks/work_queue/work_queue.cpp
2014-07-07 11:33:04 -04:00

152 lines
4.2 KiB
C++

/*
* File: BZ2StreamScanner.h
* Author: Yavor Nikolov
*
* Created on March 6, 2010, 10:07 PM
*/
#ifndef _BZ2STREAMSCANNER_H
#define _BZ2STREAMSCANNER_H
#include "pbzip2.h"
#include <vector>
#include <string>
using namespace std;
namespace pbzip2
{
/**
 * Scanner over a bz2 input file, addressed by file descriptor.
 *
 * Input is read into an internal buffer, searched for bz2 stream headers
 * (see searchNextHeader()), and handed out as outBuff blocks through
 * getNextStream(). Failures are recorded in an error-state word queried with
 * getErrState() / failed().
 *
 * Instances are non-copyable: the class keeps raw buffer pointers and exposes
 * dispose() to release memory, so both copy operations are declared private
 * and intentionally left undefined.
 */
class BZ2StreamScanner
{
public:
    /**
     * Element type of the input buffer and of the header strings.
     *
     * NOTE(review): std::basic_string<unsigned char> relies on
     * std::char_traits<unsigned char>, which the C++ standard does not
     * require; recent libc++ releases deprecate it. Confirm against the
     * toolchains this benchmark must build with.
     */
    typedef unsigned char CharType;

    static const size_t DEFAULT_IN_BUFF_CAPACITY = 1024 * 1024; // 1M
    static const size_t DEFAULT_OUT_BUFF_LIMIT = 1024 * 1024;   // 1M

    /**
     * Error flags. Every value is a distinct single bit, so several can be
     * combined in one integer (presumably the value returned by
     * getErrState() - confirm in the implementation file).
     */
    enum BZ2SScannerErrorFlag
    {
        ERR_MEM_ALLOC_INBUFF = 1,
        ERR_MEM_ALLOC_OUTBUFF = 1 << 1,
        ERR_IO_READ = 1 << 2,
        ERR_IO_INSUFFICIENT_BUFF_CAPACITY = 1 << 3,
        ERR_INVALID_STATE = 1 << 4,
        ERR_INVALID_FILE_FORMAT = 1 << 5
    };

    /**
     * @param hInFile         input file descriptor
     * @param inBuffCapacity  requested capacity of the input buffer
     */
    BZ2StreamScanner( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY );

    /**
     * (Re)initialize the scanner; takes the same arguments as the constructor.
     *
     * @return int status defined by the implementation file
     *         (presumably 0 on success - confirm there).
     */
    int init( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY );

    virtual ~BZ2StreamScanner();

    /**
     * Produce the next output block. Ownership of the returned pointer is not
     * visible from this header - check the implementation before freeing it.
     */
    outBuff * getNextStream();

    /** number of bytes currently held in the input buffer */
    size_t getInBuffSize() const { return ( _inBuffEnd - _inBuff ); }

    /** allocated capacity of the input buffer */
    size_t getInBuffCapacity() const { return _inBuffCapacity; }

    const std::basic_string<CharType> & getHeader() const { return _bz2Header; }
    size_t getHeaderSize() const { return _bz2Header.size(); }

    /** raw error state: 0 - ok; otherwise error */
    int getErrState() const { return _errState; }

    /** true if the error state is non-zero */
    bool failed() const { return ( _errState != 0 ); }

    /** true if header has been found since last initialization */
    bool isBz2HeaderFound() const { return _bz2HeaderFound; }

    /** status of last/current search only */
    bool getSearchStatus() const { return _searchStatus; }

    /** end of file */
    bool eof() const { return _eof; }

    /** true if out buffer is full enough to produce output block */
    bool isOutBuffFullEnough() const { return _outBuff.bufSize >= getOutBuffCapacityLimit(); }

    /**
     * dispose memory resources
     */
    virtual void dispose();

    void printCurrentState();

private:
    /*
     * Copying is disabled: both operations are declared but deliberately never
     * defined, so accidental use fails at compile time (outside the class) or
     * at link time (inside it).
     */
    BZ2StreamScanner( const BZ2StreamScanner& orig );
    BZ2StreamScanner& operator=( const BZ2StreamScanner& orig );

    void initOutBuff( char * buf = NULL, size_t bufSize = 0, size_t bufCapacity = 0 );

    int appendOutBuffData( CharType * end );

    /** same as appendOutBuffData( end ) with end = current search pointer */
    int appendOutBuffData() { return appendOutBuffData( getInBuffSearchPtr() ); }

    int appendOutBuffDataUpToLimit();
    int ensureOutBuffCapacity( size_t newSize );
    int readData();

    CharType * getInBuffEnd() { return _inBuffEnd; }
    CharType * getInBuffBegin() { return _inBuff; }
    CharType * getInBuffCurrent() { return _inBuffCurrent; }
    CharType * getInBuffSearchPtr() { return _inBuffSearchPtr; }

    /** one past the last used byte of the output buffer */
    char * getOutBuffEnd() { return _outBuff.buf + _outBuff.bufSize; }

    /** number of input bytes between the search pointer and the end of data */
    size_t getUnsearchedCount() const { return _inBuffEnd - _inBuffSearchPtr; }

    /**
     * Search next bz2 header. Read more data from file if needed.
     *
     * @return pointer to header is returned if found;
     *         getInBuffEnd() - if not found; NULL - on error.
     */
    CharType * searchNextHeader();

    /**
     * Search next bz2 header just in currently available input buffer.
     * (Doesn't read more data from file).
     *
     * @return pointer to header or getInBuffEnd() if such is not found.
     */
    CharType * searchNextHeaderInBuff();

    /**
     * Prepare for next read from file into input buffer.
     * Consumes remaining input data buffer and moves header tail to beginning.
     */
    int rewindInBuff();

    /**
     * Locate BZh header prefix in buffer. In case of first search - just check
     * the beginning of buffer and signal error if it doesn't match to headers.
     *
     * @return pointer to BZh header prefix if located. getInBuffEnd() if not.
     *         failed() and getErrState() will indicate error if such occurred.
     */
    CharType * locateHeaderPrefixInBuff();

    size_t getOutBuffCapacityLimit() const { return _outBuffCapacityLimit; }

    int _hInFile; // input file descriptor
    bool _eof;

    std::basic_string<CharType> _bz2Header;
    std::basic_string<CharType> _bz2HeaderZero;
    bool _bz2HeaderFound;
    bool _searchStatus;

    CharType * _inBuff;
    CharType * _inBuffEnd; // end of data read from file
    CharType * _inBuffCurrent;
    CharType * _inBuffSearchPtr;
    size_t _inBuffCapacity; // allocated memory capacity for in buffer

    outBuff _outBuff;
    size_t _outBuffCapacity;
    size_t _outBuffCapacityHint; // keep max used capacity
    size_t _outBuffCapacityLimit;

    unsigned int _errState; // 0 - ok; otherwise error
    int _outSequenceNumber; // output block sequence number in bz2 stream (>0 if segmented)
    int _streamNumber;
};
}
#endif /* _BZ2STREAMSCANNER_H */