1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
/*
* File: BZ2StreamScanner.h
* Author: Yavor Nikolov
*
* Created on March 6, 2010, 10:07 PM
*/
#ifndef _BZ2STREAMSCANNER_H
#define _BZ2STREAMSCANNER_H
#include "pbzip2.h"
#include <vector>
#include <string>
using namespace std;
namespace pbzip2
{
/**
 * Scanner over a bz2 input file that locates bz2 stream headers in an input
 * buffer and hands data out through an outBuff (see getNextStream()).
 *
 * Errors are reported through getErrState() / failed(); the possible error
 * bits are listed in BZ2SScannerErrorFlag.
 *
 * NOTE(review): the header is stored as basic_string<unsigned char>. The
 * standard does not require a char_traits specialization for unsigned char
 * and some newer standard libraries reportedly no longer ship one - confirm
 * against the toolchains this project targets.
 */
class BZ2StreamScanner
{
public:
	typedef unsigned char CharType;

	static const size_t DEFAULT_IN_BUFF_CAPACITY = 1024 * 1024; // 1M
	static const size_t DEFAULT_OUT_BUFF_LIMIT = 1024 * 1024; // 1M

	/**
	 * Error flags. Every value occupies its own bit.
	 * (Presumably they are OR-ed together into the value returned by
	 * getErrState() - verify against the implementation.)
	 */
	enum BZ2SScannerErrorFlag
	{
		ERR_MEM_ALLOC_INBUFF = 1,
		ERR_MEM_ALLOC_OUTBUFF = 1 << 1,
		ERR_IO_READ = 1 << 2,
		ERR_IO_INSUFFICIENT_BUFF_CAPACITY = 1 << 3,
		ERR_INVALID_STATE = 1 << 4,
		ERR_INVALID_FILE_FORMAT = 1 << 5
	};

	/**
	 * @param hInFile input file descriptor
	 * @param inBuffCapacity requested capacity of the input buffer
	 */
	BZ2StreamScanner( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY );

	/**
	 * (Re)initialize the scanner; takes the same arguments as the constructor.
	 * The meaning of the returned int is defined by the implementation
	 * (not visible in this header).
	 */
	int init( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY );

	virtual ~BZ2StreamScanner();

	/**
	 * Produce the next output buffer.
	 * NOTE(review): ownership of the returned outBuff and the value returned
	 * on error/end of input are defined by the implementation - confirm there.
	 */
	outBuff * getNextStream();

	/** number of elements between the start of the input buffer and the end of data read from file */
	size_t getInBuffSize() const { return ( _inBuffEnd - _inBuff ); }

	/** allocated memory capacity of the input buffer */
	size_t getInBuffCapacity() const { return _inBuffCapacity; }

	const basic_string<CharType> & getHeader() const { return _bz2Header; }
	size_t getHeaderSize() const { return _bz2Header.size(); }

	/** 0 - ok; otherwise error */
	int getErrState() const { return _errState; }

	/** true if the error state is non-zero; const so it is usable on const scanners like the other accessors */
	bool failed() const { return ( _errState != 0 ); }

	/** true if header has been found since last initialization */
	bool isBz2HeaderFound() const { return _bz2HeaderFound; }

	/** status of last/current search only */
	bool getSearchStatus() const { return _searchStatus; }

	/** end of file */
	bool eof() const { return _eof; }

	/** true if out buffer is full enough to produce output block */
	bool isOutBuffFullEnough() const { return _outBuff.bufSize >= getOutBuffCapacityLimit(); }

	/**
	 * dispose memory resources
	 */
	virtual void dispose();

#ifdef PBZIP_DEBUG
	void printCurrentState();
#endif

private:
	/*
	 * disable copy c-tor
	 *
	 * NOTE(review): the body is empty, so a copy made from inside the class
	 * would leave every member uninitialized. Copy assignment is not disabled
	 * here - consider declaring it private as well if the scanner owns its
	 * buffers.
	 */
	BZ2StreamScanner( const BZ2StreamScanner& orig ) {}

	void initOutBuff( char * buf = NULL, size_t bufSize = 0, size_t bufCapacity = 0 );

	int appendOutBuffData( CharType * end );

	/** same as appendOutBuffData( end ) with end = current search pointer */
	int appendOutBuffData() { return appendOutBuffData( getInBuffSearchPtr() ); }

	int appendOutBuffDataUpToLimit();
	int ensureOutBuffCapacity( size_t newSize );
	int readData();

	CharType * getInBuffEnd() { return _inBuffEnd; }
	CharType * getInBuffBegin() { return _inBuff; }
	CharType * getInBuffCurrent() { return _inBuffCurrent; }
	CharType * getInBuffSearchPtr() { return _inBuffSearchPtr; }

	/** pointer one past the last used byte of the out buffer */
	char * getOutBuffEnd() { return _outBuff.buf + _outBuff.bufSize; }

	/** number of elements between the search pointer and the end of data read from file */
	size_t getUnsearchedCount() const { return _inBuffEnd - _inBuffSearchPtr; }

	/**
	 * Search next bz2 header. Read more data from file if needed.
	 *
	 * @return pointer to header is returned if found;
	 *         getInBuffEnd() - if not found; NULL - on error.
	 */
	CharType * searchNextHeader();

	/**
	 * Search next bz2 header just in currently available input buffer.
	 * (Doesn't read more data from file).
	 *
	 * @return pointer to header or getInBuffEnd() if such is not found.
	 */
	CharType * searchNextHeaderInBuff();

	/**
	 * Prepare for next read from file into input buffer.
	 * Consumes remaining input data buffer and moves header tail to beginning.
	 *
	 */
	int rewindInBuff();

	/**
	 * Locate BZh header prefix in buffer. In case of first search - just check
	 * the beginning of buffer and signal error if it doesn't match to headers.
	 *
	 * @return pointer to BZh header prefix if located. getInBuffEnd() if not.
	 *         failed() and getErrState() will indicate error if such occurred.
	 */
	CharType * locateHeaderPrefixInBuff();

	size_t getOutBuffCapacityLimit() const { return _outBuffCapacityLimit; }

	int _hInFile; // input file descriptor
	bool _eof;

	basic_string<CharType> _bz2Header;
	basic_string<CharType> _bz2HeaderZero;
	bool _bz2HeaderFound;
	bool _searchStatus;

	CharType * _inBuff;
	CharType * _inBuffEnd; // end of data read from file
	CharType * _inBuffCurrent;
	CharType * _inBuffSearchPtr;

	size_t _inBuffCapacity; // allocated memory capacity for in buffer

	outBuff _outBuff;
	size_t _outBuffCapacity;
	size_t _outBuffCapacityHint; // keep max used capacity
	size_t _outBuffCapacityLimit;

	unsigned int _errState; // 0 - ok; otherwise error
	int _outSequenceNumber; // output block sequence number in bz2 stream (>0 if segmented)
	int _streamNumber;
};
}
#endif /* _BZ2STREAMSCANNER_H */
|