File: BZ2StreamScanner.h

package info (click to toggle)
pbzip2 1.1.13-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid, trixie
  • size: 448 kB
  • sloc: cpp: 4,357; ruby: 101; makefile: 31
file content (153 lines) | stat: -rw-r--r-- 4,355 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/* 
 * File:   BZ2StreamScanner.h
 * Author: Yavor Nikolov
 *
 * Created on March 6, 2010, 10:07 PM
 */

#ifndef _BZ2STREAMSCANNER_H
#define _BZ2STREAMSCANNER_H

#include "pbzip2.h"
#include <vector>
#include <string>

using namespace std;

namespace pbzip2
{

/**
 * Scans input read from a file descriptor for bz2 stream headers and hands the
 * data out as output buffers through getNextStream().
 *
 * Errors are accumulated as a bit mask of BZ2SScannerErrorFlag values; query
 * it with failed() / getErrState().
 *
 * Instances are non-copyable: the scanner holds raw pointers to its input and
 * output buffers, so both the copy constructor and the copy assignment
 * operator are private and intentionally left undefined.
 */
class BZ2StreamScanner
{
public:
	typedef unsigned char CharType;

	static const size_t DEFAULT_IN_BUFF_CAPACITY = 1024 * 1024; // 1M
	static const size_t DEFAULT_OUT_BUFF_LIMIT = 1024 * 1024; // 1M

	/** Error bit flags; combined into the value returned by getErrState(). */
	enum BZ2SScannerErrorFlag
	{
		ERR_MEM_ALLOC_INBUFF = 1,
		ERR_MEM_ALLOC_OUTBUFF = 1 << 1,
		ERR_IO_READ = 1 << 2,
		ERR_IO_INSUFFICIENT_BUFF_CAPACITY = 1 << 3,
		ERR_INVALID_STATE = 1 << 4,
		ERR_INVALID_FILE_FORMAT = 1 << 5
	};

	/**
	 * @param hInFile input file descriptor
	 * @param inBuffCapacity requested capacity of the input buffer
	 */
	BZ2StreamScanner( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY );

	/**
	 * (Re)initialize the scanner for the given input file descriptor.
	 * Takes the same parameters as the constructor.
	 */
	int init( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY );

	virtual ~BZ2StreamScanner();

	outBuff * getNextStream();

	/** number of bytes between the input buffer begin and the end of data read */
	size_t getInBuffSize() const { return ( _inBuffEnd - _inBuff ); }
	/** allocated memory capacity of the input buffer */
	size_t getInBuffCapacity() const { return _inBuffCapacity; }
	const std::basic_string<CharType> & getHeader() const { return _bz2Header; }
	size_t getHeaderSize() const { return _bz2Header.size(); }
	/** 0 - ok; otherwise error */
	int getErrState() const { return _errState; }
	/** true if the error state is non-zero */
	bool failed() const { return ( _errState != 0 ); }

	/** true if header has been found since last initialization */
	bool isBz2HeaderFound() const { return _bz2HeaderFound; }

	/** status of last/current search only */
	bool getSearchStatus() const { return _searchStatus; }

	/** end of file */
	bool eof() const { return _eof; }

	/** true if out buffer is full enough to produce output block */
	bool isOutBuffFullEnough() const { return _outBuff.bufSize >= getOutBuffCapacityLimit(); }

	/**
	 * dispose memory resources
	 */
	virtual void dispose();

	#ifdef PBZIP_DEBUG
	void printCurrentState();
	#endif

private:
	/*
	 * Copying is disabled: declared private and deliberately NOT defined, so
	 * that any accidental copy - even from inside the class - is rejected at
	 * compile or link time instead of producing an object with uninitialized
	 * or shared buffer pointers.
	 */
	BZ2StreamScanner( const BZ2StreamScanner& orig );
	BZ2StreamScanner & operator=( const BZ2StreamScanner& orig );

	void initOutBuff( char * buf = NULL, size_t bufSize = 0, size_t bufCapacity = 0 );
	int appendOutBuffData( CharType * end );
	/** append using the current search pointer as the end argument */
	int appendOutBuffData() { return appendOutBuffData( getInBuffSearchPtr() ); }
	int appendOutBuffDataUpToLimit();
	int ensureOutBuffCapacity( size_t newSize );
	int readData();

	CharType * getInBuffEnd() { return _inBuffEnd; }
	CharType * getInBuffBegin() { return _inBuff; }
	CharType * getInBuffCurrent() { return _inBuffCurrent; }
	CharType * getInBuffSearchPtr() { return _inBuffSearchPtr; }
	/** pointer just past the data currently stored in the out buffer */
	char * getOutBuffEnd() { return _outBuff.buf + _outBuff.bufSize; }
	/** number of bytes between the search pointer and the end of data read */
	size_t getUnsearchedCount() const { return _inBuffEnd - _inBuffSearchPtr; }

	/**
	 * Search next bz2 header. Read more data from file if needed.
	 *
	 * @return pointer to header is returned if found;
	 *         getInBuffEnd() - if not found; NULL - on error.
	 */
	CharType * searchNextHeader();

	/**
	 * Search next bz2 header just in currently available input buffer.
	 * (Doesn't read more data from file).
	 *
	 * @return pointer to header or getInBuffEnd() if such is not found.
	 */
	CharType * searchNextHeaderInBuff();

	/**
	 * Prepare for next read from file into input buffer.
	 * Consumes remaining input data buffer and moves header tail to beginning.
	 *
	 */
	int rewindInBuff();

	/**
	 * Locate BZh header prefix in buffer. In case of first search - just check
	 * the beginning of buffer and signal error if it doesn't match to headers.
	 *
	 * @return pointer to BZh header prefix if located. getInBuffEnd() if not.
	 *         failed() and getErrState() will indicate error if such occurred.
	 */
	CharType * locateHeaderPrefixInBuff();

	size_t getOutBuffCapacityLimit() const { return _outBuffCapacityLimit; }

	int _hInFile; // input file descriptor
	bool _eof;

	std::basic_string<CharType> _bz2Header;
	std::basic_string<CharType> _bz2HeaderZero;
	bool _bz2HeaderFound;
	bool _searchStatus;

	CharType * _inBuff;
	CharType * _inBuffEnd; // end of data read from file
	CharType * _inBuffCurrent;
	CharType * _inBuffSearchPtr;

	size_t _inBuffCapacity; // allocated memory capacity for in buffer

	outBuff _outBuff;
	size_t _outBuffCapacity;
	size_t _outBuffCapacityHint; // keep max used capacity
	size_t _outBuffCapacityLimit;

	unsigned int _errState; // 0 - ok; otherwise error
	int _outSequenceNumber; // output block sequence number in bz2 stream (>0 if segmented)
	int _streamNumber;
};

}

#endif /* _BZ2STREAMSCANNER_H */