File: mfile.h

package info (click to toggle)
ht 2.0.20-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 6,324 kB
  • sloc: cpp: 97,563; ansic: 17,183; sh: 3,811; lex: 226; makefile: 213; yacc: 127
file content (181 lines) | stat: -rw-r--r-- 7,522 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
/*
 *	HT Editor
 *	mfile.h
 *
 *	Copyright (C) 1999-2003 Stefan Weyergraf (stefan@weyergraf.de)
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#ifndef __MFILE_H__
#define __MFILE_H__

#include "stream.h"

/*
 *   File areas
 */
class FileArea: public Object {
public:
	FileOfs start;
	FileOfs size;

				FileArea(FileOfs start, FileOfs size);
	/* extends Object */
	virtual	int		compareTo(const Object *obj) const;
};

class ModifiedFileArea: public FileArea {
public:
	byte *buf;
				ModifiedFileArea(FileOfs start, FileOfs size);
	virtual			~ModifiedFileArea();
	/* extends Object */
	virtual	ObjectID	getObjectID() const;
};

class CopiedFileArea: public FileArea {
public:
	FileOfs src_start;
				CopiedFileArea(FileOfs start, FileOfs size, FileOfs src_start);
	/* extends Object */
	virtual	ObjectID	getObjectID() const;
};

/**
 *	File modification layer.
 *	This is a file modification layer. Ie. a file-layer that keeps track
 *	of all modifications made to it, without forwarding (or "flushing") them to
 *	the underlying (possibly physical) file.
 *	This is invisible however to the user of this object as all modifications are
 *	reflected back when retrieving information through the <b>File</b> interface.
 *	"Flushing" (ie. applying the modifications) must be done explicitly through
 *	a call to <b>fcntl(FCNTL_MODS_FLUSH)</b>.
 *
 *	This object is (esp. for big files) much more memory- and time-efficient
 *	compared to an "all-in-memory"- or even a "modpages-in-memory"-approach:
 *
 *	"all-in-memory" approach<br>
 *	========================<br>
 *	This can be achieved using the MemoryFile object (with little modification).
 *	The whole underlying <b>File</b> is read once on creation of this object and
 *	then kept in memory until destruction.
 *	Modifications received through the <b>File</b> interface are instantly applied to the
 *	this "in-memory image".
 *
 *	Performance analysis (performance grades are relative to the probability of the event):
 *		- read() and write() cause a single call to 'memcpy'. optimal performance.
 *		- truncate() and extend() cause a single call to 'realloc'. good performance.
 *		- insert() and cut() cause a single call to 'memmove'. bad performance.
 *		- memory consumption is constantly high. not memory-efficient.
 *	This approach is naive and very easy to implement, which is why:
 *	This is a very common approach for text- and hex-editors (although
 *	it is VERY BAD for huge files).<p>
 *
 *	"modpages-in-memory" approach<br>
 *	=============================<br>
 *	This approach divides the underlying <b>File</b> into seamless blocks of
 *	equal size. These blocks are called "pages". Two kinds of pages exist:
 *	modified and unmodified pages.
 *	Right after construction, all pages are unmodified. (We may use a tree
 *	that contains only modified pages. So this tree is empty right after
 *	construction).
 *	If a write-(or read-)operation spans multiple pages, it is treated as
 *	multiple discrete write-(or read-)operations not spanning multiple pages.
 *	Whenever an attempt is made to <b>write()</b> to a page, it is made sure
 *	that this page is modified. If it is not modified, "page-size" bytes of
 *	memory are allocated and filled with the corresponding bytes from the
 *	underlying file. The write is then performed in memory. 
 *	<b>read()</b>s are served from modified pages if possible.
 *	<b>extend()</b> and <b>truncate()</b> are treated like writes with zeros.
 *	But they must create new pages and/or change the size of the last page
 *	and <b>realloc()</b>.
 *	<b>insert()</b> and <b>cut()</b> have "naive" implementations, which
 *	translates them into a combination of extend()/read()/write() and
 *	read()/write()/truncate(), respectively.
 *
 *	Performance analysis (performance grades are relative to the probability of the event):
 *		- read() and write() cause little trouble. good performance.
 *		- truncate() and extend() cause little trouble. good performance.
 *		- insert() and cut() cause big trouble. bad performance.
 *		- memory consumption is high only for insert()/extend(). kind-of memory-efficient.
 *	This approach is easy to implement. It has been used extensively used
 *	in HT 0.7.x and its predecessors.<p>
 *
 *	"modareas-in-memory" approach<br>
 *	=============================<br>
 *	This is the approach used by a <b>FileModificator</b>. This is meant
 *	to be a more progressive approach compared to the above.
 *	As is well known from complexity-theory (and a programmer's all-day
 *	work) you can often save memory by wasting some more (CPU-)time and
 *	vice-versa.
 *	As pointed out in both analyses above the <b>insert()</b> and <b>cut()</b>
 *	operations cause these approaches to comsume very much memory (about as much
 *	as the file's size !).
 *	We will combine these two statements into a new approach:
 *	It's derived from the "modpages-in-memory" approach, with some exceptions:
 *	- "seamless blocks of equal size" become "seamless, dynamically sized blocks".
 *	  We will call them areas.
 *	- due to their dynamic size, areas also have dynamic start offsets.
 *
 *	Taking the notions from "modpages-in-memory" we have then also:
 *	"modified and unmodified areas".
 *	But we will now call the unmodified areas "copied areas".
 *	(And both copied and modified areas must to registered in a tree).
 *	<b>insert()</b> and <b>cut()</b> can now be implemented by simple
 *	operations on the areas. Memory consumption drops drastically on huge
 *	files (as compared to all other methods).
 *	This approach is hard to implement (the code written is complex).
 *	It will be used in (at least) HT 2.x.x .
 */
class FileModificator: public FileLayer {
protected:
	AVLTree mods;
	FileOfs newsize;
	FileOfs pos;
	int mcount;
	int inv_mcount;

		bool		cut1(ObjHandle h, FileOfs rstart, FileOfs size);
		ObjHandle	findArea(FileOfs o);
		void		flushMods();
		void		invalidateMods();
		bool		isModified() const;
		bool		isModifiedByte(FileOfs o);
		void 		makeAreaModified(ObjHandle h, FileOfs rstart, FileOfs size);
		void		read1(FileArea *a, FileOfs rstart, byte *buf, uint count);
		void		write1(FileArea *a, FileOfs rstart, const byte *buf, uint count);
public:
				FileModificator(File *file, bool own_file);
	/* <debug> */
		void		checkSanity();
		void		debug();
	/* </debug> */
	/* extends FileLayer */
	virtual	FileOfs		copyAllTo(Stream *stream);
	virtual	FileOfs		copyTo(Stream *stream, FileOfs count);
	virtual	void		cut(FileOfs size);
	virtual	void		extend(FileOfs newsize);
	virtual String &	getDesc(String &result) const;
	virtual	FileOfs		getSize() const;
	virtual	uint		read(void *buf, uint size);
	virtual	void		seek(FileOfs offset);
	virtual	FileOfs		tell() const;
	virtual	void		truncate(FileOfs newsize);
	virtual	int		vcntl(uint cmd, va_list vargs);
	virtual	uint		write(const void *buf, uint size);
	virtual	void		insert(const void *buf, FileOfs size);
};

#endif /* __MFILE_H__ */