/*
* HT Editor
* mfile.h
*
* Copyright (C) 1999-2003 Stefan Weyergraf (stefan@weyergraf.de)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef __MFILE_H__
#define __MFILE_H__
#include "stream.h"
/*
* File areas
*/
class FileArea: public Object {
public:
    FileOfs start;
    FileOfs size;

    FileArea(FileOfs start, FileOfs size);
    /* extends Object */
    virtual int compareTo(const Object *obj) const;
};

class ModifiedFileArea: public FileArea {
public:
    byte *buf;

    ModifiedFileArea(FileOfs start, FileOfs size);
    virtual ~ModifiedFileArea();
    /* extends Object */
    virtual ObjectID getObjectID() const;
};

class CopiedFileArea: public FileArea {
public:
    FileOfs src_start;

    CopiedFileArea(FileOfs start, FileOfs size, FileOfs src_start);
    /* extends Object */
    virtual ObjectID getObjectID() const;
};

/**
* File modification layer.
* This is a file modification layer, i.e. a file layer that keeps track
* of all modifications made to it without forwarding (or "flushing") them to
* the underlying (possibly physical) file.
* This is invisible to the user of the object, however, as all modifications
* are reflected when information is retrieved through the <b>File</b> interface.
* "Flushing" (i.e. applying the modifications) must be done explicitly through
* a call to <b>fcntl(FCNTL_MODS_FLUSH)</b>.
*
* This object is (especially for big files) much more memory- and time-efficient
* than an "all-in-memory" or even a "modpages-in-memory" approach:
*
* "all-in-memory" approach<br>
* ========================<br>
* This can be achieved using the MemoryFile object (with little modification).
* The whole underlying <b>File</b> is read once on creation of this object and
* then kept in memory until destruction.
* Modifications received through the <b>File</b> interface are instantly applied
* to this "in-memory image".
*
* Performance analysis (performance grades are relative to the probability of the event):
* - read() and write() cause a single call to 'memcpy'. optimal performance.
* - truncate() and extend() cause a single call to 'realloc'. good performance.
* - insert() and cut() cause a single call to 'memmove'. bad performance.
* - memory consumption is constantly high. not memory-efficient.
* This approach is naive and very easy to implement, which is why it is
* very common in text and hex editors (although it is VERY BAD for huge
* files).<p>
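*
* A minimal sketch of the core of this approach (illustrative only; the type
* and member names are hypothetical, while 'byte' and 'FileOfs' come from
* stream.h):
*
*   struct InMemoryImage {
*       byte   *buf;    // the whole file, held in memory
*       FileOfs size;
*
*       // insert(): grow the buffer and shift the tail up by 'count' bytes;
*       // the memmove over (size - ofs) bytes is what makes insert() slow
*       void insert(FileOfs ofs, const byte *src, FileOfs count) {
*           buf = (byte *)realloc(buf, size + count);
*           memmove(buf + ofs + count, buf + ofs, size - ofs);
*           memcpy(buf + ofs, src, count);
*           size += count;
*       }
*   };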
*
* "modpages-in-memory" approach<br>
* =============================<br>
* This approach divides the underlying <b>File</b> into seamless blocks of
* equal size. These blocks are called "pages". Two kinds of pages exist:
* modified and unmodified pages.
* Right after construction, all pages are unmodified. (We may use a tree
* that contains only the modified pages, so this tree is empty right after
* construction.)
* If a write-(or read-)operation spans multiple pages, it is treated as
* multiple discrete write-(or read-)operations not spanning multiple pages.
* Whenever an attempt is made to <b>write()</b> to a page, it is first ensured
* that this page is modified. If it is not modified yet, "page-size" bytes of
* memory are allocated and filled with the corresponding bytes from the
* underlying file. The write is then performed in memory.
* <b>read()</b>s are served from modified pages if possible.
* <b>extend()</b> and <b>truncate()</b> are treated like writes of zeros,
* but they must additionally create new pages and/or <b>realloc()</b> the
* last page to change its size.
* <b>insert()</b> and <b>cut()</b> have "naive" implementations, which
* translate them into a combination of extend()/read()/write() and
* read()/write()/truncate(), respectively.
*
* Performance analysis (performance grades are relative to the probability of the event):
* - read() and write() cause little trouble. good performance.
* - truncate() and extend() cause little trouble. good performance.
* - insert() and cut() cause big trouble. bad performance.
* - memory consumption is high only for insert()/extend(). kind-of memory-efficient.
* This approach is easy to implement. It has been used extensively
* in HT 0.7.x and its predecessors.<p>
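*
* A sketch of the write path in this approach (illustrative only; PAGE_SIZE,
* the 'pages' container and lowlevel_read() are hypothetical names for the
* fixed block size, the tree of modified pages and the access to the
* underlying <b>File</b>):
*
*   // write 'count' bytes at offset 'ofs'; the caller has already split the
*   // request so that it does not cross a page boundary
*   void write_in_page(FileOfs ofs, const byte *src, uint count) {
*       FileOfs pageno = ofs / PAGE_SIZE;
*       byte *page = pages.find(pageno);
*       if (!page) {
*           // page not modified yet: allocate it and fill it from the file
*           page = (byte *)malloc(PAGE_SIZE);
*           lowlevel_read(pageno * PAGE_SIZE, page, PAGE_SIZE);
*           pages.insert(pageno, page);
*       }
*       memcpy(page + ofs % PAGE_SIZE, src, count);  // the write stays in memory
*   }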
*
* "modareas-in-memory" approach<br>
* =============================<br>
* This is the approach used by a <b>FileModificator</b>. It is meant
* to be a more progressive approach than the two above.
* As is well known from complexity theory (and a programmer's everyday
* work), you can often save memory by spending some more (CPU) time and
* vice versa.
* As pointed out in both analyses above, the <b>insert()</b> and <b>cut()</b>
* operations cause these approaches to consume a great deal of memory (about
* as much as the file's size!).
* We will combine these two statements into a new approach:
* It's derived from the "modpages-in-memory" approach, with some exceptions:
* - "seamless blocks of equal size" become "seamless, dynamically sized blocks".
* We will call them areas.
* - due to their dynamic size, areas also have dynamic start offsets.
*
* Taking over the notions from "modpages-in-memory", we then also have
* "modified and unmodified areas",
* but we will now call the unmodified areas "copied areas".
* (Both copied and modified areas must be registered in a tree.)
* <b>insert()</b> and <b>cut()</b> can now be implemented by simple
* operations on the areas. Memory consumption drops drastically on huge
* files (as compared to all other methods).
* This approach is hard to implement (the resulting code is complex).
* It will be used in (at least) HT 2.x.x.
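*
* As an illustration (the numbers are made up, and the actual bookkeeping in
* FileModificator is more involved): consider a 1000 byte file that is still
* covered by a single copied area. Inserting 16 new bytes at offset 100 only
* has to split that area and shift the start of its tail; afterwards the tree
* would conceptually hold
*
*   CopiedFileArea  (  0, 100,   0);   // unchanged head of the file
*   ModifiedFileArea(100,  16);        // buf holds the 16 inserted bytes
*   CopiedFileArea  (116, 900, 100);   // old tail, its start shifted by 16
*
* so only the inserted bytes themselves consume memory; everything else is a
* (re-offset) reference into the underlying file.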
*/
class FileModificator: public FileLayer {
protected:
    AVLTree mods;
    FileOfs newsize;
    FileOfs pos;
    int mcount;
    int inv_mcount;

    bool cut1(ObjHandle h, FileOfs rstart, FileOfs size);
    ObjHandle findArea(FileOfs o);
    void flushMods();
    void invalidateMods();
    bool isModified() const;
    bool isModifiedByte(FileOfs o);
    void makeAreaModified(ObjHandle h, FileOfs rstart, FileOfs size);
    void read1(FileArea *a, FileOfs rstart, byte *buf, uint count);
    void write1(FileArea *a, FileOfs rstart, const byte *buf, uint count);
public:
    FileModificator(File *file, bool own_file);

    /* <debug> */
    void checkSanity();
    void debug();
    /* </debug> */

    /* extends FileLayer */
    virtual FileOfs copyAllTo(Stream *stream);
    virtual FileOfs copyTo(Stream *stream, FileOfs count);
    virtual void cut(FileOfs size);
    virtual void extend(FileOfs newsize);
    virtual String &getDesc(String &result) const;
    virtual FileOfs getSize() const;
    virtual uint read(void *buf, uint size);
    virtual void seek(FileOfs offset);
    virtual FileOfs tell() const;
    virtual void truncate(FileOfs newsize);
    virtual int vcntl(uint cmd, va_list vargs);
    virtual uint write(const void *buf, uint size);
    virtual void insert(const void *buf, FileOfs size);
};
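
/*
 * Usage sketch (illustrative; 'f' stands for any read/write File from
 * stream.h, and fcntl()/FCNTL_MODS_FLUSH are the flush interface described
 * in the class comment above):
 *
 *   File *f = ...;                            // some underlying file
 *   FileModificator *m = new FileModificator(f, true);
 *
 *   m->seek(0x100);
 *   m->write("hello", 5);                     // only recorded in the area tree
 *   m->insert("world", 5);                    // cheap: splits areas, no big copy
 *
 *   byte buf[5];
 *   m->seek(0x100);
 *   m->read(buf, 5);                          // reads already see the changes
 *
 *   m->fcntl(FCNTL_MODS_FLUSH);               // now apply them to the real file
 *   delete m;                                 // own_file == true: also deletes f
 */
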
#endif /* __MFILE_H__ */