File: htmltxar.h

package info (click to toggle)
qtads 2.1.6-1
  • links: PTS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 16,156 kB
  • ctags: 18,767
  • sloc: cpp: 133,078; ansic: 26,048; xml: 18; makefile: 11
file content (276 lines) | stat: -rw-r--r-- 10,988 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
/* $Header: d:/cvsroot/tads/html/htmltxar.h,v 1.2 1999/05/17 02:52:22 MJRoberts Exp $ */

/* 
 *   Copyright (c) 1997 by Michael J. Roberts.  All Rights Reserved.
 *   
 *   Please see the accompanying license file, LICENSE.TXT, for information
 *   on using and copying this software.  
 */
/*
Name
  htmltxar.h - text array class
Function
  This class provides a mechanism for storing a long run of text
  in a linear address space.  Since we'd like to be able to tell
  whether a particular piece of text comes before or after another
  piece of text in the text stream, it's useful for each piece of
  text to have an address in a linear address space.

  Since the text stream for a document can become quite large, we
  don't actually store the text in a single large chunk of memory.
  Instead, we break the text up into pages.  To allow for linear
  addresses within these pages, we give each piece of text a
  "virtual" address, which we can then map into a pointer to the
  actual memory containing the text.

  To simplify code that accesses the text, we ensure that each
  chunk of text added to the array is stored contiguously in
  memory.  So, once the caller has obtained a pointer to the
  memory containing a chunk of text, the caller can treat the
  pointer as a simple C++ character array pointer.

  Note that the addresses that we return are not necessarily
  contiguous; that is, if you store a block of 10 characters,
  and that block is assigned address 25, the next block of
  characters will not necessarily be stored at location 35.
  Therefore, only the addresses actually returned by append_text()
  can be used to retrieve text, and "address arithmetic" is not
  possible with those values outside of a single chunk.  However,
  because ordering is guaranteed, comparisons are legal and reliably
  determine the relative storage order of two chunks.
Notes
  
Modified
  09/23/97 MJRoberts  - Creation
*/

#ifndef HTMLTXAR_H
#define HTMLTXAR_H

#ifndef TADSHTML_H
#include "tadshtml.h"
#endif

/*
 *   Size of one text page, measured in linear-address units.
 *   CHtmlTextArray::get_page() divides a linear address by this value to
 *   find the page index, get_page_ofs() takes the remainder to find the
 *   offset within that page, and make_addr() combines the two again.
 */
const size_t HTML_TEXTARRAY_PAGESIZE = 32*1024;

/*
 *   Page entry.  CHtmlTextArray keeps an array of these (its pages_
 *   member) and indexes it by page number; each entry holds the pointer
 *   to one page's text along with that page's bookkeeping counters.
 *   CHtmlTextArray::get_text() adds an in-page offset to text_, and
 *   CHtmlTextArray::last_page_ofs() reads used_ as the offset of the
 *   next free position on the last page.
 *   
 *   This class declares no constructor; all members are public and are
 *   maintained by the code that owns the entry.  
 */
class CHtmlTextArrayEntry
{
public:
    textchar_t *text_;                                  /* text of the page */
    size_t used_;                      /* amount of space used on this page */
    size_t alloced_;                /* amount of space reserved on the page */
    size_t refs_;                      /* number of references to this page */
    size_t space_in_use_;     /* space actually in use (counting deletions) */
};

class CHtmlTextArray
{
public:
    CHtmlTextArray();
    ~CHtmlTextArray();

    /* clear the text array -- deletes all of the text in the array */
    void clear();

    /*
     *   Determine how much memory, in bytes, the text in the array is
     *   consuming.  This measures the memory allocated to the text, hence
     *   the granularity is the size of a page.  
     */
    unsigned long get_mem_in_use() const { return mem_in_use_; }

    /*
     *   Add text to the array.  We return the linear address of the text
     *   in the buffer, which is guaranteed to be higher than that of any
     *   previously appended text.  We also guarantee that the text will
     *   be stored in a contiguous block of memory, so that subsequent
     *   uses of the text can treat it as a simple character array.  If a
     *   text item of length zero is appended, we won't actually store
     *   anything, but we will return an address that is higher than that
     *   of any text in the array.  
     */
    unsigned long append_text(const textchar_t *txt, size_t len);

    /*
     *   Add text to the array without creating a reference to the text.
     *   This should be used whenever the caller doesn't need to keep
     *   track of the text, such as when the text is added purely to
     *   signal a word or line break. 
     */
    void append_text_noref(const textchar_t *txt, size_t len)
        { store_text(txt, len); }

    /*
     *   Temporarily store text in the array without actually consuming
     *   space.  Returns the address used to store the text, which may
     *   change from the address previouly used for the same temporary
     *   storage. 
     */
    unsigned long store_text_temp(const textchar_t *txt, size_t len);

    /*
     *   Store text and commit the space. 
     */
    unsigned long store_text(const textchar_t *txt, size_t len);

    /*
     *   Delete a reference to a block of text previously allocated.
     */
    void delete_text(unsigned long addr, size_t len);

    /*
     *   Reserve space for a chunk of text, ensuring that the chunk will
     *   be stored contiguously on a single page.  (It's not necessary to
     *   call this prior to append_text, since that will ensure that the
     *   text stored is in a single chunk.  However, it is necessary to
     *   use this if you want to call append_text several times and have
     *   the whole group of text end up in a single page -- for this case,
     *   call reserve_space with the sum of the sizes of the pieces of
     *   text, then make the append_text calls.)  Returns the text offset
     *   for the start of the reserved chunk.
     */
    unsigned long reserve_space(size_t len);

    /*
     *   Get the address of a chunk of text previously allocated.  Only
     *   values returned by append_text() can be used reliably with this
     *   call; no "pointer arithmetic" is possible on the values returned
     *   by append_text() outside of a single chunk, other than comparison
     *   of addresses from any chunks to determine storage order.  
     */
    textchar_t *get_text(unsigned long linear_address) const
    {
        /* make sure it's within range */
        if (linear_address > max_addr_)
            linear_address = max_addr_;

        /* get the text at the given offset */
        return (pages_[get_page(linear_address)].text_
                + get_page_ofs(linear_address));
    }

    /* get the highest address currently in use */
    unsigned long get_max_addr() const { return max_addr_; }

    /* get the character at a given offset */
    textchar_t get_char(unsigned long addr) const { return *get_text(addr); }

    /* increment an offset so that it points to another valid offset */
    unsigned long inc_ofs(unsigned long ofs) const;

    /* decrement an offset so that it points to another valid offset */
    unsigned long dec_ofs(unsigned long ofs) const;

    /*
     *   Determine how many characters are between two text offsets.
     *   Since text offsets are not assigned continuously, it is possible
     *   that the difference of the two offsets overstates the number of
     *   characters between the two offset. 
     */
    unsigned long get_char_count(unsigned long startofs,
                                 unsigned long endofs) const;

    /*
     *   Get a pointer to a chunk of characters starting at the given
     *   offset.  Returns a pointer to the characters, sets *len_in_chunk
     *   to the number of characters (up to a maximum of maxlen) in the
     *   chunk, and advances *startofs to point to the next valid offset
     *   after the chunk returned.  This allows a caller to traverse the
     *   possibly discontinuous array of characters by calling this
     *   routine repeatedly to get chunks.  
     */
    const textchar_t *get_text_chunk(unsigned long *startofs,
                                     size_t *len_in_chunk,
                                     unsigned long maxlen) const;

    /*
     *   Find a text string.  Searches from the given starting offset to
     *   the end of the text array.  If we find the string, we'll set
     *   *match_start and *match_end to the starting and ending offsets of
     *   the match and return true; we'll return false if we can't find
     *   the string.  If exact_case is true, we'll match only if the case
     *   matches, otherwise we'll ignore case.
     *   
     *   If dir is 1, we'll search forwards; otherwise, we'll search
     *   backwards.  If wrap is true, we'll wrap around at the end (or
     *   start if going backwards) of the buffer to the opposite end of
     *   the buffer and continue the search from there; we'll only fail if
     *   we get back to the starting point and still haven't found the
     *   string.  
     */
    int search(const textchar_t *txt, size_t txtlen, int exact_case,
               int whole_word, int wrap, int dir, unsigned long startofs,
               unsigned long *match_start, unsigned long *match_end);

private:
    /* allocate the first page */
    void alloc_first_page();
    
    /* get the page containing an address */
    size_t get_page(unsigned long addr) const
        { return addr / HTML_TEXTARRAY_PAGESIZE; }

    /* get the offset within the page containing an address */
    size_t get_page_ofs(unsigned long addr) const
        { return addr % HTML_TEXTARRAY_PAGESIZE; }

    /* make an address out of a page number and offset */
    unsigned long make_addr(size_t pg, size_t ofs) const
        { return (((unsigned long)pg * HTML_TEXTARRAY_PAGESIZE)
                  + (unsigned long)ofs); }
    
    /* last page */
    size_t last_page() const { return pages_alloced_ - 1; }

    /* offset of next free byte on last page */
    size_t last_page_ofs() const
        { return pages_[last_page()].used_; }

    /* increase amount used on last page */
    void inc_last_page_ofs(size_t len)
        { pages_[last_page()].used_ += len; }
    
    /* array of page pointers; the pages contain the actual text */
    CHtmlTextArrayEntry *pages_;

    /* size of top-level page array (number of pointers allocated) */
    size_t page_entries_;

    /* 
     *   number of pages allocated (this is actually just a high-water
     *   mark for the number of pages *ever* allocated; pages can be
     *   deleted when they are unreferenced, hence this doesn't represent
     *   the number of pages actually present in memory, but just the
     *   number of the next slot to be filled) 
     */
    size_t pages_alloced_;

    /*
     *   Maximum address used.  This includes both storage actually
     *   committed and storage only temporarily used.
     */
    unsigned long max_addr_;

    /*
     *   Pages in use.  This represents the actual number of pages for
     *   which memory is currently allocated.  Whenever we allocate a new
     *   page, we increment this, and whenever we delete a page (because
     *   it becomes unreferenced) we decrement this. 
     */
    size_t pages_in_use_;

    /*
     *   Amount of memory currently in use.  This keeps track of
     *   allocations and deletions.  We might actually be using more
     *   system memory than this would indicate, because pages are not
     *   necessarily completely full; a partial page takes up more OS
     *   memory than this would indicate.  
     */
    unsigned long mem_in_use_;
};


#endif /* HTMLTXAR_H */