File: Doc.py

package info (click to toggle)
pyrite 0.9.3
  • links: PTS
  • area: main
  • in suites: potato
  • size: 1,504 kB
  • ctags: 1,924
  • sloc: python: 6,064; ansic: 5,094; makefile: 275; sh: 172
file content (408 lines) | stat: -rw-r--r-- 11,647 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
#
#  $Id: Doc.py,v 1.3 1999/12/11 12:35:11 rob Exp $
#
#  Copyright 1998-1999 Rob Tillotson <robt@debian.org>
#  All Rights Reserved
#
#  Permission to use, copy, modify, and distribute this software and
#  its documentation for any purpose and without fee or royalty is
#  hereby granted, provided that the above copyright notice appear in
#  all copies and that both the copyright notice and this permission
#  notice appear in supporting documentation or portions thereof,
#  including modifications, that you you make.
#
#  THE AUTHOR ROB TILLOTSON DISCLAIMS ALL WARRANTIES WITH REGARD TO
#  THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
#  AND FITNESS.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
#  SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
#  RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
#  CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
#  CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE!
#
"""Doc-format E-Texts.

  This module handles databases in the standard Palm e-text
  format used by applications such as AportisDoc, JDoc, TealDoc,
  etc.

  DOC databases are an excellent example of a reasonably complex
  file format for the Palm.  DOC databases contain three types
  of records: a header, any number of text records (which may be
  compressed by a simple algorithm that gives 40-50% compression
  on many files), and some bookmark records.  The classes in this
  module show how such a database could be handled; the appropriate
  class for a record is selected according to its position in the
  database and the information in the header.

  A small C module (_Doc) goes with this one; it contains the
  compression and decompression routines from "makedoc", since
  it would be significantly slower to use Python for this.

  Also included are a pair of classes, DOCReader and DOCWriter, which
  implement streamed access to a DOC database.  These classes allow
  you to treat DOCs as ordinary text files, to a certain extent.
  While the reader class is useful, the writer class is even more so,
  since it allows (beware! large memory consumption) you to stream
  data into the file, set bookmarks, etc. without having to know in
  advance how big the text is.  (The util subdirectory of the package
  contains a couple of examples of what this is good for.)
"""

__version__ = '$Id: Doc.py,v 1.3 1999/12/11 12:35:11 rob Exp $'

__copyright__ = 'Copyright 1998-1999 Rob Tillotson <robt@debian.org>'

  
import Pyrite
import Pyrite.Connector
import Pyrite.Store
from Pyrite import Blocks
import string, struct

# C module
# Prefer the compiled _Doc extension for compression/decompression.  If
# importing it (or looking up its two functions) raises anything at all,
# fall back to the doc_compress module, which is used through the same
# compress/uncompress names.
# NOTE(review): the bare except also hides failures other than a missing
# module (e.g. a broken build of _Doc) -- confirm that is intended.
try:
    from Pyrite import _Doc
    _compress = _Doc.compress
    _uncompress = _Doc.uncompress
except:
    from Pyrite import doc_compress
    _compress = doc_compress.compress
    _uncompress = doc_compress.uncompress
    
from Pyrite import FLD_STRING, FLD_INT, FLD_LIST, _

class Connector(Pyrite.Connector.Connector):
    """Pyrite connector for Doc-format e-text databases.

    Besides the defaults it installs for new databases (type 'TEXt',
    creator 'REAd', class Database), it offers stream-style access:
    create_stream() returns a DocWriteStream and open_stream() returns
    a DOCReader.
    """
    name = 'Doc'
    version = Pyrite.version
    author = Pyrite.author
    url = ''
    description = _("Doc-format e-texts.")

    def __init__(self, *a, **kw):
        """Initialise the base connector, then set the Doc defaults."""
        apply(Pyrite.Connector.Connector.__init__, (self,) + a, kw)
        self.default_name = None
        self.default_class = Database
        self.default_type = 'TEXt'
        self.default_creator = 'REAd'

    def classify(self, info={}):
        """Return Database if 'info' describes a Doc database, else None.

        A database qualifies when its 'type' is 'TEXt' and its 'creator'
        is either 'REAd' or 'TlDc'.
        """
        maker = info.get('creator')
        if info.get('type') != 'TEXt':
            return None
        if maker in ('REAd', 'TlDc'):
            return Database
        return None

    def create_stream(self, store, name, creator='REAd',
                      type='TEXt', flags=None,
                      version=None, category=0, compress=1, **kw):
        """Return a DocWriteStream that will create database 'name' in 'store'.

        All arguments, including extra keywords, are handed to the
        DocWriteStream constructor unchanged.
        """
        stream_args = (store, name, creator, type, flags,
                       version, category, compress)
        return apply(DocWriteStream, stream_args, kw)

    def open_stream(self, store, name=None, mode='rs', **kw):
        """Open database 'name' through self.open() and wrap it in a DOCReader."""
        return DOCReader(apply(self.open, (store, name, mode), kw))


# Field table shared by every HeaderRecord instance (assigned to its
# 'fields' attribute).  Each entry looks like (field type, default value)
# -- presumably interpreted by Blocks.Record; confirm there.
# The six FLD_INT entries are the values HeaderRecord packs with the
# '>hhlhhl' struct format; 'sizes' holds the 16-bit values that follow them.
_header_fields = {
    'version': (FLD_INT, 0),
    'spare': (FLD_INT, 0),
    'storylen': (FLD_INT, 0),
    'textrecs': (FLD_INT, 0),
    'recsize': (FLD_INT, 0),
    'position': (FLD_INT, 0),
    'sizes': (FLD_LIST, [])
    }

class HeaderRecord(Blocks.Record):
    """Header record of a DOC database.

    The packed form is a 16-byte big-endian block ('>hhlhhl': version,
    spare, storylen, textrecs, recsize, position) followed by any number
    of big-endian 16-bit integers, which are kept in the 'sizes' field.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        # The field table must be in place before the base initialiser runs.
        self.fields = _header_fields
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Decode 'raw' into the header fields and the 'sizes' list."""
        self.raw = raw

        fixed_part = raw[0:16]
        tail = raw[16:]
        self.unpackfields('>hhlhhl',
                          ['version', 'spare', 'storylen', 'textrecs',
                           'recsize', 'position'],
                          fixed_part)

        self['sizes'] = []
        # Walk the tail two bytes at a time; a trailing odd byte is ignored.
        for offset in range(0, len(tail) - 1, 2):
            pair = tail[offset:offset + 2]
            self['sizes'].append(struct.unpack('>h', pair)[0])

    def pack(self):
        """Encode the header fields and 'sizes'; store and return the bytes."""
        packed = self.packfields('>hhlhhl',
                                 ['version', 'spare', 'storylen',
                                  'textrecs', 'recsize', 'position'])
        for size in self['sizes']:
            packed = packed + struct.pack('>h', size)

        self.raw = packed
        return packed

class TextRecord(Blocks.Record):
    """Text record whose raw form is run through _compress/_uncompress.

    The single field 'text' holds the uncompressed text.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        # The field table must be in place before the base initialiser runs.
        self.fields = {'text': (FLD_STRING, '')}
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Remember 'raw' and store its uncompressed form as 'text'."""
        self.raw = raw
        self.data['text'] = _uncompress(raw)

    def pack(self):
        """Compress 'text', remember the result as self.raw and return it."""
        packed = _compress(self.data['text'])
        self.raw = packed
        return packed

class TextRecordV1(Blocks.Record):
    """Uncompressed text record: the raw bytes are the text itself.

    Database selects this class when the header's 'version' field is 1;
    the single field 'text' is identical to the raw record contents.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        # The field table must be in place before the base initialiser runs.
        self.fields = {'text': (FLD_STRING, '')}
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Remember 'raw' and expose it unchanged as 'text'."""
        # Store the argument first, as the other record classes do.  The
        # previous code ignored 'raw' and copied whatever self.raw already
        # held (or failed if it was not set yet).
        self.raw = raw
        self.data['text'] = raw

    def pack(self):
        """Copy 'text' into self.raw and return it."""
        self.raw = self.data['text']
        return self.raw
    
class BookmarkRecord(Blocks.Record):
    """Bookmark record: a 16-byte title followed by a 32-bit position.

    Packed with the big-endian struct format '>16sl' into the fields
    'text' (title) and 'pos'.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        # The field table must be in place before the base initialiser runs.
        self.fields = {'text': (FLD_STRING, '', 16),
                       'pos': (FLD_INT, 0)}
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Decode 'raw'; the title is cut at its first NUL byte, if any."""
        self.raw = raw
        self.unpackfields('>16sl', ['text', 'pos'], raw)
        title = self['text']
        nul_at = string.find(title, '\000')
        if nul_at >= 0:
            self['text'] = title[0:nul_at]

    def pack(self):
        """Encode title and position; store the bytes in self.raw and return them."""
        packed = self.packfields('>16sl', ['text', 'pos'])
        self.raw = packed
        return packed
    

class Database(Pyrite.Database):
    """A DOC database: one header record, text records, then bookmarks.

    self.header is record 0 when the database already has records, and a
    fresh HeaderRecord otherwise.  It is consulted to decide which record
    class applies to a given record.
    """
    def __init__(self, *a, **kw):
        apply(Pyrite.Database.__init__, (self,) + a, kw)
        if len(self) == 0:
            self.header = HeaderRecord()
        else:
            self.header = self[0]

    def new_record(self, index=0, id=0, attributes=0, category=0, type='text',
                   *a, **kw):
        """Return an empty record of the requested kind.

        'type' may be 'header' or 'bookmark'; anything else yields a text
        record, uncompressed (TextRecordV1) if the header says version 1
        and compressed (TextRecord) otherwise.  Extra arguments are
        accepted but not used.
        """
        if type == 'header':
            return HeaderRecord('', index, id, attributes, category)
        if type == 'bookmark':
            return BookmarkRecord('', index, id, attributes, category)

        hdr = self.header
        if hdr.has_key('version') and hdr['version'] == 1:
            return TextRecordV1('', index, id, attributes, category)
        return TextRecord('', index, id, attributes, category)

    def classify_record(self, raw='', index=0, id=0, attributes=0, category=0):
        """Build the right record object for 'raw' from its position.

        Index 0 is the header.  An index up to the header's 'textrecs'
        count (or any index when that count is 0) is a text record of the
        flavour given by the header's 'version'.  Everything else,
        including the case of a header without 'textrecs', is a bookmark.
        """
        if index == 0:
            return HeaderRecord(raw, index, id, attributes, category)

        hdr = self.header
        is_text = hdr.has_key('textrecs') and (index <= hdr['textrecs']
                                               or hdr['textrecs'] == 0)
        if not is_text:
            return BookmarkRecord(raw, index, id, attributes, category)

        if hdr.has_key('version') and hdr['version'] == 1:
            return TextRecordV1(raw, index, id, attributes, category)
        return TextRecord(raw, index, id, attributes, category)


# A hopefully cleaner, store-oriented version of DocWriter

class DocWriteStream:
    """Write-only, file-like stream that builds a new DOC database.

    Text passed to write() is buffered and cut into records of RECSIZE
    bytes.  Nothing touches the store until close(), which creates the
    database and appends the header, the text records and the bookmarks,
    so the total length need not be known in advance.  All records are
    held in memory until then.
    """

    # Bytes of text per record; also stored in the header's 'recsize' field.
    RECSIZE = 4096

    # Class-level default so that __del__ is a no-op on an instance whose
    # __init__ did not run to completion.
    opened = 0

    def __init__(self, store, name, creator=None, type=None, flags=None,
                 version=None, category=0, compress=1, **kw):
        """Prepare a stream that will create database 'name' in 'store'.

        creator, type, flags, version and any extra keywords are passed
        to store.create() when the stream is closed.  A true 'compress'
        selects compressed text records (header version 2), a false one
        uncompressed records (header version 1).
        """
        self.store = store
        self.name = name
        self.creator = creator
        self.type = type
        self.flags = flags
        self.version = version
        self.compress = compress
        self.category = category
        self.create_kw = kw

        # internal
        self.buf = ''             # text not yet turned into a record
        self.records = []         # finished text records, in order
        self.index = 0
        self.len = 0              # total number of bytes written so far
        self.bookmark_pos = []    # (title, position) pairs
        self.appinfo = ''

        self.uid = 0x6f8001       # id for the next record; the header uses 0x6f8000

        self.header = HeaderRecord()
        if self.compress: self.header['version'] = 2
        else: self.header['version'] = 1
        self.header['recsize'] = self.RECSIZE
        self.header.id = 0x6f8000
        self.header.modified = 1
        self.header.index = 0
        self.header.category = category

        # Mark the stream open only once it is fully built, so __del__
        # never tries to close a half-constructed object.
        self.opened = 1

    def set_appinfo(self, a=''):
        """Set the data written as the database's AppBlock on close()."""
        self.appinfo = a

    def bookmark(self, title, pos=None):
        """Record a bookmark called 'title' at 'pos' (default: current end).

        Raises IOError if the stream is closed.
        """
        if not self.opened: raise IOError(_("document closed"))
        if pos is None: pos = self.len
        self.bookmark_pos.append((title, pos))

    def __add_text_record(self, text):
        # Wrap 'text' in a record of the configured flavour, give it the
        # next id and queue it for writing.
        if self.compress: r = TextRecord()
        else: r = TextRecordV1()

        r['text'] = text
        r.id = self.uid
        self.uid = self.uid + 1
        r.modified = 1
        self.records.append(r)

    def __output(self):
        # Turn every complete RECSIZE-byte chunk of the buffer into a record.
        if not self.opened: raise IOError(_("document closed"))
        size = self.RECSIZE
        while len(self.buf) >= size:
            chunk = self.buf[:size]
            self.buf = self.buf[size:]
            self.__add_text_record(chunk)

    def write(self, data):
        """Append 'data' to the document.  Raises IOError if closed."""
        if not self.opened: raise IOError(_("document closed"))
        self.buf = self.buf + data
        self.len = self.len + len(data)
        self.__output()

    def writelines(self, list):
        """Write every string in 'list', in order."""
        for l in list: self.write(l)

    def close(self):
        """Create the database and write header, text, bookmarks and appinfo.

        Raises IOError if the stream is already closed.  The stream only
        counts as closed once everything has been appended, so a close()
        that failed may be retried.
        """
        if not self.opened: raise IOError(_("document closed"))
        self.__output()
        if self.buf:
            self.__add_text_record(self.buf)
            # Empty the buffer so that a retried close() (for instance the
            # one issued by __del__ after a failure below) does not append
            # the final record a second time.
            self.buf = ''

        # open the database
        db = apply(self.store.create, (self.name, self.creator, self.type,
                                       self.flags, self.version), self.create_kw)
        self.header['storylen'] = self.len
        self.header['textrecs'] = len(self.records)
        db.append(self.header)
        for r in self.records: db.append(r)

        for title, pos in self.bookmark_pos:
            r = BookmarkRecord()
            # at most 15 characters of title plus a terminating NUL
            r['text'] = title[:15] + '\000'
            r['pos'] = pos
            r.id = self.uid
            self.uid = self.uid + 1
            r.modified = 1
            db.append(r)

        if self.appinfo:
            b = Pyrite.AppBlock(self.appinfo)
            db.set_appblock(b)

        self.opened = 0
        db.close()

    # The original author noted "there seems to be a problem with this?".
    # NOTE(review): closing from a destructor can run late (e.g. during
    # interpreter shutdown) and any error it raises cannot reach the
    # caller; prefer calling close() explicitly.
    def __del__(self):
        if self.opened: self.close()


	
class DOCReader:
    """Read from a DOC file as a stream.

    Does not support bookmarks yet.  They will probably be available as
    a list or dictionary.

    Note that tell() and seek() depend on the assumption that every text
    record except the last contains exactly 'recsize' bytes of text, and
    that the header's 'storylen' is the true length of the text.
    """
    def __init__(self, pdb):
        """Wrap the already opened DOC database 'pdb'."""
        self.db = pdb
        self.rec = 0    # index of the last text record loaded into buf
        self.buf = ''   # text loaded but not yet handed to the caller

    def __next(self):
        # Load the next text record onto the end of the buffer.  Returns
        # the record, or None when all text records have been read.
        if self.rec >= self.db.header['textrecs']:
            return None
        self.rec = self.rec + 1
        r = self.db[self.rec]
        self.buf = self.buf + r['text']
        return r

    def read(self, nbytes=0):
        """Per the standard python behavior, read() reads until 'no more data
        is available'.  That is, it reads at most whatever is in the buffer,
        or one record (if the buffer was empty).

        With 'nbytes' omitted, zero or negative, everything currently
        buffered is returned.  An empty string means end of text.
        """
        # Keep loading while nothing is buffered, so that an empty record
        # is not mistaken for end of text.
        while not self.buf:
            if self.__next() is None:
                return ''

        if not nbytes or nbytes < 0:
            nbytes = len(self.buf)
        e = self.buf[:nbytes]
        self.buf = self.buf[nbytes:]
        return e

    def readline(self):
        """Return the next line including its newline, or '' at end of text."""
        while not '\n' in self.buf:
            # if we get eof while trying to find the end of the line,
            # just return whatever is in the buffer (if it is empty,
            # that is the same as eof)
            if self.__next() is None:
                b = self.buf
                self.buf = ''
                return b

        j = string.find(self.buf, '\n')
        e = self.buf[:j+1]
        self.buf = self.buf[j+1:]
        return e

    def readlines(self):
        """Return all remaining lines as a list."""
        l = []
        while 1:
            m = self.readline()
            if not m: break
            l.append(m)
        return l

    def tell(self):
        """Return the current position in the uncompressed text."""
        header = self.db.header
        # End of the loaded data: 'rec' full records, but never beyond the
        # end of the text (the last record is usually shorter).
        loaded_end = min(self.rec * header['recsize'], header['storylen'])
        return loaded_end - len(self.buf)

    def seek(self, pos, whence = 0):
        """Move to 'pos', interpreted like file.seek().

        whence 0 is absolute, 1 is relative to the current position and
        2 is relative to the end.  The result is clamped to the range
        0..storylen.
        """
        header = self.db.header
        if whence == 1: pos = self.tell() + pos
        elif whence == 2: pos = header['storylen'] + pos

        if pos < 0:
            pos = 0
        if pos >= header['storylen']:
            pos = header['storylen']

        # Text records start at index 1; record 0 is the header.
        full_records, offset = divmod(pos, header['recsize'])
        rec = full_records + 1
        if rec > header['textrecs']:
            # End of text exactly on a record boundary (or no text at all):
            # there is no further text record to load.
            self.rec = header['textrecs']
            self.buf = ''
            return

        self.rec = rec
        r = self.db[self.rec]
        self.buf = r['text'][offset:]

    def close(self):
        """Drop the database reference and reset the reader state."""
        del self.db
        self.rec = 0
        self.buf = ''