File: Doc.py

package info (click to toggle)
pyrite 0.9.3
links: PTS
area: main
in suites: potato
size: 1,504 kB
ctags: 1,924
sloc: python: 6,064; ansic: 5,094; makefile: 275; sh: 172
file content (408 lines) | stat: -rw-r--r-- 11,647 bytes
#
#  $Id: Doc.py,v 1.3 1999/12/11 12:35:11 rob Exp $
#
#  Copyright 1998-1999 Rob Tillotson <robt@debian.org>
#  All Rights Reserved
#
#  Permission to use, copy, modify, and distribute this software and
#  its documentation for any purpose and without fee or royalty is
#  hereby granted, provided that the above copyright notice appear in
#  all copies and that both the copyright notice and this permission
#  notice appear in supporting documentation or portions thereof,
#  including modifications, that you you make.
#
#  THE AUTHOR ROB TILLOTSON DISCLAIMS ALL WARRANTIES WITH REGARD TO
#  THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
#  AND FITNESS.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
#  SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
#  RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
#  CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
#  CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE!
#
"""Doc-format E-Texts.

  This module handles databases in the standard Palm e-text
  format used by applications such as AportisDoc, JDoc, TealDoc,
  etc.

  DOC databases are an excellent example of a reasonably complex
  file format for the Palm.  DOC databases contain three types
  of records: a header, any number of text records (which may be
  compressed by a simple algorithm that gives 40-50% compression
  on many files), and some bookmark records.  The classes in this
  module show how such a database could be handled; the appropriate
  class for a record is selected according to its position in the
  database and the information in the header.

  A small C module (_Doc) goes with this one; it contains the
  compression and decompression routines from "makedoc", since
  it would be significantly slower to use Python for this.

  Also included is a pair of classes, DOCReader and DocWriteStream, which
  implement streamed access to a DOC database.  These classes allow
  you to treat DOCs as ordinary text files, to a certain extent.
  While the reader class is useful, the writer class is even more so,
  since it allows (beware! large memory consumption) you to stream
  data into the file, set bookmarks, etc. without having to know in
  advance how big the text is.  (The util subdirectory of the package
  contains a couple of examples of what this is good for.)
"""

__version__ = '$Id: Doc.py,v 1.3 1999/12/11 12:35:11 rob Exp $'

__copyright__ = 'Copyright 1998-1999 Rob Tillotson <robt@debian.org>'

  
import Pyrite
import Pyrite.Connector
import Pyrite.Store
from Pyrite import Blocks
import string, struct

# C module
# Prefer the compiled _Doc extension.  Fall back to Pyrite.doc_compress
# (presumably the pure-Python implementation -- confirm) only when the
# extension cannot be imported or lacks the expected entry points; any
# other error is allowed to propagate instead of being silently hidden.
try:
    from Pyrite import _Doc
    _compress = _Doc.compress
    _uncompress = _Doc.uncompress
except (ImportError, AttributeError):
    from Pyrite import doc_compress
    _compress = doc_compress.compress
    _uncompress = doc_compress.uncompress
    
from Pyrite import FLD_STRING, FLD_INT, FLD_LIST, _

class Connector(Pyrite.Connector.Connector):
    """Connector describing Doc-format e-text databases.

    Sets 'TEXt'/'REAd' as the default type/creator and Database as the
    default class, and offers stream-style helpers for writing
    (create_stream) and reading (open_stream) a document.
    """
    name = 'Doc'
    version = Pyrite.version
    author = Pyrite.author
    url = ''
    description = _("Doc-format e-texts.")

    def __init__(self, *a, **kw):
        """Initialise the base connector, then install the Doc defaults."""
        Pyrite.Connector.Connector.__init__(self, *a, **kw)
        self.default_name = None
        self.default_class = Database
        self.default_type = 'TEXt'
        self.default_creator = 'REAd'

    def classify(self, info={}):
        """Return Database if 'info' describes a Doc database, else None.

        A database matches when its 'type' is 'TEXt' and its 'creator'
        is either 'REAd' or 'TlDc'.
        """
        creator = info.get('creator')
        if info.get('type') != 'TEXt':
            return None
        if creator in ('REAd', 'TlDc'):
            return Database
        return None

    def create_stream(self, store, name, creator='REAd',
                      type='TEXt', flags=None,
                      version=None, category=0, compress=1, **kw):
        """Return a DocWriteStream that will create 'name' in 'store'.

        Extra keyword arguments are handed through to DocWriteStream.
        """
        return DocWriteStream(store, name, creator, type, flags,
                              version, category, compress, **kw)

    def open_stream(self, store, name=None, mode='rs', **kw):
        """Open a database via self.open() and wrap it in a DOCReader."""
        database = self.open(store, name, mode, **kw)
        return DOCReader(database)


# Field table shared by every HeaderRecord (record 0 of a DOC database).
# Each entry looks like (field type, default value) -- presumably the
# layout Blocks.Record expects; confirm against that class.
# HeaderRecord packs the six integer fields, in the order listed here,
# with struct format '>hhlhhl' (16 bytes); 'sizes' holds the 16-bit
# integers that follow them in the raw record.
_header_fields = {
    'version': (FLD_INT, 0),    # 1 selects TextRecordV1, otherwise TextRecord
    'spare': (FLD_INT, 0),
    'storylen': (FLD_INT, 0),   # DocWriteStream stores the total text length
    'textrecs': (FLD_INT, 0),   # DocWriteStream stores the text record count
    'recsize': (FLD_INT, 0),    # DocWriteStream always writes 4096
    'position': (FLD_INT, 0),
    'sizes': (FLD_LIST, [])
    }

class HeaderRecord(Blocks.Record):
    """Header record of a DOC database.

    The raw form is six big-endian integers (struct format '>hhlhhl',
    16 bytes) followed by any number of big-endian 16-bit values, which
    are exposed as the 'sizes' list.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        self.fields = _header_fields
        # NOTE(review): the base class is Blocks.Record but the
        # initialiser is reached through Pyrite.Record -- presumably the
        # same class under two names; confirm.
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Decode 'raw' into the header fields and the 'sizes' list."""
        self.raw = raw

        self.unpackfields('>hhlhhl',
                          ['version', 'spare', 'storylen', 'textrecs',
                           'recsize', 'position'],
                          raw[0:16])

        # Everything after the fixed part is a run of 16-bit integers; a
        # dangling odd byte at the very end is ignored.
        tail = raw[16:]
        count = len(tail) // 2
        self['sizes'] = list(struct.unpack('>%dh' % count,
                                           tail[:count * 2]))

    def pack(self):
        """Encode the fields, store the result in self.raw and return it."""
        packed = self.packfields('>hhlhhl',
                                 ['version', 'spare', 'storylen',
                                  'textrecs', 'recsize', 'position'])
        for size in self['sizes']:
            packed = packed + struct.pack('>h', size)

        self.raw = packed
        return packed

class TextRecord(Blocks.Record):
    """Text record stored in compressed form.

    The single 'text' field holds the uncompressed text; the raw record
    is that text run through the module's _compress routine.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        self.fields = {'text': (FLD_STRING, '')}
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Keep 'raw' and expose its decompressed contents as 'text'."""
        self.raw = raw
        self.data['text'] = _uncompress(raw)

    def pack(self):
        """Compress 'text', store the result in self.raw and return it."""
        packed = _compress(self.data['text'])
        self.raw = packed
        return packed

class TextRecordV1(Blocks.Record):
    """Text record stored uncompressed (used when the header version is 1).

    The single 'text' field and the raw record contents are the same
    string.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        self.fields = {'text': (FLD_STRING, '')}
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Keep 'raw' and expose it unchanged as the 'text' field."""
        # Store the argument first, as the other record classes do;
        # previously the argument was ignored and whatever self.raw held
        # before the call (possibly nothing) was copied into 'text'.
        self.raw = raw
        self.data['text'] = raw

    def pack(self):
        """Copy 'text' into self.raw and return it."""
        self.raw = self.data['text']
        return self.raw
    
class BookmarkRecord(Blocks.Record):
    """Bookmark record: a title of up to 16 bytes plus a text position.

    The raw form is struct format '>16sl': a 16-byte string followed by
    a big-endian 32-bit integer.
    """
    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        self.fields = { 'text': (FLD_STRING, '', 16),
                        'pos': (FLD_INT, 0) }
        Pyrite.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Decode 'raw' into 'text' and 'pos', trimming 'text' at a NUL."""
        self.raw = raw
        self.unpackfields('>16sl', ['text', 'pos'], raw)

        # Cut the title at the first NUL byte, if there is one.
        title = self['text']
        nul = title.find('\000')
        if nul >= 0:
            self['text'] = title[:nul]

    def pack(self):
        """Encode 'text' and 'pos', store the result in self.raw, return it."""
        packed = self.packfields('>16sl', ['text', 'pos'])
        self.raw = packed
        return packed
    

class Database(Pyrite.Database):
    """A DOC database whose record classes are chosen by position.

    Record 0 is the header.  Records 1..textrecs are text records (every
    record after the header when textrecs is 0); all other records are
    bookmarks.  Text records are TextRecordV1 when the header version is
    1 and TextRecord otherwise.
    """
    def __init__(self, *a, **kw):
        Pyrite.Database.__init__(self, *a, **kw)
        # An empty database has no header yet, so start with a blank one.
        if len(self) <= 0:
            self.header = HeaderRecord()
        else:
            self.header = self[0]

    def _text_record_class(self):
        """Return the text record class matching the header version."""
        hdr = self.header
        if hdr.has_key('version') and hdr['version'] == 1:
            return TextRecordV1
        return TextRecord

    def new_record(self, index=0, id=0, attributes=0, category=0, type='text',
                   *a, **kw):
        """Return an empty record of the requested kind.

        'type' may be 'header' or 'bookmark'; any other value yields a
        text record.  Extra arguments are accepted and ignored.
        """
        if type == 'header':
            record_class = HeaderRecord
        elif type == 'bookmark':
            record_class = BookmarkRecord
        else:
            record_class = self._text_record_class()
        return record_class('', index, id, attributes, category)

    def classify_record(self, raw='', index=0, id=0, attributes=0, category=0):
        """Wrap 'raw' in the record class appropriate for 'index'."""
        if index == 0:
            return HeaderRecord(raw, index, id, attributes, category)

        hdr = self.header
        is_text = hdr.has_key('textrecs') and \
                  (index <= hdr['textrecs'] or hdr['textrecs'] == 0)
        if is_text:
            record_class = self._text_record_class()
        else:
            record_class = BookmarkRecord
        return record_class(raw, index, id, attributes, category)


# A hopefully cleaner, store-oriented version of DocWriter

class DocWriteStream:
    """Write-only, file-like object that builds a DOC database.

    Text passed to write() is cut into 4096-byte records that are held
    in memory together with any bookmarks.  Nothing touches the store
    until close(), which creates the database and appends the header,
    the text records and the bookmark records, in that order.

    Every public method except set_appinfo() raises IOError once the
    stream has been closed.
    """

    # Amount of text placed in each record; also written to the header.
    _RECSIZE = 4096

    def __init__(self, store, name, creator=None, type=None, flags=None,
                 version=None, category=0, compress=1, **kw):
        """Remember how to create the database; nothing is created yet.

        store    -- object whose create() method is called by close()
        name, creator, type, flags, version -- passed to store.create()
        category -- category assigned to the header record
        compress -- true: TextRecord and header version 2;
                    false: TextRecordV1 and header version 1
        kw       -- extra keyword arguments for store.create()
        """
        self.store = store
        self.name = name
        self.creator = creator
        self.type = type
        self.flags = flags
        self.version = version
        self.compress = compress
        self.category = category
        self.create_kw = kw

        # internal
        self.buf = ''            # text not yet turned into a record
        self.records = []        # finished text records
        self.index = 0
        self.len = 0             # total number of characters written
        self.bookmark_pos = []   # (title, position) pairs
        self.appinfo = ''

        self.uid = 0x6f8001      # id given to the next record
        self.opened = 1

        self.header = HeaderRecord()
        if self.compress: self.header['version'] = 2
        else: self.header['version'] = 1
        self.header['recsize'] = self._RECSIZE
        self.header.id = 0x6f8000
        self.header.modified = 1
        self.header.index = 0
        self.header.category = category

    def __check_open(self):
        """Raise IOError if the stream has already been closed."""
        if not self.opened:
            raise IOError(_("document closed"))

    def __add_text_record(self, text):
        """Queue 'text' as the next text record and advance the uid."""
        if self.compress: r = TextRecord()
        else: r = TextRecordV1()

        r['text'] = text
        r.id = self.uid
        self.uid = self.uid + 1
        r.modified = 1
        self.records.append(r)

    def set_appinfo(self, a=''):
        """Set the data stored as the database's application block."""
        self.appinfo = a

    def bookmark(self, title, pos=None):
        """Add a bookmark at 'pos' (default: the current end of text)."""
        self.__check_open()
        if pos is None: pos = self.len
        self.bookmark_pos.append((title, pos))

    def __output(self):
        """Move every complete record's worth of text out of the buffer."""
        self.__check_open()
        size = self._RECSIZE
        pending = self.buf
        start = 0
        # Walk the buffer by offset so that one very large write() is not
        # re-copied once per record.
        while len(pending) - start >= size:
            self.__add_text_record(pending[start:start + size])
            start = start + size
        if start:
            self.buf = pending[start:]

    def write(self, data):
        """Append 'data' to the document."""
        self.__check_open()
        self.buf = self.buf + data
        self.len = self.len + len(data)
        self.__output()

    def writelines(self, list):
        """Write each string in 'list' in turn."""
        for l in list: self.write(l)

    def close(self):
        """Create the database in the store and write everything to it.

        Raises IOError if the stream is already closed.  If creating or
        filling the database fails, the stream stays open so that the
        call can be retried.
        """
        self.__check_open()
        self.__output()
        if self.buf:
            self.__add_text_record(self.buf)
            # Clear the buffer so that a retried close() (including the
            # one attempted by __del__) does not queue this text twice.
            self.buf = ''

        # open the database
        db = self.store.create(self.name, self.creator, self.type,
                               self.flags, self.version, **self.create_kw)
        self.header['storylen'] = self.len
        self.header['textrecs'] = len(self.records)
        db.append(self.header)
        for r in self.records: db.append(r)

        for t, p in self.bookmark_pos:
            r = BookmarkRecord()
            # At most 15 characters of title plus a terminating NUL.
            r['text'] = t[:15]+'\000'
            r['pos'] = p
            r.id = self.uid
            self.uid = self.uid + 1
            r.modified = 1
            db.append(r)

        if self.appinfo:
            b = Pyrite.AppBlock(self.appinfo)
            db.set_appblock(b)

        self.opened = 0
        db.close()

    # there seems to be a problem with this?
    def __del__(self):
        # getattr guards against an instance whose __init__ never got as
        # far as setting 'opened'.
        if getattr(self, 'opened', 0): self.close()


	
class DOCReader:
    """Read from a DOC file as a stream.

    Does not support bookmarks yet.  They will probably be available as
    a list or dictionary.

    Note that seeking depends quite heavily on the assumption that all
    text records except the last contain exactly 'recsize' bytes of text.

    'pdb' must provide a 'header' mapping with 'textrecs', 'recsize' and
    'storylen' entries, and return record N (whose 'text' item is that
    record's text) for pdb[N]; text records are numbered from 1.
    """
    def __init__(self, pdb):
        self.db = pdb
        self.rec = 0    # record most recently loaded into the buffer
        self.buf = ''   # text loaded but not yet handed to the caller

    def __recsize(self):
        """Return the header's record size, or 4096 if it is zero."""
        # 4096 is the size this module's own writer uses; falling back to
        # it avoids dividing by zero on a blank header.
        return self.db.header['recsize'] or 4096

    def __next(self):
        """Append the next text record to the buffer.

        Returns the record, or None when there are no more text records.
        """
        if self.rec >= self.db.header['textrecs']:
            return None
        self.rec = self.rec + 1
        r = self.db[self.rec]
        self.buf = self.buf + r['text']
        return r

    def read(self, nbytes=0):
        """Per the standard python behavior, read() reads until 'no more data
        is available'.  That is, it reads at most whatever is in the buffer,
        or one record (if the buffer was empty).

        With 'nbytes' omitted, zero or negative, everything currently
        buffered is returned.  Returns '' at end of text.
        """
        if not self.buf:
            if self.__next() is None:
                return ''

        if nbytes is None or nbytes <= 0:
            nbytes = len(self.buf)
        e = self.buf[:nbytes]
        self.buf = self.buf[nbytes:]
        return e

    def readline(self):
        """Return the next line including its newline, or '' at end of text."""
        while '\n' not in self.buf:
            # if we get eof while trying to find the end of the line,
            # just return whatever is in the buffer (if it is empty,
            # that is the same as eof)
            if self.__next() is None:
                b = self.buf
                self.buf = ''
                return b

        j = self.buf.find('\n')
        e = self.buf[:j+1]
        self.buf = self.buf[j+1:]
        return e

    def readlines(self):
        """Return a list of all remaining lines."""
        l = []
        while 1:
            m = self.readline()
            if not m: break
            l.append(m)
        return l

    def tell(self):
        """Return the current position in the text."""
        # The loaded text ends where record 'rec' ends; the last record
        # may be short, so never count past the total text length.
        loaded_end = self.rec * self.__recsize()
        storylen = self.db.header['storylen']
        if loaded_end > storylen:
            loaded_end = storylen
        return loaded_end - len(self.buf)

    def seek(self, pos, whence = 0):
        """Move to 'pos', clamped to the range 0..storylen.

        'whence' is 0 for an absolute position, 1 for one relative to the
        current position and 2 for one relative to the end of the text.
        """
        header = self.db.header
        storylen = header['storylen']
        if whence == 1: pos = self.tell() + pos
        elif whence == 2: pos = storylen + pos

        if pos > storylen: pos = storylen
        if pos < 0: pos = 0

        last = header['textrecs']
        if last <= 0:
            # No text records at all: there is nothing to load.
            self.rec = 0
            self.buf = ''
            return

        index, offset = divmod(int(pos), self.__recsize())
        index = index + 1     # text records are numbered from 1
        if index > last:
            # End of text falls exactly on a record boundary: stay on the
            # last text record instead of indexing past it.
            self.rec = last
            self.buf = ''
        else:
            self.rec = index
            self.buf = self.db[index]['text'][offset:]

    def close(self):
        """Drop the database reference and reset the stream state.

        Calling close() more than once is harmless.
        """
        if hasattr(self, 'db'):
            del self.db
        self.rec = 0
        self.buf = ''