1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
|
#
# Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# This software is being maintained as part of the ViewCVS project.
# Information is available at:
# http://viewcvs.sourceforge.net/
#
# This file was originally based on portions of the blame.py script by
# Curt Hagenlocher.
#
# -----------------------------------------------------------------------
import string
import common
class _TokenStream:
token_term = string.whitespace + ';'
# the algorithm is about the same speed for any CHUNK_SIZE chosen.
# grab a good-sized chunk, but not too large to overwhelm memory.
# note: we use a multiple of a standard block size
CHUNK_SIZE = 192 * 512 # about 100k
# CHUNK_SIZE = 5 # for debugging, make the function grind...
def __init__(self, file):
self.rcsfile = file
self.idx = 0
self.buf = self.rcsfile.read(self.CHUNK_SIZE)
if self.buf == '':
raise RuntimeError, 'EOF'
def get(self):
"Get the next token from the RCS file."
# Note: we can afford to loop within Python, examining individual
# characters. For the whitespace and tokens, the number of iterations
# is typically quite small. Thus, a simple iterative loop will beat
# out more complex solutions.
buf = self.buf
idx = self.idx
while 1:
if idx == len(buf):
buf = self.rcsfile.read(self.CHUNK_SIZE)
if buf == '':
# signal EOF by returning None as the token
del self.buf # so we fail if get() is called again
return None
idx = 0
if buf[idx] not in string.whitespace:
break
idx = idx + 1
if buf[idx] == ';':
self.buf = buf
self.idx = idx + 1
return ';'
if buf[idx] != '@':
end = idx + 1
token = ''
while 1:
# find token characters in the current buffer
while end < len(buf) and buf[end] not in self.token_term:
end = end + 1
token = token + buf[idx:end]
if end < len(buf):
# we stopped before the end, so we have a full token
idx = end
break
# we stopped at the end of the buffer, so we may have a partial token
buf = self.rcsfile.read(self.CHUNK_SIZE)
idx = end = 0
self.buf = buf
self.idx = idx
return token
# a "string" which starts with the "@" character. we'll skip it when we
# search for content.
idx = idx + 1
chunks = [ ]
while 1:
if idx == len(buf):
idx = 0
buf = self.rcsfile.read(self.CHUNK_SIZE)
if buf == '':
raise RuntimeError, 'EOF'
i = string.find(buf, '@', idx)
if i == -1:
chunks.append(buf[idx:])
idx = len(buf)
continue
if i == len(buf) - 1:
chunks.append(buf[idx:i])
idx = 0
buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
if buf == '@':
raise RuntimeError, 'EOF'
continue
if buf[i + 1] == '@':
chunks.append(buf[idx:i+1])
idx = i + 2
continue
chunks.append(buf[idx:i])
self.buf = buf
self.idx = i + 1
return string.join(chunks, '')
# _get = get
# def get(self):
token = self._get()
print 'T:', `token`
return token
def match(self, match):
"Try to match the next token from the input buffer."
token = self.get()
if token != match:
raise RuntimeError, ('Unexpected parsing error in RCS file.\n' +
'Expected token: %s, but saw: %s' % (match, token))
def unget(self, token):
"Put this token back, for the next get() to return."
# Override the class' .get method with a function which clears the
# overridden method then returns the pushed token. Since this function
# will not be looked up via the class mechanism, it should be a "normal"
# function, meaning it won't have "self" automatically inserted.
# Therefore, we need to pass both self and the token thru via defaults.
# note: we don't put this into the input buffer because it may have been
# @-unescaped already.
def give_it_back(self=self, token=token):
del self.get
return token
self.get = give_it_back
def mget(self, count):
"Return multiple tokens. 'next' is at the end."
result = [ ]
for i in range(count):
result.append(self.get())
result.reverse()
return result
class Parser(common._Parser):
stream_class = _TokenStream
def parse_rcs_admin(self):
while 1:
# Read initial token at beginning of line
token = self.ts.get()
# We're done once we reach the description of the RCS tree
if token[0] in string.digits:
self.ts.unget(token)
return
if token == "head":
semi, rev = self.ts.mget(2)
self.sink.set_head_revision(rev)
if semi != ';':
raise common.RCSExpected(semi, ';')
elif token == "branch":
semi, branch = self.ts.mget(2)
self.sink.set_principal_branch(branch)
if semi != ';':
raise common.RCSExpected(semi, ';')
elif token == "symbols":
while 1:
tag = self.ts.get()
if tag == ';':
break
(tag_name, tag_rev) = string.split(tag, ':')
self.sink.define_tag(tag_name, tag_rev)
elif token == "comment":
semi, comment = self.ts.mget(2)
self.sink.set_comment(comment)
if semi != ';':
raise common.RCSExpected(semi, ';')
elif token == "expand":
semi, expand_mode = self.ts.mget(2)
self.sink.set_expansion(expand_mode)
if semi != ';':
raise RCSExpected(semi, ';')
elif token == "locks":
while 1:
tag = self.ts.get()
if tag == ';':
break
(locker, rev) = string.split(tag,':')
self.sink.set_locker(rev, locker)
tag = self.ts.get()
if tag == "strict":
self.sink.set_locking("strict")
self.ts.match(';')
else:
self.ts.unget(tag)
elif token == "access":
accessors = []
while 1:
tag = self.ts.get()
if tag == ';':
if accessors != []:
self.sink.set_access(accessors)
break
accessors = accessors + [ tag ]
# Chew up "newphrase".
else:
pass
# warn("Unexpected RCS token: $token\n")
raise RuntimeError, "Unexpected EOF"
|