1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
|
# -*-python-*-
#
# Copyright (C) 1999-2002 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# accept.py: parse/handle the various Accept headers from the client
#
# -----------------------------------------------------------------------
#
import re
import string
def language(hdr):
"Parse an Accept-Language header."
# parse the header, storing results in a _LanguageSelector object
return _parse(hdr, _LanguageSelector())
# -----------------------------------------------------------------------
_re_token = re.compile(r'\s*([^\s;,"]+|"[^"]*")+\s*')
_re_param = re.compile(r';\s*([^;,"]+|"[^"]*")+\s*')
_re_split_param = re.compile(r'([^\s=])\s*=\s*(.*)')
def _parse(hdr, result):
# quick exit for empty or not-supplied header
if not hdr:
return result
pos = 0
while pos < len(hdr):
name = _re_token.match(hdr, pos)
if not name:
raise AcceptParseError()
a = result.item_class(string.lower(name.group(1)))
pos = name.end()
while 1:
# are we looking at a parameter?
match = _re_param.match(hdr, pos)
if not match:
break
param = match.group(1)
pos = match.end()
# split up the pieces of the parameter
match = _re_split_param.match(param)
if not match:
# the "=" was probably missing
continue
pname = string.lower(match.group(1))
if pname == 'q' or pname == 'qs':
try:
a.quality = float(match.group(2))
except ValueError:
# bad float literal
pass
elif pname == 'level':
try:
a.level = float(match.group(2))
except ValueError:
# bad float literal
pass
elif pname == 'charset':
a.charset = string.lower(match.group(2))
result.append(a)
if hdr[pos:pos+1] == ',':
pos = pos + 1
return result
class _AcceptItem:
def __init__(self, name):
self.name = name
self.quality = 1.0
self.level = 0.0
self.charset = ''
def __str__(self):
s = self.name
if self.quality != 1.0:
s = '%s;q=%.3f' % (s, self.quality)
if self.level != 0.0:
s = '%s;level=%.3f' % (s, self.level)
if self.charset:
s = '%s;charset=%s' % (s, self.charset)
return s
class _LanguageRange(_AcceptItem):
def matches(self, tag):
"Match the tag against self. Returns the qvalue, or None if non-matching."
if tag == self.name:
return self.quality
# are we a prefix of the available language-tag
name = self.name + '-'
if tag[:len(name)] == name:
return self.quality
return None
class _LanguageSelector:
"""Instances select an available language based on the user's request.
Languages found in the user's request are added to this object with the
append() method (they should be instances of _LanguageRange). After the
languages have been added, then the caller can use select_from() to
determine which user-request language(s) best matches the set of
available languages.
Strictly speaking, this class is pretty close for more than just
language matching. It has been implemented to enable q-value based
matching between requests and availability. Some minor tweaks may be
necessary, but simply using a new 'item_class' should be sufficient
to allow the _parse() function to construct a selector which holds
the appropriate item implementations (e.g. _LanguageRange is the
concrete _AcceptItem class that handles matching of language tags).
"""
item_class = _LanguageRange
def __init__(self):
self.requested = [ ]
def select_from(self, avail):
"""Select one of the available choices based on the request.
Note: if there isn't a match, then the first available choice is
considered the default. Also, if a number of matches are equally
relevant, then the first-requested will be used.
avail is a list of language-tag strings of available languages
"""
# tuples of (qvalue, language-tag)
matches = [ ]
# try matching all pairs of desired vs available, recording the
# resulting qvalues. we also need to record the longest language-range
# that matches since the most specific range "wins"
for tag in avail:
longest = 0
final = 0.0
# check this tag against the requests from the user
for want in self.requested:
qvalue = want.matches(tag)
#print 'have %s. want %s. qvalue=%s' % (tag, want.name, qvalue)
if qvalue is not None and len(want.name) > longest:
# we have a match and it is longer than any we may have had.
# the final qvalue should be from this tag.
final = qvalue
longest = len(want.name)
# a non-zero qvalue is a potential match
if final:
matches.append((final, tag))
# if there are no matches, then return the default language tag
if not matches:
return avail[0]
# get the highest qvalue and its corresponding tag
matches.sort()
qvalue, tag = matches[-1]
# if the qvalue is zero, then we have no valid matches. return the
# default language tag.
if not qvalue:
return avail[0]
# if there are two or more matches, and the second-highest has a
# qvalue equal to the best, then we have multiple "best" options.
# select the one that occurs first in self.requested
if len(matches) >= 2 and matches[-2][0] == qvalue:
# remove non-best matches
while matches[0][0] != qvalue:
del matches[0]
#print "non-deterministic choice", matches
# sequence through self.requested, in order
for want in self.requested:
# try to find this one in our best matches
for qvalue, tag in matches:
if want.matches(tag):
# this requested item is one of the "best" options
### note: this request item could match *other* "best" options,
### so returning *this* one is rather non-deterministic.
### theoretically, we could go further here, and do another
### search based on the ordering in 'avail'. however, note
### that this generally means that we are picking from multiple
### *SUB* languages, so I'm all right with the non-determinism
### at this point. stupid client should send a qvalue if they
### want to refine.
return tag
# NOTREACHED
# return the best match
return tag
def append(self, item):
self.requested.append(item)
class AcceptParseError(Exception):
pass
def _test():
s = language('en')
assert s.select_from(['en']) == 'en'
assert s.select_from(['en', 'de']) == 'en'
assert s.select_from(['de', 'en']) == 'en'
# Netscape 4.x and early version of Mozilla may not send a q value
s = language('en, ja')
assert s.select_from(['en', 'ja']) == 'en'
s = language('fr, de;q=0.9, en-gb;q=0.7, en;q=0.6, en-gb-foo;q=0.8')
assert s.select_from(['en']) == 'en'
assert s.select_from(['en-gb-foo']) == 'en-gb-foo'
assert s.select_from(['de', 'fr']) == 'fr'
assert s.select_from(['de', 'en-gb']) == 'de'
assert s.select_from(['en-gb', 'en-gb-foo']) == 'en-gb-foo'
assert s.select_from(['en-bar']) == 'en-bar'
assert s.select_from(['en-gb-bar', 'en-gb-foo']) == 'en-gb-foo'
# non-deterministic. en-gb;q=0.7 matches both avail tags.
#assert s.select_from(['en-gb-bar', 'en-gb']) == 'en-gb'
|