File: accept.py

package info (click to toggle)
viewcvs 0.9.2%2Bcvs.1.0.dev.2004.07.28-4.1etch1
  • links: PTS
  • area: main
  • in suites: etch
  • size: 1,452 kB
  • ctags: 1,355
  • sloc: python: 10,100; cpp: 840; ansic: 763; yacc: 526; sh: 163; makefile: 115
file content (239 lines) | stat: -rw-r--r-- 7,820 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# -*-python-*-
#
# Copyright (C) 1999-2002 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Contact information:
#   Greg Stein, PO Box 760, Palo Alto, CA, 94302
#   gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# accept.py: parse/handle the various Accept headers from the client
#
# -----------------------------------------------------------------------
#

import re
import string


def language(hdr):
  """Build a language selector from an Accept-Language header value.

  HDR is the raw header string; an empty or missing header yields a
  selector with no requested languages. The returned object is the
  _LanguageSelector that _parse() filled in, ready for select_from().
  AcceptParseError propagates from _parse() when the header is malformed.
  """

  # the selector collects one item per language-range found in the header
  selector = _LanguageSelector()
  return _parse(hdr, selector)

# -----------------------------------------------------------------------

# Patterns used by _parse().
#   _re_token: one item name (optionally quoted), surrounding whitespace eaten
#   _re_param: one ";"-introduced parameter following an item name
#   _re_split_param: splits "name=value"; the name may be several characters
#     long (a single-character name class would silently drop "level=",
#     "charset=" and "qs=").
# NOTE: in _re_token and _re_param the repeated group means group(1) holds
# only the LAST repetition, so mixed bare/quoted text is captured partially.
_re_token = re.compile(r'\s*([^\s;,"]+|"[^"]*")+\s*')
_re_param = re.compile(r';\s*([^;,"]+|"[^"]*")+\s*')
_re_split_param = re.compile(r'([^\s=]+)\s*=\s*(.*)')

def _parse(hdr, result):
  """Parse an Accept-style header, appending one item per entry to RESULT.

  HDR is the raw header value; None or '' is accepted and leaves RESULT
  untouched. RESULT must provide an 'item_class' callable (invoked with the
  lower-cased entry name) and an append() method that receives each item.

  Recognized parameters (names are case-insensitive):
    q / qs   -> item.quality (float)
    level    -> item.level   (float)
    charset  -> item.charset (lower-cased string)
  Parameters without "=", unknown parameters and unparsable float values
  are ignored so that a sloppy client does not lose the whole header.

  Returns RESULT. Raises AcceptParseError when no entry name can be read
  at the current position (e.g. an empty entry between two commas).
  """
  # quick exit for empty or not-supplied header
  if not hdr:
    return result

  pos = 0
  while pos < len(hdr):
    name = _re_token.match(hdr, pos)
    if not name:
      raise AcceptParseError()
    a = result.item_class(name.group(1).lower())
    pos = name.end()
    while 1:
      # are we looking at a parameter?
      match = _re_param.match(hdr, pos)
      if not match:
        break
      param = match.group(1)
      pos = match.end()

      # split up the pieces of the parameter
      match = _re_split_param.match(param)
      if not match:
        # the "=" was probably missing
        continue

      pname = match.group(1).lower()
      # _re_param swallows whitespace that precedes the next "," or ";"
      value = match.group(2).strip()
      if pname in ('q', 'qs'):
        try:
          a.quality = float(value)
        except ValueError:
          # bad float literal: keep the default quality
          pass
      elif pname == 'level':
        try:
          a.level = float(value)
        except ValueError:
          # bad float literal: keep the default level
          pass
      elif pname == 'charset':
        a.charset = value.lower()

    result.append(a)
    # step over the separator between entries, if any
    if hdr[pos:pos+1] == ',':
      pos = pos + 1

  return result

class _AcceptItem:
  "One entry of an Accept-style header: a name plus optional parameters."

  def __init__(self, name):
    # defaults that apply when the client omits the parameter
    self.charset = ''
    self.level = 0.0
    self.quality = 1.0
    self.name = name

  def __str__(self):
    "Render the entry in header syntax, omitting default-valued parameters."
    # gather the parameters that differ from their defaults, in the
    # fixed order q, level, charset
    extras = []
    if self.quality != 1.0:
      extras.append('q=%.3f' % self.quality)
    if self.level != 0.0:
      extras.append('level=%.3f' % self.level)
    if self.charset:
      extras.append('charset=%s' % self.charset)

    # glue each parameter onto the name with a ";" separator
    text = self.name
    for extra in extras:
      text = '%s;%s' % (text, extra)
    return text

class _LanguageRange(_AcceptItem):
  "A language-range taken from an Accept-Language header."

  def matches(self, tag):
    """Match the language-tag TAG against this range.

    Returns this range's qvalue (self.quality) when the range matches,
    or None when it does not. A range matches when it is the wildcard
    "*", when it equals TAG, or when it is a prefix of TAG that ends at
    a "-" boundary (so "en" matches "en-gb" but not "eng"). The
    comparison ignores case, as language tags are case-insensitive.
    """
    # normalize both sides so that an available tag such as "en-US" can
    # match a requested "en-us"
    tag = tag.lower()
    name = self.name.lower()

    # "*" matches every tag. Being one character long, it is also the
    # least specific range for callers that rank matches by len(name).
    if name == '*' or tag == name:
      return self.quality

    # are we a prefix of the available language-tag
    if tag.startswith(name + '-'):
      return self.quality
    return None

class _LanguageSelector:
  """Pick one of the available languages according to the user's request.

  The requested language-ranges are fed in through append() (instances of
  'item_class', i.e. _LanguageRange). Once they are all present,
  select_from() chooses the available language that best satisfies them.

  Nothing here is really specific to languages: the class performs
  q-value based matching between what was requested and what is on offer.
  Supplying a different 'item_class' (some other _AcceptItem flavour with
  a suitable matches() method) should be enough, give or take minor
  tweaks, to reuse it with _parse() for another kind of header.
  """

  item_class = _LanguageRange

  def __init__(self):
    # requested items, kept in the order the client listed them
    self.requested = [ ]

  def select_from(self, avail):
    """Return the entry of AVAIL that best matches the request.

    AVAIL is a list of language-tag strings naming the available
    languages. When nothing matches, the first entry of AVAIL is the
    default. When several entries tie for the best qvalue, the one
    matched by the earliest requested item is returned.
    """

    # (qvalue, language-tag) pairs for every available tag that matched
    candidates = [ ]

    for tag in avail:
      # the most specific (longest) matching range decides the qvalue
      best_len = 0
      best_q = 0.0
      for want in self.requested:
        q = want.matches(tag)
        if q is None:
          continue
        width = len(want.name)
        if width > best_len:
          best_len = width
          best_q = q

      # a zero qvalue means "not acceptable"; drop those
      if best_q:
        candidates.append((best_q, tag))

    # nothing matched: fall back to the default language tag
    if not candidates:
      return avail[0]

    # after sorting, the highest qvalue sits at the end
    candidates.sort()
    top_q, top_tag = candidates[-1]

    # a best qvalue of zero still means no valid match
    if not top_q:
      return avail[0]

    # a single clear winner needs no tie-breaking
    if len(candidates) < 2 or candidates[-2][0] != top_q:
      return top_tag

    # several candidates share the best qvalue: skip the leading,
    # lower-ranked entries and keep the tied tail
    start = 0
    while candidates[start][0] != top_q:
      start = start + 1
    tied = candidates[start:]

    # walk the request in order and return the first tied tag that a
    # requested item matches.
    ### a requested item may match more than one tied tag (typically
    ### several sub-languages), in which case the pick among those is
    ### arbitrary. clients that care should send distinct qvalues.
    for want in self.requested:
      for _q, tag in tied:
        if want.matches(tag):
          return tag

    # not expected to be reached; fall back to the top-ranked tag
    return top_tag

  def append(self, item):
    "Record ITEM as the next requested language-range."
    self.requested.append(item)

class AcceptParseError(Exception):
  "Raised by _parse() when an Accept-style header cannot be parsed."

def _test():
  "Self-test: check select_from() results for a few request headers."

  # each entry: (Accept-Language header, [(available tags, expected), ...])
  cases = [
    ('en', [
      (['en'], 'en'),
      (['en', 'de'], 'en'),
      (['de', 'en'], 'en'),
    ]),

    # Netscape 4.x and early version of Mozilla may not send a q value
    ('en, ja', [
      (['en', 'ja'], 'en'),
    ]),

    ('fr, de;q=0.9, en-gb;q=0.7, en;q=0.6, en-gb-foo;q=0.8', [
      (['en'], 'en'),
      (['en-gb-foo'], 'en-gb-foo'),
      (['de', 'fr'], 'fr'),
      (['de', 'en-gb'], 'de'),
      (['en-gb', 'en-gb-foo'], 'en-gb-foo'),
      (['en-bar'], 'en-bar'),
      (['en-gb-bar', 'en-gb-foo'], 'en-gb-foo'),
      # not checked: ['en-gb-bar', 'en-gb'] is non-deterministic, since
      # en-gb;q=0.7 matches both avail tags.
    ]),
  ]

  for header, checks in cases:
    s = language(header)
    for available, expected in checks:
      assert s.select_from(available) == expected