File: iso8859.py

package info (click to toggle)
python-xml 0.8.4-10.1+lenny1
  • links: PTS
  • area: main
  • in suites: lenny
  • size: 4,972 kB
  • ctags: 10,628
  • sloc: python: 46,730; ansic: 14,354; xml: 968; makefile: 201; sh: 20
file content (86 lines) | stat: -rw-r--r-- 2,863 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""This module adds a backwards-compatibility to the older wstring module.
It is intended for use by 4Suite only; do not use it in your own code."""

import string
import utf8_iso

_trans = string.maketrans("_:","- ")
def _normalize(codeset):
    codeset = string.lower(codeset)
    codeset = string.translate(codeset, _trans)
    return codeset

class _Wstringmod:
    "Stand-in object offering the functions of the old wstring module"

    def __init__(self):
        # Alternative names, all of which refer to iso-8859-1.
        latin1 = 'iso-8859-1'
        self.aliases = {}
        for name in ('iso-ir-100', 'cp819', 'l1', 'latin1', 'ibm819'):
            self.aliases[name] = latin1

        # Encoding name -> codeset number.  'utf-8' is number 0; each
        # non-empty entry N of utf8_iso.code_to_uni adds 'iso-8859-N'.
        self.encodings = {'utf-8' : 0}
        tables = utf8_iso.code_to_uni
        for part in range(1, len(tables)):
            if not tables[part]:
                continue
            self.encodings['iso-8859-%d' % part] = part

    def install_alias(self, newname, oldname):
        "Record the normalized newname as an alias of the normalized oldname"
        target = _normalize(oldname)
        alias = _normalize(newname)
        self.aliases[alias] = target

    def from_utf8(self, utf8):
        "Wrap a UTF-8 string in a UTF8String"
        wrapped = UTF8String(utf8)
        return wrapped

    def decode(self, encoding, string):
        "Wrap string, which is encoded in encoding, in a UTF8String"
        wrapped = UTF8String(string, encoding)
        return wrapped

    def chr(self, ch):
        "Return a UTF8String built from utf8_iso.utf8chr(ch)"
        encoded = utf8_iso.utf8chr(ch)
        return UTF8String(encoded)

# Module-level instance that plays the role of the old wstring module;
# UTF8String reads its `encodings` and `aliases` tables.
wstring = _Wstringmod()

class UTF8String:
    """Emulator for the wstring type.

    `data` holds the string exactly as it was passed in and `codeset` the
    number registered for its encoding in wstring.encodings (0 is UTF-8,
    N is ISO-8859-N).
    """

    def __init__(self, string, encoding='utf-8'):
        """Wrap `string`, whose contents are encoded in `encoding`.

        Raises utf8_iso.ConvertError when `encoding` is neither a known
        encoding name nor a registered alias.
        """
        self.data = string
        self.codeset = self._lookup_codeset(encoding)

    def _lookup_codeset(self, encoding):
        """Return the codeset number registered for `encoding`.

        The name is normalized and looked up in wstring.encodings; when it
        is not found there it is resolved through one level of
        wstring.aliases.  Raises utf8_iso.ConvertError for unknown names.
        """
        enc = _normalize(encoding)
        codeset = wstring.encodings.get(enc)
        if codeset is None:
            # The table is called `encodings`; this lookup used to read the
            # non-existent `wstring.encoding`, so every alias (for example
            # 'latin1') failed with AttributeError.
            codeset = wstring.encodings.get(wstring.aliases.get(enc))
        if codeset is None:
            raise utf8_iso.ConvertError('Unknown encoding: %s' % encoding)
        return codeset

    def utf8(self):
        """Return the contents as a UTF-8 encoded string."""
        if self.codeset == 0:
            return self.data
        codeset = self.codeset
        pieces = [utf8_iso.code_to_utf8(codeset, char) for char in self.data]
        return ''.join(pieces)

    def encode(self, encoding):
        """Return the contents converted to `encoding`.

        Raises utf8_iso.ConvertError when `encoding` is unknown; errors
        raised by utf8_iso.utf8_to_code are passed through.
        """
        codeset = self._lookup_codeset(encoding)

        # Always start from the UTF-8 form.  For an instance created from
        # UTF-8 this is just self.data; for one created from ISO-8859 data
        # it converts first, instead of misreading those bytes as UTF-8.
        rest = self.utf8()
        if codeset == 0:
            return rest

        output = []
        while rest:
            # Measure the run of ASCII characters at the front of `rest`.
            # Everything from 0x80 upwards is non-ASCII (the previous test,
            # `> 128`, let 0x80 through).
            size = len(rest)
            run = 0
            while run < size and ord(rest[run]) < 128:
                run = run + 1
            if run:
                # ASCII is copied unchanged, including a run that reaches
                # the very end of the string.
                output.append(rest[:run])
                rest = rest[run:]
            else:
                # A multi-byte sequence starts here: convert one character
                # and continue with whatever follows it.
                char, rest = utf8_iso.utf8_to_code(codeset, rest)
                output.append(char)
        return ''.join(output)