File: utf16le.py

package info (click to toggle)
felix 1.1.1-2
  • links: PTS
  • area: main
  • in suites: etch, etch-m68k
  • size: 4,992 kB
  • ctags: 1,178
  • sloc: python: 7,260; makefile: 408; sh: 58
file content (44 lines) | stat: -rw-r--r-- 844 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#line 349 "interscript/src/utf8.ipk"
import string

def utf16le(i):
  """Encode the code point ``i`` as UTF-16 little-endian.

  Returns a string with one character per output byte (each built with
  ``chr`` from a value below 256 for valid input): two characters when
  ``i < 0x10000``, otherwise four characters forming a high/low surrogate
  pair.  No range validation is performed; ``i`` is expected to be a
  valid code point in 0 .. 0x10FFFF.
  """
  if i < 0x10000:
    # A single 16-bit unit, least significant byte first.
    return chr(i & 0xff) + chr(i >> 8)
  else:
    # Supplementary planes: split the 20-bit offset into two surrogates,
    # top 10 bits in the high surrogate, bottom 10 bits in the low one.
    offset = i - 0x10000
    w1 = 0xD800 + (offset >> 10)
    w2 = 0xDC00 + (offset & 0x3FF)
    return (
      chr(w1 & 0xff) + chr(w1 >> 8) +
      chr(w2 & 0xff) + chr(w2 >> 8)
    )

def seq_to_utf16le(a):
  """Encode an iterable of code points as one UTF-16LE string.

  Each element of ``a`` is passed to ``utf16le`` and the pieces are
  concatenated in order.  An empty iterable yields the empty string.
  """
  # join() builds the result in one pass instead of re-copying the
  # accumulated string on every iteration.
  return ''.join([utf16le(ch) for ch in a])

# decoding
def parse_utf16le(s, i):
  """Decode one code point from UTF-16LE data ``s`` starting at index ``i``.

  ``s`` may be a sequence whose items are ints (e.g. ``bytes`` on
  Python 3) or a string holding one character per byte (as produced by
  ``utf16le``).

  Returns a tuple ``(code_point, next_index)`` where ``next_index`` is
  ``i + 2`` for a single 16-bit unit and ``i + 4`` for a surrogate pair.
  A high surrogate that is not followed by a low surrogate is returned
  unchanged as a single unit.

  Raises IndexError if the data ends in the middle of a unit.
  """
  def unit(k):
    # Read the 16-bit little-endian unit at s[k], s[k+1].
    lo, hi = s[k], s[k + 1]
    if not isinstance(lo, int):
      lo, hi = ord(lo), ord(hi)
    return (hi << 8) | lo

  w1 = unit(i)
  # Only a high surrogate (0xD800..0xDBFF) starts a two-unit sequence.
  if w1 < 0xD800 or w1 > 0xDBFF:
    return w1, i + 2
  w2 = unit(i + 2)
  if w2 < 0xDC00 or w2 > 0xDFFF:
    # Unpaired high surrogate: hand it back as-is rather than combining
    # it with an unrelated unit into a bogus code point.
    return w1, i + 2
  return 0x10000 + ((w1 - 0xD800) << 10) + (w2 - 0xDC00), i + 4

def utf16le_to_array(s):
  """Decode UTF-16LE data ``s`` into an array of code points.

  Returns an ``array.array`` with typecode ``'L'``, which is wide enough
  for code points above 0xFFFF produced by surrogate pairs.  Empty input
  yields an empty array.

  Raises IndexError (from ``parse_utf16le``) if ``s`` ends in the middle
  of a 16-bit unit.
  """
  from array import array
  a = array('L')
  n = len(s)
  i = 0
  while i < n:
    # parse_utf16le returns the index of the next undecoded byte.
    ch, i = parse_utf16le(s, i)
    a.append(ch)
  return a

def utf16le_to_utf8(s):
  """Transcode UTF-16LE data ``s`` by decoding it and re-encoding it.

  The input is decoded with ``utf16le_to_array`` and the result is handed
  to ``seq_to_utf8``, whose return value is returned unchanged.
  NOTE(review): ``seq_to_utf8`` is not defined in the visible part of
  this file; presumably it comes from a companion UTF-8 module -- confirm
  it is in scope.
  """
  code_points = utf16le_to_array(s)
  return seq_to_utf8(code_points)