File: iconv.py

package info (click to toggle)
ispell-lt 1.3.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,980 kB
  • sloc: perl: 3,675; python: 854; makefile: 337; sh: 155; awk: 56; xml: 6
file content (93 lines) | stat: -rw-r--r-- 2,425 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (c) 2016, Laimonas Vėbra
# All rights reserved.
#
# This program is licensed under the Simplified BSD License.
# See <http://www.opensource.org/licenses/bsd-license>
#
"""Converts text from one encoding to another encoding.

Poor man's iconv. Does not buffer/block stdin on posix/linux, so
line-based two-way IPC (with a coprocess) is possible, unlike
the original iconv (fread), which blocks until EOF.
"""

import sys
import getopt
import locale

# Program identity: shown by --version and interpolated into the usage text.
__version__ = "1.0a"
__program__ = "iconv.py"

# Short synopsis; also forms the first part of the full help text.
__usage_short = "\n".join([
    "Usage:",
    "  {0} [OPTION...] [-f ENCODING] [-t ENCODING] [INPUTFILE...]".format(
        __program__),
    "",
])

# Full help text: the synopsis followed by the option reference.
__usage_full = "%s%s" % (__usage_short, """
Converts text from one encoding to another encoding.

Options controlling the input and output format:
  -f ENCODING, --from-code=ENCODING
                              the encoding of the input
  -t ENCODING, --to-code=ENCODING
                              the encoding of the output
Options controlling conversion problems:
  -c                          discard unconvertible characters

Informative output:
  --help                      display this help and exit
  --version                   output version information and exit
""")


# Parse the command line into (option, value) pairs and the list of input
# files.  In getopt's long-option list a trailing "=" marks an option that
# requires an argument; without it "--from-code=ENC" and "--to-code=ENC"
# (the forms advertised in the usage text) are rejected as errors.
try:
    opts, files = getopt.getopt(
        sys.argv[1:],
        "cf:t:",
        ["help", "from-code=", "to-code=", "version"])
except getopt.GetoptError as e:
    # Diagnostics go to stderr so they never mix with converted data on
    # stdout; exit status 2 signals a usage error.
    sys.stderr.write("%s\n" % e)
    sys.stderr.write("%s\n" % __usage_short)
    sys.exit(2)

# Defaults: strict codec error handling, and the locale's preferred encoding
# for both input and output until -f / -t override them.
errors = 'strict'
src_enc = dst_enc = locale.getpreferredencoding()

for opt, arg in opts:
    # Single options are compared with ==: a parenthesised string such as
    # ("--help") is a plain string, not a tuple, so `in` would perform a
    # substring test rather than a membership test.
    if opt == "--help":
        print(__usage_full)
        sys.exit()
    elif opt == "--version":
        print(__program__, __version__)
        sys.exit()
    elif opt == "-c":
        # Drop characters that cannot be decoded or encoded.
        errors = 'ignore'
    elif opt in ("-f", "--from-code"):
        src_enc = arg
    elif opt in ("-t", "--to-code"):
        dst_enc = arg
    else:
        # Only reachable if the getopt specification gains an option this
        # loop does not handle.  Raised explicitly because a bare `assert`
        # is stripped when Python runs with -O.
        raise AssertionError("unhandled option")

# Replace the text-mode standard streams with the binary buffers they wrap
# (detach() exists from Python 3.1 on).  Everything read and written below
# is bytes; decoding and encoding is done explicitly by convert().
if sys.version_info >= (3, 1):
    for _stream in ("stdin", "stdout"):
        setattr(sys, _stream, getattr(sys, _stream).detach())

def convert(line):
    """Re-encode one chunk of bytes from the source to the target encoding.

    `line` is decoded with the module-level `src_enc` and the resulting
    text is encoded with `dst_enc`; the module-level `errors` handler
    ('strict' or 'ignore') is applied to both steps.  Returns bytes.

    Each call is independent: no codec state is carried over from one
    line to the next.
    """
    text = line.decode(src_enc, errors)
    return text.encode(dst_enc, errors)

# Main loop: convert every named input file in order, or standard input
# when no files were given.  Data is handled as bytes, one line at a time.
if files:
    for f in files:
        # The with-statement closes the file even when convert() or the
        # write raises part-way through (e.g. undecodable input in strict
        # mode), instead of leaking the handle.
        with open(f, mode='rb') as infile:
            for line in infile:
                sys.stdout.write(convert(line))
else:
    while True:
        # Read exactly one line per iteration so that a coprocess feeding
        # us line by line gets each answer before it sends the next line.
        line = sys.stdin.readline()
        if not line:
            # An empty result means end of input.
            break
        sys.stdout.write(convert(line))
        # Push the converted line out immediately: without this it would
        # sit in the output buffer when stdout is a pipe, stalling the
        # line-based two-way IPC this tool is meant to support.
        sys.stdout.flush()