File: aff2utf8.awk

package info (click to toggle)
ispell-lt 1.3.2-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,980 kB
  • sloc: perl: 3,675; python: 854; makefile: 337; sh: 155; awk: 56; xml: 6
file content (92 lines) | stat: -rwxr-xr-x 2,201 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/gawk -f
# -*- coding: utf-8 -*-
#
# Converts lithuanian (latin7) ispell affix file to utf-8.
#
# Copyright (c) 2016, Laimonas Vėbra
# All rights reserved.
#
# This program is licensed under the Simplified BSD License.
# See <http://www.opensource.org/licenses/bsd-license>
#
# Usage: gawk [-v PY_ICONV=/path/to/iconv.py] -f aff2utf8.awk <lietuviu.aff>

# Convert one line of text by passing it through the `converter' coprocess.
#
#   s    - the text to convert (one line, without the trailing newline)
#   res  - local scratch variable; callers must not pass it
#
# Returns the line read back from the coprocess.  An empty string is
# returned as-is without contacting the coprocess.
# If the coprocess gives no reply (EOF or I/O error), an error is printed to
# stderr and the program exits with status 1 instead of returning stale data.
function conv(s,    res) {
    if (s == "")
        return ""
    print s |& converter
    # getline returns 0 on EOF and -1 on error; in both cases `res' is not
    # assigned, so it must not be returned.
    if ((converter |& getline res) <= 0) {
        print "aff2utf8: no reply from converter: " converter > "/dev/stderr"
        exit 1
    }
    return res
}

# Report whether the command `f' can be located with `which'.
#
#   f - command name to look up
#
# Returns 1 when `which' exits with status 0, otherwise 0.
function exists(f) {
    # system() shares our stdout, so the lookup output is sent to /dev/null
    # to keep it out of the converted file.
    if (system("which " f " > /dev/null") != 0)
        return 0
    return 1
}

BEGIN {
    # Case-insensitive matching; per the gawk manual this also applies to
    # index(), which is used for the OS check below.
    IGNORECASE = 1

    # The Python fallback converter can be supplied either as an awk
    # variable (-v PY_ICONV=abs_or_rel/path/to/iconv.py) or through the
    # PY_ICONV environment variable; the awk variable takes precedence.
    if (!PY_ICONV)
        PY_ICONV = ENVIRON["PY_ICONV"]

    if (index(ENVIRON["OS"], "windows") <= 0) {
        # Author's note: on linux/posix iconv (fread) blocks until EOF, so
        # two-way IPC with an iconv coprocess won't work there.  Try `luit'
        # (from x11-utils, etc) instead.
        if (exists("luit"))
            converter = "luit -c -encoding ISO-8859-13"
    } else if (exists("iconv")) {
        converter = "iconv -f ISO-8859-13 -t UTF8"
    }

    # Nothing usable found so far: fall back to the Python script if given.
    if (!converter && PY_ICONV)
        converter = "python3 -u " PY_ICONV " -f ISO-8859-13 -t UTF-8"

    # Still nothing: give up.
    if (!converter) {
        print "No suitable converter found and PY_ICONV is not set."\
            > "/dev/stderr"
        exit 1
    }

    # Left disabled by the author:
    #PROCINFO[iconv, "pty"] = 1
}

# At the first record of each input file, pre-scan that file with sed for
# its altstringtype line and remember what sed prints for it in `alt_fmt'
# (used later when the defstringtype line is rewritten).
FNR == 1 {
    alt_cmd = "sed -ne 's/altstringtype\\(.*\\)/\\1/p'"
    if (FILENAME != "") {
        # Single-quote the path for the shell so that names containing
        # spaces or shell metacharacters are passed to sed intact; an
        # embedded ' is written as '\''.
        aff_path = FILENAME
        gsub(/'/, "'\\''", aff_path)
        alt_cmd = alt_cmd " '" aff_path "'"
    }
    # Reset first: getline leaves `val' untouched when sed prints nothing,
    # which would otherwise carry over the value from a previous file.
    val = ""
    alt_cmd | getline val
    # Only the first printed line is needed; close the pipe so it does not
    # stay open for the rest of the run.
    close(alt_cmd)
    alt_fmt = val
}

# swap (def and alt) formatter values
/^\s*defstringtype/ {
    # Remember this line's own formatter text (everything following the
    # keyword) in def_fmt, then emit the line with the formatter text that
    # was collected from the altstringtype line instead.
    match($0, /defstringtype(.*)/, m)
    def_fmt = m[1]
    print "defstringtype" OFS alt_fmt
    next
}
/^\s*altstringtype/ {
    # Emit the altstringtype line with the formatter text saved from the
    # defstringtype line (def_fmt is empty if no such line was seen yet).
    print "altstringtype" OFS def_fmt
    next
}

# swap altstringchars (utf-8 <=> latin7)
match($0, /^([#[:space:]]*altstringchar\s+)(\S+)\s+(\S+)(.*)/, m) {
    # m[1] = leading part up to and including the keyword and its spacing,
    # m[2] / m[3] = the two character columns, m[4] = remainder of the line.
    # The two columns are written in swapped order; only the remainder is
    # passed through the converter.
    print m[1] OFS m[3] OFS m[2] OFS conv(m[4])
    next
}

# convert to utf-8 all other lines
{
    # Every line not handled by an earlier rule is converted as a whole.
    printf "%s%s", conv($0), ORS
}

# Shut down the converter coprocess once all input has been handled.
END { close(converter) }