File: gen_iconv.cpp

package info (click to toggle)
openmw 0.49.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 33,992 kB
  • sloc: cpp: 372,479; xml: 2,149; sh: 1,403; python: 797; makefile: 26
file content (124 lines) | stat: -rw-r--r-- 3,387 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
// This program generates the file tables_gen.hpp

#include <iostream>

#include <cassert>
#include <iconv.h>

// Emit the three-space indent that starts every table row.
void tab()
{
    static const char indent[] = "   ";
    std::cout << indent;
}

// Write one number with a space in front of it and, unless it is the last
// one, a comma after it.
//
// i    - byte to print; it is emitted as a decimal integer, not as a character
// last - when true, the trailing comma is omitted
void num(char i, bool last)
{
    // Convert i to its integer value in the range -128 to 127. Printing it
    // directly would result in non-printable characters in the source code,
    // which is bad. The value goes through signed char first because the
    // signedness of plain char is implementation-defined: where char is
    // unsigned, a direct conversion to int would print 128..255, which does not
    // fit the "signed char" tables this program generates.
    std::cout << " " << static_cast<int>(static_cast<signed char>(i));
    if (!last)
        std::cout << ",";
}

// Emit a single table row: the byte count of the UTF-8 sequence, followed by
// five value bytes, followed by an optional trailing "//" comment.
//
// value   - buffer whose first five bytes are all written, regardless of length
// length  - number of meaningful bytes in value; must be between 1 and 5
// last    - true for the final row, which drops the comma after the fifth byte
// comment - appended to the row as a line comment when it is not empty
void writeChar(char* value, int length, bool last, const std::string& comment = "")
{
    assert(length >= 1 && length <= 5);

    // Row prefix: indent, then the length field (always followed by a comma)
    tab();
    num(length, false);

    // Always five data bytes; only the very last byte of the very last row
    // goes without a trailing comma
    int idx = 0;
    while (idx < 5)
    {
        const bool finalByte = last && idx == 4;
        num(value[idx], finalByte);
        ++idx;
    }

    if (!comment.empty())
        std::cout << " // " << comment;

    std::cout << std::endl;
}

// Emit the placeholder row used for missing characters: a one-byte entry
// holding a single space, tagged with an explanatory comment.
//
// last - forwarded to writeChar(); true for the final row of a table
void writeMissing(bool last)
{
    // A space followed by four zero padding bytes
    char placeholder[5] = { ' ', 0, 0, 0, 0 };
    writeChar(placeholder, 1, last, "not part of this charset");
}

// Write the conversion table for one 8-bit charset to standard output.
//
// The table is a "const static signed char" array named tableName with one
// row per input byte value 0-255. A row is written by writeChar() when iconv
// converts the byte to UTF-8, and by writeMissing() otherwise.
//
// charset   - iconv name of the source encoding, e.g. "WINDOWS-1252"
// tableName - identifier used for the generated array
//
// Returns 0 on success. Returns 1 if iconv cannot provide a conversion from
// charset to UTF-8; in that case a message goes to standard error and nothing
// is written to standard output.
int write_table(const std::string& charset, const std::string& tableName)
{
    // Open the conversion system before emitting anything, so an unsupported
    // charset cannot leave a half-written table behind. iconv_open() signals
    // failure by returning (iconv_t)-1, which must not be passed on to
    // iconv() or iconv_close().
    iconv_t cd = iconv_open("UTF-8", charset.c_str());
    if (cd == reinterpret_cast<iconv_t>(-1))
    {
        std::cerr << "gen_iconv: cannot open conversion from " << charset << " to UTF-8\n";
        return 1;
    }

    // Write table header
    std::cout << "const static signed char " << tableName << "[] =\n{\n";

    // Convert each character from 0 to 255
    for (int i = 0; i < 256; i++)
    {
        const bool last = (i == 255);

        // Go through unsigned char so values above 127 are narrowed explicitly
        char input = static_cast<char>(static_cast<unsigned char>(i));
        char* iptr = &input;
        size_t ileft = 1;

        // Zero-filled so the unused tail of a short sequence is written as 0
        char output[5] = {};
        char* optr = output;
        size_t oleft = sizeof(output);

        const size_t res = iconv(cd, &iptr, &ileft, &optr, &oleft);

        // Any non-zero result (an error, or a non-reversible conversion) is
        // treated as "character not available in this charset"
        if (res != 0)
            writeMissing(last);
        else
            writeChar(output, static_cast<int>(sizeof(output) - oleft), last);
    }

    iconv_close(cd);

    // Finish table
    std::cout << "};\n";

    return 0;
}

// Generate the whole header on standard output: include guard, the ToUTF8
// namespace, and one table per supported charset.
//
// Returns 0 when every write_table() call reported success, 1 otherwise, so a
// build step running this generator can detect an incomplete header.
int main()
{
    // Combined status of all write_table() calls; stays 0 while all succeed
    int status = 0;

    // Write header guard
    std::cout << "#ifndef COMPONENTS_TOUTF8_TABLE_GEN_H\n#define COMPONENTS_TOUTF8_TABLE_GEN_H\n\n";

    // Write namespace
    std::cout << "namespace ToUTF8\n{\n\n";

    // Central European and Eastern European languages that use Latin script, such as
    // Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian, Serbian (Latin script), Romanian and Albanian.
    std::cout << "\n/// Central European and Eastern European languages that use Latin script,"
                 "\n/// such as Polish, Czech, Slovak, Hungarian, Slovene, Bosnian, Croatian,"
                 "\n/// Serbian (Latin script), Romanian and Albanian."
                 "\n";
    status |= write_table("WINDOWS-1250", "windows_1250");

    // Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic and other languages
    std::cout << "\n/// Cyrillic alphabet such as Russian, Bulgarian, Serbian Cyrillic"
                 "\n/// and other languages"
                 "\n";
    status |= write_table("WINDOWS-1251", "windows_1251");

    // English
    std::cout << "\n/// Latin alphabet used by English and some other Western languages"
                 "\n";
    status |= write_table("WINDOWS-1252", "windows_1252");

    // CP437 table; no doc comment is emitted in front of this one
    status |= write_table("CP437", "cp437");

    // Close namespace
    std::cout << "\n}\n\n";

    // Close header guard
    std::cout << "#endif\n\n";

    // The namespace and guard are closed even after a failure; the exit code
    // is what tells the caller that the output must not be used.
    if (status != 0)
    {
        std::cerr << "gen_iconv: one or more tables could not be generated\n";
        return 1;
    }

    return 0;
}