File: testutf8.cpp

package info (click to toggle)
fcitx5 5.1.17-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 17,356 kB
  • sloc: cpp: 75,670; sh: 1,770; xml: 1,545; python: 1,052; ansic: 71; makefile: 11
file content (101 lines) | stat: -rw-r--r-- 3,588 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 * SPDX-FileCopyrightText: 2016-2016 CSSlayer <wengxt@gmail.com>
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later
 *
 */

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <string>
#include "fcitx-utils/cutf8.h"
#include "fcitx-utils/log.h"
#include "fcitx-utils/utf8.h"

#define BUF_SIZE 9

/*
 * Test driver for the C helpers declared in cutf8.h and the C++ helpers in
 * the fcitx::utf8 namespace. Every expectation is checked with FCITX_ASSERT;
 * the function returns 0 once all checks have been executed.
 */
int main() {
    char buf[BUF_SIZE];
    // Five complete 3-byte sequences followed by a lone lead byte (0xe6).
    const char string[] = "\xe4\xbd\xa0\xe5\xa5\xbd\xe6\xb5"
                          "\x8b\xe8\xaf\x95\xe5\xb8\x8c\xe6";
    // Expected copy for a limit of BUF_SIZE - 1 bytes: the first two complete
    // sequences, with the remaining bytes of the buffer being zero.
    const char result[] = {'\xe4', '\xbd', '\xa0', '\xe5', '\xa5',
                           '\xbd', '\0',   '\0',   '\0'};
    fcitx_utf8_strncpy(buf, string, BUF_SIZE - 1);
    // The last byte is set explicitly so the comparison covers all of buf.
    buf[BUF_SIZE - 1] = 0;
    FCITX_ASSERT(memcmp(buf, result, BUF_SIZE) == 0);
    FCITX_UNUSED(result);

    // Only sequences that fit entirely within the byte limit are counted.
    FCITX_ASSERT(fcitx_utf8_strnlen(string, 0) == 0);
    FCITX_ASSERT(fcitx_utf8_strnlen(string, 1) == 0);
    FCITX_ASSERT(fcitx_utf8_strnlen(string, 2) == 0);
    FCITX_ASSERT(fcitx_utf8_strnlen(string, 3) == 1);
    FCITX_ASSERT(fcitx_utf8_strnlen(string, 6) == 2);
    FCITX_ASSERT(fcitx_utf8_strnlen(string, 8) == 2);
    FCITX_ASSERT(fcitx_utf8_strnlen(string, 9) == 3);

    // Encode/decode round trip over every value below 0x4000000. Values whose
    // encoding does not pass fcitx_utf8_check_string are skipped.
    for (uint32_t c = 0; c < 0x4000000; c++) {
        char utf8_buf[7];
        int len = fcitx_ucs4_to_utf8(c, utf8_buf);
        if (fcitx_utf8_check_string(utf8_buf)) {
            uint32_t c2 = 0;
            char *pos = fcitx_utf8_get_char(utf8_buf, &c2);
            if (c != c2) {
                // Print the mismatching pair before the assertion fires.
                printf("%x %x\n", c, c2);
                printf("%d\n", len);
            }
            FCITX_ASSERT(c == c2);
            // Decoding must consume exactly the bytes the encoder reported.
            FCITX_ASSERT(pos == utf8_buf + len);
        }
    }

    // Three 3-byte characters followed by 'a', 'b', 'c' and a newline.
    std::string str = "\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x90\x97\x61\x62\x63\x0a";
    FCITX_ASSERT(fcitx::utf8::validate(str));
    FCITX_ASSERT(fcitx::utf8::lengthValidated(str) == 7);

    // Per-character expectations for str: code point, encoded length in bytes
    // and the encoded bytes themselves.
    uint32_t expect[] = {0x4f60, 0x597d, 0x5417, 0x61, 0x62, 0x63, 0x0a};
    uint32_t expectLength[] = {3, 3, 3, 1, 1, 1, 1};
    std::string expectCharStr[] = {
        "\xe4\xbd\xa0", "\xe5\xa5\xbd", "\xe5\x90\x97", "\x61",
        "\x62",         "\x63",         "\x0a"};
    static_assert(std::size(expect) == std::size(expectLength),
                  "expectation tables must have the same length");
    static_assert(std::size(expect) == std::size(expectCharStr),
                  "expectation tables must have the same length");
    const std::size_t expectCount = std::size(expect);

    // Code point iteration. The bound is asserted before indexing so that an
    // iterator producing too many characters fails the test instead of
    // reading past the end of the table.
    std::size_t counter = 0;
    for (auto c : fcitx::utf8::MakeUTF8CharRange(str)) {
        FCITX_ASSERT(counter < expectCount);
        FCITX_ASSERT(expect[counter] == c);
        counter++;
    }
    FCITX_ASSERT(counter == expectCount);

    // Explicit iterator walk over the same range, checking the byte length
    // and the view of every character.
    auto range = fcitx::utf8::MakeUTF8CharRange(str);
    std::size_t i = 0;
    for (auto iter = std::begin(range), end = std::end(range); iter != end;
         ++iter, ++i) {
        FCITX_ASSERT(i < expectCount);
        FCITX_ASSERT(iter.charLength() == iter.view().length());
        FCITX_ASSERT(iter.charLength() == expectLength[i]);
        FCITX_ASSERT(iter.view() == expectCharStr[i]);
    }
    // Guards against the loop passing vacuously on an empty range.
    FCITX_ASSERT(i == expectCount);

    // Same expectations through the string-view range.
    auto rangeView = fcitx::utf8::MakeUTF8StringViewRange(str);
    i = 0;
    for (auto iter = std::begin(rangeView), end = std::end(rangeView);
         iter != end; ++iter, ++i) {
        FCITX_ASSERT(i < expectCount);
        FCITX_ASSERT(iter->size() == expectLength[i]);
        FCITX_ASSERT(*iter == expectCharStr[i]);
    }
    FCITX_ASSERT(i == expectCount);

    FCITX_ASSERT(fcitx::utf8::getLastChar(str) == 0xa);

    // Error reporting for malformed and empty input.
    std::string invalidStr = "\xe4\xff";
    FCITX_ASSERT(fcitx::utf8::getLastChar(invalidStr) ==
                 fcitx::utf8::INVALID_CHAR);
    std::string empty;
    FCITX_ASSERT(fcitx::utf8::getLastChar(empty) ==
                 fcitx::utf8::NOT_ENOUGH_SPACE);
    FCITX_ASSERT(fcitx::utf8::length(empty) == 0);
    FCITX_ASSERT(fcitx::utf8::lengthValidated(empty) == 0);
    FCITX_ASSERT(fcitx::utf8::lengthValidated(invalidStr) ==
                 fcitx::utf8::INVALID_LENGTH);

    // Code point validity checks.
    FCITX_ASSERT(fcitx::utf8::UCS4IsValid(0xfdd7));
    FCITX_ASSERT(fcitx::utf8::UCS4IsValid(0xffff));
    FCITX_ASSERT(!fcitx::utf8::UCS4IsValid(0x200000));

    return 0;
}