File: test-unicode-fold.c

package info (click to toggle)
libmongocrypt 1.17.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 12,572 kB
  • sloc: ansic: 70,067; python: 4,547; cpp: 615; sh: 460; makefile: 44; awk: 8
file content (101 lines) | stat: -rw-r--r-- 6,605 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 * Copyright 2025-present MongoDB, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "mongocrypt-status-private.h"
#include "test-mongocrypt-assert.h"
#include "test-mongocrypt.h"
#include "unicode/fold.h"

// Calls unicode_fold() on the first `input_len` bytes of `input` with `options`, hands the call's result to
// ASSERT_OR_PRINT together with `status`, and then checks that the output has exactly `expected_len` bytes, that
// those bytes equal the first `expected_len` bytes of `expected`, and that the byte following the output is zero.
// The output buffer is released with bson_free().
//
// Requires a variable named `status` in the calling scope. `expected_len`, `input_len` and `options` are evaluated
// exactly once; `expected` and `input` are expanded more than once, so pass expressions without side effects.
#define TEST_UNICODE_FOLD(expected, expected_len, input, input_len, options)                                           \
    do {                                                                                                               \
        /* Capture the scalar arguments so each is evaluated exactly once. */                                          \
        const size_t _tuf_expected_len = (expected_len);                                                               \
        const size_t _tuf_input_len = (input_len);                                                                     \
        const unicode_fold_options_t _tuf_options = (options);                                                         \
        char *_tuf_out = NULL;                                                                                         \
        size_t _tuf_out_len = 0;                                                                                       \
        ASSERT_OR_PRINT(unicode_fold((input), _tuf_input_len, _tuf_options, &_tuf_out, &_tuf_out_len, status),         \
                        status);                                                                                       \
        TEST_PRINTF("Testing: input=%.*s, expected=%.*s, output=%.*s\n",                                               \
                    (int)_tuf_input_len,                                                                               \
                    (input),                                                                                           \
                    (int)_tuf_expected_len,                                                                            \
                    (expected),                                                                                        \
                    (int)_tuf_out_len,                                                                                 \
                    _tuf_out);                                                                                         \
        ASSERT_CMPSIZE_T(_tuf_out_len, ==, _tuf_expected_len);                                                         \
        ASSERT_CMPBYTES((uint8_t *)_tuf_out, _tuf_out_len, (uint8_t *)(expected), _tuf_expected_len);                  \
        /* The byte just past the reported length must be a NUL terminator. */                                         \
        ASSERT_CMPUINT8((uint8_t)(_tuf_out[_tuf_out_len]), ==, 0);                                                     \
        bson_free(_tuf_out);                                                                                           \
    } while (0)

// Runs TEST_UNICODE_FOLD on `input` three times: with kUnicodeFoldToLower (expecting `case_folded`), with
// kUnicodeFoldRemoveDiacritics (expecting `dia_folded`), and with both options combined (expecting `both_folded`).
// All four arguments are measured with strlen(), so this macro is not suitable for data with embedded zero bytes.
#define TEST_UNICODE_FOLD_ALL_CASES(input, case_folded, dia_folded, both_folded)                                       \
    do {                                                                                                               \
        /* Byte lengths come from strlen, so arguments must be NUL-terminated strings. */                              \
        const size_t _all_src_len = strlen(input);                                                                     \
        const size_t _all_lower_len = strlen(case_folded);                                                             \
        const size_t _all_nodia_len = strlen(dia_folded);                                                              \
        const size_t _all_both_len = strlen(both_folded);                                                              \
        const unicode_fold_options_t _all_both_opts =                                                                  \
            (unicode_fold_options_t)(kUnicodeFoldToLower | kUnicodeFoldRemoveDiacritics);                              \
        TEST_UNICODE_FOLD(case_folded, _all_lower_len, input, _all_src_len, kUnicodeFoldToLower);                      \
        TEST_UNICODE_FOLD(dia_folded, _all_nodia_len, input, _all_src_len, kUnicodeFoldRemoveDiacritics);              \
        TEST_UNICODE_FOLD(both_folded, _all_both_len, input, _all_src_len, _all_both_opts);                            \
    } while (0)

// Drives unicode_fold() through the TEST_UNICODE_FOLD / TEST_UNICODE_FOLD_ALL_CASES macros: every 7-bit ASCII value
// as a one-byte string, composed and decomposed non-ASCII text, an input containing a zero byte, and inputs whose
// expected output is longer in bytes than the input.
static void test_unicode_fold(_mongocrypt_tester_t *tester) {
    // The TEST_UNICODE_FOLD macros refer to this variable by name.
    mongocrypt_status_t *status = mongocrypt_status_new();

    // ASCII sweep. Both scratch strings hold one character followed by a zero terminator.
    char *ascii_in = bson_malloc0(2);
    char *ascii_lower = bson_malloc0(2);
    for (unsigned int code = 0; code < 0x80; code++) {
        const char ascii = (char)code;
        ascii_in[0] = ascii;

        if (ascii == '^' || ascii == '`') {
            // Expected to be dropped entirely whenever diacritic removal is requested.
            TEST_UNICODE_FOLD_ALL_CASES(ascii_in, ascii_in, "", "");
            continue;
        }
        if (ascii < 'A' || ascii > 'Z') {
            // Everything else outside A-Z is expected back unchanged under every option.
            TEST_UNICODE_FOLD_ALL_CASES(ascii_in, ascii_in, ascii_in, ascii_in);
            continue;
        }
        // Uppercase letter: its lowercase counterpart is 0x20 higher.
        ascii_lower[0] = (char)(ascii + 0x20);
        TEST_UNICODE_FOLD_ALL_CASES(ascii_in, ascii_lower, ascii_in, ascii_lower);
    }
    bson_free(ascii_in);
    bson_free(ascii_lower);

    TEST_UNICODE_FOLD_ALL_CASES("abc", "abc", "abc", "abc");

    // Composed (single code point) accented and non-Latin text.
    TEST_UNICODE_FOLD_ALL_CASES("¿CUÁNTOS AÑOS tienes Tú?",
                                "¿cuántos años tienes tú?",
                                "¿CUANTOS ANOS tienes Tu?",
                                "¿cuantos anos tienes tu?");
    TEST_UNICODE_FOLD_ALL_CASES("СКОЛЬКО ТЕБЕ ЛЕТ?", "сколько тебе лет?", "СКОЛЬКО ТЕБЕ ЛЕТ?", "сколько тебе лет?");
    TEST_UNICODE_FOLD_ALL_CASES("Πόσο χρονών είσαι?", "πόσο χρονών είσαι?", "Ποσο χρονων εισαι?", "ποσο χρονων εισαι?");

    // Decomposed text: a base letter followed by the two-byte sequence \xcc\x81.
    TEST_UNICODE_FOLD_ALL_CASES("Cafe\xcc\x81", "cafe\xcc\x81", "Cafe", "cafe");
    TEST_UNICODE_FOLD_ALL_CASES("CafE\xcc\x81", "cafe\xcc\x81", "CafE", "cafe");

    // Input with an embedded zero byte; lengths are given explicitly because strlen() would stop early.
    TEST_UNICODE_FOLD("fo\0bar",
                      6,
                      "fo\0bar",
                      6,
                      (unicode_fold_options_t)(kUnicodeFoldToLower | kUnicodeFoldRemoveDiacritics));

    // Expected output longer in bytes than the input: 2 -> 3 bytes when lowercasing, 3 -> 4 bytes when removing
    // diacritics.
    TEST_UNICODE_FOLD("\xe2\xb1\xa6", 3, "\xc8\xbe", 2, kUnicodeFoldToLower);
    TEST_UNICODE_FOLD("\xf0\xa4\x8b\xae", 4, "\xef\xa9\xac", 3, kUnicodeFoldRemoveDiacritics);

    mongocrypt_status_destroy(status);
}

// Installer entry point for this file's tests: hands test_unicode_fold to INSTALL_TEST.
// NOTE(review): INSTALL_TEST is defined outside this file; presumably it picks up the `tester` parameter by name
// and registers the function with it -- confirm against its definition before renaming either.
void _mongocrypt_tester_install_unicode_fold(_mongocrypt_tester_t *tester) {
    INSTALL_TEST(test_unicode_fold);
}