File: utrie2perf.cpp

package info (click to toggle)
icu 78.2-1
  • links: PTS
  • area: main
  • in suites: experimental
  • size: 123,992 kB
  • sloc: cpp: 527,891; ansic: 112,789; sh: 4,983; makefile: 4,657; perl: 3,199; python: 2,933; xml: 749; sed: 36; lisp: 12
file content (263 lines) | stat: -rw-r--r-- 8,977 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
/*  
 ***********************************************************************
 * © 2016 and later: Unicode, Inc. and others.
 * License & terms of use: http://www.unicode.org/copyright.html
 ***********************************************************************
 ***********************************************************************
 *   Copyright (C) 2002-2014, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 ***********************************************************************
 *  file name:  utrie2perf.cpp
 *  encoding:   UTF-8
 *  tab size:   8 (not used)
 *  indentation:4
 *
 *  created on: 2008sep07
 *  created by: Markus W. Scherer
 *
 *  Performance test program for UTrie2.
 */

#include <stdio.h>
#include <stdlib.h>
#include "unicode/uchar.h"
#include "unicode/unorm.h"
#include "unicode/uperf.h"
#include "uoptions.h"

#if 0
// Left over from when icu/branches/markus/utf8 could use both old UTrie
// and new UTrie2, switched with #if in unorm.cpp and ubidi_props.c.
// Comparative benchmarks were done in that branch on revision r24630
// and earlier.
U_CAPI void U_EXPORT2
unorm_initUTrie2(UErrorCode *pErrorCode);

U_CAPI void U_EXPORT2
ubidi_initUTrie2(UErrorCode *pErrorCode);
#endif

U_NAMESPACE_BEGIN

class UnicodeSet;

U_NAMESPACE_END

// Test object.
//
// On construction it asks UPerfTest for the input text, counts the code points
// in buffer/bufferLen, and builds a UTF-8 copy of that text in utf8/utf8Length.
// The UTF-8 copy is allocated with malloc() and owned by this object; it is
// released in the destructor. Copying is disabled because of that ownership.
class UTrie2PerfTest : public UPerfTest {
public:
    // argc/argv: command-line arguments, forwarded to UPerfTest.
    // status: in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the
    //         UTF-8 buffer cannot be allocated, or to whatever error
    //         UPerfTest::getBuffer()/u_strToUTF8() report.
    UTrie2PerfTest(int32_t argc, const char *argv[], UErrorCode &status)
            : UPerfTest(argc, argv, nullptr, 0, "", status),
              utf8(nullptr), utf8Length(0), countInputCodePoints(0) {
        if (U_SUCCESS(status)) {
#if 0       // See comment at unorm_initUTrie2() forward declaration.
            unorm_initUTrie2(&status);
            ubidi_initUTrie2(&status);
#endif
            int32_t inputLength;
            UPerfTest::getBuffer(inputLength, status);
            if(U_SUCCESS(status) && inputLength>0) {
                countInputCodePoints = u_countChar32(buffer, bufferLen);

                // Preflight the UTF-8 length and allocate utf8.
                // With a null destination the call is expected to fail with
                // U_BUFFER_OVERFLOW_ERROR and report the needed length.
                u_strToUTF8(nullptr, 0, &utf8Length, buffer, bufferLen, &status);
                if(status==U_BUFFER_OVERFLOW_ERROR) {
                    utf8 = static_cast<char*>(malloc(utf8Length));
                    if(utf8!=nullptr) {
                        // Reset the expected preflight error before the real conversion.
                        status=U_ZERO_ERROR;
                        u_strToUTF8(utf8, utf8Length, nullptr, buffer, bufferLen, &status);
                    } else {
                        status=U_MEMORY_ALLOCATION_ERROR;
                    }
                }

                if(verbose) {
                    printf("code points:%ld  len16:%ld  len8:%ld  "
                           "B/cp:%.3g\n",
                           static_cast<long>(countInputCodePoints), static_cast<long>(bufferLen), static_cast<long>(utf8Length),
                           static_cast<double>(utf8Length) / countInputCodePoints);
                }
            }
        }
    }

    // Releases the UTF-8 copy allocated in the constructor.
    // free(nullptr) is a no-op, so this is safe when nothing was allocated.
    ~UTrie2PerfTest() { free(utf8); }

    // This object owns utf8; an implicit copy would free the buffer twice.
    UTrie2PerfTest(const UTrie2PerfTest &) = delete;
    UTrie2PerfTest &operator=(const UTrie2PerfTest &) = delete;

    // Maps a test index to its name and, if exec is true, to a new test function object.
    UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char*& name, char* par = nullptr) override;

    // Read-only access to the UTF-16 input text and its length in char16_t units.
    const char16_t *getBuffer() const { return buffer; }
    int32_t getBufferLen() const { return bufferLen; }

    // UTF-8 copy of the input text (not NUL-terminated: allocated with exactly
    // utf8Length bytes), or nullptr if it was not created.
    char *utf8;
    int32_t utf8Length;

    // Number of code points in the input text.
    int32_t countInputCodePoints;
};

// Performance test function object.
class Command : public UPerfFunction {
protected:
    Command(const UTrie2PerfTest &testcase) : testcase(testcase) {}

public:
    virtual ~Command() {}

    // virtual void call(UErrorCode* pErrorCode) { ... }

    long getOperationsPerIteration() override {
        // Number of code points tested.
        return testcase.countInputCodePoints;
    }

    // virtual long getEventsPerIteration();

    const UTrie2PerfTest &testcase;
    UNormalizationCheckResult qcResult;
};

class CheckFCD : public Command {
protected:
    CheckFCD(const UTrie2PerfTest &testcase) : Command(testcase) {}
public:
    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
        return new CheckFCD(testcase);
    }
    void call(UErrorCode* pErrorCode) override {
        UErrorCode errorCode=U_ZERO_ERROR;
        qcResult=unorm_quickCheck(testcase.getBuffer(), testcase.getBufferLen(),
                                  UNORM_FCD, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "error: unorm_quickCheck(UNORM_FCD) failed: %s\n",
                    u_errorName(errorCode));
        }
    }
};

#if 0  // See comment at unorm_initUTrie2() forward declaration.

class CheckFCDAlwaysGet : public Command {
protected:
    CheckFCDAlwaysGet(const UTrie2PerfTest &testcase) : Command(testcase) {}
public:
    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
        return new CheckFCDAlwaysGet(testcase);
    }
    virtual void call(UErrorCode* pErrorCode) {
        UErrorCode errorCode=U_ZERO_ERROR;
        qcResult=unorm_quickCheck(testcase.getBuffer(), testcase.getBufferLen(),
                                  UNORM_FCD_ALWAYS_GET, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "error: unorm_quickCheck(UNORM_FCD) failed: %s\n",
                    u_errorName(errorCode));
        }
    }
};

U_CAPI UBool U_EXPORT2
unorm_checkFCDUTF8(const uint8_t *src, int32_t srcLength, const UnicodeSet *nx);

class CheckFCDUTF8 : public Command {
protected:
    CheckFCDUTF8(const UTrie2PerfTest &testcase) : Command(testcase) {}
public:
    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
        return new CheckFCDUTF8(testcase);
    }
    virtual void call(UErrorCode* pErrorCode) {
        UBool isFCD=unorm_checkFCDUTF8((const uint8_t *)testcase.utf8, testcase.utf8Length, nullptr);
        if(isFCD>1) {
            fprintf(stderr, "error: bogus result from unorm_checkFCDUTF8()\n");
        }
    }
};

#endif

class ToNFC : public Command {
protected:
    ToNFC(const UTrie2PerfTest &testcase) : Command(testcase) {
        UErrorCode errorCode=U_ZERO_ERROR;
        destCapacity=unorm_normalize(testcase.getBuffer(), testcase.getBufferLen(),
                                     UNORM_NFC, 0,
                                     nullptr, 0,
                                     &errorCode);
        dest=new char16_t[destCapacity];
    }
    ~ToNFC() {
        delete [] dest;
    }
public:
    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
        return new ToNFC(testcase);
    }
    void call(UErrorCode* pErrorCode) override {
        UErrorCode errorCode=U_ZERO_ERROR;
        int32_t destLength=unorm_normalize(testcase.getBuffer(), testcase.getBufferLen(),
                                           UNORM_NFC, 0,
                                           dest, destCapacity,
                                           &errorCode);
        if(U_FAILURE(errorCode) || destLength!=destCapacity) {
            fprintf(stderr, "error: unorm_normalize(UNORM_NFC) failed: %s\n",
                    u_errorName(errorCode));
        }
    }

private:
    char16_t *dest;
    int32_t destCapacity;
};

class GetBiDiClass : public Command {
protected:
    GetBiDiClass(const UTrie2PerfTest &testcase) : Command(testcase) {}
public:
    static UPerfFunction* get(const UTrie2PerfTest &testcase) {
        return new GetBiDiClass(testcase);
    }
    void call(UErrorCode* pErrorCode) override {
        const char16_t *buffer=testcase.getBuffer();
        int32_t length=testcase.getBufferLen();
        UChar32 c;
        int32_t i;
        uint32_t bitSet=0;
        for(i=0; i<length;) {
            U16_NEXT(buffer, i, length, c);
            bitSet |= static_cast<uint32_t>(1) << u_charDirection(c);
        }
        if(length>0 && bitSet==0) {
            fprintf(stderr, "error: GetBiDiClass() did not collect bits\n");
        }
    }
};

// Maps a test index to its name and, when exec is true, to a newly created
// test function object.
//   index: test number; an unknown index sets name to "".
//   exec:  if false, only name is set and nullptr is returned.
//   name:  output, receives the test name.
//   par:   not used here.
// Returns the new test function, or nullptr if none was created.
UPerfFunction* UTrie2PerfTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
    switch (index) {
        case 0:
            name = "CheckFCD";
            if (exec) {
                return CheckFCD::get(*this);
            }
            break;
        case 1:
            name = "ToNFC";
            if (exec) {
                return ToNFC::get(*this);
            }
            break;
        case 2:
            name = "GetBiDiClass";
            if (exec) {
                return GetBiDiClass::get(*this);
            }
            break;
#if 0  // See comment at unorm_initUTrie2() forward declaration.
        case 3:
            name = "CheckFCDAlwaysGet";
            if (exec) {
                return CheckFCDAlwaysGet::get(*this);
            }
            break;
        case 4:
            name = "CheckFCDUTF8";
            if (exec) {
                return CheckFCDUTF8::get(*this);
            }
            break;
#endif
        default:
            name = "";
            break;
    }
    return nullptr;
}

int main(int argc, const char *argv[]) {
    UErrorCode status = U_ZERO_ERROR;
    UTrie2PerfTest test(argc, argv, status);

	if (U_FAILURE(status)){
        printf("The error is %s\n", u_errorName(status));
        test.usage();
        return status;
    }
        
    if (test.run() == false){
        fprintf(stderr, "FAILED: Tests could not be run please check the "
			            "arguments.\n");
        return -1;
    }

    return 0;
}