File: stringops.h

package info (click to toggle)
wxpython4.0 4.2.3%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 221,752 kB
  • sloc: cpp: 962,555; python: 230,573; ansic: 170,731; makefile: 51,756; sh: 9,342; perl: 1,564; javascript: 584; php: 326; xml: 200
file content (204 lines) | stat: -rw-r--r-- 6,239 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
///////////////////////////////////////////////////////////////////////////////
// Name:        wx/stringops.h
// Purpose:     implementation of wxString primitive operations
// Author:      Vaclav Slavik
// Modified by:
// Created:     2007-04-16
// Copyright:   (c) 2007 REA Elektronik GmbH
// Licence:     wxWindows licence
///////////////////////////////////////////////////////////////////////////////

#ifndef _WX_WXSTRINGOPS_H__
#define _WX_WXSTRINGOPS_H__

#include "wx/chartype.h"
#include "wx/stringimpl.h"
#include "wx/unichar.h"
#include "wx/buffer.h"

// This header contains the wxStringOperations "namespace" class, which
// implements elementary operations on string data as static methods; wxString
// methods and iterators are implemented in terms of it. Two implementations
// are available: one for UTF-8 encoded char* strings and one for "raw"
// wchar_t* strings (or char* strings in ANSI build).

// FIXME-UTF8: only wchar after we remove ANSI build
#if wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE
struct WXDLLIMPEXP_BASE wxStringOperationsWchar
{
    // The iterator helpers below simply forward to the iterator's own
    // increment, decrement and arithmetic operators.

    // steps the iterator forward to the next Unicode character
    template <typename Iterator>
    static void IncIter(Iterator& i)
    {
        ++i;
    }

    // steps the iterator back to the previous Unicode character
    template <typename Iterator>
    static void DecIter(Iterator& i)
    {
        --i;
    }

    // returns an iterator n Unicode characters away from i (n may be
    // negative); i itself is not modified
    template <typename Iterator>
    static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
    {
        return i + n;
    }

    // returns the signed distance, in Unicode characters, computed as i1 - i2
    template <typename Iterator>
    static ptrdiff_t DiffIters(const Iterator& i1, const Iterator& i2)
    {
        return i1 - i2;
    }

#if wxUSE_UNICODE_UTF16
    // UTF-16 build: result of encoding a single character, implicitly
    // convertible to const wchar_t*
    struct Utf16CharBuffer
    {
        // data is deliberately left uninitialized: EncodeChar() is the only
        // function creating objects of this class and it fills the array.

        wchar_t data[3];
        operator const wchar_t*() const { return data; }
    };

    // encodes ch as UTF-16 (defined out of line)
    static Utf16CharBuffer EncodeChar(const wxUniChar& ch);

    // encodes n copies of ch (defined out of line)
    static wxWCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);

    // true for every character that is not a supplementary one
    static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
    {
        return !ch.IsSupplementary();
    }
#else
    // result of encoding a single character in the internal representation,
    // implicitly convertible to const wxChar*
    struct SingleCharBuffer
    {
        wxChar data[2];
        operator const wxChar*() const { return data; }
    };

    // converts ch to the form used by the internal representation: the
    // character converted to wxChar, followed by a terminating 0
    static SingleCharBuffer EncodeChar(const wxUniChar& ch)
    {
        SingleCharBuffer encoded;
        encoded.data[0] = static_cast<wxChar>(ch);
        encoded.data[1] = 0;
        return encoded;
    }

    // encodes n copies of ch (defined out of line)
    static wxWxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);

    // always true in this configuration
    static bool IsSingleCodeUnitCharacter(const wxUniChar&)
    {
        return true;
    }
#endif

    // returns the character the iterator points at
    static wxUniChar DecodeChar(const wxStringImpl::const_iterator& i)
    {
        return *i;
    }
};
#endif // wxUSE_UNICODE_WCHAR || !wxUSE_UNICODE


#if wxUSE_UNICODE_UTF8
struct WXDLLIMPEXP_BASE wxStringOperationsUtf8
{
    // validates a UTF-8 sequence (defined out of line)
    static bool IsValidUtf8String(const char *c,
                                  size_t len = wxStringImpl::npos);

    // tells whether c can start a UTF-8 sequence: either a 7-bit value
    // (0x00..0x7F) or a byte in the 0xC2..0xF4 range
    static bool IsValidUtf8LeadByte(unsigned char c)
    {
        if ( c <= 0x7F )
            return true;

        return c >= 0xC2 && c <= 0xF4;
    }

    // returns the offset to skip forward when iterating over a UTF-8
    // sequence starting with byte c (defined out of line)
    static unsigned char GetUTF8IterOffset(unsigned char c);


    // advances the iterator by the offset GetUTF8IterOffset() reports for
    // the byte it currently points at; that byte is asserted to be a valid
    // lead byte
    template<typename Iterator>
    static void IncIter(Iterator& i)
    {
        wxASSERT( IsValidUtf8LeadByte(*i) );

        const unsigned char step = GetUTF8IterOffset(*i);
        i += step;
    }

    // moves the iterator back to the lead byte of the preceding character
    template<typename Iterator>
    static void DecIter(Iterator& i)
    {
        // Continuation bytes all look like 10xxxxxx (0x80..0xBF), so step
        // back at least once and keep going while we are still on such a
        // byte. We stop on anything < 0x80 (0xxxxxxx) or in 0xC0..0xFF
        // (11xxxxxx); the latter includes some invalid values, but valid
        // UTF-8 input is assumed here for the sake of efficiency.
        do
        {
            --i;
        }
        while ( ((*i) & 0xC0) == 0x80 /* 2 highest bits are '10' */ );
    }

    // returns a copy of i moved by n characters: forward for positive n,
    // backward for negative n, unchanged for n == 0
    template<typename Iterator>
    static Iterator AddToIter(const Iterator& i, ptrdiff_t n)
    {
        Iterator out(i);

        // at most one of the two loops below does any work
        for ( ptrdiff_t left = n; left > 0; --left )
            IncIter(out);

        for ( ptrdiff_t left = n; left < 0; ++left )
            DecIter(out);

        return out;
    }

    // returns the signed number of characters between the iterators:
    // negative if i1 precedes i2, positive if i2 precedes i1, 0 otherwise
    template<typename Iterator>
    static ptrdiff_t DiffIters(Iterator i1, Iterator i2)
    {
        ptrdiff_t count = 0;

        if ( i1 < i2 )
        {
            // walk i1 up to i2, counting downwards
            for ( ; i1 != i2; IncIter(i1) )
                --count;
        }
        else if ( i2 < i1 )
        {
            // walk i2 up to i1, counting upwards
            for ( ; i2 != i1; IncIter(i2) )
                ++count;
        }

        return count;
    }

    // a character is a single code unit here exactly when it is ASCII
    static bool IsSingleCodeUnitCharacter(const wxUniChar& ch)
    {
        return ch.IsAscii();
    }

    // encodes the character as UTF-8:
    typedef wxUniChar::Utf8CharBuffer Utf8CharBuffer;
    static Utf8CharBuffer EncodeChar(const wxUniChar& ch)
    {
        return ch.AsUTF8();
    }

    // returns n copies of ch encoded in UTF-8 string (defined out of line)
    static wxCharBuffer EncodeNChars(size_t n, const wxUniChar& ch);

    // returns the length of UTF-8 encoding of the character with lead byte
    // 'c'; c is asserted to be a valid lead byte
    static size_t GetUtf8CharLength(char c)
    {
        wxASSERT( IsValidUtf8LeadByte(c) );
        return GetUTF8IterOffset(c);
    }

    // decodes single UTF-8 character from UTF-8 string
    static wxUniChar DecodeChar(wxStringImpl::const_iterator i)
    {
        // anything with the high bit set is handed to the out-of-line
        // decoder, 7-bit values are returned directly
        if ( static_cast<unsigned char>(*i) >= 0x80 )
            return DecodeNonAsciiChar(i);

        return static_cast<int>(*i);
    }

private:
    // decoder used by DecodeChar() for bytes >= 0x80 (defined out of line)
    static wxUniChar DecodeNonAsciiChar(wxStringImpl::const_iterator i);
};
#endif // wxUSE_UNICODE_UTF8


// wxStringOperations is the UTF-8 implementation when wxUSE_UNICODE_UTF8 is
// set and the wchar_t/ANSI implementation otherwise.
#if !wxUSE_UNICODE_UTF8
typedef wxStringOperationsWchar wxStringOperations;
#else
typedef wxStringOperationsUtf8 wxStringOperations;
#endif

#endif  // _WX_WXSTRINGOPS_H__