File: ASCIIFastPath.h

package info (click to toggle)
webkit 1.8.1-3.4
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 86,872 kB
  • sloc: cpp: 748,063; ansic: 17,151; sh: 11,084; perl: 10,883; yacc: 3,678; python: 3,440; lex: 559; makefile: 168; xml: 91
file content (101 lines) | stat: -rw-r--r-- 3,372 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
 * Copyright (C) 2011 Apple Inc. All rights reserved.
 * Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#ifndef ASCIIFastPath_h
#define ASCIIFastPath_h

#include <stdint.h>
#include <wtf/unicode/Unicode.h>

namespace WTF {

// A "machine word" is taken here to be an integer exactly as wide as a
// pointer, which is what uintptr_t provides.
typedef uintptr_t MachineWord;

// Bit mask covering the low-order address bits below machine-word
// granularity (this relies on sizeof(MachineWord) being a power of two).
const MachineWord machineWordAlignmentMask = sizeof(MachineWord) - 1;

// Returns true when the address stored in |pointer| has none of the bits of
// machineWordAlignmentMask set, i.e. it lies on a machine-word boundary.
inline bool isAlignedToMachineWord(const void* pointer)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(pointer);
    return (address & machineWordAlignmentMask) == 0;
}

// Rounds |pointer| down to a machine-word boundary by clearing the address
// bits covered by machineWordAlignmentMask. A pointer that is already aligned
// comes back unchanged.
template<typename T> inline T* alignToMachineWord(T* pointer)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(pointer);
    const uintptr_t alignedAddress = address & ~machineWordAlignmentMask;
    return reinterpret_cast<T*>(alignedAddress);
}

// NonASCIIMask<size, CharacterType>::value() returns a constant of |size|
// bytes. ANDing it with a word of packed characters yields zero only when no
// bit outside the 7-bit ASCII range is set in that word (see isAllASCII and
// charactersAreAllASCII). Only the 4- and 8-byte word sizes are specialized;
// the primary template is intentionally left undefined.
template<size_t size, typename CharacterType> struct NonASCIIMask;

// LChar: the pattern 0x80 repeats for every byte of the word
// (presumably LChar is an 8-bit character; it is declared outside this file).
template<> struct NonASCIIMask<4, LChar> {
    static uint32_t value()
    {
        return 0x80808080U;
    }
};
template<> struct NonASCIIMask<8, LChar> {
    static uint64_t value()
    {
        return 0x8080808080808080ULL;
    }
};

// UChar: the pattern 0xFF80 repeats for every 16 bits of the word
// (presumably UChar is a 16-bit code unit; it is declared outside this file).
template<> struct NonASCIIMask<4, UChar> {
    static uint32_t value()
    {
        return 0xFF80FF80U;
    }
};
template<> struct NonASCIIMask<8, UChar> {
    static uint64_t value()
    {
        return 0xFF80FF80FF80FF80ULL;
    }
};


// Checks a whole machine word of packed characters in one step: the word is
// considered all-ASCII when none of the bits selected by the NonASCIIMask for
// this word size and character type are set.
template<typename CharacterType>
inline bool isAllASCII(MachineWord word)
{
    const MachineWord nonASCIIBits = word & NonASCIIMask<sizeof(MachineWord), CharacterType>::value();
    return !nonASCIIBits;
}

// Note: This function assume the input is likely all ASCII, and
// does not leave early if it is not the case.
template<typename CharacterType>
inline bool charactersAreAllASCII(const CharacterType* characters, size_t length)
{
    MachineWord allCharBits = 0;
    const CharacterType* end = characters + length;

    // Prologue: align the input.
    while (!isAlignedToMachineWord(characters) && characters != end) {
        allCharBits |= *characters;
        ++characters;
    }

    // Compare the values of CPU word size.
    const CharacterType* wordEnd = alignToMachineWord(end);
    const size_t loopIncrement = sizeof(MachineWord) / sizeof(CharacterType);
    while (characters < wordEnd) {
        allCharBits |= *(reinterpret_cast<const MachineWord*>(characters));
        characters += loopIncrement;
    }

    // Process the remaining bytes.
    while (characters != end) {
        allCharBits |= *characters;
        ++characters;
    }

    MachineWord nonASCIIBitMask = NonASCIIMask<sizeof(MachineWord), CharacterType>::value();
    return !(allCharBits & nonASCIIBitMask);
}


} // namespace WTF

#endif // ASCIIFastPath_h