1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
/*
* Copyright (C) 2005-2021 Team Kodi
* This file is part of Kodi - https://kodi.tv
*
* SPDX-License-Identifier: GPL-2.0-or-later
* See LICENSES/README.md for more information.
*/
#include "CSSUtils.h"
#include <cstdint>
#include <string>
namespace
{
// UTF-8 bytes of U+FFFD REPLACEMENT CHARACTER. Spelled as raw bytes so the literal
// stays a plain char array under every language standard (a u8"" literal is char8_t
// from C++20 on and can no longer be appended to a std::string).
constexpr char REPLACEMENT_CHARACTER[] = "\xEF\xBF\xBD";

// Maximum number of hex digits that form one escape sequence
// https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
constexpr int MAX_ESCAPE_HEX_DIGITS = 6;

// https://www.w3.org/TR/css-syntax-3/#hex-digit
// True for 0-9, A-F and a-f
bool isHexDigit(char c)
{
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}

// https://www.w3.org/TR/css-syntax-3/#hex-digit
// Numeric value (0..15) of a hex digit.
// The result is only meaningful when isHexDigit(c) is true; callers must check first.
uint32_t convertHexDigit(char c)
{
  if (c >= '0' && c <= '9')
    return static_cast<uint32_t>(c - '0');

  if (c >= 'A' && c <= 'F')
    return static_cast<uint32_t>(10 + c - 'A');

  return static_cast<uint32_t>(10 + c - 'a');
}

// https://www.w3.org/TR/css-syntax-3/#whitespace
// Space, tab and newline; CR and FF are included because the CSS input
// preprocessing step treats them as newlines
bool isWhitespace(char c)
{
  return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}

// https://infra.spec.whatwg.org/#surrogate
bool isSurrogateCodePoint(uint32_t c)
{
  return c >= 0xD800 && c <= 0xDFFF;
}

// https://www.w3.org/TR/css-syntax-3/#maximum-allowed-code-point
bool isGreaterThanMaximumAllowedCodePoint(uint32_t c)
{
  return c > 0x10FFFF;
}

// Encode a code point as 1 to 4 UTF-8 bytes.
// The caller must pass a value <= 0x10FFFF; surrogates are not filtered here.
std::string encodeUtf8(uint32_t codePoint)
{
  std::string bytes;

  if (codePoint < 0x80)
  {
    // 1-byte UTF-8: 0xxxxxxx
    bytes += static_cast<char>(codePoint);
  }
  else if (codePoint < 0x800)
  {
    // 2-byte UTF-8: 110xxxxx 10xxxxxx
    bytes += static_cast<char>(0xC0 | (codePoint >> 6));
    bytes += static_cast<char>(0x80 | (codePoint & 0x3F));
  }
  else if (codePoint < 0x10000)
  {
    // 3-byte UTF-8: 1110xxxx 10xxxxxx 10xxxxxx
    bytes += static_cast<char>(0xE0 | (codePoint >> 12));
    bytes += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
    bytes += static_cast<char>(0x80 | (codePoint & 0x3F));
  }
  else
  {
    // 4-byte UTF-8: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    bytes += static_cast<char>(0xF0 | (codePoint >> 18));
    bytes += static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
    bytes += static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
    bytes += static_cast<char>(0x80 | (codePoint & 0x3F));
  }

  return bytes;
}

// https://www.w3.org/TR/css-syntax-3/#consume-escaped-code-point
//
// Decode the escape sequence whose backslash is at str[pos].
//
// str: text being unescaped (not modified)
// pos: in  - index of the backslash
//      out - index of the LAST character consumed by the escape, so a caller that
//            increments pos afterwards continues with the first unconsumed character
//
// Returns the UTF-8 text the escape stands for:
//  - backslash at the very end of str: empty string, pos unchanged
//  - backslash + non-hex character: that character itself
//  - backslash + 1..6 hex digits (+ one optional whitespace, CRLF counting as one):
//    the code point encoded as UTF-8, or U+FFFD when the value is 0, a surrogate
//    or above U+10FFFF
std::string escapeStringChunk(std::string& str, size_t& pos)
{
  // Nothing follows the backslash: there is nothing to escape
  if (pos + 1 >= str.size())
    return "";

  const char escaped = str[pos + 1];
  if (!isHexDigit(escaped))
  {
    // Not a hex escape: the escaped character stands for itself
    pos += 1;
    return std::string(1, escaped);
  }

  // Accumulate up to six hex digits; six digits cannot overflow 32 bits
  uint32_t codePoint = 0;
  int numDigits = 0;
  size_t next = pos + 1;
  while (numDigits < MAX_ESCAPE_HEX_DIGITS && next < str.size() && isHexDigit(str[next]))
  {
    codePoint = 16 * codePoint + convertHexDigit(str[next]);
    next += 1;
    numDigits += 1;
  }

  // A single whitespace directly after the digits terminates the escape and is
  // swallowed with it; any other character must be left for the caller
  if (next < str.size() && isWhitespace(str[next]))
  {
    if (str[next] == '\r' && next + 1 < str.size() && str[next + 1] == '\n')
      next += 1;
    next += 1;
  }

  // next is one past the escape, report the last consumed index instead
  pos = next - 1;

  if (codePoint == 0 || isSurrogateCodePoint(codePoint) ||
      isGreaterThanMaximumAllowedCodePoint(codePoint))
  {
    return REPLACEMENT_CHARACTER;
  }

  return encodeUtf8(codePoint);
}
} // unnamed namespace
// Rewrite str in place, replacing every backslash escape with the text that
// escapeStringChunk produces for it and copying all other characters verbatim.
void UTILS::CSS::Escape(std::string& str)
{
  std::string converted;

  size_t index = 0;
  while (index < str.size())
  {
    if (str[index] != '\\')
    {
      // Ordinary character: keep as is
      converted += str[index];
    }
    else
    {
      // escapeStringChunk may move index forward over the characters it consumed;
      // the increment below then steps one past wherever it left index
      converted += escapeStringChunk(str, index);
    }
    index++;
  }

  str = converted;
}
|