1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
|
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CNIOBoringSSL_bytestring.h>
#include "internal.h"
static int is_valid_code_point(uint32_t v) {
// References in the following are to Unicode 9.0.0.
if (// The Unicode space runs from zero to 0x10ffff (3.4 D9).
v > 0x10ffff ||
// Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved
// (3.4 D14)
(v & 0xfffe) == 0xfffe ||
(v >= 0xfdd0 && v <= 0xfdef) ||
// Surrogate code points are invalid (3.2 C1).
(v >= 0xd800 && v <= 0xdfff)) {
return 0;
}
return 1;
}
// BOTTOM_BITS returns a byte with the bottom |n| bits set.
#define BOTTOM_BITS(n) (uint8_t)((1u << (n)) - 1)
// TOP_BITS returns a byte with the top |n| bits set.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
int cbs_get_utf8(CBS *cbs, uint32_t *out) {
uint8_t c;
if (!CBS_get_u8(cbs, &c)) {
return 0;
}
if (c <= 0x7f) {
*out = c;
return 1;
}
uint32_t v, lower_bound;
size_t len;
if ((c & TOP_BITS(3)) == TOP_BITS(2)) {
v = c & BOTTOM_BITS(5);
len = 1;
lower_bound = 0x80;
} else if ((c & TOP_BITS(4)) == TOP_BITS(3)) {
v = c & BOTTOM_BITS(4);
len = 2;
lower_bound = 0x800;
} else if ((c & TOP_BITS(5)) == TOP_BITS(4)) {
v = c & BOTTOM_BITS(3);
len = 3;
lower_bound = 0x10000;
} else {
return 0;
}
for (size_t i = 0; i < len; i++) {
if (!CBS_get_u8(cbs, &c) ||
(c & TOP_BITS(2)) != TOP_BITS(1)) {
return 0;
}
v <<= 6;
v |= c & BOTTOM_BITS(6);
}
if (!is_valid_code_point(v) ||
v < lower_bound) {
return 0;
}
*out = v;
return 1;
}
int cbs_get_latin1(CBS *cbs, uint32_t *out) {
uint8_t c;
if (!CBS_get_u8(cbs, &c)) {
return 0;
}
*out = c;
return 1;
}
int cbs_get_ucs2_be(CBS *cbs, uint32_t *out) {
// Note UCS-2 (used by BMPString) does not support surrogates.
uint16_t c;
if (!CBS_get_u16(cbs, &c) ||
!is_valid_code_point(c)) {
return 0;
}
*out = c;
return 1;
}
int cbs_get_utf32_be(CBS *cbs, uint32_t *out) {
return CBS_get_u32(cbs, out) && is_valid_code_point(*out);
}
size_t cbb_get_utf8_len(uint32_t u) {
if (u <= 0x7f) {
return 1;
}
if (u <= 0x7ff) {
return 2;
}
if (u <= 0xffff) {
return 3;
}
return 4;
}
int cbb_add_utf8(CBB *cbb, uint32_t u) {
if (!is_valid_code_point(u)) {
return 0;
}
if (u <= 0x7f) {
return CBB_add_u8(cbb, (uint8_t)u);
}
if (u <= 0x7ff) {
return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
if (u <= 0xffff) {
return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
if (u <= 0x10ffff) {
return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
return 0;
}
int cbb_add_latin1(CBB *cbb, uint32_t u) {
return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u);
}
int cbb_add_ucs2_be(CBB *cbb, uint32_t u) {
return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u);
}
int cbb_add_utf32_be(CBB *cbb, uint32_t u) {
return is_valid_code_point(u) && CBB_add_u32(cbb, u);
}
|