1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: jdtang@google.com (Jonathan Tang)
#include "string_buffer.h"
#include <assert.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "string_piece.h"
#include "util.h"
// Initial capacity, in bytes, of a freshly initialized string buffer.
// gumbo_string_buffer_init allocates this many bytes, and
// gumbo_string_buffer_clear uses 8x this value as the threshold above which a
// grown buffer is freed and re-created at this size.
// Size chosen via statistical analysis of ~60K websites.
// 99% of text nodes and 98% of attribute names/values fit in this initial size.
static const size_t kDefaultStringBufferSize = 5;
// Ensures `buffer` has room for `additional_chars` more bytes past its current
// length, growing the storage via gumbo_realloc when it does not.
//
// Capacity normally grows by repeated doubling. Two cases in which plain
// doubling can never reach the target are handled explicitly:
//   * a capacity of 0 (0 * 2 == 0), which is seeded with 1;
//   * a capacity so large that doubling would wrap around size_t, in which
//     case the exact required size is used instead.
//
// Note: `buffer->length + additional_chars` is deliberately allowed to wrap.
// gumbo_string_buffer_reserve passes `min_capacity - length`, which wraps when
// min_capacity < length, and the sum then wraps back to min_capacity.
static void maybe_resize_string_buffer(
    size_t additional_chars, GumboStringBuffer* buffer) {
  const size_t max_size = (size_t) -1;
  size_t new_length = buffer->length + additional_chars;
  size_t new_capacity = buffer->capacity;

  if (new_capacity == 0 && new_length > 0) {
    // Doubling zero never makes progress; start from the smallest real size.
    new_capacity = 1;
  }
  while (new_capacity < new_length) {
    if (new_capacity > max_size / 2) {
      // Doubling again would overflow size_t; take exactly what is needed.
      new_capacity = new_length;
      break;
    }
    new_capacity *= 2;
  }
  if (new_capacity != buffer->capacity) {
    buffer->capacity = new_capacity;
    buffer->data = gumbo_realloc(buffer->data, buffer->capacity);
  }
}
// Puts `output` into its initial empty state: zero length, capacity of
// kDefaultStringBufferSize, and storage of that size obtained from
// gumbo_malloc.
void gumbo_string_buffer_init(GumboStringBuffer* output) {
  output->length = 0;
  output->capacity = kDefaultStringBufferSize;
  output->data = gumbo_malloc(output->capacity);
}
// Makes sure `output` can hold at least `min_capacity` bytes in total.
void gumbo_string_buffer_reserve(
    size_t min_capacity, GumboStringBuffer* output) {
  // The resize helper takes a byte count relative to the current length, so
  // convert. If min_capacity is below the length this unsigned subtraction
  // wraps, but the helper adds the length back and arrives at min_capacity.
  const size_t extra_beyond_length = min_capacity - output->length;
  maybe_resize_string_buffer(extra_beyond_length, output);
}
// Appends code point `c` to `output` as a UTF-8 byte sequence of 1 to 4 bytes.
// No range validation is done here: any value above 0xffff is written as a
// 4-byte sequence, and any value at or below 0x7f as a single byte.
void gumbo_string_buffer_append_codepoint(int c, GumboStringBuffer* output) {
  // Number of continuation bytes (one less than the total sequence length)
  // and the marker bits OR'ed into the lead byte.
  int continuation_count;
  int lead_marker;
  if (c > 0xffff) {
    continuation_count = 3;
    lead_marker = 0xf0;
  } else if (c > 0x7ff) {
    continuation_count = 2;
    lead_marker = 0xe0;
  } else if (c > 0x7f) {
    continuation_count = 1;
    lead_marker = 0xc0;
  } else {
    continuation_count = 0;
    lead_marker = 0;
  }

  maybe_resize_string_buffer(continuation_count + 1, output);

  // The lead byte carries the highest-order payload bits; each continuation
  // byte then carries the next 6 bits down, tagged with 0x80.
  int shift = continuation_count * 6;
  output->data[output->length++] = lead_marker | (c >> shift);
  while (shift > 0) {
    shift -= 6;
    output->data[output->length++] = 0x80 | ((c >> shift) & 0x3f);
  }
}
// Appends exactly `length` bytes starting at `data` to the end of `buffer`,
// growing the buffer first if necessary. No terminator is added.
void gumbo_string_buffer_put(
    GumboStringBuffer* buffer, const char* data, size_t length) {
  maybe_resize_string_buffer(length, buffer);

  // Compute the write position only after the resize, since the storage
  // pointer may have been replaced.
  char* write_pos = buffer->data + buffer->length;
  memcpy(write_pos, data, length);
  buffer->length = buffer->length + length;
}
// Appends `count` NUL-terminated C strings, supplied as variadic
// `const char*` arguments, to `buffer` in argument order.
void gumbo_string_buffer_putv(GumboStringBuffer* buffer, int count, ...) {
  va_list args;
  int remaining;

  // First pass: total up the lengths so the buffer is grown at most once.
  size_t bytes_needed = 0;
  va_start(args, count);
  for (remaining = count; remaining > 0; --remaining) {
    const char* piece = va_arg(args, const char*);
    bytes_needed += strlen(piece);
  }
  va_end(args);
  maybe_resize_string_buffer(bytes_needed, buffer);

  // Second pass: walk the argument list again and copy each string in.
  va_start(args, count);
  for (remaining = count; remaining > 0; --remaining) {
    const char* piece = va_arg(args, const char*);
    const size_t piece_len = strlen(piece);
    memcpy(buffer->data + buffer->length, piece, piece_len);
    buffer->length += piece_len;
  }
  va_end(args);
}
// Appends the bytes described by `str` (its data pointer and length) to
// `output` by delegating to gumbo_string_buffer_put.
void gumbo_string_buffer_append_string(
    GumboStringPiece* str, GumboStringBuffer* output) {
  gumbo_string_buffer_put(
      output,
      str->data,
      str->length);
}
// Returns the buffer's contents as a NUL-terminated C string. The returned
// pointer is the buffer's own storage, not a copy.
const char* gumbo_string_buffer_cstr(GumboStringBuffer* buffer) {
  // Guarantee one spare byte past the current contents for the terminator.
  maybe_resize_string_buffer(1, buffer);

  // The terminator sits just past the contents and is not counted in
  // `length`, so the logical string length is unchanged.
  const size_t terminator_index = buffer->length;
  buffer->data[terminator_index] = 0;
  return buffer->data;
}
// Returns a separate NUL-terminated copy of the buffer's contents, allocated
// with gumbo_malloc. `input` itself is not modified.
char* gumbo_string_buffer_to_string(GumboStringBuffer* input) {
  const size_t content_len = input->length;

  // One extra byte for the trailing NUL.
  char* copy = gumbo_malloc(content_len + 1);
  memcpy(copy, input->data, content_len);
  copy[content_len] = '\0';
  return copy;
}
// Empties the buffer so it can be reused. Storage is kept as-is unless it has
// grown past 8x the default size, in which case it is released and replaced
// with a fresh default-sized allocation.
void gumbo_string_buffer_clear(GumboStringBuffer* input) {
  input->length = 0;

  // Up to three doublings of the default size: keep the existing storage.
  if (input->capacity <= kDefaultStringBufferSize * 8) {
    return;
  }

  // Merely resetting the length would let the buffer stay arbitrarily large
  // and tie up memory that may be needed for parsing the rest of the
  // document, so once it's grown by more than 3 doublings, free it and start
  // over.
  gumbo_string_buffer_destroy(input);
  gumbo_string_buffer_init(input);
}
// Releases the buffer's storage with gumbo_free. The GumboStringBuffer struct
// itself is not freed, and its fields are left untouched.
void gumbo_string_buffer_destroy(GumboStringBuffer* buffer) {
  void* storage = buffer->data;
  gumbo_free(storage);
}
|