1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
#include "liquid.h"
#include "raw.h"
#include "stringutil.h"
#include "tokenizer.h"
// Identifiers for the Ruby methods and the instance variable used by this file.
// They are filled in once by liquid_define_raw().
// NOTE(review): these store rb_intern() results, which the Ruby C API types as
// ID rather than VALUE -- confirm whether the declarations should use ID.
static VALUE id_block_name;
static VALUE id_raise_tag_never_closed;
static VALUE id_block_delimiter;
static VALUE id_ivar_body;

// Class object fetched from the "Raw" constant under mLiquid.
static VALUE cLiquidRaw;
/*
 * Result of match_full_token_possibly_invalid().
 *
 * All three fields are reset (0 / NULL / 0) when no match is found.
 */
struct full_token_possibly_invalid_t {
    /* Number of bytes in the token that precede the matched tag start ("{"). */
    long body_len;

    /* First byte of the word that follows the matched tag start; points into the token. */
    const char *delimiter_start;

    /* Length in bytes of that word. */
    long delimiter_len;
};
/*
 * Look for a tag at the END of a token and report the word that follows its
 * tag start.
 *
 * The token must end in "%}". The text is then scanned backwards for the
 * closest "{%" or "{%-" that is followed (after optional whitespace) by a run
 * of word characters. Everything before that tag start is reported as body, so
 * a token such as
 *
 *     {% endraw {% endraw %}
 *
 * yields body_len == 10 (the leading "{% endraw " is preserved as content).
 *
 * token: token to inspect; only str_full and len_full are read.
 * match: always written. On success it holds the word (delimiter_start /
 *        delimiter_len) and the number of bytes before the tag start
 *        (body_len). On failure it is reset to 0 / NULL / 0.
 *
 * Returns true when such a tag was found, false otherwise.
 */
static bool match_full_token_possibly_invalid(token_t *token, struct full_token_possibly_invalid_t *match)
{
    const char *str = token->str_full;
    long len = token->len_full;

    match->body_len = 0;
    match->delimiter_start = NULL;
    match->delimiter_len = 0;

    if (len < 5) return false; // Must be at least 5 characters: \{%\w%\}
    if (str[len - 1] != '}' || str[len - 2] != '%') return false;

    // Word run currently being collected. The pointer is only meaningful while
    // curr_delimiter_len > 0; it is initialized so it never holds an
    // indeterminate value.
    const char *curr_delimiter_start = NULL;
    long curr_delimiter_len = 0;

    // Starts as true so the first word character seen opens a new run.
    bool is_last_char_whitespace = true;

    // Walk backwards from the character just before "%}". The loop stops at
    // index 2 because a tag start needs at least two characters before it.
    for (long i = len - 3; i > 1; i--) {
        char c = str[i];

        // match \s
        bool is_whitespace = rb_isspace(c);

        if (is_word_char(c)) {
            curr_delimiter_start = str + i;

            if (is_last_char_whitespace) {
                // Whitespace separated this character from anything seen
                // before, so it begins a new word.
                curr_delimiter_len = 1;
            } else {
                curr_delimiter_len++;
            }
        } else if (!is_whitespace) {
            // Any other character cannot sit between a tag start and its
            // word, so discard the run collected so far.
            curr_delimiter_start = NULL;
            curr_delimiter_len = 0;
        }
        // Whitespace leaves the current run untouched: it may be the padding
        // between the tag start and the word.

        is_last_char_whitespace = is_whitespace;

        if (curr_delimiter_len == 0) {
            continue;
        }

        // match start of a tag which is {% or {%-
        // (the i >= 3 guard keeps str[i - 3] inside the token)
        if (
            (str[i - 1] == '%' && str[i - 2] == '{') ||
            (i >= 3 && str[i - 1] == '-' && str[i - 2] == '%' && str[i - 3] == '{')
        ) {
            match->delimiter_start = curr_delimiter_start;
            match->delimiter_len = curr_delimiter_len;

            // body_len is the offset of the '{' that opens the tag.
            if (str[i - 1] == '-') {
                match->body_len = i - 3;
            } else {
                match->body_len = i - 2;
            }

            return true;
        }
    }

    return false;
}
/*
 * Implementation of the Ruby method registered as "c_parse".
 *
 * Consumes tokens until one ends with a tag whose word equals the String
 * returned by self.block_delimiter. The text from the start of the first
 * consumed token up to that tag start is stored in @body. When self's class
 * is exactly cLiquidRaw, the tokenizer's raw_tag_body / raw_tag_body_len are
 * also set to that string's buffer and length.
 *
 * If the tokenizer runs out of tokens first, self.raise_tag_never_closed is
 * called with self.block_name.
 *
 * Returns nil.
 */
static VALUE raw_parse_method(VALUE self, VALUE tokens)
{
    tokenizer_t *tokenizer;
    Tokenizer_Get_Struct(tokens, tokenizer);

    VALUE block_delimiter = rb_funcall(self, id_block_delimiter, 0);
    Check_Type(block_delimiter, T_STRING);
    char *delim_ptr = RSTRING_PTR(block_delimiter);
    long delim_len = RSTRING_LEN(block_delimiter);

    token_t token;
    struct full_token_possibly_invalid_t match;

    // The body is described as "first token's start + accumulated length".
    // NOTE(review): this presumes consecutive tokens are adjacent in a single
    // buffer -- confirm against the tokenizer.
    const char *body_start = NULL;
    long consumed_len = 0;

    for (;;) {
        tokenizer_next(tokenizer, &token);
        if (!token.type) {
            break;
        }

        if (body_start == NULL) {
            body_start = token.str_full;
        }

        bool closes_block = match_full_token_possibly_invalid(&token, &match)
            && match.delimiter_len == delim_len
            && memcmp(match.delimiter_start, delim_ptr, delim_len) == 0;

        if (!closes_block) {
            // Not the closing tag: the whole token is body content.
            consumed_len += token.len_full;
            continue;
        }

        // Only the part of this token before the closing tag belongs to the body.
        consumed_len += match.body_len;

        VALUE body_str = rb_enc_str_new(body_start, consumed_len, utf8_encoding);
        rb_ivar_set(self, id_ivar_body, body_str);

        if (RBASIC_CLASS(self) == cLiquidRaw) {
            tokenizer->raw_tag_body = RSTRING_PTR(body_str);
            tokenizer->raw_tag_body_len = (unsigned int)consumed_len;
        }

        return Qnil;
    }

    // Ran out of tokens without finding the closing tag.
    VALUE block_name = rb_funcall(self, id_block_name, 0);
    rb_funcall(self, id_raise_tag_never_closed, 1, block_name);
    return Qnil;
}
/*
 * Set up this file's Ruby bindings: intern the method / ivar names used by
 * raw_parse_method(), fetch the "Raw" class from mLiquid, and define its
 * one-argument "c_parse" method.
 */
void liquid_define_raw(void)
{
    /* Names of the Ruby methods and instance variable used while parsing. */
    id_block_name             = rb_intern("block_name");
    id_raise_tag_never_closed = rb_intern("raise_tag_never_closed");
    id_block_delimiter        = rb_intern("block_delimiter");
    id_ivar_body              = rb_intern("@body");

    /*
     * NOTE(review): cLiquidRaw is cached in a C static without being
     * registered via rb_global_variable() -- confirm this is safe with
     * respect to GC (e.g. compaction).
     */
    cLiquidRaw = rb_const_get(mLiquid, rb_intern("Raw"));

    rb_define_method(cLiquidRaw, "c_parse", raw_parse_method, 1);
}
|