File: raw.c

package info (click to toggle)
ruby-liquid-c 4.2.0-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 504 kB
  • sloc: ansic: 3,866; ruby: 1,151; makefile: 7
file content (136 lines) | stat: -rw-r--r-- 4,131 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#include "liquid.h"
#include "raw.h"
#include "stringutil.h"
#include "tokenizer.h"

// Interned names used by this file; each is assigned once in liquid_define_raw().
// NOTE(review): these receive rb_intern() results but are declared VALUE, not ID —
// confirm the two types are interchangeable on all supported Ruby builds.
static VALUE id_block_name;             // "block_name"
static VALUE id_raise_tag_never_closed; // "raise_tag_never_closed"
static VALUE id_block_delimiter;        // "block_delimiter"
static VALUE id_ivar_body;              // "@body"

// The "Raw" constant looked up under mLiquid in liquid_define_raw().
static VALUE cLiquidRaw;

// Result of match_full_token_possibly_invalid().
// All fields are reset to 0 / NULL at the start of every match attempt and are
// only filled in when a match is found.
struct full_token_possibly_invalid_t {
    // Number of bytes at the start of the token that precede the matched
    // tag opener ("{%" or "{%-"), i.e. content that still belongs to the body.
    long body_len;
    // Points at the first character of the matched tag-name word inside the
    // token's own buffer (it is a slice, not a NUL-terminated copy).
    const char *delimiter_start;
    // Length in bytes of the tag-name word starting at delimiter_start.
    long delimiter_len;
};

// Scans a token backwards looking for the last tag opener ("{%" or "{%-")
// that is followed by optional whitespace and a word, in a token that ends
// with "%}".
//
// A single token may carry leading body text in front of the real tag, e.g.
//   {% endraw {% endraw %}
// where the first 10 bytes are body content; that prefix length is reported
// through match->body_len.
//
// On success returns true and fills match with the word's position/length and
// the body prefix length. On failure returns false and leaves match zeroed.
static bool match_full_token_possibly_invalid(token_t *token, struct full_token_possibly_invalid_t *match)
{
    const char *const text = token->str_full;
    const long text_len = token->len_full;

    match->body_len = 0;
    match->delimiter_start = NULL;
    match->delimiter_len = 0;

    // The shortest candidate is five bytes: "{%", one word character, "%}".
    if (text_len < 5) {
        return false;
    }
    // The token has to be closed by "%}".
    if (text[text_len - 2] != '%' || text[text_len - 1] != '}') {
        return false;
    }

    // The word currently being collected (we walk right-to-left, so its
    // start moves towards the front of the token as it grows).
    const char *word_start = NULL;
    long word_len = 0;

    // Treated as true before the first character so that the first word
    // character encountered begins a fresh word.
    bool prev_was_space = true;

    // Start just before the closing "%}" and stop at index 2 so that
    // text[pos - 1] and text[pos - 2] are always in bounds.
    for (long pos = text_len - 3; pos >= 2; pos--) {
        const char ch = text[pos];
        const bool is_space = rb_isspace(ch);

        if (is_word_char(ch)) {
            // A word character right after whitespace begins a new word;
            // otherwise it extends the one in progress.
            word_len = prev_was_space ? 1 : word_len + 1;
            word_start = text + pos;
        } else if (!is_space) {
            // Any other non-whitespace character discards the current word.
            word_start = NULL;
            word_len = 0;
        }

        prev_was_space = is_space;

        if (word_len <= 0) {
            continue;
        }

        // Is there a tag opener immediately in front of this position?
        const bool plain_open = text[pos - 1] == '%' && text[pos - 2] == '{';
        const bool trim_open = pos >= 3
            && text[pos - 1] == '-'
            && text[pos - 2] == '%'
            && text[pos - 3] == '{';

        if (!plain_open && !trim_open) {
            continue;
        }

        match->delimiter_start = word_start;
        match->delimiter_len = word_len;
        // Everything before the '{' of the opener is body content.
        match->body_len = plain_open ? pos - 2 : pos - 3;
        return true;
    }

    return false;
}

// Implementation of the "c_parse" method defined on cLiquidRaw.
//
// Pulls tokens from the given tokenizer until one contains a tag whose name
// equals self.block_delimiter. The bytes from the start of the first token up
// to that tag are stored as a UTF-8 string in self's @body. When self's class
// is cLiquidRaw itself, the tokenizer's raw_tag_body / raw_tag_body_len are
// also pointed at that string.
//
// If the tokenizer runs out first, self.raise_tag_never_closed(self.block_name)
// is called. Returns Qnil in both cases.
static VALUE raw_parse_method(VALUE self, VALUE tokens)
{
    tokenizer_t *tokenizer;
    Tokenizer_Get_Struct(tokens, tokenizer);

    VALUE end_tag = rb_funcall(self, id_block_delimiter, 0);
    Check_Type(end_tag, T_STRING);
    const char *end_tag_ptr = RSTRING_PTR(end_tag);
    const long end_tag_len = RSTRING_LEN(end_tag);

    // The body is the span starting at the first token; its length grows as
    // whole tokens are consumed.
    const char *raw_start = NULL;
    long raw_len = 0;

    token_t token;
    struct full_token_possibly_invalid_t found;

    // A zero token type ends the loop (no more tokens).
    for (tokenizer_next(tokenizer, &token); token.type; tokenizer_next(tokenizer, &token)) {
        if (raw_start == NULL) {
            raw_start = token.str_full;
        }

        const bool closes_block = match_full_token_possibly_invalid(&token, &found)
            && found.delimiter_len == end_tag_len
            && memcmp(found.delimiter_start, end_tag_ptr, end_tag_len) == 0;

        if (!closes_block) {
            // Not the closing tag: the entire token is part of the body.
            raw_len += token.len_full;
            continue;
        }

        // Only the text in front of the closing tag belongs to the body.
        raw_len += found.body_len;

        VALUE body_str = rb_enc_str_new(raw_start, raw_len, utf8_encoding);
        rb_ivar_set(self, id_ivar_body, body_str);

        if (RBASIC_CLASS(self) == cLiquidRaw) {
            tokenizer->raw_tag_body = RSTRING_PTR(body_str);
            tokenizer->raw_tag_body_len = (unsigned int)raw_len;
        }

        return Qnil;
    }

    // Ran out of tokens without seeing the closing tag.
    rb_funcall(self, id_raise_tag_never_closed, 1, rb_funcall(self, id_block_name, 0));
    return Qnil;
}

// Caches the interned names this file uses, looks up the "Raw" constant under
// mLiquid, and defines the "c_parse" method (one argument) on it.
void liquid_define_raw(void)
{
    // Instance variable written by raw_parse_method.
    id_ivar_body = rb_intern("@body");

    // Methods invoked on self by raw_parse_method.
    id_block_delimiter = rb_intern("block_delimiter");
    id_block_name = rb_intern("block_name");
    id_raise_tag_never_closed = rb_intern("raise_tag_never_closed");

    cLiquidRaw = rb_const_get(mLiquid, rb_intern("Raw"));
    rb_define_method(cLiquidRaw, "c_parse", raw_parse_method, 1);
}