/* XS implementation of HTML::Escape: escape_html() replaces HTML-special characters with entities. */
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#define NEED_newSVpvn_flags
#define NEED_sv_2pv_flags
#include "ppport.h"
/* Characters to escape:
* 0x22 " 0x26 & 0x27 ' 0x3c < 0x3e > 0x60 ` 0x7b { 0x7d }
*
* Note that we don't care whether the input uses Perl's single-byte
* (Latin-1) or multi-byte (UTF-8) encoding, because every byte >= 0x80 is
* safe regardless.
*/
/* Byte-indexed lookup table: a non-zero entry marks a byte that must be
 * replaced by an HTML entity.  Only the flagged bytes are listed; every
 * other entry (including all bytes >= 0x80) is implicitly zero. */
static const char unsafe[256] = {
    [0x22] = 1, /* "  */
    [0x26] = 1, /* &  */
    [0x27] = 1, /* '  */
    [0x3c] = 1, /* <  */
    [0x3e] = 1, /* >  */
    [0x60] = 1, /* `  */
    [0x7b] = 1, /* {  */
    [0x7d] = 1, /* }  */
};
/* Count the leading bytes of [start, end) that need no escaping.
 *
 * Behaves like strcspn() with a fixed reject set, with two differences:
 * the input is a pointer range rather than a NUL-terminated string, so an
 * embedded \0 byte is scanned like any other byte; and the reject set is
 * the static unsafe[] table, so nothing has to be built on each call.
 *
 * Returns the offset of the first unsafe byte, or end - start when the
 * whole range is safe. */
static size_t safe_character_span(const char *start, const char *end) {
    const char *p = start;

    /* Index the table through unsigned char so bytes >= 0x80 never
     * produce a negative subscript. */
    while (p != end && !unsafe[(unsigned char) *p]) {
        p++;
    }
    return p - start;
}
/*
 * Append an HTML-escaped copy of src's string value to dest.
 *
 * Every byte flagged in unsafe[] is replaced by an entity:
 *   &  ->  &amp;     <  ->  &lt;      >  ->  &gt;      "  ->  &quot;
 *   `  ->  &#96;     {  ->  &#123;    }  ->  &#125;    '  ->  &#39;
 * All other bytes are copied through unchanged.  The escaping is applied
 * unconditionally ("force"): this routine doesn't care about raw-ness.
 *
 * dest: string SV that receives the escaped text after its current content.
 * src:  SV whose string value is escaped.
 *
 * If src is flagged UTF-8 and dest is not, dest is upgraded first so the
 * copied bytes keep their meaning.
 * NOTE(review): the opposite case (dest UTF-8, src not) copies src's bytes
 * verbatim; the only caller visible here passes a fresh empty dest, so it
 * is not reached from this file -- confirm before reusing elsewhere.
 */
static void /* doesn't care about raw-ness */
tx_sv_cat_with_escape_html_force(pTHX_ SV* const dest, SV* const src) {
    STRLEN len;
    const char*       cur = SvPV_const(src, len);
    const char* const end = cur + len;
    STRLEN dest_cur;
    char* d;

    /* Upgrade before measuring or growing dest: upgrading may change both
     * the buffer and its length, which would leave a previously read
     * offset pointing at the wrong place. */
    if(!SvUTF8(dest) && SvUTF8(src)) {
        sv_utf8_upgrade(dest);
    }
    dest_cur = SvCUR(dest);

    /* Worst case: every input byte expands to the longest entity.  No
     * entity emitted below is longer than "&quot;" (6 bytes). */
    (void)SvGROW(dest, dest_cur + ( len * ( sizeof("&quot;") - 1) ) + 1);

    d = SvPVX(dest) + dest_cur;

/* Copy a string literal (without its trailing NUL) and advance the cursor. */
#define CopyToken(token, to) STMT_START {          \
        Copy(token "", to, sizeof(token)-1, char); \
        to += sizeof(token)-1;                     \
    } STMT_END

    while(cur != end) {
        /* Bulk-copy the run of bytes that need no escaping. */
        size_t span = safe_character_span(cur, end);
        Copy(cur, d, span, char);
        cur += span;
        d += span;

        /* If the run stopped short of the end, cur sits on an unsafe byte. */
        if(cur != end) {
            const char c = *(cur++);
            switch(c) {
            case '&':
                CopyToken("&amp;", d);
                break;
            case '<':
                CopyToken("&lt;", d);
                break;
            case '>':
                CopyToken("&gt;", d);
                break;
            case '"':
                CopyToken("&quot;", d);
                break;
            case '`':
                CopyToken("&#96;", d);
                break;
            case '{':
                CopyToken("&#123;", d);
                break;
            case '}':
                CopyToken("&#125;", d);
                break;
            default: /* c == '\'' */
                /* XXX: Internet Explorer (at least version 8) doesn't
                 * support &apos; in title, so use the numeric form. */
                CopyToken("&#39;", d);
                break;
            }
        }
    }

#undef CopyToken

    SvCUR_set(dest, d - SvPVX(dest));
    *SvEND(dest) = '\0';
}
/*
 * Return the HTML-escaped form of str.
 *
 * Get-magic is invoked on str first.  An undefined str is returned as is;
 * otherwise the escaped text is built in a new scalar created with the
 * SVs_TEMP flag, and that scalar is returned.
 */
static SV*
tx_escape_html(pTHX_ SV* const str) {
    SV* escaped;

    SvGETMAGIC(str);
    if(!SvOK(str)) {
        /* Nothing to escape: hand the undefined value straight back. */
        return str;
    }

    escaped = newSVpvs_flags("", SVs_TEMP);
    tx_sv_cat_with_escape_html_force(aTHX_ escaped, str);
    return escaped;
}
MODULE = HTML::Escape PACKAGE = HTML::Escape
PROTOTYPES: DISABLE
void
escape_html(SV* str)
CODE:
{
/* Declared void, but the result is stored directly into the first stack
 * slot: tx_escape_html() hands back either a new SVs_TEMP scalar holding
 * the escaped text or, for an undefined argument, str itself. */
ST(0) = tx_escape_html(aTHX_ str);
}
|