1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
use core:io;
// Convert a single ASCII hexadecimal digit into its numeric value (0-15).
//
// Accepts '0'-'9', 'A'-'F' and 'a'-'f'. Returns null for any other byte so that callers can
// detect malformed input.
private Byte? hexDigit(Byte ch) {
	// '0' - '9' map to 0 - 9.
	if (ch >= 0x30 & ch <= 0x39) {
		return ch - 0x30;
	}

	// 'A' - 'F' map to 10 - 15 (0x37 == 'A' - 10).
	if (ch >= 0x41 & ch <= 0x46) {
		return ch - 0x37;
	}

	// 'a' - 'f' map to 10 - 15 (0x57 == 'a' - 10).
	if (ch >= 0x61 & ch <= 0x66) {
		return ch - 0x57;
	}

	// Not a hexadecimal digit.
	return null;
}
// Unescape a URL component.
//
// Every well-formed "%XY" sequence (X and Y being hexadecimal digits) among the filled bytes of
// 'piece' is replaced by the byte it denotes. All bytes are collected in a scratch buffer that is
// decoded as UTF-8 at the end, since escapes are typically used to encode UTF-8 octets.
//
// A '%' that is not followed by two hexadecimal digits is dropped, and the bytes after it are
// copied through unchanged.
Str unescapeUrl(Buffer piece) {
	Nat length = piece.filled;
	// The decoded data is never longer than the input.
	Buffer decoded = buffer(length);

	Nat pos = 0;
	while (pos < length) {
		Byte current = piece[pos];
		pos++;

		if (current != 0x25) {
			// Anything except '%' is copied verbatim.
			decoded.push(current);
		} else if (pos + 1 < length) {
			// 'pos' now refers to the first digit after the '%'. Both digits must be inside
			// the filled part of the buffer and be valid for the escape to be accepted.
			if (high = hexDigit(piece[pos])) {
				if (low = hexDigit(piece[pos + 1])) {
					Byte value = (high << 4) | low;
					decoded.push(value);
					// Step past the two digits we just consumed.
					pos += 2;
				}
			}
		}
		// In all remaining cases, the '%' is malformed: nothing is emitted for it and
		// decoding resumes at the byte right after it.
	}

	return decoded.fromUtf8();
}
// Helper to unescape a URL parameter (a key or a value in a query string or form body).
//
// Works like the regular URL unescaping, with two additions:
// - '+' is decoded into a space.
// - CR LF (0x0D 0x0A) is rewritten into just LF (0x0A). This is done on the *decoded* bytes, so
//   it applies both to raw CR LF bytes and to the escaped form "%0D%0A". The escaped form is
//   what 'escapeUrlParam' emits for a newline. A CR that is not followed by LF is kept.
//
// A '%' that is not followed by two hexadecimal digits is dropped, and the bytes after it are
// copied through unchanged. A dropped '%' does not separate a CR from an LF that follows it.
//
// The decoded bytes are interpreted as UTF-8, since escapes are typically used to encode UTF-8
// octets.
Str unescapeUrlParam(Buffer piece) {
	Nat original = piece.filled;
	// The decoded data is never longer than the input.
	Buffer temp = buffer(piece.filled);

	Byte cr = 0x0D;
	// True when the last decoded byte was a CR that has not yet been written to 'temp'. We hold
	// it back until we know whether it is followed by an LF.
	Bool pendingCr = false;

	for (Nat i = 0; i < original; i++) {
		Byte ch = piece[i];
		if (ch == 0x25) {
			// % ch, unescape it! If the escape is malformed, drop the % and carry on with the
			// following bytes as they are.
			if (i + 2 >= original)
				continue;
			var hex1 = hexDigit(piece[i + 1]);
			var hex2 = hexDigit(piece[i + 2]);
			unless (hex1)
				continue;
			unless (hex2)
				continue;
			ch = (hex1 << 4) | hex2;
			i += 2;
		} else if (ch == 0x2B) {
			// + is used instead of space.
			ch = 0x20;
		}

		// Resolve a held-back CR: it is only discarded if this byte is an LF.
		if (pendingCr) {
			pendingCr = false;
			if (ch != 0x0A)
				temp.push(cr);
		}

		if (ch == cr) {
			pendingCr = true;
		} else {
			temp.push(ch);
		}
	}

	// A CR at the very end has nothing after it, so it is kept.
	if (pendingCr)
		temp.push(cr);

	return temp.fromUtf8();
}
// Helper to escape a URL component.
//
// ASCII letters, digits, and the characters '-', '.', '_' and '~' are copied as they are. Every
// other character is converted to UTF-8, and each resulting octet is written as '%' followed by
// its hexadecimal value. The output string is therefore pure ASCII, so its encoding does not
// matter.
Str escapeUrl(Str original) {
	StrBuf result;

	for (c in original) {
		Nat cp = c.codepoint;

		Bool digit = cp >= 0x30 & cp <= 0x39; // 0-9
		Bool upper = cp >= 0x41 & cp <= 0x5A; // A-Z
		Bool lower = cp >= 0x61 & cp <= 0x7A; // a-z
		// The punctuation that may appear unescaped: - . _ ~
		Bool mark = (cp == 0x2D) | (cp == 0x2E) | (cp == 0x5F) | (cp == 0x7E);

		if (digit | upper | lower | mark) {
			result << c;
		} else {
			// Escape each UTF-8 octet of the character separately.
			Buffer octets = c.toS.toUtf8();
			for (octet in octets) {
				result << "%" << hex(octet);
			}
		}
	}

	return result.toS;
}
// Helper to escape a URL query parameter.
//
// Very much like escaping a URL component, except that:
// - spaces are encoded as '+', and
// - a newline (\n) is encoded as CR LF ("%0D%0A").
//
// If the newline is already preceded by a carriage return in 'original', only "%0A" is added
// for it. This way, "\r\n" in the input is encoded as a single "%0D%0A" rather than as
// "%0D%0D%0A". A carriage return that is not followed by a newline is encoded as "%0D".
//
// The output string is pure ASCII.
Str escapeUrlParam(Str original) {
	StrBuf out;
	// True if the previous character was a carriage return (already written as "%0D").
	Bool afterCr = false;

	for (ch in original) {
		Nat codepoint = ch.codepoint;

		// Remember the state for this character, and update it for the next one.
		Bool hadCr = afterCr;
		afterCr = (codepoint == 0x0D);

		Bool safe = codepoint >= 0x30 & codepoint <= 0x39; // 0-9
		safe |= codepoint >= 0x41 & codepoint <= 0x5A; // A-Z
		safe |= codepoint >= 0x61 & codepoint <= 0x7A; // a-z
		safe |= codepoint == 0x2D; // -
		safe |= codepoint == 0x2E; // .
		safe |= codepoint == 0x5F; // _
		safe |= codepoint == 0x7E; // ~

		if (safe) {
			out << ch;
		} else if (codepoint == 0x20) {
			// Spaces are written as + in parameters.
			out << "+";
		} else if (codepoint == 0x0A) {
			if (hadCr) {
				// The CR of this line break has already been written.
				out << "%0A";
			} else {
				out << "%0D%0A";
			}
		} else {
			// Escape each UTF-8 octet of the character separately.
			Buffer utf8 = ch.toS.toUtf8();
			for (octet in utf8) {
				out << "%" << hex(octet);
			}
		}
	}

	return out.toS;
}
|