# -- package RivetEntities
#
# The code is largely taken from a simple yet clever encoder/decoder of HTML
# entities that works on utf-8 character strings. The original code, written
# by Andy Goth, is at http://wiki.tcl.tk/26403. Package entities extends the
# functionality of the original code by combining it with Tcl's 'encoding'
# command in order to encode/decode from any supported encoding.
#
# $Id: entities.tcl 1338460 2012-05-14 22:25:41Z mxmanghi $
# Declare that this file provides version 1.0 of package RivetEntities.
package provide RivetEntities 1.0
namespace eval ::rivet {
# namespace ensemble create -subcommands {encode decode}
# (ensemble creation above is left disabled)
# Mark encode and decode as the commands exported by namespace ::rivet.
namespace export encode decode
# utf8_entities_map
#
# Flat key/value list suitable for [string map]: every Unicode character
# that has a named HTML 4.01 character entity is followed by its entity
# reference. The list is written inside braces, so the \uXXXX and \;
# sequences are stored verbatim and are only turned into the actual
# character and into ';' when Tcl parses the value as a list.
#
# The entity references must appear here in their symbolic form
# (e.g. &eacute\;), never as the literal character they stand for,
# otherwise encode would emit the raw character followed by a stray ';'
# and decode would not recognize any real entity.
#
# NOTE: no comments may be placed inside the braces, they would become
# list elements.
variable utf8_entities_map {
    \u00a0 &nbsp\;     \u00a1 &iexcl\;    \u00a2 &cent\;     \u00a3 &pound\;
    \u00a4 &curren\;   \u00a5 &yen\;      \u00a6 &brvbar\;   \u00a7 &sect\;
    \u00a8 &uml\;      \u00a9 &copy\;     \u00aa &ordf\;     \u00ab &laquo\;
    \u00ac &not\;      \u00ad &shy\;      \u00ae &reg\;      \u00af &macr\;
    \u00b0 &deg\;      \u00b1 &plusmn\;   \u00b2 &sup2\;     \u00b3 &sup3\;
    \u00b4 &acute\;    \u00b5 &micro\;    \u00b6 &para\;     \u00b7 &middot\;
    \u00b8 &cedil\;    \u00b9 &sup1\;     \u00ba &ordm\;     \u00bb &raquo\;
    \u00bc &frac14\;   \u00bd &frac12\;   \u00be &frac34\;   \u00bf &iquest\;
    \u00c0 &Agrave\;   \u00c1 &Aacute\;   \u00c2 &Acirc\;    \u00c3 &Atilde\;
    \u00c4 &Auml\;     \u00c5 &Aring\;    \u00c6 &AElig\;    \u00c7 &Ccedil\;
    \u00c8 &Egrave\;   \u00c9 &Eacute\;   \u00ca &Ecirc\;    \u00cb &Euml\;
    \u00cc &Igrave\;   \u00cd &Iacute\;   \u00ce &Icirc\;    \u00cf &Iuml\;
    \u00d0 &ETH\;      \u00d1 &Ntilde\;   \u00d2 &Ograve\;   \u00d3 &Oacute\;
    \u00d4 &Ocirc\;    \u00d5 &Otilde\;   \u00d6 &Ouml\;     \u00d7 &times\;
    \u00d8 &Oslash\;   \u00d9 &Ugrave\;   \u00da &Uacute\;   \u00db &Ucirc\;
    \u00dc &Uuml\;     \u00dd &Yacute\;   \u00de &THORN\;    \u00df &szlig\;
    \u00e0 &agrave\;   \u00e1 &aacute\;   \u00e2 &acirc\;    \u00e3 &atilde\;
    \u00e4 &auml\;     \u00e5 &aring\;    \u00e6 &aelig\;    \u00e7 &ccedil\;
    \u00e8 &egrave\;   \u00e9 &eacute\;   \u00ea &ecirc\;    \u00eb &euml\;
    \u00ec &igrave\;   \u00ed &iacute\;   \u00ee &icirc\;    \u00ef &iuml\;
    \u00f0 &eth\;      \u00f1 &ntilde\;   \u00f2 &ograve\;   \u00f3 &oacute\;
    \u00f4 &ocirc\;    \u00f5 &otilde\;   \u00f6 &ouml\;     \u00f7 &divide\;
    \u00f8 &oslash\;   \u00f9 &ugrave\;   \u00fa &uacute\;   \u00fb &ucirc\;
    \u00fc &uuml\;     \u00fd &yacute\;   \u00fe &thorn\;    \u00ff &yuml\;
    \u0192 &fnof\;
    \u0391 &Alpha\;    \u0392 &Beta\;     \u0393 &Gamma\;    \u0394 &Delta\;
    \u0395 &Epsilon\;  \u0396 &Zeta\;     \u0397 &Eta\;      \u0398 &Theta\;
    \u0399 &Iota\;     \u039a &Kappa\;    \u039b &Lambda\;   \u039c &Mu\;
    \u039d &Nu\;       \u039e &Xi\;       \u039f &Omicron\;  \u03a0 &Pi\;
    \u03a1 &Rho\;      \u03a3 &Sigma\;    \u03a4 &Tau\;      \u03a5 &Upsilon\;
    \u03a6 &Phi\;      \u03a7 &Chi\;      \u03a8 &Psi\;      \u03a9 &Omega\;
    \u03b1 &alpha\;    \u03b2 &beta\;     \u03b3 &gamma\;    \u03b4 &delta\;
    \u03b5 &epsilon\;  \u03b6 &zeta\;     \u03b7 &eta\;      \u03b8 &theta\;
    \u03b9 &iota\;     \u03ba &kappa\;    \u03bb &lambda\;   \u03bc &mu\;
    \u03bd &nu\;       \u03be &xi\;       \u03bf &omicron\;  \u03c0 &pi\;
    \u03c1 &rho\;      \u03c2 &sigmaf\;   \u03c3 &sigma\;    \u03c4 &tau\;
    \u03c5 &upsilon\;  \u03c6 &phi\;      \u03c7 &chi\;      \u03c8 &psi\;
    \u03c9 &omega\;    \u03d1 &thetasym\; \u03d2 &upsih\;    \u03d6 &piv\;
    \u2022 &bull\;     \u2026 &hellip\;   \u2032 &prime\;    \u2033 &Prime\;
    \u203e &oline\;    \u2044 &frasl\;    \u2118 &weierp\;   \u2111 &image\;
    \u211c &real\;     \u2122 &trade\;    \u2135 &alefsym\;  \u2190 &larr\;
    \u2191 &uarr\;     \u2192 &rarr\;     \u2193 &darr\;     \u2194 &harr\;
    \u21b5 &crarr\;    \u21d0 &lArr\;     \u21d1 &uArr\;     \u21d2 &rArr\;
    \u21d3 &dArr\;     \u21d4 &hArr\;     \u2200 &forall\;   \u2202 &part\;
    \u2203 &exist\;    \u2205 &empty\;    \u2207 &nabla\;    \u2208 &isin\;
    \u2209 &notin\;    \u220b &ni\;       \u220f &prod\;     \u2211 &sum\;
    \u2212 &minus\;    \u2217 &lowast\;   \u221a &radic\;    \u221d &prop\;
    \u221e &infin\;    \u2220 &ang\;      \u2227 &and\;      \u2228 &or\;
    \u2229 &cap\;      \u222a &cup\;      \u222b &int\;      \u2234 &there4\;
    \u223c &sim\;      \u2245 &cong\;     \u2248 &asymp\;    \u2260 &ne\;
    \u2261 &equiv\;    \u2264 &le\;       \u2265 &ge\;       \u2282 &sub\;
    \u2283 &sup\;      \u2284 &nsub\;     \u2286 &sube\;     \u2287 &supe\;
    \u2295 &oplus\;    \u2297 &otimes\;   \u22a5 &perp\;     \u22c5 &sdot\;
    \u2308 &lceil\;    \u2309 &rceil\;    \u230a &lfloor\;   \u230b &rfloor\;
    \u2329 &lang\;     \u232a &rang\;     \u25ca &loz\;      \u2660 &spades\;
    \u2663 &clubs\;    \u2665 &hearts\;   \u2666 &diams\;
    \u0022 &quot\;     \u0026 &amp\;      \u003c &lt\;       \u003e &gt\;
    \u0152 &OElig\;    \u0153 &oelig\;    \u0160 &Scaron\;   \u0161 &scaron\;
    \u0178 &Yuml\;     \u02c6 &circ\;     \u02dc &tilde\;    \u2002 &ensp\;
    \u2003 &emsp\;     \u2009 &thinsp\;   \u200c &zwnj\;     \u200d &zwj\;
    \u200e &lrm\;      \u200f &rlm\;      \u2013 &ndash\;    \u2014 &mdash\;
    \u2018 &lsquo\;    \u2019 &rsquo\;    \u201a &sbquo\;    \u201c &ldquo\;
    \u201d &rdquo\;    \u201e &bdquo\;    \u2020 &dagger\;   \u2021 &Dagger\;
    \u2030 &permil\;   \u2039 &lsaquo\;   \u203a &rsaquo\;   \u20ac &euro\;
}

# entitities_utf8_map
#
# Inverse table used by decode. Reversing the whole flat list turns
# {char entity char entity ...} into {entity char entity char ...},
# which is again a valid [string map] argument (entity -> character).
# The variable name (including its spelling) is kept as is because
# other code refers to it.
variable entitities_utf8_map [lreverse $utf8_entities_map]
# -- args_processing
#
# Helper shared by encode and decode. 'arglist' is read as a dictionary
# of options; when it carries an -encoding entry whose value is not
# "utf-8" the string is passed through [encoding <mode> <encoding> <string>],
# where 'mode' is the encoding subcommand to run (the callers in this
# file pass convertfrom or convertto). In every other case the string is
# returned untouched.
proc args_processing {string mode arglist} {
    # no -encoding option: nothing to convert
    if {![dict exists $arglist -encoding]} {
        return $string
    }

    # utf-8 is treated as the native form and needs no conversion
    set enc [dict get $arglist -encoding]
    if {$enc eq "utf-8"} {
        return $string
    }

    return [encoding $mode $enc $string]
}
# -- encode
#
# Replaces every character of the input string that has an entry in
# utf8_entities_map with the corresponding HTML entity.
#
# When an -encoding option other than utf-8 is given, the input string
# is first converted from that encoding (through args_processing and
# Tcl's 'encoding convertfrom') and the entity substitution is then
# applied to the converted text.
#
#   ::rivet::encode <input_string> ?-encoding <encoding>?
#
# Arguments:
#
#   * <input_string>: string whose characters must go through HTML
#                     entities expansion
#   * <encoding>:     input string character encoding (utf-8 when omitted)
#
# Returned value:
#
#   - expanded string
#
proc encode {string args} {
    variable utf8_entities_map

    set entity_map $utf8_entities_map
    set text [args_processing $string convertfrom $args]

    return [string map $entity_map $text]
}
# -- decode
#
# Replaces every HTML entity listed in entitities_utf8_map with the
# character it stands for. When an -encoding option other than utf-8 is
# given, the resulting string is finally converted to that encoding
# (through args_processing and Tcl's 'encoding convertto').
#
#   ::rivet::decode <input_string> ?-encoding <encoding>?
#
# Arguments:
#
#   * <input_string>: string whose HTML entities have to be converted
#                     back into characters
#   * <encoding>:     output string character encoding (utf-8 when omitted)
#
# Returned value:
#
#   - converted string
#
proc decode {string args} {
    variable entitities_utf8_map

    set text [string map $entitities_utf8_map $string]

    return [args_processing $text convertto $args]
}
}
# -- end of package RivetEntities