1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
package ui
import (
"bytes"
"errors"
"io"
"strconv"
"strings"
"golang.org/x/net/html"
)
// tagsToAvoid is the set of tag names that StripSomeHTML leaves out of its
// output. Only the tag markup is dropped; a missing key reads as false, so
// any tag not listed here is kept.
var tagsToAvoid = map[string]bool{
	"a":          true,
	"b":          true,
	"blockquote": true,
	"br":         true,
	"cite":       true,
	"em":         true,
	"font":       true,
	"i":          true,
	"img":        true,
	"p":          true,
	"span":       true,
	"strong":     true,
	"u":          true,
}
// StripSomeHTML removes the most common html presentation tags from the text.
//
// The input is tokenized; start, end and self-closing tags whose name is in
// tagsToAvoid are dropped, while every other tag, comment and doctype is copied
// to the output as the tokenizer's raw bytes. Text tokens are appended via the
// tokenizer's Text method. If the tokenizer stops with an error other than
// io.EOF, msg itself is returned unchanged.
func StripSomeHTML(msg []byte) (out []byte) {
	tokenizer := html.NewTokenizer(bytes.NewReader(msg))
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			// io.EOF is the normal end of input; anything else means the
			// input could not be tokenized, so hand back the original bytes.
			if err := tokenizer.Err(); err != nil && err != io.EOF {
				return msg
			}
			return out
		case html.TextToken:
			out = append(out, tokenizer.Text()...)
		case html.CommentToken, html.DoctypeToken:
			out = append(out, tokenizer.Raw()...)
		case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
			// Raw is fetched before TagName, exactly as before; the raw bytes
			// are only appended after the name lookup.
			raw := tokenizer.Raw()
			name, _ := tokenizer.TagName()
			if tagsToAvoid[string(name)] {
				continue
			}
			out = append(out, raw...)
		}
	}
}
// StripHTML removes all html in the text.
//
// Only text tokens are kept; tags, comments and doctypes are discarded. If the
// tokenizer stops with an error other than io.EOF, msg itself is returned
// unchanged.
func StripHTML(msg []byte) (out []byte) {
	tokenizer := html.NewTokenizer(bytes.NewReader(msg))
	for {
		tokenType := tokenizer.Next()
		if tokenType == html.TextToken {
			out = append(out, tokenizer.Text()...)
			continue
		}
		if tokenType != html.ErrorToken {
			// Every non-text token is markup and is skipped.
			continue
		}
		// io.EOF marks the normal end of input; any other error falls back
		// to returning the original message.
		if err := tokenizer.Err(); err != nil && err != io.EOF {
			return msg
		}
		return out
	}
}
// EscapeAllHTMLTags replaces every "<" with "&lt;" and every ">" with "&gt;"
// so that no tag in the input is interpreted as markup. All other characters,
// including "&", are left untouched.
func EscapeAllHTMLTags(in string) string {
	in = strings.Replace(in, "<", "&lt;", -1)
	in = strings.Replace(in, ">", "&gt;", -1)
	return in
}
// hexTable holds the sixteen lowercase hexadecimal digits, indexed by the
// 4-bit value they represent.
var hexTable = "0123456789abcdef"

// NewLine contains a new line
var NewLine = []byte("\n")
// EscapeNonASCII replaces tabs and other non-printable characters with a
// "\x01" form of hex escaping. It works on a byte-by-byte basis.
//
// A byte is escaped when it is below 32, above 126, or is a backslash; each
// such byte becomes a backslash, an 'x' and two hex digits taken from
// hexTable. When nothing needs escaping the input string is returned as is.
func EscapeNonASCII(in string) string {
	mustEscape := func(c byte) bool {
		return c < 32 || c > 126 || c == '\\'
	}

	// First pass: count the escapes so the output buffer can be sized once.
	count := 0
	for _, c := range []byte(in) {
		if mustEscape(c) {
			count++
		}
	}
	if count == 0 {
		return in
	}

	// Every escaped byte grows from one byte to four, i.e. three extra bytes.
	buf := make([]byte, 0, len(in)+3*count)
	for _, c := range []byte(in) {
		if !mustEscape(c) {
			buf = append(buf, c)
			continue
		}
		buf = append(buf, '\\', 'x', hexTable[c>>4], hexTable[c&15])
	}
	return string(buf)
}
// UnescapeNonASCII undoes the transformation of EscapeNonASCII.
//
// Every backslash must start a four-byte sequence of the form "\xNN", where NN
// is a two-digit hexadecimal value; the sequence is replaced by the byte with
// that value. Input without any backslash is returned as is. An error (with an
// empty result) is returned when a sequence is cut off by the end of the
// input, when the backslash is not followed by 'x', or when the two digits are
// not valid hexadecimal.
func UnescapeNonASCII(in string) (string, error) {
	// Fast path: without a backslash there is nothing to decode.
	firstEscape := -1
	for pos := 0; pos < len(in); pos++ {
		if in[pos] == '\\' {
			firstEscape = pos
			break
		}
	}
	if firstEscape < 0 {
		return in, nil
	}

	decoded := make([]byte, 0, len(in))
	for pos := 0; pos < len(in); pos++ {
		c := in[pos]
		if c != '\\' {
			decoded = append(decoded, c)
			continue
		}

		// A full escape occupies in[pos : pos+4].
		if pos+3 >= len(in) {
			return "", errors.New("truncated escape sequence at end: " + in)
		}
		if in[pos+1] != 'x' {
			return "", errors.New("escape sequence didn't start with \\x in: " + in)
		}
		value, err := strconv.ParseUint(in[pos+2:pos+4], 16, 8)
		if err != nil {
			return "", errors.New("failed to parse value in '" + in + "': " + err.Error())
		}
		decoded = append(decoded, byte(value))
		// Skip the remaining three bytes of the escape; the loop increment
		// accounts for the backslash itself.
		pos += 3
	}
	return string(decoded), nil
}
|