1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
|
package enmime
import (
"bufio"
"bytes"
"fmt"
"mime"
"net/textproto"
"strings"
"github.com/jhillyerd/enmime/internal/coding"
"github.com/jhillyerd/enmime/mediatype"
"github.com/pkg/errors"
)
const (
// Standard MIME content dispositions
cdAttachment = "attachment"
cdInline = "inline"
// Standard MIME content types
ctAppPrefix = "application/"
ctAppOctetStream = "application/octet-stream"
ctMultipartAltern = "multipart/alternative"
ctMultipartMixed = "multipart/mixed"
ctMultipartPrefix = "multipart/"
ctMultipartRelated = "multipart/related"
ctTextPrefix = "text/"
ctTextPlain = "text/plain"
ctTextHTML = "text/html"
// Standard Transfer encodings
cte7Bit = "7bit"
cte8Bit = "8bit"
cteBase64 = "base64"
cteBinary = "binary"
cteQuotedPrintable = "quoted-printable"
// Standard MIME header names
hnContentDisposition = "Content-Disposition"
hnContentEncoding = "Content-Transfer-Encoding"
hnContentID = "Content-ID"
hnContentType = "Content-Type"
hnMIMEVersion = "MIME-Version"
// Standard MIME header parameters
hpBoundary = "boundary"
hpCharset = "charset"
hpFile = "file"
hpFilename = "filename"
hpName = "name"
hpModDate = "modification-date"
utf8 = "utf-8"
)
// AddressHeaders is the set of SMTP headers that contain email addresses, used by
// Envelope.AddressList(). Key characters must be all lowercase.
var AddressHeaders = map[string]bool{
"bcc": true,
"cc": true,
"delivered-to": true,
"from": true,
"reply-to": true,
"to": true,
"sender": true,
"resent-bcc": true,
"resent-cc": true,
"resent-from": true,
"resent-reply-to": true,
"resent-to": true,
"resent-sender": true,
}
// Terminology from RFC 2047:
// encoded-word: the entire =?charset?encoding?encoded-text?= string
// charset: the character set portion of the encoded word
// encoding: the character encoding type used for the encoded-text
// encoded-text: the text we are decoding
// ParseMediaType is a more tolerant implementation of Go's mime.ParseMediaType function.
//
// Tolerances accounted for:
// - Missing ';' between content-type and media parameters
// - Repeating media parameters
// - Unquoted values in media parameters containing 'tspecials' characters
func ParseMediaType(ctype string) (mtype string, params map[string]string, invalidParams []string,
err error) {
// Export of internal function.
return mediatype.Parse(ctype)
}
// readHeader reads a block of SMTP or MIME headers and returns a textproto.MIMEHeader.
// Header parse warnings & errors will be added to p.Errors, io errors will be returned directly.
func readHeader(r *bufio.Reader, p *Part) (textproto.MIMEHeader, error) {
// buf holds the massaged output for textproto.Reader.ReadMIMEHeader()
buf := &bytes.Buffer{}
tp := textproto.NewReader(r)
firstHeader := true
line:
for {
// Pull out each line of the headers as a temporary slice s
s, err := tp.ReadLineBytes()
if err != nil {
buf.Write([]byte{'\r', '\n'})
break
}
firstColon := bytes.IndexByte(s, ':')
firstSpace := bytes.IndexAny(s, " \t\n\r")
if firstSpace == 0 {
// Starts with space: continuation
buf.WriteByte(' ')
buf.Write(textproto.TrimBytes(s))
continue
}
if firstColon == 0 {
// Can't parse line starting with colon: skip
p.addError(ErrorMalformedHeader, "Header line %q started with a colon", s)
continue
}
if firstColon > 0 {
// Behavior change in net/textproto package in Golang 1.12.10 and 1.13.1:
// A space preceding the first colon in a header line is no longer handled
// automatically due to CVE-2019-16276 which takes advantage of this
// particular violation of RFC-7230 to exploit HTTP/1.1
if bytes.Contains(s[:firstColon+1], []byte{' ', ':'}) {
s = bytes.Replace(s, []byte{' ', ':'}, []byte{':'}, 1)
firstColon = bytes.IndexByte(s, ':')
}
// Behavior change in net/textproto package in Golang 1.20: invalid characters
// in header keys are no longer allowed; https://github.com/golang/go/issues/53188
for _, c := range s[:firstColon] {
if c != ' ' && !validHeaderFieldByte(c) {
p.addError(
ErrorMalformedHeader, "Header name %q contains invalid character %q", s, c)
continue line
}
}
// Contains a colon, treat as a new header line
if !firstHeader {
// New Header line, end the previous
buf.Write([]byte{'\r', '\n'})
}
s = textproto.TrimBytes(s)
buf.Write(s)
firstHeader = false
} else {
// No colon: potential non-indented continuation
if len(s) > 0 {
// Attempt to detect and repair a non-indented continuation of previous line
buf.WriteByte(' ')
buf.Write(s)
p.addWarning(ErrorMalformedHeader, "Continued line %q was not indented", s)
} else {
// Empty line, finish header parsing
buf.Write([]byte{'\r', '\n'})
break
}
}
}
buf.Write([]byte{'\r', '\n'})
tr := textproto.NewReader(bufio.NewReader(buf))
header, err := tr.ReadMIMEHeader()
return header, errors.WithStack(err)
}
// decodeToUTF8Base64Header decodes a MIME header per RFC 2047, reencoding to =?utf-8b?
func decodeToUTF8Base64Header(input string) string {
if !strings.Contains(input, "=?") {
// Don't scan if there is nothing to do here
return input
}
// The standard lib performs an incremental inspection of this string, where the
// "skipSpace" method only strings.trimLeft for spaces and tabs. Here we have a
// hard dependency on space existing and not on next expected rune.
//
// For resolving #112 with the least change, I will implement the
// "quoted display-name" detector, which will resolve the case specific
// issue stated in #112, but only in the case of a quoted display-name
// followed, without whitespace, by addr-spec.
tokens := strings.FieldsFunc(quotedDisplayName(input), whiteSpaceRune)
output := make([]string, len(tokens))
for i, token := range tokens {
if len(token) > 4 && strings.Contains(token, "=?") {
// Stash parenthesis, they should not be encoded
prefix := ""
suffix := ""
if token[0] == '(' {
prefix = "("
token = token[1:]
}
if token[len(token)-1] == ')' {
suffix = ")"
token = token[:len(token)-1]
}
// Base64 encode token
output[i] = prefix +
mime.BEncoding.Encode("UTF-8", coding.DecodeExtHeader(token)) +
suffix
} else {
output[i] = token
}
}
// Return space separated tokens
return strings.Join(output, " ")
}
func quotedDisplayName(s string) string {
if !strings.HasPrefix(s, "\"") {
return s
}
idx := strings.LastIndex(s, "\"")
return fmt.Sprintf("%s %s", s[:idx+1], s[idx+1:])
}
// Detects a RFC-822 linear-white-space, passed to strings.FieldsFunc.
func whiteSpaceRune(r rune) bool {
return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}
// Mirror of func in net/textproto/reader.go; from go 1.20.0-rc.2
func validHeaderFieldByte(c byte) bool {
// mask is a 128-bit bitmap with 1s for allowed bytes,
// so that the byte c can be tested with a shift and an and.
// If c >= 128, then 1<<c and 1<<(c-64) will both be zero,
// and this function will return false.
const mask = 0 |
(1<<(10)-1)<<'0' |
(1<<(26)-1)<<'a' |
(1<<(26)-1)<<'A' |
1<<'!' |
1<<'#' |
1<<'$' |
1<<'%' |
1<<'&' |
1<<'\'' |
1<<'*' |
1<<'+' |
1<<'-' |
1<<'.' |
1<<'^' |
1<<'_' |
1<<'`' |
1<<'|' |
1<<'~'
return ((uint64(1)<<c)&(mask&(1<<64-1)) |
(uint64(1)<<(c-64))&(mask>>64)) != 0
}
|