1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
package amp
import (
"bufio"
"bytes"
"encoding/base64"
"fmt"
"io"
"golang.org/x/net/html"
)
// ErrUnknownVersion reports an unrecognized version indicator. Its value is
// the offending byte: the first character inside the element encoding (but
// outside the base64 encoding), which was something other than '0'.
type ErrUnknownVersion byte

// Error implements the error interface. The offending byte is rendered as a
// quoted, ASCII-only character literal.
func (e ErrUnknownVersion) Error() string {
	indicator := byte(e)
	return fmt.Sprintf("unknown armor version indicator %+q", indicator)
}
// isASCIIWhitespace reports whether b is one of the five bytes classed as
// ASCII whitespace: tab, line feed, form feed, carriage return, or space.
// See https://infra.spec.whatwg.org/#ascii-whitespace.
func isASCIIWhitespace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
// splitASCIIWhitespace is a bufio.SplitFunc that yields runs of
// non-whitespace bytes, using isASCIIWhitespace to recognize separators.
// Leading whitespace is always consumed. A token is returned once the
// whitespace byte that terminates it has been seen (that byte is consumed
// too), or, at EOF, when non-whitespace bytes remain. In every other case no
// token is returned, which asks the scanner for more data.
func splitASCIIWhitespace(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Find where the token begins by stepping over leading whitespace.
	start := 0
	for start < len(data) && isASCIIWhitespace(data[start]) {
		start++
	}

	// Scan forward for the whitespace byte that terminates the token.
	for end := start; end < len(data); end++ {
		if isASCIIWhitespace(data[end]) {
			return end + 1, data[start:end], nil
		}
	}

	// No terminator in sight: the remainder is a token only if no more
	// input is coming and the remainder is non-empty.
	if atEOF && start < len(data) {
		return len(data), data[start:], nil
	}

	// Drop the leading whitespace and wait for more data.
	return start, nil, nil
}
// decodeToWriter tokenizes r as HTML and writes to w the text found inside
// <pre> elements, with ASCII whitespace removed. Text outside <pre> elements
// is not written. It returns the number of bytes written to w together with
// the first error encountered: a tokenizer error other than io.EOF, a write
// error, a scanner error, a nested <pre>, a stray </pre>, or input that ends
// while a <pre> element is still open.
func decodeToWriter(w io.Writer, r io.Reader) (int64, error) {
	tokenizer := html.NewTokenizer(r)
	// Set a memory limit on token sizes, otherwise the tokenizer will
	// buffer text indefinitely if it is not broken up by other token types.
	tokenizer.SetMaxBuf(elementSizeLimit)

	active := false // true while inside a <pre> element
	total := int64(0)
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			err := tokenizer.Err()
			if err == io.EOF {
				// A clean end of input is not an error in itself...
				err = nil
			}
			if err == nil && active {
				// ...unless it arrives while a <pre> is still open.
				return total, fmt.Errorf("missing </pre> tag")
			}
			return total, err
		case html.TextToken:
			if active {
				// Re-join the separate chunks of text and
				// feed them to the decoder.
				scanner := bufio.NewScanner(bytes.NewReader(tokenizer.Text()))
				scanner.Split(splitASCIIWhitespace)
				for scanner.Scan() {
					n, err := w.Write(scanner.Bytes())
					total += int64(n)
					if err != nil {
						return total, err
					}
				}
				if err := scanner.Err(); err != nil {
					return total, err
				}
			}
		case html.StartTagToken:
			tn, _ := tokenizer.TagName()
			if string(tn) == "pre" {
				if active {
					// Nesting is not allowed. TagName has already
					// consumed the tag name, so Token() would render
					// an empty tag here; report the raw tag text.
					return total, fmt.Errorf("unexpected %s", tokenizer.Raw())
				}
				active = true
			}
		case html.EndTagToken:
			tn, _ := tokenizer.TagName()
			if string(tn) == "pre" {
				if !active {
					// Stray end tag. As above, report the raw tag text
					// because the tag name has already been consumed.
					return total, fmt.Errorf("unexpected %s", tokenizer.Raw())
				}
				active = false
			}
		}
	}
}
// NewArmorDecoder returns a new AMP armor decoder reading from r.
//
// A goroutine runs decodeToWriter over r and feeds its output into a pipe.
// The first byte read from that pipe is the server–client protocol version
// indicator. When it is '0', the remainder of the pipe is returned wrapped
// in a standard base64 decoder. Any other byte yields an ErrUnknownVersion.
// A failure to read the indicator is returned as is. In both error cases the
// read side of the pipe is closed with the error.
func NewArmorDecoder(r io.Reader) (io.Reader, error) {
	pipeReader, pipeWriter := io.Pipe()
	go func() {
		// Propagate the decode result (nil included) to the read side.
		_, decodeErr := decodeToWriter(pipeWriter, r)
		pipeWriter.CloseWithError(decodeErr)
	}()

	// Peel off the version indicator that precedes the base64 payload.
	var indicator [1]byte
	if _, err := pipeReader.Read(indicator[:]); err != nil {
		pipeReader.CloseWithError(err)
		return nil, err
	}

	if indicator[0] != '0' {
		versionErr := ErrUnknownVersion(indicator[0])
		pipeReader.CloseWithError(versionErr)
		return nil, versionErr
	}

	return base64.NewDecoder(base64.StdEncoding, pipeReader), nil
}
|