File: armor_decoder.go

package info (click to toggle)
snowflake 2.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bookworm-backports, forky, sid, trixie
  • size: 968 kB
  • sloc: makefile: 5
file content (136 lines) | stat: -rw-r--r-- 3,228 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
package amp

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"fmt"
	"io"

	"golang.org/x/net/html"
)

// ErrUnknownVersion is returned when the version indicator, i.e. the first
// character inside the element encoding (but outside the base64 encoding), is
// anything other than '0'. The error's value is the offending byte.
type ErrUnknownVersion byte

// Error implements the error interface. The unrecognized indicator byte is
// rendered as a quoted, ASCII-only character literal.
func (v ErrUnknownVersion) Error() string {
	const format = "unknown armor version indicator %+q"
	return fmt.Sprintf(format, byte(v))
}

// isASCIIWhitespace reports whether b is one of the five ASCII whitespace
// bytes: tab (0x09), line feed (0x0a), form feed (0x0c), carriage return
// (0x0d), or space (0x20). Vertical tab (0x0b) is not included.
//
// https://infra.spec.whatwg.org/#ascii-whitespace
func isASCIIWhitespace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}

// splitASCIIWhitespace is a split function with the signature of
// bufio.SplitFunc. It yields maximal runs of non-whitespace bytes, using
// isASCIIWhitespace to decide what separates them.
//
// A token is returned as soon as the whitespace byte that terminates it is
// seen; advance then covers the leading whitespace, the token, and that single
// terminating byte. An unterminated run at the end of data is only returned
// as a token when atEOF is true. In every other case no token is returned and
// advance covers just the leading whitespace, so the caller can supply more
// data. The returned error is always nil.
func splitASCIIWhitespace(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// Locate the first non-whitespace byte, which is where a token would begin.
	start := 0
	for start < len(data) && isASCIIWhitespace(data[start]) {
		start++
	}
	// Scan forward for the whitespace byte that ends the token.
	for end := start; end < len(data); end++ {
		if isASCIIWhitespace(data[end]) {
			return end + 1, data[start:end], nil
		}
	}
	// The data ran out before any terminator. With no more input coming, a
	// non-empty remainder is the final token.
	if atEOF && start < len(data) {
		return len(data), data[start:], nil
	}
	// Discard the leading whitespace and wait for more data.
	return start, nil, nil
}

// decodeToWriter tokenizes r as HTML and copies the text found inside <pre>
// elements to w, with all ASCII whitespace removed. Text outside of any <pre>
// element is ignored, as are all tags other than <pre> and </pre>.
//
// It returns the total number of bytes written to w, along with the first
// error encountered. The error is non-nil when:
//   - the tokenizer reports an error other than io.EOF,
//   - a write to w fails,
//   - a <pre> start tag appears while already inside a <pre> element,
//   - a </pre> end tag appears while not inside a <pre> element, or
//   - the input ends while still inside a <pre> element.
func decodeToWriter(w io.Writer, r io.Reader) (int64, error) {
	tokenizer := html.NewTokenizer(r)
	// Set a memory limit on token sizes, otherwise the tokenizer will
	// buffer text indefinitely if it is not broken up by other token types.
	tokenizer.SetMaxBuf(elementSizeLimit)
	// active is true while we are between a <pre> and its matching </pre>.
	active := false
	total := int64(0)
	for {
		tt := tokenizer.Next()
		switch tt {
		case html.ErrorToken:
			err := tokenizer.Err()
			// io.EOF is the normal way for the token stream to end.
			if err == io.EOF {
				err = nil
			}
			if err == nil && active {
				return total, fmt.Errorf("missing </pre> tag")
			}
			return total, err
		case html.TextToken:
			if active {
				// Re-join the separate chunks of text and
				// feed them to the decoder.
				scanner := bufio.NewScanner(bytes.NewReader(tokenizer.Text()))
				scanner.Split(splitASCIIWhitespace)
				for scanner.Scan() {
					n, err := w.Write(scanner.Bytes())
					total += int64(n)
					if err != nil {
						return total, err
					}
				}
				if err := scanner.Err(); err != nil {
					return total, err
				}
			}
		case html.StartTagToken:
			tn, _ := tokenizer.TagName()
			if string(tn) == "pre" {
				if active {
					// Nesting is not allowed. The tag is spelled
					// out here rather than formatted from
					// tokenizer.Token(): TagName has already
					// consumed the tag, and the tokenizer only
					// permits one of Token, Text, or TagName per
					// call to Next.
					return total, fmt.Errorf("unexpected <pre>")
				}
				active = true
			}
		case html.EndTagToken:
			tn, _ := tokenizer.TagName()
			if string(tn) == "pre" {
				if !active {
					// Stray end tag. As above, tokenizer.Token()
					// must not be called after TagName.
					return total, fmt.Errorf("unexpected </pre>")
				}
				active = false
			}
		}
	}
}

// NewArmorDecoder returns a new AMP armor decoder.
//
// It starts a goroutine that runs decodeToWriter over r, feeding the
// de-armored bytes into a pipe. NewArmorDecoder then blocks until it has read
// the first byte from that pipe, which is the version indicator. When the
// indicator is '0', the returned reader yields the base64-decoded remainder of
// the stream.
//
// A non-nil error is returned, and the pipe is closed, when the version byte
// cannot be read (io.EOF if the input contains no armored data at all, or
// whatever error decodeToWriter reported) or when the version indicator is not
// recognized (ErrUnknownVersion).
//
// NOTE(review): the returned io.Reader has no Close method. io.Pipe writes
// block until they are read, so the background goroutine presumably stays
// blocked if a caller stops reading before the stream ends — confirm callers
// always read to completion.
func NewArmorDecoder(r io.Reader) (io.Reader, error) {
	pr, pw := io.Pipe()
	go func() {
		_, err := decodeToWriter(pw, r)
		// Propagate the decoding result to the read side; a nil error
		// makes the reader see io.EOF.
		pw.CloseWithError(err)
	}()

	// The first byte inside the element encoding is a server–client
	// protocol version indicator. io.ReadFull is used instead of a bare
	// Read so that a read returning zero bytes and a nil error cannot be
	// mistaken for a version byte of 0.
	var version [1]byte
	if _, err := io.ReadFull(pr, version[:]); err != nil {
		// Closing the read side makes further writes by the goroutine fail.
		pr.CloseWithError(err)
		return nil, err
	}
	switch version[0] {
	case '0':
		return base64.NewDecoder(base64.StdEncoding, pr), nil
	default:
		err := ErrUnknownVersion(version[0])
		pr.CloseWithError(err)
		return nil, err
	}
}