File: scanner.go

package info (click to toggle)
golang-github-protonmail-gluon 0.17.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 16,020 kB
  • sloc: sh: 55; makefile: 5
file content (153 lines) | stat: -rw-r--r-- 3,803 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
package rfc822

import (
	"bytes"
)

// Part is one section of the scanned input, as collected by ByteScanner.ScanAll.
type Part struct {
	Data   []byte // Bytes of the section as returned by readToBoundary; the slice aliases the scanner's input.
	Offset int    // Scanner position recorded just before the section was read, i.e. where Data starts in the input.
}

// ByteScanner walks an in-memory buffer and cuts it into the sections separated by boundary lines.
type ByteScanner struct {
	data          []byte // Input being scanned.
	startBoundary []byte // Boundary with the leading "--" already prepended (see NewByteScanner).
	progress      int    // Index into data at which the next boundary search starts.
}

// NewByteScanner creates a scanner over data for the given boundary.
//
// The boundary is stored with its "--" prefix in a freshly allocated slice, so the caller's boundary slice is
// not retained. The scanner is then advanced once with readToBoundary, discarding whatever precedes the first
// boundary line. The returned error is always nil.
func NewByteScanner(data []byte, boundary []byte) (*ByteScanner, error) {
	delimiter := make([]byte, 0, len(boundary)+2)
	delimiter = append(delimiter, '-', '-')
	delimiter = append(delimiter, boundary...)

	s := &ByteScanner{data: data, startBoundary: delimiter}

	// Skip over the content in front of the first boundary; its bytes are not needed.
	s.readToBoundary()

	return s, nil
}

// ScanAll reads sections with readToBoundary until it reports that no more follow and returns them in order.
//
// Each Part carries the scanner position recorded immediately before its data was read. Reads that yield a nil
// slice are not turned into a Part. The result is nil when no section was collected.
func (s *ByteScanner) ScanAll() []Part {
	var parts []Part

	for more := true; more; {
		start := s.progress

		var chunk []byte

		chunk, more = s.readToBoundary()
		if chunk == nil {
			continue
		}

		parts = append(parts, Part{Data: chunk, Offset: start})
	}

	return parts
}

// indexOfNewLineAfterBoundary returns the index of the '\n' that ends the line when data consists of zero or
// more '\r' bytes followed by '\n'. It returns -1 if data is empty, if any other byte shows up before the
// '\n', or if data ends before a '\n' is seen.
func indexOfNewLineAfterBoundary(data []byte) int {
	for pos, b := range data {
		switch b {
		case '\r':
			// Tolerate any number of carriage returns in front of the line feed.
			continue
		case '\n':
			return pos
		default:
			return -1
		}
	}

	return -1
}

// getPreviousLineBreakIndex reports whether the boundary candidate at s.data[offset] starts a line and, if so,
// how many line-break bytes in front of it belong to the boundary rather than to the preceding content.
//
// It returns:
//   - -1 when the candidate is not at the start of a line (the previous byte is not '\n');
//   - 0 when the candidate is at the very start of the buffer, or directly at s.progress with a '\n' in front
//     of it (that line break lies before the current search window and must not be trimmed);
//   - 1 when it is preceded by "\n" inside the search window;
//   - 2 when it is preceded by "\r\n" inside the search window.
//
// A candidate located exactly at s.progress is only accepted if it really is at a line start. The scanner
// position can sit in the middle of a line after a rejected candidate has been skipped, and a boundary glued
// to the preceding bytes must not be treated as a delimiter.
func (s *ByteScanner) getPreviousLineBreakIndex(offset int) int {
	// Nothing precedes the start of the buffer, so it counts as a line start.
	if offset <= 0 {
		return 0
	}

	if s.data[offset-1] != '\n' {
		return -1
	}

	// Number of bytes between the search position and the candidate; only these may be trimmed.
	switch available := offset - s.progress; {
	case available <= 0:
		return 0
	case available >= 2 && s.data[offset-2] == '\r':
		return 2
	default:
		return 1
	}
}

// readToBoundary advances the scanner to just past the next boundary line and returns the bytes in front of it.
//
// A boundary line is s.startBoundary at the start of a line (as judged by getPreviousLineBreakIndex),
// optionally followed by the "--" terminator, and then by a line break (any number of '\r' followed by '\n').
// A terminating boundary may also be the very last bytes of the input. Occurrences of the boundary bytes that
// do not satisfy these rules are skipped and remain part of the returned data.
//
// The returned slice aliases s.data. It runs from the position the scanner had on entry up to, but not
// including, the line break in front of the boundary. The boolean is true when a non-terminating boundary was
// found, i.e. another section follows; it is false for a terminating boundary or when the input ran out.
// When the input runs out without a boundary, everything from the entry position to the end of the input is
// returned, or nil if nothing was left to read.
func (s *ByteScanner) readToBoundary() ([]byte, bool) {
	boundarySuffix := []byte{'-', '-'}
	boundaryLen := len(s.startBoundary)
	dataLen := len(s.data)
	searchStart := s.progress

	for s.progress < dataLen {
		remaining := s.data[s.progress:]

		index := bytes.Index(remaining, s.startBoundary)
		if index < 0 {
			s.progress = dataLen
			break
		}

		// Matched the pattern, now we need to check if the previous line break is available or not. It can also
		// not be available if the pattern just happens to match exactly at the search offset.
		prevNewLineOffset := s.getPreviousLineBreakIndex(s.progress + index)
		if prevNewLineOffset == -1 {
			s.progress += index + boundaryLen
			continue
		}

		// Since we matched the pattern we can check whether this is a starting or terminating boundary.
		matchLen := boundaryLen
		terminating := bytes.HasPrefix(remaining[index+boundaryLen:], boundarySuffix)
		if terminating {
			matchLen += len(boundarySuffix)
		}

		afterBoundary := remaining[index+matchLen:]

		newLineIndex := indexOfNewLineAfterBoundary(afterBoundary)
		if newLineIndex < 0 {
			// If there is no new line this can't be a boundary pattern. RFC 1341 states that they are
			// immediately followed by either \r\n or \n. The only exception is a terminating boundary at
			// the very end of the file/message with no new line.
			if !terminating || len(afterBoundary) != 0 {
				s.progress += index + matchLen
				continue
			}

			newLineIndex = 0
		}

		// Always slice from searchStart so that skipped false matches stay inside the returned section.
		result := s.data[searchStart : s.progress+index-prevNewLineOffset]
		s.progress += index + matchLen + newLineIndex + 1

		return result, !terminating
	}

	// No boundary left. Whatever was scanned since entry, including skipped false matches, is the section.
	if searchStart < dataLen {
		return s.data[searchStart:], false
	}

	return nil, false
}