1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
|
package rfc822
import (
"bytes"
)
// Part is one section of the scanned data, as produced by ByteScanner.ScanAll.
type Part struct {
// Data holds the bytes of the section; it is a sub-slice of the scanned input, not a copy.
Data []byte
// Offset is the scanner position in the input at which the read for this section began.
Offset int
}
// ByteScanner splits an in-memory byte slice into the sections separated by boundary delimiter lines.
type ByteScanner struct {
// data is the complete input being scanned.
data []byte
// startBoundary is the delimiter searched for: "--" followed by the boundary passed to NewByteScanner.
startBoundary []byte
// progress is the index into data at which the next read starts.
progress int
}
// NewByteScanner returns a scanner over data that splits on the given boundary.
//
// The delimiter that is searched for is the boundary prefixed with "--". One
// read is performed immediately and its result discarded, so that subsequent
// reads begin after the first delimiter found in data. The returned error is
// always nil.
func NewByteScanner(data []byte, boundary []byte) (*ByteScanner, error) {
	delimiter := make([]byte, 0, len(boundary)+2)
	delimiter = append(delimiter, '-', '-')
	delimiter = append(delimiter, boundary...)

	scanner := &ByteScanner{data: data, startBoundary: delimiter}

	// Skip everything in front of the first delimiter.
	_, _ = scanner.readToBoundary()

	return scanner, nil
}
// ScanAll reads section after section until the scanner reports that no more
// follow, and returns every non-nil section together with the scanner position
// at which its read started.
func (s *ByteScanner) ScanAll() []Part {
	var collected []Part

	for more := true; more; {
		start := s.progress

		var chunk []byte
		chunk, more = s.readToBoundary()
		if chunk == nil {
			continue
		}

		collected = append(collected, Part{Data: chunk, Offset: start})
	}

	return collected
}
// indexOfNewLineAfterBoundary returns the index of the '\n' that terminates a
// delimiter line, given the bytes that directly follow the delimiter.
//
// Any number of leading '\r' bytes is skipped. If the first byte that is not
// '\r' is '\n', its index is returned; otherwise, or when data is empty or
// consists only of '\r' bytes, the result is -1.
func indexOfNewLineAfterBoundary(data []byte) int {
	for pos, b := range data {
		switch b {
		case '\r':
			// Tolerate extra carriage returns in front of the line feed.
			continue
		case '\n':
			return pos
		default:
			return -1
		}
	}

	return -1
}
// getPreviousLineBreakIndex reports the length of the line break that sits
// directly in front of offset.
//
// It returns 0 when offset equals the current scan position, 2 when the two
// preceding bytes are "\r\n" and both lie at or after the scan position, 1 when
// only a preceding '\n' is present, and -1 when the byte in front of offset is
// not '\n'.
func (s *ByteScanner) getPreviousLineBreakIndex(offset int) int {
	switch {
	case offset == s.progress:
		return 0
	case s.data[offset-1] != '\n':
		return -1
	case offset-s.progress >= 2 && s.data[offset-2] == '\r':
		return 2
	default:
		return 1
	}
}
// readToBoundary advances the scanner to just past the next boundary delimiter
// line and returns the bytes that were in front of it.
//
// The returned slice always begins at the position the scanner had on entry
// and excludes the "\n" or "\r\n" directly preceding the delimiter. The boolean
// is true when an opening delimiter ("--" + boundary) ended the read, meaning
// another section may follow, and false when the closing delimiter
// ("--" + boundary + "--") was reached or no further delimiter exists. When no
// further delimiter exists all unread data is returned; if there is no unread
// data the result is nil.
func (s *ByteScanner) readToBoundary() ([]byte, bool) {
	boundarySuffix := []byte{'-', '-'}
	boundarySuffixLen := len(boundarySuffix)
	boundaryLen := len(s.startBoundary)
	dataLen := len(s.data)
	searchStart := s.progress

	for s.progress < dataLen {
		remaining := s.data[s.progress:]

		index := bytes.Index(remaining, s.startBoundary)
		if index < 0 {
			// No further candidates; the unread tail is handled below the loop.
			break
		}

		// Matched the pattern, now check whether it sits at the start of a line. The line break can also be
		// absent if the pattern happens to match exactly at the current scan position.
		prevNewLineOffset := s.getPreviousLineBreakIndex(s.progress + index)
		if prevNewLineOffset == -1 {
			// The pattern occurs in the middle of a line, so it is ordinary content.
			s.progress += index + boundaryLen
			continue
		}

		delimiterEnd := index + boundaryLen

		// A delimiter directly followed by "--" is the closing delimiter.
		if bytes.HasPrefix(remaining[delimiterEnd:], boundarySuffix) {
			delimiterEnd += boundarySuffixLen
			afterBoundary := remaining[delimiterEnd:]

			// The closing delimiter may be the very end of the message, with no line break after it.
			newLineStartIndex := 0
			if len(afterBoundary) != 0 {
				newLineStartIndex = indexOfNewLineAfterBoundary(afterBoundary)
				// If there is no new line this can't be a boundary pattern. RFC 1341 states that they are
				// immediately followed by either \r\n or \n.
				if newLineStartIndex < 0 {
					s.progress += delimiterEnd
					continue
				}
			}

			result := s.data[searchStart : s.progress+index-prevNewLineOffset]

			s.progress += delimiterEnd + newLineStartIndex + 1
			if s.progress > dataLen {
				// A closing delimiter that ends the data has no line break to step over.
				s.progress = dataLen
			}

			return result, false
		}

		newLineStart := indexOfNewLineAfterBoundary(remaining[delimiterEnd:])
		// If there is no new line this can't be a boundary pattern. RFC 1341 states that they are
		// immediately followed by either \r\n or \n.
		if newLineStart < 0 {
			s.progress += delimiterEnd
			continue
		}

		result := s.data[searchStart : s.progress+index-prevNewLineOffset]
		s.progress += delimiterEnd + newLineStart + 1

		return result, true
	}

	// No valid delimiter was found. Everything from the entry position onwards belongs to the final section,
	// including any rejected look-alike patterns that were skipped while searching.
	s.progress = dataLen
	if searchStart < dataLen {
		return s.data[searchStart:], false
	}

	return nil, false
}
|