// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
package reader
import (
"bufio"
"fmt"
"io"
"strings"
"unicode"
)
// TagValuePair is one exported entry of a tag-value document: the name of
// a tag together with the string value that was read for it.
type TagValuePair struct {
	Tag, Value string
}
// ReadTagValues scans content line by line and returns the tag-value
// pairs it contains, in the order they were completed, as a slice of
// TagValuePair.
//
// It returns a nil slice and a non-nil error when reading from content
// fails, when a line cannot be parsed (the parse error is wrapped and
// prefixed with the 1-based number of the offending line), or when the
// input ends while a multi-line text value is still open. Input that
// yields no pairs produces a nil slice and a nil error.
//
// Individual lines may be up to 16 MiB long; longer lines are reported
// as a scanner error.
func ReadTagValues(content io.Reader) ([]TagValuePair, error) {
	// bufio.Scanner refuses lines longer than its buffer limit, which
	// defaults to bufio.MaxScanTokenSize (64 KiB). Raise the ceiling so
	// long single-line values do not abort the whole read. The buffer
	// still starts at the default size and only grows on demand.
	const maxLineBytes = 16 * 1024 * 1024

	r := &tvReader{}
	scanner := bufio.NewScanner(content)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	for scanner.Scan() {
		if err := r.readNextLine(scanner.Text()); err != nil {
			// readNextLine has already advanced currentLine, so it
			// names the line that just failed.
			return nil, fmt.Errorf("line %d: %w", r.currentLine, err)
		}
	}
	if err := scanner.Err(); err != nil {
		return nil, err
	}

	// finalize and make sure the input did not stop mid-value
	tvList, err := r.finalize()
	if err != nil {
		return nil, err
	}

	// keep returning a nil slice for input without any pairs
	if len(tvList) == 0 {
		return nil, nil
	}

	// convert internal format to the exported TagValuePair form
	exportedTVList := make([]TagValuePair, 0, len(tvList))
	for _, tv := range tvList {
		exportedTVList = append(exportedTVList, TagValuePair{Tag: tv.tag, Value: tv.value})
	}
	return exportedTVList, nil
}
// tagvalue is the reader's unexported record of one completed pair: the
// tag name and the value collected for it.
type tagvalue struct {
	tag, value string
}
// tvReader holds the state of a line-by-line tag-value parse. Its zero
// value is ready to use.
type tvReader struct {
	// midtext is true while a <text> value opened on an earlier line
	// has not yet been closed by </text>.
	midtext bool

	// tvList collects the pairs that have been completed so far.
	tvList []tagvalue

	// currentLine counts the lines handed to readNextLine.
	currentLine int

	// currentTag and currentValue describe the pair being built; both
	// are reset to "" once the pair has been appended to tvList.
	currentTag, currentValue string
}
// finalize ends the parse and returns the pairs collected so far. If the
// reader is still inside an unclosed <text> value it returns a nil slice
// and an error instead.
func (reader *tvReader) finalize() ([]tagvalue, error) {
	if !reader.midtext {
		return reader.tvList, nil
	}
	return nil, fmt.Errorf("finalize called while still midtext parsing a text tag")
}
// readNextLine feeds one input line to the reader. It bumps the line
// counter first and then hands the line to the handler matching the
// current state: the midtext handler while a <text> value is open, the
// ready handler otherwise. The handler's error is returned unchanged.
func (reader *tvReader) readNextLine(line string) error {
	reader.currentLine++

	switch {
	case reader.midtext:
		return reader.readNextLineFromMidtext(line)
	default:
		return reader.readNextLineFromReady(line)
	}
}
// readNextLineFromReady handles a line while no <text> value is open.
//
// Leading whitespace is ignored. Blank lines and lines starting with '#'
// are skipped. Any other line must contain a colon: the part before the
// first colon, trimmed, becomes the tag. If the remainder has no <text>
// marker, its trimmed content is the value. If it has <text> and a later
// </text> on the same line, the value is exactly what lies between them.
// If <text> is not closed on this line, what follows it (plus a newline)
// starts the value and the reader switches to midtext without recording
// a pair yet.
//
// It returns an error only when a non-blank, non-comment line has no colon.
func (reader *tvReader) readNextLineFromReady(line string) error {
	trimmed := strings.TrimLeftFunc(line, unicode.IsSpace)

	// blank lines and comments carry no data
	if trimmed == "" || strings.HasPrefix(trimmed, "#") {
		return nil
	}

	// the tag is everything up to the first colon
	colon := strings.Index(trimmed, ":")
	if colon < 0 {
		// report the line as it was received, untrimmed
		return fmt.Errorf("no colon found in '%s'", line)
	}
	reader.currentTag = strings.TrimSpace(trimmed[:colon])
	rest := trimmed[colon+1:]

	opener := strings.Index(rest, "<text>")
	if opener < 0 {
		// plain single-line value
		reader.currentValue = strings.TrimSpace(rest)
	} else {
		// anything in front of <text> is not part of the value
		body := rest[opener+len("<text>"):]
		closer := strings.Index(body, "</text>")
		if closer < 0 {
			// the value continues on following lines
			reader.currentValue = body + "\n"
			reader.midtext = true
			return nil
		}
		// opened and closed on this line; keep the inside verbatim
		reader.currentValue = body[:closer]
	}

	// the pair is complete: record it and clear the scratch fields
	reader.tvList = append(reader.tvList, tagvalue{reader.currentTag, reader.currentValue})
	reader.currentTag, reader.currentValue = "", ""
	return nil
}
// readNextLineFromMidtext handles a line while a <text> value is open.
//
// A line without </text> is appended to the value in full, followed by a
// newline. A line containing </text> contributes only what precedes the
// first </text>; the finished pair is then recorded, the scratch fields
// are cleared and the reader leaves midtext. It always returns nil.
func (reader *tvReader) readNextLineFromMidtext(line string) error {
	closer := strings.Index(line, "</text>")
	if closer < 0 {
		// still inside the text block; keep accumulating
		reader.currentValue = reader.currentValue + line + "\n"
		return nil
	}

	// the block ends on this line; whatever follows </text> is not used
	finished := tagvalue{
		tag:   reader.currentTag,
		value: reader.currentValue + line[:closer],
	}
	reader.tvList = append(reader.tvList, finished)

	// back to the ready state
	reader.midtext, reader.currentTag, reader.currentValue = false, "", ""
	return nil
}
|