1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
package interp
import (
"io"
"regexp"
"strings"
"github.com/wader/fq/internal/gojqex"
"github.com/wader/fq/internal/ioex"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/ranges"
"github.com/wader/gojq"
)
// init registers (*Interp)._binaryMatch under the name "_match_binary" using
// RegisterIter2.
// NOTE(review): presumably this exposes it to jq code as an iterator function
// taking two arguments (pattern and flags) besides its input; confirm against
// RegisterIter2.
func init() {
RegisterIter2("_match_binary", (*Interp)._binaryMatch)
}
// _binaryMatch implements "_match_binary": it runs a regular expression over
// the binary value c and returns an iterator of jq-style match objects.
//
// pattern is either a string, which is used as the regular expression as is,
// or any other value accepted by toBytes, in which case its bytes are matched
// literally (regexp meta characters are quoted) and byte mode is enabled.
//
// flags is handed to gojqex.CompileRegexp together with "gimb". Two of the
// flags are also interpreted here:
//
//	g  global: keep yielding matches instead of stopping after the first one
//	b  byte mode: every input byte is read as one rune
//
// Every yielded value is a map with the keys "offset" and "length" (in bytes,
// relative to the start of c), "string" (a Binary covering the matched range)
// and "captures" (one map per capture group, each additionally carrying
// "name"). A capture group that did not take part in the match is reported
// with offset -1, length 0 and a nil "string".
//
// Errors from converting the arguments, compiling the expression or creating
// the reader are returned wrapped in gojq.NewIter.
func (i *Interp) _binaryMatch(c any, pattern any, flags string) gojq.Iter {
	bv, err := toBinary(c)
	if err != nil {
		return gojq.NewIter(err)
	}

	var re string
	var byteRunes bool

	switch pattern := pattern.(type) {
	case string:
		re = pattern
	default:
		reBuf, err := toBytes(pattern)
		if err != nil {
			return gojq.NewIter(err)
		}
		// turn each pattern byte into the code point with the same value so
		// that it lines up with how the input is read in byte mode
		reRs := make([]rune, 0, len(reBuf))
		for _, b := range reBuf {
			reRs = append(reRs, rune(b))
		}
		byteRunes = true
		// escape parentheses runes etc
		re = regexp.QuoteMeta(string(reRs))
	}
	if strings.Contains(flags, "b") {
		byteRunes = true
	}
	global := strings.Contains(flags, "g")

	// TODO: err to string
	// TODO: extract to regexpextra? "all" FindReaderSubmatchIndex that can iter?
	sre, err := gojqex.CompileRegexp(re, "gimb", flags)
	if err != nil {
		return gojq.NewIter(err)
	}
	sreNames := sre.SubexpNames()

	br, err := bv.toReader()
	if err != nil {
		return gojq.NewIter(err)
	}

	var rr interface {
		io.RuneReader
		io.Seeker
	}
	// raw bytes regexp matching is a bit tricky, what we do is to read each byte as a codepoint (ByteRuneReader)
	// and then we can use UTF-8 encoded codepoint to match a raw byte. So for example \u00ff (encoded as 0xc3 0xbf)
	// will match the byte 0xff
	if byteRunes {
		// byte mode, read each byte as a rune
		rr = ioex.ByteRuneReader{RS: bitio.NewIOReadSeeker(br)}
	} else {
		rr = ioex.RuneReadSeeker{RS: bitio.NewIOReadSeeker(br)}
	}

	// off is the byte offset where the next search starts, prevOff is where
	// the previous search started (-1 before the first search)
	var off int64
	prevOff := int64(-1)

	return iterFn(func() (any, bool) {
		// TODO: correct way to handle empty match for binary, move one byte forward?
		// > "asdasd" | [match(""; "g")], [(tobytes | match(""; "g"))] | length
		// 7
		// 1
		if prevOff == off {
			// previous match was empty, stop instead of matching at the same
			// offset forever
			return nil, false
		}
		if prevOff != -1 && !global {
			// without the g flag only the first match is produced
			return nil, false
		}

		if _, err := rr.Seek(off, io.SeekStart); err != nil {
			// NOTE(review): the error is returned together with false, which
			// presumably ends the iteration without surfacing it; confirm
			// against iterFn.
			return err, false
		}

		// l holds start/end index pairs relative to off, pair 0 is the whole
		// match and the following pairs are the capture groups
		l := sre.FindReaderSubmatchIndex(rr)
		if l == nil {
			return nil, false
		}

		captures := make([]any, 0, len(l)/2)
		var firstCapture map[string]any

		for n := 0; n < len(l)/2; n++ {
			start, end := l[n*2], l[n*2+1]

			var capture map[string]any
			if start != -1 {
				matchBitOff := (off + int64(start)) * 8
				matchLength := int64(end-start) * 8
				capture = map[string]any{
					"offset": int(off) + start,
					"length": end - start,
					"string": Binary{
						br: bv.br,
						r: ranges.Range{
							Start: bv.r.Start + matchBitOff,
							Len:   matchLength,
						},
						unit: 8,
					},
				}
			} else {
				// group did not participate in the match: always report
				// offset -1, do not add the search offset to it
				capture = map[string]any{
					"offset": -1,
					"length": 0,
					"string": nil,
				}
			}

			if n > 0 {
				if sreNames[n] != "" {
					capture["name"] = sreNames[n]
				} else {
					capture["name"] = nil
				}
			} else {
				firstCapture = capture
			}

			captures = append(captures, capture)
		}

		prevOff = off
		// continue searching right after the end of the whole match
		off += int64(l[1])
		firstCapture["captures"] = captures[1:]

		return firstCapture, true
	})
}
|