1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
|
package stdlib
import (
"fmt"
"regexp"
resyntax "regexp/syntax"
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/function"
)
var RegexFunc = function.New(&function.Spec{
Params: []function.Parameter{
{
Name: "pattern",
Type: cty.String,
},
{
Name: "string",
Type: cty.String,
},
},
Type: func(args []cty.Value) (cty.Type, error) {
if !args[0].IsKnown() {
// We can't predict our type without seeing our pattern
return cty.DynamicPseudoType, nil
}
retTy, err := regexPatternResultType(args[0].AsString())
if err != nil {
err = function.NewArgError(0, err)
}
return retTy, err
},
Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
if retType == cty.DynamicPseudoType {
return cty.DynamicVal, nil
}
re, err := regexp.Compile(args[0].AsString())
if err != nil {
// Should never happen, since we checked this in the Type function above.
return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err)
}
str := args[1].AsString()
captureIdxs := re.FindStringSubmatchIndex(str)
if captureIdxs == nil {
return cty.NilVal, fmt.Errorf("pattern did not match any part of the given string")
}
return regexPatternResult(re, str, captureIdxs, retType), nil
},
})
var RegexAllFunc = function.New(&function.Spec{
Params: []function.Parameter{
{
Name: "pattern",
Type: cty.String,
},
{
Name: "string",
Type: cty.String,
},
},
Type: func(args []cty.Value) (cty.Type, error) {
if !args[0].IsKnown() {
// We can't predict our type without seeing our pattern,
// but we do know it'll always be a list of something.
return cty.List(cty.DynamicPseudoType), nil
}
retTy, err := regexPatternResultType(args[0].AsString())
if err != nil {
err = function.NewArgError(0, err)
}
return cty.List(retTy), err
},
Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
ety := retType.ElementType()
if ety == cty.DynamicPseudoType {
return cty.DynamicVal, nil
}
re, err := regexp.Compile(args[0].AsString())
if err != nil {
// Should never happen, since we checked this in the Type function above.
return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err)
}
str := args[1].AsString()
captureIdxsEach := re.FindAllStringSubmatchIndex(str, -1)
if len(captureIdxsEach) == 0 {
return cty.ListValEmpty(ety), nil
}
elems := make([]cty.Value, len(captureIdxsEach))
for i, captureIdxs := range captureIdxsEach {
elems[i] = regexPatternResult(re, str, captureIdxs, ety)
}
return cty.ListVal(elems), nil
},
})
// Regex is a function that extracts one or more substrings from a given
// string by applying a regular expression pattern, describing the first
// match.
//
// The return type depends on the composition of the capture groups (if any)
// in the pattern:
//
// - If there are no capture groups at all, the result is a single string
// representing the entire matched pattern.
// - If all of the capture groups are named, the result is an object whose
// keys are the named groups and whose values are their sub-matches, or
// null if a particular sub-group was inside another group that didn't
// match.
// - If none of the capture groups are named, the result is a tuple whose
// elements are the sub-groups in order and whose values are their
// sub-matches, or null if a particular sub-group was inside another group
// that didn't match.
// - It is invalid to use both named and un-named capture groups together in
// the same pattern.
//
// If the pattern doesn't match, this function returns an error. To test for
// a match, call RegexAll and check if the length of the result is greater
// than zero.
func Regex(pattern, str cty.Value) (cty.Value, error) {
return RegexFunc.Call([]cty.Value{pattern, str})
}
// RegexAll is similar to Regex but it finds all of the non-overlapping matches
// in the given string and returns a list of them.
//
// The result type is always a list, whose element type is deduced from the
// pattern in the same way as the return type for Regex is decided.
//
// If the pattern doesn't match at all, this function returns an empty list.
func RegexAll(pattern, str cty.Value) (cty.Value, error) {
return RegexAllFunc.Call([]cty.Value{pattern, str})
}
// regexPatternResultType parses the given regular expression pattern and
// returns the structural type that would be returned to represent its
// capture groups.
//
// Returns an error if parsing fails or if the pattern uses a mixture of
// named and unnamed capture groups, which is not permitted.
func regexPatternResultType(pattern string) (cty.Type, error) {
re, rawErr := regexp.Compile(pattern)
switch err := rawErr.(type) {
case *resyntax.Error:
return cty.NilType, fmt.Errorf("invalid regexp pattern: %s in %s", err.Code, err.Expr)
case error:
// Should never happen, since all regexp compile errors should
// be resyntax.Error, but just in case...
return cty.NilType, fmt.Errorf("error parsing pattern: %s", err)
}
allNames := re.SubexpNames()[1:]
var names []string
unnamed := 0
for _, name := range allNames {
if name == "" {
unnamed++
} else {
if names == nil {
names = make([]string, 0, len(allNames))
}
names = append(names, name)
}
}
switch {
case unnamed == 0 && len(names) == 0:
// If there are no capture groups at all then we'll return just a
// single string for the whole match.
return cty.String, nil
case unnamed > 0 && len(names) > 0:
return cty.NilType, fmt.Errorf("invalid regexp pattern: cannot mix both named and unnamed capture groups")
case unnamed > 0:
// For unnamed captures, we return a tuple of them all in order.
etys := make([]cty.Type, unnamed)
for i := range etys {
etys[i] = cty.String
}
return cty.Tuple(etys), nil
default:
// For named captures, we return an object using the capture names
// as keys.
atys := make(map[string]cty.Type, len(names))
for _, name := range names {
atys[name] = cty.String
}
return cty.Object(atys), nil
}
}
func regexPatternResult(re *regexp.Regexp, str string, captureIdxs []int, retType cty.Type) cty.Value {
switch {
case retType == cty.String:
start, end := captureIdxs[0], captureIdxs[1]
return cty.StringVal(str[start:end])
case retType.IsTupleType():
captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair
vals := make([]cty.Value, len(captureIdxs)/2)
for i := range vals {
start, end := captureIdxs[i*2], captureIdxs[i*2+1]
if start < 0 || end < 0 {
vals[i] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match
continue
}
vals[i] = cty.StringVal(str[start:end])
}
return cty.TupleVal(vals)
case retType.IsObjectType():
captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair
vals := make(map[string]cty.Value, len(captureIdxs)/2)
names := re.SubexpNames()[1:]
for i, name := range names {
start, end := captureIdxs[i*2], captureIdxs[i*2+1]
if start < 0 || end < 0 {
vals[name] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match
continue
}
vals[name] = cty.StringVal(str[start:end])
}
return cty.ObjectVal(vals)
default:
// Should never happen
panic(fmt.Sprintf("invalid return type %#v", retType))
}
}
|