1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
package scraper
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/ffuf/ffuf/v2/pkg/ffuf"
"github.com/PuerkitoBio/goquery"
)
// ScraperRule is a single extraction rule as decoded from a JSON rule group
// file. A rule is evaluated against response data by Check.
type ScraperRule struct {
// Name is copied into the Name field of every result the rule produces.
Name string `json:"name"`
// Rule is the expression to evaluate: a regular expression when Type is
// "regexp", or the selector string handed to goquery's Find when Type is
// "query".
Rule string `json:"rule"`
// Target selects the input Execute builds for the rule: "body" uses the
// response data, "headers" uses the header string, and any other value uses
// the header string followed by the response data.
Target string `json:"target"`
// compiledRule is set by init for rules of Type "regexp". It is unexported,
// so it is never populated from JSON.
compiledRule *regexp.Regexp
// Type chooses the evaluator used by Check: "regexp" or "query". Any other
// value makes Check return an empty slice.
Type string `json:"type"`
// OnlyMatched makes Execute skip the rule when it is called with
// matched == false.
OnlyMatched bool `json:"onlymatched"`
// Action is passed through unchanged into the Action field of each result.
Action []string `json:"action"`
}
// ScraperGroup is the top-level JSON document of a rule file: a named set of
// rules that can be switched on or off as a unit.
type ScraperGroup struct {
// Rules are the rules belonging to the group.
Rules []*ScraperRule `json:"rules"`
// Name is read from the "groupname" key; FromDir passes it to isActive to
// decide whether the group was requested by name.
Name string `json:"groupname"`
// Active is consulted by FromDir together with the "all" selector: an
// active group is loaded when isActive("all", ...) holds.
Active bool `json:"active"`
}
// Scraper holds the initialized rules that Execute runs, in slice order,
// against each response.
type Scraper struct {
// Rules is appended to by FromDir and AppendFromFile.
Rules []*ScraperRule
}
// readGroupFromFile loads the file at filename and decodes its JSON content
// into a ScraperGroup.
//
// When the file cannot be read, a group with an empty, non-nil Rules slice is
// returned together with the read error. When decoding fails, the group is
// returned in whatever state json.Unmarshal left it, along with the decode
// error.
func readGroupFromFile(filename string) (ScraperGroup, error) {
	raw, readErr := os.ReadFile(filename)
	if readErr != nil {
		return ScraperGroup{Rules: []*ScraperRule{}}, readErr
	}

	var group ScraperGroup
	decodeErr := json.Unmarshal(raw, &group)
	return group, decodeErr
}
// FromDir builds a scraper from the rule group files found directly inside
// dirname.
//
// Every regular file whose name ends in ".json" is decoded as a ScraperGroup.
// A group contributes its rules when it is marked Active and
// isActive("all", ...) holds for the groups parsed from activestr, or when
// isActive holds for the group's own name. Each selected rule is initialized
// before being added; rules that fail to initialize are left out.
//
// Problems are collected rather than aborting the load: an unreadable
// directory, an unreadable or malformed file, a null rule entry, or a rule
// that fails to initialize each add one entry to the returned Multierror. The
// returned scraper is always non-nil and holds whatever rules loaded cleanly.
func FromDir(dirname string, activestr string) (ffuf.Scraper, ffuf.Multierror) {
	scr := Scraper{Rules: make([]*ScraperRule, 0)}
	errs := ffuf.NewMultierror()
	activegrps := parseActiveGroups(activestr)

	// List dirname itself rather than a fixed package-level directory, so the
	// listing and the paths joined below always refer to the same location.
	entries, err := os.ReadDir(dirname)
	if err != nil {
		errs.Add(err)
		return &scr, errs
	}

	for _, entry := range entries {
		if !entry.Type().IsRegular() || !strings.HasSuffix(entry.Name(), ".json") {
			continue
		}
		path := filepath.Join(dirname, entry.Name())

		sg, err := readGroupFromFile(path)
		if err != nil {
			errs.Add(fmt.Errorf("%s : %s", path, err))
			continue
		}

		selected := (sg.Active && isActive("all", activegrps)) || isActive(sg.Name, activegrps)
		if !selected {
			continue
		}

		for _, r := range sg.Rules {
			// A JSON null inside "rules" decodes to a nil pointer; report it
			// instead of dereferencing it in init.
			if r == nil {
				errs.Add(fmt.Errorf("%s : null rule entry", path))
				continue
			}
			if err := r.init(); err != nil {
				errs.Add(fmt.Errorf("%s : %s", path, err))
				continue
			}
			scr.Rules = append(scr.Rules, r)
		}
	}
	return &scr, errs
}
// AppendFromFile reads the rule group stored in the file at path and appends
// every rule that initializes successfully to s.Rules. Unlike FromDir, the
// group's name and Active flag are not consulted.
//
// If the file cannot be read or decoded, that error is returned and s is left
// unchanged. Otherwise all rules are processed: rules that fail to initialize
// (and null rule entries) are skipped, and the first such failure is returned
// once the remaining rules have been appended. A nil return means every rule
// in the file was added.
func (s *Scraper) AppendFromFile(path string) error {
	sg, err := readGroupFromFile(path)
	if err != nil {
		return err
	}

	// Remember the first failure instead of reusing one err variable, which
	// would be overwritten with nil by any later rule that initializes fine.
	var firstErr error
	for _, r := range sg.Rules {
		var ruleErr error
		if r == nil {
			// A JSON null inside "rules" decodes to a nil pointer.
			ruleErr = fmt.Errorf("%s : null rule entry", path)
		} else if initErr := r.init(); initErr != nil {
			ruleErr = fmt.Errorf("%s : rule %q: %w", path, r.Name, initErr)
		}
		if ruleErr != nil {
			if firstErr == nil {
				firstErr = ruleErr
			}
			continue
		}
		s.Rules = append(s.Rules, r)
	}
	return firstErr
}
// Execute runs every rule of the scraper against resp and returns one
// ScraperResult per rule that produced at least one value. The returned slice
// is never nil.
//
// Rules flagged OnlyMatched are skipped when matched is false. The text a rule
// is checked against depends on its Target: "body" selects the response data,
// "headers" selects the header string, and anything else selects the header
// string followed by the response data.
func (s *Scraper) Execute(resp *ffuf.Response, matched bool) []ffuf.ScraperResult {
	results := []ffuf.ScraperResult{}
	for _, rule := range s.Rules {
		// Rules restricted to matched responses do not apply here.
		if rule.OnlyMatched && !matched {
			continue
		}

		var input string
		switch rule.Target {
		case "body":
			input = string(resp.Data)
		case "headers":
			input = headerString(resp.Headers)
		default:
			input = headerString(resp.Headers) + string(resp.Data)
		}

		found := rule.Check(input)
		if len(found) == 0 {
			continue
		}
		results = append(results, ffuf.ScraperResult{
			Name:    rule.Name,
			Type:    rule.Type,
			Action:  rule.Action,
			Results: found,
		})
	}
	return results
}
// init prepares the rule for use. For rules of Type "regexp" it compiles Rule
// and stores the result in compiledRule, returning the compile error if the
// pattern is invalid. Rules of any other type need no preparation and always
// yield nil.
func (r *ScraperRule) init() error {
	if r.Type != "regexp" {
		return nil
	}
	// regexp.Compile yields a nil *Regexp on failure, so the field is
	// assigned unconditionally.
	compiled, err := regexp.Compile(r.Rule)
	r.compiledRule = compiled
	return err
}
// Check evaluates the rule against data and returns the extracted values.
// Rules of Type "regexp" are handled by checkRegexp and rules of Type "query"
// by checkQuery; any other type yields an empty, non-nil slice.
func (r *ScraperRule) Check(data string) []string {
	switch r.Type {
	case "regexp":
		return r.checkRegexp(data)
	case "query":
		return r.checkQuery(data)
	default:
		return []string{}
	}
}
// checkQuery parses data as a document with goquery and returns the text of
// every element selected by passing Rule to Find, in the order Each visits
// them. If the document cannot be built, an empty slice is returned. The
// result is never nil.
func (r *ScraperRule) checkQuery(data string) []string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
	if err != nil {
		return []string{}
	}

	texts := []string{}
	collect := func(_ int, sel *goquery.Selection) {
		texts = append(texts, sel.Text())
	}
	doc.Find(r.Rule).Each(collect)
	return texts
}
// checkRegexp applies the compiled rule to data and returns, for every match,
// the full match followed by its capture groups, all flattened into a single
// slice. A rule without a compiled expression, or data without any match,
// yields an empty, non-nil slice.
func (r *ScraperRule) checkRegexp(data string) []string {
	if r.compiledRule == nil {
		return []string{}
	}

	matches := []string{}
	for _, m := range r.compiledRule.FindAllStringSubmatch(data, -1) {
		matches = append(matches, m...)
	}
	return matches
}
|