File: html.go

package info (click to toggle)
golang-github-advancedlogic-goose 0.0~git20210820.9d5822d%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 516 kB
  • sloc: makefile: 128; sh: 11
file content (47 lines) | stat: -rw-r--r-- 1,243 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
package goose

import (
	"strconv"

	resty "github.com/go-resty/resty/v2"
	"github.com/pkg/errors"
)

// HtmlRequester abstracts retrieval of a page's HTML. Its single method takes
// one string (the page URL in this file's implementation) and returns the
// fetched content as a string, or an error.
//
// The method is unexported, so the interface can only be satisfied by types
// declared in this package (or by types embedding one of them).
type HtmlRequester interface {
	fetchHTML(string) (string, error)
}

// htmlrequester is the HtmlRequester implementation provided by this file; it
// fetches the target HTML page over HTTP.
type htmlrequester struct {
	// config supplies the request settings; fetchHTML reads its timeout field.
	config Configuration
}

// NewHtmlRequester returns an HtmlRequester backed by htmlrequester, storing
// the supplied config so that later fetches can read their settings from it.
func NewHtmlRequester(config Configuration) HtmlRequester {
	var requester htmlrequester
	requester.config = config
	return requester
}

// fetchHTML performs an HTTP GET on url and returns the response body as a
// string.
//
// A fresh resty client is created for each call and its timeout is taken from
// hr.config.timeout. Two failure modes are reported:
//   - the request itself fails: the underlying error is returned, wrapped with
//     the URL for context;
//   - the server answers with a status that resty classifies as an error
//     (resp.IsError): a *badRequest is returned whose Message contains the URL
//     and the decimal HTTP status code.
//
// On any failure the returned string is empty.
func (hr htmlrequester) fetchHTML(url string) (string, error) {
	client := resty.New()
	client.SetTimeout(hr.config.timeout)
	resp, err := client.R().
		SetHeader("Content-Type", "text/html").
		SetHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.91 Safari/534.30").
		Get(url)
	if err != nil {
		return "", errors.Wrap(err, "could not perform request on "+url)
	}
	if resp.IsError() {
		// strconv.Itoa renders the code in decimal ("404"). A plain
		// string(int) conversion would instead produce the single rune with
		// that code point (404 -> U+0194), garbling the message.
		code := strconv.Itoa(resp.StatusCode())
		return "", &badRequest{Message: "could not perform request with " + url + " status code " + code}
	}
	return resp.String(), nil
}

// badRequest is an error value describing a request that could not be
// completed successfully. Message carries the human-readable description of
// what went wrong and is serialised as "message" in JSON (omitted when empty).
type badRequest struct {
	Message string `json:"message,omitempty"`
}

// Error implements the error interface.
//
// It returns Message when one has been set, so the details recorded by the
// code that created the error actually reach the caller. When Message is
// empty it falls back to the generic text this method has always returned.
func (e *badRequest) Error() string {
	if e.Message != "" {
		return e.Message
	}
	return "Required request fields are not filled"
}