File: test_robotstxt.py

package info (click to toggle)
dosage 3.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 1,400 kB
  • sloc: python: 12,703; sh: 55; makefile: 6
file content (44 lines) | stat: -rw-r--r-- 1,389 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: © 2025 Tobias Gruetzmacher
import pytest
import requests
import responses

from dosagelib import http


@responses.activate
def test_no_robotstxt():
    """A site without a robots.txt (HTTP 404) must not block fetching."""
    responses.get('http://a/robots.txt', status=404)
    session = requests.Session()
    http.check_robotstxt("http://a/somefile.html", session)


@responses.activate
def test_err_robotstxt():
    """A network error while fetching robots.txt must not block fetching."""
    responses.get('http://b/robots.txt', body=IOError("Timeout!"))
    session = requests.Session()
    http.check_robotstxt("http://b/somefile.html", session)


@responses.activate
def test_empty_robotstxt():
    """An empty robots.txt (200 with no body) must not block fetching."""
    responses.get('http://c/robots.txt')
    session = requests.Session()
    http.check_robotstxt("http://c/somefile.html", session)


@responses.activate
def test_allowed_robotstxt():
    """A robots.txt with an empty Disallow rule allows everything."""
    responses.get('http://d/robots.txt', body="User-agent: *\nDisallow:")
    session = requests.Session()
    http.check_robotstxt("http://d/somefile.html", session)


@responses.activate
def test_all_denied_robotstxt():
    """A blanket 'Disallow: /' for all agents does NOT raise.

    NOTE(review): presumably check_robotstxt only honors rules that
    explicitly name the dosage agent (compare the explicit-deny test
    below, which does expect a raise) — confirm against dosagelib.http.
    """
    responses.get('http://e/robots.txt', body="User-agent: *\nDisallow: /")
    session = requests.Session()
    http.check_robotstxt("http://e/somefile.html", session)


@responses.activate
def test_explit_denied_robotstxt():
    """A robots.txt that explicitly disallows the dosage agent raises IOError."""
    # NOTE(review): function name has a typo ("explit" -> "explicit");
    # kept unchanged so the pytest node ID stays stable.
    responses.get('http://f/robots.txt', body="User-agent: dosage\nDisallow: /")
    session = requests.Session()
    with pytest.raises(IOError):  # noqa: PT011 # FIXME: Refactor code to use another exception?
        http.check_robotstxt("http://f/somefile.html", session)