File: __init__.py

package info (click to toggle)
python-cloudscraper 1.2.71~git20230426.cbb3c0ea-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,608 kB
  • sloc: python: 2,496; makefile: 37
file content (118 lines) | stat: -rw-r--r-- 3,917 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# -*- coding: utf-8 -*-
import functools
import hashlib
from io import open
from os import path

try:
    from urlparse import parse_qsl
except ImportError:
    from urllib.parse import parse_qsl

import responses

# Placeholder address for the mocked site; real network requests are refused
# by default while the decorator is active
url = 'http://www.evildomain.com'

# Keyword arguments the decorator hands to every decorated test
cloudscraper_kwargs = {'delay': 0.01, 'debug': False}

# In-memory store so each Cloudflare challenge fixture is read from the FS once
cache = {}

# ------------------------------------------------------------------------------- #


def fixtures(filename):
    """
    Read and cache a challenge fixture

    The file is looked up in the ``fixtures`` directory located next to this
    module. It is read from disk on the first request only; later calls for
    the same name are answered from the module-level ``cache`` dict.

    Args:
        filename (str): Name of the fixture file inside ``fixtures``

    Returns: HTML (str): The decoded text of the challenge fixture

    Raises:
        IOError / OSError: If the fixture file cannot be opened
    """
    # Check key membership instead of truthiness: an empty fixture is a valid
    # cached value and must not trigger another disk read on every call.
    if filename not in cache:
        fixture_path = path.join(path.dirname(__file__), 'fixtures', filename)
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding of the machine running the tests.
        with open(fixture_path, 'r', encoding='utf-8') as fp:
            cache[filename] = fp.read()
    return cache[filename]

# ------------------------------------------------------------------------------- #


def mockCloudflare(fixture, payload):
    """
    Build a decorator that serves a mocked Cloudflare challenge to a test

    While the decorated test runs, ``responses`` is active and two callbacks
    are registered for the module-level fake ``url``:

    * ``GET`` answers with the challenge ``fixture``; the status is 403 when
      the fixture name contains ``reCaptcha`` or ``1020``, otherwise 503.
    * ``POST`` answers ``200`` / ``Solved OK`` when every key of ``payload``
      is present in the submitted form with an equal value; otherwise the
      challenge fixture is served again with status 503.

    Before the comparison the submitted ``r`` field (or an empty string when
    it is missing) is replaced by its SHA-256 hex digest, so ``payload['r']``
    is compared against that digest rather than the raw value.

    Args:
        fixture (str): File name of the challenge fixture, passed to ``fixtures()``
        payload (mapping): Form fields the challenge answer is expected to contain

    Returns: callable: A decorator for test methods. The decorated method is
        called as ``test(self, **cloudscraper_kwargs)``.
    """
    def responses_decorator(test):
        def wrapper(self):
            def post_callback(request):
                postPayload = dict(parse_qsl(request.body))
                # Compare the hash of ``r`` instead of the raw submitted value
                postPayload['r'] = hashlib.sha256(postPayload.get('r', '').encode('ascii')).hexdigest()

                # Any missing or differing field means the challenge was not
                # solved: serve the challenge page again
                for param in payload:
                    if param not in postPayload or postPayload[param] != payload[param]:
                        return (
                            503,
                            {'Server': 'cloudflare'},
                            fixtures(fixture)
                        )

                # ------------------------------------------------------------------------------- #

                # The cookie is sent twice, differing only in the casing of
                # the ``Domain`` attribute name
                return (
                    200,
                    [
                        (
                            'Set-Cookie', '__cfduid=d5927a7cbaa96ec536939f93648e3c08a1576098703; Domain=.evildomain.com; path=/'
                        ),
                        (
                            'Set-Cookie',
                            '__cfduid=d5927a7cbaa96ec536939f93648e3c08a1576098703; domain=.evildomain.com; path=/'
                        ),
                        ('Server', 'cloudflare')
                    ],
                    'Solved OK'
                )

            # ------------------------------------------------------------------------------- #

            def challengeCallback(request):
                status_code = 503

                # reCaptcha and 1020 fixtures are served as 403 instead of 503
                if 'reCaptcha' in fixture or '1020' in fixture:
                    status_code = 403
                return (
                    status_code,
                    [
                        (
                            'Set-Cookie',
                            '__cfduid=d5927a7cbaa96ec536939f93648e3c08a1576098703; Domain=.evildomain.com; path=/'
                        ),
                        ('Server', 'cloudflare')
                    ],
                    fixtures(fixture)
                )

            # ------------------------------------------------------------------------------- #

            responses.add_callback(
                responses.POST,
                url,
                callback=post_callback,
                content_type='text/html',
            )

            responses.add_callback(
                responses.GET,
                url,
                callback=challengeCallback,
                content_type='text/html',
            )

            # ------------------------------------------------------------------------------- #

            return test(self, **cloudscraper_kwargs)

        # ------------------------------------------------------------------------------- #

        # Carry the test's name, docstring and attributes over to the wrapper
        # so reports and introspection show the real test, not ``wrapper``.
        functools.update_wrapper(wrapper, test)

        # update_wrapper also records the test as ``__wrapped__`` (Python 3),
        # which ``inspect.signature`` follows. Drop it so signature-based
        # tooling keeps seeing wrapper's own ``(self)`` signature, exactly as
        # it did before the metadata was copied.
        wrapper.__dict__.pop('__wrapped__', None)

        return responses.activate(wrapper)

    # ------------------------------------------------------------------------------- #

    return responses_decorator