File: requester.py

package info (click to toggle)
dirsearch 0.4.2%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 864 kB
  • sloc: python: 3,486; makefile: 2; sh: 2
file content (268 lines) | stat: -rwxr-xr-x 9,121 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# -*- coding: utf-8 -*-
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#
#  Author: Mauro Soria

import urllib3
import http.client
import random
import socket
import requests

from urllib.parse import urlparse, urljoin

from lib.utils.fmt import safequote
from requests.adapters import HTTPAdapter
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
from requests_ntlm import HttpNtlmAuth
from .request_exception import RequestException
from .response import Response

urllib3.disable_warnings()


class Requester(object):
    """HTTP client bound to one target (scheme, host, port and base path).

    The constructor parses the target URL, optionally resolves the host name
    once, prepares a default ``Host`` header and creates a ``requests``
    session. :meth:`request` then sends one request per path, optionally
    through a proxy and optionally following redirects by hand.
    """

    # Proxy URL prefixes accepted as-is; any other proxy string is treated as
    # a bare "host:port" and gets "http://" prepended.
    _PROXY_SCHEMES = (
        "http://",
        "https://",
        "socks5://",
        "socks5h://",
        "socks4://",
        "socks4a://",
    )

    # Upper bound on the number of requests sent by a single request() call
    # (the initial request plus followed redirects).
    _MAX_REQUESTS = 6

    def __init__(
        self,
        url,
        max_pool=1,
        max_retries=5,
        timeout=20,
        ip=None,
        proxy=None,
        proxylist=None,
        redirect=False,
        request_by_hostname=False,
        httpmethod="get",
        data=None,
        scheme=None,
    ):
        """Parse *url* and prepare the session used for later requests.

        Args:
            url: Target URL; may omit the ``scheme://`` prefix.
            max_pool: Stored on the instance as ``max_pool``.
            max_retries: Passed to the ``HTTPAdapter`` mounted on the session.
            timeout: Timeout handed to every request.
            ip: Address to connect to instead of resolving the host name.
            proxy: Default proxy URL (or bare ``host:port``).
            proxylist: Proxies to pick from at random for each request.
            redirect: Whether :meth:`request` follows redirects.
            request_by_hostname: Build request URLs from the host name
                instead of the resolved IP address.
            httpmethod: HTTP method used for every request.
            data: Request body sent with every request.
            scheme: Scheme used when *url* has none; ``http`` if not given.

        Raises:
            RequestException: Unsupported scheme, unresolvable host name or
                invalid port number.
        """
        self.httpmethod = httpmethod
        self.data = data
        self.headers = {}

        parsed = urlparse(url)

        # If no protocol specified, use the given scheme (http by default)
        if "://" not in url:
            parsed = urlparse("{0}://{1}".format(scheme or "http", url))

        # If protocol is not supported
        elif parsed.scheme not in ["https", "http"]:
            raise RequestException({"message": "Unsupported URL scheme: {0}".format(parsed.scheme)})

        self.base_path = parsed.path
        if parsed.path.startswith("/"):
            self.base_path = parsed.path[1:]

        # Safe quote all special characters in base_path to prevent from being encoded
        self.base_path = safequote(self.base_path)
        self.protocol = parsed.scheme

        netloc_parts = parsed.netloc.split(":")
        self.host = netloc_parts[0]

        # Resolve DNS once to decrease overhead. A proxy could use a different
        # DNS that would resolve the name, therefore failing on a local
        # resolution error when a proxy is used is pointless: in that case
        # the address stays unknown (None).
        self.ip = None
        if ip:
            self.ip = ip
        elif not proxy and not proxylist:
            self.ip = self._resolve_ip(self.host)

        # If no port specified, set default (80, 443)
        try:
            self.port = int(netloc_parts[1])
        except IndexError:
            self.port = 443 if self.protocol == "https" else 80
        except ValueError:
            raise RequestException(
                {"message": "Invalid port number: {0}".format(netloc_parts[1])}
            )

        # Set the Host header, this will be overwritten if the user has already set the header
        self.headers["Host"] = self.host

        # Include port in Host header if it's non-standard
        if (self.protocol == "https" and self.port != 443) or (
            self.protocol == "http" and self.port != 80
        ):
            self.headers["Host"] += ":{0}".format(self.port)

        self.max_retries = max_retries
        self.max_pool = max_pool
        self.timeout = timeout
        self.pool = None
        self.proxy = proxy
        self.proxylist = proxylist
        self.redirect = redirect
        self.random_agents = None
        self.auth = None
        self.request_by_hostname = request_by_hostname
        self.session = requests.Session()

        # Connect by host name when asked to, or when no address is known
        # (proxy in use and no explicit ip given).
        if self.request_by_hostname or self.ip is None:
            address = self.host
        elif ":" in self.ip and not self.ip.startswith("["):
            # An IPv6 literal must be bracketed inside a URL
            address = "[{0}]".format(self.ip)
        else:
            address = self.ip

        self.url = "{0}://{1}:{2}/".format(self.protocol, address, self.port)
        self.base_url = "{0}://{1}:{2}/".format(
            self.protocol,
            self.host,
            self.port,
        )
        self.set_adapter()

    @staticmethod
    def _resolve_ip(host):
        """Return an IPv4 address for *host*, or an IPv6 one if it has no IPv4.

        Raises:
            RequestException: If neither lookup succeeds.
        """
        try:
            return socket.gethostbyname(host)
        except socket.gaierror:
            pass

        # gethostbyname() is IPv4-only; check if the name resolves to IPv6
        try:
            infos = socket.getaddrinfo(host, None, socket.AF_INET6, socket.SOCK_STREAM)
            # Each entry is (family, type, proto, canonname, sockaddr)
            return infos[0][4][0]
        except socket.gaierror:
            raise RequestException({"message": "Couldn't resolve DNS"})

    @classmethod
    def _normalize_proxy(cls, proxy):
        """Return *proxy* with ``http://`` prepended when it has no known scheme."""
        if proxy.startswith(cls._PROXY_SCHEMES):
            return proxy
        return "http://" + proxy

    @staticmethod
    def _build_proxies(proxy):
        """Return the ``proxies`` mapping for *proxy*, or None when it is empty."""
        if not proxy:
            return None
        if proxy.startswith("https://"):
            return {"https": proxy}
        return {"https": proxy, "http": proxy}

    def set_adapter(self):
        """Mount an ``HTTPAdapter`` with the configured retries on ``self.url``."""
        self.session.mount(self.url, HTTPAdapter(max_retries=self.max_retries))

    def set_header(self, key, value):
        """Store a request header, stripping whitespace around key and value."""
        self.headers[key.strip()] = value.strip() if value else value

    def set_random_agents(self, agents):
        """Set the User-Agent values to pick from at random for each request."""
        self.random_agents = list(agents)

    def set_auth(self, type, credential):
        """Configure authentication.

        Args:
            type: ``"bearer"``, ``"basic"`` or ``"digest"``; any other value
                selects NTLM.
            credential: The token for bearer auth, otherwise
                ``user:password`` (the password may itself contain ``:``).
        """
        if type == "bearer":
            self.set_header("Authorization", "Bearer {0}".format(credential))
            return

        # Split on the first colon only; a missing password becomes ""
        user, _, password = credential.partition(":")

        if type == "basic":
            self.auth = HTTPBasicAuth(user, password)
        elif type == "digest":
            self.auth = HTTPDigestAuth(user, password)
        else:
            self.auth = HttpNtlmAuth(user, password)

    def request(self, path, proxy=None):
        """Request *path* below the base path and return the wrapped response.

        Args:
            path: Path appended to the target URL and base path.
            proxy: Proxy for this request; when empty, a random entry of the
                proxy list or the default proxy is used, if any.

        Returns:
            A ``Response`` wrapping the final HTTP response.

        Raises:
            RequestException: On any failure (SSL, proxy, connection,
                timeout, invalid URL, too many redirects, or anything else).
        """
        result = None
        # Remember which URL prefix this attempt uses so the SSL fallback
        # below can tell whether the host name has already been tried.
        target = self.url

        try:
            if not proxy:
                if self.proxylist:
                    proxy = random.choice(self.proxylist)
                elif self.proxy:
                    proxy = self.proxy

            if proxy:
                proxy = self._normalize_proxy(proxy)
            proxies = self._build_proxies(proxy)

            url = target + self.base_path + path

            if self.random_agents:
                self.headers["User-Agent"] = random.choice(self.random_agents)

            # We can't just do `allow_redirects=True` because we set the host header in
            # optional request headers, which will be kept in next requests (follow redirects)
            headers = self.headers.copy()
            for _ in range(self._MAX_REQUESTS):
                request = requests.Request(
                    self.httpmethod,
                    url=url,
                    headers=headers,
                    auth=self.auth,
                    data=self.data,
                )
                prepare = request.prepare()
                # Send the URL exactly as built, not as normalized by prepare()
                prepare.url = url

                response = self.session.send(
                    prepare,
                    proxies=proxies,
                    allow_redirects=False,
                    timeout=self.timeout,
                    stream=True,
                    verify=False,
                )
                result = Response(response)

                if not (self.redirect and result.redirect):
                    break

                # Follow the redirect by hand and point Host at the new target
                url = urljoin(url, result.redirect)
                headers["Host"] = url.split("/")[2]
            else:
                # Every allowed request ended in yet another redirect
                raise requests.exceptions.TooManyRedirects

        except requests.exceptions.SSLError:
            # Retry once by host name; if this attempt already used the host
            # name there is nothing left to fall back to.
            if target == self.base_url:
                raise RequestException(
                    {"message": "SSL error connecting to: {0}:{1}".format(self.host, self.port)}
                )
            self.url = self.base_url
            self.set_adapter()
            result = self.request(path, proxy=proxy)

        except requests.exceptions.TooManyRedirects:
            raise RequestException(
                {"message": "Too many redirects: {0}".format(self.base_url)}
            )

        except requests.exceptions.ProxyError:
            raise RequestException(
                {"message": "Error with the proxy: {0}".format(proxy)}
            )

        # InvalidProxyURL derives from InvalidURL, so it must be handled first
        except requests.exceptions.InvalidProxyURL:
            raise RequestException(
                {"message": "Invalid proxy URL: {0}".format(proxy)}
            )

        except requests.exceptions.InvalidURL:
            raise RequestException(
                {"message": "Invalid URL: {0}".format(self.base_url)}
            )

        # ConnectTimeout derives from ConnectionError, so timeouts must be
        # handled before the generic connection failure below
        except (
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
            requests.exceptions.Timeout,
            http.client.IncompleteRead,
            socket.timeout,
        ):
            raise RequestException(
                {"message": "Request timeout: {0}".format(self.base_url)}
            )

        except requests.exceptions.ConnectionError:
            raise RequestException(
                {"message": "Cannot connect to: {0}:{1}".format(self.host, self.port)}
            )

        except Exception:
            raise RequestException(
                {"message": "There was a problem in the request to: {0}".format(self.base_url)}
            )

        return result