1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
|
"""
The tool to check the availability or syntax of domain, IP or URL.
::
██████╗ ██╗ ██╗███████╗██╗ ██╗███╗ ██╗ ██████╗███████╗██████╗ ██╗ ███████╗
██╔══██╗╚██╗ ██╔╝██╔════╝██║ ██║████╗ ██║██╔════╝██╔════╝██╔══██╗██║ ██╔════╝
██████╔╝ ╚████╔╝ █████╗ ██║ ██║██╔██╗ ██║██║ █████╗ ██████╔╝██║ █████╗
██╔═══╝ ╚██╔╝ ██╔══╝ ██║ ██║██║╚██╗██║██║ ██╔══╝ ██╔══██╗██║ ██╔══╝
██║ ██║ ██║ ╚██████╔╝██║ ╚████║╚██████╗███████╗██████╔╝███████╗███████╗
╚═╝ ╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝╚══════╝╚═════╝ ╚══════╝╚══════╝
Provides our miner worker. This is the description of a single miner worker.
Author:
Nissar Chababy, @funilrys, contactTATAfunilrysTODTODcom
Special thanks:
https://pyfunceble.github.io/#/special-thanks
Contributors:
https://pyfunceble.github.io/#/contributors
Project link:
https://github.com/funilrys/PyFunceble
Project documentation:
https://docs.pyfunceble.com
Project homepage:
https://pyfunceble.github.io/
License:
::
Copyright 2017, 2018, 2019, 2020, 2022, 2023, 2024 Nissar Chababy
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import copy
import socket
from typing import List, Optional, Tuple
from urllib.parse import urljoin, urlparse

from domain2idna import domain2idna

import PyFunceble.facility
import PyFunceble.factory
import PyFunceble.storage
from PyFunceble.checker.status_base import CheckerStatusBase
from PyFunceble.cli.processes.workers.base import WorkerBase
from PyFunceble.cli.utils.stdout import print_single_line
from PyFunceble.converter.url2netloc import Url2Netloc
class MinerWorker(WorkerBase):
    """
    Provides our miner worker. The objective of this worker is to provide
    a single worker (or process if you prefer) which will be used to handle
    the mining of dataset to test.

    For every consumed test result whose status is not one of
    :attr:`INACTIVE_STATUSES`, the worker requests the tested subject,
    collects the redirection targets found in the response history and
    pushes each of them to the output queue as a new dataset flagged with
    :code:`from_miner`.
    """

    STD_NAME: str = "pyfunceble_miner_worker"

    # Statuses for which no mining is attempted.
    INACTIVE_STATUSES: Tuple[str, ...] = (
        PyFunceble.storage.STATUS.down,
        PyFunceble.storage.STATUS.invalid,
    )

    # Converter used to extract the network location out of the mined URLs.
    # Instantiated in __post_init__.
    url2netloc: Optional[Url2Netloc] = None

    def __post_init__(self) -> None:
        """
        Instantiates the URL-to-netloc converter, then delegates to the
        parent's :code:`__post_init__`.
        """

        self.url2netloc = Url2Netloc()

        return super().__post_init__()

    @staticmethod
    def _resolve_location(base: str, location: str) -> str:
        """
        Resolves a :code:`Location` header value against the URL of the
        response which provided it.

        :param base:
            The URL of the response carrying the header.
        :param location:
            The raw value of the :code:`Location` header.

        :return:
            :code:`location` untouched when it already carries a scheme
            (or cannot be parsed), otherwise the absolute URL obtained by
            joining it with :code:`base`.
        """

        try:
            if urlparse(location).scheme:
                # Already absolute: keep it byte-for-byte.
                return location

            # Relative references (e.g. "/login" or "//host/path") are legal
            # in a Location header; they only make sense once resolved.
            return urljoin(base, location)
        except ValueError:
            # urllib refuses some malformed values (e.g. a broken IPv6
            # literal). Mining is best-effort, so the raw value is kept.
            return location

    @staticmethod
    def mine_from(subject: str) -> Optional[List[str]]:
        """
        Given the subject to work from, try to get the related subjects.

        The subject is requested with redirections enabled; every
        :code:`Location` header found in the response history is collected.
        Request failures are logged and result in the subjects collected so
        far (possibly none) being returned.

        :param subject:
            The URL to start from.

        :return:
            The list of mined URLs. It is empty when nothing was found or
            when the request failed.
        """

        PyFunceble.facility.Logger.info("Started mining from %r", subject)

        result = []

        try:
            req = PyFunceble.factory.Requester.get(subject, allow_redirects=True)

            for element in req.history:
                if "location" in element.headers:
                    result.append(
                        MinerWorker._resolve_location(
                            # Fall back to the mined subject when the
                            # response does not expose its own URL.
                            getattr(element, "url", None) or subject,
                            element.headers["location"],
                        )
                    )

            # NOTE(review): history entries which are plain strings are kept
            # as-is. With response objects in the history this never
            # matches - confirm whether it is still needed.
            result.extend(x for x in req.history if isinstance(x, str))
        except (
            PyFunceble.factory.Requester.exceptions.RequestException,
            PyFunceble.factory.Requester.exceptions.ConnectionError,
            PyFunceble.factory.Requester.exceptions.Timeout,
            PyFunceble.factory.Requester.exceptions.InvalidURL,
            PyFunceble.factory.Requester.urllib3_exceptions.InvalidHeader,
            socket.timeout,
        ):
            PyFunceble.facility.Logger.error(
                "Could not mine from %r", subject, exc_info=True
            )

        PyFunceble.facility.Logger.debug("Mined from %r:\n%r.", subject, result)
        PyFunceble.facility.Logger.info("Finished mining from %r", subject)

        return result

    def target(self, consumed: Tuple[dict, CheckerStatusBase]) -> None:
        """
        Mines the consumed test result and pushes every newly found subject
        to the output queue.

        The consumed data is skipped when it is not a
        :code:`(dataset, status)` pair, when the dataset already comes from
        the miner, or when the status is one of :attr:`INACTIVE_STATUSES`.

        :param consumed:
            A tuple made of the tested dataset and the status object which
            resulted from its test.

        :return:
            Always :code:`None`; the mined datasets are added to the output
            queue manually.
        """

        if (
            not isinstance(consumed, tuple)
            # Guards both the index access below and the 2-value unpacking.
            or len(consumed) != 2
            or not isinstance(consumed[1], CheckerStatusBase)
        ):
            PyFunceble.facility.Logger.info(
                "Skipping latest dataset because consumed data was not a tuple."
            )
            return None

        # Just for human brain.
        test_dataset, test_result = consumed

        if "from_miner" in test_dataset:
            PyFunceble.facility.Logger.info(
                "Skipping dataset because it comes from the mining mechanism."
            )
            return None

        if test_result.status in self.INACTIVE_STATUSES:
            PyFunceble.facility.Logger.info(
                "Skipping dataset because status is not active."
            )
            return None

        is_domain = test_dataset["subject_type"] == "domain"

        if is_domain:
            subject = f"http://{test_result.idna_subject}:80"
        else:
            # Assuming it's already a URL.
            subject = test_result.idna_subject

        print_single_line("M")

        # NOTE(review): the meaning of the "pyfunceble" marker is defined by
        # whatever consumes the output queue - not visible from this class.
        self.add_to_output_queue("pyfunceble")
        self.share_waiting_message()

        mined = self.mine_from(subject)

        for url in mined:
            if is_domain:
                # Domains are tested, so only the host part is relevant.
                mined_subject = self.url2netloc.set_data_to_convert(
                    url
                ).get_converted()

                if ":" in mined_subject:
                    # Strip the port.
                    mined_subject = mined_subject[: mined_subject.find(":")]
            else:
                # Align the trailing slash with the tested subject so that
                # the comparison below is not fooled by it.
                if not test_result.idna_subject.endswith("/") and url.endswith("/"):
                    url = url[:-1]

                mined_subject = url

            if not mined_subject:
                # Nothing testable could be extracted from the mined URL.
                PyFunceble.facility.Logger.info(
                    "Skipping %r because no subject could be extracted.", url
                )
                continue

            # The original dataset is left untouched; each mined subject
            # gets its own copy.
            to_send = copy.deepcopy(test_dataset)
            to_send["from_miner"] = True
            to_send["subject"] = mined_subject
            to_send["idna_subject"] = domain2idna(mined_subject)

            if to_send["idna_subject"] == test_result.idna_subject:
                PyFunceble.facility.Logger.info(
                    "Skipping %r because found in test result.", url
                )
                continue

            self.add_to_output_queue(to_send)

        # Returning None because we manually add into the queue.
        return None
|