File: ip_address.py

from urllib.parse import urlparse

from twisted.internet import reactor
from twisted.names import cache, hosts as hostsModule, resolve
from twisted.names.client import Resolver
from twisted.python.runtime import platform

from scrapy import Spider, Request
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging

from tests.mockserver import MockServer, MockDNSServer


# https://stackoverflow.com/a/32784190
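# Mirrors twisted.names.client.createResolver(): resolution goes first
# through the hosts file, then a cache, and finally the given DNS
# ``servers``, which lets us point lookups at an arbitrary nameserver.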
def createResolver(servers=None, resolvconf=None, hosts=None):
    if hosts is None:
        hosts = b'/etc/hosts' if platform.getType() == 'posix' else r'c:\windows\hosts'
    theResolver = Resolver(resolvconf, servers)
    hostResolver = hostsModule.Resolver(hosts)
    chain = [hostResolver, cache.CacheResolver(), theResolver]
    return resolve.ResolverChain(chain)


class LocalhostSpider(Spider):
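    """Fetch a single URL and log the host name along with the type and
    value of ``response.ip_address``."""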
    name = "localhost_spider"

    def start_requests(self):
        yield Request(self.url)

    def parse(self, response):
        netloc = urlparse(response.url).netloc
        self.logger.info("Host: %s" % netloc.split(":")[0])
        self.logger.info("Type: %s" % type(response.ip_address))
        self.logger.info("IP address: %s" % response.ip_address)


if __name__ == "__main__":
    with MockServer() as mock_http_server, MockDNSServer() as mock_dns_server:
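        # A fake domain forces name resolution through the mock DNS server;
        # the port points the request at the mock HTTP server.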
        port = urlparse(mock_http_server.http_address).port
        url = f"http://not.a.real.domain:{port}/echo"

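        # Replace the reactor's default resolver with a chain whose upstream
        # nameserver is the mock DNS server.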
        servers = [(mock_dns_server.host, mock_dns_server.port)]
        reactor.installResolver(createResolver(servers=servers))

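        # Run the spider once and stop the reactor when the crawl deferred
        # fires, whether it succeeded or failed.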
        configure_logging()
        runner = CrawlerRunner()
        d = runner.crawl(LocalhostSpider, url=url)
        d.addBoth(lambda _: reactor.stop())
        reactor.run()
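
# Expected log output, assuming the mock DNS server resolves every name to
# 127.0.0.1 (as in Scrapy's test suite):
#   Host: not.a.real.domain
#   Type: <class 'ipaddress.IPv4Address'>
#   IP address: 127.0.0.1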