"""
This module contains some assorted functions used in tests
"""
import asyncio
import os
from posixpath import split
from unittest import mock
from importlib import import_module
from twisted.trial.unittest import SkipTest
from scrapy.utils.boto import is_botocore_available


def assert_gcs_environ():
    if 'GCS_PROJECT_ID' not in os.environ:
        raise SkipTest("GCS_PROJECT_ID not found")


def skip_if_no_boto():
    if not is_botocore_available():
        raise SkipTest('missing botocore library')


def get_gcs_content_and_delete(bucket, path):
    from google.cloud import storage
    client = storage.Client(project=os.environ.get('GCS_PROJECT_ID'))
    bucket = client.get_bucket(bucket)
    blob = bucket.get_blob(path)
    content = blob.download_as_string()
    acl = list(blob.acl)  # load the ACL before the blob is deleted
    bucket.delete_blob(path)
    return content, acl, blob
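
# Usage sketch (hedged: assumes GCS_PROJECT_ID is exported and that the bucket
# and object exist; the bucket/path names below are illustrative only):
#
#     content, acl, blob = get_gcs_content_and_delete(
#         'my-test-bucket', 'feed-exports/items.json')
#     assert isinstance(content, bytes)  # download_as_string returns bytes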


def get_ftp_content_and_delete(
        path, host, port, username,
        password, use_active_mode=False):
    from ftplib import FTP
    ftp = FTP()
    ftp.connect(host, port)
    ftp.login(username, password)
    if use_active_mode:
        ftp.set_pasv(False)
    ftp_data = []

    def buffer_data(data):
        ftp_data.append(data)

    ftp.retrbinary(f'RETR {path}', buffer_data)
    dirname, filename = split(path)
    ftp.cwd(dirname)
    ftp.delete(filename)
    # retrbinary delivers bytes chunks, so they must be joined as bytes
    return b"".join(ftp_data)


def get_crawler(spidercls=None, settings_dict=None):
    """Return an unconfigured Crawler object. If settings_dict is given, it
    will be used to populate the crawler settings with a project level
    priority.
    """
    from scrapy.crawler import CrawlerRunner
    from scrapy.spiders import Spider
    runner = CrawlerRunner(settings_dict)
    return runner.create_crawler(spidercls or Spider)
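
# Minimal usage sketch (the setting name is illustrative; any settings_dict
# entries are applied at project-level priority, per the docstring above):
#
#     crawler = get_crawler(settings_dict={'LOG_LEVEL': 'WARNING'})
#     assert crawler.settings.get('LOG_LEVEL') == 'WARNING'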


def get_pythonpath():
    """Return a PYTHONPATH suitable to use in processes so that they find this
    installation of Scrapy"""
    scrapy_path = import_module('scrapy').__path__[0]
    return os.path.dirname(scrapy_path) + os.pathsep + os.environ.get('PYTHONPATH', '')


def get_testenv():
    """Return an OS environment dict suitable to fork processes that need to
    import this installation of Scrapy, instead of a system-installed one.
    """
    env = os.environ.copy()
    env['PYTHONPATH'] = get_pythonpath()
    return env
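
# Usage sketch (a plain subprocess call; the '-c' payload is illustrative):
#
#     import subprocess, sys
#     subprocess.check_call(
#         [sys.executable, '-c', 'import scrapy'], env=get_testenv())
#
# The patched PYTHONPATH makes the child process import this checkout of
# Scrapy rather than any system-wide installation.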


def assert_samelines(testcase, text1, text2, msg=None):
    """Assert that text1 and text2 have the same lines, ignoring differences
    in line endings between platforms
    """
    testcase.assertEqual(text1.splitlines(), text2.splitlines(), msg)
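
# Example (a sketch, given some unittest.TestCase instance `testcase`):
# line-ending differences are ignored, so this assertion passes:
#
#     assert_samelines(testcase, "a\nb\n", "a\r\nb\r\n")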


def get_from_asyncio_queue(value):
    # Put the value on a fresh queue and return the (not yet awaited) getter
    # coroutine, so callers can exercise awaiting an asyncio result.
    q = asyncio.Queue()
    getter = q.get()
    q.put_nowait(value)
    return getter
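
# Example (a sketch): the returned coroutine resolves immediately because the
# value is already queued:
#
#     async def main():
#         assert await get_from_asyncio_queue(42) == 42
#
#     asyncio.run(main())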


def mock_google_cloud_storage():
    """Create autospec mocks for the google-cloud-storage Client, Bucket and
    Blob classes, and wire up their return values accordingly.
    """
    from google.cloud.storage import Client, Bucket, Blob
    client_mock = mock.create_autospec(Client)
    bucket_mock = mock.create_autospec(Bucket)
    client_mock.get_bucket.return_value = bucket_mock
    blob_mock = mock.create_autospec(Blob)
    bucket_mock.blob.return_value = blob_mock
    return (client_mock, bucket_mock, blob_mock)
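
# Usage sketch (hedged: the patch target depends on how the code under test
# imports Client; 'google.cloud.storage.Client' is assumed here, and
# upload_from_string is just one representative Blob method):
#
#     client_mock, bucket_mock, blob_mock = mock_google_cloud_storage()
#     with mock.patch('google.cloud.storage.Client', return_value=client_mock):
#         ...  # exercise code that writes to GCS
#     blob_mock.upload_from_string.assert_called_once()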