"""
This module contains assorted helper functions used in tests.
"""
import os
from importlib import import_module
from twisted.trial.unittest import SkipTest


def assert_aws_environ():
    """Assert that the current environment is suitable for running AWS tests.

    Raises SkipTest with the reason if it is not.
    """
    try:
        import boto  # only checking that boto is importable
    except ImportError as e:
        raise SkipTest(str(e))
    if 'AWS_ACCESS_KEY_ID' not in os.environ:
        raise SkipTest("AWS keys not found")


def get_crawler(settings_dict=None):
    """Return an unconfigured Crawler object. If settings_dict is given, it
    will be used to populate the crawler settings with a project level
    priority.
    """
    from scrapy.crawler import Crawler
    from scrapy.settings import Settings
    return Crawler(Settings(settings_dict))
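
# A minimal usage sketch, assuming a test needs a crawler with custom
# settings; DOWNLOAD_DELAY is a real Scrapy setting, the value is arbitrary:
#
#     crawler = get_crawler({'DOWNLOAD_DELAY': 2})
#     assert crawler.settings.get('DOWNLOAD_DELAY') == 2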


def get_pythonpath():
    """Return a PYTHONPATH suitable for use in processes so that they find
    this installation of Scrapy"""
    scrapy_path = import_module('scrapy').__path__[0]
    return (os.path.dirname(scrapy_path) + os.pathsep +
            os.environ.get('PYTHONPATH', ''))


def get_testenv():
    """Return an OS environment dict suitable to fork processes that need to
    import this installation of Scrapy, instead of a system-installed one.
    """
    env = os.environ.copy()
    env['PYTHONPATH'] = get_pythonpath()
    return env
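
# A minimal usage sketch, assuming the test spawns a child Python process
# that must import this Scrapy checkout rather than a system-wide install
# (scrapy.cmdline is Scrapy's command-line entry module):
#
#     import subprocess
#     import sys
#     out = subprocess.check_output(
#         [sys.executable, '-m', 'scrapy.cmdline', 'version'],
#         env=get_testenv())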


def get_testlog():
    """Return the Scrapy log of the current test, ignoring previous tests"""
    # Open in text mode: the lines are matched and joined as strings below.
    with open("test.log", "r") as fp:
        loglines = fp.readlines()
    thistest = []
    # Walk the log backwards, collecting lines until the separator that
    # marks the start of the current test is reached.
    for line in loglines[::-1]:
        thistest.append(line)
        if "[-] -->" in line:
            break
    return "".join(thistest[::-1])


def assert_samelines(testcase, text1, text2, msg=None):
    """Asserts text1 and text2 have the same lines, ignoring differences in
    line endings between platforms
    """
    testcase.assertEqual(text1.splitlines(), text2.splitlines(), msg)
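
# A minimal usage sketch: the two strings differ only in line endings, so
# the assertion passes regardless of platform:
#
#     assert_samelines(self, "a\nb\n", "a\r\nb\r\n")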


def docrawl(spider, settings=None):
    """Configure and start a Crawler; return the result of crawler.start()"""
    crawler = get_crawler(settings)
    crawler.configure()
    crawler.crawl(spider)
    return crawler.start()
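
# A minimal usage sketch inside a twisted.trial test case; "FollowAllSpider"
# stands in for whatever test spider the suite defines, and trial waits on
# the Deferred returned by crawler.start():
#
#     def test_follow_all(self):
#         spider = FollowAllSpider()
#         return docrawl(spider)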