# Tests for the experimental lxml-based item loader
# (scrapy.contrib_exp.loader.lxmlloader.LxmlItemLoader).
from twisted.trial import unittest
from scrapy.contrib.loader.processor import MapCompose
from scrapy.item import Item, Field
from scrapy.http import HtmlResponse
# lxml is an optional dependency. When it cannot be imported, the name is
# bound to False so that the rest of the module can test it with a plain
# truthiness check (``if lxml`` / ``if not lxml``).
try:
    import lxml
except ImportError:
    lxml = False
class TestItem(Item):
    """Minimal item with a single ``name`` field, used as the loader target."""

    name = Field()
if lxml:
    # Only pull in the loader when lxml imported successfully.
    # NOTE(review): presumably lxmlloader needs lxml at import time - confirm.
    from scrapy.contrib_exp.loader.lxmlloader import LxmlItemLoader

    class TestLxmlItemLoader(LxmlItemLoader):
        """LxmlItemLoader that populates ``TestItem`` instances by default."""

        default_item_class = TestItem
class LxmlItemLoaderTest(unittest.TestCase):
    """Exercise the XPath and CSS helpers of ``TestLxmlItemLoader``.

    All tests run against one small HTML document containing a
    ``<div id="id">`` and a ``<p>`` element. Every test is skipped (see
    ``setUp``) when lxml could not be imported.
    """

    # Shared fixture: the HTML page every test builds its loader from.
    response = HtmlResponse(url="", body='<html><body><div id="id">marta</div><p>paragraph</p></body></html>')

    def setUp(self):
        """Skip the test when lxml is unavailable.

        ``TestLxmlItemLoader`` is only defined when lxml imported, so the
        tests cannot run without it.
        """
        if not lxml:
            raise unittest.SkipTest("lxml is not available")

    def test_constructor_with_response(self):
        """Building a loader from a response yields a truthy ``tree``."""
        loader = TestLxmlItemLoader(response=self.response)
        # assertTrue replaces the deprecated ``assert_`` alias.
        self.assertTrue(loader.tree)

    def test_add_xpath(self):
        """An element XPath collects the serialized element markup."""
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_xpath('name', '//div')
        self.assertEqual(loader.get_output_value('name'), [u'<div id="id">marta</div>'])

    def test_add_xpath_text(self):
        """A ``text()`` XPath collects only the text content."""
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_xpath('name', '//div/text()')
        self.assertEqual(loader.get_output_value('name'), [u'marta'])

    def test_replace_xpath(self):
        """``replace_xpath`` discards previously collected values."""
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_xpath('name', '//div/text()')
        self.assertEqual(loader.get_output_value('name'), [u'marta'])
        loader.replace_xpath('name', '//p/text()')
        self.assertEqual(loader.get_output_value('name'), [u'paragraph'])

    def test_add_css(self):
        """A CSS selector collects the serialized matching element."""
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_css('name', '#id')
        self.assertEqual(loader.get_output_value('name'), [u'<div id="id">marta</div>'])

    def test_replace_css(self):
        """``replace_css`` discards previously collected values."""
        loader = TestLxmlItemLoader(response=self.response)
        loader.add_css('name', '#id')
        self.assertEqual(loader.get_output_value('name'), [u'<div id="id">marta</div>'])
        loader.replace_css('name', 'p')
        self.assertEqual(loader.get_output_value('name'), [u'<p>paragraph</p>'])
if __name__ == "__main__":
    # ``unittest`` in this module is twisted.trial.unittest.
    # NOTE(review): it does not appear to provide a ``main()`` entry point,
    # so the standard-library runner is imported locally under another name
    # to make direct script execution work. Confirm against the Twisted
    # version in use.
    import unittest as pyunit

    pyunit.main()
# End of lxml item loader tests.