1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
|
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
class $classname(CrawlSpider):
name = "$name"
allowed_domains = ["$domain"]
start_urls = ["$url"]
rules = (Rule(LinkExtractor(allow=r"Items/"), callback="parse_item", follow=True),)
def parse_item(self, response):
item = {}
#item["domain_id"] = response.xpath('//input[@id="sid"]/@value').get()
#item["name"] = response.xpath('//div[@id="name"]').get()
#item["description"] = response.xpath('//div[@id="description"]').get()
return item
|