File: crawl.tmpl

package info (click to toggle)

python-scrapy 2.13.3-1

links: PTS, VCS
area: main
in suites: forky, sid
size: 5,664 kB
sloc: python: 52,028; xml: 199; makefile: 25; sh: 7

file content (18 lines) | stat: -rw-r--r-- 608 bytes

parent folder | download | duplicates (2)

import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule


class $classname(CrawlSpider):
    name = "$name"
    allowed_domains = ["$domain"]
    start_urls = ["$url"]

    rules = (Rule(LinkExtractor(allow=r"Items/"), callback="parse_item", follow=True),)

    def parse_item(self, response):
        item = {}
        #item["domain_id"] = response.xpath('//input[@id="sid"]/@value').get()
        #item["name"] = response.xpath('//div[@id="name"]').get()
        #item["description"] = response.xpath('//div[@id="description"]').get()
        return item