1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
|
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
import re
from .. import util
from ..helpers import bounceStarter
from ..scraper import ParserScraper
class WLPComics(ParserScraper):
    """Shared base for the WLP comic scrapers defined in this module.

    Supplies the XPath expressions used to find the comic image and the
    previous/next links, and a common file-naming scheme.
    """

    # Comic images are recognised by an alt text containing " Comic".
    imageSearch = '//img[contains(@alt, " Comic")]'
    # Navigation anchors are recognised by their link text.
    prevSearch = '//a[contains(text(), "Previous ")]'
    nextSearch = '//a[contains(text(), "Next ")]'
    starter = bounceStarter
    help = 'Index format: nnn'

    def __init__(self, name):
        """Register the scraper under *name*, prefixed with ``WLP/``."""
        super().__init__('WLP/' + name)

    def namer(self, image_url, page_url):
        """Return ``<page stem>_<image file name>`` for a downloaded image.

        The page stem is the last path component of *page_url* up to its
        first dot; the image file name is the last element returned by
        ``util.urlpathsplit`` for *image_url*.
        """
        page_file = page_url.rsplit('/', 1)[-1]
        page_stem = page_file.split('.')[0]
        image_file = util.urlpathsplit(image_url)[-1]
        return page_stem + '_' + image_file
class ChichiChan(WLPComics):
    """Scraper definition for the "chichi" comic hosted on wlpcomics.com."""

    adult = True
    # Base address of the comic.
    url = 'http://www.wlpcomics.com/adult/chichi/'
    # Individual strips live directly below the base address as <index>.html.
    stripUrl = url + '%s.html'
class ChocolateMilkMaid(WLPComics):
    """Scraper definition for the "cm" comic hosted on wlpcomics.com."""

    # Newer pages seem to be broken, so the start page is pinned to 264.
    stripUrl = 'http://www.wlpcomics.com/adult/cm/%s.html'
    url = stripUrl % '264'
    adult = True

    def link_modifier(self, fromurl, tourl):
        """Repair navigation links that would otherwise stall or skip a page.

        * A link that points back at the page it was found on is rewritten
          to the page whose number is one lower (zero-padded to three
          digits).
        * A link from page 263 to page 265 is rewritten to page 264.

        Any other link is returned unchanged.
        """
        if tourl == fromurl:
            def previous_page(match):
                # Decrement the numeric page index captured by the pattern.
                return '/%03i.ht' % (int(match.group(1)) - 1)

            return re.sub(r'/(\d+)\.ht', previous_page, tourl)

        skips_264 = '263.html' in fromurl and '265.html' in tourl
        if skips_264:
            return self.stripUrl % '264'
        return tourl
class MaidAttack(WLPComics):
    """Scraper definition for the "maidattack" comic hosted on wlpcomics.com."""

    # Base address of the comic.
    url = 'http://www.wlpcomics.com/general/maidattack/'
    # Individual strips live directly below the base address as <index>.html.
    stripUrl = url + '%s.html'
class PeterIsTheWolfAdult(WLPComics):
    """Scraper definition for the adult section of peteristhewolf.com."""

    stripUrl = 'http://www.peteristhewolf.com/adult/%s.html'
    # navigation to newest page is broken, so start from a fixed page
    url = stripUrl % '427'
    firstStripUrl = stripUrl % '001'
    multipleImagesPerStrip = True
    adult = True
    endOfLife = True

    def namer(self, image_url, page_url):
        """Return the file name for an image, tagging adult images.

        The base name is ``<page stem>_<image file name>``. When the text
        ``adult`` occurs in *image_url*, ``_adult`` is inserted directly
        before the final extension (or appended when the name has no
        extension).
        NOTE(review): presumably the tag keeps same-named images of one
        strip from colliding - confirm against the site.
        """
        name = page_url.rsplit('/', 1)[-1].split('.')[0] + '_' + util.urlpathsplit(image_url)[-1]
        if 'adult' not in image_url:
            return name
        # Split on the LAST dot only, so names containing several dots keep
        # their real extension and dotless names do not raise IndexError.
        stem, dot, extension = name.rpartition('.')
        if not dot:
            return name + '_adult'
        return stem + '_adult.' + extension

    def getPrevUrl(self, url, data):
        """Return the previous page URL, bypassing a loop in the site navigation."""
        # Fix loop in site navigation: page 194 must lead back to page 193.
        if url == self.stripUrl % '194':
            return self.stripUrl % '193'
        return super().getPrevUrl(url, data)
class PeterIsTheWolfGeneral(WLPComics):
    """Scraper definition for the general section of peteristhewolf.com."""

    stripUrl = 'http://www.peteristhewolf.com/general/%s.html'
    # navigation to newest page is broken, so start from a fixed page
    url = stripUrl % '427'
    firstStripUrl = stripUrl % '001'
    endOfLife = True

    def getPrevUrl(self, url, data):
        """Return the previous page URL, bypassing loops in the site navigation.

        For the listed pages the predecessor is hard-coded; every other page
        is resolved by the inherited implementation.
        """
        # page index -> index of the page that must be visited before it
        forced_predecessors = {
            '406': '405',
            '230': '229',
            '229': '228',
            '153': '152',
        }
        for page, predecessor in forced_predecessors.items():
            if url == self.stripUrl % page:
                return self.stripUrl % predecessor
        return super().getPrevUrl(url, data)
class Stellar(WLPComics):
    """Scraper definition for the "stellar" comic hosted on wlpcomics.com."""

    url = 'http://www.wlpcomics.com/adult/stellar/'
    stripUrl = url + '%s.html'
    adult = True

    def link_modifier(self, fromurl, tourl):
        """Redirect links aimed at the empty page 075 to page 074.

        All other links are returned unchanged; *fromurl* is not consulted.
        """
        if tourl != self.url + '075.html':
            return tourl
        return self.url + '074.html'
|