1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
|
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: © 2004 Tristan Seligmann and Jonathan Jacobs
# SPDX-FileCopyrightText: © 2012 Bastian Kleineidam
# SPDX-FileCopyrightText: © 2015 Tobias Gruetzmacher
# SPDX-FileCopyrightText: © 2019 Daniel Ring
from .. import util
from ..helpers import bounceStarter, indirectStarter
from ..scraper import ParserScraper, _ParserScraper
from .common import ComicControlScraper, WordPressScraper
class Lancer(WordPressScraper):
    """Scraper for Lancer (lancercomic.com), built on WordPressScraper."""

    # Site root; also used as the prefix for individual strip pages.
    url = 'https://lancercomic.com/'
    # Template for a single strip page; %s is filled with the strip slug.
    stripUrl = url + 'comic/%s/'
    # Page of the first strip.
    firstStripUrl = stripUrl % 'chapter-1-cover'
class LastResort(WordPressScraper):
    """Scraper for Last Resort (lastres0rt.com), built on WordPressScraper."""

    # Site root; also used as the prefix for individual strip pages.
    url = 'http://www.lastres0rt.com/'
    # Template for a single strip page; %s is filled with the strip slug.
    stripUrl = url + 'comic/%s/'
    # Page of the first strip.
    firstStripUrl = stripUrl % 'that-sound-you-hear-is-a-shattered-stereotype'
class LazJonesAndTheMayfieldRegulators(_ParserScraper):
    """Scraper for the 'regulators' section of lazjones.com."""

    # Common prefix shared by the section index and the strip pages.
    baseUrl = 'https://www.lazjones.com/'
    # Entry page of this section.
    url = baseUrl + 'regulators'
    # Template for a single strip page; %s is filled with the strip id.
    stripUrl = baseUrl + 'comic/%s'
    firstStripUrl = stripUrl % 'chapter1_00'

    # XPath selecting the comic image(s) on a strip page.
    imageSearch = '//img[contains(@src, "comic/pages/")]'
    # XPath selecting the link whose text contains "Previous".
    prevSearch = '//a[contains(text(), "Previous")]'
class LazJonesAndTheMayfieldRegulatorsSideStories(LazJonesAndTheMayfieldRegulators):
    """Scraper for the 'comics' (side stories) section of lazjones.com.

    Reuses the image and navigation searches of the parent class and only
    overrides the URLs plus one broken navigation link.
    """

    name = 'LazJonesAndTheMayfieldRegulators/SideStories'
    # Common prefix shared by the section index and the strip pages.
    baseUrl = 'https://www.lazjones.com/'
    # Entry page of this section.
    url = baseUrl + 'comics'
    # Template for a single strip page; %s is filled with the strip id.
    stripUrl = baseUrl + 'comic/%s'
    firstStripUrl = stripUrl % 'journal01'

    def getPrevUrl(self, url, data):
        """Return the URL of the strip preceding ``url``.

        When ``url`` is the section entry page and its "Previous" link points
        at 'summer00', the link is treated as broken and 'summer21' is
        returned instead. Every other case is delegated to the inherited
        implementation.
        """
        # Fix broken navigation links
        if url == self.url:
            linked = self.match(data, self.prevSearch + '/@href')[0]
            if linked == self.stripUrl % 'summer00':
                return self.stripUrl % 'summer21'
        # Zero-argument super() starts the lookup at the direct parent, so a
        # getPrevUrl defined there in the future is not silently bypassed.
        return super().getPrevUrl(url, data)
class LeastICouldDo(ParserScraper):
    """Scraper for Least I Could Do (leasticoulddo.com)."""

    url = 'https://leasticoulddo.com/'
    # Template for a single strip page; %s is a date in yyyymmdd form.
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % '20030210'

    # XPath selecting the comic image inside the "single-post-comic" div.
    imageSearch = '//div[d:class("single-post-comic")]//img'
    # XPath selecting the rel="prev" navigation link.
    prevSearch = '//a[@rel="prev"]'
    # XPath selecting links to /comic/ pages, handed to indirectStarter.
    latestSearch = '//a[contains(@href, "/comic/")]'
    starter = indirectStarter

    help = 'Index format: yyyymmdd'
class LetsSpeakEnglish(ComicControlScraper):
    """Scraper for Let's Speak English, hosted at marycagle.com."""

    # Only the site URL is overridden; the rest comes from ComicControlScraper.
    url = 'http://www.marycagle.com'
class LifeAintNoPonyFarm(WordPressScraper):
    """Scraper for Life Ain't No Pony Farm, read from a web.archive.org copy."""

    # Archived snapshot of the English section of sarahburrini.com.
    url = ('https://web.archive.org/web/20181221154155/'
           'http://sarahburrini.com/en/')
    firstStripUrl = url + 'comic/my-first-webcomic/'

    # Flags evaluated by the base scraper.
    multipleImagesPerStrip = True
    endOfLife = True
class LifeAsRendered(ParserScraper):
    """Scraper for Life As Rendered (kittyredden.com/LAR).

    The comic is walked in forward direction: ``url`` is the page the walk
    starts from ('0100'), ``prevSearch`` follows the "Next" link and
    ``firstStripUrl`` is the page at which the walk stops ('05extra').
    """

    # Reverse navigation doesn't work properly, so search forward instead
    stripUrl = 'https://kittyredden.com/LAR/%s/'
    url = stripUrl % '0100'
    firstStripUrl = stripUrl % '05extra'

    imageSearch = '//figure[@class="wp-block-image"]//img'
    prevSearch = '//a[img[@alt="Next"]]'
    textSearch = '//div[@class="entry-content"]//text()'

    adult = True
    endOfLife = True

    # Manual overrides for pages whose on-page link is broken:
    # current page id -> id of the page to visit next.
    nav = {
        '0140': '0200',
        '0272': '02ss00',
        '02SS14': '0300',
        '0367': '03ss00',
        '03ss10': '0400',
        '0408': '0409',
        '0409': '0410',
        '0421': '0422',
        '0449': '0450',
        '0458': '0460',
        '0460': '04ss00',
        '04ss00': '04ss01',
        '04ss10': '0500',
        '0500': '0501',
        '0508': '0509',
        '0558': '0559',
        '0577': '05extra',
    }

    def namer(self, image_url, page_url):
        """Return the local filename for ``image_url``.

        The name is the last path component of the image URL with the
        inconsistent 'ReN' and 'N01P' markers rewritten.
        """
        # Fix inconsistent filenames
        basename = util.urlpathsplit(image_url)[-1]
        normalized = basename.replace('ReN', 'N')
        return normalized.replace('N01P', 'A02S')

    def extract_image_urls(self, url, data):
        """Return the image URLs of a page, hard-coding the one for 0403."""
        if 'LAR/0403' not in url:
            return super().extract_image_urls(url, data)
        # Fix missing image link
        template = self.stripUrl.rstrip('/')
        return [template % 'A04/A04P03.png']

    def getPrevUrl(self, url, data):
        """Return the next page to visit, preferring the ``nav`` overrides."""
        # Fix broken navigation links
        page = util.urlpathsplit(url)[-1]
        override = self.nav.get(page)
        if override is None:
            return super().getPrevUrl(url, data)
        return self.stripUrl % override

    def fetchText(self, url, data, textSearch, optional):
        """Return text only for the last page of the walk, else ``None``.

        For ``firstStripUrl`` the separate 'the-end' page is downloaded and
        its text is extracted with the inherited implementation.
        """
        if url != self.firstStripUrl:
            return None
        # Save final summary text
        summary_url = self.stripUrl % 'the-end'
        summary_page = self.getPage(summary_url)
        return super().fetchText(summary_url, summary_page, textSearch,
                                 optional)
class LilithsWord(ComicControlScraper):
    """Scraper for Lilith's Word (lilithword.com)."""

    url = 'https://www.lilithword.com/'
    # Template for a single strip page; %s is filled with the strip slug.
    stripUrl = url + 'comic/%s'
    firstStripUrl = stripUrl % 'prologue-page-00'

    def namer(self, image_url, page_url):
        """Return the image filename without its leading '<prefix>-' part.

        Everything up to and including the first hyphen of the last path
        component is dropped. A filename without any hyphen is returned
        unchanged instead of raising ``IndexError``.
        """
        filename = util.urlpathsplit(image_url)[-1]
        _prefix, hyphen, remainder = filename.partition('-')
        # partition() leaves the separator empty when no hyphen is present.
        return remainder if hyphen else filename
class LittleGamers(ParserScraper):
    """Scraper for Little Gamers (little-gamers.com)."""

    url = 'https://www.little-gamers.com/'
    firstStripUrl = url + '2000/12/01/99'

    # XPath selecting the comic image inside the "comic" div.
    imageSearch = '//div[d:class("comic")]//img'
    # Two alternative XPaths for the "previous" link.
    prevSearch = (
        '//a[@id="previous"]',
        '//div[d:class("comic-navigation")]//a[text()="previous"]',
    )
class LittleTales(_ParserScraper):
    """Scraper for Little Tales (little-tales.com)."""

    # The site root is only needed to build stripUrl; url is rebound below
    # to the strip page used as the starting point.
    url = 'http://www.little-tales.com/'
    stripUrl = url + 'index.php?Strip=%s'
    firstStripUrl = stripUrl % '1'
    url = stripUrl % '450'

    imageSearch = '//img[contains(@src, "strips/")]'
    prevSearch = '//a[./img[@alt="BACK"]]'
    nextSearch = '//a[./img[@alt="FORWARD"]]'
    starter = bounceStarter

    # Manual overrides: strip number -> strip number to visit after it.
    nav = {
        '517': '515',
        '449': '447',
    }

    def namer(self, imageUrl, pageUrl):
        """Return '<strip number>.<image extension>' as the local filename."""
        strip_number = pageUrl.rsplit('=', 1)[-1]
        extension = imageUrl.rsplit('.', 1)[-1]
        return '.'.join((strip_number, extension))

    def getPrevUrl(self, url, data):
        """Return the previous strip URL, preferring the ``nav`` overrides."""
        # Skip missing pages with broken navigation links
        strip_number = url.rsplit('=', 1)[1]
        override = self.nav.get(strip_number)
        if override is None:
            return super().getPrevUrl(url, data)
        return self.stripUrl % override
class LoadingArtist(_ParserScraper):
    """Scraper for Loading Artist (loadingartist.com)."""

    url = 'https://loadingartist.com/'
    firstStripUrl = url + 'comic/born/'

    # XPath selecting the comic image inside the "main-image-container" div.
    imageSearch = '//div[d:class("main-image-container")]//img'
    # XPath selecting the link inside the "left-nav" div.
    prevSearch = '//div[d:class("left-nav")]//a'
    # XPath selecting the "Comic" link in the nav, handed to indirectStarter.
    latestSearch = '//nav//a[text()="Comic"]'
    starter = indirectStarter
class LoFiJinks(WordPressScraper):
    """Scraper for Lo-FiJinks, hosted under hijinksensue.com/comic/."""

    # Common prefix of all strip pages.
    baseUrl = 'https://hijinksensue.com/comic/'
    # Page the scraper starts from.
    url = baseUrl + 'learning-to-love-again/'
    firstStripUrl = baseUrl + 'lo-fijinks-everything-i-know-anout-james-camerons-avatar-movie/'
    endOfLife = True
class LookingForGroup(ParserScraper):
    """Scraper for Looking For Group (lfg.co)."""

    url = 'https://www.lfg.co/'
    # Template for a single strip page; %s is the page number.
    stripUrl = url + 'page/%s/'
    firstStripUrl = stripUrl % '1'

    imageSearch = '//div[@id="comic-img"]//img'
    prevSearch = '//a[d:class("comic-nav-prev")]'
    # XPath selecting the footer link to a page/ URL, handed to
    # indirectStarter.
    latestSearch = '//div[@id="feature-lfg-footer"]/a[contains(@href, "page/")]'
    starter = indirectStarter

    help = 'Index format: nnn'

    def namer(self, imageUrl, pageUrl):
        """Return the page number from ``pageUrl`` as the local filename.

        Any occurrence of '2967' in that number is rewritten to '647'.
        """
        # NOTE(review): presumably page 2967 is a misnumbered 647 - confirm.
        page_number = util.urlpathsplit(pageUrl)[-1]
        return page_number.replace('2967', '647')
|