1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
|
# -*- coding: utf-8 -*-
# Copyright(C) 2010-2012 Julien Veyssier, Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.
try:
from urlparse import parse_qs
except ImportError:
from cgi import parse_qs
from urlparse import urlsplit
from weboob.capabilities.torrent import Torrent
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage
from weboob.tools.misc import get_bytes_size
# Public API of this module: every page class defined in this file.
__all__ = ['TorrentsPage', 'TorrentPage']
class TorrentsPage(BasePage):
    """Page listing several torrents as rows of an HTML table."""

    def iter_torrents(self):
        """
        Yield one Torrent per result row found in the document.

        A row is a <tr> whose ``class`` attribute is exactly ``'odd'`` or
        ``' even'`` and which carries an ``id`` attribute.  For each row the
        id, title, download URL, file name, size, seeders and leechers are
        read from fixed positions in the row.

        When a row holds no link containing ``.torrent``, the torrent's
        ``url`` is set to NotAvailable and ``filename`` is left untouched,
        instead of reusing the URL of a previous row.
        """
        for tr in self.document.getiterator('tr'):
            # NOTE(review): the leading space in ' even' is kept exactly as
            # the original comparison had it -- confirm against the markup.
            if tr.attrib.get('class', '') not in ('odd', ' even'):
                continue
            if 'id' not in tr.attrib:
                continue

            cells = tr.getchildren()
            name_cell = cells[0]
            # Element carrying both the title text and the details href.
            link = name_cell.getchildren()[1].getchildren()[1]

            title = link.text
            if not title:
                # No direct text: build the title from the child elements.
                title = ''
                for part in link.getchildren():
                    title += part.text_content()

            idt = link.attrib.get('href', '').replace('/', '').replace('.html', '')

            # Look for the download url.  It is reset for every row so that
            # a row without any .torrent link cannot inherit the url of the
            # previous row (or hit an unbound local on the first row).
            url = NotAvailable
            for a in name_cell.getiterator('a'):
                href = a.attrib.get('href', '')
                if '.torrent' in href:
                    # As before, the last matching link wins.
                    url = href

            size_cell = cells[1]
            # The number uses ',' as decimal separator; the unit is the text
            # of the cell's first child element.
            size = float(size_cell.text.replace(',', '.'))
            u = size_cell.getchildren()[0].text

            seed = cells[4].text
            leech = cells[5].text

            torrent = Torrent(idt, title)
            torrent.url = url
            if url is not NotAvailable:
                # The file name is carried by the 'title' query parameter.
                torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
            torrent.size = get_bytes_size(size, u)
            torrent.seeders = int(seed)
            torrent.leechers = int(leech)
            yield torrent
class TorrentPage(BasePage):
    """Detail page describing a single torrent."""

    @staticmethod
    def _peer_count(block):
        """
        Return the integer written in the second child of *block*.

        Returns 0 when that child has no text.
        """
        text = block.getchildren()[1].text
        if text is None:
            return 0
        return int(text)

    def get_torrent(self, id):
        """
        Build a Torrent object from the content of the page.

        :param id: identifier given to the returned Torrent (the name
                   shadows the builtin but is kept for the callers)
        :returns: a Torrent with url, filename (only when a url was found),
                  size, seeders, leechers, description and files filled in

        Seeders and leechers default to 0, description and url default to
        NotAvailable, and size defaults to 0 when no usable size span is
        present in the document.
        """
        seed = 0
        leech = 0
        description = NotAvailable
        url = NotAvailable

        for div in self.document.getiterator('div'):
            if div.attrib.get('id', '') == 'desc':
                try:
                    description = div.text_content().strip()
                except UnicodeDecodeError:
                    description = 'Description with invalid UTF-8.'
            elif div.attrib.get('class', '') == 'seedBlock':
                seed = self._peer_count(div)
            elif div.attrib.get('class', '') == 'leechBlock':
                leech = self._peer_count(div)

        title = self.parser.select(self.document.getroot(),
                                   'h1.torrentName span', 1)
        title = title.text

        for a in self.document.getiterator('a'):
            link_title = a.attrib.get('title', '')
            if 'Download' in link_title and 'torrent file' in link_title:
                url = a.attrib.get('href', '')

        size = 0
        u = ''
        for span in self.document.getiterator('span'):
            # sometimes there are other spans; this is not so sure but the
            # size of the children list is enough to know if this is the
            # right span
            children = span.getchildren()
            if span.attrib.get('class', '') not in ('folder', 'folderopen'):
                continue
            if len(children) <= 2:
                continue
            label = children[1].tail
            # The value is expected after a ': ' separator; ignore a span
            # whose tail text is missing or has another shape instead of
            # failing on it.
            if label is None or ': ' not in label:
                continue
            size = float(label.split(': ')[1].replace(',', '.'))
            u = children[2].text

        files = [td.text
                 for td in self.document.getiterator('td')
                 if td.attrib.get('class', '') == 'torFileName']

        torrent = Torrent(id, title)
        torrent.url = url
        if torrent.url:
            # The file name is carried by the 'title' query parameter.
            torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = seed
        torrent.leechers = leech
        torrent.description = description
        torrent.files = files
        return torrent
|