File: pages.py

package info (click to toggle)
weboob 0.c-4.1
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 5,008 kB
  • sloc: python: 28,678; perl: 244; sh: 198; makefile: 111; sql: 17
file content (131 lines) | stat: -rw-r--r-- 5,095 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-

# Copyright(C) 2010-2012 Julien Veyssier, Laurent Bachelier
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.


try:
    from urlparse import parse_qs
except ImportError:
    from cgi import parse_qs

from urlparse import urlsplit

from weboob.capabilities.torrent import Torrent
from weboob.capabilities.base import NotAvailable
from weboob.tools.browser import BasePage
from weboob.tools.misc import get_bytes_size


__all__ = ['TorrentsPage']


class TorrentsPage(BasePage):
    def iter_torrents(self):
        for tr in self.document.getiterator('tr'):
            if tr.attrib.get('class', '') == 'odd' or tr.attrib.get('class', '') == ' even':
                if not 'id' in tr.attrib:
                    continue
                title = tr.getchildren()[0].getchildren()[1].getchildren()[1].text
                if not title:
                    title = ''
                for red in tr.getchildren()[0].getchildren()[1].getchildren()[1].getchildren():
                    title += red.text_content()
                idt = tr.getchildren()[0].getchildren()[1].getchildren()[1].attrib.get('href', '').replace('/', '') \
                    .replace('.html', '')

                # look for url
                for a in tr.getchildren()[0].getiterator('a'):
                    if '.torrent' in a.attrib.get('href', ''):
                        url = a.attrib['href']

                size = tr.getchildren()[1].text
                u = tr.getchildren()[1].getchildren()[0].text
                size = size = size.replace(',', '.')
                size = float(size)
                seed = tr.getchildren()[4].text
                leech = tr.getchildren()[5].text

                torrent = Torrent(idt, title)
                torrent.url = url
                torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
                torrent.size = get_bytes_size(size, u)
                torrent.seeders = int(seed)
                torrent.leechers = int(leech)
                yield torrent


class TorrentPage(BasePage):
    def get_torrent(self, id):
        seed = 0
        leech = 0
        description = NotAvailable
        url = NotAvailable
        title = NotAvailable
        for div in self.document.getiterator('div'):
            if div.attrib.get('id', '') == 'desc':
                try:
                    description = div.text_content().strip()
                except UnicodeDecodeError:
                    description = 'Description with invalid UTF-8.'
            elif div.attrib.get('class', '') == 'seedBlock':
                if div.getchildren()[1].text is not None:
                    seed = int(div.getchildren()[1].text)
                else:
                    seed = 0
            elif div.attrib.get('class', '') == 'leechBlock':
                if div.getchildren()[1].text is not None:
                    leech = int(div.getchildren()[1].text)
                else:
                    leech = 0

        title = self.parser.select(self.document.getroot(),
                'h1.torrentName span', 1)
        title = title.text

        for a in self.document.getiterator('a'):
            if ('Download' in a.attrib.get('title', '')) \
            and ('torrent file' in a.attrib.get('title', '')):
                url = a.attrib.get('href', '')

        size = 0
        u = ''
        for span in self.document.getiterator('span'):
            # sometimes there are others span, this is not so sure but the size of the children list
            # is enough to know if this is the right span
            if (span.attrib.get('class', '') == 'folder' \
                or span.attrib.get('class', '') == 'folderopen') \
            and len(span.getchildren()) > 2:
                size = span.getchildren()[1].tail
                u = span.getchildren()[2].text
                size = float(size.split(': ')[1].replace(',', '.'))

        files = []
        for td in self.document.getiterator('td'):
            if td.attrib.get('class', '') == 'torFileName':
                files.append(td.text)

        torrent = Torrent(id, title)
        torrent.url = url
        if torrent.url:
            torrent.filename = parse_qs(urlsplit(url).query).get('title', [None])[0]
        torrent.size = get_bytes_size(size, u)
        torrent.seeders = int(seed)
        torrent.leechers = int(leech)
        torrent.description = description
        torrent.files = files
        return torrent