1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
"""
helpers module (imdb package).
This module provides functions not used directly by the imdb package,
but useful for IMDbPY-based programs.
Copyright 2006 Davide Alberani <da@erlug.linux.it>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""
# XXX: find better names for the functions in this module.
import re
from cgi import escape
from types import UnicodeType, TupleType, ListType
# The modClearRefs can be used to strip names and titles references from
# the strings in Movie and Person objects.
from utils import modClearRefs, re_titleRef, re_nameRef
from imdb import IMDb
from imdb.parser.http.utils import re_entcharrefssub, entcharrefs, \
entcharrefsget, subXMLRefs, subSGMLRefs
# Loose URL matcher: captures text starting with 'http://' (any letter case)
# and lazily extends it until whitespace or the end of the string follows.
_re_href = re.compile(r'(http://.+?)(?=\s|$)', re.IGNORECASE)
# Module-level shortcut to the compiled pattern's substitution method.
_re_hrefsub = _re_href.sub
def makeCgiPrintEncoding(encoding):
    """Make a function to pretty-print strings for the web.

    encoding is the name of the codec that the returned function uses to
    encode unicode strings.  The returned function takes a single string,
    escapes it with cgi.escape (quotes included) and, if it is a unicode
    string, encodes it replacing the characters outside the charset with
    XML char references.
    """
    def cgiPrint(s):
        s = escape(s, quote=1)
        # Only unicode strings are encoded; any other string is returned
        # as it comes out of escape().
        if isinstance(s, UnicodeType):
            s = s.encode(encoding, 'xmlcharrefreplace')
        return s
    # A '"""...""" % encoding' expression placed at the top of a function
    # body is not a docstring (only a plain string literal is), so the
    # per-encoding documentation is attached explicitly here.
    cgiPrint.__doc__ = ("Encode the given string using the %s encoding, "
                        "and replace\nchars outside the given charset with "
                        "XML char references." % (encoding,))
    return cgiPrint
# Module-level pretty-printer: the function built by makeCgiPrintEncoding
# for the latin_1 encoding.
cgiPrint = makeCgiPrintEncoding('latin_1')
def makeModCGILinks(movieTxt, personTxt):
    """Make a function used to pretty-print movie and person references.

    movieTxt and personTxt are the strings used for the substitutions.
    movieTxt must contain %(movieID)s and %(title)s, while personTxt
    must contain %(personID)s and %(name)s.

    The returned function takes the string to process and two mappings
    (titlesRefs and namesRefs) from the referenced text to an object
    exposing, respectively, a movieID or a personID attribute.
    """
    def _printable(text):
        # Escape the text with the module-level cgiPrint, then turn the
        # result back into a unicode string so that it can be
        # interpolated in the (unicode) output.
        return unicode(cgiPrint(text), 'latin_1', 'xmlcharrefreplace')

    def modCGILinks(s, titlesRefs, namesRefs):
        """Substitute movies and persons references."""
        def _replaceMovie(match):
            to_replace = match.group(1)
            item = titlesRefs.get(to_replace)
            if item:
                to_replace = movieTxt % {'movieID': item.movieID,
                                         'title': _printable(to_replace)}
            # Unknown references are replaced by their bare text.
            return to_replace

        def _replacePerson(match):
            to_replace = match.group(1)
            item = namesRefs.get(to_replace)
            if item:
                to_replace = personTxt % {'personID': item.personID,
                                          'name': _printable(to_replace)}
            return to_replace

        # Neutralize the angle brackets already present in the text
        # *before* any markup is added, so that only the anchors generated
        # below end up as real tags in the output.
        s = s.replace('<', '&lt;').replace('>', '&gt;')
        s = _re_hrefsub(r'<a href="\1">\1</a>', s)
        s = re_titleRef.sub(_replaceMovie, s)
        s = re_nameRef.sub(_replacePerson, s)
        return s
    return modCGILinks
# Ready-made substitution function: movie and person references become
# links to the imdb.com web site.
modHtmlLinks = makeModCGILinks(
    movieTxt='<a href="http://akas.imdb.com/title/tt%(movieID)s">%(title)s</a>',
    personTxt='<a href="http://akas.imdb.com/name/nm%(personID)s">%(name)s</a>')
# Table of entity/char references: a copy of entcharrefs plus the five
# XML-reserved characters, each registered both by name ('lt') and by
# decimal code ('#60').
everyentcharrefs = entcharrefs.copy()
for k, v in {'lt':u'<','gt':u'>','amp':u'&','quot':u'"','apos':u'\''}.items():
    everyentcharrefs[k] = v
    everyentcharrefs['#%s' % ord(v)] = v
everyentcharrefsget = everyentcharrefs.get
# Matches '&name;' for every key of the table, and any decimal reference
# of one to five digits.  The pattern is a raw string because '\#' and
# '\d' are regular expression escapes, not string escapes.
re_everyentcharrefs = re.compile(r'&(%s|\#160|\#\d{1,5});' %
                                 '|'.join(map(re.escape, everyentcharrefs)))
re_everyentcharrefssub = re_everyentcharrefs.sub
def _replAllXMLRef(match):
    """Replace the matched XML reference.

    match is a match object whose first group is the reference without
    the surrounding '&' and ';'.  References found in everyentcharrefs
    are replaced by the associated value; any other numeric reference
    ('#NNN') is converted with unichr.  A numeric reference that cannot
    be converted is left untouched in the text.
    """
    ref = match.group(1)
    value = everyentcharrefsget(ref)
    if value is not None:
        return value
    if ref[0] != '#':
        # Not in the table and not numeric: hand back the bare name.
        return ref
    try:
        return unichr(int(ref[1:]))
    except (ValueError, OverflowError):
        # The pattern accepts up to five digits, so the code point can be
        # larger than what unichr supports (e.g. above 0xFFFF on narrow
        # builds): keep the original '&#NNN;' text instead of aborting
        # the whole substitution.
        return match.group(0)
def subXMLHTMLSGMLRefs(s):
    """Replace every XML/HTML/SGML entity and char reference found in
    the given string, and return the result."""
    expanded = re_everyentcharrefssub(_replAllXMLRef, s)
    return expanded
def sortedSeasons(m):
    """Return a sorted list of seasons of the given series.

    m is any object with a dictionary-like get method; the seasons are
    the keys of its 'episodes' mapping.  An empty list is returned when
    there is no 'episodes' entry.
    """
    # sorted() always builds a new list, whatever kind of object keys()
    # would return for the mapping.
    return sorted(m.get('episodes', {}))
def sortedEpisodes(m, season=None):
    """Return a sorted list of episodes of the given series,
    considering only the specified season(s) (every season, if None).

    season can be a single season key or a tuple/list of keys.  Episodes
    are returned season by season, following the order of the given
    seasons (sorted order, if season is None), and sorted by episode key
    within each season.  Seasons not present in the series contribute
    no episodes.
    """
    if season is None:
        seasons = sortedSeasons(m)
    elif isinstance(season, (tuple, list)):
        seasons = season
    else:
        # A single season: handle it as a one-element list.
        seasons = [season]
    # Fetch the seasons mapping once, instead of once per episode.
    all_episodes = m.get('episodes', {})
    episodes = []
    for s in seasons:
        season_episodes = all_episodes.get(s, {})
        episodes.extend([season_episodes[e] for e in sorted(season_episodes)])
    return episodes
# Idea and portions of the code courtesy of none none (dclist at gmail.com)
# Matches an identifier made of 'nm' or 'tt' followed by exactly seven
# digits, delimited by word boundaries.
_re_imdbIDurl = re.compile(r'\b(nm|tt)([0-9]{7})\b')

def get_byURL(url, info=None, args=None, kwds=None):
    """Return a Movie or Person object for the given URL; info is the
    info set to retrieve, args and kwds are respectively a list and a
    dictionary of arguments to initialize the data access system.
    Returns None if unable to correctly parse the url; can raise
    exceptions if unable to retrieve the data."""
    # Parse the URL first: there is no point in setting up the data
    # access system when no 'tt'/'nm' identifier can be found.
    match = _re_imdbIDurl.search(url)
    if not match:
        return None
    imdbtype, imdbID = match.groups()
    if args is None: args = []
    if kwds is None: kwds = {}
    ia = IMDb(*args, **kwds)
    if imdbtype == 'tt':
        return ia.get_movie(imdbID, info=info)
    # The pattern only admits 'tt' and 'nm', so this is a person ID.
    return ia.get_person(imdbID, info=info)
|