1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
|
"""
$Id: mediaTypes.py 988 2008-03-12 18:22:48Z sa3ruby $
This module deals with valid internet media types for feeds.
"""
__author__ = "Joseph Walton <http://www.kafsemo.org/>"
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2004 Joseph Walton"
from cgi import parse_header
from logging import *
FEED_TYPES = [
'text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml',
'application/atom+xml', 'text/x-opml', 'application/xrds+xml',
'application/opensearchdescription+xml', 'application/vnd.google-earth.kml+xml', 'application/vnd.google-earth.kmz',
'application/atomsvc+xml', 'application/atomcat+xml',
]
# Is the Content-Type correct?
def checkValid(contentType, loggedEvents):
(mediaType, params) = parse_header(contentType)
if mediaType.lower() not in FEED_TYPES:
loggedEvents.append(UnexpectedContentType({"type": "Feeds", "contentType": mediaType}))
if 'charset' in params:
charset = params['charset']
else:
charset = None
return (mediaType, charset)
# Warn about mismatches between media type and feed version
def checkAgainstFeedType(mediaType, feedType, loggedEvents):
mtl = mediaType.lower()
if mtl in ['application/x.atom+xml', 'application/atom+xml']:
if feedType not in [TYPE_ATOM, TYPE_ATOM_ENTRY]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-Atom 1.0 feeds', "contentType": mediaType}))
elif mtl == 'application/atomcat+xml':
if feedType != TYPE_APP_CATEGORIES:
loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Category document', "contentType": mediaType}))
elif mtl == 'application/atomsvc+xml':
if feedType != TYPE_APP_SERVICE:
loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Service document', "contentType": mediaType}))
elif mtl == 'application/rdf+xml':
if feedType != TYPE_RSS1:
loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS 1.0 feeds', "contentType": mediaType}))
elif mtl == 'application/rss+xml':
if feedType not in [TYPE_RSS1, TYPE_RSS2]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS feeds', "contentType": mediaType}))
elif mtl == 'text/x-opml':
if feedType not in [TYPE_OPML]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-OPML feeds', "contentType": mediaType}))
elif mtl == 'application/opensearchdescription+xml':
if feedType not in [TYPE_OPENSEARCH]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-OpenSearchDescription documents', "contentType": mediaType}))
elif mtl == 'application/xrds+xml':
if feedType not in [TYPE_XRD]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-Extensible Resource Descriptor documents', "contentType": mediaType}))
elif mtl == 'application/vnd.google-earth.kml+xml':
if feedType not in [TYPE_KML20, TYPE_KML21, TYPE_KML22]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-KML documents', "contentType": mediaType}))
elif mtl == 'application/earthviewer':
loggedEvents.append(InvalidKmlMediaType({"type": 'Non-KML documents', "contentType": mediaType}))
# warn if a non-specific media type is used without a 'marker'
def contentSniffing(mediaType, rawdata, loggedEvents):
if mediaType not in FEED_TYPES: return
if mediaType == 'application/atom+xml': return
if mediaType == 'application/atomcat+xml': return
if mediaType == 'application/atomsvc+xml': return
if mediaType == 'application/rss+xml': return
if mediaType == 'text/x-opml': return
if mediaType == 'application/opensearchdescription+xml': return
if mediaType == 'application/xrds+xml': return
if mediaType == 'application/vnd.google-earth.kml+xml': return
block = rawdata[:512]
if block.find('<rss') >= 0: return
if block.find('<feed') >= 0: return
if block.find('<opml') >= 0: return
if block.find('<kml') >= 0: return
if block.find('<OpenSearchDescription') >= 0: return
if (block.find('<rdf:RDF') >=0 and
block.find('http://www.w3.org/1999/02/22-rdf-syntax-ns#') >= 0 and
block.find( 'http://purl.org/rss/1.0/')): return
from logging import NonSpecificMediaType
loggedEvents.append(NonSpecificMediaType({"contentType": mediaType}))
|