File: mediaTypes.py

package info (click to toggle)
python-feedvalidator 0~svn1022-2
links: PTS, VCS
area: main
in suites: jessie, jessie-kfreebsd, squeeze, wheezy
size: 652 kB
ctags: 2,452
sloc: python: 9,481; makefile: 27; sh: 8
file content (90 lines) | stat: -rw-r--r-- 4,208 bytes
parent folder | download | duplicates (2)
"""
$Id: mediaTypes.py 988 2008-03-12 18:22:48Z sa3ruby $
This module deals with valid internet media types for feeds.
"""

__author__ = "Joseph Walton <http://www.kafsemo.org/>"
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2004 Joseph Walton"

from cgi import parse_header
from logging import *

FEED_TYPES = [
  'text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml',
  'application/atom+xml', 'text/x-opml', 'application/xrds+xml',
  'application/opensearchdescription+xml', 'application/vnd.google-earth.kml+xml', 'application/vnd.google-earth.kmz',
  'application/atomsvc+xml', 'application/atomcat+xml',  
]

# Is the Content-Type correct?
def checkValid(contentType, loggedEvents):
  (mediaType, params) = parse_header(contentType)
  if mediaType.lower() not in FEED_TYPES:
    loggedEvents.append(UnexpectedContentType({"type": "Feeds", "contentType": mediaType}))
  if 'charset' in params:
    charset = params['charset']
  else:
    charset = None

  return (mediaType, charset)

# Warn about mismatches between media type and feed version
def checkAgainstFeedType(mediaType, feedType, loggedEvents):
  mtl = mediaType.lower()

  if mtl in ['application/x.atom+xml', 'application/atom+xml']:
    if feedType not in [TYPE_ATOM, TYPE_ATOM_ENTRY]:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-Atom 1.0 feeds', "contentType": mediaType}))
  elif mtl == 'application/atomcat+xml':
    if feedType != TYPE_APP_CATEGORIES:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Category document', "contentType": mediaType}))
  elif mtl == 'application/atomsvc+xml':
    if feedType != TYPE_APP_SERVICE:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Service document', "contentType": mediaType}))
  elif mtl == 'application/rdf+xml':
    if feedType != TYPE_RSS1:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS 1.0 feeds', "contentType": mediaType}))
  elif mtl == 'application/rss+xml':
    if feedType not in [TYPE_RSS1, TYPE_RSS2]:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS feeds', "contentType": mediaType}))
  elif mtl == 'text/x-opml':
    if feedType not in [TYPE_OPML]:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-OPML feeds', "contentType": mediaType}))
  elif mtl == 'application/opensearchdescription+xml':
    if feedType not in [TYPE_OPENSEARCH]:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-OpenSearchDescription documents', "contentType": mediaType}))
  elif mtl == 'application/xrds+xml':
    if feedType not in [TYPE_XRD]:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-Extensible Resource Descriptor documents', "contentType": mediaType}))
  elif mtl == 'application/vnd.google-earth.kml+xml':
    if feedType not in [TYPE_KML20, TYPE_KML21, TYPE_KML22]:
      loggedEvents.append(UnexpectedContentType({"type": 'Non-KML documents', "contentType": mediaType}))
  elif mtl == 'application/earthviewer':
    loggedEvents.append(InvalidKmlMediaType({"type": 'Non-KML documents', "contentType": mediaType}))

# warn if a non-specific media type is used without a 'marker'
def contentSniffing(mediaType, rawdata, loggedEvents):
  if mediaType not in FEED_TYPES: return
  if mediaType == 'application/atom+xml': return
  if mediaType == 'application/atomcat+xml': return
  if mediaType == 'application/atomsvc+xml': return
  if mediaType == 'application/rss+xml': return
  if mediaType == 'text/x-opml': return
  if mediaType == 'application/opensearchdescription+xml': return
  if mediaType == 'application/xrds+xml': return
  if mediaType == 'application/vnd.google-earth.kml+xml': return

  block = rawdata[:512]

  if block.find('<rss') >= 0: return
  if block.find('<feed') >= 0: return
  if block.find('<opml') >= 0: return
  if block.find('<kml') >= 0: return
  if block.find('<OpenSearchDescription') >= 0: return
  if (block.find('<rdf:RDF') >=0 and 
      block.find('http://www.w3.org/1999/02/22-rdf-syntax-ns#') >= 0 and
      block.find( 'http://purl.org/rss/1.0/')): return

  from logging import NonSpecificMediaType
  loggedEvents.append(NonSpecificMediaType({"contentType": mediaType}))