1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
"""$Id: rdf.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from base import validatorBase
from logging import *
from validators import rdfAbout, noduplicates, text, eater
from root import rss11_namespace as rss11_ns
from extension import extension_everywhere
rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#
# rdf:RDF element. The valid children include "channel", "item", "textinput", "image"
#
class rdf(validatorBase, object):
    """Validator for the rdf:RDF element of an RSS 0.90 / RSS 1.0 feed.

    The ``do_*`` methods return the validator(s) for the child element
    named in the method; NOTE(review): the dispatch by name presumably
    happens in validatorBase -- confirm there.
    """

    def do_rss090_channel(self):
        """Return the validators for an RSS 0.90 channel.

        As a side effect the RSS 0.90 namespace is appended to the
        dispatcher's default namespaces, which is what _is_090() tests.
        """
        from channel import channel
        default_namespaces = self.dispatcher.defaultNamespaces
        default_namespaces.append("http://my.netscape.com/rdf/simple/0.9/")
        return (channel(), noduplicates())

    def do_channel(self):
        """Return the validators for an RSS 1.0 channel (rdf:about checked)."""
        from channel import rss10Channel
        return (rdfAbout(), rss10Channel(), noduplicates())

    def _is_090(self):
        """Tell whether the RSS 0.90 namespace is among the default namespaces."""
        known_namespaces = self.dispatcher.defaultNamespaces
        return "http://my.netscape.com/rdf/simple/0.9/" in known_namespaces

    def _withAbout(self, v):
        """Pair validator *v* with rdfAbout(), except for RSS 0.90 feeds.

        Returns *v* unchanged for RSS 0.90, otherwise the tuple
        ``(v, rdfAbout())``.
        """
        if not self._is_090():
            return (v, rdfAbout())
        return v

    def do_item(self):
        """Return the validator(s) for an item child."""
        from item import rss10Item
        item_validator = rss10Item()
        return self._withAbout(item_validator)

    def do_textinput(self):
        """Return the validator(s) for a textinput child."""
        from textInput import textInput
        textinput_validator = textInput()
        return self._withAbout(textinput_validator)

    def do_image(self):
        """Return the validator(s) for an image child."""
        image_validator = rss10Image()
        return self._withAbout(image_validator)

    def do_cc_License(self):
        """Hand cc:License children to eater()."""
        return eater()

    def do_taxo_topic(self):
        """Hand taxo:topic children to eater()."""
        return eater()

    def do_rdf_Description(self):
        """Hand rdf:Description children to eater()."""
        return eater()

    def prevalidate(self):
        """Record the feed type as TYPE_RSS1."""
        self.setFeedType(TYPE_RSS1)

    def validate(self):
        """Log MissingElement when neither channel flavour was seen."""
        seen = self.children
        if "channel" in seen or "rss090_channel" in seen:
            return
        # The element name uses '_' internally; report it with the ':' form.
        parent_name = self.name.replace('_',':')
        self.log(MissingElement({"parent":parent_name, "element":"channel"}))
from validators import rfc2396_full
class rss10Image(validatorBase, extension_everywhere):
    """Validator for the image element of an RSS 1.0 feed.

    Requires title, link and url children and supplies validators for
    those plus a few dc:/cc: extension elements.
    """

    def validate(self):
        """Log a Missing* event for each absent required child."""
        present = self.children
        if "title" not in present:
            self.log(MissingTitle({"parent":self.name, "element":"title"}))
        if "link" not in present:
            self.log(MissingLink({"parent":self.name, "element":"link"}))
        if "url" not in present:
            self.log(MissingElement({"parent":self.name, "element":"url"}))

    def do_title(self):
        """Return the validators for title (at most one allowed)."""
        from image import title
        return (title(), noduplicates())

    def do_link(self):
        """Return the validators for link (at most one allowed)."""
        return (rfc2396_full(), noduplicates())

    def do_url(self):
        """Return the validators for url (at most one allowed)."""
        return (rfc2396_full(), noduplicates())

    def do_dc_creator(self):
        """Return the validator for dc:creator."""
        return text()

    def do_dc_subject(self):
        """Return the validator for dc:subject; duplicates are allowed."""
        return text()

    def do_dc_date(self):
        """Return the validators for dc:date (at most one allowed)."""
        from validators import w3cdtf
        return (w3cdtf(), noduplicates())

    def do_cc_license(self):
        """Hand cc:license children to eater()."""
        return eater()
#
# This class performs RSS 1.x specific validations on extensions.
#
class rdfExtension(validatorBase):
    """RSS 1.x specific checks applied to extension elements.

    qname   -- namespace value compared against the RSS 1.1 namespace.
    literal -- True once the element (or an ancestor that pushed this
               validator) carries rdf:parseType="Literal"; most checks are
               skipped in that mode.
    """

    def __init__(self, qname, literal=False):
        """Remember the namespace and the inherited literal flag."""
        validatorBase.__init__(self)
        self.literal = literal
        self.qname = qname

    def textOK(self):
        """No-op override.

        NOTE(review): presumably replaces a text check in validatorBase --
        confirm against the base class.
        """

    def setElement(self, name, attrs, parent):
        """Bind the element, then run the non-literal RDF checks.

        Switches to literal mode on rdf:parseType="Literal". Otherwise logs
        UndefinedElement for elements in the RSS 1.1 namespace and
        DuplicateValue for an rdf:about value already recorded on the
        dispatcher.
        """
        validatorBase.setElement(self, name, attrs, parent)

        parse_type_key = (rdfNS,"parseType")
        if attrs.has_key(parse_type_key) and attrs[parse_type_key] == "Literal":
            self.literal = True

        if self.literal:
            return

        # ensure no rss11 children
        if self.qname == rss11_ns:
            from logging import UndefinedElement
            self.log(UndefinedElement({"parent":parent.name, "element":name}))

        # no duplicate rdf:abouts
        about_key = (rdfNS,"about")
        if not attrs.has_key(about_key):
            return
        about = attrs[about_key]
        # The list of rdf:about values seen so far lives on the dispatcher.
        seen_abouts = self.dispatcher.__dict__.setdefault("abouts", [])
        if about in seen_abouts:
            self.log(DuplicateValue(
                {"parent":parent.name, "element":"rdf:about", "value":about}))
        else:
            seen_abouts.append(about)

    def getExpectedAttrNames(self):
        """Return the attribute names, minus RSS 1.1 ones unless literal."""
        current_attrs = self.attrs
        if not self.literal and current_attrs:
            # no rss11 attributes
            return [(ns,n) for ns,n in current_attrs.keys() if ns != rss11_ns]
        return current_attrs.keys()

    def validate(self):
        """Log InvalidRDF for mixed content (text plus child elements)."""
        # rdflib 2.0.5 does not catch mixed content errors
        if not self.value.strip():
            return
        if not self.children:
            return
        if self.literal:
            return
        self.log(InvalidRDF({"message":"mixed content"}))

    def startElementNS(self, name, qname, attrs):
        """Check namespace well-formedness of a child, then descend into it.

        Logs MissingNamespace for an element name containing ':' and for
        each attribute whose name contains ':' while its namespace is
        empty. The child is recorded and given its own rdfExtension that
        inherits the current literal flag.
        """
        # ensure element is "namespace well formed"
        if ':' in name:
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent":self.name, "element":name}))

        # ensure all attribute namespaces are properly defined
        for (namespace, attr) in attrs.keys():
            if ':' not in attr or namespace:
                continue
            from logging import MissingNamespace
            self.log(MissingNamespace({"parent":self.name, "element":attr}))

        # eat children
        self.children.append((qname,name))
        child_validator = rdfExtension(qname, self.literal)
        self.push(child_validator, name, attrs)

    def characters(self, string):
        """Pass character data to validatorBase unless in literal mode."""
        if self.literal:
            return
        validatorBase.characters(self, string)
|