File: rdf.py

package info (click to toggle)
python-feedvalidator 0~svn1022-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, squeeze, wheezy
  • size: 652 kB
  • ctags: 2,452
  • sloc: python: 9,481; makefile: 27; sh: 8
file content (162 lines) | stat: -rw-r--r-- 4,961 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""$Id: rdf.py 988 2008-03-12 18:22:48Z sa3ruby $"""

__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"

from base import validatorBase
from logging import *
from validators import rdfAbout, noduplicates, text, eater
from root import rss11_namespace as rss11_ns
from extension import extension_everywhere

# Namespace URI of the RDF syntax vocabulary.  It is paired with local names
# such as "about" and "parseType" to build the (namespace, name) attribute
# keys looked up by rdfExtension below.
rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

#
# rdf:RDF element.  The valid children include "channel", "item", "textinput", "image"
#
class rdf(validatorBase,object):
  """Validator for the rdf:RDF element.

  Each do_<name> method returns the validator (or tuple of validators) to
  apply to the child element of that name -- presumably dispatched by
  validatorBase, which is not visible in this file.  Sub-validator modules
  are imported inside the methods rather than at module level.
  """

  def prevalidate(self):
    """Record the feed type as TYPE_RSS1."""
    self.setFeedType(TYPE_RSS1)

  def validate(self):
    """Log MissingElement unless a channel (either flavour) was seen."""
    seen = self.children
    has_channel = "channel" in seen or "rss090_channel" in seen
    if not has_channel:
      parent = self.name.replace('_',':')
      self.log(MissingElement({"parent":parent, "element":"channel"}))

  def _is_090(self):
    """Return True once the RSS 0.9 namespace is a default namespace."""
    namespaces = self.dispatcher.defaultNamespaces
    return "http://my.netscape.com/rdf/simple/0.9/" in namespaces

  def _withAbout(self,v):
    """Return v alone for 0.9 feeds, otherwise v paired with rdfAbout()."""
    if not self._is_090():
      return v, rdfAbout()
    return v

  def do_rss090_channel(self):
    """Register the RSS 0.9 namespace as a default and validate the channel."""
    from channel import channel
    defaults = self.dispatcher.defaultNamespaces
    defaults.append("http://my.netscape.com/rdf/simple/0.9/")
    return channel(), noduplicates()

  def do_channel(self):
    """Validate an RSS 1.0 channel: rdf:about check, at most one occurrence."""
    from channel import rss10Channel
    return rdfAbout(), rss10Channel(), noduplicates()

  def do_item(self):
    """Validate an item element."""
    from item import rss10Item
    validator = rss10Item()
    return self._withAbout(validator)

  def do_textinput(self):
    """Validate a textinput element."""
    from textInput import textInput
    validator = textInput()
    return self._withAbout(validator)

  def do_image(self):
    """Validate an image element."""
    validator = rss10Image()
    return self._withAbout(validator)

  # The three elements below are handed to eater() and get no further
  # checks from this class.
  def do_cc_License(self):
    return eater()

  def do_taxo_topic(self):
    return eater()

  def do_rdf_Description(self):
    return eater()

from validators import rfc2396_full

class rss10Image(validatorBase, extension_everywhere):
  """Validator for the image element used by the rdf validator above.

  title, link and url are required and may each appear only once.
  """

  def do_title(self):
    """Validate the title with image.title; duplicates are rejected."""
    from image import title
    return title(), noduplicates()

  def do_link(self):
    """Validate the link with rfc2396_full; duplicates are rejected."""
    return rfc2396_full(), noduplicates()

  def do_url(self):
    """Validate the url with rfc2396_full; duplicates are rejected."""
    return rfc2396_full(), noduplicates()

  def do_dc_creator(self):
    """Validate dc:creator as text."""
    return text()

  def do_dc_subject(self):
    """Validate dc:subject as text; repeating the element is permitted."""
    return text()

  def do_dc_date(self):
    """Validate dc:date with w3cdtf; duplicates are rejected."""
    from validators import w3cdtf
    return w3cdtf(), noduplicates()

  def do_cc_license(self):
    """Hand cc:license to eater()."""
    return eater()

  def validate(self):
    """Log one Missing* event for each required child that is absent."""
    seen = self.children
    if "title" not in seen:
      self.log(MissingTitle({"parent":self.name, "element":"title"}))
    if "link" not in seen:
      self.log(MissingLink({"parent":self.name, "element":"link"}))
    if "url" not in seen:
      self.log(MissingElement({"parent":self.name, "element":"url"}))

#
# This class performs RSS 1.x specific validations on extensions.
#
class rdfExtension(validatorBase):
  """Validator performing RSS 1.x specific checks on extension elements.

  An instance is pushed for every child element (see startElementNS), so a
  whole extension subtree is checked recursively.  Once an element carries
  rdf:parseType="Literal", it and all of its descendants are in "literal"
  mode, in which the namespace, rdf:about and mixed-content checks are
  skipped.
  """

  def __init__(self, qname, literal=False):
    """Create the validator.

    qname   -- value later compared against the RSS 1.1 namespace; this
               class passes the `qname` argument of startElementNS here.
    literal -- True when an ancestor already switched to literal mode.
    """
    validatorBase.__init__(self)
    self.qname=qname
    self.literal=literal

  def textOK(self):
    """Accept character data without complaint."""
    pass

  def setElement(self, name, attrs, parent):
    """Bind this validator to an element and run the attribute checks.

    Logs UndefinedElement when the element lives in the RSS 1.1 namespace
    and DuplicateValue when its rdf:about value was already seen on the
    same dispatcher.  Neither check runs in literal mode.
    """
    validatorBase.setElement(self, name, attrs, parent)

    # `in` replaces the deprecated has_key(); it is supported both by plain
    # dicts and by the xml.sax attribute classes.
    parse_type = (rdfNS,"parseType")
    if parse_type in attrs and attrs[parse_type] == "Literal":
      self.literal=True

    if self.literal:
      return

    # ensure no rss11 children
    if self.qname==rss11_ns:
      from logging import UndefinedElement
      self.log(UndefinedElement({"parent":parent.name, "element":name}))

    # no duplicate rdf:abouts
    about_key = (rdfNS,"about")
    if about_key in attrs:
      about = attrs[about_key]
      # The list of values seen so far is stored directly in the
      # dispatcher's instance dictionary and created on first use.
      abouts = self.dispatcher.__dict__.setdefault("abouts", [])
      if about in abouts:
        self.log(DuplicateValue(
          {"parent":parent.name, "element":"rdf:about", "value":about}))
      else:
        abouts.append(about)

  def getExpectedAttrNames(self):
    """Return the attribute names treated as expected on this element.

    In literal mode (or when there are no attributes) every attribute is
    expected; otherwise everything except RSS 1.1 namespaced attributes.
    """
    # no rss11 attributes
    if self.literal or not self.attrs: return self.attrs.keys()
    return [(ns,n) for ns,n in self.attrs.keys() if ns!=rss11_ns]

  def validate(self):
    """Log InvalidRDF when text and child elements are mixed."""
    # rdflib 2.0.5 does not catch mixed content errors
    if self.value.strip() and self.children and not self.literal:
      self.log(InvalidRDF({"message":"mixed content"}))

  def startElementNS(self, name, qname, attrs):
    """Check a child's namespace well-formedness, then recurse into it."""
    # ensure element is "namespace well formed"
    if ':' in name:
      from logging import MissingNamespace
      self.log(MissingNamespace({"parent":self.name, "element":name}))

    # ensure all attribute namespaces are properly defined
    for (namespace,attr) in attrs.keys():
      if ':' in attr and not namespace:
        from logging import MissingNamespace
        self.log(MissingNamespace({"parent":self.name, "element":attr}))

    # eat children: record the child and validate it with a fresh
    # rdfExtension that inherits the current literal mode
    self.children.append((qname,name))
    self.push(rdfExtension(qname, self.literal), name, attrs)

  def characters(self, string):
    """Forward character data to validatorBase unless in literal mode."""
    if not self.literal: validatorBase.characters(self, string)