File: link.py

package info (click to toggle)
python-feedvalidator 0~svn1022-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, squeeze, wheezy
  • size: 652 kB
  • ctags: 2,452
  • sloc: python: 9,481; makefile: 27; sh: 8
file content (160 lines) | stat: -rw-r--r-- 6,858 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
"""$Id: link.py 988 2008-03-12 18:22:48Z sa3ruby $"""

__author__ = "Sam Ruby <http://intertwingly.net/> and Mark Pilgrim <http://diveintomark.org/>"
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"

from base import validatorBase
from validators import *

#
# Atom link element
#
# NOTE: nonblank used to be listed twice in the base list.  A repeated base is
# rejected with "TypeError: duplicate base class" for new-style classes, and
# the first occurrence already wins every attribute lookup, so the trailing
# duplicate has been dropped.
class link(nonblank,xmlbase,iso639,nonhtml,nonNegativeInteger,rfc3339):
  """Validator for the Atom <link> element.

  validate() checks the rel, type, title, length, hreflang and href
  attributes plus the thr:count / thr:when / thr:updated attributes, and
  reports each finding through self.log().  The base classes are used as
  mixins: validate() calls each base's own validate() explicitly for the
  attribute value it is responsible for.
  """

  # Relation names accepted without being spelled as a full URI.
  validRelations = [
    # http://www.iana.org/assignments/link-relations.html
    'alternate',    # RFC4287
    'current',      # RFC5005
    'enclosure',    # RFC4287
    'edit',         # RFC-ietf-atompub-protocol-17.txt
    'edit-media',   # RFC-ietf-atompub-protocol-17.txt
    'first',        # RFC5005
    'last',         # RFC5005
    'license',      # RFC4946
    'next',         # RFC5005
    'next-archive', # RFC5005
    'payment',      # Kinberg
    'prev-archive', # RFC5005
    'previous',     # RFC5005
    'related',      # RFC4287
    'replies',      # RFC4685
    'self',         # RFC4287
    'via'           # RFC4287
    ]

  # Feed-history relations; validate() flags these when the parent element
  # is named 'entry'.
  rfc5005 = [
    'current',      # RFC5005
    'first',        # RFC5005
    'last',         # RFC5005
    'next',         # RFC5005
    'next-archive', # RFC5005
    'prev-archive', # RFC5005
    'previous',     # RFC5005
    ]

  def getExpectedAttrNames(self):
    """Return the (namespace, local name) pairs of the expected attributes."""
    return [(None, u'type'), (None, u'title'), (None, u'rel'),
      (None, u'href'), (None, u'length'), (None, u'hreflang'),
      (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'type'),
      (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'resource'),
      (u'http://purl.org/syndication/thread/1.0', u'count'),
      (u'http://purl.org/syndication/thread/1.0', u'when'),
      (u'http://purl.org/syndication/thread/1.0', u'updated')]

  def validate(self):
    """Validate the attributes of this link element and log the findings.

    Sets self.type, self.rel (default "alternate"), self.href, self.hreflang
    and self.title from the attributes that are present.  self.value is
    reused as scratch space: it is set to each attribute value in turn just
    before the matching mixin validate() is called.
    """
    self.type = ""
    self.rel = "alternate"
    self.href = ""
    self.hreflang = ""
    self.title = ""

    if self.attrs.has_key((None, "rel")):
      self.value = self.rel = self.attrs.getValue((None, "rel"))

      # The IANA URI form of a relation is equivalent to its short name.
      if self.rel.startswith('http://www.iana.org/assignments/relation/'):
        self.rel=self.rel[len('http://www.iana.org/assignments/relation/'):]

      if self.rel in self.validRelations:
        self.log(ValidAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel}))
      else:
        # The idna codec applies host-label rules to every dot-separated
        # piece of the string, so a value with an empty or over-long piece
        # (e.g. "http://example.org/a/../b") raises UnicodeError.  Fall back
        # to the raw value so the URI pattern still makes the decision
        # instead of the whole validation aborting.
        try:
          candidate = self.rel.encode('idna')
        except UnicodeError:
          candidate = self.rel
        if rfc2396_full.rfc2396_re.match(candidate):
          self.log(ValidAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel}))
        else:
          self.log(UnregisteredAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel}))
      nonblank.validate(self, errorClass=AttrNotBlank, extraParams={"attr": "rel"})

      if self.rel in self.rfc5005 and self.parent.name == 'entry':
        self.log(FeedHistoryRelInEntry({"rel":self.rel}))

    if self.attrs.has_key((None, "type")):
      self.value = self.type = self.attrs.getValue((None, "type"))
      if not mime_re.match(self.type):
        self.log(InvalidMIMEType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))
      elif self.rel == "self" and self.type not in ["application/atom+xml", "application/rss+xml", "application/rdf+xml"]:
        self.log(SelfNotAtom({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))
      else:
        self.log(ValidMIMEAttribute({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type}))

    if self.attrs.has_key((None, "title")):
      self.log(ValidTitle({"parent":self.parent.name, "element":self.name, "attr":"title"}))
      self.value = self.title = self.attrs.getValue((None, "title"))
      nonblank.validate(self, errorClass=AttrNotBlank, extraParams={"attr": "title"})
      nonhtml.validate(self)

    # NOTE(review): from here on self.name is overwritten with the attribute
    # being checked and never restored, so later self.log() calls in this
    # method pass that attribute name as "element" -- confirm this is intended.
    if self.attrs.has_key((None, "length")):
      self.name = 'length'
      self.value = self.attrs.getValue((None, "length"))
      nonNegativeInteger.validate(self)
      nonblank.validate(self)

    if self.attrs.has_key((None, "hreflang")):
      self.name = 'hreflang'
      self.value = self.hreflang = self.attrs.getValue((None, "hreflang"))
      iso639.validate(self)

    if self.attrs.has_key((None, "href")):
      self.name = 'href'
      self.value = self.href = self.attrs.getValue((None, "href"))
      xmlbase.validate(self, extraParams={"attr": "href"})

      if self.rel == "self" and self.parent.name in ["feed","channel"]:

        # detect relative self values: the href counts as absolute when it,
        # or an xml:base on this element or one of its ancestors, carries a
        # network location
        from urlparse import urlparse
        from xml.dom import XML_NAMESPACE
        absolute = urlparse(self.href)[1]
        element = self
        while not absolute and element and hasattr(element,'attrs'):
          pattrs = element.attrs
          if pattrs and pattrs.has_key((XML_NAMESPACE, u'base')):
            absolute=urlparse(pattrs.getValue((XML_NAMESPACE, u'base')))[1]
          element = element.parent
        if not absolute:
          self.log(RelativeSelf({"value":self.href}))

        # Compare the resolved self link with the URIs already recorded in
        # dispatcher.selfURIs: first by exact string, then against the
        # document base with fragments ignored, and finally through Uri
        # equality.
        from urlparse import urljoin
        if urljoin(self.xmlBase,self.value) not in self.dispatcher.selfURIs:
          if urljoin(self.xmlBase,self.value).split('#')[0] != self.xmlBase.split('#')[0]:
            from uri import Uri
            value = Uri(self.value)
            for docbase in self.dispatcher.selfURIs:
              if value == Uri(docbase): break
            else:
              # No recorded URI matched: report the mismatch and record the
              # resolved URI.
              self.log(SelfDoesntMatchLocation({"parent":self.parent.name, "element":self.name}))
              self.dispatcher.selfURIs.append(urljoin(self.xmlBase,self.value))

    else:
      self.log(MissingHref({"parent":self.parent.name, "element":self.name, "attr":"href"}))

    if self.attrs.has_key((u'http://purl.org/syndication/thread/1.0', u'count')):
      if self.rel != "replies":
        self.log(UnexpectedAttribute({"parent":self.parent.name, "element":self.name, "attribute":"thr:count"}))
      self.value = self.attrs.getValue((u'http://purl.org/syndication/thread/1.0', u'count'))
      self.name="thr:count"
      nonNegativeInteger.validate(self)

    # thr:when is always reported, whatever its value.
    if self.attrs.has_key((u'http://purl.org/syndication/thread/1.0', u'when')):
      self.log(NoThrWhen({"parent":self.parent.name, "element":self.name, "attribute":"thr:when"}))

    if self.attrs.has_key((u'http://purl.org/syndication/thread/1.0', u'updated')):
      if self.rel != "replies":
        self.log(UnexpectedAttribute({"parent":self.parent.name, "element":self.name, "attribute":"thr:updated"}))
      self.value = self.attrs.getValue((u'http://purl.org/syndication/thread/1.0', u'updated'))
      self.name="thr:updated"
      rfc3339.validate(self)

  def startElementNS(self, name, qname, attrs):
    """Hand any child element of the link to an eater() handler."""
    self.push(eater(), name, attrs)

  def characters(self, text):
    """Log AtomLinkNotEmpty when the link contains non-whitespace text."""
    if text.strip():
      self.log(AtomLinkNotEmpty({"parent":self.parent.name, "element":self.name}))