#!/usr/bin/env python
# Copyright (C) 2002-2004 Artifex Software, Inc.
# All Rights Reserved.
#
# This software is provided AS-IS with no warranty, either express or
# implied.
#
# This software is distributed under license and may not be copied, modified
# or distributed except as expressly authorized under the terms of that
# license. Refer to licensing information at http://www.artifex.com/
# or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
# San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
# $Id: check_docrefs.py 8409 2007-11-27 20:43:09Z giles $
# Check that the hrefs in an HTML document mention all of a set of files.
# The requirement is that the union of all the docfiles must somewhere
# reference all the files. Usage:
# check_docrefs (+src | +lib | +tests | [+-]from <docfile>* | [+-]to (<directory> | <file>)*)*
# +from or +to adds files; -from or -to removes them;
# +src, +lib, or +tests execute SRC_LIST, LIB_LIST, or TEST_LIST below.
# Define the Ghostscript-specific parameter lists.
SRC_LIST = [
'+from', 'doc/Develop.htm',
'+to', 'lib', 'src',
'-to', '*/CVS', 'src/rinkj/CVS',
'-to', 'src/*.mak.tcl',
'-to', 'lib/*.upp',
'-to', 'lib/*.ps',
'+to', 'lib/gs_*.ps', 'lib/pdf_*.ps'
]
# Argument list executed for the '+lib' switch.
LIB_LIST = [
    # Document whose hrefs are collected.
    '+from',
    'doc/Psfiles.htm',
    # Files that must be referenced.
    '+to',
    'examples/*.ps',
    'lib/*.ps',
]
# Argument list executed for the '+tests' switch.
TEST_LIST = [
    # Document whose hrefs are collected.
    '+from',
    'doc/Testing.htm',
    # Start with everything matching toolbin/tests/* ...
    '+to',
    'toolbin/tests/*',
    # ... then remove these patterns.
    '-to',
    'toolbin/tests/check_*.py',
    'toolbin/tests/gscheck_*.py',
    '-to',
    'toolbin/tests/*.pyc',
    'toolbin/tests/CVS/*',
]
import glob, os.path, re, sys
from gstestutils import GSTestCase, gsRunTestsMain
# Utilities
# List all the files referenced from a document.
# Note that we only count files referenced as a whole, i.e., without #.
def hrefs(doc):
    """Return the files referenced by the href attributes in an HTML document.

    Only whole-file references are counted: an href whose value contains a
    '#' is not matched by the pattern and is therefore ignored.  Matching of
    the attribute name is case-insensitive.

    doc -- path of the HTML document to scan.

    Returns a list with one entry per matching href, in document order
    (duplicates are kept).  Each entry is the href value joined onto the
    directory of ``doc`` and normalized with os.path.normpath.

    Raises whatever open()/read() raise if the document cannot be read.
    """
    prefix = os.path.dirname(doc)
    # The context manager closes the file even when read() raises.
    with open(doc, 'r') as fp:
        contents = fp.read()
    pattern = re.compile('href="([^"#]*)"', re.IGNORECASE)
    # Build a real list rather than a lazy map object so that the result can
    # be indexed, measured and re-iterated on every Python version.
    return [os.path.normpath(os.path.join(prefix, match))
            for match in pattern.findall(contents)]
# Define a class for storing to/from information.
class DocRefs:
    """Store the 'to' files and the 'from' references being compared.

    Every table maps a path normalized with os.path.normpath to the
    ``adding`` flag most recently recorded for it (mainArgs passes 1 for a
    '+' switch and 0 for a '-' switch):

      to    -- regular files that should be referenced
      toDir -- directories that were scanned by doTo
      From  -- paths that are referenced
    """

    def __init__(self):
        self.to = {}
        self.toDir = {}
        self.From = {}

    def doTo(self, fname, adding):
        """Record ``adding`` for fname, recursing into directories.

        A directory is recorded in toDir and each of its entries matched by
        '*' is processed in turn.  Other symbolic links are skipped, regular
        files are recorded in ``to``, and anything else is ignored.
        """
        if os.path.isdir(fname):
            # Normalize the key like the keys of ``to`` and ``From`` so that a
            # lookup by os.path.dirname() of a normalized path can find it.
            self.toDir[os.path.normpath(fname)] = adding
            for f in glob.glob(os.path.join(fname, '*')):
                self.doTo(f, adding)
        elif os.path.islink(fname):
            # Links that resolve to a directory were handled above; any other
            # symbolic link is deliberately not recorded.
            pass
        elif os.path.isfile(fname):
            self.to[os.path.normpath(fname)] = adding

    def doFrom(self, fname, adding):
        """Record ``adding`` for a single referenced path."""
        self.From[os.path.normpath(fname)] = adding

    def doFromDoc(self, docname, adding):
        """Record ``adding`` for every file that hrefs(docname) returns."""
        for f in hrefs(docname):
            self.doFrom(f, adding)

    def cleanup(self):
        """Drop every entry of ``to`` and ``From`` whose flag is 0.

        The dictionaries are modified in place; toDir is left untouched.
        """
        for table in (self.to, self.From):
            # Iterate over a snapshot of the keys: deleting from a dictionary
            # while iterating over it directly is an error on Python 3.
            for k in list(table):
                if table[k] == 0:
                    del table[k]
# Process command line arguments and switches.
def mainArgs(arglist, root, fromProc, toProc):
    """Process a list of switches and file arguments.

    arglist  -- sequence of strings.  '+src', '+lib' and '+tests' process
                SRC_LIST, LIB_LIST and TEST_LIST recursively; '+from',
                '-from', '+to' and '-to' select the handler and the flag
                used for the file arguments that follow; any other string
                is a file name or a pattern containing '*'.
    root     -- directory that every file argument is joined onto.
    fromProc -- called as fromProc(path, adding) after a [+-]from switch.
    toProc   -- called as toProc(path, adding) after a [+-]to switch.

    ``adding`` is 1 after a '+' switch and 0 after a '-' switch.  Patterns
    containing '*' are expanded with glob and the handler is called once
    per match; other arguments are passed through unexpanded.

    An unknown switch, or a file argument that is not preceded by a
    from/to switch, prints a message on stderr and exits with status 1.
    """
    # No handler is selected until a [+-]from or [+-]to switch is seen.
    do = None
    adding = None
    for arg in arglist:
        if arg == '+src':
            mainArgs(SRC_LIST, root, fromProc, toProc)
        elif arg == '+lib':
            mainArgs(LIB_LIST, root, fromProc, toProc)
        elif arg == '+tests':
            mainArgs(TEST_LIST, root, fromProc, toProc)
        elif arg == '+from':
            do, adding = fromProc, 1
        elif arg == '-from':
            do, adding = fromProc, 0
        elif arg == '+to':
            do, adding = toProc, 1
        elif arg == '-to':
            do, adding = toProc, 0
        elif re.match('[+-]', arg):
            # Write to stderr explicitly; sys.exit works even when the
            # interactive 'exit' helper from the site module is unavailable.
            sys.stderr.write('Unknown switch: ' + arg + '\n')
            sys.exit(1)
        elif do is None:
            sys.stderr.write('No from/to switch precedes: ' + arg + '\n')
            sys.exit(1)
        elif '*' in arg:
            for f in glob.glob(os.path.join(root, arg)):
                do(f, adding)
        else:
            do(os.path.join(root, arg), adding)
class GSCheckDocRefs(GSTestCase):
    """Test that a set of documents references all of a set of files.

    root    -- directory that every file argument is resolved against.
    arglist -- switch/file list in the form accepted by mainArgs.
    """

    def __init__(self, root, arglist):
        self.root = root
        self.arglist = arglist
        GSTestCase.__init__(self)

    def _fromDocs(self):
        """Return the sorted list of 'from' documents named by the arguments.

        The documents are only recorded by name here, not read, and every
        'to' argument is ignored.
        """
        refs = DocRefs()
        mainArgs(self.arglist, self.root, refs.doFrom, lambda f, b: None)
        refs.cleanup()
        # A sorted list (not a dictionary view) can be indexed by callers and
        # gives the same description on every run.
        return sorted(refs.From)

    def shortDescription(self):
        """Return a one-line description naming the 'from' documents."""
        # join() copes with an empty document list, unlike reduce().
        docs = ' or '.join(self._fromDocs())
        return docs + ' must reference all relevant files.'

    def runTest(self):
        """Compare the references in the documents with the 'to' files.

        Two problems are collected and handed to self.failIfMessages:
        'to' files that no document references, and referenced paths that
        lie directly in a scanned directory but are not 'to' files.
        """
        refs = DocRefs()
        mainArgs(self.arglist, self.root, refs.doFromDoc, refs.doTo)
        refs.cleanup()
        docs = self._fromDocs()
        if len(docs) == 1:
            # Escape '%' so a document name cannot disturb the formatting.
            doc = docs[0].replace('%', '%%')
            fromFormat = doc + ' fails to reference these %d files:'
            toFormat = doc + ' references these %d files that do not exist:'
        else:
            fromFormat = 'These %d files are not referenced:'
            toFormat = 'These %d files are referenced but do not exist:'
        messages = []

        # Files that should be referenced but are not.
        noFrom = sorted(f for f in refs.to if f not in refs.From)
        if noFrom:
            messages.append(fromFormat % len(noFrom))
            messages += noFrom

        # Referenced paths that are missing from the 'to' set.  This must walk
        # the references (From); walking 'to' itself can never find a path
        # that is absent from 'to'.  Only paths directly inside a scanned
        # directory are considered, so references elsewhere are not reported.
        noTo = sorted(f for f in refs.From
                      if f not in refs.to
                      and os.path.dirname(f) in refs.toDir)
        if noTo:
            messages.append(toFormat % len(noTo))
            messages += noTo

        self.failIfMessages(messages)
# Add the tests defined in this file to a suite.
def addTests(suite, gsroot, **args):
    """Add one GSCheckDocRefs test per built-in argument list to suite.

    suite  -- object whose addTest method receives each test case.
    gsroot -- root directory handed to every GSCheckDocRefs instance.
    args   -- extra keyword arguments; accepted and ignored.
    """
    for switch in ('+src', '+lib', '+tests'):
        check = GSCheckDocRefs(gsroot, [switch])
        suite.addTest(check)
# When executed as a script, hand addTests to the gstestutils runner.
if __name__ == "__main__":
    gsRunTestsMain(addTests)