1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
|
"""
A command-line interface to the validating xmlproc parser. Prints error
messages and can output the parsed data in various formats.
Usage:
xvcmd.py [-c catalog] [-l language] [-o format] [urltodoc]
---Options:
catalog: path to catalog file to use to resolve public identifiers
language: ISO 639 language code for language to use in error messages
format: Format to output parsed XML. 'e': ESIS, 'x': canonical XML
No data will be output if this option is not specified
urltodoc: URL to the document to parse. (You can use plain file names
as well.) Can be omitted if a catalog is specified and contains
a DOCUMENT entry.
Catalog files with URLs that end in '.xml' are assumed to be XCatalogs,
all others are assumed to be SGML Open Catalogs.
If the -c option is not specified the environment variables XMLXCATALOG
and XMLSOCATALOG will be used (in that order).
"""
# --- INITIALIZATION
from xml.parsers.xmlproc import xmlval,catalog,xcatalog,xmlproc
import sys,getopt,os,outputters
# --- ERROR HANDLING
class MyErrorHandler(xmlval.ErrorHandler):
    """Error handler that prints every reported problem and tallies them.

    Each warning or error is written to standard output, prefixed with the
    position reported by the locator, and counted in the ``warnings`` and
    ``errors`` attributes respectively.
    """

    def __init__(self, locator):
        """Initialise the base handler with *locator* and zero both counters."""
        xmlval.ErrorHandler.__init__(self, locator)
        self.reset()

    def get_location(self):
        """Return the current position formatted as ``sysid:line:column``."""
        # self.locator is not assigned anywhere in this class; presumably the
        # base-class constructor stores it -- confirm against xmlval.
        where = self.locator
        return "%s:%d:%d" % (
            where.get_current_sysid(),
            where.get_line(),
            where.get_column(),
        )

    # NOTE: print is called with one parenthesised argument throughout, which
    # produces exactly the same output as the Python 2 print statement.

    def warning(self, msg):
        """Print *msg* as a warning at the current location and count it."""
        print("WARNING ON %s: %s" % (self.get_location(), msg))
        self.warnings += 1

    def error(self, msg):
        """Handle an ordinary error exactly like a fatal one."""
        self.fatal(msg)

    def fatal(self, msg):
        """Print *msg* at the current location and count it as an error."""
        print("%s: %s" % (self.get_location(), msg))
        self.errors += 1

    def reset(self):
        """Set the error and warning counters back to zero."""
        self.warnings = 0
        self.errors = 0
# --- MAIN PROGRAM
# --- Initialization
# Command-line driver: build a validating parser, apply the command-line
# options, optionally load a catalog, then parse every requested document
# and report the error/warning totals for each one.
#
# NOTE: print is always called with a single parenthesised argument so the
# output is identical to the Python 2 print statement used originally.

print("xmlproc version %s" % xmlval.version)

p = xmlval.XMLValidator()
err = MyErrorHandler(p)
p.set_error_handler(err)

# --- Interpreting options

try:
    (options, sysids) = getopt.getopt(sys.argv[1:], "c:l:o:")
except getopt.GetoptError:
    # An unknown option or a missing option argument used to end in a
    # traceback; report it like the other usage errors below instead.
    # sys.exc_info() is used rather than "except ... as" so that the
    # handler does not depend on a particular interpreter version.
    print("Error: %s" % sys.exc_info()[1])
    print("Usage: xvcmd.py [-c catalog] [-l language] [-o format] [urltodoc]")
    sys.exit(1)

cat = None   # catalog path first, later replaced by the loaded catalog object
pf = None    # parser factory handed to the catalog loader
for (opt, arg) in options:
    if opt == "-c":
        cat = arg
        # Presumably this factory selects the catalog format from the URL,
        # as described in the module docstring -- confirm against xcatalog.
        pf = xcatalog.FancyParserFactory()
    elif opt == "-l":
        try:
            p.set_error_language(arg)
        except KeyError:
            print("Error language '%s' not available" % arg)
    elif opt == "-o":
        if arg in ("e", "E"):
            p.set_application(outputters.ESISDocHandler())
        elif arg in ("x", "X"):
            p.set_application(outputters.Canonizer())
        else:
            # Best effort: an unknown format is reported, and parsing goes
            # ahead without an output handler being installed here.
            print("Error: Unknown output format " + arg)

# Without -c, fall back to the environment; XMLXCATALOG takes precedence
# over XMLSOCATALOG.
if cat is None:
    if "XMLXCATALOG" in os.environ:
        cat = os.environ["XMLXCATALOG"]
        pf = xcatalog.XCatParserFactory()
    elif "XMLSOCATALOG" in os.environ:
        cat = os.environ["XMLSOCATALOG"]
        pf = catalog.CatParserFactory()

if cat is not None:
    print("Parsing catalog file '%s'" % cat)
    cat = catalog.xmlproc_catalog(cat, pf)
    p.set_pubid_resolver(cat)

# With no document on the command line, the catalog must name one.
if not sysids:
    if cat is None:
        print("You must specify a system identifier if no catalog is used")
        sys.exit(1)

    doc_sysid = cat.get_document_sysid()
    if doc_sysid is None:
        print("You must specify a system identifier if the catalog has no " +
              "DOCUMENT entry")
        sys.exit(1)

    sysids = [doc_sysid]
    print("Parsing DOCUMENT '%s' from catalog" % sysids[0])

# --- Parsing

for sysid in sysids:
    print("")
    print("Parsing '%s'" % sysid)
    p.parse_resource(sysid)
    print("Parse complete, %d error(s) and %d warning(s)" %
          (err.errors, err.warnings))

    # Start the next document with fresh counters and a fresh parser state.
    err.reset()
    p.reset()
|