1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
|
#!/usr/bin/python
"""
A command-line interface to the validating xmlproc parser. Prints error
messages and can output the parsed data in various formats.
"""
# Help text printed by print_usage() after its error message. It lists the
# command-line options and explains how a catalog file is selected.
# NOTE(review): the -l entry cites ISO 3166, which defines country codes;
# language codes are ISO 639 -- confirm which codes set_error_language accepts.
usage=\
"""
Usage:
xvcmd.py [options] [urlstodocs]
---Options:
-c catalog: path to catalog file to use to resolve public identifiers
-l language: ISO 3166 language code for language to use in error messages
-o format: Format to output parsed XML. 'e': ESIS, 'x': canonical XML
and 'n': normalized XML. No data will be output if this
option is not specified.
urlstodocs: URLs to the documents to parse. (You can use plain file names
as well.) Can be omitted if a catalog is specified and contains
a DOCUMENT entry.
-n: Report qualified names as 'URI name'. (Namespace processing.)
--nowarn: Suppress warnings.
--entstck: Show entity stack on errors.
--rawxml: Show raw XML string where error occurred.
Catalog files with URLs that end in '.xml' are assumed to be XCatalogs,
all others are assumed to be SGML Open Catalogs.
If the -c option is not specified the environment variables XMLXCATALOG
and XMLSOCATALOG will be used (in that order).
"""
from xml.parsers.xmlproc import xmlval,catalog,xcatalog,xmlproc,_outputters
import sys, getopt, os, string
# --- Utilities
def print_usage(message):
    """Print *message* followed by the usage text, then exit with status 1."""
    # A single parenthesised argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    for text in (message, usage):
        print(text)
    raise SystemExit(1)
# --- Initialization
# Single-argument print(...) calls produce the same output under the Python 2
# print statement and the Python 3 print function.
print("xmlproc version %s" % xmlval.version)
p = xmlval.XMLValidator()

# --- Interpreting options
try:
    (options, sysids) = getopt.getopt(sys.argv[1:], "c:l:o:n",
                                      ["nowarn", "entstck", "rawxml"])
except getopt.error as e:
    # The exception must be converted explicitly: concatenating it directly
    # to a str raises TypeError and hides the real usage error.
    print_usage("Usage error: " + str(e))

# Defaults, overridden by the options handled below.
warnings = 1                    # cleared by --nowarn
entstack = 0                    # set by --entstck
rawxml = 0                      # set by --rawxml
cat = None                      # catalog path given with -c
pf = None                       # catalog parser factory, chosen later
namespaces = 0                  # set by -n
app = xmlproc.Application()     # replaced when -o selects an output format
err_lang = None                 # language given with -l, if it was accepted

# Output handler classes selectable with -o, keyed by lower-cased format
# letter. Only the chosen class is instantiated.
output_formats = {
    "e": _outputters.ESISDocHandler,
    "x": _outputters.Canonizer,
    "n": _outputters.DocGenerator,
}

for (flag, value) in options:
    if flag == "-c":
        cat = value
    elif flag == "-l":
        try:
            p.set_error_language(value)
            err_lang = value
        except KeyError:
            # Report the problem and carry on; err_lang is left unchanged.
            print("Error: Language '%s' not available" % value)
    elif flag == "-o":
        handler_class = output_formats.get(value.lower())
        if handler_class is None:
            print_usage("Error: Unknown output format " + value)
        else:
            app = handler_class()
    elif flag == "-n":
        namespaces = 1
    elif flag == "--nowarn":
        warnings = 0
    elif flag == "--entstck":
        entstack = 1
    elif flag == "--rawxml":
        rawxml = 1
# Acting on option settings
err = _outputters.MyErrorHandler(p, p.parser, warnings, entstack, rawxml)
p.set_error_handler(err)

if namespaces:
    # With -n, a NamespaceFilter is installed as the validator's application
    # and the real application is set on the filter.
    from xml.parsers.xmlproc import namespace
    nsf = namespace.NamespaceFilter(p)
    nsf.set_application(app)
    p.set_application(nsf)
else:
    p.set_application(app)

# Choose the catalog and its parser factory: an explicit -c catalog wins,
# then the XMLXCATALOG and XMLSOCATALOG environment variables, in that order.
if cat is not None:
    pf = xcatalog.FancyParserFactory(err_lang)
elif "XMLXCATALOG" in os.environ:
    cat = os.environ["XMLXCATALOG"]
    pf = xcatalog.XCatParserFactory(err_lang)
elif "XMLSOCATALOG" in os.environ:
    cat = os.environ["XMLSOCATALOG"]
    pf = catalog.CatParserFactory(err_lang)

if cat is not None:
    print("Parsing catalog file '%s'" % cat)
    # From here on 'cat' is the parsed catalog object, not the path.
    cat = catalog.xmlproc_catalog(cat, pf, err)
    p.set_pubid_resolver(cat)

# With no documents on the command line, fall back to the catalog's DOCUMENT
# entry; print_usage() exits if there is nothing to parse.
if not sysids:
    if cat is None:
        print_usage("You must specify a system identifier if no catalog is "
                    "used")
    else:
        doc_sysid = cat.get_document_sysid()
        if doc_sysid is None:
            print_usage("You must specify a system identifier if the catalog has "
                        "no DOCUMENT entry")
        sysids = [doc_sysid]
        print("Parsing DOCUMENT '%s' from catalog" % sysids[0])
# --- Parsing
# Parse each document in turn, print its error (and optionally warning)
# totals, then reset the error handler and the validator for the next one.
for sysid in sysids:
    print("")
    print("Parsing '%s'" % sysid)
    p.parse_resource(sysid)
    print("")

    # Build the summary as one line; the warning count is appended only when
    # warnings are being reported.
    summary = "Parse complete, %d error(s)" % err.errors
    if warnings:
        summary = "%s and %d warning(s)" % (summary, err.warnings)
    print(summary)

    err.reset()
    p.reset()
|