1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
|
-- ------------------------------------------------------------
{- |
Module : HXmlParser
Copyright : Copyright (C) 2005-2010 Uwe Schmidt
License : MIT
Maintainer : Uwe Schmidt
Maintainer : uwe@fh-wedel.de
Stability : experimental
Portability: portable
HXmlParser - Minimal Validating XML Parser of the Haskell XML Toolbox, no HTTP supported
XML well-formed checker and validator.
this program may be used as example main program for the
arrow API of the Haskell XML Toolbox
commandline parameter evaluation and
and return code is the most complicated part
of this example application
-}
-- ------------------------------------------------------------
module Main
where
import Text.XML.HXT.Core -- import all stuff for parsing, validating, and transforming XML
import Text.XML.HXT.Curl -- import HTTP access
import Text.XML.HXT.RelaxNG -- import RelaxNG validation
import System.IO -- import the IO and commandline option stuff
import System.Environment
import System.Console.GetOpt
import System.Exit
-- ------------------------------------------------------------
-- |
-- the main program of the Haskell XML Validating Parser
main :: IO ()
main
= do
argv <- getArgs -- get the commandline arguments
(al, src) <- cmdlineOpts argv -- and evaluate them, return a key-value list
[rc] <- runX (parser al src) -- run the parser arrow
exitProg (rc >= c_err) -- set return code and terminate
-- ------------------------------------------------------------
exitProg :: Bool -> IO a
exitProg True = exitWith (ExitFailure (-1))
exitProg False = exitWith ExitSuccess
-- ------------------------------------------------------------
-- |
-- the /real/ main program
--
-- get wellformed document, validates document, propagates and check namespaces
-- and controls output
parser :: SysConfigList -> String -> IOSArrow b Int
parser config src
= configSysVars config -- set all global config options, the output file and the
>>> -- other user options are stored as key-value pairs in the stystem state
readDocument [ withCurl []
] src -- no more special read options needed
>>>
( ( traceMsg 1 "start processing document"
>>>
( processDocument $< getSysAttr "action" ) -- ask for the action stored in the key-value list of user defined values
>>>
traceMsg 1 "document processing finished"
)
`when`
documentStatusOk
)
>>>
traceSource
>>>
traceTree
>>>
( (writeDocument [] $< getSysAttr "output-file") -- ask for the output file stored in the system configuration
`whenNot`
( getSysAttr "no-output" >>> isA (== "1") ) -- ask for the no-output attr value in the system key-value list
)
>>>
getErrStatus
-- simple example of a processing arrow, selected by a command line option
processDocument :: String -> IOSArrow XmlTree XmlTree
processDocument "only-text"
= traceMsg 1 "selecting plain text"
>>>
processChildren (deep isText)
processDocument "indent"
= traceMsg 1 "indent document"
>>>
indentDoc
processDocument _action
= traceMsg 1 "default action: do nothing"
>>>
this
-- ------------------------------------------------------------
--
-- the options definition part
-- see doc for System.Console.GetOpt
progName :: String
progName = "HXmlParser"
options :: [OptDescr SysConfig]
options
= generalOptions
++
inputOptions
++
relaxOptions
++
outputOptions
++
showOptions
++
[ Option "f" ["output-file"] (ReqArg (withSysAttr "output-file") "FILE") "output file for resulting document (default: stdout)"
, Option "q" ["no-output"] (NoArg $ withSysAttr "no-output" "1") "no output of resulting document"
, Option "x" ["action"] (ReqArg (withSysAttr "action") "ACTION") "actions are: only-text, indent, no-op"
]
-- the last 2 option values will be stored by withAttr in the system key-value list
-- and can be read by getSysAttr key
usage :: [String] -> IO a
usage errl
| null errl
= do
hPutStrLn stdout use
exitProg False
| otherwise
= do
hPutStrLn stderr (concat errl ++ "\n" ++ use)
exitProg True
where
header = "HXmlParser - Validating XML Parser of the Haskell XML Toolbox with Arrow Interface\n" ++
"XML well-formed checker, DTD validator, RelaxNG validator, HTML parser.\n\n" ++
"Usage: " ++ progName ++ " [OPTION...] [URI or FILE]"
use = usageInfo header options
cmdlineOpts :: [String] -> IO (SysConfigList, String)
cmdlineOpts argv
= case (getOpt Permute options argv) of
(scfg,n,[])
-> do
sa <- src n
help (getConfigAttr a_help scfg) sa
return (scfg, sa)
(_,_,errs)
-> usage errs
where
src [] = return []
src [uri] = return uri
src _ = usage ["only one input uri or file allowed\n"]
help "1" _ = usage []
help _ [] = usage ["no input uri or file given\n"]
help _ _ = return ()
-- ------------------------------------------------------------
|