1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
{-# LANGUAGE ForeignFunctionInterface, EmptyDataDecls #-}
{-# OPTIONS_GHC -fno-cse -fno-full-laziness #-}
-- | Low-level interface to Expat. Unless speed is paramount, this should
-- normally be avoided in favour of the interfaces provided by
-- 'Text.XML.Expat.SAX' and 'Text.XML.Expat.Tree', etc.
module Text.XML.Expat.Internal.IO (
HParser,
hexpatNewParser,
encodingToString,
Encoding(..),
XMLParseError(..),
XMLParseLocation(..)
) where
import Control.Applicative
import Control.DeepSeq
import qualified Data.ByteString as B
import qualified Data.ByteString.Internal as I
import Data.Int
import Data.Word
import Foreign
import Foreign.C
data Parser_struct
type ParserPtr = Ptr Parser_struct
data Encoding = ASCII | UTF8 | UTF16 | ISO88591
encodingToString :: Encoding -> String
encodingToString ASCII = "US-ASCII"
encodingToString UTF8 = "UTF-8"
encodingToString UTF16 = "UTF-16"
encodingToString ISO88591 = "ISO-8859-1"
withOptEncoding :: Maybe Encoding -> (CString -> IO a) -> IO a
withOptEncoding Nothing f = f nullPtr
withOptEncoding (Just enc) f = withCString (encodingToString enc) f
-- ByteString.useAsCStringLen is almost what we need, but C2HS wants a CInt
-- instead of an Int.
withBStringLen :: B.ByteString -> ((CString, CInt) -> IO a) -> IO a
withBStringLen bs f = do
B.useAsCStringLen bs $ \(str, len) -> f (str, fromIntegral len)
unStatus :: CInt -> Bool
unStatus 0 = False
unStatus _ = True
getError :: ParserPtr -> IO XMLParseError
getError pp = do
code <- xmlGetErrorCode pp
cerr <- xmlErrorString code
err <- peekCString cerr
loc <- getParseLocation pp
return $ XMLParseError err loc
-- |Obtain C value from Haskell 'Bool'.
--
cFromBool :: Num a => Bool -> a
cFromBool = fromBool
-- | Parse error, consisting of message text and error location
data XMLParseError = XMLParseError String XMLParseLocation deriving (Eq, Show)
instance NFData XMLParseError where
rnf (XMLParseError msg loc) = rnf (msg, loc)
-- | Specifies a location of an event within the input text
data XMLParseLocation = XMLParseLocation {
xmlLineNumber :: Int64, -- ^ Line number of the event
xmlColumnNumber :: Int64, -- ^ Column number of the event
xmlByteIndex :: Int64, -- ^ Byte index of event from start of document
xmlByteCount :: Int64 -- ^ The number of bytes in the event
}
deriving (Eq, Show)
instance NFData XMLParseLocation where
rnf (XMLParseLocation lin col ind cou) = rnf (lin, col, ind, cou)
getParseLocation :: ParserPtr -> IO XMLParseLocation
getParseLocation pp = do
line <- xmlGetCurrentLineNumber pp
col <- xmlGetCurrentColumnNumber pp
index <- xmlGetCurrentByteIndex pp
count <- xmlGetCurrentByteCount pp
return $ XMLParseLocation {
xmlLineNumber = fromIntegral line,
xmlColumnNumber = fromIntegral col,
xmlByteIndex = fromIntegral index,
xmlByteCount = fromIntegral count
}
-- Note on word sizes:
--
-- on expat 2.0:
-- XML_GetCurrentLineNumber returns XML_Size
-- XML_GetCurrentColumnNumber returns XML_Size
-- XML_GetCurrentByteIndex returns XML_Index
-- These are defined in expat_external.h
--
-- debian-i386 says XML_Size and XML_Index are 4 bytes.
-- ubuntu-amd64 says XML_Size and XML_Index are 8 bytes.
-- These two systems do NOT define XML_LARGE_SIZE, which would force these types
-- to be 64-bit.
--
-- If we guess the word size too small, it shouldn't matter: We will just discard
-- the most significant part. If we get the word size too large, we will get
-- garbage (very bad).
--
-- So - what I will do is use CLong and CULong, which correspond to what expat
-- is using when XML_LARGE_SIZE is disabled, and give the correct sizes on the
-- two machines mentioned above. At the absolute worst the word size will be too
-- short.
foreign import ccall unsafe "expat.h XML_GetErrorCode" xmlGetErrorCode
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_GetCurrentLineNumber" xmlGetCurrentLineNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentColumnNumber" xmlGetCurrentColumnNumber
:: ParserPtr -> IO CULong
foreign import ccall unsafe "expat.h XML_GetCurrentByteIndex" xmlGetCurrentByteIndex
:: ParserPtr -> IO CLong
foreign import ccall unsafe "expat.h XML_GetCurrentByteCount" xmlGetCurrentByteCount
:: ParserPtr -> IO CInt
foreign import ccall unsafe "expat.h XML_ErrorString" xmlErrorString
:: CInt -> IO CString
type HParser = B.ByteString -> Bool -> IO (ForeignPtr Word8, CInt, Maybe XMLParseError)
foreign import ccall unsafe "hexpatNewParser"
_hexpatNewParser :: Ptr CChar -> CInt -> IO MyParserPtr
foreign import ccall unsafe "hexpatGetParser"
_hexpatGetParser :: MyParserPtr -> ParserPtr
data MyParser_struct
type MyParserPtr = Ptr MyParser_struct
foreign import ccall "&hexpatFreeParser" hexpatFreeParser :: FunPtr (MyParserPtr -> IO ())
hexpatNewParser :: Maybe Encoding
-> Maybe (B.ByteString -> Maybe B.ByteString) -- ^ Entity decoder
-> Bool -- ^ Whether to include input locations
-> IO (HParser, IO XMLParseLocation)
hexpatNewParser enc mDecoder locations =
withOptEncoding enc $ \cEnc -> do
parser <- newForeignPtr hexpatFreeParser =<< _hexpatNewParser cEnc (cFromBool locations)
return (parse parser, withForeignPtr parser $ \mp -> getParseLocation $ _hexpatGetParser mp)
where
parse parser = case mDecoder of
Nothing -> \text final ->
alloca $ \ppData ->
alloca $ \pLen ->
withBStringLen text $ \(textBuf, textLen) ->
withForeignPtr parser $ \pp -> do
ok <- unStatus <$> _hexpatParseUnsafe pp textBuf textLen (cFromBool final) ppData pLen
pData <- peek ppData
len <- peek pLen
err <- if ok
then return Nothing
else Just <$> getError (_hexpatGetParser pp)
fpData <- newForeignPtr funPtrFree pData
return (fpData, len, err)
Just decoder -> \text final ->
alloca $ \ppData ->
alloca $ \pLen ->
withBStringLen text $ \(textBuf, textLen) ->
withForeignPtr parser $ \pp -> do
eh <- mkCEntityHandler . wrapCEntityHandler $ decoder
_hexpatSetEntityHandler pp eh
ok <- unStatus <$> _hexpatParseSafe pp textBuf textLen (cFromBool final) ppData pLen
freeHaskellFunPtr eh
pData <- peek ppData
len <- peek pLen
err <- if ok
then return Nothing
else Just <$> getError (_hexpatGetParser pp)
fpData <- newForeignPtr funPtrFree pData
return (fpData, len, err)
foreign import ccall unsafe "hexpatParse"
_hexpatParseUnsafe :: MyParserPtr -> Ptr CChar -> CInt -> CInt -> Ptr (Ptr Word8) -> Ptr CInt -> IO CInt
foreign import ccall safe "hexpatParse"
_hexpatParseSafe :: MyParserPtr -> Ptr CChar -> CInt -> CInt -> Ptr (Ptr Word8) -> Ptr CInt -> IO CInt
type CEntityHandler = Ptr CChar -> IO (Ptr CChar)
foreign import ccall safe "wrapper"
mkCEntityHandler :: CEntityHandler
-> IO (FunPtr CEntityHandler)
peekByteStringLen :: CStringLen -> IO B.ByteString
{-# INLINE peekByteStringLen #-}
peekByteStringLen (cstr, len) =
I.create (fromIntegral len) $ \ptr ->
I.memcpy ptr (castPtr cstr) (fromIntegral len)
wrapCEntityHandler :: (B.ByteString -> Maybe B.ByteString) -> CEntityHandler
wrapCEntityHandler handler = h
where
h cname = do
sz <- fromIntegral <$> I.c_strlen cname
name <- peekByteStringLen (cname, sz)
case handler name of
Just text -> do
let (fp, offset, len) = I.toForeignPtr text
withForeignPtr fp $ \ctextBS -> do
ctext <- mallocBytes (len + 1) :: IO CString
I.memcpy (castPtr ctext) (ctextBS `plusPtr` offset) (fromIntegral len)
poke (ctext `plusPtr` len) (0 :: CChar)
return ctext
Nothing -> return nullPtr
foreign import ccall unsafe "hexpatSetEntityHandler"
_hexpatSetEntityHandler :: MyParserPtr -> FunPtr CEntityHandler -> IO ()
foreign import ccall "&free" funPtrFree :: FunPtr (Ptr Word8 -> IO ())
|