1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
|
{-# LANGUAGE ParallelListComp, CPP #-}
module Data.Encoding.Preprocessor.XMLMappingBuilder where
import Data.Word
import Data.List
import Data.Ord
import Data.Char
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Array.Static.Builder
import Data.CharMap.Builder
import Data.Encoding.Preprocessor.XMLMapping
import Distribution.Simple.PreProcess
import System.FilePath
import Text.XML.HaXml.OneOfN
import Text.XML.HaXml.XmlContent
import Control.Exception (assert)
xmlPreprocessor :: PreProcessor
xmlPreprocessor = PreProcessor
{ platformIndependent = True
#if MIN_VERSION_Cabal(3,8,0)
, ppOrdering=unsorted
#endif
, runPreProcessor = \src trg verb -> do
createModuleFromFile src trg
}
description :: CharacterMapping -> Maybe String
description (CharacterMapping attrs _ _ _) = characterMappingDescription attrs
createModuleFromFile (sbase,sfile) (tbase,tfile) = do
xml <- testFile (sbase </> sfile)
let (dir,fn) = splitFileName sfile
let (bname,ext) = splitExtensions fn
let dirs = splitDirectories dir
let body = buildDecisionTree minBound maxBound "ch" (encodingElements xml)
let body2 = createDecoding (states xml) (decodingElements xml)
let mpname = "encoding_map_"++bname
let mp = buildCharMap $ [SingleMapping c w | (c,w) <- assignments xml]
++[ RangeMapping
st end
(foldl (\v (w,mi,ma) -> v*((fromIntegral $ ma-mi)+1) + (fromIntegral (w-mi))) 0 (zip3 bfirst bmin bmax))
[(min,max-min+1) | min <- bmin | max <- bmax]
| (st,end,bfirst,blast,bmin,bmax) <- ranges xml ]
writeFile (tbase</>tfile) $ unlines $
["{- This file has been auto-generated. Do not edit it. -}"
,"{-# LANGUAGE MagicHash,DeriveDataTypeable #-}"]++
(case description xml of
Nothing -> []
Just str -> ["{- | "++str++" -}"]) ++
["module "++concat (intersperse "." (dirs++[bname]))
," ("++bname++"("++bname++"))"
," where"
,""
,"import Control.Throws"
,"import Data.Encoding.Base"
,"import Data.Encoding.ByteSink"
,"import Data.Encoding.ByteSource"
,"import Data.Encoding.Exception"
,"import Data.Array.Static"
,"import Data.Map.Static"
,"import Data.CharMap"
,"import Data.Char"
,"import Data.Word"
,"import Data.Typeable"
,""
,"data "++bname++" = "++bname
," deriving (Eq,Show,Typeable)"
,""
,mpname++" :: CharMap"
,mpname++" = "++mp
,""
,"instance Encoding "++bname++" where"
," encodeChar _ ch = mapEncode ch "++mpname
," decodeChar _ = "++body2
," encodeable _ ch = mapMember ch "++mpname
]
decodingValueRange :: [(Word8,Word8)] -> DecodingElement -> (Int,Int)
decodingValueRange path (DecodingElement c ws)
= let v = foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo)+1) + (fromIntegral $ w - lo)) 0 (zip ws path)
in (v,v)
decodingValueRange path (DecodingRange first last bfirst blast bmin bmax)
= assert (zip bmin bmax == path) $
(decodingValue path bfirst
,decodingValue path blast)
decodingValue :: [(Word8,Word8)] -> [Word8] -> Int
decodingValue path ws
= foldl (\n (w,(lo,up)) -> n*((fromIntegral $ up-lo) + 1) + (fromIntegral $ w - lo))
0 (zip ws path)
type StateMachine = Map String [(Word8,Word8,String)]
createDecoding :: StateMachine -> [DecodingElement] -> String
createDecoding sm els = create' els [] 0 "FIRST"
where
create' els path n st = let trans = sortBy (\(s1,e1,st1) (s2,e2,st2) -> compare s1 s2) $ sm Map.! st
in "(fetchWord8 >>= \\w"++show n++" -> " ++ tree' n path els trans 0 255++")"
tree' :: Int -> [(Word8,Word8)] -> [DecodingElement] -> [(Word8,Word8,String)] -> Word8 -> Word8 -> String
tree' n path els [] _ _ = illWord $ "w"++show n
tree' n path els [(s,e,nst)] bl br
= let e1 = if s > bl
then "(if w"++show n++" < "++show s++" then "++illWord ("w"++show n)++" else "++e2++")"
else e2
e2 = if e < br
then "(if w"++show n++" > "++show e++" then "++illWord ("w"++show n)++" else "++e3++")"
else e3
e3 = if nst == "VALID"
then array' rpath sels
else "{- for "++nst++"-}" ++ create' nels npath (n+1) nst
npath = (s,e):path
rpath = reverse npath
sels = sortBy (comparing (decodingValueRange rpath)) nels
nels = filter (\el -> let (ll,lr) = (decodingLimits el)!!n in ll>=s && lr <= e) els
in e1
tree' n path els trans bl br
= let (left,right@((b,_,_):_)) = splitAt (length trans `div` 2) trans
(eleft,eright) = partition (\el -> fst ((decodingLimits el)!!n) < b) els
in "(if w"++show n++" < "++show b++" then "++tree' n path eleft left bl (b-1)
++" else "++tree' n path eright right b br++")"
array' path els = let grps = groupBy (\e1 e2 -> case e1 of
DecodingRange _ _ _ _ _ _ -> False
_ -> case e2 of
DecodingRange _ _ _ _ _ _ -> False
_ -> True
) els
ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path
val = foldl (\expr (r,n,m) -> "("++expr++"*"++show r++"+(fromIntegral w"++show n++"-"++show m++"))")
"0"
(zip3 ranges [0..] (map fst path))
offset = (product ranges)-1
in "(let val = " ++ val ++ " in "++array'' path grps 0 offset++")"
array'' path [] _ _ = "throwException (IllegalRepresentation ["++concat (intersperse "," (zipWith (\n _ -> "w"++show n) [0..] path))++"])"
array'' path [grp] lo up
= case grp of
[DecodingRange first end bfirst bend bmin bmax] ->
let ranges = map (\(l,u) -> (fromIntegral $ u-l)+1) path
off = foldl (\v (r,c,m) -> v*r+(fromIntegral $ c-m)) 0 (zip3 ranges bfirst bmin)
equalranges = and $ zipWith (==) path (zip bmin bmax)
in if equalranges
then "(return (chr (val + ("++show (ord first - off)++"))))"
else error "Can't have a range that has a different range..."
_ -> let chars = fillRange lo $ map (\el@(DecodingElement c _) -> (c,fst $ decodingValueRange path el)) grp
in "(return (("++buildStaticArray (lo,up) chars++")!val))"
array'' path grps lo up = let (left,right@(brk:_)) = splitAt (length grps `div` 2) grps
(off,_) = decodingValueRange path (head brk)
in "(if val < "++show off++" then "++array'' path left lo (off-1)
++" else "++array'' path right off up++")"
fillRange :: Int -> [(Char,Int)] -> [Char]
fillRange s [] = []
fillRange s all@((c,i):cs) = case compare i s of
GT -> '\0':fillRange (s+1) all
LT -> error $ "Char out of range "++show (take 10 all)
EQ -> c:fillRange (s+1) cs
states :: CharacterMapping -> StateMachine
states (CharacterMapping attrs hist val ass)
= case val of
OneOf2 (Validity (NonEmpty lst)) -> Map.fromListWith (++) $
map (\st -> let BS [start] = stateS st
end = case stateE st of
Nothing -> start
Just (BS [rend]) -> rend
in (stateType st,[(start,end,stateNext st)])) lst
_ -> error "Mapping doesn't contain validity section"
decodingElements :: CharacterMapping -> [DecodingElement]
decodingElements mp = map (\(c,ws) -> DecodingElement c ws) (assignments mp)
++ map (\(fi,la,bfi,bla,bmi,bma) -> DecodingRange fi la bfi bla bmi bma) (ranges mp)
illWord :: String -> String
illWord n = "throwException (IllegalCharacter "++n++")"
decodingLimits :: DecodingElement -> [(Word8,Word8)]
decodingLimits (DecodingElement _ ws) = map (\w -> (w,w)) ws
decodingLimits (DecodingRange _ _ bfirst blast bmin bmax) = lim' False (zip4 bfirst blast bmin bmax)
where
lim' dec [] = []
lim' dec ((fi,la,mi,ma):xs) = if dec
then (mi,ma):(lim' dec xs)
else (fi,la):(lim' (fi/=la) xs)
decodingLength :: DecodingElement -> Int
decodingLength (DecodingRange _ _ first _ _ _) = length first
decodingLength (DecodingElement _ ws) = length ws
decodingElementCount :: DecodingElement -> Int
decodingElementCount (DecodingRange s e _ _ _ _) = ord e - ord s
decodingElementCount (DecodingElement _ _) = 1
data DecodingElement
= DecodingRange Char Char [Word8] [Word8] [Word8] [Word8]
| DecodingElement Char [Word8]
deriving Show
norep :: String -> String
norep var = "(throwException $ HasNoRepresentation "++var++")"
buildDecisionTree :: Char -> Char -> String -> [EncodingElement] -> String
buildDecisionTree l r var [] = norep var
buildDecisionTree l r var [el]
= let e1 = if l < startChar el
then "(if "++var++" < "++show (startChar el)++" then "++norep var++" else "++e2++")"
else e2
e2 = if r > endChar el
then "(if "++var++" > "++show (endChar el)++" then "++norep var++" else "++e3++")"
else e3
e3 = buildEncoding el var
in e1
buildDecisionTree ll lr var els
= let (l,r@(sep:_)) = splitAt (length els `div` 2) els
in "(if "++var++" < "++show (startChar sep)
++" then ("++(buildDecisionTree ll (pred $ startChar sep) var l)++")"
++" else ("++(buildDecisionTree (endChar sep) lr var r)++")"
++")"
buildEncoding :: EncodingElement -> String -> String
buildEncoding (EncodingRange start end bf bl bmin bmax) var
= let ranges :: [Int]
ranges = map fromIntegral $ zipWith (-) bmax bmin
in "(let num = (ord "++var++") - ("++show (ord start - (foldl (\n (r,vf,vm) -> n*(r+1) + (fromIntegral (vf-vm))) 0 (zip3 ranges bf bmin)))++")"
++concat ([ " ; (p"++show n++",r"++show n++") = "
++(if n==1 then "num" else "p"++show (n-1))
++" `divMod` "++show (r+1)
| r <- reverse ranges | n <- [1..] ])
++" in "
++concat (intersperse " >> " (reverse ["pushWord8 (fromIntegral (r"++show n++" + "++show w++"))" | n <- [1..] | w <- reverse bmin]))
++")"
buildEncoding (EncodingGroup start end encs) var
= let findParams st [] = st
findParams st (x:xs) = findParams (case compare (length x) (fst st) of
LT -> (fst st,False)
GT -> (length x,False)
EQ -> st) xs
(mx,same) = findParams (length $ head encs,True) (tail encs)
in if same
then ("(let off = "++show mx++"*(ord "++var++" - "++show (ord start)++") ; arr = "
++buildStaticArray (0,(length encs)*mx-1) (concat encs)
++" in "
++concat (intersperse " >> " ["pushWord8 (arr!(off+"++show (n-1)++"))" | n <- [1..mx]])
++")")
else ("(let off = "++show (mx+1)++"*((ord "++var++") - "++show (ord start)++") ; arr = "
++buildStaticArray (0,(length encs)*(mx+1)-1)
(concat [(fromIntegral $ length e)
:(e++replicate (mx-length e) 0) | e <- encs])
++ "::StaticArray Int Word8"
++" ; len = fromIntegral (arr!off)::Int ; bytes = map (\\n -> arr!(off+n)) [1..len]"
++" in mapM_ pushWord8 bytes)")
data EncodingElement
= EncodingRange Char Char [Word8] [Word8] [Word8] [Word8]
| EncodingGroup Char Char [[Word8]]
deriving Show
mergeBy :: (a -> a -> Ordering) -> [a] -> [a] -> [a]
mergeBy f [] ys = ys
mergeBy f xs [] = xs
mergeBy f (x:xs) (y:ys)
= case f x y of
LT -> x:mergeBy f xs (y:ys)
_ -> y:mergeBy f (x:xs) ys
startChar :: EncodingElement -> Char
startChar (EncodingRange c _ _ _ _ _) = c
startChar (EncodingGroup c _ _) = c
endChar :: EncodingElement -> Char
endChar (EncodingRange _ c _ _ _ _) = c
endChar (EncodingGroup _ c _) = c
encodingElements :: CharacterMapping -> [EncodingElement]
encodingElements mp = mergeBy (comparing startChar)
(buildGroups $ sortAssignments $ assignments mp)
(encodingRanges $ ranges mp)
assignments :: CharacterMapping -> [(Char,[Word8])]
assignments (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges))
= map (\a -> let CP [cp] = aU a
BS bs = aB a
in (cp,bs)
) ass
encodingRanges :: [(Char,Char,[Word8],[Word8],[Word8],[Word8])] -> [EncodingElement]
encodingRanges lst = sortBy (comparing (\(EncodingRange c _ _ _ _ _) -> c)) $
map (\(ufirst,ulast,bfirst,blast,bmin,bmax) -> EncodingRange ufirst ulast bfirst blast bmin bmax) lst
ranges :: CharacterMapping -> [(Char,Char,[Word8],[Word8],[Word8],[Word8])]
ranges (CharacterMapping _ _ _ (Assignments _ ass _ _ _ ranges))
= map (\r -> let BS bfirst = rangeBFirst r
BS blast = rangeBLast r
CP [ufirst] = rangeUFirst r
CP [ulast] = rangeULast r
BS bmin = rangeBMin r
BS bmax = rangeBMax r
in (ufirst,ulast,bfirst,blast,bmin,bmax)
) ranges
sortAssignments :: [(Char,[Word8])] -> [(Char,[Word8])]
sortAssignments = sortBy (comparing fst)
buildGroups :: [(Char,[Word8])] -> [EncodingElement]
buildGroups [] = []
buildGroups ((c,bs):rest) = (EncodingGroup c end (bs:wrds)):buildGroups oth
where
(end,wrds,oth) = group c rest
group n [] = (n,[],[])
group n all@((c,bs):rest)
| succ n == c = let (e,res,oth) = group c rest
in (e,bs:res,oth)
| otherwise = (n,[],all)
|