1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
|
-- This script processes the following source file:
--
-- http://unicode.org/Public/UNIDATA/SpecialCasing.txt
module SpecialCasing
(
SpecialCasing(..)
, Case(..)
, parseSC
, mapSC
) where
import Arsec
import Data.Bits
data SpecialCasing = SC { scComments :: [Comment], scCasing :: [Case] }
deriving (Show)
data Case = Case {
code :: Char
, lower :: [Char]
, title :: [Char]
, upper :: [Char]
, conditions :: String
, name :: String
} deriving (Eq, Ord, Show)
entries :: Parser SpecialCasing
entries = SC <$> many comment <*> many (entry <* many comment)
where
entry = Case <$> unichar <* semi
<*> unichars
<*> unichars
<*> unichars
<*> manyTill anyToken (string "# ")
<*> manyTill anyToken (char '\n')
parseSC :: FilePath -> IO (Either ParseError SpecialCasing)
parseSC name = parse entries name <$> readFile name
mapSC :: String -> (Case -> String) -> (Char -> Char) -> SpecialCasing
-> [String]
mapSC which access twiddle (SC _ ms) =
typ ++ map printUnusual ms' ++ map printUsual usual ++ [last]
where
ms' = filter p ms
p c = [k] /= a && a /= [twiddle k] && null (conditions c)
where a = access c
k = code c
unusual = map code ms'
usual = filter (\c -> twiddle c /= c && c `notElem` unusual) [minBound..maxBound]
typ = [which ++ "Mapping :: Char# -> _ {- unboxed Int64 -}"
,"{-# NOINLINE " ++ which ++ "Mapping #-}"
,which ++ "Mapping = \\case"]
last = " _ -> unI64 0"
printUnusual c = " -- " ++ name c ++ "\n" ++
" " ++ showC (code c) ++ "# -> unI64 " ++ show (ord x + (ord y `shiftL` 21) + (ord z `shiftL` 42))
where x:y:z:_ = access c ++ repeat '\0'
printUsual c = " " ++ showC c ++ "# -> unI64 " ++ show (ord (twiddle c))
ucFirst (c:cs) = toUpper c : cs
ucFirst [] = []
|