1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
|
-- This script processes the following source file:
--
-- http://unicode.org/Public/UNIDATA/CaseFolding.txt
module CaseFolding
(
CaseFolding(..)
, Fold(..)
, parseCF
, mapCF
) where
import Arsec
import Data.Bits
data Fold = Fold {
code :: Char
, status :: Char
, mapping :: [Char]
, name :: String
} deriving (Eq, Ord, Show)
data CaseFolding = CF { cfComments :: [Comment], cfFolding :: [Fold] }
deriving (Show)
entries :: Parser CaseFolding
entries = CF <$> many comment <*> many (entry <* many comment)
where
entry = Fold <$> unichar <* semi
<*> oneOf "CFST" <* semi
<*> unichars
<*> (string "# " *> manyTill anyToken (char '\n'))
parseCF :: FilePath -> IO (Either ParseError CaseFolding)
parseCF name = parse entries name <$> readFile name
mapCF :: CaseFolding -> [String]
mapCF (CF _ ms) = typ ++ map printUnusual (filter (\f -> status f `elem` "CF") ms) ++ [last]
where
typ = ["foldMapping :: Char# -> _ {- unboxed Int64 -}"
,"{-# NOINLINE foldMapping #-}"
,"foldMapping = \\case"]
last = " _ -> unI64 0"
printUnusual c = " -- " ++ name c ++ "\n" ++
" " ++ showC (code c) ++ "# -> unI64 " ++ show (ord x + (ord y `shiftL` 21) + (ord z `shiftL` 42))
where x:y:z:_ = mapping c ++ repeat '\0'
|