File: CaseFolding.hs

package info (click to toggle)
haskell-text 1.2.3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 956 kB
  • sloc: haskell: 9,554; ansic: 233; python: 87; ruby: 84; sh: 49; makefile: 38
file content (46 lines) | stat: -rw-r--r-- 1,432 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
-- This script processes the following source file:
--
--   http://unicode.org/Public/UNIDATA/CaseFolding.txt

module CaseFolding
    (
      CaseFolding(..)
    , Fold(..)
    , parseCF
    , mapCF
    ) where

import Arsec

-- | One data line of the case-folding file, as built by the @entry@
-- parser inside 'entries'.
data Fold = Fold {
      code :: Char
      -- ^ The character being folded (first field of the line).
    , status :: Char
      -- ^ Status letter; the parser only accepts @C@, @F@, @S@ or @T@
      -- (meanings are defined in the header of CaseFolding.txt).
    , mapping :: [Char]
      -- ^ The characters 'code' folds to. 'mapCF' emits at most the
      -- first three of them.
    , name :: String
      -- ^ The text that follows @"# "@ up to the end of the line;
      -- 'mapCF' emits it as a comment above the generated equation.
    } deriving (Eq, Ord, Show)

-- | Result of parsing a case-folding file with 'entries'.
--
-- 'cfComments' holds the comments that precede the first entry and
-- 'cfFolding' holds the entries in file order. Comments that appear
-- after an entry are consumed by the parser but not stored.
data CaseFolding = CF { cfComments :: [Comment], cfFolding :: [Fold] }
                 deriving (Show)

-- | Parser for a whole case-folding file: any number of leading comments,
-- followed by any number of entries, each of which may be trailed by
-- further comments (those trailing comments are discarded).
entries :: Parser CaseFolding
entries = do
    leading <- many comment
    folds   <- many (entry <* many comment)
    return (CF leading folds)
  where
    -- A single data line: @<char>; <status>; <mapping> # <name>\n@.
    entry = do
      ch   <- unichar <* semi
      st   <- oneOf "CFST" <* semi
      -- NOTE(review): no separator is parsed after 'unichars' here, so it
      -- presumably consumes the field's closing semicolon itself --
      -- confirm against its definition in Arsec.
      to   <- unichars
      -- The rest of the line after "# " is kept verbatim; the terminating
      -- newline is consumed but not included.
      desc <- string "# " *> manyTill anyToken (char '\n')
      return (Fold ch st to desc)

-- | Read the file at the given path and run 'entries' over its contents.
-- The path is also passed to the parser as the source name, so it shows
-- up in any 'ParseError' that is returned.
parseCF :: FilePath -> IO (Either ParseError CaseFolding)
parseCF path = do
    contents <- readFile path
    return (parse entries path contents)

-- | Render the folding table as lines of Haskell source that define
-- @foldMapping@.
--
-- The output consists of the type signature and a @NOINLINE@ pragma, one
-- equation per selected entry (preceded by a comment carrying the entry's
-- name, so each such element contains an embedded newline), and a final
-- catch-all equation that falls back to @toLower@.
--
-- Only entries with status @C@ or @F@ are selected, and of those only the
-- ones whose mapping differs from plain 'toLower' of the code point; the
-- catch-all equation already covers the rest.
mapCF :: CaseFolding -> [String]
mapCF (CF _ folds) = header ++ [equation f | f <- folds, wanted f] ++ [fallback]
  where
    header =
        [ "foldMapping :: forall s. Char -> s -> Step (CC s) Char"
        , "{-# NOINLINE foldMapping #-}"
        ]

    fallback = "foldMapping c s = Yield (toLower c) (CC s '\\0' '\\0')"

    wanted f = status f `elem` "CF" && mapping f /= [toLower (code f)]

    equation f = concat
        [ "-- ", name f, "\n"
        , "foldMapping ", showC (code f), " s = Yield ", x
        , " (CC s ", y, " ", z, ")"
        ]
      where
        -- The mapping is padded with NULs (and truncated) to exactly three
        -- characters, so this three-element pattern always matches.
        [x, y, z] = map showC (take 3 (mapping f ++ repeat '\0'))