File: CaseFolding.hs

package info (click to toggle)
ghc 9.6.6-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, trixie
  • size: 158,216 kB
  • sloc: haskell: 648,228; ansic: 81,656; cpp: 11,808; javascript: 8,444; sh: 5,831; fortran: 3,527; python: 3,277; asm: 2,523; makefile: 2,298; yacc: 1,570; lisp: 532; xml: 196; perl: 145; csh: 2
file content (46 lines) | stat: -rw-r--r-- 1,386 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
-- This script processes the following source file:
--
--   http://unicode.org/Public/UNIDATA/CaseFolding.txt

module CaseFolding
    (
      CaseFolding(..)
    , Fold(..)
    , parseCF
    , mapCF
    ) where

import Arsec
import Data.Bits

data Fold = Fold {
      code :: Char
    , status :: Char
    , mapping :: [Char]
    , name :: String
    } deriving (Eq, Ord, Show)

data CaseFolding = CF { cfComments :: [Comment], cfFolding :: [Fold] }
                 deriving (Show)

entries :: Parser CaseFolding
entries = CF <$> many comment <*> many (entry <* many comment)
  where
    entry = Fold <$> unichar <* semi
                 <*> oneOf "CFST" <* semi
                 <*> unichars
                 <*> (string "# " *> manyTill anyToken (char '\n'))

parseCF :: FilePath -> IO (Either ParseError CaseFolding)
parseCF name = parse entries name <$> readFile name

mapCF :: CaseFolding -> [String]
mapCF (CF _ ms) = typ ++ map printUnusual (filter (\f -> status f `elem` "CF") ms) ++ [last]
  where
    typ = ["foldMapping :: Char# -> _ {- unboxed Int64 -}"
           ,"{-# NOINLINE foldMapping #-}"
           ,"foldMapping = \\case"]
    last = "  _ -> unI64 0"
    printUnusual c = "  -- " ++ name c ++ "\n" ++
             "  " ++ showC (code c) ++ "# -> unI64 "  ++ show (ord x + (ord y `shiftL` 21) + (ord z `shiftL` 42))
       where x:y:z:_ = mapping c ++ repeat '\0'