File: SpecialCasing.hs

package info (click to toggle)
ghc 9.10.3-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 168,924 kB
  • sloc: haskell: 713,548; ansic: 84,223; cpp: 30,255; javascript: 9,003; sh: 7,870; fortran: 3,527; python: 3,228; asm: 2,523; makefile: 2,326; yacc: 1,570; lisp: 532; xml: 196; perl: 111; csh: 2
file content (63 lines) | stat: -rw-r--r-- 1,968 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
-- This script processes the following source file:
--
--   http://unicode.org/Public/UNIDATA/SpecialCasing.txt

module SpecialCasing
    (
      SpecialCasing(..)
    , Case(..)
    , parseSC
    , mapSC
    ) where

import Arsec
import Data.Bits

data SpecialCasing = SC { scComments :: [Comment], scCasing :: [Case] }
                   deriving (Show)

data Case = Case {
      code :: Char
    , lower :: [Char]
    , title :: [Char]
    , upper :: [Char]
    , conditions :: String
    , name :: String
    } deriving (Eq, Ord, Show)

entries :: Parser SpecialCasing
entries = SC <$> many comment <*> many (entry <* many comment)
  where
    entry = Case <$> unichar <* semi
                 <*> unichars
                 <*> unichars
                 <*> unichars
                 <*> manyTill anyToken (string "# ")
                 <*> manyTill anyToken (char '\n')

parseSC :: FilePath -> IO (Either ParseError SpecialCasing)
parseSC name = parse entries name <$> readFile name

mapSC :: String -> (Case -> String) -> (Char -> Char) -> SpecialCasing
         -> [String]
mapSC which access twiddle (SC _ ms) =
    typ ++ map printUnusual ms' ++ map printUsual usual ++ [last]
  where
    ms' = filter p ms
    p c = [k] /= a && a /= [twiddle k] && null (conditions c)
        where a = access c
              k = code c
    unusual = map code ms'
    usual = filter (\c -> twiddle c /= c && c `notElem` unusual) [minBound..maxBound]

    typ = [which ++ "Mapping :: Char# -> _ {- unboxed Int64 -}"
           ,"{-# NOINLINE " ++ which ++ "Mapping #-}"
           ,which ++ "Mapping = \\case"]
    last = "  _ -> unI64 0"
    printUnusual c = "  -- " ++ name c ++ "\n" ++
             "  " ++ showC (code c) ++ "# -> unI64 " ++ show (ord x + (ord y `shiftL` 21) + (ord z `shiftL` 42))
       where x:y:z:_ = access c ++ repeat '\0'
    printUsual c = "  " ++ showC c ++ "# -> unI64 " ++ show (ord (twiddle c))

ucFirst (c:cs) = toUpper c : cs
ucFirst [] = []