File: AllNonAsciiChars.hs

package info (click to toggle)
agda-stdlib 0.8-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 1,768 kB
  • ctags: 1
  • sloc: haskell: 119; sh: 18; makefile: 15; lisp: 1
file content (39 lines) | stat: -rw-r--r-- 1,082 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
-- | This module extracts all the non-ASCII characters used by the
-- library code (along with how many times they are used).

module Main where

import qualified Data.List as L
import Data.Char
import Data.Function
import Control.Applicative
import Numeric ( showHex )
import System.FilePath.Find
import System.IO

-- | Read the entire contents of the file @f@, decoding it as UTF-8.
--
-- The file is read strictly: the whole string is forced while the handle
-- is still open, and 'withFile' closes the handle before this action
-- returns (also when an exception is raised).  Reading many files in a
-- row therefore never keeps more than one file handle open at a time.
readUTF8File :: FilePath -> IO String
readUTF8File f =
  withFile f ReadMode $ \h -> do
    hSetEncoding h utf8
    contents <- hGetContents h
    -- 'hGetContents' is lazy; walking the list with 'length' pulls the
    -- whole file in (and surfaces any decoding error) before 'withFile'
    -- closes the handle.
    length contents `seq` return contents

-- | Find every @.agda@ and @.lagda@ file below @src@, count how often each
-- non-ASCII character occurs in them, and print one line per character in
-- the form @c (U+hex): count@, most frequent character first.
main :: IO ()
main = do
  -- The report prints the non-ASCII characters themselves.  The input is
  -- decoded as UTF-8, so encode the output the same way instead of relying
  -- on the locale: under a non-UTF-8 locale (e.g. LANG=C) 'putStrLn' would
  -- otherwise fail on these characters.
  hSetEncoding stdout utf8

  agdaFiles <- find always
                    (extension ==? ".agda" ||? extension ==? ".lagda")
                    "src"
  contents <- mapM readUTF8File agdaFiles

  let nonAsciiChars :: String
      nonAsciiChars = filter (not . isAscii) (concat contents)

      -- One (character, occurrences) pair per distinct character.
      -- Matching on (c : _) avoids a partial 'head'; 'L.group' never
      -- yields an empty run, so no entry is dropped by the pattern.
      counts :: [(Char, Int)]
      counts =
        [ (c, length cs) | cs@(c : _) <- L.group (L.sort nonAsciiChars) ]

      -- Highest count first.  Reversing the stable ascending sort keeps
      -- the original tie order (descending by character).
      table :: [(Char, Int)]
      table = reverse (L.sortBy (compare `on` snd) counts)

      -- Hexadecimal code point of a character, e.g. "3bb".
      codePoint :: Char -> String
      codePoint c = showHex (ord c) ""

      uPlus :: Char -> String
      uPlus c = "(U+" ++ codePoint c ++ ")"

  mapM_ (\(c, count) -> putStrLn (c : " " ++ uPlus c ++ ": " ++ show count))
        table