File: Main.hs

package info (click to toggle)
haskell-unicode-collation 0.1.3.6-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 10,012 kB
  • sloc: haskell: 1,566; makefile: 3
file content (97 lines) | stat: -rw-r--r-- 3,555 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where

import Text.Collate
import Data.Char (chr, ord)
import Data.Text (Text)
import qualified Data.Text.IO as T
import qualified Data.Text as T
import qualified Data.Text.Read as TR
import Data.List (sortBy, find)
import System.Environment (getArgs)
import Control.Monad
import System.Exit
import Text.Printf
import System.IO
import Text.Collate.Normalize (toNFD)

main :: IO ()
main = do
  args <- getArgs

  let isHelp "-h"     = True
      isHelp "--help" = True
      isHelp _        = False

  when (any isHelp args) $ do
    putStrLn "Usage:    unicode-collate [COLLATION]"
    putStrLn "Options:"
    putStrLn "          --hex     Parse input as hex code points"
    putStrLn "          --help    Print usage information"
    putStrLn "          --list    List supported collations"
    putStrLn "          --verbose Include diagnostic information"
    putStrLn ""
    putStrLn "Sorts lines from stdin using the specified collation."
    putStrLn "COLLATION is a BCP47 language code. Examples:"
    putStrLn "unicode-collate                 # Use root collation"
    putStrLn "unicode-collate es              # Use standard Spanish collation"
    putStrLn "unicode-collate es-u-co-trad    # Use traditional Spanish"
    putStrLn "unicode-collate fr-CA           # Use Canadian French collation"
    putStrLn "unicode-collate fr-u-kb         # Use reverse accent order"
    putStrLn "unicode-collate fr-u-ka-shifted # Use Shifted variable weighting"
    exitSuccess

  when ("--list" `elem` args) $ do
    mapM_ (T.putStrLn . renderLang . fst) tailorings
    exitSuccess

  let isOpt ('-':_) = True
      isOpt _       = False
  spec <- maybe "root" T.pack . find (not . isOpt) <$> getArgs
  lang <- either handleError return $ parseLang spec
  let myCollator = collatorFor lang
  let verbose = "--verbose" `elem` args
  let codepoints = "--hex" `elem` args
  let opts = collatorOptions myCollator
  when verbose $ do
    hPutStrLn stderr "Options:"
    hPutStrLn stderr $ "  Tailoring:          " <>
                    maybe "ROOT" (T.unpack . renderLang) (optLang opts)
    hPutStrLn stderr $ "  Variable weighting: " <>
                      show (optVariableWeighting opts)
    hPutStrLn stderr $ "  French accents:     " <>
                      show (optFrenchAccents opts)
    hPutStrLn stderr $ "  Upper before lower: " <>
                      show (optUpperBeforeLower opts)
    hPutStrLn stderr $ "  Normalize:          " <>
                    show (optNormalize opts)
  let renderLine t = do
        t' <- if codepoints
                 then parseAsCodePoints t
                 else return t
        when verbose $
          hPutStrLn stderr $ renderCodePoints
            (toNFD $ map ord $ T.unpack t') ++ "; # ("
            ++ T.unpack t' ++ ") " ++ renderSortKey (sortKey myCollator t')
        T.putStrLn t'
  T.getContents >>= mapM_ renderLine . sortBy (collate myCollator) . T.lines

renderCodePoints :: [Int] -> String
renderCodePoints = unwords . map (printf "%04X")

parseAsCodePoints :: Text -> IO Text
parseAsCodePoints t = do
  let ws = T.words $ T.takeWhile (/=';') t -- everything after ; is ignored
  cs <- mapM parseCodePoint ws
  return $ T.pack $ map chr cs

parseCodePoint :: Text -> IO Int
parseCodePoint t =
  case TR.hexadecimal t of
    Right (x,t') | T.null t' -> return x
    _ -> handleError $ "Could not parse " <> show t <> " as hex code point."

handleError :: String -> IO a
handleError msg = do
  hPutStrLn stderr msg
  exitWith $ ExitFailure 1