{-# LANGUAGE DeriveDataTypeable, ForeignFunctionInterface, ScopedTypeVariables #-}
-- |
-- Module : Data.Text.ICU.Collate.Pure
-- Copyright : (c) 2010 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com
-- Stability : experimental
-- Portability : GHC
--
-- Pure string collation functions for Unicode, implemented as
-- bindings to the International Components for Unicode (ICU)
-- libraries.
--
-- For the impure collation API (which is richer, but less easy to
-- use), see the "Data.Text.ICU.Collate" module.
module Data.Text.ICU.Collate.Pure
(
-- * Unicode collation API
-- $api
Collator
, collator
, collatorWith
, collatorFromRules
, collatorFromRulesWith
, collate
, collateIter
, rules
, sortKey
, uca
) where
import qualified Control.Exception as E
import Control.Monad (forM_)
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Text.ICU.Error.Internal (ParseError(..))
import Data.Text.ICU.Collate.Internal (Collator(..))
import Data.Text.ICU.Internal (CharIterator, LocaleName(..))
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Text.ICU.Collate as IO
-- $api
--
-- | Build an immutable 'Collator' for comparing strings in the given
-- locale.
--
-- Passing 'Root' as the locale selects the UCA collation rules.
collator :: LocaleName -> Collator
collator loc = unsafePerformIO (fmap C (IO.open loc))
-- | Build an immutable 'Collator' for the given locale, configured
-- with the supplied 'Attribute's.
--
-- The attributes are applied, in list order, to the freshly opened
-- collator before it is wrapped and returned.
collatorWith :: LocaleName -> [IO.Attribute] -> Collator
collatorWith loc atts = unsafePerformIO $ do
  mcol <- IO.open loc
  forM_ atts $ \att -> IO.setAttribute mcol att
  return (C mcol)
-- | Build an immutable 'Collator' from the given collation rules,
-- without setting any extra attributes.
--
-- This is 'collatorFromRulesWith' applied to an empty attribute list.
collatorFromRules :: Text -> Either ParseError Collator
collatorFromRules rul = collatorFromRulesWith rul noAttributes
  where
    noAttributes = []
-- | Build an immutable 'Collator' from the given collation rules,
-- configured with the supplied 'Attribute's.
--
-- A 'ParseError' thrown while opening the rules or applying the
-- attributes is returned as 'Left'; exceptions of any other type are
-- not caught here.
collatorFromRulesWith :: Text -> [IO.Attribute] -> Either ParseError Collator
collatorFromRulesWith rul atts = unsafePerformIO (E.handle onParseError build)
  where
    -- Only 'ParseError' is intercepted; the handler's type pins that down.
    onParseError :: ParseError -> IO (Either ParseError Collator)
    onParseError = return . Left

    -- Open a collator from the rules, then apply each attribute in order.
    build :: IO (Either ParseError Collator)
    build = do
      mcol <- IO.openRules rul Nothing Nothing
      forM_ atts (IO.setAttribute mcol)
      return (Right (C mcol))
-- | Retrieve the collation rules of the given 'Collator' as 'Text'.
rules :: Collator -> Text
rules (C mcol) = unsafePerformIO (IO.getRules mcol)
-- | Compare two strings according to the given 'Collator'.
collate :: Collator -> Text -> Text -> Ordering
collate (C mcol) lhs rhs = unsafePerformIO (IO.collate mcol lhs rhs)
{-# INLINE collate #-}
-- | Compare two 'CharIterator's according to the given 'Collator'.
--
-- An iterator that was constructed from a 'ByteString' does not need
-- to be copied or converted beforehand, so this function can be
-- quite cheap.
collateIter :: Collator -> CharIterator -> CharIterator -> Ordering
collateIter (C mcol) lhs rhs = unsafePerformIO (IO.collateIter mcol lhs rhs)
{-# INLINE collateIter #-}
-- | Produce a sort key for the 'Text' using the given 'Collator'.
--
-- Comparing two 'ByteString's obtained from 'sortKey' gives the same
-- result as calling 'collate' on the two original 'Text's.
sortKey :: Collator -> Text -> ByteString
-- Only the collator is bound on the left-hand side, so the INLINE
-- pragma still applies to partial applications such as @sortKey col@.
sortKey (C mcol) = \txt -> unsafePerformIO (IO.sortKey mcol txt)
{-# INLINE sortKey #-}
-- | A ready-made 'Collator' for the Unicode Collation Algorithm (UCA),
-- obtained by passing the 'Root' locale to 'collator'.
uca :: Collator
-- 'collator' runs its open call under 'unsafePerformIO'; NOINLINE stops
-- GHC from copying that call to each use site of 'uca'.
{-# NOINLINE uca #-}
uca = collator Root