File: Pure.hs

package info (click to toggle)
haskell-text-icu 0.8.0.5-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 536 kB
  • sloc: haskell: 1,210; ansic: 1,147; makefile: 4
file content (147 lines) | stat: -rw-r--r-- 5,436 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
-- |
-- Module      : Data.Text.ICU.Spoof.Pure
-- Copyright   : (c) 2015 Ben Hamilton
--
-- License     : BSD-style
-- Maintainer  : bgertzfield@gmail.com
-- Stability   : experimental
-- Portability : GHC
--
-- Pure string spoof checking functions for Unicode, implemented as
-- bindings to the International Components for Unicode (ICU)
-- libraries.
--
-- For the impure spoof checking API (which is richer, but less easy to
-- use), see the "Data.Text.ICU.Spoof" module.

module Data.Text.ICU.Spoof.Pure
    (
    -- * Types
      Spoof
    , SpoofParams(..)
    , spoof
    , spoofWithParams
    , spoofFromSource
    , spoofFromSerialized
    -- * String spoof checks
    , areConfusable
    , getSkeleton
    , spoofCheck
    -- * Configuration
    , getAllowedLocales
    , getChecks
    , getRestrictionLevel
    -- * Persistence
    , serialize
    ) where

import Data.ByteString (ByteString)
import Data.Foldable (forM_)
import Data.Text (Text)
import Data.Text.ICU.Spoof.Internal (Spoof(..))
import System.IO.Unsafe (unsafePerformIO)
import qualified Data.Text.ICU.Spoof as S

-- | Used to configure a 'Spoof' checker via 'spoofWithParams'.
--
-- Every field is optional: a field left as 'Nothing' is not applied to
-- the checker, so the corresponding default stays in effect.
data SpoofParams = SpoofParams
  { -- | Optional 'S.SpoofCheck's to perform on a string. By default,
    -- performs all checks except 'S.CharLimit'.
    spoofChecks :: Maybe [S.SpoofCheck]
    -- | Optional 'S.RestrictionLevel' to which characters in the string
    -- will be limited. By default, uses 'S.HighlyRestrictive'.
  , level :: Maybe S.RestrictionLevel
    -- | Optional locale(s) whose scripts will be used to limit the set
    -- of allowed characters in a string. If set, automatically enables
    -- the 'S.CharLimit' spoof check.
  , locales :: Maybe [String]
  }
  deriving (Show, Eq)

-- | Push every setting present in the 'SpoofParams' into the given
-- mutable checker and hand that same checker back.
--
-- Each field is a 'Maybe'; 'forM_' runs the matching setter only for a
-- 'Just' value and does nothing for 'Nothing'.
--
-- The mutation is wrapped in 'unsafePerformIO', so it takes place when
-- the returned checker is first demanded.
applySpoofParams :: SpoofParams -> S.MSpoof -> S.MSpoof
applySpoofParams params checker = unsafePerformIO $ do
  forM_ (spoofChecks params) (S.setChecks checker)
  forM_ (level params) (S.setRestrictionLevel checker)
  forM_ (locales params) (S.setAllowedLocales checker)
  return checker

-- | An immutable 'Spoof' checker opened with default options (all
-- 'S.SpoofCheck's except 'S.CharLimit').
--
-- This is a top-level value built with 'unsafePerformIO'; the
-- @NOINLINE@ pragma keeps GHC from inlining the definition at use sites.
spoof :: Spoof
spoof = unsafePerformIO (fmap S S.open)
{-# NOINLINE spoof #-}

-- | Open an immutable 'Spoof' checker whose behavior is controlled by
-- the given 'SpoofParams'.
--
-- A fresh checker is opened, the parameters are applied to it, and the
-- result is wrapped as a 'Spoof'.
spoofWithParams :: SpoofParams -> Spoof
spoofWithParams params =
  unsafePerformIO (fmap (S . applySpoofParams params) S.open)

-- | Open an immutable 'Spoof' checker with specific 'SpoofParams' to
-- control its behavior, using custom rules.
--
-- The rules are given as a pair holding the UTF-8 encoded contents of
-- the @confusables.txt@ and @confusablesWholeScript.txt@ files (in that
-- order), as described in
-- <http://unicode.org/reports/tr39/ Unicode UTS #39>.
spoofFromSource :: (ByteString, ByteString) -> SpoofParams -> Spoof
spoofFromSource sources@(_, _) params =
  unsafePerformIO $
    fmap (S . applySpoofParams params) (S.openFromSource sources)

-- | Create an immutable 'Spoof' checker with specific 'SpoofParams' to
-- control its behavior, using custom rules previously returned by
-- 'serialize'.
spoofFromSerialized :: ByteString -> SpoofParams -> Spoof
spoofFromSerialized serialized params =
  unsafePerformIO $
    fmap (S . applySpoofParams params) (S.openFromSerialized serialized)

-- | Check whether two strings are confusable with each other, using the
-- given checker.
areConfusable :: Spoof -> Text -> Text -> S.SpoofCheckResult
areConfusable (S checker) lhs rhs =
  unsafePerformIO (S.areConfusable checker lhs rhs)

-- | Check a single string for spoofing issues, using the given checker.
spoofCheck :: Spoof -> Text -> S.SpoofCheckResult
spoofCheck (S checker) input = unsafePerformIO (S.spoofCheck checker input)

-- | Generate a re-usable \"skeleton\" string for the input. Skeletons
-- can be compared (via Unicode equality) to check whether an identifier
-- is confusable with any of some large set of existing identifiers.
--
-- If you cache the returned strings in storage, you /must/ invalidate
-- your cache any time the underlying confusables database changes
-- (i.e., on ICU upgrade).
--
-- By default, assumes all input strings have been passed through
-- 'toCaseFold' and are lower-case. To change this, pass
-- 'S.SkeletonAnyCase'.
--
-- By default, builds skeletons which catch visually confusable
-- characters across multiple scripts. Pass 'S.SkeletonSingleScript' to
-- override that behavior and build skeletons which catch visually
-- confusable characters within a single script.
getSkeleton :: Spoof -> Maybe S.SkeletonTypeOverride -> Text -> Text
getSkeleton (S checker) override input =
  unsafePerformIO (S.getSkeleton checker override input)

-- | Get the restriction level currently configured in the spoof
-- checker, or 'Nothing' if none is present.
getRestrictionLevel :: Spoof -> Maybe S.RestrictionLevel
getRestrictionLevel (S checker) =
  unsafePerformIO (S.getRestrictionLevel checker)

-- | Get the list of checks currently configured in the spoof checker.
getChecks :: Spoof -> [S.SpoofCheck]
getChecks (S checker) = unsafePerformIO (S.getChecks checker)

-- | Get the locales whose scripts are currently allowed by the spoof
-- checker.
--
-- Locales are returned as plain 'String's rather than 'LocaleName',
-- since the root and default locales have no meaning here.
getAllowedLocales :: Spoof -> [String]
getAllowedLocales (S checker) = unsafePerformIO (S.getAllowedLocales checker)

-- | Serialize the rules in this spoof checker to a 'ByteString',
-- suitable for re-use by 'spoofFromSerialized'.
--
-- Only includes data provided via 'spoofFromSource' (that is, to
-- 'S.openFromSource'). Does not include any other state or
-- configuration.
serialize :: Spoof -> ByteString
serialize (S checker) = unsafePerformIO (S.serialize checker)

-- Inlining hint for 'spoofCheck', which is defined earlier in this
-- module; the pragma is kept separate from the definition here.
{-# INLINE spoofCheck #-}