1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
|
{-# LANGUAGE PatternGuards #-}
module Text.CharRanges
( Range(..)
, range
, single
, CharSet
, toSet
, member
) where
import Data.List
import Data.Set (Set)
import qualified Data.Set as Set
data Range = Single {-# UNPACK #-} !Char
| Range {-# UNPACK #-} !Char {-# UNPACK #-} !Char
deriving (Eq, Show)
newtype CharRange = CR { unCR :: Range }
-- | A rather hacked-up instance.
-- This is to support fast lookups using 'Data.Set' (see 'toSet').
-- x == y iff x and y overlap
instance Eq CharRange where
CR (Single x) == CR (Single y) = x == y
CR (Single a) == CR (Range x y) = x <= a && a <= y
CR (Range x y) == CR (Single a) = x <= a && a <= y
CR (Range lx ux) == CR (Range ly uy) = (lx <= uy && ly <= ux)
|| (lx <= uy && ly <= ux) -- INTENTIONAL
-- For some strange reason GHC
-- (7.6.3) seems to have problems
-- optimizing this expressions
-- without the additional or
instance Ord CharRange where
CR (Single x) <= CR (Single y) = x <= y
CR (Single x) <= CR (Range y _) = x <= y
CR (Range _ x) <= CR (Single y) = x <= y
CR (Range _ x) <= CR (Range y _) = x <= y
newtype CharSet = CharSet (Set CharRange)
-- | Allows quick lookups using ranges.
toSet :: [Range] -> CharSet
toSet = CharSet . Set.fromDistinctAscList . prepareRanges
where prepareRanges :: [Range] -> [CharRange]
prepareRanges = go . sort . map CR -- we could use unsafeCoerce to
-- avoid the cost of mapping
go (r1:r2:rs) | Just r' <- maybeMergeRanges r1 r2 = go (r':rs)
| rss@(r3:rs') <- go (r2:rs) =
case maybeMergeRanges r1 r3 of
Nothing -> r1:rss
Just r' -> r':rs'
go rs = rs
maybeMergeRanges :: CharRange -> CharRange -> Maybe CharRange
maybeMergeRanges x y = if x == y -- overlap
then Just . CR $ minMax (unCR x) (unCR y)
else Nothing
{-# INLINE maybeMergeRanges #-}
minMax :: Range -> Range -> Range
minMax (Range lx ux) (Range ly uy) = Range (min lx ly) (max ux uy)
minMax (Single _) y = y
minMax x (Single _) = x
{-# INLINE minMax #-}
range :: Char -> Char -> Range
range x y = if x < y then Range x y
else error "range: x not smaller than y"
{-# INLINE range #-}
single :: Char -> Range
single = Single
{-# INLINE single #-}
member :: Char -> CharSet -> Bool
member x (CharSet cs) = Set.member (CR $ Single x) cs
|