1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
|
{-# LANGUAGE GADTs #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE CPP #-}
module Text.Regex.Applicative.Types where
import Control.Applicative
import Control.Monad ((<=<))
import Data.Filtrable (Filtrable (..))
import Data.Functor.Identity (Identity (..))
import Data.String
#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup
#endif
newtype ThreadId = ThreadId Int
-- | A thread either is a result or corresponds to a symbol in the regular
-- expression, which is expected by that thread.
data Thread s r
= Thread
{ threadId_ :: ThreadId
, _threadCont :: s -> [Thread s r]
}
| Accept r
-- | Returns thread identifier. This will be 'Just' for ordinary threads and
-- 'Nothing' for results.
threadId :: Thread s r -> Maybe ThreadId
threadId Thread { threadId_ = i } = Just i
threadId _ = Nothing
data Greediness = Greedy | NonGreedy
deriving (Show, Read, Eq, Ord, Enum)
-- | Type of regular expressions that recognize symbols of type @s@ and
-- produce a result of type @a@.
--
-- Regular expressions can be built using 'Functor', 'Applicative',
-- 'Alternative', and 'Filtrable' instances in the following natural way:
--
-- * @f@ '<$>' @ra@ matches iff @ra@ matches, and its return value is the result
-- of applying @f@ to the return value of @ra@.
--
-- * 'pure' @x@ matches the empty string (i.e. it does not consume any symbols),
-- and its return value is @x@
--
-- * @rf@ '<*>' @ra@ matches a string iff it is a concatenation of two
-- strings: one matched by @rf@ and the other matched by @ra@. The return value
-- is @f a@, where @f@ and @a@ are the return values of @rf@ and @ra@
-- respectively.
--
-- * @ra@ '<|>' @rb@ matches a string which is accepted by either @ra@ or @rb@.
-- It is left-biased, so if both can match, the result of @ra@ is used.
--
-- * 'empty' is a regular expression which does not match any string.
--
-- * 'many' @ra@ matches concatenation of zero or more strings matched by @ra@
-- and returns the list of @ra@'s return values on those strings.
--
-- * 'some' @ra@ matches concatenation of one or more strings matched by @ra@
-- and returns the list of @ra@'s return values on those strings.
--
-- * 'catMaybes' @ram@ matches iff @ram@ matches and produces 'Just _'.
--
-- * @ra@ '<>' @rb@ matches @ra@ followed by @rb@. The return value is @a <> b@,
-- where @a@ and @b@ are the return values of @ra@ and @rb@ respectively.
-- (See <https://github.com/feuerbach/regex-applicative/issues/37#issue-499781703>
-- for an example usage.)
--
-- * 'mempty' matches the empty string (i.e. it does not consume any symbols),
-- and its return value is the 'mempty' value of type @a@.
data RE s a where
Eps :: RE s ()
Symbol :: ThreadId -> (s -> Maybe a) -> RE s a
Alt :: RE s a -> RE s a -> RE s a
App :: RE s (a -> b) -> RE s a -> RE s b
Fmap :: (a -> b) -> RE s a -> RE s b
CatMaybes :: RE s (Maybe a) -> RE s a
Fail :: RE s a
Rep :: Greediness -- repetition may be greedy or not
-> (b -> a -> b) -- folding function (like in foldl)
-> b -- the value for zero matches, and also the initial value
-- for the folding function
-> RE s a
-> RE s b
Void :: RE s a -> RE s ()
-- | Traverse each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order.
traversePostorder :: forall s a m . Monad m => (forall a . RE s a -> m (RE s a)) -> RE s a -> m (RE s a)
traversePostorder f = go
where
go :: forall a . RE s a -> m (RE s a)
go = f <=< \ case
Eps -> pure Eps
Symbol i p -> pure (Symbol i p)
Alt a b -> Alt <$> go a <*> go b
App a b -> App <$> go a <*> go b
Fmap g a -> Fmap g <$> go a
CatMaybes a -> CatMaybes <$> go a
Fail -> pure Fail
Rep greed g b a -> Rep greed g b <$> go a
Void a -> Void <$> go a
-- | Fold each (reflexive, transitive) subexpression of a 'RE', depth-first and post-order.
foldMapPostorder :: Monoid b => (forall a . RE s a -> b) -> RE s a -> b
foldMapPostorder f = fst . traversePostorder ((,) <$> f <*> id)
-- | Map each (reflexive, transitive) subexpression of a 'RE'.
mapRE :: (forall a . RE s a -> RE s a) -> RE s a -> RE s a
mapRE f = runIdentity . traversePostorder (Identity . f)
instance Functor (RE s) where
fmap f x = Fmap f x
f <$ x = pure f <* x
instance Applicative (RE s) where
pure x = const x <$> Eps
a1 <*> a2 = App a1 a2
a *> b = pure (const id) <*> Void a <*> b
a <* b = pure const <*> a <*> Void b
instance Alternative (RE s) where
a1 <|> a2 = Alt a1 a2
empty = Fail
many a = reverse <$> Rep Greedy (flip (:)) [] a
some a = (:) <$> a <*> many a
-- | @since 0.3.4
instance Filtrable (RE s) where
catMaybes = CatMaybes
instance (char ~ Char, string ~ String) => IsString (RE char string) where
fromString = string
-- | @since 0.3.4
instance Semigroup a => Semigroup (RE s a) where
x <> y = (<>) <$> x <*> y
-- | @since 0.3.4
instance Monoid a => Monoid (RE s a) where
mempty = pure mempty
-- | Match and return the given sequence of symbols.
--
-- Note that there is an 'IsString' instance for regular expression, so
-- if you enable the @OverloadedStrings@ language extension, you can write
-- @string \"foo\"@ simply as @\"foo\"@.
--
-- Example:
--
-- >{-# LANGUAGE OverloadedStrings #-}
-- >import Text.Regex.Applicative
-- >
-- >number = "one" *> pure 1 <|> "two" *> pure 2
-- >
-- >main = print $ "two" =~ number
string :: Eq a => [a] -> RE a [a]
string = traverse sym
-- | Match and return a single symbol which satisfies the predicate
psym :: (s -> Bool) -> RE s s
psym p = msym (\s -> if p s then Just s else Nothing)
-- | Like 'psym', but allows to return a computed value instead of the
-- original symbol
msym :: (s -> Maybe a) -> RE s a
msym p = Symbol (error "Not numbered symbol") p
-- | Match and return the given symbol
sym :: Eq s => s -> RE s s
sym s = psym (s ==)
|