-- |
-- Module : Data.Binary.Parser.Char8
-- Copyright : Bryan O'Sullivan 2007-2015, Winterland 2016
-- License : BSD3
--
-- Maintainer : drkoster@qq.com
-- Stability : experimental
-- Portability : unknown
--
-- This module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or
-- ISO-8859-15. It /does not/ make any attempt to deal with character
-- encodings, multibyte characters, or wide characters. In
-- particular, all attempts to use characters above code point U+00FF
-- will give wrong answers.
--
-- Code points below U+0100 are simply translated to and from their
-- numeric values, so e.g. the code point U+00A4 becomes the byte
-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
-- currency sign in ISO-8859-1). Haskell 'Char' values above U+00FF
-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.
module Data.Binary.Parser.Char8 where
import Control.Applicative
import qualified Data.Binary.Get as BG
import Data.Binary.Get.Internal
import qualified Data.Binary.Parser.Word8 as W
import Data.ByteString (ByteString)
import qualified Data.ByteString as B
import Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Unsafe as B
import Prelude hiding (takeWhile)
--------------------------------------------------------------------------------
-- | Look ahead at the next char without consuming any input.
-- Yields 'Nothing' when the end of input has been reached, and
-- 'Just' the next char otherwise.
--
peekMaybe :: Get (Maybe Char)
peekMaybe = fmap (fmap w2c) W.peekMaybe
{-# INLINE peekMaybe #-}
-- | Look ahead at the next char without consuming any input.
-- Unlike 'peekMaybe', this parser fails when the end of input has
-- been reached.
--
peek :: Get Char
peek = fmap w2c W.peek
{-# INLINE peek #-}
-- | @satisfy p@ parses a single char and succeeds only if the
-- predicate @p@ returns 'True' for it. The result is the char that
-- was actually parsed.
--
satisfy :: (Char -> Bool) -> Get Char
satisfy p = fmap w2c (W.satisfy (\w -> p (w2c w)))
{-# INLINE satisfy #-}
-- | @satisfyWith f p@ first transforms the next char with @f@, then
-- succeeds if the predicate @p@ returns 'True' on the transformed
-- value. The result is the transformed value, not the raw char.
--
satisfyWith :: (Char -> a) -> (a -> Bool) -> Get a
satisfyWith f = W.satisfyWith (\w -> f (w2c w))
{-# INLINE satisfyWith #-}
-- | Match one specific character.
--
-- The character is converted to a byte with 'c2w' before matching, so
-- (as for the rest of this module) only code points up to U+00FF are
-- meaningful.
--
char :: Char -> Get ()
char c = W.word8 $ c2w c
{-# INLINE char #-}
-- | Match any single character: reads one byte with 'BG.getWord8' and
-- returns it as a 'Char'.
--
anyChar :: Get Char
anyChar = fmap w2c BG.getWord8
{-# INLINE anyChar #-}
-- | @skipChar p@ succeeds for any char for which the predicate @p@
-- returns 'True'; the result is discarded.
--
skipChar :: (Char -> Bool) -> Get ()
skipChar p = W.skipWord8 (\w -> p (w2c w))
{-# INLINE skipChar #-}
--------------------------------------------------------------------------------
-- | Consume input for as long as the predicate returns 'False', or
-- until the end of input is reached, and return the consumed input.
--
takeTill :: (Char -> Bool) -> Get ByteString
takeTill p = W.takeTill (\w -> p (w2c w))
{-# INLINE takeTill #-}
-- | Consume input for as long as the predicate returns 'True', or
-- until the end of input is reached, and return the consumed input.
--
takeWhile :: (Char -> Bool) -> Get ByteString
takeWhile p = W.takeWhile (\w -> p (w2c w))
{-# INLINE takeWhile #-}
-- | Like 'takeWhile', but the predicate must succeed on at least one
-- char of input: the parser fails if the predicate never returns
-- 'True' or if the end of input is reached.
--
takeWhile1 :: (Char -> Bool) -> Get ByteString
takeWhile1 p = W.takeWhile1 (\w -> p (w2c w))
{-# INLINE takeWhile1 #-}
-- | Skip over input for as long as the predicate returns 'True'.
--
skipWhile :: (Char -> Bool) -> Get ()
skipWhile p = W.skipWhile (\w -> p (w2c w))
{-# INLINE skipWhile #-}
-- | Match a literal string, ignoring ASCII case.
--
-- Only the bytes 65-90 (@A@-@Z@) are case-folded before comparison;
-- every other byte must match exactly. On success the parser returns
-- the bytes as they appeared in the input (original case preserved)
-- and consumes exactly @B.length bs@ bytes, leaving the rest of the
-- input untouched. On mismatch it fails with the message @"stringCI"@.
--
stringCI :: ByteString -> Get ByteString
stringCI bs = do
    let l = B.length bs
    -- Ask for @l@ bytes up front; the unsafe slicing below relies on them
    -- being present in the current input.
    ensureN l
    -- Keep the whole current input: the remainder handed back to 'put' must
    -- be computed from it. Dropping @l@ bytes from the @l@-byte prefix would
    -- always leave an empty input and lose everything after the match.
    inp <- get
    let bs' = B.unsafeTake l inp
    if B.map toLower bs' == B.map toLower bs
        then put (B.unsafeDrop l inp) >> return bs'
        else fail "stringCI"
  where
    -- Fold ASCII upper-case letters to lower case; other bytes are unchanged.
    toLower w | w >= 65 && w <= 90 = w + 32
              | otherwise          = w
{-# INLINE stringCI #-}
--------------------------------------------------------------------------------
-- | Fast predicate for ASCII whitespace: the space character and the
-- range @\'\\t\'@ .. @\'\\r\'@ (tab, newline, vertical tab, form feed,
-- carriage return).
--
-- /Note/: the answer is only correct for the ASCII encoding. U+00A0
-- (non-breaking space), for example, is a valid ISO-8859-15 byte but is
-- not recognised as a space here. For a Unicode-aware and only slightly
-- slower predicate, use 'Data.Char.isSpace'.
--
isSpace :: Char -> Bool
isSpace c
    | c == ' '  = True
    | otherwise = c >= '\t' && c <= '\r'
{-# INLINE isSpace #-}
-- | Predicate for the ASCII decimal digits @\'0\'@ .. @\'9\'@.
--
isDigit :: Char -> Bool
isDigit c = not (c < '0' || c > '9')
{-# INLINE isDigit #-}
-- | Predicate for ASCII hexadecimal digits: @\'0\'@ .. @\'9\'@,
-- @\'a\'@ .. @\'f\'@ and @\'A\'@ .. @\'F\'@.
--
isHexDigit :: Char -> Bool
isHexDigit c = within '0' '9' || within 'a' 'f' || within 'A' 'F'
  where
    -- Inclusive range check on the char under test.
    within lo hi = lo <= c && c <= hi
{-# INLINE isHexDigit #-}
-- | Predicate that is 'True' only for a space @\' \'@ or a horizontal
-- tab @\'\\t\'@.
--
isHorizontalSpace :: Char -> Bool
isHorizontalSpace c = case c of
    ' '  -> True
    '\t' -> True
    _    -> False
{-# INLINE isHorizontalSpace #-}
-- | Predicate that is 'True' only for a carriage return @\'\\r\'@ or a
-- newline @\'\\n\'@.
--
isEndOfLine :: Char -> Bool
isEndOfLine c = case c of
    '\r' -> True
    '\n' -> True
    _    -> False
{-# INLINE isEndOfLine #-}
|