File: Char8.hs

package info (click to toggle)
haskell-binary-parsers 0.2.4.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,252 kB
  • sloc: haskell: 2,156; makefile: 5
file content (167 lines) | stat: -rw-r--r-- 5,497 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
-- |
-- Module      :  Data.Binary.Parser.Char8
-- Copyright   :  Bryan O'Sullivan 2007-2015, Winterland 2016
-- License     :  BSD3
--
-- Maintainer  :  drkoster@qq.com
-- Stability   :  experimental
-- Portability :  unknown
--
-- This module is intended for parsing text that is
-- represented using an 8-bit character set, e.g. ASCII or
-- ISO-8859-15.  It /does not/ make any attempt to deal with character
-- encodings, multibyte characters, or wide characters.  In
-- particular, all attempts to use characters above code point U+00FF
-- will give wrong answers.
--
-- Code points below U+0100 are simply translated to and from their
-- numeric values, so e.g. the code point U+00A4 becomes the byte
-- @0xA4@ (which is the Euro symbol in ISO-8859-15, but the generic
-- currency sign in ISO-8859-1).  Haskell 'Char' values above U+00FF
-- are truncated, so e.g. U+1D6B7 is truncated to the byte @0xB7@.

module Data.Binary.Parser.Char8 where

import           Control.Applicative
import qualified Data.Binary.Get          as BG
import           Data.Binary.Get.Internal
import qualified Data.Binary.Parser.Word8 as W
import           Data.ByteString          (ByteString)
import qualified Data.ByteString          as B
import           Data.ByteString.Internal (c2w, w2c)
import qualified Data.ByteString.Unsafe   as B
import           Prelude                  hiding (takeWhile)

--------------------------------------------------------------------------------

-- | Match any char, to perform lookahead. Returns 'Nothing' if end of
-- input has been reached. Does not consume any input.
--
peekMaybe :: Get (Maybe Char)
peekMaybe = fmap w2c <$> W.peekMaybe
{-# INLINE peekMaybe #-}

-- | Match any char, to perform lookahead.  Does not consume any
-- input, but will fail if end of input has been reached.
--
peek :: Get Char
peek = w2c <$> W.peek
{-# INLINE peek #-}

-- | The parser @satisfy p@ succeeds for any char for which the
-- predicate @p@ returns 'True'. Returns the char that is actually
-- parsed.
--
satisfy :: (Char -> Bool) -> Get Char
satisfy p = w2c <$> W.satisfy (p . w2c)
{-# INLINE satisfy #-}

-- | The parser @satisfyWith f p@ transforms a char, and succeeds if
-- the predicate @p@ returns 'True' on the transformed value. The
-- parser returns the transformed char that was parsed.
--
satisfyWith :: (Char -> a) -> (a -> Bool) -> Get a
satisfyWith f = W.satisfyWith (f . w2c)
{-# INLINE satisfyWith #-}

-- | Match a specific character.
--
char :: Char -> Get ()
char c = W.word8 (c2w c)
{-# INLINE char #-}

-- | Match any character.
--
anyChar :: Get Char
anyChar = w2c <$> BG.getWord8
{-# INLINE anyChar #-}

-- | The parser @skipChar p@ succeeds for any char for which the predicate @p@ returns 'True'.
--
skipChar :: (Char -> Bool) -> Get ()
skipChar p = W.skipWord8 (p . w2c)
{-# INLINE skipChar #-}

--------------------------------------------------------------------------------

-- | Consume input as long as the predicate returns 'False' or reach the end of input,
-- and return the consumed input.
--
takeTill :: (Char -> Bool) -> Get ByteString
takeTill p = W.takeTill (p . w2c)
{-# INLINE takeTill #-}

-- | Consume input as long as the predicate returns 'True' or reach the end of input,
-- and return the consumed input.
--
takeWhile :: (Char -> Bool) -> Get ByteString
takeWhile p = W.takeWhile (p . w2c)
{-# INLINE takeWhile #-}

-- Similar to 'takeWhile', but requires the predicate to succeed on at least one char
-- of input: it will fail if the predicate never returns 'True' or reach the end of input
--
takeWhile1 :: (Char -> Bool) -> Get ByteString
takeWhile1 p = W.takeWhile1 (p . w2c)
{-# INLINE takeWhile1 #-}

-- | Skip past input for as long as the predicate returns 'True'.
--
skipWhile :: (Char -> Bool) -> Get ()
skipWhile p = W.skipWhile (p . w2c)
{-# INLINE skipWhile #-}

-- | Satisfy a literal string but ignoring case.
--
stringCI :: ByteString -> Get ByteString
stringCI bs = do
    let l = B.length bs
    ensureN l
    bs' <- B.unsafeTake l <$> get
    if B.map toLower bs' == B.map toLower bs
    then put (B.unsafeDrop l bs') >> return bs'
    else fail "stringCI"
  where
    toLower w | w >= 65 && w <= 90 = w + 32
              | otherwise          = w
{-# INLINE stringCI #-}

--------------------------------------------------------------------------------

-- | Fast predicate for matching ASCII space characters.
--
-- /Note/: This predicate only gives correct answers for the ASCII
-- encoding.  For instance, it does not recognise U+00A0 (non-breaking
-- space) as a space character, even though it is a valid ISO-8859-15
-- byte. For a Unicode-aware and only slightly slower predicate,
-- use 'Data.Char.isSpace'
--
isSpace :: Char -> Bool
isSpace c = (c == ' ') || ('\t' <= c && c <= '\r')
{-# INLINE isSpace #-}

-- | Decimal digit predicate.
--
isDigit :: Char -> Bool
isDigit c = c >= '0' && c <= '9'
{-# INLINE isDigit #-}

-- | Hex digit predicate.
--
isHexDigit :: Char -> Bool
isHexDigit c = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
{-# INLINE isHexDigit #-}

-- | A predicate that matches either a space @\' \'@ or horizontal tab
-- @\'\\t\'@ character.
--
isHorizontalSpace :: Char -> Bool
isHorizontalSpace c = c == ' ' || c == '\t'
{-# INLINE isHorizontalSpace #-}

-- | A predicate that matches either a carriage return @\'\\r\'@ or
-- newline @\'\\n\'@ character.
--
isEndOfLine :: Char -> Bool
isEndOfLine c = c == '\r' || c == '\n'
{-# INLINE isEndOfLine #-}