File: String.hs

package info (click to toggle)
haskell-basement 0.0.16-4
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,044 kB
  • sloc: haskell: 11,336; ansic: 63; makefile: 2
file content (135 lines) | stat: -rw-r--r-- 4,978 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE MagicHash                  #-}
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE TypeFamilies               #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE ScopedTypeVariables        #-}
{-# LANGUAGE CPP                        #-}
module Basement.Alg.String
    ( copyFilter
    , validate
    , findIndexPredicate
    , revFindIndexPredicate
    ) where

import           GHC.Prim
import           GHC.ST
import           Basement.Alg.Class
import           Basement.Alg.UTF8
import           Basement.Compat.Base
import           Basement.Numerical.Additive
import           Basement.Types.OffsetSize
import           Basement.PrimType
import           Basement.Block (MutableBlock(..))

import           Basement.UTF8.Helper
import           Basement.UTF8.Table
import           Basement.UTF8.Types

copyFilter :: forall s container . Indexable container Word8
           => (Char -> Bool)
           -> CountOf Word8
           -> MutableByteArray# s
           -> container
           -> Offset Word8
           -> ST s (CountOf Word8)
copyFilter predicate !sz dst src start = loop (Offset 0) start
  where
    !end = start `offsetPlusE` sz
    loop !d !s
        | s == end  = pure (offsetAsSize d)
        | otherwise =
            let !h = nextAscii src s
             in case headerIsAscii h of
                    True | predicate (toChar1 h) -> primMbaWrite dst d (stepAsciiRawValue h) >> loop (d + Offset 1) (s + Offset 1)
                         | otherwise             -> loop d (s + Offset 1)
                    False ->
                        case next src s of
                            Step c s' | predicate c -> writeUTF8 (MutableBlock dst :: MutableBlock Word8 s) d c >>= \d' -> loop d' s'
                                      | otherwise   -> loop d s'
{-# INLINE copyFilter #-}

validate :: Indexable container Word8
         => Offset Word8
         -> container
         -> Offset Word8
         -> (Offset Word8, Maybe ValidationFailure)
validate end ba ofsStart = loop4 ofsStart
  where
    loop4 !ofs
        | ofs4 < end =
            let h1 = nextAscii ba ofs
                h2 = nextAscii ba (ofs+1)
                h3 = nextAscii ba (ofs+2)
                h4 = nextAscii ba (ofs+3)
             in if headerIsAscii h1 && headerIsAscii h2 && headerIsAscii h3 && headerIsAscii h4
                    then loop4 ofs4
                    else loop ofs
        | otherwise     = loop ofs
      where
        !ofs4 = ofs+4
    loop !ofs
        | ofs == end      = (end, Nothing)
        | headerIsAscii h = loop (ofs + Offset 1)
        | otherwise       = multi (CountOf $ getNbBytes h) ofs
      where
        h = nextAscii ba ofs

    multi (CountOf 0xff) pos = (pos, Just InvalidHeader)
    multi nbConts pos
        | (posNext `offsetPlusE` nbConts) > end = (pos, Just MissingByte)
        | otherwise =
            case nbConts of
                CountOf 1 ->
                    let c1 = index ba posNext
                    in if isContinuation c1
                        then loop (pos + Offset 2)
                        else (pos, Just InvalidContinuation)
                CountOf 2 ->
                    let c1 = index ba posNext
                        c2 = index ba (pos + Offset 2)
                     in if isContinuation2 c1 c2
                            then loop (pos + Offset 3)
                            else (pos, Just InvalidContinuation)
                CountOf _ ->
                    let c1 = index ba posNext
                        c2 = index ba (pos + Offset 2)
                        c3 = index ba (pos + Offset 3)
                     in if isContinuation3 c1 c2 c3
                            then loop (pos + Offset 4)
                            else (pos, Just InvalidContinuation)
      where posNext = pos + Offset 1
{-# INLINE validate #-}

findIndexPredicate :: Indexable container Word8
                   => (Char -> Bool)
                   -> container
                   -> Offset Word8
                   -> Offset Word8
                   -> Offset Word8
findIndexPredicate predicate ba !startIndex !endIndex = loop startIndex
  where
    loop !i
        | i < endIndex && not (predicate c) = loop (i')
        | otherwise                         = i
      where
        Step c i' = next ba i
{-# INLINE findIndexPredicate #-}

revFindIndexPredicate :: Indexable container Word8
                      => (Char -> Bool)
                      -> container
                      -> Offset Word8
                      -> Offset Word8
                      -> Offset Word8
revFindIndexPredicate predicate ba startIndex endIndex
    | endIndex > startIndex = loop endIndex
    | otherwise             = endIndex
  where
    loop !i
        | predicate c     = i'
        | i' > startIndex = loop i'
        | otherwise       = endIndex
      where 
        StepBack c i' = prev ba i
{-# INLINE revFindIndexPredicate #-}