File: ByteOffset.hs

package info (click to toggle)
haskell-cborg 0.2.10.0-3
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 808 kB
  • sloc: haskell: 8,273; ansic: 14; makefile: 3
file content (458 lines) | stat: -rw-r--r-- 17,865 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
{-# LANGUAGE DeriveFunctor, BangPatterns #-}

module Tests.ByteOffset (testTree) where

import           Data.Word
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import           Control.Exception (throw)
import           Control.Applicative

import           Codec.CBOR.Decoding
import           Codec.CBOR.Read (deserialiseFromBytes)
import           Codec.CBOR.Write (toLazyByteString)
import           Codec.CBOR.Term (Term, encodeTerm, decodeTerm)
import qualified Codec.CBOR.Term as Term (Term(..))

import           Test.Tasty (TestTree, testGroup, localOption)
import           Test.Tasty.QuickCheck (testProperty, QuickCheckMaxSize(..))
import           Test.QuickCheck hiding (subterms)

import qualified Tests.Reference.Implementation as RefImpl
import           Tests.Reference.Generators (floatToWord, doubleToWord)
import           Tests.Term (eqTerm, canonicaliseTerm)
import           Tests.Util

import Prelude hiding (encodeFloat, decodeFloat)


-- | Like a 'Term', but with an annotation on top level terms and every
-- subterm. This is used for tests of 'peekByteOffset' where we annotate a
-- decoded term with the byte range it covers.
--
data ATerm annotation = ATerm (TermF (ATerm annotation)) annotation
  deriving (Show, Eq, Functor)

-- | Term one-level functor.
--
data TermF t = TInt     Int
             | TInteger Integer
             | TBytes   BS.ByteString
             | TBytesI  LBS.ByteString
             | TString  T.Text
             | TStringI LT.Text
             | TList    [t]
             | TListI   [t]
             | TMap     [(t, t)]
             | TMapI    [(t, t)]
             | TTagged  Word64 t
             | TBool    Bool
             | TNull
             | TSimple  Word8
             | THalf    Float
             | TFloat   Float
             | TDouble  Double
  deriving (Show, Eq, Functor)


testTree :: TestTree
testTree =
  testGroup "peekByteOffset"
    [ testGroup "ATerm framework"
        [ testProperty "isomorphic 1" prop_ATerm_isomorphic
        , testProperty "isomorphic 2" prop_ATerm_isomorphic2
        , testProperty "isomorphic 3" prop_ATerm_isomorphic3
        ]
    , testProperty "bytes deserialise" prop_peekByteOffset_deserialise
    , testProperty "bytes reserialise" prop_peekByteOffset_reserialise
    , testProperty "non-canonical encoding" prop_peekByteOffset_noncanonical
    , localOption (QuickCheckMaxSize 30) $
      testProperty "same offsets with all 2-splits" prop_peekByteOffset_splits2
    , localOption (QuickCheckMaxSize 20) $
      testProperty "same offsets with all 3-splits" prop_peekByteOffset_splits3
    ]


--------------------------------------------------------------------------------
-- Properties of the framework
--

-- | Basic property to check that 'ATerm' is isomorphic to the 'Term'.
--
prop_ATerm_isomorphic :: Term -> Bool
prop_ATerm_isomorphic t =
    t `eqTerm` (convertATermToTerm . convertTermToATerm) t

-- | Variation on 'prop_ATerm_isomorphic', checking that serialising as a
-- 'Term', deserialising as an 'ATerm' and converting back gives an equivalent
-- term.
--
prop_ATerm_isomorphic2 :: Term -> Bool
prop_ATerm_isomorphic2 t =
           canonicaliseTerm t
  `eqTerm` (convertATermToTerm . deserialiseATerm . serialiseTerm) t

-- | Variation on 'prop_ATerm_isomorphic2', but where we check the terms are
-- equivalent as 'ATerm's.
--
prop_ATerm_isomorphic3 :: Term -> Bool
prop_ATerm_isomorphic3 t =
            (convertTermToATerm . canonicaliseTerm) t
  `eqATerm` (fmap (const ()) . deserialiseATerm . serialiseTerm) t


--------------------------------------------------------------------------------
-- Properties of peekByteOffset
--

-- | A key consistency property for terms annotated with their bytes:
-- taking those bytes and deserialising them gives the corresponding term
--
prop_ATerm_deserialise :: ATerm ByteSpan -> Bool
prop_ATerm_deserialise t@(ATerm _ bs) =
    deserialiseTerm bs `eqTerm` convertATermToTerm t

-- | For the case of canonical encodings it is also true for terms annotated
-- with their bytes: taking the term and serialising it gives the bytes.
--
-- Note this is /only/ expected to hold for canonical encodings. See
-- 'prop_peekByteOffset_noncanonical' for a demonstration of this not holding
-- for non-canonical encodings.
--
prop_ATerm_reserialise :: ATerm ByteSpan -> Bool
prop_ATerm_reserialise t@(ATerm _ bs) =
    serialiseTerm (convertATermToTerm t) == bs

-- | For an 'ATerm' annotated with its bytes (obtained by decoding a term),
-- 'prop_ATerm_deserialise' should be true for the whole term and all subterms.
--
prop_peekByteOffset_deserialise :: Term -> Bool
prop_peekByteOffset_deserialise t =
    all prop_ATerm_deserialise (subterms t')
  where
    t' = deserialiseATerm (serialiseTerm t)

-- | For an 'ATerm' annotated with its bytes (obtained by decoding a canonical
-- term), 'prop_ATerm_serialise' should be true for the whole term and all
-- subterms.
--
prop_peekByteOffset_reserialise :: Term -> Bool
prop_peekByteOffset_reserialise t =
    all prop_ATerm_reserialise (subterms t')
  where
    t' = deserialiseATerm (serialiseTerm t)

-- | For an 'ATerm' annotated with its bytes obtained by decoding a
-- /non-canonical/ term, 'prop_ATerm_serialise' should not always hold.
--
-- This is in some sense the essence of why we want 'peekByteOffset' in the
-- first place: to get the bytes corresponding to a term we have to get the
-- original input bytes since we cannot rely on re-serialising to recover the
-- bytes (at least not without relying on and checking for canonical encodings).
--
prop_peekByteOffset_noncanonical :: RefImpl.Term -> Property
prop_peekByteOffset_noncanonical t =
    not (RefImpl.isCanonicalTerm t) ==>
    not (prop_ATerm_reserialise t')
  where
    t' = deserialiseATerm (RefImpl.serialise t)

-- | The offsets we get when decoding a term should be the same irrespective of
-- block boundaries in the input data stream. This checks the property for all
-- possible 2-chunk splits of the input data.
--
prop_peekByteOffset_splits2 :: Term -> Bool
prop_peekByteOffset_splits2 t =
    and [ deserialiseATermOffsets lbs' `eqATerm` t'
        | lbs' <- splits2 lbs ]
  where
    lbs = serialiseTerm t
    t'  = deserialiseATermOffsets lbs

-- | The offsets we get when decoding a term should be the same irrespective of
-- block boundaries in the input data stream. This checks the property for all
-- possible 3-chunk splits of the input data.
--
prop_peekByteOffset_splits3 :: Term -> Bool
prop_peekByteOffset_splits3 t =
    and [ deserialiseATermOffsets lbs' `eqATerm` t'
        | lbs' <- splits3 lbs ]
  where
    lbs = serialiseTerm t
    t'  = deserialiseATermOffsets lbs


------------------------------------------------------------------------------

subterms :: ATerm a -> [ATerm a]
subterms at@(ATerm t0 _) = at : subtermsF t0
  where
    subtermsF :: TermF (ATerm a) -> [ATerm a]
    subtermsF (TList  ts)  = concatMap subterms ts
    subtermsF (TListI ts)  = concatMap subterms ts
    subtermsF (TMap   ts)  = [ t' | (x, y) <- ts
                                 , t' <- subterms x
                                      ++ subterms y ]
    subtermsF (TMapI  ts)  = [ t' | (x, y) <- ts
                                 , t' <- subterms x
                                      ++ subterms y ]
    subtermsF (TTagged _ t') = subterms t'

    subtermsF TInt    {} = []
    subtermsF TInteger{} = []
    subtermsF TBytes  {} = []
    subtermsF TBytesI {} = []
    subtermsF TString {} = []
    subtermsF TStringI{} = []
    subtermsF TBool   {} = []
    subtermsF TNull   {} = []
    subtermsF TSimple {} = []
    subtermsF THalf   {} = []
    subtermsF TFloat  {} = []
    subtermsF TDouble {} = []


------------------------------------------------------------------------------

serialiseTerm :: Term -> LBS.ByteString
serialiseTerm = toLazyByteString . encodeTerm

deserialiseTerm :: LBS.ByteString -> Term
deserialiseTerm = either throw snd . deserialiseFromBytes decodeTerm


--------------------------------------------------------------------------------
-- Decoding a term, annotated with its underlying bytes
--

type Offsets  = (ByteOffset, ByteOffset)
type ByteSpan = LBS.ByteString

deserialiseATermOffsets :: LBS.ByteString -> ATerm Offsets
deserialiseATermOffsets = either throw snd . deserialiseFromBytes decodeATerm

deserialiseATerm :: LBS.ByteString -> ATerm ByteSpan
deserialiseATerm lbs = atermOffsetsToBytes lbs (deserialiseATermOffsets lbs)

atermOffsetsToBytes :: LBS.ByteString -> ATerm Offsets -> ATerm ByteSpan
atermOffsetsToBytes original =
    fmap (`slice` original)
  where
    slice :: (ByteOffset, ByteOffset) -> LBS.ByteString -> LBS.ByteString
    slice (n,m) = LBS.take (m-n) . LBS.drop n


decodeATerm :: Decoder s (ATerm Offsets)
decodeATerm = do
    start <- peekByteOffset
    t     <- decodeTermFATerm
    end   <- peekByteOffset
    return (ATerm t (start, end))

decodeTermFATerm :: Decoder s (TermF (ATerm Offsets))
decodeTermFATerm = do
    tkty <- peekTokenType
    case tkty of
      TypeUInt   -> do w <- decodeWord
                       return $! fromWord w
                    where
                      fromWord :: Word -> TermF (ATerm Offsets)
                      fromWord w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (fromIntegral w)
                        | otherwise = TInteger (fromIntegral w)

      TypeUInt64 -> do w <- decodeWord64
                       return $! fromWord64 w
                    where
                      fromWord64 w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (fromIntegral w)
                        | otherwise = TInteger (fromIntegral w)

      TypeNInt   -> do w <- decodeNegWord
                       return $! fromNegWord w
                    where
                      fromNegWord w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (-1 - fromIntegral w)
                        | otherwise = TInteger (-1 - fromIntegral w)

      TypeNInt64 -> do w <- decodeNegWord64
                       return $! fromNegWord64 w
                    where
                      fromNegWord64 w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (-1 - fromIntegral w)
                        | otherwise = TInteger (-1 - fromIntegral w)

      TypeInteger -> do !x <- decodeInteger
                        return (TInteger x)
      TypeFloat16 -> do !x <- decodeFloat
                        return (THalf x)
      TypeFloat32 -> do !x <- decodeFloat
                        return (TFloat x)
      TypeFloat64 -> do !x <- decodeDouble
                        return (TDouble x)

      TypeBytes        -> do !x <- decodeBytes
                             return (TBytes x)
      TypeBytesIndef   -> decodeBytesIndef >> decodeBytesIndefLen []
      TypeString       -> do !x <- decodeString
                             return (TString x)
      TypeStringIndef  -> decodeStringIndef >> decodeStringIndefLen []

      TypeListLen      -> decodeListLen      >>= flip decodeListN []
      TypeListLen64    -> decodeListLen      >>= flip decodeListN []
      TypeListLenIndef -> decodeListLenIndef >>  decodeListIndefLen []
      TypeMapLen       -> decodeMapLen       >>= flip decodeMapN []
      TypeMapLen64     -> decodeMapLen       >>= flip decodeMapN []
      TypeMapLenIndef  -> decodeMapLenIndef  >>  decodeMapIndefLen []
      TypeTag          -> do !x <- decodeTag64
                             !y <- decodeATerm
                             return (TTagged x y)
      TypeTag64        -> do !x <- decodeTag64
                             !y <- decodeATerm
                             return (TTagged x y)

      TypeBool    -> do !x <- decodeBool
                        return (TBool x)
      TypeNull    -> TNull   <$  decodeNull
      TypeSimple  -> do !x <- decodeSimple
                        return (TSimple x)
      TypeBreak   -> fail "unexpected break"
      TypeInvalid -> fail "invalid token encoding"


decodeBytesIndefLen :: [BS.ByteString] -> Decoder s (TermF (ATerm Offsets))
decodeBytesIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TBytesI (LBS.fromChunks (reverse acc))
            else do !bs <- decodeBytes
                    decodeBytesIndefLen (bs : acc)


decodeStringIndefLen :: [T.Text] -> Decoder s (TermF (ATerm Offsets))
decodeStringIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TStringI (LT.fromChunks (reverse acc))
            else do !str <- decodeString
                    decodeStringIndefLen (str : acc)


decodeListN :: Int -> [ATerm Offsets] -> Decoder s (TermF (ATerm Offsets))
decodeListN !n acc =
    case n of
      0 -> return $! TList (reverse acc)
      _ -> do !t <- decodeATerm
              decodeListN (n-1) (t : acc)


decodeListIndefLen :: [ATerm Offsets] -> Decoder s (TermF (ATerm Offsets))
decodeListIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TListI (reverse acc)
            else do !tm <- decodeATerm
                    decodeListIndefLen (tm : acc)


decodeMapN :: Int -> [(ATerm Offsets, ATerm Offsets)] -> Decoder s (TermF (ATerm Offsets))
decodeMapN !n acc =
    case n of
      0 -> return $! TMap (reverse acc)
      _ -> do !tm   <- decodeATerm
              !tm'  <- decodeATerm
              decodeMapN (n-1) ((tm, tm') : acc)


decodeMapIndefLen :: [(ATerm Offsets, ATerm Offsets)] -> Decoder s (TermF (ATerm Offsets))
decodeMapIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TMapI (reverse acc)
            else do !tm  <- decodeATerm
                    !tm' <- decodeATerm
                    decodeMapIndefLen ((tm, tm') : acc)


--------------------------------------------------------------------------------
-- Converting between terms and annotated terms


convertTermToATerm :: Term -> ATerm ()
convertTermToATerm t = ATerm (convertTermToTermF t) ()

convertTermToTermF :: Term -> TermF (ATerm ())
convertTermToTermF (Term.TList   ts)  = TList  (map convertTermToATerm ts)
convertTermToTermF (Term.TListI  ts)  = TListI (map convertTermToATerm ts)
convertTermToTermF (Term.TMap    ts)  = TMap  [ ( convertTermToATerm x
                                                , convertTermToATerm y )
                                              | (x, y) <- ts ]
convertTermToTermF (Term.TMapI   ts)  = TMapI [ ( convertTermToATerm x
                                                , convertTermToATerm y )
                                              | (x, y) <- ts ]
convertTermToTermF (Term.TTagged x t) = TTagged x (convertTermToATerm t)

convertTermToTermF (Term.TInt     x)  = TInt   x
convertTermToTermF (Term.TInteger x)  = TInteger x
convertTermToTermF (Term.TBytes   x)  = TBytes  x
convertTermToTermF (Term.TBytesI  x)  = TBytesI x
convertTermToTermF (Term.TString  x)  = TString  x
convertTermToTermF (Term.TStringI x)  = TStringI x
convertTermToTermF (Term.TBool    x)  = TBool x
convertTermToTermF  Term.TNull        = TNull
convertTermToTermF (Term.TSimple  x)  = TSimple x
convertTermToTermF (Term.THalf    x)  = THalf x
convertTermToTermF (Term.TFloat   x)  = TFloat x
convertTermToTermF (Term.TDouble  x)  = TDouble x

convertATermToTerm :: ATerm a -> Term
convertATermToTerm (ATerm t _ann) = convertTermFToTerm t

convertTermFToTerm :: TermF (ATerm a) -> Term
convertTermFToTerm (TList   ts)  = Term.TList  (map convertATermToTerm ts)
convertTermFToTerm (TListI  ts)  = Term.TListI (map convertATermToTerm ts)
convertTermFToTerm (TMap    ts)  = Term.TMap  [ ( convertATermToTerm x
                                                , convertATermToTerm y )
                                              | (x, y) <- ts ]
convertTermFToTerm (TMapI   ts)  = Term.TMapI [ ( convertATermToTerm x
                                                , convertATermToTerm y )
                                              | (x, y) <- ts ]
convertTermFToTerm (TTagged x t) = Term.TTagged  x (convertATermToTerm t)
convertTermFToTerm (TInt     x)  = Term.TInt     x
convertTermFToTerm (TInteger x)  = Term.TInteger x
convertTermFToTerm (TBytes   x)  = Term.TBytes   x
convertTermFToTerm (TBytesI  x)  = Term.TBytesI  x
convertTermFToTerm (TString  x)  = Term.TString  x
convertTermFToTerm (TStringI x)  = Term.TStringI x
convertTermFToTerm (TBool    x)  = Term.TBool    x
convertTermFToTerm  TNull        = Term.TNull
convertTermFToTerm (TSimple  x)  = Term.TSimple  x
convertTermFToTerm (THalf    x)  = Term.THalf    x
convertTermFToTerm (TFloat   x)  = Term.TFloat   x
convertTermFToTerm (TDouble  x)  = Term.TDouble  x


-- NaNs are so annoying...
eqATerm :: Eq a => ATerm a -> ATerm a -> Bool
eqATerm (ATerm t1 ann1) (ATerm t2 ann2) =
    ann1 == ann2 && eqATermF t1 t2

eqATermF :: Eq a => TermF (ATerm a) -> TermF (ATerm a) -> Bool
eqATermF (TList   ts)  (TList   ts')   = and (zipWith eqATerm ts ts')
eqATermF (TListI  ts)  (TListI  ts')   = and (zipWith eqATerm ts ts')
eqATermF (TMap    ts)  (TMap    ts')   = and (zipWith eqATermPair ts ts')
eqATermF (TMapI   ts)  (TMapI   ts')   = and (zipWith eqATermPair ts ts')
eqATermF (TTagged w t) (TTagged w' t') = w == w' && eqATerm t t'
eqATermF (THalf   f)   (THalf   f')    = floatToWord  f == floatToWord  f'
eqATermF (TFloat  f)   (TFloat  f')    = floatToWord  f == floatToWord  f'
eqATermF (TDouble f)   (TDouble f')    = doubleToWord f == doubleToWord f'
eqATermF a b = a == b

eqATermPair :: (Eq a, Eq b)
              => (ATerm a, ATerm b)
              -> (ATerm a, ATerm b)
              -> Bool
eqATermPair (a,b) (a',b') = eqATerm a a' && eqATerm b b'