File: ByteOffset.hs

package info (click to toggle)
haskell-cborg 0.2.10.0-3
links: PTS, VCS
area: main
in suites: sid
size: 808 kB
sloc: haskell: 8,273; ansic: 14; makefile: 3
file content (458 lines) | stat: -rw-r--r-- 17,865 bytes
parent folder | download | duplicates (3)
{-# LANGUAGE DeriveFunctor, BangPatterns #-}

module Tests.ByteOffset (testTree) where

import           Data.Word
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import           Control.Exception (throw)
import           Control.Applicative

import           Codec.CBOR.Decoding
import           Codec.CBOR.Read (deserialiseFromBytes)
import           Codec.CBOR.Write (toLazyByteString)
import           Codec.CBOR.Term (Term, encodeTerm, decodeTerm)
import qualified Codec.CBOR.Term as Term (Term(..))

import           Test.Tasty (TestTree, testGroup, localOption)
import           Test.Tasty.QuickCheck (testProperty, QuickCheckMaxSize(..))
import           Test.QuickCheck hiding (subterms)

import qualified Tests.Reference.Implementation as RefImpl
import           Tests.Reference.Generators (floatToWord, doubleToWord)
import           Tests.Term (eqTerm, canonicaliseTerm)
import           Tests.Util

import Prelude hiding (encodeFloat, decodeFloat)


-- | Like a 'Term', but with an annotation on top level terms and every
-- subterm. This is used for tests of 'peekByteOffset' where we annotate a
-- decoded term with the byte range it covers.
--
data ATerm annotation = ATerm (TermF (ATerm annotation)) annotation
  deriving (Show, Eq, Functor)

-- | Term one-level functor.
--
data TermF t = TInt     Int
             | TInteger Integer
             | TBytes   BS.ByteString
             | TBytesI  LBS.ByteString
             | TString  T.Text
             | TStringI LT.Text
             | TList    [t]
             | TListI   [t]
             | TMap     [(t, t)]
             | TMapI    [(t, t)]
             | TTagged  Word64 t
             | TBool    Bool
             | TNull
             | TSimple  Word8
             | THalf    Float
             | TFloat   Float
             | TDouble  Double
  deriving (Show, Eq, Functor)


testTree :: TestTree
testTree =
  testGroup "peekByteOffset"
    [ testGroup "ATerm framework"
        [ testProperty "isomorphic 1" prop_ATerm_isomorphic
        , testProperty "isomorphic 2" prop_ATerm_isomorphic2
        , testProperty "isomorphic 3" prop_ATerm_isomorphic3
        ]
    , testProperty "bytes deserialise" prop_peekByteOffset_deserialise
    , testProperty "bytes reserialise" prop_peekByteOffset_reserialise
    , testProperty "non-canonical encoding" prop_peekByteOffset_noncanonical
    , localOption (QuickCheckMaxSize 30) $
      testProperty "same offsets with all 2-splits" prop_peekByteOffset_splits2
    , localOption (QuickCheckMaxSize 20) $
      testProperty "same offsets with all 3-splits" prop_peekByteOffset_splits3
    ]


--------------------------------------------------------------------------------
-- Properties of the framework
--

-- | Basic property to check that 'ATerm' is isomorphic to the 'Term'.
--
prop_ATerm_isomorphic :: Term -> Bool
prop_ATerm_isomorphic t =
    t `eqTerm` (convertATermToTerm . convertTermToATerm) t

-- | Variation on 'prop_ATerm_isomorphic', checking that serialising as a
-- 'Term', deserialising as an 'ATerm' and converting back gives an equivalent
-- term.
--
prop_ATerm_isomorphic2 :: Term -> Bool
prop_ATerm_isomorphic2 t =
           canonicaliseTerm t
  `eqTerm` (convertATermToTerm . deserialiseATerm . serialiseTerm) t

-- | Variation on 'prop_ATerm_isomorphic2', but where we check the terms are
-- equivalent as 'ATerm's.
--
prop_ATerm_isomorphic3 :: Term -> Bool
prop_ATerm_isomorphic3 t =
            (convertTermToATerm . canonicaliseTerm) t
  `eqATerm` (fmap (const ()) . deserialiseATerm . serialiseTerm) t


--------------------------------------------------------------------------------
-- Properties of peekByteOffset
--

-- | A key consistency property for terms annotated with their bytes:
-- taking those bytes and deserialising them gives the corresponding term
--
prop_ATerm_deserialise :: ATerm ByteSpan -> Bool
prop_ATerm_deserialise t@(ATerm _ bs) =
    deserialiseTerm bs `eqTerm` convertATermToTerm t

-- | For the case of canonical encodings it is also true for terms annotated
-- with their bytes: taking the term and serialising it gives the bytes.
--
-- Note this is /only/ expected to hold for canonical encodings. See
-- 'prop_peekByteOffset_noncanonical' for a demonstration of this not holding
-- for non-canonical encodings.
--
prop_ATerm_reserialise :: ATerm ByteSpan -> Bool
prop_ATerm_reserialise t@(ATerm _ bs) =
    serialiseTerm (convertATermToTerm t) == bs

-- | For an 'ATerm' annotated with its bytes (obtained by decoding a term),
-- 'prop_ATerm_deserialise' should be true for the whole term and all subterms.
--
prop_peekByteOffset_deserialise :: Term -> Bool
prop_peekByteOffset_deserialise t =
    all prop_ATerm_deserialise (subterms t')
  where
    t' = deserialiseATerm (serialiseTerm t)

-- | For an 'ATerm' annotated with its bytes (obtained by decoding a canonical
-- term), 'prop_ATerm_serialise' should be true for the whole term and all
-- subterms.
--
prop_peekByteOffset_reserialise :: Term -> Bool
prop_peekByteOffset_reserialise t =
    all prop_ATerm_reserialise (subterms t')
  where
    t' = deserialiseATerm (serialiseTerm t)

-- | For an 'ATerm' annotated with its bytes obtained by decoding a
-- /non-canonical/ term, 'prop_ATerm_serialise' should not always hold.
--
-- This is in some sense the essence of why we want 'peekByteOffset' in the
-- first place: to get the bytes corresponding to a term we have to get the
-- original input bytes since we cannot rely on re-serialising to recover the
-- bytes (at least not without relying on and checking for canonical encodings).
--
prop_peekByteOffset_noncanonical :: RefImpl.Term -> Property
prop_peekByteOffset_noncanonical t =
    not (RefImpl.isCanonicalTerm t) ==>
    not (prop_ATerm_reserialise t')
  where
    t' = deserialiseATerm (RefImpl.serialise t)

-- | The offsets we get when decoding a term should be the same irrespective of
-- block boundaries in the input data stream. This checks the property for all
-- possible 2-chunk splits of the input data.
--
prop_peekByteOffset_splits2 :: Term -> Bool
prop_peekByteOffset_splits2 t =
    and [ deserialiseATermOffsets lbs' `eqATerm` t'
        | lbs' <- splits2 lbs ]
  where
    lbs = serialiseTerm t
    t'  = deserialiseATermOffsets lbs

-- | The offsets we get when decoding a term should be the same irrespective of
-- block boundaries in the input data stream. This checks the property for all
-- possible 3-chunk splits of the input data.
--
prop_peekByteOffset_splits3 :: Term -> Bool
prop_peekByteOffset_splits3 t =
    and [ deserialiseATermOffsets lbs' `eqATerm` t'
        | lbs' <- splits3 lbs ]
  where
    lbs = serialiseTerm t
    t'  = deserialiseATermOffsets lbs


------------------------------------------------------------------------------

subterms :: ATerm a -> [ATerm a]
subterms at@(ATerm t0 _) = at : subtermsF t0
  where
    subtermsF :: TermF (ATerm a) -> [ATerm a]
    subtermsF (TList  ts)  = concatMap subterms ts
    subtermsF (TListI ts)  = concatMap subterms ts
    subtermsF (TMap   ts)  = [ t' | (x, y) <- ts
                                 , t' <- subterms x
                                      ++ subterms y ]
    subtermsF (TMapI  ts)  = [ t' | (x, y) <- ts
                                 , t' <- subterms x
                                      ++ subterms y ]
    subtermsF (TTagged _ t') = subterms t'

    subtermsF TInt    {} = []
    subtermsF TInteger{} = []
    subtermsF TBytes  {} = []
    subtermsF TBytesI {} = []
    subtermsF TString {} = []
    subtermsF TStringI{} = []
    subtermsF TBool   {} = []
    subtermsF TNull   {} = []
    subtermsF TSimple {} = []
    subtermsF THalf   {} = []
    subtermsF TFloat  {} = []
    subtermsF TDouble {} = []


------------------------------------------------------------------------------

serialiseTerm :: Term -> LBS.ByteString
serialiseTerm = toLazyByteString . encodeTerm

deserialiseTerm :: LBS.ByteString -> Term
deserialiseTerm = either throw snd . deserialiseFromBytes decodeTerm


--------------------------------------------------------------------------------
-- Decoding a term, annotated with its underlying bytes
--

type Offsets  = (ByteOffset, ByteOffset)
type ByteSpan = LBS.ByteString

deserialiseATermOffsets :: LBS.ByteString -> ATerm Offsets
deserialiseATermOffsets = either throw snd . deserialiseFromBytes decodeATerm

deserialiseATerm :: LBS.ByteString -> ATerm ByteSpan
deserialiseATerm lbs = atermOffsetsToBytes lbs (deserialiseATermOffsets lbs)

atermOffsetsToBytes :: LBS.ByteString -> ATerm Offsets -> ATerm ByteSpan
atermOffsetsToBytes original =
    fmap (`slice` original)
  where
    slice :: (ByteOffset, ByteOffset) -> LBS.ByteString -> LBS.ByteString
    slice (n,m) = LBS.take (m-n) . LBS.drop n


decodeATerm :: Decoder s (ATerm Offsets)
decodeATerm = do
    start <- peekByteOffset
    t     <- decodeTermFATerm
    end   <- peekByteOffset
    return (ATerm t (start, end))

decodeTermFATerm :: Decoder s (TermF (ATerm Offsets))
decodeTermFATerm = do
    tkty <- peekTokenType
    case tkty of
      TypeUInt   -> do w <- decodeWord
                       return $! fromWord w
                    where
                      fromWord :: Word -> TermF (ATerm Offsets)
                      fromWord w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (fromIntegral w)
                        | otherwise = TInteger (fromIntegral w)

      TypeUInt64 -> do w <- decodeWord64
                       return $! fromWord64 w
                    where
                      fromWord64 w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (fromIntegral w)
                        | otherwise = TInteger (fromIntegral w)

      TypeNInt   -> do w <- decodeNegWord
                       return $! fromNegWord w
                    where
                      fromNegWord w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (-1 - fromIntegral w)
                        | otherwise = TInteger (-1 - fromIntegral w)

      TypeNInt64 -> do w <- decodeNegWord64
                       return $! fromNegWord64 w
                    where
                      fromNegWord64 w
                        | w <= fromIntegral (maxBound :: Int)
                                    = TInt     (-1 - fromIntegral w)
                        | otherwise = TInteger (-1 - fromIntegral w)

      TypeInteger -> do !x <- decodeInteger
                        return (TInteger x)
      TypeFloat16 -> do !x <- decodeFloat
                        return (THalf x)
      TypeFloat32 -> do !x <- decodeFloat
                        return (TFloat x)
      TypeFloat64 -> do !x <- decodeDouble
                        return (TDouble x)

      TypeBytes        -> do !x <- decodeBytes
                             return (TBytes x)
      TypeBytesIndef   -> decodeBytesIndef >> decodeBytesIndefLen []
      TypeString       -> do !x <- decodeString
                             return (TString x)
      TypeStringIndef  -> decodeStringIndef >> decodeStringIndefLen []

      TypeListLen      -> decodeListLen      >>= flip decodeListN []
      TypeListLen64    -> decodeListLen      >>= flip decodeListN []
      TypeListLenIndef -> decodeListLenIndef >>  decodeListIndefLen []
      TypeMapLen       -> decodeMapLen       >>= flip decodeMapN []
      TypeMapLen64     -> decodeMapLen       >>= flip decodeMapN []
      TypeMapLenIndef  -> decodeMapLenIndef  >>  decodeMapIndefLen []
      TypeTag          -> do !x <- decodeTag64
                             !y <- decodeATerm
                             return (TTagged x y)
      TypeTag64        -> do !x <- decodeTag64
                             !y <- decodeATerm
                             return (TTagged x y)

      TypeBool    -> do !x <- decodeBool
                        return (TBool x)
      TypeNull    -> TNull   <$  decodeNull
      TypeSimple  -> do !x <- decodeSimple
                        return (TSimple x)
      TypeBreak   -> fail "unexpected break"
      TypeInvalid -> fail "invalid token encoding"


decodeBytesIndefLen :: [BS.ByteString] -> Decoder s (TermF (ATerm Offsets))
decodeBytesIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TBytesI (LBS.fromChunks (reverse acc))
            else do !bs <- decodeBytes
                    decodeBytesIndefLen (bs : acc)


decodeStringIndefLen :: [T.Text] -> Decoder s (TermF (ATerm Offsets))
decodeStringIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TStringI (LT.fromChunks (reverse acc))
            else do !str <- decodeString
                    decodeStringIndefLen (str : acc)


decodeListN :: Int -> [ATerm Offsets] -> Decoder s (TermF (ATerm Offsets))
decodeListN !n acc =
    case n of
      0 -> return $! TList (reverse acc)
      _ -> do !t <- decodeATerm
              decodeListN (n-1) (t : acc)


decodeListIndefLen :: [ATerm Offsets] -> Decoder s (TermF (ATerm Offsets))
decodeListIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TListI (reverse acc)
            else do !tm <- decodeATerm
                    decodeListIndefLen (tm : acc)


decodeMapN :: Int -> [(ATerm Offsets, ATerm Offsets)] -> Decoder s (TermF (ATerm Offsets))
decodeMapN !n acc =
    case n of
      0 -> return $! TMap (reverse acc)
      _ -> do !tm   <- decodeATerm
              !tm'  <- decodeATerm
              decodeMapN (n-1) ((tm, tm') : acc)


decodeMapIndefLen :: [(ATerm Offsets, ATerm Offsets)] -> Decoder s (TermF (ATerm Offsets))
decodeMapIndefLen acc = do
    stop <- decodeBreakOr
    if stop then return $! TMapI (reverse acc)
            else do !tm  <- decodeATerm
                    !tm' <- decodeATerm
                    decodeMapIndefLen ((tm, tm') : acc)


--------------------------------------------------------------------------------
-- Converting between terms and annotated terms


convertTermToATerm :: Term -> ATerm ()
convertTermToATerm t = ATerm (convertTermToTermF t) ()

convertTermToTermF :: Term -> TermF (ATerm ())
convertTermToTermF (Term.TList   ts)  = TList  (map convertTermToATerm ts)
convertTermToTermF (Term.TListI  ts)  = TListI (map convertTermToATerm ts)
convertTermToTermF (Term.TMap    ts)  = TMap  [ ( convertTermToATerm x
                                                , convertTermToATerm y )
                                              | (x, y) <- ts ]
convertTermToTermF (Term.TMapI   ts)  = TMapI [ ( convertTermToATerm x
                                                , convertTermToATerm y )
                                              | (x, y) <- ts ]
convertTermToTermF (Term.TTagged x t) = TTagged x (convertTermToATerm t)

convertTermToTermF (Term.TInt     x)  = TInt   x
convertTermToTermF (Term.TInteger x)  = TInteger x
convertTermToTermF (Term.TBytes   x)  = TBytes  x
convertTermToTermF (Term.TBytesI  x)  = TBytesI x
convertTermToTermF (Term.TString  x)  = TString  x
convertTermToTermF (Term.TStringI x)  = TStringI x
convertTermToTermF (Term.TBool    x)  = TBool x
convertTermToTermF  Term.TNull        = TNull
convertTermToTermF (Term.TSimple  x)  = TSimple x
convertTermToTermF (Term.THalf    x)  = THalf x
convertTermToTermF (Term.TFloat   x)  = TFloat x
convertTermToTermF (Term.TDouble  x)  = TDouble x

convertATermToTerm :: ATerm a -> Term
convertATermToTerm (ATerm t _ann) = convertTermFToTerm t

convertTermFToTerm :: TermF (ATerm a) -> Term
convertTermFToTerm (TList   ts)  = Term.TList  (map convertATermToTerm ts)
convertTermFToTerm (TListI  ts)  = Term.TListI (map convertATermToTerm ts)
convertTermFToTerm (TMap    ts)  = Term.TMap  [ ( convertATermToTerm x
                                                , convertATermToTerm y )
                                              | (x, y) <- ts ]
convertTermFToTerm (TMapI   ts)  = Term.TMapI [ ( convertATermToTerm x
                                                , convertATermToTerm y )
                                              | (x, y) <- ts ]
convertTermFToTerm (TTagged x t) = Term.TTagged  x (convertATermToTerm t)
convertTermFToTerm (TInt     x)  = Term.TInt     x
convertTermFToTerm (TInteger x)  = Term.TInteger x
convertTermFToTerm (TBytes   x)  = Term.TBytes   x
convertTermFToTerm (TBytesI  x)  = Term.TBytesI  x
convertTermFToTerm (TString  x)  = Term.TString  x
convertTermFToTerm (TStringI x)  = Term.TStringI x
convertTermFToTerm (TBool    x)  = Term.TBool    x
convertTermFToTerm  TNull        = Term.TNull
convertTermFToTerm (TSimple  x)  = Term.TSimple  x
convertTermFToTerm (THalf    x)  = Term.THalf    x
convertTermFToTerm (TFloat   x)  = Term.TFloat   x
convertTermFToTerm (TDouble  x)  = Term.TDouble  x


-- NaNs are so annoying...
eqATerm :: Eq a => ATerm a -> ATerm a -> Bool
eqATerm (ATerm t1 ann1) (ATerm t2 ann2) =
    ann1 == ann2 && eqATermF t1 t2

eqATermF :: Eq a => TermF (ATerm a) -> TermF (ATerm a) -> Bool
eqATermF (TList   ts)  (TList   ts')   = and (zipWith eqATerm ts ts')
eqATermF (TListI  ts)  (TListI  ts')   = and (zipWith eqATerm ts ts')
eqATermF (TMap    ts)  (TMap    ts')   = and (zipWith eqATermPair ts ts')
eqATermF (TMapI   ts)  (TMapI   ts')   = and (zipWith eqATermPair ts ts')
eqATermF (TTagged w t) (TTagged w' t') = w == w' && eqATerm t t'
eqATermF (THalf   f)   (THalf   f')    = floatToWord  f == floatToWord  f'
eqATermF (TFloat  f)   (TFloat  f')    = floatToWord  f == floatToWord  f'
eqATermF (TDouble f)   (TDouble f')    = doubleToWord f == doubleToWord f'
eqATermF a b = a == b

eqATermPair :: (Eq a, Eq b)
              => (ATerm a, ATerm b)
              -> (ATerm a, ATerm b)
              -> Bool
eqATermPair (a,b) (a',b') = eqATerm a a' && eqATerm b b'