File: Tree.hs

package info (click to toggle)
haskell-hashed-storage 0.5.9-2
  • links: PTS, VCS
  • area: main
  • in suites: wheezy
  • size: 288 kB
  • sloc: haskell: 2,157; ansic: 799; makefile: 3
file content (463 lines) | stat: -rw-r--r-- 21,341 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
{-# LANGUAGE ScopedTypeVariables, MultiParamTypeClasses, FlexibleInstances, BangPatterns #-}

-- | The abstract representation of a Tree and useful abstract utilities to
-- handle those.
module Storage.Hashed.Tree
    ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..)
    , makeTree, makeTreeWithHash, emptyTree, emptyBlob, makeBlob, makeBlobBS

    -- * Unfolding stubbed (lazy) Trees.
    --
    -- | By default, Tree obtained by a read function is stubbed: it will
    -- contain Stub items that need to be executed in order to access the
    -- respective subtrees. 'expand' will produce an unstubbed Tree.
    , expandUpdate, expand, expandPath, checkExpand

    -- * Tree access and lookup.
    , items, list, listImmediate, treeHash
    , lookup, find, findFile, findTree, itemHash, itemType
    , zipCommonFiles, zipFiles, zipTrees, diffTrees

    -- * Files (Blobs).
    , readBlob

    -- * Filtering trees.
    , FilterTree(..), restrict

    -- * Manipulating trees.
    , modifyTree, updateTree, partiallyUpdateTree, updateSubtrees, overlay
    , addMissingHashes ) where

import Prelude hiding( lookup, filter, all )
import Storage.Hashed.AnchoredPath
import Storage.Hashed.Hash

import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M

import Data.Maybe( catMaybes, isNothing )
import Data.Either( lefts, rights )
import Data.List( union, sort )
import Control.Monad( filterM, liftM )
import Control.Applicative( (<$>) )

--------------------------------
-- Tree, Blob and friends
--

data Blob m = Blob !(m BL.ByteString) !Hash
data TreeItem m = File !(Blob m)
                | SubTree !(Tree m)
                | Stub !(m (Tree m)) !Hash

data ItemType = BlobType | TreeType deriving (Show, Eq)

-- | Abstraction of a filesystem tree.
-- Please note that the Tree returned by the respective read operations will
-- have TreeStub items in it. To obtain a Tree without such stubs, call
-- expand on it, eg.:
--
-- > tree <- readDarcsPristine "." >>= expand
--
-- When a Tree is expanded, it becomes \"final\". All stubs are forced and the
-- Tree can be traversed purely. Access to actual file contents stays in IO
-- though.
--
-- A Tree may have a Hash associated with it. A pair of Tree's is identical
-- whenever their hashes are (the reverse need not hold, since not all Trees
-- come equipped with a hash).
data Tree m = Tree { items :: (M.Map Name (TreeItem m))
                   -- | Get hash of a Tree. This is guaranteed to uniquely
                   -- identify the Tree (including any blob content), as far as
                   -- cryptographic hashes are concerned. Sha256 is recommended.
                   , treeHash :: !Hash }

listImmediate :: Tree m -> [(Name, TreeItem m)]
listImmediate = M.toList . items

-- | Get a hash of a TreeItem. May be Nothing.
itemHash :: TreeItem m -> Hash
itemHash (File (Blob _ h)) = h
itemHash (SubTree t) = treeHash t
itemHash (Stub _ h) = h

itemType :: TreeItem m -> ItemType
itemType (File _) = BlobType
itemType (SubTree _) = TreeType
itemType (Stub _ _) = TreeType

emptyTree :: (Monad m) => Tree m
emptyTree = Tree { items = M.empty
                 , treeHash = NoHash }

emptyBlob :: (Monad m) => Blob m
emptyBlob = Blob (return BL.empty) NoHash

makeBlob :: (Monad m) => BL.ByteString -> Blob m
makeBlob str = Blob (return str) (sha256 str)

makeBlobBS :: (Monad m) => BS.ByteString -> Blob m
makeBlobBS s' = let s = BL.fromChunks [s'] in Blob (return s) (sha256 s)

makeTree :: (Monad m) => [(Name,TreeItem m)] -> Tree m
makeTree l = Tree { items = M.fromList l
                  , treeHash = NoHash }

makeTreeWithHash :: (Monad m) => [(Name,TreeItem m)] -> Hash -> Tree m
makeTreeWithHash l h = Tree { items = M.fromList l
                            , treeHash = h }

-----------------------------------
-- Tree access and lookup
--

-- | Look up a 'Tree' item (an immediate subtree or blob).
lookup :: Tree m -> Name -> Maybe (TreeItem m)
lookup t n = M.lookup n (items t)

find' :: TreeItem m -> AnchoredPath -> Maybe (TreeItem m)
find' t (AnchoredPath []) = Just t
find' (SubTree t) (AnchoredPath (d : rest)) =
    case lookup t d of
      Just sub -> find' sub (AnchoredPath rest)
      Nothing -> Nothing
find' _ _ = Nothing

-- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid.
find :: Tree m -> AnchoredPath -> Maybe (TreeItem m)
find = find' . SubTree

-- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does
-- not point to a Blob.
findFile :: Tree m -> AnchoredPath -> Maybe (Blob m)
findFile t p = case find t p of
                 Just (File x) -> Just x
                 _ -> Nothing

-- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does
-- not point to a Tree.
findTree :: Tree m -> AnchoredPath -> Maybe (Tree m)
findTree t p = case find t p of
                 Just (SubTree x) -> Just x
                 _ -> Nothing

-- | List all contents of a 'Tree'.
list :: Tree m -> [(AnchoredPath, TreeItem m)]
list t_ = paths t_ (AnchoredPath [])
    where paths t p = [ (appendPath p n, i)
                          | (n,i) <- listImmediate t ] ++
                    concat [ paths subt (appendPath p subn)
                             | (subn, SubTree subt) <- listImmediate t ]

expandUpdate :: (Monad m) => (AnchoredPath -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m)
expandUpdate update t_ = go (AnchoredPath []) t_
    where go path t = do
            let subtree (name, sub) = do tree <- go (path `appendPath` name) =<< unstub sub
                                         return (name, SubTree tree)
            expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ]
            let orig_map = M.filter (not . isSub) (items t)
                expanded_map = M.fromList expanded
                tree = t { items = M.union orig_map expanded_map }
            update path tree

-- | Expand a stubbed Tree into a one with no stubs in it. You might want to
-- filter the tree before expanding to save IO. This is the basic
-- implementation, which may be overriden by some Tree instances (this is
-- especially true of the Index case).
expand :: (Monad m) => Tree m -> m (Tree m)
expand = expandUpdate $ \_ -> return

-- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is
-- reachable without crossing any stubs. Moreover, the leaf ought not be a Stub
-- in the resulting Tree. A non-existent path is expanded as far as it can be.
expandPath :: (Monad m) => Tree m -> AnchoredPath -> m (Tree m)
expandPath t_ path_ = expand' t_ path_
    where expand' t (AnchoredPath []) = return t
          expand' t (AnchoredPath (n:rest)) =
            case lookup t n of
              (Just item) | isSub item -> amend t n rest =<< unstub item
              _ -> return t -- fail $ "Descent error in expandPath: " ++ show path_
          amend t name rest sub = do
            sub' <- expand' sub (AnchoredPath rest)
            let tree = t { items = M.insert name (SubTree sub') (items t) }
            return tree

-- | Check the disk version of a Tree: expands it, and checks each
-- hash. Returns either the expanded tree or a list of AnchoredPaths
-- where there are problems. The first argument is the hashing function
-- used to create the tree.
checkExpand :: (TreeItem IO -> IO Hash) -> Tree IO
            -> IO (Either [(AnchoredPath, Hash, Maybe Hash)] (Tree IO))
checkExpand hashFunc t = go (AnchoredPath []) t
    where
      go path t_ = do
        let
            subtree (name, sub) =
                do let here = path `appendPath` name
                   sub' <- (Just <$> unstub sub) `catch` \_ -> return Nothing
                   case sub' of
                     Nothing -> return $ Left [(here, treeHash t_, Nothing)]
                     Just sub -> do
                       treeOrTrouble <- go (path `appendPath` name) sub
                       return $ case treeOrTrouble of
                              Left problems -> Left problems
                              Right tree -> Right (name, SubTree tree)
            badBlob (_, f@(File (Blob s h))) =
              fmap (/= h) (hashFunc f `catch` (\_ -> return NoHash))
            badBlob _ = return False
            render (name, f@(File (Blob _ h))) = do
              h' <- (Just <$> hashFunc f) `catch` \_ -> return Nothing
              return (path `appendPath` name, h, h')
        subs <- mapM subtree [ x | x@(_, item) <- listImmediate t_, isSub item ]
        badBlobs <- filterM badBlob (listImmediate t) >>= mapM render
        let problems = badBlobs ++ (concat $ lefts subs)
        if null problems
         then do
           let orig_map = M.filter (not . isSub) (items t)
               expanded_map = M.fromList $ rights subs
               tree = t_ {items = orig_map `M.union` expanded_map}
           h' <- hashFunc (SubTree t_)
           if h' `match` treeHash t_
            then return $ Right tree
            else return $ Left [(path, treeHash t_, Just h')]
         else return $ Left problems

class (Monad m) => FilterTree a m where
    -- | Given @pred tree@, produce a 'Tree' that only has items for which
    -- @pred@ returns @True@.
    -- The tree might contain stubs. When expanded, these will be subject to
    -- filtering as well.
    filter :: (AnchoredPath -> TreeItem m -> Bool) -> a m -> a m

instance (Monad m) => FilterTree Tree m where
    filter predicate t_ = filter' t_ (AnchoredPath [])
        where filter' t path =
                  let subs = (catMaybes [ (,) name `fmap` wibble path name item
                                              | (name,item) <- listImmediate t ])
                  in t { items = M.mapMaybeWithKey (wibble path) $ items t }
              wibble path name item =
                  let npath = path `appendPath` name in
                      if predicate npath item
                         then Just $ filterSub npath item
                         else Nothing
              filterSub npath (SubTree t) = SubTree $ filter' t npath
              filterSub npath (Stub stub h) =
                  Stub (do x <- stub
                           return $ filter' x npath) h
              filterSub _ x = x

-- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a
-- identical to @tree@, but only has those items that are present in both
-- @tree@ and @guide@. The @guide@ Tree may not contain any stubs.
restrict :: (FilterTree t m, Monad n) => Tree n -> t m -> t m
restrict guide tree = filter accept tree
    where accept path item =
              case (find guide path, item) of
                (Just (SubTree _), SubTree _) -> True
                (Just (SubTree _), Stub _ _) -> True
                (Just (File _), File _) -> True
                (Just (Stub _ _), _) ->
                    error "*sulk* Go away, you, you precondition violator!"
                (_, _) -> False

-- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with
-- care.
readBlob :: Blob m -> m BL.ByteString
readBlob (Blob r _) = r

-- | For every pair of corresponding blobs from the two supplied trees,
-- evaluate the supplied function and accumulate the results in a list. Hint:
-- to get IO actions through, just use sequence on the resulting list.
-- NB. This won't expand any stubs.
zipCommonFiles :: (AnchoredPath -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a]
zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p
                                   | (p, File x) <- list b ]

-- | For each file in each of the two supplied trees, evaluate the supplied
-- function (supplying the corresponding file from the other tree, or Nothing)
-- and accumulate the results in a list. Hint: to get IO actions through, just
-- use sequence on the resulting list.  NB. This won't expand any stubs.
zipFiles :: (AnchoredPath -> Maybe (Blob m) -> Maybe (Blob m) -> a)
         -> Tree m -> Tree m -> [a]
zipFiles f a b = [ f p (findFile a p) (findFile b p)
                   | p <- paths a `sortedUnion` paths b ]
    where paths t = sort [ p | (p, File _) <- list t ]

zipTrees :: (AnchoredPath -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a)
         -> Tree m -> Tree m -> [a]
zipTrees f a b = [ f p (find a p) (find b p)
                   | p <- reverse (paths a `sortedUnion` paths b) ]
    where paths t = sort [ p | (p, _) <- list t ]

-- | Helper function for taking the union of AnchoredPath lists that
-- are already sorted.  This function does not check the precondition
-- so use it carefully.
sortedUnion :: [AnchoredPath] -> [AnchoredPath] -> [AnchoredPath]
sortedUnion [] ys = ys
sortedUnion xs [] = xs
sortedUnion a@(x:xs) b@(y:ys) = case compare x y of
                                LT -> x : sortedUnion xs b
                                EQ -> x : sortedUnion xs ys
                                GT -> y : sortedUnion a ys

-- | Cautiously extracts differing subtrees from a pair of Trees. It will never
-- do any unneccessary expanding. Tree hashes are used to cut the comparison as
-- high up the Tree branches as possible. The result is a pair of trees that do
-- not share any identical subtrees. They are derived from the first and second
-- parameters respectively and they are always fully expanded. It might be
-- advantageous to feed the result into 'zipFiles' or 'zipTrees'.
diffTrees :: forall m. (Functor m, Monad m) => Tree m -> Tree m -> m (Tree m, Tree m)
diffTrees left right =
            if treeHash left `match` treeHash right
               then return (emptyTree, emptyTree)
               else diff left right
  where isFile (File _) = True
        isFile _ = False
        notFile = not . isFile
        isEmpty = null . listImmediate
        subtree :: TreeItem m -> m (Tree m)
        subtree (Stub x _) = x
        subtree (SubTree x) = return x
        subtree (File _) = error "diffTrees tried to descend a File as a subtree"
        maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand)
        maybeUnfold (SubTree x) = SubTree `fmap` expand x
        maybeUnfold i = return i
        immediateN t = [ n | (n, _) <- listImmediate t ]
        diff left' right' = do
          is <- sequence [
                   case (lookup left' n, lookup right' n) of
                     (Just l, Nothing) -> do
                       l' <- maybeUnfold l
                       return (n, Just l', Nothing)
                     (Nothing, Just r) -> do
                       r' <- maybeUnfold r
                       return (n, Nothing, Just r')
                     (Just l, Just r)
                         | itemHash l `match` itemHash r ->
                             return (n, Nothing, Nothing)
                         | notFile l && notFile r ->
                             do x <- subtree l
                                y <- subtree r
                                (x', y') <- diffTrees x y
                                if isEmpty x' && isEmpty y'
                                   then return (n, Nothing, Nothing)
                                   else return (n, Just $ SubTree x', Just $ SubTree y')
                         | isFile l && isFile r ->
                             return (n, Just l, Just r)
                         | otherwise ->
                             do l' <- maybeUnfold l
                                r' <- maybeUnfold r
                                return (n, Just l', Just r')
                     _ -> error "n lookups failed"
                   | n <- immediateN left' `union` immediateN right' ]
          let is_l = [ (n, l) | (n, Just l, _) <- is ]
              is_r = [ (n, r) | (n, _, Just r) <- is ]
          return (makeTree is_l, makeTree is_r)

-- | Modify a Tree (by replacing, or removing or adding items).
modifyTree :: (Monad m) => Tree m -> AnchoredPath -> Maybe (TreeItem m) -> Tree m
modifyTree t_ p_ i_ = snd $ go t_ p_ i_
  where fix t unmod items' = (unmod, t { items = countmap items' `seq` items'
                                       , treeHash = if unmod then treeHash t else NoHash })

        go t (AnchoredPath []) (Just (SubTree sub)) = (treeHash t `match` treeHash sub, sub)

        go t (AnchoredPath [n]) (Just item) = fix t unmod items'
            where !items' = M.insert n item (items t)
                  !unmod = itemHash item `match` case lookup t n of
                                             Nothing -> NoHash
                                             Just i -> itemHash i

        go t (AnchoredPath [n]) Nothing = fix t unmod items'
            where !items' = M.delete n (items t)
                  !unmod = isNothing $ lookup t n

        go t path@(AnchoredPath (n:r)) item = fix t unmod items'
            where subtree s = go s (AnchoredPath r) item
                  !items' = M.insert n sub (items t)
                  !sub = snd sub'
                  !unmod = fst sub'
                  !sub' = case lookup t n of
                    Just (SubTree s) -> let (mod, sub) = subtree s in (mod, SubTree sub)
                    Just (Stub s _) -> (False, Stub (do x <- s
                                                        return $! snd $! subtree x) NoHash)
                    Nothing -> (False, SubTree $! snd $! subtree emptyTree)
                    _ -> error $ "Modify tree at " ++ show path

        go _ (AnchoredPath []) (Just (Stub _ _)) =
            error $ "BUG: Error descending in modifyTree, path = " ++ show p_
        go _ (AnchoredPath []) (Just (File _)) =
            error $ "BUG: Error descending in modifyTree, path = " ++ show p_
        go _ (AnchoredPath []) Nothing =
            error $ "BUG: Error descending in modifyTree, path = " ++ show p_

countmap = M.fold (\_ i -> i + 1) 0

updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m
updateSubtrees fun t =
    fun $ t { items = M.mapWithKey (curry $ snd . update) $ items t
            , treeHash = NoHash }
  where update (k, SubTree s) = (k, SubTree $ updateSubtrees fun s)
        update (k, File f) = (k, File f)
        update (_, Stub _ _) = error "Stubs not supported in updateTreePostorder"

-- | Does /not/ expand the tree.
updateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m)
updateTree fun t = partiallyUpdateTree fun (\_ _ -> True) t

-- | Does /not/ expand the tree.
partiallyUpdateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m))
                       -> (AnchoredPath -> TreeItem m -> Bool) -> Tree m -> m (Tree m)
partiallyUpdateTree fun pred t' = go (AnchoredPath []) t'
  where go path t = do
          items' <- M.fromList <$> mapM (maybeupdate path) (listImmediate t)
          SubTree t' <- fun . SubTree $ t { items = items'
                                          , treeHash = NoHash }
          return t'
        maybeupdate path (k, item) = case pred (path `appendPath` k) item of
          True -> update (path `appendPath` k) (k, item)
          False -> return (k, item)
        update path (k, SubTree tree) = (\new -> (k, SubTree new)) <$> go path tree
        update    _ (k, item) = (\new -> (k, new)) <$> fun item

-- | Lay one tree over another. The resulting Tree will look like the base (1st
-- parameter) Tree, although any items also present in the overlay Tree will be
-- taken from the overlay. It is not allowed to overlay a different kind of an
-- object, nor it is allowed for the overlay to add new objects to base.  This
-- means that the overlay Tree should be a subset of the base Tree (although
-- any extraneous items will be ignored by the implementation).
overlay :: (Functor m, Monad m) => Tree m -> Tree m -> Tree m
overlay base over = Tree { items = M.fromList immediate
                         , treeHash = NoHash }
    where immediate = [ (n, get n) | (n, _) <- listImmediate base ]
          get n = case (M.lookup n $ items base, M.lookup n $ items over) of
                    (Just (File _), Just f@(File _)) -> f
                    (Just (SubTree b), Just (SubTree o)) -> SubTree $ overlay b o
                    (Just (Stub b _), Just (SubTree o)) -> Stub (flip overlay o `fmap` b) NoHash
                    (Just (SubTree b), Just (Stub o _)) -> Stub (overlay b `fmap` o) NoHash
                    (Just (Stub b _), Just (Stub o _)) -> Stub (do o' <- o
                                                                   b' <- b
                                                                   return $ overlay b' o') NoHash
                    (Just x, _) -> x
                    (_, _) -> error $ "Unexpected case in overlay at get " ++ show n ++ "."

addMissingHashes :: (Monad m, Functor m) => (TreeItem m -> m Hash) -> Tree m -> m (Tree m)
addMissingHashes make = updateTree update -- use partiallyUpdateTree here
    where update (SubTree t) = make (SubTree t) >>= \x -> return $ SubTree (t { treeHash = x })
          update (File blob@(Blob con NoHash)) =
              do hash <- make $ File blob
                 return $ File (Blob con hash)
          update (Stub s NoHash) = update . SubTree =<< s
          update x = return x

------ Private utilities shared among multiple functions. --------

unstub :: (Monad m) => TreeItem m -> m (Tree m)
unstub (Stub s _) = s
unstub (SubTree s) = return s

isSub :: TreeItem m -> Bool
isSub (File _) = False
isSub _ = True