1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
|
{-# LANGUAGE ScopedTypeVariables, MultiParamTypeClasses, FlexibleInstances, BangPatterns #-}
-- | The abstract representation of a Tree and useful abstract utilities to
-- handle those.
module Storage.Hashed.Tree
( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..)
, makeTree, makeTreeWithHash, emptyTree, emptyBlob, makeBlob, makeBlobBS
-- * Unfolding stubbed (lazy) Trees.
--
-- | By default, Tree obtained by a read function is stubbed: it will
-- contain Stub items that need to be executed in order to access the
-- respective subtrees. 'expand' will produce an unstubbed Tree.
, expandUpdate, expand, expandPath, checkExpand
-- * Tree access and lookup.
, items, list, listImmediate, treeHash
, lookup, find, findFile, findTree, itemHash, itemType
, zipCommonFiles, zipFiles, zipTrees, diffTrees
-- * Files (Blobs).
, readBlob
-- * Filtering trees.
, FilterTree(..), restrict
-- * Manipulating trees.
, modifyTree, updateTree, partiallyUpdateTree, updateSubtrees, overlay
, addMissingHashes ) where
import Prelude hiding( lookup, filter, all )
import Storage.Hashed.AnchoredPath
import Storage.Hashed.Hash
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as BS
import qualified Data.Map as M
import Data.Maybe( catMaybes, isNothing )
import Data.Either( lefts, rights )
import Data.List( union, sort )
import Control.Monad( filterM, liftM )
import Control.Applicative( (<$>) )
--------------------------------
-- Tree, Blob and friends
--
data Blob m = Blob !(m BL.ByteString) !Hash
data TreeItem m = File !(Blob m)
| SubTree !(Tree m)
| Stub !(m (Tree m)) !Hash
data ItemType = BlobType | TreeType deriving (Show, Eq)
-- | Abstraction of a filesystem tree.
-- Please note that the Tree returned by the respective read operations will
-- have TreeStub items in it. To obtain a Tree without such stubs, call
-- expand on it, eg.:
--
-- > tree <- readDarcsPristine "." >>= expand
--
-- When a Tree is expanded, it becomes \"final\". All stubs are forced and the
-- Tree can be traversed purely. Access to actual file contents stays in IO
-- though.
--
-- A Tree may have a Hash associated with it. A pair of Tree's is identical
-- whenever their hashes are (the reverse need not hold, since not all Trees
-- come equipped with a hash).
data Tree m = Tree { items :: (M.Map Name (TreeItem m))
-- | Get hash of a Tree. This is guaranteed to uniquely
-- identify the Tree (including any blob content), as far as
-- cryptographic hashes are concerned. Sha256 is recommended.
, treeHash :: !Hash }
listImmediate :: Tree m -> [(Name, TreeItem m)]
listImmediate = M.toList . items
-- | Get a hash of a TreeItem. May be Nothing.
itemHash :: TreeItem m -> Hash
itemHash (File (Blob _ h)) = h
itemHash (SubTree t) = treeHash t
itemHash (Stub _ h) = h
itemType :: TreeItem m -> ItemType
itemType (File _) = BlobType
itemType (SubTree _) = TreeType
itemType (Stub _ _) = TreeType
emptyTree :: (Monad m) => Tree m
emptyTree = Tree { items = M.empty
, treeHash = NoHash }
emptyBlob :: (Monad m) => Blob m
emptyBlob = Blob (return BL.empty) NoHash
makeBlob :: (Monad m) => BL.ByteString -> Blob m
makeBlob str = Blob (return str) (sha256 str)
makeBlobBS :: (Monad m) => BS.ByteString -> Blob m
makeBlobBS s' = let s = BL.fromChunks [s'] in Blob (return s) (sha256 s)
makeTree :: (Monad m) => [(Name,TreeItem m)] -> Tree m
makeTree l = Tree { items = M.fromList l
, treeHash = NoHash }
makeTreeWithHash :: (Monad m) => [(Name,TreeItem m)] -> Hash -> Tree m
makeTreeWithHash l h = Tree { items = M.fromList l
, treeHash = h }
-----------------------------------
-- Tree access and lookup
--
-- | Look up a 'Tree' item (an immediate subtree or blob).
lookup :: Tree m -> Name -> Maybe (TreeItem m)
lookup t n = M.lookup n (items t)
find' :: TreeItem m -> AnchoredPath -> Maybe (TreeItem m)
find' t (AnchoredPath []) = Just t
find' (SubTree t) (AnchoredPath (d : rest)) =
case lookup t d of
Just sub -> find' sub (AnchoredPath rest)
Nothing -> Nothing
find' _ _ = Nothing
-- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid.
find :: Tree m -> AnchoredPath -> Maybe (TreeItem m)
find = find' . SubTree
-- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does
-- not point to a Blob.
findFile :: Tree m -> AnchoredPath -> Maybe (Blob m)
findFile t p = case find t p of
Just (File x) -> Just x
_ -> Nothing
-- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does
-- not point to a Tree.
findTree :: Tree m -> AnchoredPath -> Maybe (Tree m)
findTree t p = case find t p of
Just (SubTree x) -> Just x
_ -> Nothing
-- | List all contents of a 'Tree'.
list :: Tree m -> [(AnchoredPath, TreeItem m)]
list t_ = paths t_ (AnchoredPath [])
where paths t p = [ (appendPath p n, i)
| (n,i) <- listImmediate t ] ++
concat [ paths subt (appendPath p subn)
| (subn, SubTree subt) <- listImmediate t ]
expandUpdate :: (Monad m) => (AnchoredPath -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m)
expandUpdate update t_ = go (AnchoredPath []) t_
where go path t = do
let subtree (name, sub) = do tree <- go (path `appendPath` name) =<< unstub sub
return (name, SubTree tree)
expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ]
let orig_map = M.filter (not . isSub) (items t)
expanded_map = M.fromList expanded
tree = t { items = M.union orig_map expanded_map }
update path tree
-- | Expand a stubbed Tree into a one with no stubs in it. You might want to
-- filter the tree before expanding to save IO. This is the basic
-- implementation, which may be overriden by some Tree instances (this is
-- especially true of the Index case).
expand :: (Monad m) => Tree m -> m (Tree m)
expand = expandUpdate $ \_ -> return
-- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is
-- reachable without crossing any stubs. Moreover, the leaf ought not be a Stub
-- in the resulting Tree. A non-existent path is expanded as far as it can be.
expandPath :: (Monad m) => Tree m -> AnchoredPath -> m (Tree m)
expandPath t_ path_ = expand' t_ path_
where expand' t (AnchoredPath []) = return t
expand' t (AnchoredPath (n:rest)) =
case lookup t n of
(Just item) | isSub item -> amend t n rest =<< unstub item
_ -> return t -- fail $ "Descent error in expandPath: " ++ show path_
amend t name rest sub = do
sub' <- expand' sub (AnchoredPath rest)
let tree = t { items = M.insert name (SubTree sub') (items t) }
return tree
-- | Check the disk version of a Tree: expands it, and checks each
-- hash. Returns either the expanded tree or a list of AnchoredPaths
-- where there are problems. The first argument is the hashing function
-- used to create the tree.
checkExpand :: (TreeItem IO -> IO Hash) -> Tree IO
-> IO (Either [(AnchoredPath, Hash, Maybe Hash)] (Tree IO))
checkExpand hashFunc t = go (AnchoredPath []) t
where
go path t_ = do
let
subtree (name, sub) =
do let here = path `appendPath` name
sub' <- (Just <$> unstub sub) `catch` \_ -> return Nothing
case sub' of
Nothing -> return $ Left [(here, treeHash t_, Nothing)]
Just sub -> do
treeOrTrouble <- go (path `appendPath` name) sub
return $ case treeOrTrouble of
Left problems -> Left problems
Right tree -> Right (name, SubTree tree)
badBlob (_, f@(File (Blob s h))) =
fmap (/= h) (hashFunc f `catch` (\_ -> return NoHash))
badBlob _ = return False
render (name, f@(File (Blob _ h))) = do
h' <- (Just <$> hashFunc f) `catch` \_ -> return Nothing
return (path `appendPath` name, h, h')
subs <- mapM subtree [ x | x@(_, item) <- listImmediate t_, isSub item ]
badBlobs <- filterM badBlob (listImmediate t) >>= mapM render
let problems = badBlobs ++ (concat $ lefts subs)
if null problems
then do
let orig_map = M.filter (not . isSub) (items t)
expanded_map = M.fromList $ rights subs
tree = t_ {items = orig_map `M.union` expanded_map}
h' <- hashFunc (SubTree t_)
if h' `match` treeHash t_
then return $ Right tree
else return $ Left [(path, treeHash t_, Just h')]
else return $ Left problems
class (Monad m) => FilterTree a m where
-- | Given @pred tree@, produce a 'Tree' that only has items for which
-- @pred@ returns @True@.
-- The tree might contain stubs. When expanded, these will be subject to
-- filtering as well.
filter :: (AnchoredPath -> TreeItem m -> Bool) -> a m -> a m
instance (Monad m) => FilterTree Tree m where
filter predicate t_ = filter' t_ (AnchoredPath [])
where filter' t path =
let subs = (catMaybes [ (,) name `fmap` wibble path name item
| (name,item) <- listImmediate t ])
in t { items = M.mapMaybeWithKey (wibble path) $ items t }
wibble path name item =
let npath = path `appendPath` name in
if predicate npath item
then Just $ filterSub npath item
else Nothing
filterSub npath (SubTree t) = SubTree $ filter' t npath
filterSub npath (Stub stub h) =
Stub (do x <- stub
return $ filter' x npath) h
filterSub _ x = x
-- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a
-- identical to @tree@, but only has those items that are present in both
-- @tree@ and @guide@. The @guide@ Tree may not contain any stubs.
restrict :: (FilterTree t m, Monad n) => Tree n -> t m -> t m
restrict guide tree = filter accept tree
where accept path item =
case (find guide path, item) of
(Just (SubTree _), SubTree _) -> True
(Just (SubTree _), Stub _ _) -> True
(Just (File _), File _) -> True
(Just (Stub _ _), _) ->
error "*sulk* Go away, you, you precondition violator!"
(_, _) -> False
-- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with
-- care.
readBlob :: Blob m -> m BL.ByteString
readBlob (Blob r _) = r
-- | For every pair of corresponding blobs from the two supplied trees,
-- evaluate the supplied function and accumulate the results in a list. Hint:
-- to get IO actions through, just use sequence on the resulting list.
-- NB. This won't expand any stubs.
zipCommonFiles :: (AnchoredPath -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a]
zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p
| (p, File x) <- list b ]
-- | For each file in each of the two supplied trees, evaluate the supplied
-- function (supplying the corresponding file from the other tree, or Nothing)
-- and accumulate the results in a list. Hint: to get IO actions through, just
-- use sequence on the resulting list. NB. This won't expand any stubs.
zipFiles :: (AnchoredPath -> Maybe (Blob m) -> Maybe (Blob m) -> a)
-> Tree m -> Tree m -> [a]
zipFiles f a b = [ f p (findFile a p) (findFile b p)
| p <- paths a `sortedUnion` paths b ]
where paths t = sort [ p | (p, File _) <- list t ]
zipTrees :: (AnchoredPath -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a)
-> Tree m -> Tree m -> [a]
zipTrees f a b = [ f p (find a p) (find b p)
| p <- reverse (paths a `sortedUnion` paths b) ]
where paths t = sort [ p | (p, _) <- list t ]
-- | Helper function for taking the union of AnchoredPath lists that
-- are already sorted. This function does not check the precondition
-- so use it carefully.
sortedUnion :: [AnchoredPath] -> [AnchoredPath] -> [AnchoredPath]
sortedUnion [] ys = ys
sortedUnion xs [] = xs
sortedUnion a@(x:xs) b@(y:ys) = case compare x y of
LT -> x : sortedUnion xs b
EQ -> x : sortedUnion xs ys
GT -> y : sortedUnion a ys
-- | Cautiously extracts differing subtrees from a pair of Trees. It will never
-- do any unneccessary expanding. Tree hashes are used to cut the comparison as
-- high up the Tree branches as possible. The result is a pair of trees that do
-- not share any identical subtrees. They are derived from the first and second
-- parameters respectively and they are always fully expanded. It might be
-- advantageous to feed the result into 'zipFiles' or 'zipTrees'.
diffTrees :: forall m. (Functor m, Monad m) => Tree m -> Tree m -> m (Tree m, Tree m)
diffTrees left right =
if treeHash left `match` treeHash right
then return (emptyTree, emptyTree)
else diff left right
where isFile (File _) = True
isFile _ = False
notFile = not . isFile
isEmpty = null . listImmediate
subtree :: TreeItem m -> m (Tree m)
subtree (Stub x _) = x
subtree (SubTree x) = return x
subtree (File _) = error "diffTrees tried to descend a File as a subtree"
maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand)
maybeUnfold (SubTree x) = SubTree `fmap` expand x
maybeUnfold i = return i
immediateN t = [ n | (n, _) <- listImmediate t ]
diff left' right' = do
is <- sequence [
case (lookup left' n, lookup right' n) of
(Just l, Nothing) -> do
l' <- maybeUnfold l
return (n, Just l', Nothing)
(Nothing, Just r) -> do
r' <- maybeUnfold r
return (n, Nothing, Just r')
(Just l, Just r)
| itemHash l `match` itemHash r ->
return (n, Nothing, Nothing)
| notFile l && notFile r ->
do x <- subtree l
y <- subtree r
(x', y') <- diffTrees x y
if isEmpty x' && isEmpty y'
then return (n, Nothing, Nothing)
else return (n, Just $ SubTree x', Just $ SubTree y')
| isFile l && isFile r ->
return (n, Just l, Just r)
| otherwise ->
do l' <- maybeUnfold l
r' <- maybeUnfold r
return (n, Just l', Just r')
_ -> error "n lookups failed"
| n <- immediateN left' `union` immediateN right' ]
let is_l = [ (n, l) | (n, Just l, _) <- is ]
is_r = [ (n, r) | (n, _, Just r) <- is ]
return (makeTree is_l, makeTree is_r)
-- | Modify a Tree (by replacing, or removing or adding items).
modifyTree :: (Monad m) => Tree m -> AnchoredPath -> Maybe (TreeItem m) -> Tree m
modifyTree t_ p_ i_ = snd $ go t_ p_ i_
where fix t unmod items' = (unmod, t { items = countmap items' `seq` items'
, treeHash = if unmod then treeHash t else NoHash })
go t (AnchoredPath []) (Just (SubTree sub)) = (treeHash t `match` treeHash sub, sub)
go t (AnchoredPath [n]) (Just item) = fix t unmod items'
where !items' = M.insert n item (items t)
!unmod = itemHash item `match` case lookup t n of
Nothing -> NoHash
Just i -> itemHash i
go t (AnchoredPath [n]) Nothing = fix t unmod items'
where !items' = M.delete n (items t)
!unmod = isNothing $ lookup t n
go t path@(AnchoredPath (n:r)) item = fix t unmod items'
where subtree s = go s (AnchoredPath r) item
!items' = M.insert n sub (items t)
!sub = snd sub'
!unmod = fst sub'
!sub' = case lookup t n of
Just (SubTree s) -> let (mod, sub) = subtree s in (mod, SubTree sub)
Just (Stub s _) -> (False, Stub (do x <- s
return $! snd $! subtree x) NoHash)
Nothing -> (False, SubTree $! snd $! subtree emptyTree)
_ -> error $ "Modify tree at " ++ show path
go _ (AnchoredPath []) (Just (Stub _ _)) =
error $ "BUG: Error descending in modifyTree, path = " ++ show p_
go _ (AnchoredPath []) (Just (File _)) =
error $ "BUG: Error descending in modifyTree, path = " ++ show p_
go _ (AnchoredPath []) Nothing =
error $ "BUG: Error descending in modifyTree, path = " ++ show p_
countmap = M.fold (\_ i -> i + 1) 0
updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m
updateSubtrees fun t =
fun $ t { items = M.mapWithKey (curry $ snd . update) $ items t
, treeHash = NoHash }
where update (k, SubTree s) = (k, SubTree $ updateSubtrees fun s)
update (k, File f) = (k, File f)
update (_, Stub _ _) = error "Stubs not supported in updateTreePostorder"
-- | Does /not/ expand the tree.
updateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m)
updateTree fun t = partiallyUpdateTree fun (\_ _ -> True) t
-- | Does /not/ expand the tree.
partiallyUpdateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m))
-> (AnchoredPath -> TreeItem m -> Bool) -> Tree m -> m (Tree m)
partiallyUpdateTree fun pred t' = go (AnchoredPath []) t'
where go path t = do
items' <- M.fromList <$> mapM (maybeupdate path) (listImmediate t)
SubTree t' <- fun . SubTree $ t { items = items'
, treeHash = NoHash }
return t'
maybeupdate path (k, item) = case pred (path `appendPath` k) item of
True -> update (path `appendPath` k) (k, item)
False -> return (k, item)
update path (k, SubTree tree) = (\new -> (k, SubTree new)) <$> go path tree
update _ (k, item) = (\new -> (k, new)) <$> fun item
-- | Lay one tree over another. The resulting Tree will look like the base (1st
-- parameter) Tree, although any items also present in the overlay Tree will be
-- taken from the overlay. It is not allowed to overlay a different kind of an
-- object, nor it is allowed for the overlay to add new objects to base. This
-- means that the overlay Tree should be a subset of the base Tree (although
-- any extraneous items will be ignored by the implementation).
overlay :: (Functor m, Monad m) => Tree m -> Tree m -> Tree m
overlay base over = Tree { items = M.fromList immediate
, treeHash = NoHash }
where immediate = [ (n, get n) | (n, _) <- listImmediate base ]
get n = case (M.lookup n $ items base, M.lookup n $ items over) of
(Just (File _), Just f@(File _)) -> f
(Just (SubTree b), Just (SubTree o)) -> SubTree $ overlay b o
(Just (Stub b _), Just (SubTree o)) -> Stub (flip overlay o `fmap` b) NoHash
(Just (SubTree b), Just (Stub o _)) -> Stub (overlay b `fmap` o) NoHash
(Just (Stub b _), Just (Stub o _)) -> Stub (do o' <- o
b' <- b
return $ overlay b' o') NoHash
(Just x, _) -> x
(_, _) -> error $ "Unexpected case in overlay at get " ++ show n ++ "."
addMissingHashes :: (Monad m, Functor m) => (TreeItem m -> m Hash) -> Tree m -> m (Tree m)
addMissingHashes make = updateTree update -- use partiallyUpdateTree here
where update (SubTree t) = make (SubTree t) >>= \x -> return $ SubTree (t { treeHash = x })
update (File blob@(Blob con NoHash)) =
do hash <- make $ File blob
return $ File (Blob con hash)
update (Stub s NoHash) = update . SubTree =<< s
update x = return x
------ Private utilities shared among multiple functions. --------
unstub :: (Monad m) => TreeItem m -> m (Tree m)
unstub (Stub s _) = s
unstub (SubTree s) = return s
isSub :: TreeItem m -> Bool
isSub (File _) = False
isSub _ = True
|