1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404
|
{-
(c) Bartosz Nitka, Facebook, 2015
UniqDFM: Specialised deterministic finite maps, for things with @Uniques@.
Basically, the things need to be in class @Uniquable@, and we use the
@getUnique@ method to grab their @Uniques@.
This is very similar to @UniqFM@, the major difference being that the order of
folding is not dependent on @Unique@ ordering, giving determinism.
Currently the ordering is determined by insertion order.
See Note [Unique Determinism] in Unique for explanation why @Unique@ ordering
is not deterministic.
-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE FlexibleContexts #-}
{-# OPTIONS_GHC -Wall #-}
module UniqDFM (
-- * Unique-keyed deterministic mappings
UniqDFM, -- abstract type
-- ** Manipulating those mappings
emptyUDFM,
unitUDFM,
addToUDFM,
addToUDFM_C,
addListToUDFM,
delFromUDFM,
delListFromUDFM,
adjustUDFM,
alterUDFM,
mapUDFM,
plusUDFM,
plusUDFM_C,
lookupUDFM, lookupUDFM_Directly,
elemUDFM,
foldUDFM,
eltsUDFM,
filterUDFM, filterUDFM_Directly,
isNullUDFM,
sizeUDFM,
intersectUDFM, udfmIntersectUFM,
intersectsUDFM,
disjointUDFM, disjointUdfmUfm,
minusUDFM,
listToUDFM,
udfmMinusUFM,
partitionUDFM,
anyUDFM, allUDFM,
pprUDFM,
udfmToList,
udfmToUfm,
nonDetFoldUDFM,
alwaysUnsafeUfmToUdfm,
) where
import GhcPrelude
import Unique ( Uniquable(..), Unique, getKey )
import Outputable
import qualified Data.IntMap as M
import Data.Data
import Data.List (sortBy)
import Data.Function (on)
import qualified Data.Semigroup as Semi
import UniqFM (UniqFM, listToUFM_Directly, nonDetUFMToList, ufmToIntMap)
-- Note [Deterministic UniqFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- A @UniqDFM@ is just like @UniqFM@ with the following additional
-- property: the function `udfmToList` returns the elements in some
-- deterministic order not depending on the Unique key for those elements.
--
-- If the client of the map performs operations on the map in deterministic
-- order then `udfmToList` returns them in deterministic order.
--
-- There is an implementation cost: each element is given a serial number
-- as it is added, and `udfmToList` sorts it's result by this serial
-- number. So you should only use `UniqDFM` if you need the deterministic
-- property.
--
-- `foldUDFM` also preserves determinism.
--
-- Normal @UniqFM@ when you turn it into a list will use
-- Data.IntMap.toList function that returns the elements in the order of
-- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
-- with a list ordered by @Uniques@.
-- The order of @Uniques@ is known to be not stable across rebuilds.
-- See Note [Unique Determinism] in Unique.
--
--
-- There's more than one way to implement this. The implementation here tags
-- every value with the insertion time that can later be used to sort the
-- values when asked to convert to a list.
--
-- An alternative would be to have
--
-- data UniqDFM ele = UDFM (M.IntMap ele) [ele]
--
-- where the list determines the order. This makes deletion tricky as we'd
-- only accumulate elements in that list, but makes merging easier as you
-- can just merge both structures independently.
-- Deletion can probably be done in amortized fashion when the size of the
-- list is twice the size of the set.
-- | A type of values tagged with insertion time
data TaggedVal val =
TaggedVal
val
{-# UNPACK #-} !Int -- ^ insertion time
deriving Data
taggedFst :: TaggedVal val -> val
taggedFst (TaggedVal v _) = v
taggedSnd :: TaggedVal val -> Int
taggedSnd (TaggedVal _ i) = i
instance Eq val => Eq (TaggedVal val) where
(TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2
instance Functor TaggedVal where
fmap f (TaggedVal val i) = TaggedVal (f val) i
-- | Type of unique deterministic finite maps
data UniqDFM ele =
UDFM
!(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
-- values are tagged with insertion time.
-- The invariant is that all the tags will
-- be distinct within a single map
{-# UNPACK #-} !Int -- Upper bound on the values' insertion
-- time. See Note [Overflow on plusUDFM]
deriving (Data, Functor)
emptyUDFM :: UniqDFM elt
emptyUDFM = UDFM M.empty 0
unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1
addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt
addToUDFM m k v = addToUDFM_Directly m (getUnique k) v
addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
addToUDFM_Directly (UDFM m i) u v
= UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
where
tf (TaggedVal new_v _) (TaggedVal _ old_i) = TaggedVal new_v old_i
-- Keep the old tag, but insert the new value
-- This means that udfmToList typically returns elements
-- in the order of insertion, rather than the reverse
addToUDFM_Directly_C
:: (elt -> elt -> elt) -- old -> new -> result
-> UniqDFM elt
-> Unique -> elt
-> UniqDFM elt
addToUDFM_Directly_C f (UDFM m i) u v
= UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
where
tf (TaggedVal new_v _) (TaggedVal old_v old_i)
= TaggedVal (f old_v new_v) old_i
-- Flip the arguments, because M.insertWith uses (new->old->result)
-- but f needs (old->new->result)
-- Like addToUDFM_Directly, keep the old tag
addToUDFM_C
:: Uniquable key => (elt -> elt -> elt) -- old -> new -> result
-> UniqDFM elt -- old
-> key -> elt -- new
-> UniqDFM elt -- result
addToUDFM_C f m k v = addToUDFM_Directly_C f m (getUnique k) v
addListToUDFM :: Uniquable key => UniqDFM elt -> [(key,elt)] -> UniqDFM elt
addListToUDFM = foldl (\m (k, v) -> addToUDFM m k v)
addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v)
addListToUDFM_Directly_C
:: (elt -> elt -> elt) -> UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
addListToUDFM_Directly_C f = foldl (\m (k, v) -> addToUDFM_Directly_C f m k v)
delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
plusUDFM_C :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
-- we will use the upper bound on the tag as a proxy for the set size,
-- to insert the smaller one into the bigger one
| i > j = insertUDFMIntoLeft_C f udfml udfmr
| otherwise = insertUDFMIntoLeft_C f udfmr udfml
-- Note [Overflow on plusUDFM]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- There are multiple ways of implementing plusUDFM.
-- The main problem that needs to be solved is overlap on times of
-- insertion between different keys in two maps.
-- Consider:
--
-- A = fromList [(a, (x, 1))]
-- B = fromList [(b, (y, 1))]
--
-- If you merge them naively you end up with:
--
-- C = fromList [(a, (x, 1)), (b, (y, 1))]
--
-- Which loses information about ordering and brings us back into
-- non-deterministic world.
--
-- The solution I considered before would increment the tags on one of the
-- sets by the upper bound of the other set. The problem with this approach
-- is that you'll run out of tags for some merge patterns.
-- Say you start with A with upper bound 1, you merge A with A to get A' and
-- the upper bound becomes 2. You merge A' with A' and the upper bound
-- doubles again. After 64 merges you overflow.
-- This solution would have the same time complexity as plusUFM, namely O(n+m).
--
-- The solution I ended up with has time complexity of
-- O(m log m + m * min (n+m, W)) where m is the smaller set.
-- It simply inserts the elements of the smaller set into the larger
-- set in the order that they were inserted into the smaller set. That's
-- O(m log m) for extracting the elements from the smaller set in the
-- insertion order and O(m * min(n+m, W)) to insert them into the bigger
-- set.
plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
-- we will use the upper bound on the tag as a proxy for the set size,
-- to insert the smaller one into the bigger one
| i > j = insertUDFMIntoLeft udfml udfmr
| otherwise = insertUDFMIntoLeft udfmr udfml
insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr
insertUDFMIntoLeft_C
:: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
insertUDFMIntoLeft_C f udfml udfmr =
addListToUDFM_Directly_C f udfml $ udfmToList udfmr
lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
lookupUDFM_Directly :: UniqDFM elt -> Unique -> Maybe elt
lookupUDFM_Directly (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey k) m
elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m
-- | Performs a deterministic fold over the UniqDFM.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
foldUDFM k z m = foldr k z (eltsUDFM m)
-- | Performs a nondeterministic fold over the UniqDFM.
-- It's O(n), same as the corresponding function on `UniqFM`.
-- If you use this please provide a justification why it doesn't introduce
-- nondeterminism.
nonDetFoldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
nonDetFoldUDFM k z (UDFM m _i) = foldr k z $ map taggedFst $ M.elems m
eltsUDFM :: UniqDFM elt -> [elt]
eltsUDFM (UDFM m _i) =
map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m
filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i
filterUDFM_Directly :: (Unique -> elt -> Bool) -> UniqDFM elt -> UniqDFM elt
filterUDFM_Directly p (UDFM m i) = UDFM (M.filterWithKey p' m) i
where
p' k (TaggedVal v _) = p (getUnique k) v
-- | Converts `UniqDFM` to a list, with elements in deterministic order.
-- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
udfmToList :: UniqDFM elt -> [(Unique, elt)]
udfmToList (UDFM m _i) =
[ (getUnique k, taggedFst v)
| (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]
isNullUDFM :: UniqDFM elt -> Bool
isNullUDFM (UDFM m _) = M.null m
sizeUDFM :: UniqDFM elt -> Int
sizeUDFM (UDFM m _i) = M.size m
intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
-- M.intersection is left biased, that means the result will only have
-- a subset of elements from the left set, so `i` is a good upper bound.
udfmIntersectUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
udfmIntersectUFM (UDFM x i) y = UDFM (M.intersection x (ufmToIntMap y)) i
-- M.intersection is left biased, that means the result will only have
-- a subset of elements from the left set, so `i` is a good upper bound.
intersectsUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
intersectsUDFM x y = isNullUDFM (x `intersectUDFM` y)
disjointUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
disjointUDFM (UDFM x _i) (UDFM y _j) = M.null (M.intersection x y)
disjointUdfmUfm :: UniqDFM elt -> UniqFM elt2 -> Bool
disjointUdfmUfm (UDFM x _i) y = M.null (M.intersection x (ufmToIntMap y))
minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
-- M.difference returns a subset of a left set, so `i` is a good upper
-- bound.
udfmMinusUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
udfmMinusUFM (UDFM x i) y = UDFM (M.difference x (ufmToIntMap y)) i
-- M.difference returns a subset of a left set, so `i` is a good upper
-- bound.
-- | Partition UniqDFM into two UniqDFMs according to the predicate
partitionUDFM :: (elt -> Bool) -> UniqDFM elt -> (UniqDFM elt, UniqDFM elt)
partitionUDFM p (UDFM m i) =
case M.partition (p . taggedFst) m of
(left, right) -> (UDFM left i, UDFM right i)
-- | Delete a list of elements from a UniqDFM
delListFromUDFM :: Uniquable key => UniqDFM elt -> [key] -> UniqDFM elt
delListFromUDFM = foldl delFromUDFM
-- | This allows for lossy conversion from UniqDFM to UniqFM
udfmToUfm :: UniqDFM elt -> UniqFM elt
udfmToUfm (UDFM m _i) =
listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]
listToUDFM :: Uniquable key => [(key,elt)] -> UniqDFM elt
listToUDFM = foldl (\m (k, v) -> addToUDFM m k v) emptyUDFM
listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
-- | Apply a function to a particular element
adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM elt -> key -> UniqDFM elt
adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i
-- | The expression (alterUDFM f k map) alters value x at k, or absence
-- thereof. alterUDFM can be used to insert, delete, or update a value in
-- UniqDFM. Use addToUDFM, delFromUDFM or adjustUDFM when possible, they are
-- more efficient.
alterUDFM
:: Uniquable key
=> (Maybe elt -> Maybe elt) -- How to adjust
-> UniqDFM elt -- old
-> key -- new
-> UniqDFM elt -- result
alterUDFM f (UDFM m i) k =
UDFM (M.alter alterf (getKey $ getUnique k) m) (i + 1)
where
alterf Nothing = inject $ f Nothing
alterf (Just (TaggedVal v _)) = inject $ f (Just v)
inject Nothing = Nothing
inject (Just v) = Just $ TaggedVal v i
-- | Map a function over every value in a UniqDFM
mapUDFM :: (elt1 -> elt2) -> UniqDFM elt1 -> UniqDFM elt2
mapUDFM f (UDFM m i) = UDFM (M.map (fmap f) m) i
anyUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
anyUDFM p (UDFM m _i) = M.foldr ((||) . p . taggedFst) False m
allUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
allUDFM p (UDFM m _i) = M.foldr ((&&) . p . taggedFst) True m
instance Semi.Semigroup (UniqDFM a) where
(<>) = plusUDFM
instance Monoid (UniqDFM a) where
mempty = emptyUDFM
mappend = (Semi.<>)
-- This should not be used in commited code, provided for convenience to
-- make ad-hoc conversions when developing
alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
alwaysUnsafeUfmToUdfm = listToUDFM_Directly . nonDetUFMToList
-- Output-ery
instance Outputable a => Outputable (UniqDFM a) where
ppr ufm = pprUniqDFM ppr ufm
pprUniqDFM :: (a -> SDoc) -> UniqDFM a -> SDoc
pprUniqDFM ppr_elt ufm
= brackets $ fsep $ punctuate comma $
[ ppr uq <+> text ":->" <+> ppr_elt elt
| (uq, elt) <- udfmToList ufm ]
pprUDFM :: UniqDFM a -- ^ The things to be pretty printed
-> ([a] -> SDoc) -- ^ The pretty printing function to use on the elements
-> SDoc -- ^ 'SDoc' where the things have been pretty
-- printed
pprUDFM ufm pp = pp (eltsUDFM ufm)
|