1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360
|
module Graph where
import Control.DeepSeq
-- see fgl: Data.Graph - https://hackage.haskell.org/package/fgl
-- see Algebra.Graph - https://hackage.haskell.org/package/algebraic-graphs
-- see https://hackage.haskell.org/package/graphs
import Data.List (lookup)
import Data.Maybe (mapMaybes)
import Data.Maybe (fromJust)
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Display
type NodeId = Int
type EdgeId = Int
type NodeIdSet = IntSet
type EdgeIdSet = IntSet
data Attributes = Attributes [(Text,Maybe Text)]
instance NFData Attributes where
rnf (Attributes as) = rnf as
(Attributes cs1) +:+ (Attributes cs2) = Attributes (cs1 ++ cs2)
emptyAttributes (Attributes l) = null l
instance Show Attributes where
show (Attributes []) = ""
show (Attributes cs) = "[&" ++ intercalate "," (fmap go cs) ++ "]" where
go (k, Nothing) = T.unpack k
go (k, Just v) = T.unpack k ++ "=" ++ T.unpack v
attributesText (Attributes []) = T.empty
attributesText (Attributes cs) = T.concat $ [ T.pack "[&" ] ++ intersperse (T.singleton ',') (fmap go cs) ++ [ T.pack "]" ]
where go (k, Nothing) = k
go (k, Just v) = T.concat [k, T.singleton '=',v]
noAttributes = Attributes []
noAttributesOn set = set & IntMap.fromSet (\n -> noAttributes)
getNodeAttribute tree node key = lookup key ((\(Attributes as) -> as) $ getNodeAttributes tree node)
getEdgeAttribute tree edge key = lookup key ((\(Attributes as) -> as) $ getEdgeAttributes tree edge)
getTreeAttribute tree key = lookup key ((\(Attributes as) -> as) $ getAttributes tree)
--edgeAttributes :: IsGraph t => t -> Text -> ((Maybe (Maybe Text)) -> a) -> IntMap a
edgeAttributes tree key transform = fmap transform (getEdgesSet tree & IntMap.fromSet (\edge -> getEdgeAttribute tree edge key))
getAttribute key Nothing = error $ "No attribute '" ++ (T.unpack key) ++ "'"
getAttribute key (Just Nothing) = error $ "Attribute '" ++ T.unpack key ++ "' has no value"
getAttribute _ (Just (Just text)) = read (T.unpack text)
simpleEdgeAttributes tree key = edgeAttributes tree key (getAttribute key)
-- If there are edges with no value, we don't want to add an attribute with no value
addUEdgeAttributes name values g = setEdgeAttributes g attributesForBranch
where ea = getEdgeAttributes g
attributesForBranch b = case IntMap.lookup (undirectedName b) values of
Nothing -> ea b
Just x -> ea b +:+ (Attributes [(T.pack name, Just (display x))])
{-
ISSUE: If we define graph operations in terms of node *ids*, then finding neighbors will depend on the id->Node map.
If a node stores references to neighboring nodes themselves, then looking up the ids will not be necessary.
Node ids are still necessary to determine if two nodes are the same or not.
POSSIBLE SOLUTION: Make a type family Node t that abstracts over whether a "Node" is a number of something else.
As long as we can find the neighboring nodes and such, then it doesn't matter what a "Node" is.
Likewise for Branches.
We would also need to have a function that gets the NodeID from a node.
Also maybe we should just use Int's as IDs for now, since we aren't yet allowing the set of nodes/branches to change.
-}
class IsGraph g where
getNodesSet :: g -> NodeIdSet
getEdgesSet :: g -> EdgeIdSet
edgesOutOfNodeSet :: g -> NodeId -> EdgeIdSet
sourceNode :: g -> EdgeId -> NodeId
targetNode :: g -> EdgeId -> NodeId
getNodeAttributes :: g -> NodeId -> Attributes
getEdgeAttributes :: g -> EdgeId -> Attributes
getAttributes :: g -> Attributes
setNodeAttributes :: g -> (NodeId -> Attributes) -> g
setEdgeAttributes :: g -> (EdgeId -> Attributes) -> g
setAttributes :: g -> Attributes -> g
type family LabelType g
type family NewLabelType g l
getLabel :: g -> Int -> Maybe (LabelType g)
-- TODO: all_labels - a sorted list of labels that serves as a kind of taxon-map?
-- this would map integers to labels, and labels to integers, even if get_label
-- indexes on nodes...
-- TODO: make the C++ code handle this...
getLabels :: g -> IntMap (Maybe (LabelType g))
relabel :: IntMap (Maybe l) -> g -> NewLabelType g l
class IsGraph g => IsDirectedGraph g where
isForward :: g -> EdgeId -> Bool
outEdges g n = filter (isForward g) (edgesOutOfNode g n)
inEdges g n = map reverseEdge $ filter (not . isForward g) (edgesOutOfNode g n)
isSource g n = null (inEdges g n)
isSink g n = null (outEdges g n)
class IsDirectedGraph g => IsDirectedAcyclicGraph g
data Node = Node { nodeName :: Int, nodeOutEdges:: IntSet}
instance Show Node where
show (Node name outEdges) = "Node{nodeName = " ++ show name ++ ", nodeOutEdges = " ++ show outEdges ++ "}"
-- ideally eSourceNode and eTargetNode would be of type Node,
-- and e_reverse would be of type Edge
data Edge = Edge { eSourceNode, eTargetNode, edgeName :: Int }
instance Show Edge where
show (Edge source target name) = "Edge{eSourceNode = " ++ show source ++ ", eTargetNode = " ++ show target ++ ", edgeName = " ++ show name ++ "}"
data Graph l = Graph {
graphNodes :: IntMap Node,
graphEdges :: IntMap Edge,
graphLabels :: IntMap (Maybe l),
graphNodeAttributes :: IntMap Attributes,
graphEdgeAttributes :: IntMap Attributes,
graphAttributes :: Attributes
}
instance NFData Node where
rnf (Node x y) = x `seq` y `seq` ()
instance NFData Edge where
rnf (Edge s t n) = s `seq` t `seq` n `seq` ()
--FIXME: `instance NFData Graph` does not complain, but makes no sense!
instance NFData l => NFData (Graph l) where
rnf (Graph nodes edges labels nodeAttr edgeAttr graphAttr) = rnf nodes `seq` rnf edges `seq` rnf labels `seq` rnf nodeAttr `seq` rnf edgeAttr `seq` rnf graphAttr `seq` ()
{- ISSUE: How to handle directed graphs?
The underlying data structure represent out-edges. We should be able to use this to represent
directed graphs if we don't insist that both e and -e are registered with the graph. We could then
make a derived class IsUndirectedGraph with method reverseEdge.
-}
{- QUESTION: How to handle "partially directed" graphs?
For Phylogenetic networks, we have nodes with a edge groups [(source,target) | target <- sources],
but only one source can be active at a given time. So if we choose a source for each edge, we
yet again get a normal graph.
In probabilistic terms, each target is associated with a probability.
-}
{- ISSUE: Should we store in-edges as well as out-edges? -}
{- ISSUE: Undirected graphs with a preferred direction aren't quite the same as directed graphs.
We currently derive IsDirectedGraph from IsGraph by adding an isForward attribute for an edge. -}
{- ISSUE: How to handle reverse edges on directed graphs?
SOLUTION: Make a "direction" that is either a forward or reverse edge:
data Direction e = Forward e | Reverse e
reverse (Forward e) = Reverse e
reverse (Reverse e) = Forward e
-}
instance IsGraph (Graph l) where
getNodesSet (Graph nodesMap _ _ _ _ _) = IntMap.keysSet nodesMap
getEdgesSet (Graph _ edgesMap _ _ _ _) = IntMap.keysSet edgesMap
edgesOutOfNodeSet (Graph nodesMap _ _ _ _ _) nodeId = nodeOutEdges $ (nodesMap IntMap.! nodeId)
sourceNode (Graph _ edgesMap _ _ _ _) edge = eSourceNode $ (edgesMap IntMap.! edge)
targetNode (Graph _ edgesMap _ _ _ _) edge = eTargetNode $ (edgesMap IntMap.! edge)
getNodeAttributes (Graph _ _ _ a _ _) node = a IntMap.! node
getEdgeAttributes (Graph _ _ _ _ a _) edge = a IntMap.! edge
getAttributes (Graph _ _ _ _ _ a) = a
setNodeAttributes g@(Graph n e l na ea ga) f = (Graph n e l na' ea ga)
where na' = getNodesSet g & IntMap.fromSet f
setEdgeAttributes g@(Graph n e l na ea ga) f = (Graph n e l na ea' ga)
where ea' = getEdgesSet g & IntMap.fromSet f
setAttributes g@(Graph n e l na ea ga) ga' = (Graph n e l na ea ga')
-- What this does NOT say how to do is to fmap the labels to a new type
-- labelMap display graph
type instance LabelType (Graph l) = l
type instance NewLabelType (Graph l) a = Graph a
getLabel graph node = (graphLabels graph) IntMap.! node
getLabels graph = graphLabels graph
relabel newLabels (Graph nodes edges _ na ea a) = Graph nodes edges newLabels na ea a
------------------ Derived Operations ------------
edgesTowardNodeSet t node = reverseEdgesSet $ edgesOutOfNodeSet t node
edgesTowardNodeArray t node = IntSet.toArray $ edgesTowardNodeSet t node
edgesTowardNode t node = IntSet.toList $ edgesTowardNodeSet t node
edgeForNodes t (n1,n2) = fromJust $ find (\b -> targetNode t b == n2) (edgesOutOfNode t n1)
nodeDegree t n = IntSet.size (edgesOutOfNodeSet t n)
neighbors t n = fmap (targetNode t) (edgesOutOfNode t n)
edgesBeforeEdgeSet t b = reverseEdgesSet $ IntSet.delete b $ edgesOutOfNodeSet t node
where node = sourceNode t b
edgesAfterEdgeSet t b = IntSet.delete (reverseEdge b) $ edgesOutOfNodeSet t node
where node = targetNode t b
edgesBeforeEdgeArray t b = IntSet.toArray $ edgesBeforeEdgeSet t b
edgesAfterEdgeArray t b = IntSet.toArray $ edgesAfterEdgeSet t b
edgesBeforeEdge t b = IntSet.toList $ edgesBeforeEdgeSet t b
edgesAfterEdge t b = IntSet.toList $ edgesAfterEdgeSet t b
isLeafNode t n = (nodeDegree t n < 2)
isInternalNode t n = not $ isLeafNode t n
isInternalBranch t b = isInternalNode t (sourceNode t b) && isInternalNode t (targetNode t b)
isLeafBranch t b = not $ isInternalBranch t b
nodes t = getNodes t
leafNodes t = filter (isLeafNode t) (nodes t)
internalNodes t = filter (isInternalNode t) (nodes t)
-- Given that this is a tree, would numNodes t - numBranches t + 2 work for n_leaves >=3?
numLeaves t = length $ leafNodes t
getNodes t = t & getNodesSet & IntSet.toList
numNodes t = t & getNodesSet & IntSet.size
reverseEdge e = -e
reverseEdges = map reverseEdge
reverseEdgesSet = IntSet.mapNegate
--reverseEdgesArray :: Array Int -> Array Int
reverseEdgesArray = fmap reverseEdge
isUEdge e = e > reverseEdge e
getEdges t = getEdgesSet t & IntSet.toList
getUEdges t = [ e | e <- getEdges t, isUEdge e]
getUEdgesSet t = getUEdges t & IntSet.fromList
numBranches t = length $ getUEdges t
undirectedName e = max e (reverseEdge e)
edgesOutOfNodeArray tree nodeIndex = IntSet.toArray $ edgesOutOfNodeSet tree nodeIndex
edgesOutOfNode tree nodeIndex = IntSet.toList $ edgesOutOfNodeSet tree nodeIndex
treeLength tree = sum [ branchLength tree b | b <- getUEdges tree ]
------------------ Branch Lengths ----------------
class IsGraph g => HasBranchLengths g where
branchLength :: g -> Int -> Double
branchLengths g = branchLength g <$> getUEdges g
branchLengthsSet g = getUEdgesSet g & IntMap.fromSet (branchLength g)
-- I guess it makes sense that you could construct a WithBranchLengths with arbitrary new branch lengths,
data WithBranchLengths t = WithBranchLengths t (IntMap Double)
instance NFData t => NFData (WithBranchLengths t) where
rnf (WithBranchLengths tree lengths) = rnf tree `seq` rnf lengths
instance IsGraph t => IsGraph (WithBranchLengths t) where
getNodesSet (WithBranchLengths t _) = getNodesSet t
getEdgesSet (WithBranchLengths t _) = getEdgesSet t
edgesOutOfNodeSet (WithBranchLengths t _) nodeId = edgesOutOfNodeSet t nodeId
sourceNode (WithBranchLengths t _) edgeId = sourceNode t edgeId
targetNode (WithBranchLengths t _) edgeId = targetNode t edgeId
getNodeAttributes (WithBranchLengths t _) node = getNodeAttributes t node
getEdgeAttributes (WithBranchLengths t _) edge = getEdgeAttributes t edge
getAttributes (WithBranchLengths t _) = getAttributes t
setNodeAttributes (WithBranchLengths t l) as = WithBranchLengths (setNodeAttributes t as) l
setEdgeAttributes (WithBranchLengths t l) as = WithBranchLengths (setEdgeAttributes t as) l
setAttributes (WithBranchLengths t l) as = WithBranchLengths (setAttributes t as) l
type instance LabelType (WithBranchLengths t) = LabelType t
type instance NewLabelType (WithBranchLengths t) a = WithBranchLengths (NewLabelType t a)
getLabel (WithBranchLengths t _) node = getLabel t node
getLabels (WithBranchLengths t _) = getLabels t
relabel newLabels (WithBranchLengths t lengths) = WithBranchLengths (relabel newLabels t) lengths
instance IsGraph t => HasBranchLengths (WithBranchLengths t) where
branchLength (WithBranchLengths tree ds) b = ds IntMap.! (undirectedName b)
branchLengthTree topology lengths = WithBranchLengths topology lengths
------------------ Labels ----------------
addLabels labels t = relabel newLabels t
where newLabels = getNodesSet t & IntMap.fromSet (\node -> lookup node labels)
-- These two functions shouldn't go here -- but where should they go?
addInternalLabels tree = relabel newLabels tree where
oldLabels = getLabels tree
newLabels = getNodesSet tree & IntMap.fromSet newLabel
newLabel node = case (oldLabels IntMap.! node) of
Just label -> Just label
Nothing -> Just $ T.append (T.singleton 'A') (T.pack (show node))
addAncestralLabel node labels = case (labels IntMap.! node) of
Just l -> l
Nothing -> T.append (T.singleton 'A') (T.pack (show node))
dropInternalLabels t = relabel newLabels t where
labels = getLabels t
newLabels = getNodesSet t & IntMap.fromSet (\node -> if nodeDegree t node <= 1 then labels IntMap.! node else Nothing)
---------------------------- Creating a graph from a list of edges ---------------------------
graphFromEdges nodes edges = Graph nodesMap branchesMap labels (noAttributesOn nodesSet) (noAttributesOn branchesSet) noAttributes where
num_nodes = length nodes
-- FIX: this is a way to avoid depending changeables when |edges| is constant, but edges is changeable.
num_branches = num_nodes - 1
-- is this really how we want to name the branches?
namedEdges = zip [1..] $ edges
find_branch :: Int -> Maybe (Int,Int)
find_branch b | b > 0 = fmap snd $ find (\e -> fst e == b) namedEdges
| otherwise = swap <$> (find_branch $ reverseEdge b)
branchFrom n (b,(x,y)) | x == n = Just b
| y == n = Just (-b)
| otherwise = Nothing
edgesFrom n = mapMaybes (branchFrom n) namedEdges
nodesSet = IntSet.fromList nodes
nodesMap = nodesSet & IntMap.fromSet (\n -> Node n (IntSet.fromList $ edgesFrom n) )
branchesSet = IntSet.fromList [1..num_branches] `IntSet.union` IntSet.fromList (map negate [1..num_branches])
branchesMap = branchesSet & IntMap.fromSet (\b -> let Just (s,t) = find_branch b in Edge s t b)
labels = nodesSet & IntMap.fromSet (\n -> Nothing)
|