File: Graph.hs

package info (click to toggle)
bali-phy 4.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,408 kB
  • sloc: cpp: 120,641; xml: 13,882; haskell: 10,056; python: 2,989; yacc: 1,323; perl: 1,169; lex: 912; sh: 343; makefile: 32
file content (360 lines) | stat: -rw-r--r-- 15,165 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
module Graph where

import Control.DeepSeq
-- see fgl: Data.Graph - https://hackage.haskell.org/package/fgl
-- see Algebra.Graph - https://hackage.haskell.org/package/algebraic-graphs
-- see https://hackage.haskell.org/package/graphs

import Data.List (lookup)
import Data.Maybe (mapMaybes)

import Data.Maybe (fromJust)
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Display

type NodeId = Int
type EdgeId = Int
type NodeIdSet = IntSet
type EdgeIdSet = IntSet

data Attributes = Attributes [(Text,Maybe Text)]

instance NFData Attributes where
    rnf (Attributes as) = rnf as

(Attributes cs1) +:+ (Attributes cs2) = Attributes (cs1 ++ cs2)

emptyAttributes (Attributes l) = null l

instance Show Attributes where
    show (Attributes []) = ""
    show (Attributes cs) = "[&" ++ intercalate "," (fmap go cs)  ++ "]" where
                       go (k, Nothing) = T.unpack k
                       go (k, Just v)  = T.unpack k ++ "=" ++ T.unpack v

attributesText (Attributes []) = T.empty
attributesText (Attributes cs) = T.concat $ [ T.pack "[&" ] ++ intersperse (T.singleton ',') (fmap go cs) ++ [ T.pack "]" ]
    where go (k, Nothing) = k
          go (k, Just v)  = T.concat [k, T.singleton '=',v]

noAttributes = Attributes []
noAttributesOn set = set & IntMap.fromSet (\n -> noAttributes)

getNodeAttribute tree node key = lookup key ((\(Attributes as) -> as) $ getNodeAttributes tree node)
getEdgeAttribute tree edge key = lookup key ((\(Attributes as) -> as) $ getEdgeAttributes tree edge)
getTreeAttribute tree key = lookup key ((\(Attributes as) -> as) $ getAttributes tree)

--edgeAttributes :: IsGraph t => t -> Text -> ((Maybe (Maybe Text)) -> a) -> IntMap a
edgeAttributes tree key transform = fmap transform (getEdgesSet tree & IntMap.fromSet (\edge -> getEdgeAttribute tree edge key))

getAttribute key Nothing = error $ "No attribute '" ++ (T.unpack key) ++ "'"
getAttribute key (Just Nothing) = error $ "Attribute '" ++ T.unpack key ++ "' has no value"
getAttribute _   (Just (Just text)) = read (T.unpack text)

simpleEdgeAttributes tree key = edgeAttributes tree key (getAttribute key)

-- If there are edges with no value, we don't want to add an attribute with no value
addUEdgeAttributes name values g = setEdgeAttributes g attributesForBranch
    where ea = getEdgeAttributes g
          attributesForBranch b = case IntMap.lookup (undirectedName b) values of
                                    Nothing -> ea b
                                    Just x  -> ea b +:+ (Attributes [(T.pack name, Just (display x))])

{-
 ISSUE: If we define graph operations in terms of node *ids*, then finding neighbors will depend on the id->Node map.
        If a node stores references to neighboring nodes themselves, then looking up the ids will not be necessary.
        Node ids are still necessary to determine if two nodes are the same or not.

 POSSIBLE SOLUTION: Make a type family Node t that abstracts over whether a "Node" is a number of something else.
        As long as we can find the neighboring nodes and such, then it doesn't matter what a "Node" is.
        Likewise for Branches.

  We would also need to have a function that gets the NodeID from a node.

  Also maybe we should just use Int's as IDs for now, since we aren't yet allowing the set of nodes/branches to change.
-}

class IsGraph g where
    getNodesSet :: g -> NodeIdSet
    getEdgesSet :: g -> EdgeIdSet

    edgesOutOfNodeSet :: g -> NodeId -> EdgeIdSet
    sourceNode :: g -> EdgeId -> NodeId
    targetNode :: g -> EdgeId -> NodeId

    getNodeAttributes :: g -> NodeId -> Attributes
    getEdgeAttributes :: g -> EdgeId -> Attributes
    getAttributes :: g -> Attributes

    setNodeAttributes :: g -> (NodeId -> Attributes) -> g
    setEdgeAttributes :: g -> (EdgeId -> Attributes) -> g
    setAttributes :: g -> Attributes -> g

    type family LabelType g
    type family NewLabelType g l

    getLabel :: g -> Int -> Maybe (LabelType g)
    -- TODO: all_labels - a sorted list of labels that serves as a kind of taxon-map?
    -- this would map integers to labels, and labels to integers, even if get_label
    -- indexes on nodes...
    -- TODO: make the C++ code handle this...

    getLabels :: g -> IntMap (Maybe (LabelType g))
    relabel :: IntMap (Maybe l) -> g -> NewLabelType g l

class IsGraph g => IsDirectedGraph g where
    isForward :: g -> EdgeId -> Bool

outEdges g n = filter (isForward g) (edgesOutOfNode g n)
inEdges g n = map reverseEdge $ filter (not . isForward g) (edgesOutOfNode g n)

isSource g n = null (inEdges g n)
isSink g n = null (outEdges g n)

class IsDirectedGraph g => IsDirectedAcyclicGraph g

data Node = Node { nodeName :: Int, nodeOutEdges:: IntSet}

instance Show Node where
    show (Node name outEdges) = "Node{nodeName = " ++ show name ++ ", nodeOutEdges = " ++ show outEdges ++ "}"

-- ideally eSourceNode and eTargetNode would be of type Node,
--   and e_reverse would be of type Edge
data Edge = Edge { eSourceNode, eTargetNode, edgeName :: Int }

instance Show Edge where
    show (Edge source target name) = "Edge{eSourceNode = " ++ show source ++ ", eTargetNode = " ++ show target ++ ", edgeName = " ++ show name ++ "}"

data Graph l = Graph {
      graphNodes :: IntMap Node,
      graphEdges :: IntMap Edge,
      graphLabels :: IntMap (Maybe l),
      graphNodeAttributes :: IntMap Attributes,
      graphEdgeAttributes :: IntMap Attributes,
      graphAttributes :: Attributes
    }

instance NFData Node where
    rnf (Node x y) = x `seq` y `seq` ()

instance NFData Edge where
    rnf (Edge s t n) = s `seq` t `seq` n `seq` ()

--FIXME: `instance NFData Graph` does not complain, but makes no sense!
instance NFData l => NFData (Graph l) where
    rnf (Graph nodes edges labels nodeAttr edgeAttr graphAttr) = rnf nodes `seq` rnf edges `seq` rnf labels `seq` rnf nodeAttr `seq` rnf edgeAttr `seq` rnf graphAttr `seq` ()

{- ISSUE: How to handle directed graphs?

The underlying data structure represent out-edges.  We should be able to use this to represent
directed graphs if we don't insist that both e and -e are registered with the graph.  We could then
make a derived class IsUndirectedGraph with method reverseEdge.

-}

{- QUESTION: How to handle "partially directed" graphs?

For Phylogenetic networks, we have nodes with a edge groups [(source,target) | target <- sources],
but only one source can be active at a given time.  So if we choose a source for each edge, we
yet again get a normal graph.

In probabilistic terms, each target is associated with a probability.

-}


{- ISSUE: Should we store in-edges as well as out-edges? -}

{- ISSUE: Undirected graphs with a preferred direction aren't quite the same as directed graphs.

We currently derive IsDirectedGraph from IsGraph by adding an isForward attribute for an edge. -}

{- ISSUE: How to handle reverse edges on directed graphs?

SOLUTION: Make a "direction" that is either a forward or reverse edge:

       data Direction e = Forward e | Reverse e
       reverse (Forward e) = Reverse e
       reverse (Reverse e) = Forward e
-}


instance IsGraph (Graph l) where
    getNodesSet (Graph nodesMap _ _  _ _ _)            = IntMap.keysSet nodesMap
    getEdgesSet (Graph _  edgesMap _ _ _ _)            = IntMap.keysSet edgesMap

    edgesOutOfNodeSet (Graph nodesMap _ _ _ _ _) nodeId = nodeOutEdges $ (nodesMap IntMap.! nodeId)
    sourceNode (Graph _ edgesMap _ _ _ _) edge = eSourceNode $ (edgesMap IntMap.! edge)
    targetNode (Graph _ edgesMap _ _ _ _) edge = eTargetNode $ (edgesMap IntMap.! edge)

    getNodeAttributes (Graph _ _ _ a _ _) node     = a IntMap.! node
    getEdgeAttributes (Graph _ _ _ _ a _) edge     = a IntMap.! edge
    getAttributes (Graph _ _ _ _ _ a)              = a

    setNodeAttributes g@(Graph n e l na ea ga) f = (Graph n e l na' ea ga)
        where na' = getNodesSet g & IntMap.fromSet f
    setEdgeAttributes g@(Graph n e l na ea ga) f = (Graph n e l na ea' ga)
        where ea' = getEdgesSet g & IntMap.fromSet f
    setAttributes g@(Graph n e l na ea ga) ga' = (Graph n e l na ea ga')

    -- What this does NOT say how to do is to fmap the labels to a new type
    -- labelMap display graph
    type instance LabelType (Graph l) = l
    type instance NewLabelType (Graph l) a = Graph a
    getLabel graph node = (graphLabels graph) IntMap.! node
    getLabels graph = graphLabels graph
    relabel newLabels (Graph nodes edges _ na ea a) = Graph nodes edges newLabels na ea a

------------------ Derived Operations ------------
edgesTowardNodeSet t node = reverseEdgesSet $ edgesOutOfNodeSet t node
edgesTowardNodeArray t node = IntSet.toArray $ edgesTowardNodeSet t node
edgesTowardNode t node = IntSet.toList $ edgesTowardNodeSet t node

edgeForNodes t (n1,n2) = fromJust $ find (\b -> targetNode t b == n2) (edgesOutOfNode t n1)

nodeDegree t n = IntSet.size (edgesOutOfNodeSet t n)
neighbors t n = fmap (targetNode t) (edgesOutOfNode t n)

edgesBeforeEdgeSet t b  = reverseEdgesSet $ IntSet.delete b $ edgesOutOfNodeSet t node
    where node = sourceNode t b
edgesAfterEdgeSet t b = IntSet.delete (reverseEdge b) $ edgesOutOfNodeSet t node
    where node = targetNode t b

edgesBeforeEdgeArray t b = IntSet.toArray $ edgesBeforeEdgeSet t b
edgesAfterEdgeArray t b = IntSet.toArray $ edgesAfterEdgeSet t b
edgesBeforeEdge t b = IntSet.toList $ edgesBeforeEdgeSet t b
edgesAfterEdge t b = IntSet.toList $ edgesAfterEdgeSet t b

isLeafNode t n = (nodeDegree t n < 2)
isInternalNode t n = not $ isLeafNode t n
isInternalBranch t b = isInternalNode t (sourceNode t b) && isInternalNode t (targetNode t b)
isLeafBranch t b = not $ isInternalBranch t b

nodes t = getNodes t
leafNodes t = filter (isLeafNode t) (nodes t)
internalNodes t = filter (isInternalNode t) (nodes t)

-- Given that this is a tree, would numNodes t - numBranches t + 2 work for n_leaves >=3?
numLeaves t = length $ leafNodes t

getNodes t = t & getNodesSet & IntSet.toList
numNodes t = t & getNodesSet & IntSet.size

reverseEdge e = -e

reverseEdges = map reverseEdge
reverseEdgesSet = IntSet.mapNegate
--reverseEdgesArray :: Array Int -> Array Int
reverseEdgesArray = fmap reverseEdge

isUEdge e = e > reverseEdge e

getEdges t  = getEdgesSet t & IntSet.toList
getUEdges t = [ e | e <- getEdges t, isUEdge e]
getUEdgesSet t = getUEdges t & IntSet.fromList
numBranches t = length $ getUEdges t

undirectedName e  = max e (reverseEdge e)

edgesOutOfNodeArray tree nodeIndex = IntSet.toArray $ edgesOutOfNodeSet tree nodeIndex
edgesOutOfNode tree nodeIndex = IntSet.toList $ edgesOutOfNodeSet tree nodeIndex

treeLength tree = sum [ branchLength tree b | b <- getUEdges tree ]

------------------ Branch Lengths ----------------

class IsGraph g => HasBranchLengths g where
    branchLength :: g -> Int -> Double

branchLengths g = branchLength g <$> getUEdges g
branchLengthsSet g = getUEdgesSet g & IntMap.fromSet (branchLength g)

-- I guess it makes sense that you could construct a WithBranchLengths with arbitrary new branch lengths,
data WithBranchLengths t = WithBranchLengths t (IntMap Double)

instance NFData t => NFData (WithBranchLengths t) where
    rnf (WithBranchLengths tree lengths) = rnf tree `seq` rnf lengths

instance IsGraph t => IsGraph (WithBranchLengths t) where
    getNodesSet (WithBranchLengths t _)             = getNodesSet t
    getEdgesSet (WithBranchLengths t _)             = getEdgesSet t

    edgesOutOfNodeSet (WithBranchLengths t _) nodeId    = edgesOutOfNodeSet t nodeId
    sourceNode (WithBranchLengths t _) edgeId           = sourceNode t edgeId
    targetNode (WithBranchLengths t _) edgeId           = targetNode t edgeId

    getNodeAttributes (WithBranchLengths t _) node     = getNodeAttributes t node
    getEdgeAttributes (WithBranchLengths t _) edge     = getEdgeAttributes t edge
    getAttributes (WithBranchLengths t _)              = getAttributes t

    setNodeAttributes (WithBranchLengths t l) as    = WithBranchLengths (setNodeAttributes t as) l
    setEdgeAttributes (WithBranchLengths t l) as    = WithBranchLengths (setEdgeAttributes t as) l
    setAttributes (WithBranchLengths t l) as    = WithBranchLengths (setAttributes t as) l

    type instance LabelType (WithBranchLengths t) = LabelType t
    type instance NewLabelType (WithBranchLengths t) a = WithBranchLengths (NewLabelType t a)
    getLabel  (WithBranchLengths t _) node = getLabel t node
    getLabels (WithBranchLengths t _) = getLabels t
    relabel newLabels (WithBranchLengths t lengths) = WithBranchLengths (relabel newLabels t) lengths

instance IsGraph t => HasBranchLengths (WithBranchLengths t) where
    branchLength (WithBranchLengths tree ds) b = ds IntMap.! (undirectedName b)

branchLengthTree topology lengths = WithBranchLengths topology lengths

------------------ Labels ----------------

addLabels labels t = relabel newLabels t
    where newLabels = getNodesSet t & IntMap.fromSet (\node -> lookup node labels)

-- These two functions shouldn't go here -- but where should they go?
addInternalLabels tree = relabel newLabels tree where
    oldLabels = getLabels tree
    newLabels = getNodesSet tree & IntMap.fromSet newLabel

    newLabel node = case (oldLabels IntMap.! node) of
                      Just label -> Just label
                      Nothing -> Just $ T.append (T.singleton 'A') (T.pack (show node))

addAncestralLabel node labels = case (labels IntMap.! node) of
                                  Just l -> l
                                  Nothing -> T.append (T.singleton 'A') (T.pack (show node))


dropInternalLabels t = relabel newLabels t where
    labels = getLabels t
    newLabels = getNodesSet t & IntMap.fromSet (\node -> if nodeDegree t node <= 1 then labels IntMap.! node else Nothing)

---------------------------- Creating a graph from a list of edges ---------------------------
graphFromEdges nodes edges = Graph nodesMap branchesMap labels (noAttributesOn nodesSet) (noAttributesOn branchesSet) noAttributes where

    num_nodes = length nodes

    -- FIX: this is a way to avoid depending changeables when |edges| is constant, but edges is changeable.
    num_branches = num_nodes - 1

    -- is this really how we want to name the branches?
    namedEdges = zip [1..] $ edges

    find_branch :: Int -> Maybe (Int,Int)
    find_branch b | b > 0     = fmap snd $ find (\e -> fst e == b) namedEdges
                  | otherwise = swap <$> (find_branch $ reverseEdge b)

    branchFrom n (b,(x,y)) | x == n    = Just b
                           | y == n    = Just (-b)
                           | otherwise = Nothing

    edgesFrom n = mapMaybes (branchFrom n) namedEdges

    nodesSet = IntSet.fromList nodes
    nodesMap = nodesSet & IntMap.fromSet (\n ->  Node n (IntSet.fromList $ edgesFrom n) )

    branchesSet = IntSet.fromList [1..num_branches] `IntSet.union` IntSet.fromList (map negate [1..num_branches])
    branchesMap = branchesSet & IntMap.fromSet (\b -> let Just (s,t) = find_branch b in Edge s t b)

    labels = nodesSet & IntMap.fromSet (\n -> Nothing)