File: graph_plot.py

package info (click to toggle)
swarm-cluster 3.0.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,140 kB
  • sloc: cpp: 7,092; python: 185; makefile: 64; sh: 12
file content (222 lines) | stat: -rw-r--r-- 7,913 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
    Visualize the internal structure of a swarm (color vertices by
    abundance). Requires the module igraph and python 3.
"""

__author__ = "Frédéric Mahé <frederic.mahe@cirad.fr>"
__date__ = "2019/09/24"
__version__ = "$Revision: 4.0"

import sys
import os.path
from igraph import Graph, plot
from optparse import OptionParser

# *************************************************************************** #
#                                                                             #
#                                  Functions                                  #
#                                                                             #
# *************************************************************************** #


def option_parse():
    """
    Parse arguments from command line.
    """
    desc = """Visualize the internal structure of a given OTU."""

    parser = OptionParser(usage="usage: %prog -s FILE -i FILE [-o INT -d INT]",
                          description=desc,
                          version="%prog version 3.1")

    parser.add_option("-s", "--swarms",
                      metavar="<FILENAME>",
                      action="store",
                      dest="swarms",
                      help="<FILENAME> contains swarm's results")

    parser.add_option("-i", "--internal_structure",
                      metavar="<FILENAME>",
                      action="store",
                      dest="internal_structure",
                      help="<FILENAME> contains OTUs' internal structure")

    parser.add_option("-o", "--OTU",
                      metavar="<INTEGER>",
                      action="store",
                      type="int",
                      dest="OTU",
                      default=1,
                      help="Select the nth OTU (first by default)")

    parser.add_option("-d", "--drop",
                      metavar="<INTEGER>",
                      action="store",
                      type="int",
                      dest="drop",
                      default=0,
                      help="Drop amplicons seen <INTEGER> or less times \
                            (zero by default)")

    (options, args) = parser.parse_args()

    return options.swarms, options.internal_structure, \
        options.OTU, options.drop


def parse_files(swarms, internal_structure, OTU, drop):
    """
    """
    # List amplicon ids and abundances
    amplicons = list()
    with open(swarms, "r") as swarms:
        for i, swarm in enumerate(swarms):
            if i == OTU - 1:
                # Deal with ";size=" in a rather clumsy way... but it works
                print("Reading target OTU", file=sys.stdout)
                amplicons = [
                    tuple(
                        item.replace(";size=", "_").rstrip(";").rsplit("_", 1))
                    for item in swarm.strip().split(" ")]
                break

    # Drop amplicons with a low abundance (remove connections too)
    if drop:
        print("Excluding amplicons below threshold", file=sys.stdout)
        amplicons = [amplicon for amplicon in amplicons
                     if int(amplicon[1]) > drop]

    # Convert amplicon names to amplicon indexes (igraph uses
    # numerical ids, not names). Requires python 2.7+)
    amplicon_index = {amplicon[0]: i for (i, amplicon) in enumerate(amplicons)}
    amplicon_connected = {amplicon[0]: False for amplicon in amplicons}

    # List pairwise relations
    relations = list()
    with open(internal_structure, "r") as internal_structure:
        print("Parsing amplicon relationships", file=sys.stdout)
        for line in internal_structure:
            # Get the first four elements of the line
            ampliconA, ampliconB, d, OTU_number = line.strip().split("\t")[0:4]
            OTU_number = int(OTU_number)
            if OTU_number == OTU:
                amplicon_connected[ampliconA] = True
                amplicon_connected[ampliconB] = True
                if ampliconA in amplicon_index and ampliconB in amplicon_index:
                    relations.append((amplicon_index[ampliconA],
                                      amplicon_index[ampliconB]))
            elif OTU_number > OTU:
                break

    # Drop amplicons grafted with the fastidious option
    amplicons = [amplicon for amplicon in amplicons
                 if amplicon_connected[amplicon[0]]]

    # Deal with errors
    if not amplicons or not relations:
        print("Error: OTU does not exists or contains only one element.",
              file=sys.stderr)
        sys.exit(-1)

    return amplicons, relations


def build_graph(amplicons, relations):
    """
    Convert pairwise relations into a graph structure.
    """
    # Create vertices (= number of unique amplicons)
    g = Graph(len(amplicons))
    g.add_edges(relations)

    amplicon_ids = [amplicon[0] for amplicon in amplicons]
    abundances = [int(amplicon[1]) for amplicon in amplicons]
    maximum = max(abundances)

    # Determine canvas size
    if len(abundances) < 500:
        bbox = (1920, 1080)
    elif len(abundances) > 4000:
        bbox = (5760, 3240)
    else:
        bbox = (3840, 2160)

    # Compute node attributes
    node_colors = list()
    node_sizes = list()
    node_labels = list()
    print("Building graph", file=sys.stdout)
    for abundance in abundances:
        # Color is coded by a 3-tuple of float values (0.0 to 1.0)
        # Start from a max color in rgb(red, green, blue)
        max_color = (176, 196, 222)  # light steel blue
        color = [1.0 * (c + (255 - c) / abundance) / 255 for c in max_color]
        node_colors.append(color)
        node_size = 30 + (abundance * 70 / maximum)
        node_sizes.append(node_size)
        # Label nodes with an abundance greater than 10
        if abundance >= 10 or abundance == maximum:
            node_labels.append(str(abundance))
        else:
            node_labels.append("")  # Doesn't work with "None"

    g.vs["name"] = amplicon_ids
    g.vs["abundance"] = abundances
    g.vs["label"] = node_labels
    g.vs["color"] = node_colors
    g.vs["vertex_size"] = node_sizes

    return g, bbox


def main():
    """
    Backbone of the script
    """
    # Parse command line options.
    swarms, internal_structure, OTU, drop = option_parse()

    # Output file
    basename = os.path.splitext(swarms)[0]
    output_pdf = basename + "_OTU_" + str(OTU) + ".pdf"
    # output_svg = basename + "_OTU_" + str(OTU) + ".svg"
    output_graphml = basename + "_OTU_" + str(OTU) + ".graphml"

    # Collect data
    amplicons, relations = parse_files(swarms, internal_structure, OTU, drop)

    # Build the graph with igraph
    g, bbox = build_graph(amplicons, relations)

    # Select the attributes to display
    visual_style = dict()
    visual_style["vertex_size"] = g.vs["vertex_size"]
    visual_style["vertex_label"] = g.vs["label"]
    visual_style["vertex_color"] = g.vs["color"]
    visual_style["vertex_label_dist"] = 0
    visual_style["layout"] = g.layout("fr")  # alternatives: "kk", "drl"
    visual_style["bbox"] = bbox
    visual_style["margin"] = 20

    # Output the graph (profiling shows that 98% of the computation
    # time is spent on "layout", no need to try to optimize the rest)
    g.write_graphml(output_graphml)
    plot(g, output_pdf, **visual_style)
    # # plot(g, output_svg, **visual_style)

    return


# *************************************************************************** #
#                                                                             #
#                                     Body                                    #
#                                                                             #
# *************************************************************************** #

if __name__ == '__main__':

    main()

sys.exit(0)