File: plot_cran.py

package info (click to toggle)
python-rdata 0.11.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 740 kB
  • sloc: python: 2,388; makefile: 22
file content (167 lines) | stat: -rw-r--r-- 5,398 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
Loading a RDA file with custom types from CRAN
==============================================

A more advanced example showing how to read a dataset in the RDATA format from
the CRAN repository of R packages that include custom R types.

"""

# %%
# We will show how to load the graph of the classical
# `seven bridges of Königsberg problem
# <https://en.wikipedia.org/wiki/Seven_Bridges_of_K%C3%B6nigsberg>`_ from the
# R package igraphdata.
#
# .. warning::
#     This is for illustration purposes only. If you plan to use the same
#     dataset repeatedly it is better to download it, or to use a package that
#     caches it, such as
#     `scikit-datasets <https://daviddiazvico.github.io/scikit-datasets/>`_.
#
# We will make use of the function
# :external+python:func:`urllib.request.urlopen` to load the url, as well as
# the package rdata.
# The package is a tar file, so we also need to import the
# :external+python:mod:`tarfile` module.
# We will use the package `igraph <https://python.igraph.org/en/stable/>`_ for
# constructing the graph in Python.
# Finally, we will import some plotting routines from Matplotlib.

import tarfile
from urllib.request import urlopen

import igraph
import igraph.drawing
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex

import rdata

# %%
# The following URL contains the link to download the package from CRAN.

# Base location of archived source packages on CRAN; the path appended below
# selects the igraphdata 1.0.0 tarball.
_cran_archive = "https://cran.r-project.org/src/contrib/Archive/"
pkg_url = _cran_archive + "igraphdata/igraphdata_1.0.0.tar.gz"

# %%
# The dataset is contained in the "data" folder, as is common for
# R packages.
# The file is named Koenigsberg and it is in the RDATA format
# (.rda extension).
data_path = "igraphdata/data/Koenigsberg.rda"

# %%
# We proceed to open the package using
# :external+python:func:`~urllib.request.urlopen`
# and :external+python:mod:`tarfile`.


# Download the package tarball and parse the single member named by
# ``data_path``; the parsed R data ends up in ``parsed``.
with urlopen(pkg_url) as package:
    # Mode "r|gz" reads the response as a gzip-compressed stream, so the
    # wanted member is read while iterating over the archive.
    with tarfile.open(fileobj=package, mode="r|gz") as package_tar:
        for member in package_tar:
            if member.name != data_path:
                continue

            dataset = package_tar.extractfile(member)
            if dataset is None:
                # extractfile() gives None for members that carry no file
                # data; raise explicitly instead of relying on ``assert``,
                # which is removed when Python runs with -O.
                raise ValueError(
                    f"{data_path!r} in {pkg_url} is not a regular file",
                )
            with dataset:
                parsed = rdata.parser.parse_file(dataset)
            break
        else:
            # The loop finished without finding the member: fail here with
            # a clear message rather than later with an undefined ``parsed``.
            raise FileNotFoundError(
                f"{data_path!r} was not found in {pkg_url}",
            )

# %%
# We could try to convert this dataset to Python objects.

# First attempt: no custom constructors are supplied to convert(); the result
# is printed so that its structure can be inspected.
converted = rdata.conversion.convert(parsed)
print(converted)

# %%
# From this representation, we can see that .rda files contain a mapping
# of variable names to objects, and not just one object as .rds files do.
# In this case there is just one variable, "Koenigsberg", named after the
# dataset itself, but that is not necessarily always the case.

# %%
# We can also see that there is no default conversion for the "igraph"
# class, representing a graph.
# Thus, the converted object is a list of the underlying vectors used
# by this type.

# %%
# It is however possible to define our own conversion routines for R classes
# using the package rdata.
# For that purpose we need to create a "constructor" function, that accepts
# as arguments the underlying object to convert and its attributes, and
# returns the converted object.

# %%
# In this example, the object will be received as a list, corresponding to
# the `igraph_t structure defined by the igraph package
# <https://github.com/igraph/igraph/blob/
# 50d46370fd677128cf758e4dd5c1de61dae9a3ef/include/
# igraph_datatype.h#L110-L121>`_.
# We will convert it to a :external+igraph:class:`~igraph.Graph` object from
# the `Python version of the igraph package
# <https://python.igraph.org/en/stable/>`_.
# The attrs dict is empty and will not be used.


def graph_constructor(obj, attrs):
    """
    Build a Python igraph graph from the list stored for an R igraph object.

    Only some positions of ``obj`` are read:

    * ``obj[0][0]``: number of vertices (converted with ``int``).
    * ``obj[1]``: directedness flag, forwarded as received.
    * ``obj[2]`` and ``obj[3]``: start and end vertex of every edge
      (converted with ``astype(int)``).
    * ``obj[8][1]``, ``obj[8][2]`` and ``obj[8][3]``: graph, vertex and
      edge attributes, in that order.

    Positions 4 to 7 (output edge index, input edge index, output
    vertex-edge index and input vertex-edge index) are not needed and are
    skipped.

    ``attrs`` is part of the constructor signature but is not used.
    """
    vertex_count = int(obj[0][0])
    directed_flag = obj[1]
    sources = obj[2].astype(int)
    targets = obj[3].astype(int)

    attribute_slot = obj[8]
    graph_level = attribute_slot[1]
    vertex_level = attribute_slot[2]
    edge_level = attribute_slot[3]

    # Pair the i-th source with the i-th target to obtain the edge list.
    endpoint_pairs = list(zip(sources, targets))

    return igraph.Graph(
        n=vertex_count,
        directed=directed_flag,
        edges=endpoint_pairs,
        graph_attrs=graph_level,
        vertex_attrs=vertex_level,
        edge_attrs=edge_level,
    )


# %%
# We create a dict with all the constructors that we want to apply.
# In this case, we include first the default constructors (which
# provide transformations for common R classes) and our newly created
# constructor.
# The key used for the dictionary entries should be the name of the
# corresponding R class.
constructor_dict = dict(rdata.conversion.DEFAULT_CLASS_MAP)
# Work on a copy of the defaults and register our constructor under the name
# of the R class it handles.
constructor_dict["igraph"] = graph_constructor

# %%
# We can now call the :func:`rdata.conversion.convert` function, supplying
# the dictionary of constructors to use.
converted = rdata.conversion.convert(parsed, constructor_dict=constructor_dict)

# %%
# Finally, we check the constructed graph by plotting it using the
# :external+igraph:func:`igraph.drawing.plot` function.
fig, axes = plt.subplots()
# The figure just created is the current one, so adjusting it directly is the
# same as going through pyplot; the subplot is stretched to the figure edges.
fig.subplots_adjust(left=0, right=1, bottom=0, top=1)

# Fetch the graph once: it is drawn and also provides both sets of labels.
koenigsberg = converted["Koenigsberg"]
igraph.drawing.plot(
    koenigsberg,
    target=axes,
    vertex_label=koenigsberg.vs["name"],
    vertex_label_size=8,
    vertex_size=120,
    vertex_color=to_hex("tab:blue"),
    edge_label=koenigsberg.es["name"],
    edge_label_size=8,
)
plt.show()