File: cythonize.py

package info (click to toggle)
scikit-learn 0.18-5
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 71,040 kB
  • ctags: 91,142
  • sloc: python: 97,257; ansic: 8,360; cpp: 5,649; makefile: 242; sh: 238
file content (198 lines) | stat: -rwxr-xr-x 6,375 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env python
""" cythonize

Cythonize pyx files into C files as needed.

Usage: cythonize [root_dir]

Default [root_dir] is 'sklearn'.

Checks pyx files to see if they have been changed relative to their
corresponding C files.  If they have, then runs cython on these files to
recreate the C files.

The script detects changes in the pyx/pxd files using checksums
(or hashes) stored in a database file.

Simple script to invoke Cython on all .pyx
files; while waiting for a proper build system. Uses file hashes to
figure out if rebuild is needed.

It is called by ./setup.py sdist so that the sdist package can be installed
without Cython.

Originally written by Dag Sverre Seljebotn, and adapted from statsmodels 0.6.1
(Modified BSD 3-clause).

We copied it for scikit-learn.

Note: this script does not check any of the dependent C libraries; it only
operates on the Cython .pyx files or their corresponding Cython header (.pxd)
files.
"""
# Author: Arthur Mensch <arthur.mensch@inria.fr>
# Author: Raghav R V <rvraghav93@gmail.com>
#
# License: BSD 3 clause

from __future__ import division, print_function, absolute_import

import os
import re
import sys
import hashlib
import subprocess

# Name of the file in which the per-module hashes are stored between runs.
HASH_FILE = 'cythonize.dat'
# Directory that is walked when no root_dir is given on the command line.
DEFAULT_ROOT = 'sklearn'

# WindowsError is not defined on unix systems; bind the name to None there
# so that referring to it does not raise NameError.
try:
    WindowsError
except NameError:
    WindowsError = None


def cythonize(cython_file, gen_file):
    """Translate one Cython source file into a C or C++ file.

    Parameters
    ----------
    cython_file : str
        Path of the ``.pyx`` file to compile.
    gen_file : str
        Path of the file to generate. A name ending in ``.cpp`` adds the
        ``--cplus`` flag to the Cython invocation.

    Raises
    ------
    Exception
        If an importable Cython is older than 0.21, or if the Cython run
        exits with a non-zero status.
    OSError
        If neither the ``cython`` executable nor the fallback interpreter
        invocation could be launched.
    """
    try:
        from Cython.Compiler.Version import version as cython_version
        from distutils.version import LooseVersion
    except ImportError:
        # The version check is skipped when either import is unavailable.
        pass
    else:
        if LooseVersion(cython_version) < LooseVersion('0.21'):
            raise Exception('Building scikit-learn requires Cython >= 0.21')

    options = ['--fast-fail']
    if gen_file.endswith('.cpp'):
        options.append('--cplus')
    arguments = options + ["-o", gen_file, cython_file]

    try:
        try:
            status = subprocess.call(['cython'] + arguments)
        except OSError:
            # There are ways of installing Cython that don't result in a cython
            # executable on the path, see scipy issue gh-2397.
            fallback = [sys.executable, '-c',
                        'import sys; from Cython.Compiler.Main '
                        'import setuptools_main as main;'
                        ' sys.exit(main())']
            status = subprocess.call(fallback + arguments)
    except OSError:
        raise OSError('Cython needs to be installed')

    if status != 0:
        raise Exception('Cythonizing %s failed' % cython_file)


def load_hashes(filename):
    """Load the hashes dict from the hashfile.

    Each record is one line of the form
    ``<path> <header sha1 or 'NA'> <input sha1> <output sha1 or 'NA'>``.

    Parameters
    ----------
    filename : str
        Path of the hash file to read.

    Returns
    -------
    dict
        ``{path: (header_hash, cython_hash, gen_file_hash)}``. An empty dict
        is returned when the file cannot be read or when a record has fewer
        than four fields.
    """
    hashes = {}
    try:
        with open(filename, 'r') as cython_hash_file:
            for hash_record in cython_hash_file:
                if not hash_record.strip():
                    # A blank line carries no record; it must not
                    # invalidate the whole cache.
                    continue
                # Split from the right: the three hash fields never contain
                # whitespace, but the path may (e.g. "my dir/foo.pyx").
                (path, header_hash,
                 cython_hash, gen_file_hash) = hash_record.rsplit(None, 3)
                hashes[path] = (header_hash, cython_hash, gen_file_hash)
    except (KeyError, ValueError, AttributeError, IOError):
        # Unreadable or malformed file: start over with an empty cache.
        hashes = {}
    return hashes


def save_hashes(hashes, filename):
    """Write the hashes dict to the hashfile, one record per line.

    Every line holds the key followed by the first three items of its
    value, separated by single spaces.
    """
    record = "%s %s %s %s\n"
    with open(filename, 'w') as out:
        for path in hashes:
            entry = hashes[path]
            out.write(record % (path, entry[0], entry[1], entry[2]))


def sha1_of_file(filename):
    """Return the hexadecimal SHA-1 digest of the file's binary content."""
    with open(filename, "rb") as stream:
        content = stream.read()
    return hashlib.sha1(content).hexdigest()


def clean_path(path):
    """Return ``path`` with '/' separators and without one leading './'."""
    normalized = '/'.join(path.split(os.sep))
    return normalized[2:] if normalized.startswith('./') else normalized


def get_hash_tuple(header_path, cython_path, gen_file_path):
    """Return ``(header_hash, from_hash, to_hash)`` for the given files.

    The header and the generated file are optional: when either path does
    not exist, its hash is reported as the string 'NA'. The Cython source
    itself is always hashed.
    """

    def _hash_or_na(candidate):
        # Hash the file when present, otherwise use the 'NA' placeholder.
        if os.path.exists(candidate):
            return sha1_of_file(candidate)
        return 'NA'

    header_hash = _hash_or_na(header_path)
    from_hash = sha1_of_file(cython_path)
    to_hash = _hash_or_na(gen_file_path)
    return header_hash, from_hash, to_hash


def cythonize_if_unchanged(path, cython_file, gen_file, hashes):
    """Run Cython on one file unless its recorded hashes are still current.

    Despite its name, the function regenerates ``gen_file`` when the current
    hashes differ from the stored ones and returns early when they match.

    Parameters
    ----------
    path : str
        Directory containing both files.
    cython_file : str
        Name of the ``.pyx`` file inside ``path``.
    gen_file : str
        Name of the generated file inside ``path``.
    hashes : dict
        Mapping of cleaned ``.pyx`` path to hash tuple. It is updated in
        place after the file has been regenerated.
    """
    full_cython_path = os.path.join(path, cython_file)
    if full_cython_path.endswith('.pyx'):
        # Swap only the trailing extension: str.replace would also rewrite a
        # '.pyx' that happens to occur earlier in the path (e.g. in a
        # directory name) and point at the wrong header file.
        full_header_path = full_cython_path[:-len('.pyx')] + '.pxd'
    else:
        # Not a .pyx path: keep the historical substitution unchanged.
        full_header_path = full_cython_path.replace('.pyx', '.pxd')
    full_gen_file_path = os.path.join(path, gen_file)

    hash_key = clean_path(full_cython_path)
    current_hash = get_hash_tuple(full_header_path, full_cython_path,
                                  full_gen_file_path)

    if current_hash == hashes.get(hash_key):
        print('%s has not changed' % full_cython_path)
        return

    print('Processing %s' % full_cython_path)
    cythonize(full_cython_path, full_gen_file_path)

    # The target file has changed: recompute the hashes and record them.
    hashes[hash_key] = get_hash_tuple(full_header_path, full_cython_path,
                                      full_gen_file_path)


def check_and_cythonize(root_dir):
    """Walk ``root_dir`` and cythonize every ``.pyx`` file that needs it.

    The stored hashes are loaded from ``HASH_FILE`` once and written back
    after each processed module.

    Parameters
    ----------
    root_dir : str
        Directory whose tree is searched for ``.pyx`` files.
    """
    print(root_dir)
    hashes = load_hashes(HASH_FILE)

    for cur_dir, _, files in os.walk(root_dir):
        for filename in files:
            if not filename.endswith('.pyx'):
                continue

            # Cython files with libcpp imports should be compiled to cpp
            with open(os.path.join(cur_dir, filename), 'rb') as f:
                uses_libcpp = re.search(b"libcpp", f.read(), re.I | re.M)
            gen_file_ext = '.cpp' if uses_libcpp else '.c'

            # Swap only the trailing extension: str.replace would also
            # rewrite a '.pyx' occurring earlier in the file name.
            gen_file = filename[:-len('.pyx')] + gen_file_ext
            cythonize_if_unchanged(cur_dir, filename, gen_file, hashes)

            # Save hashes once per module. This prevents cythonizing prev.
            # files again when debugging broken code in a single file
            save_hashes(hashes, HASH_FILE)


def main(root_dir=DEFAULT_ROOT):
    """Entry point: cythonize the ``.pyx`` files found under ``root_dir``."""
    check_and_cythonize(root_dir=root_dir)


if __name__ == '__main__':
    # The optional first command-line argument selects the root directory.
    root_dir_arg = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_ROOT
    main(root_dir_arg)