File: prepopulate_cache.py

package info (click to toggle)
statsmodels 0.14.6%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 49,956 kB
  • sloc: python: 254,365; f90: 612; sh: 560; javascript: 337; asm: 156; makefile: 145; ansic: 32; xml: 9
file content (40 lines) | stat: -rwxr-xr-x 1,579 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/python3

"""Place data from R into the statsmodels cache, as Debian does not allow Internet access during builds"""

import os
import subprocess
import glob
import pickle
import zlib

# All paths are derived from the current working directory, so the script
# has to be started from the directory that contains debian/ and build/.
buildroot_directory = os.getcwd()
# Scratch tree: used as working directory for the R / shell helpers and
# later scanned for the files to put into the cache.
Rtmp_directory = os.path.join(buildroot_directory, 'build', 'Rtmp')
# Destination directory for the compressed cache entries.
target_directory = os.path.join(buildroot_directory, 'build', 'datacache')
# exist_ok=True keeps the script re-runnable: without it a second run in a
# tree where an earlier (possibly failed) run already created the
# directories would abort with FileExistsError.
os.makedirs(target_directory, exist_ok=True)
os.makedirs(Rtmp_directory, exist_ok=True)

# R packages (datasets) used:
# car (Duncan, Moore) # now split off as carData
# COUNT (medpar) # not in Debian, use removed by use_available_data.patch
# geepack (dietox)
# HistData (Guerry) # not in Debian, but Guerry is and has the same dataset
# MASS (epil, Sitka)
# robustbase (starsCYG)
# vcd (Arthritis, VisualAcuity)

# duration.rst would use survival (flchain) but that example isn't run during build

# R-using examples use lme4, geepack, robustbase

# External helpers, run in order as (argv, working directory) pairs:
#   1. R in batch mode on debian/datasets/Rdatasets.R, inside the scratch tree
#   2. debian/datasets/rst.sh, inside the scratch tree's doc/ subdirectory
# NOTE(review): doc/ is not created by this script, so it is presumably
# produced by the R step - confirm against Rdatasets.R.
# check=True makes a non-zero exit status raise CalledProcessError, which
# stops the script before anything is copied into the cache.
for helper_argv, helper_cwd in (
    (['R', 'CMD', 'BATCH', buildroot_directory + '/debian/datasets/Rdatasets.R'], Rtmp_directory),
    ([buildroot_directory + '/debian/datasets/rst.sh'], Rtmp_directory + '/doc'),
):
    subprocess.run(helper_argv, cwd=helper_cwd, check=True)

def _cache_entry_name(rel_path):
    """Return the cache file name for the file at *rel_path* in the scratch tree.

    '-v2' is inserted in front of the extension of the final path component,
    every '/' becomes ',', and the fixed Rdatasets prefix plus a '.zip'
    suffix are added.  A file without an extension keeps its name as is.

    os.path.splitext is used on the *relative* path so that a dot in a
    parent directory (e.g. a versioned build root or a dotted subdirectory)
    can never be mistaken for the start of the file extension.
    """
    stem, ext = os.path.splitext(rel_path)
    if ext:
        rel_path = stem + '-v2' + ext
    # NOTE(review): the prefix looks like the Rdatasets raw.githubusercontent.com
    # URL with '/' replaced by ',', presumably the name statsmodels derives
    # from the download URL - confirm against the statsmodels cache code.
    return ('raw.githubusercontent.com,vincentarelbundock,Rdatasets,master,'
            + rel_path.replace('/', ',') + '.zip')


# Copy every regular file below the scratch tree into the cache directory,
# zlib-compressed and renamed to its flat cache entry name.
for fname_in in glob.glob(Rtmp_directory + '/**/*', recursive=True):
    if not os.path.isfile(fname_in):
        # The recursive pattern also matches directories; skip them.
        continue
    with open(fname_in, 'rb') as fd:
        data = fd.read()
    fname_out = os.path.join(
        target_directory,
        _cache_entry_name(os.path.relpath(fname_in, start=Rtmp_directory)),
    )
    # The payload is a plain zlib stream even though the name ends in '.zip'.
    with open(fname_out, 'wb') as fd:
        fd.write(zlib.compress(data))