1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
|
#################################################################
# Importing samtools and htslib
#
# For htslib, simply copy the whole release tar-ball
# into the directory "htslib" and recreate the file version.h
#
# rm -rf htslib
# mv download/htslib htslib
# git checkout -- htslib/version.h
# Edit the file htslib/version.h to set the right version number.
#
# For samtools, type:
# rm -rf samtools
# python import.py samtools download/samtools
#
# Manually, then:
# modify config.h to set compatibility flags
#
# For bcftools, type:
# rm -rf bcftools
# python import.py bcftools download/bcftools
# rm -rf bcftools/test bcftools/plugins
import fnmatch
import os
import re
import shutil
import sys
import hashlib
# Names that must not be imported, keyed by destination package.
# The import step compares each entry against the *basename* of every
# ``*.c`` / ``*.h`` file found below the source directory.
EXCLUDE = {
    "samtools": (
        "razip.c",
        "bgzip.c",
        "main.c",
        "calDepth.c",
        "bam2bed.c",
        "wgsim.c",
        "md5fa.c",
        "md5sum-lite.c",
        "maq2sam.c",
        "bamcheck.c",
        "chk_indel.c",
        "vcf-miniview.c",
        # do not import twice
        "htslib-1.3",
        # requires irods library
        "hfile_irods.c",
    ),
    "bcftools": (
        # NOTE(review): "test" and "plugins" look like directory names; a
        # basename comparison against files will presumably never match them.
        "test",
        "plugins",
        "peakfit.c",
        "peakfit.h",
        # needs to be renamed, name conflict with samtools reheader
        "reheader.c",
        "polysomy.c",
    ),
    "htslib": (
        # NOTE(review): these entries carry a directory prefix, so they can
        # not equal a bare basename - confirm whether that is intended.
        "htslib/tabix.c",
        "htslib/bgzip.c",
        "htslib/htsfile.c",
        "htslib/hfile_irods.c",
    ),
}
# Per package: stem of the C file whose ``main`` is renamed to plain
# ``<package>_main`` (every other file gets ``<package>_<file>_main``).
MAIN = dict(
    samtools="bamtk",
    bcftools="main",
)
def locate(pattern, root=os.curdir):
    '''Yield the path of every file at or below `root` whose file name
    matches the shell-style wildcard `pattern`.

    `root` is made absolute first, so all yielded paths are absolute.
    Only file names are tested; directory names are never yielded.
    '''
    top = os.path.abspath(root)
    for folder, _subfolders, names in os.walk(top):
        for name in names:
            if fnmatch.fnmatch(name, pattern):
                yield os.path.join(folder, name)
def _update_pysam_files(cf, destdir):
    '''Write a ``<file>.pysam.c`` copy of each C file with redirected output.

    Each copy starts with ``#include "pysam.h"``.  Its ``main`` function is
    renamed, ``stderr``/``stdout`` become ``pysam_stderr``/``pysam_stdout``
    and ``printf``/``puts``/``putchar`` calls are rewritten to write to
    ``pysam_stdout``.  Afterwards ``pysam.h``, declaring those symbols, is
    written into `destdir`.

    cf
       iterable of paths to C source files; empty/None entries are skipped.
    destdir
       package directory.  Its basename is used as the prefix of the
       renamed ``main`` functions and ``pysam.h`` is created inside it.
    '''
    basename = os.path.basename(destdir)

    # MAIN maps a package to the stem of the file holding its real entry
    # point.  Normalise a single name to a tuple: testing ``in`` directly
    # against the string would be a *substring* match ("bam" in "bamtk").
    main_names = MAIN.get(basename, ())
    if isinstance(main_names, str):
        main_names = (main_names,)

    # File-specific repairs (old text, new text), applied last.  The "old"
    # texts for phase.c and cut_target.c are the mangled results of the
    # generic putchar rewrite below, so the order of the steps matters.
    specific_substitutions = {
        "bam_md.c": (
            'sam_open_format("-", mode_w',
            'sam_open_format(pysam_stdout_fn, mode_w'),
        "phase.c": (
            'putc("ACGT"[f->seq[j] == 1? (c&3, pysam_stdout) : (c>>16&3)]);',
            'putc("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)], pysam_stdout);'),
        "cut_target.c": (
            'putc(33 + (cns[j]>>8>>2, pysam_stdout));',
            'putc(33 + (cns[j]>>8>>2), pysam_stdout);')
    }

    for filename in cf:
        if not filename:
            continue

        fn = os.path.basename(filename)
        subname, _ = os.path.splitext(fn)
        if subname in main_names:
            new_main = "int {}_main(".format(basename)
        else:
            new_main = "int {}_{}_main(".format(basename, subname)

        with open(filename) as infile:
            code = infile.read()

        # Literal replacements: rename main and redirect the std streams.
        code = code.replace("int main(", new_main)
        code = code.replace("stderr", "pysam_stderr")
        code = code.replace("stdout", "pysam_stdout")
        code = code.replace(" printf(", " fprintf(pysam_stdout, ")
        # puts() appends a newline, fputs() does not - emit it explicitly.
        # [^kf] keeps existing fputs()/kputs() calls untouched.
        code = re.sub(
            r"([^kf])puts\(([^)]+)\)",
            r"\1fputs(\2, pysam_stdout) & fputc('\\n', pysam_stdout)",
            code)
        code = re.sub(r"putchar\(([^)]+)\)",
                      r"fputc(\1, pysam_stdout)", code)

        fix = specific_substitutions.get(fn)
        if fix is not None:
            code = code.replace(fix[0], fix[1])

        # Only create the output once the text has been fully transformed.
        with open(filename + ".pysam.c", "w") as outfile:
            outfile.write('#include "pysam.h"\n\n')
            outfile.write(code)

    with open(os.path.join(destdir, "pysam.h"), "w") as outfile:
        outfile.write("""#ifndef PYSAM_H
#define PYSAM_H
#include "stdio.h"
extern FILE * pysam_stderr;
extern FILE * pysam_stdout;
extern const char * pysam_stdout_fn;
#endif
""")
# Script body: ``python import.py <dest> <src>``.
#
# Copies every non-excluded *.h / *.c file found below <src> into the
# package directory <dest> (keeping the sub-directory layout), then writes
# the output-redirected ``.pysam.c`` copies and ``pysam.h``.
#
# NOTE: sys.argv always holds at least the script name, so this guard is
# always true; it is kept so the behaviour on load stays as it was.
if len(sys.argv) >= 1:
    if len(sys.argv) != 3:
        raise ValueError("import requires dest src")

    dest, srcdir = sys.argv[1:3]
    if dest not in EXCLUDE:
        raise ValueError("import expected one of %s" %
                         ",".join(EXCLUDE.keys()))

    exclude = EXCLUDE[dest]
    destdir = os.path.abspath(dest)
    srcdir = os.path.abspath(srcdir)
    if not os.path.exists(srcdir):
        raise IOError(
            "source directory `%s` does not exist." % srcdir)

    # Remove unwanted files (matched by basename) and anything whose path
    # contains "htslib-", i.e. a bundled htslib release directory.
    cfiles = [x for x in locate("*.c", srcdir)
              if os.path.basename(x) not in exclude
              and "htslib-" not in x]
    hfiles = [x for x in locate("*.h", srcdir)
              if os.path.basename(x) not in exclude
              and "htslib-" not in x]

    ncopied = 0

    def _file_digest(path):
        '''Return the MD5 digest of the raw bytes of `path`.'''
        # Binary mode: hashlib needs bytes, and the handle is closed again.
        with open(path, "rb") as handle:
            return hashlib.md5(handle.read()).digest()

    def _compareAndCopy(src, srcdir, destdir, exclude):
        '''Copy `src` to the matching sub-directory of `destdir`.

        The location of `src` relative to `srcdir` is recreated below
        `destdir`.  Returns the path of the copied file.  Raises ValueError
        if a file with different content already exists at that path.
        `exclude` is accepted but not used here.
        '''
        d, f = os.path.split(src)
        # relpath instead of a regex substitution: a directory name is not
        # a safe regular expression and may occur more than once in a path.
        subdir = os.path.relpath(d, srcdir)
        if subdir == os.curdir:
            subdir = ""
        targetdir = os.path.join(destdir, subdir)
        if not os.path.exists(targetdir):
            os.makedirs(targetdir)
        old_file = os.path.join(targetdir, f)
        if os.path.exists(old_file):
            if _file_digest(old_file) != _file_digest(src):
                raise ValueError(
                    "incompatible files for %s and %s" %
                    (old_file, src))

        shutil.copy(src, targetdir)
        return old_file

    for src_file in hfiles:
        _compareAndCopy(src_file, srcdir, destdir, exclude)
        ncopied += 1

    # Remember where each C file ended up; those copies get rewritten below.
    cf = []
    for src_file in cfiles:
        cf.append(_compareAndCopy(src_file,
                                  srcdir,
                                  destdir,
                                  exclude))
        ncopied += 1

    sys.stdout.write(
        "installed latest source code from %s: "
        "%i files copied\n" % (srcdir, ncopied))
    # write the .pysam.c copies with redirected stdout/stderr and pysam.h.
    sys.stdout.write("applying stderr redirection\n")

    _update_pysam_files(cf, destdir)

    sys.exit(0)
# if len(sys.argv) >= 2 and sys.argv[1] == "refresh":
# sys.stdout.write("refreshing latest source code from .c to .pysam.c")
# # redirect stderr to pysamerr and replace bam.h with a stub.
# sys.stdout.write("applying stderr redirection")
# for destdir in ('samtools', ):
# pysamcfiles = locate("*.pysam.c", destdir)
# for f in pysamcfiles:
# os.remove(f)
# cfiles = locate("*.c", destdir)
# _update_pysam_files(cfiles, destdir)
# sys.exit(0)
|