1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
|
# cython: embedsignature=True
# cython: profile=True
# (embedsignature=True embeds call signatures into docstrings, e.g. for sphinx)
import os
from posix.unistd cimport dup
from pysam.libchtslib cimport *
from pysam.libcutils cimport force_bytes, force_str, charptr_to_str, charptr_to_str_w_len
from pysam.libcutils cimport encode_filename, from_string_and_size
__all__ = ["get_verbosity", "set_verbosity"]
########################################################################
########################################################################
## Constants
########################################################################
# Same value as ``2 << 29`` (2**30).
# NOTE(review): presumably the largest coordinate handled; it is not
# referenced in this part of the file -- confirm against its users.
cdef int MAX_POS = 1 << 30

# Name lookup tables. Each tuple is indexed with the matching integer field
# of ``htsfile.format`` (``category``, ``format`` and ``compression``), so
# the order of the entries must stay in sync with those integer values.
cdef tuple FORMAT_CATEGORIES = (
    'UNKNOWN',
    'ALIGNMENTS',
    'VARIANTS',
    'INDEX',
    'REGIONS',
)

cdef tuple FORMATS = (
    'UNKNOWN', 'BINARY_FORMAT', 'TEXT_FORMAT',
    'SAM', 'BAM', 'BAI', 'CRAM', 'CRAI',
    'VCF', 'BCF', 'CSI', 'GZI', 'TBI', 'BED',
)

cdef tuple COMPRESSION = (
    'NONE',
    'GZIP',
    'BGZF',
    'CUSTOM',
)
cpdef set_verbosity(int verbosity):
    """Set htslib's hts_verbose global variable to the specified value.

    The result of ``hts_set_verbosity`` is passed back to the caller
    unchanged (presumably the previous verbosity level -- confirm against
    the helper's definition).
    """
    result = hts_set_verbosity(verbosity)
    return result
cpdef get_verbosity():
    """Return the value of htslib's hts_verbose global variable.

    This is a thin wrapper that forwards the result of
    ``hts_get_verbosity`` unchanged.
    """
    current = hts_get_verbosity()
    return current
class CallableValue(object):
    """Wrap a value so it can be used directly or called like a function.

    An instance compares equal to whatever its wrapped value compares equal
    to, evaluates to the truthiness of the wrapped value, and returns the
    wrapped value when called with no arguments.  This lets an attribute
    such as ``HTSFile.is_open`` be used both as ``f.is_open`` and as
    ``f.is_open()``.
    """

    def __init__(self, value):
        self.value = value

    def __call__(self):
        """Return the wrapped value."""
        return self.value

    def __bool__(self):
        # __bool__ must return a real bool; coerce so that wrapping a
        # non-bool value (e.g. 0/1) does not raise TypeError in bool().
        return bool(self.value)

    # Python 2 spelling of __bool__.
    __nonzero__ = __bool__

    def __eq__(self, other):
        return self.value == other

    def __ne__(self, other):
        return self.value != other

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable on Python 3.
        # Hash like the wrapped value so equal objects hash equally.
        return hash(self.value)

    def __repr__(self):
        return '{}({!r})'.format(type(self).__name__, self.value)
# Shared truthy/falsy wrappers; they behave like True/False but can also be
# called (``CTrue()`` returns True, ``CFalse()`` returns False).
CTrue = CallableValue(value=True)
CFalse = CallableValue(value=False)
cdef class HTSFile(object):
    """
    Base class for HTS file types

    Wraps an htslib ``htsFile`` handle (``self.htsfile``) and exposes format
    metadata, open/closed state and basic positioning on top of it.

    The methods below also rely on ``self.filename``, ``self.mode``,
    ``self.is_stream``, ``self.is_remote``, ``self.start_offset`` and
    ``self.close()``, none of which are defined in this class body; they are
    presumably declared in the accompanying ``.pxd`` and set up by
    subclasses -- confirm there.
    """
    def __cinit__(self, *args, **kwargs):
        self.htsfile = NULL
        self.duplicate_filehandle = True

    def __dealloc__(self):
        # Release the htslib handle if it is still open when the object dies.
        if self.htsfile:
            hts_close(self.htsfile)
            self.htsfile = NULL

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Returning False lets any exception raised in the with-block
        # propagate.
        return False

    @property
    def category(self):
        """General file format category.  One of UNKNOWN, ALIGNMENTS,
        VARIANTS, INDEX, REGIONS"""
        if not self.htsfile:
            raise ValueError('metadata not available on closed file')
        return FORMAT_CATEGORIES[self.htsfile.format.category]

    @property
    def format(self):
        """File format.

        One of UNKNOWN, BINARY_FORMAT, TEXT_FORMAT, SAM, BAM,
        BAI, CRAM, CRAI, VCF, BCF, CSI, GZI, TBI, BED.
        """
        if not self.htsfile:
            raise ValueError('metadata not available on closed file')
        return FORMATS[self.htsfile.format.format]

    @property
    def version(self):
        """Tuple of file format version numbers (major, minor)"""
        if not self.htsfile:
            raise ValueError('metadata not available on closed file')
        return self.htsfile.format.version.major, self.htsfile.format.version.minor

    @property
    def compression(self):
        """File compression.

        One of NONE, GZIP, BGZF, CUSTOM."""
        if not self.htsfile:
            raise ValueError('metadata not available on closed file')
        return COMPRESSION[self.htsfile.format.compression]

    @property
    def description(self):
        """Vaguely human readable description of the file format"""
        if not self.htsfile:
            raise ValueError('metadata not available on closed file')
        cdef char *desc = hts_format_description(&self.htsfile.format)
        try:
            return charptr_to_str(desc)
        finally:
            # The C string is released here once it has been converted.
            free(desc)

    @property
    def is_open(self):
        """return True if HTSFile is open and in a valid state.

        The result is a :class:`CallableValue`, so both ``f.is_open`` and
        ``f.is_open()`` work.
        """
        return CTrue if self.htsfile != NULL else CFalse

    @property
    def is_closed(self):
        """return True if HTSFile is closed."""
        return self.htsfile == NULL

    @property
    def closed(self):
        """return True if HTSFile is closed."""
        return self.htsfile == NULL

    @property
    def is_write(self):
        """return True if HTSFile is open for writing"""
        return self.htsfile != NULL and self.htsfile.is_write != 0

    @property
    def is_read(self):
        """return True if HTSFile is open for reading"""
        return self.htsfile != NULL and self.htsfile.is_write == 0

    @property
    def is_sam(self):
        """return True if HTSFile is reading or writing a SAM alignment file"""
        return self.htsfile != NULL and self.htsfile.format.format == sam

    @property
    def is_bam(self):
        """return True if HTSFile is reading or writing a BAM alignment file"""
        return self.htsfile != NULL and self.htsfile.format.format == bam

    @property
    def is_cram(self):
        """return True if HTSFile is reading or writing a CRAM alignment file"""
        return self.htsfile != NULL and self.htsfile.format.format == cram

    @property
    def is_vcf(self):
        """return True if HTSFile is reading or writing a VCF variant file"""
        return self.htsfile != NULL and self.htsfile.format.format == vcf

    @property
    def is_bcf(self):
        """return True if HTSFile is reading or writing a BCF variant file"""
        return self.htsfile != NULL and self.htsfile.format.format == bcf

    def reset(self):
        """reset file position to ``self.start_offset``.

        Returns
        -------

        The value returned by :meth:`seek`.

        """
        return self.seek(self.start_offset)

    def seek(self, uint64_t offset):
        """move file pointer to position *offset*, see :meth:`pysam.HTSFile.tell`.

        Returns the value reported by the underlying htslib seek call.

        Raises
        ------

        ValueError
            if the file is closed.
        OSError
            if the file is a stream.
        """
        if not self.is_open:
            raise ValueError('I/O operation on closed file')
        if self.is_stream:
            raise OSError('seek not available in streams')

        cdef int64_t ret
        if self.htsfile.format.compression != no_compression:
            with nogil:
                ret = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, SEEK_SET)
        else:
            with nogil:
                # No narrowing cast here: casting to <int> would truncate
                # offsets that do not fit in 32 bits.  The value is
                # converted to the parameter type declared for hts_useek.
                ret = hts_useek(self.htsfile, offset, SEEK_SET)
        return ret

    def tell(self):
        """return current file position, see :meth:`pysam.HTSFile.seek`.

        Raises
        ------

        ValueError
            if the file is closed.
        OSError
            if the file is a stream.
        """
        if not self.is_open:
            raise ValueError('I/O operation on closed file')
        if self.is_stream:
            raise OSError('tell not available in streams')

        cdef int64_t ret
        if self.htsfile.format.compression != no_compression:
            with nogil:
                ret = bgzf_tell(hts_get_bgzfp(self.htsfile))
        else:
            with nogil:
                ret = hts_utell(self.htsfile)
        return ret

    cdef htsFile *_open_htsfile(self) except? NULL:
        """Open ``self.filename`` with ``self.mode`` and return the handle.

        ``self.filename`` may be a bytes path, an integer file descriptor,
        or an object providing ``fileno()``.  The result of ``hts_open`` /
        ``hts_hopen`` is returned as is.

        Raises IOError if the descriptor cannot be duplicated or if no
        hFILE can be created for it.
        """
        cdef char *cfilename
        cdef char *cmode = self.mode
        cdef int fd, dup_fd

        if isinstance(self.filename, bytes):
            cfilename = self.filename
            with nogil:
                return hts_open(cfilename, cmode)
        else:
            if isinstance(self.filename, int):
                fd = self.filename
            else:
                fd = self.filename.fileno()

            if self.duplicate_filehandle:
                dup_fd = dup(fd)
                # dup() reports failure with a negative descriptor; stop
                # here rather than wrapping an invalid descriptor.
                if dup_fd < 0:
                    raise IOError(
                        'Cannot duplicate file descriptor {}'.format(fd))
            else:
                dup_fd = fd

            # Replicate mode normalization done in hts_open_format
            smode = self.mode.replace(b'b', b'').replace(b'c', b'')
            if b'b' in self.mode:
                smode += b'b'
            elif b'c' in self.mode:
                smode += b'c'
            # smode stays referenced for the rest of the function, so the
            # char pointer taken from it remains valid for the calls below.
            cmode = smode

            hfile = hdopen(dup_fd, cmode)
            if hfile == NULL:
                raise IOError('Cannot create hfile')

            try:
                # filename.name can be an int
                filename = str(self.filename.name)
            except AttributeError:
                filename = '<fd:{}>'.format(fd)

            filename = encode_filename(filename)
            cfilename = filename
            with nogil:
                return hts_hopen(hfile, cfilename, cmode)

    def _exists(self):
        """return False only if the file name is a local path that does
        not exist.

        True is returned when the file name is not a str/bytes object,
        equals ``b'-'``, the file is remote, or the path exists.
        """
        return (not isinstance(self.filename, (str, bytes)) or
                self.filename == b'-' or
                self.is_remote or
                os.path.exists(self.filename))
|