1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
|
# Copyright (c) 2016, The Regents of the University of California.
from __future__ import absolute_import
from functools import total_ordering
import types
from . import DBConstants
import gzip
import bz2
from io import BytesIO
from collections.abc import MutableMapping
class Record(MutableMapping):
    """
    Simple dict-like record interface with bag behavior.

    Fields are stored in the plain dict ``self.d`` and are reachable both
    as mapping items (``record['name']``) and as attributes
    (``record.name``).

    Note that ``len(record)`` is the length of the ``sequence`` field, not
    the number of stored fields.
    """

    def __init__(self, name=None, sequence=None, **kwargs):
        """
        Build a record from optional 'name' and 'sequence' values plus any
        additional keyword fields.

        'name' and 'sequence' are only stored when they are not None, and a
        'quality' field whose value is None is dropped.
        """
        d = dict()
        if name is not None:
            d['name'] = name
        if sequence is not None:
            d['sequence'] = sequence
        d.update(kwargs)
        if 'quality' in d and d['quality'] is None:
            del d['quality']
        self.d = d

    def __setitem__(self, name, value):
        self.d[name] = value

    def __getattr__(self, name):
        """
        Expose stored fields as attributes.

        Only invoked when normal attribute lookup fails. Raises
        AttributeError for unknown fields.
        """
        # Go through the instance __dict__ rather than `self.d`: on an
        # instance where `d` has not been assigned yet (as created by the
        # copy and pickle modules before state is restored), `self.d` would
        # re-enter __getattr__ and recurse until RecursionError.
        try:
            return self.__dict__['d'][name]
        except KeyError:
            raise AttributeError(name)

    def __len__(self):
        """Return the length of the 'sequence' field."""
        return len(self.sequence)

    def keys(self):
        return self.d.keys()

    def __getitem__(self, idx):
        """
        Return the field named 'idx', or, when 'idx' is a slice, a new
        Record whose 'sequence' (and 'quality', if present) are sliced and
        whose other fields are copied unchanged.
        """
        if isinstance(idx, slice):
            trimmed = dict(self.d)
            trimmed['sequence'] = trimmed['sequence'][idx]
            if 'quality' in trimmed:
                trimmed['quality'] = trimmed['quality'][idx]
            return Record(**trimmed)
        return self.d[idx]

    def __delitem__(self, key):
        del self.d[key]

    def __iter__(self):
        return iter(self.d)

    def __repr__(self):
        return repr(self.d)
@total_ordering
class _screed_attr(object):
    """
    Sliceable database object that supports lazy retrieval
    """

    def __init__(self, dbObj, attrName, rowName, queryBy):
        """
        Initializes database object with specific record retrieval
        information
        dbObj = database handle
        attrName = name of attr in db
        rowName = index/name of row
        queryBy = by name or index
        """
        self._dbObj = dbObj
        self._attrName = attrName
        self._rowName = rowName
        self._queryBy = queryBy

    def _fetch(self, expression):
        """
        Selects 'expression' for this object's row and returns the single
        resulting value converted to str. Raises KeyError if no row matches.
        """
        query = 'SELECT %s FROM %s WHERE %s = ?' \
            % (expression, DBConstants._DICT_TABLE, self._queryBy)
        cur = self._dbObj.cursor()
        result = cur.execute(query, (str(self._rowName),))
        try:
            # fetchone() yields None when nothing matched; unpacking None
            # raises TypeError, which is reported as a missing key
            value, = result.fetchone()
        except TypeError:
            raise KeyError("Key %s not found" % self._rowName)
        return str(value)

    def __getitem__(self, sliceObj):
        """
        Slicing interface. Returns the slice range given.
        *.start + 1 to be compatible with sqlite's 1 not 0 scheme

        An omitted start is treated as 0 and an omitted stop selects through
        the end of the value. The slice step is not used. Raises TypeError
        for non-slice arguments and ValueError when start is greater than
        stop.
        """
        if not isinstance(sliceObj, slice):
            raise TypeError('__getitem__ argument must be of slice type')

        start = 0 if sliceObj.start is None else sliceObj.start
        stop = sliceObj.stop

        if stop is None:
            # Two-argument substr returns everything from the given position
            # through the end of the string
            expression = 'substr(%s, %d)' % (self._attrName, start + 1)
        else:
            if not start <= stop:  # String reverse in future?
                raise ValueError(
                    'start must be less than stop in slice object')
            expression = 'substr(%s, %d, %d)' \
                % (self._attrName, start + 1, stop - start)

        return self._fetch(expression)

    def __len__(self):
        """
        Returns the length of the string
        """
        return len(self.__str__())

    def __repr__(self):
        """
        Prints out the name of the class and the name of the sliceable attr
        """
        return "<%s '%s'>" % (self.__class__.__name__, self._attrName)

    def __eq__(self, given):
        """
        Compares attribute to given object in string form
        """
        if isinstance(given, bytes):
            return given == self.__str__()
        else:
            return str(given) == self.__str__()

    def __lt__(self, given):
        """
        Orders attribute against given object in string form
        """
        if isinstance(given, bytes):
            return self.__str__() < given
        else:
            return self.__str__() < str(given)

    def __str__(self):
        """
        Returns the full attribute as a string
        """
        return self._fetch(self._attrName)
def _buildRecord(fieldTuple, dbObj, rowName, queryBy):
    """
    Assembles a Record for one database row.

    Fields whose role is DBConstants._SLICEABLE_TEXT become lazy
    _screed_attr values; every other field is fetched immediately with a
    single query and stored as a str. The field named
    DBConstants._PRIMARY_KEY is converted to int and decremented by one.
    """
    # Split the fields into lazily and eagerly retrieved ones
    lazyItems = []
    eagerNames = []
    for fieldname, role in fieldTuple:
        if role != DBConstants._SLICEABLE_TEXT:
            eagerNames.append(fieldname)
            continue
        lazyItems.append(
            (fieldname, _screed_attr(dbObj, fieldname, rowName, queryBy)))

    # One query pulls every eagerly retrieved column of the row
    query = 'SELECT %s FROM %s WHERE %s=?' % \
        (','.join(eagerNames), DBConstants._DICT_TABLE, queryBy)
    row = dbObj.cursor().execute(query, (rowName,)).fetchone()
    eagerItems = list(zip(eagerNames, [str(column) for column in row]))

    # Hack to make indexing start at 0
    fields = {}
    for key, value in lazyItems + eagerItems:
        if key == DBConstants._PRIMARY_KEY:
            value = int(value) - 1
        fields[key] = value
    return Record(**fields)
def write_fastx(record, fileobj):
    """
    Write sequence record to 'fileobj' in FASTA/FASTQ format.

    FASTQ is emitted when the record has a 'quality' attribute, FASTA
    otherwise. The text is encoded as UTF-8 before writing, so 'fileobj'
    must be a BytesIO instance or expose a 'mode' attribute equal to "wb";
    anything else raises AttributeError.
    """
    acceptsBytes = (isinstance(fileobj, BytesIO)
                    or getattr(fileobj, 'mode', None) == 'wb')
    if not acceptsBytes:
        raise AttributeError(
            'cannot call "write_fastx" on object, must be of a file '
            'handle with mode "wb" or an instance of "BytesIO"')

    # Header line: the name, followed by the description when present
    header = record.name
    if hasattr(record, 'description'):
        header += ' ' + record.description

    fields = {'defline': header, 'sequence': record.sequence}
    if hasattr(record, 'quality'):
        fields['quality'] = record.quality
        template = '@{defline}\n{sequence}\n+\n{quality}\n'
    else:
        template = '>{defline}\n{sequence}\n'

    fileobj.write(template.format(**fields).encode('utf-8'))
def write_fastx_pair(read1, read2, fileobj):
    """
    Write a pair of sequence records to 'fileobj' in FASTA/FASTQ format.

    The records are written in order, 'read1' then 'read2', each through
    write_fastx. When 'read1' has a 'quality' attribute, 'read2' is
    asserted to have one as well.
    """
    if hasattr(read1, 'quality'):
        assert hasattr(read2, 'quality'), \
            'read1 has quality values but read2 does not'
    # No `write_record` is defined or imported in this module;
    # write_fastx is the single-record writer.
    write_fastx(read1, fileobj)
    write_fastx(read2, fileobj)
|