1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731
|
#cython: language_level=3
# Registry of the typed column classes. Keys are (obitype, multi_elts, tuples)
# tuples and values are (column class, python type) pairs; it is filled by
# register_column_class() and read by Column.get_column_class().
__OBIDMS_COLUMN_CLASS__ = {}
from ..capi.obitypes cimport name_data_type, \
obitype_t, \
obiversion_t, \
OBI_QUAL, \
OBI_STR
from ..capi.obidms cimport obi_import_column
from ..capi.obidmscolumn cimport OBIDMS_column_header_p, \
obi_close_column, \
obi_get_elements_names, \
obi_column_formatted_infos, \
obi_column_write_comments
from ..capi.obiutils cimport obi_format_date
from ..capi.obiview cimport obi_view_add_column, \
obi_view_get_pointer_on_column_in_view, \
Obiview_p, \
NUC_SEQUENCE_COLUMN, \
QUALITY_COLUMN, \
REVERSE_SEQUENCE_COLUMN, \
REVERSE_QUALITY_COLUMN
from ..object cimport OBIDeactivatedInstanceError
from obitools3.utils cimport tobytes, \
bytes2str, \
str2bytes, \
str2bytes_object, \
bytes2str_object, \
clean_empty_values_from_object
from obitools3.dms.column import typed_column
from libc.stdlib cimport free
from libc.string cimport strcpy
import importlib
import inspect
import pkgutil
import json
cdef class Column(OBIWrapper) :
    '''
    The obitools3.dms.column.Column class wraps a C instance of a column in the context of a View
    '''

    cdef inline OBIDMS_column_p pointer(self) :
        # self._pointer is handled as a pointer on the column pointer: dereference it once
        return <OBIDMS_column_p>(<OBIDMS_column_p*>(self._pointer))[0]


    @staticmethod
    cdef type get_column_class(obitype_t obitype, bint multi_elts, bint tuples):
        '''
        Internal function returning the python class representing
        a column for a given obitype.
        '''
        return __OBIDMS_COLUMN_CLASS__[(obitype, multi_elts, tuples)][0]


    @staticmethod
    cdef type get_python_type(obitype_t obitype, bint multi_elts):    # TODO
        '''
        Internal function returning the python type representing
        an instance for a given obitype.
        '''
        # The registry is keyed by (obitype, multi_elts, tuples): a 2-tuple key can never match.
        # NOTE(review): the non-tuple variant is looked up here, assuming that the python
        # type does not depend on the tuples flag -- confirm.
        return __OBIDMS_COLUMN_CLASS__[(obitype, multi_elts, False)][1]


    @staticmethod
    def import_column(object dms_1, object dms_2, object column_name, obiversion_t version_number):
        '''
        Calls obi_import_column() with dms_1, dms_2 and the column name converted to bytes,
        and returns the version number it gives back.

        @raise Exception: if obi_import_column() returns a negative value
        '''
        cdef obiversion_t new_version
        new_version = obi_import_column(tobytes(dms_1), tobytes(dms_2), tobytes(column_name), version_number)
        if new_version < 0 :
            raise Exception("Error importing a column")
        return new_version


    @staticmethod
    def new_column(View view,
                   object column_name,
                   obitype_t data_type,
                   index_t nb_elements_per_line=1,
                   list elements_names=None,
                   bint dict_column=False,
                   bint tuples=False,
                   bint to_eval=False,
                   object associated_column_name=b"",
                   int associated_column_version=-1,
                   object comments={},
                   object alias=b""):
        '''
        Creates a new column in a view and returns its wrapper.

        @param view: the (active) view in which the column is created
        @param column_name: name of the column (str or bytes)
        @param data_type: obitype of the column; forced to OBI_STR for the columns
                          named SAMPLE or sample
        @param elements_names: names of the elements; elements that are neither
                               str nor bytes are converted with str()
        @param associated_column_name: for OBI_QUAL columns, when empty, the name and
                                       version of the matching sequence column of the
                                       view are used
        @param comments: comments, serialized to JSON (never modified here when left
                         to its default value)
        @param alias: alias of the column in the view, defaulting to column_name
        @raise OBIDeactivatedInstanceError: if the view is not active
        @raise RuntimeError: if the column can not be created, or if a quality column
                             has no sequence column to be associated with
        '''
        # TODO indexer_name?

        cdef Column column
        cdef bytes column_name_b = tobytes(column_name)
        cdef bytes alias_b = tobytes(alias)
        cdef bytes comments_b = str2bytes(json.dumps(bytes2str_object(comments)))
        cdef bytes associated_column_name_b = tobytes(associated_column_name)
        cdef list elements_names_s
        cdef bytes elements_names_b
        cdef char* elements_names_p
        cdef object elt_name

        if not view.active() :
            raise OBIDeactivatedInstanceError()

        if alias_b == b"" :
            alias_b = column_name_b

        if elements_names is not None:
            elements_names_s = []
            for elt_name in elements_names :
                if type(elt_name) != bytes and type(elt_name) != str :
                    elements_names_s.append(str(elt_name))
                else :
                    elements_names_s.append(elt_name)
            # elements_names_b keeps the buffer alive while elements_names_p is used
            elements_names_b = b';'.join([tobytes(x) for x in elements_names_s])
            elements_names_p = elements_names_b
        else:
            elements_names_p = NULL

        if column_name_b == b"SAMPLE" or column_name_b == b"sample":
            # force str type
            data_type = OBI_STR

        if data_type == OBI_QUAL:
            if associated_column_name_b == b"":
                # Compare the bytes version of the name: a str name never equals the bytes constants
                if column_name_b == QUALITY_COLUMN:
                    if NUC_SEQUENCE_COLUMN not in view:
                        raise RuntimeError("Cannot create column %s in view %s: trying to create quality column but no NUC_SEQ column to associate it with in the view" % (bytes2str(column_name_b),
                                                                                                                                                                          bytes2str(view.name)))
                    associated_column_name_b = NUC_SEQUENCE_COLUMN
                    associated_column_version = view[NUC_SEQUENCE_COLUMN].version
                elif column_name_b == REVERSE_QUALITY_COLUMN:
                    if REVERSE_SEQUENCE_COLUMN not in view:
                        raise RuntimeError("Cannot create column %s in view %s: trying to create reverse quality column but no REVERSE_SEQUENCE column to associate it with in the view" % (bytes2str(column_name_b),
                                                                                                                                                                                          bytes2str(view.name)))
                    associated_column_name_b = REVERSE_SEQUENCE_COLUMN
                    associated_column_version = view[REVERSE_SEQUENCE_COLUMN].version

        if (obi_view_add_column(view = view.pointer(),
                                column_name = column_name_b,
                                version_number = -1,
                                alias = alias_b,
                                data_type = <obitype_t>data_type,
                                nb_lines = len(view),
                                nb_elements_per_line = nb_elements_per_line,
                                elements_names = elements_names_p,
                                elt_names_formatted = False,
                                dict_column = dict_column,
                                tuples = tuples,
                                to_eval = to_eval,
                                indexer_name = NULL,
                                associated_column_name = associated_column_name_b,
                                associated_column_version = associated_column_version,
                                comments = comments_b,
                                create = True)<0):
            raise RuntimeError("Cannot create column %s in view %s" % (bytes2str(column_name_b),
                                                                       bytes2str(view.name)))

        column = Column.open(view, alias_b)

        # Automatically associate nuc sequence column to quality column if necessary
        if data_type == OBI_QUAL:
            if column_name_b == QUALITY_COLUMN:
                view[NUC_SEQUENCE_COLUMN].associated_column_name = column.name
                view[NUC_SEQUENCE_COLUMN].associated_column_version = column.version
            elif column_name_b == REVERSE_QUALITY_COLUMN:
                view[REVERSE_SEQUENCE_COLUMN].associated_column_name = column.name
                view[REVERSE_SEQUENCE_COLUMN].associated_column_version = column.version

        return column


    @staticmethod
    def open(View view,
             object column_name):
        '''
        Returns the wrapper of a column already present in a view, and registers it in the view.

        @param view: the (active) view containing the column
        @param column_name: alias of the column in the view (str or bytes)
        @raise OBIDeactivatedInstanceError: if the view is not active
        @raise KeyError: if the view has no column with that alias
        '''
        cdef bytes column_name_b = tobytes(column_name)
        cdef OBIDMS_column_p* column_pp
        cdef OBIDMS_column_p column_p
        cdef Column column
        cdef obitype_t column_type
        cdef type column_class

        if not view.active() :
            raise OBIDeactivatedInstanceError()

        column_pp = obi_view_get_pointer_on_column_in_view(view.pointer(),
                                                           column_name_b)

        if column_pp == NULL:
            raise KeyError("Cannot access to column %s in view %s" % (
                               bytes2str(column_name_b),
                               bytes2str(view.name)
                               ))

        column_p = column_pp[0]
        column_type = column_p.header.returned_data_type

        # A column is handled as multi-elements if it has several elements per line or is a dict column
        column_class = Column.get_column_class(column_type, (column_p.header.nb_elements_per_line > 1 or column_p.header.dict_column == True), column_p.header.tuples)
        column = OBIWrapper.new_wrapper(column_class, column_pp)

        column._view = view
        column._alias = column_name_b
        column._elements_names = column.read_elements_names()

        view.register(column)

        return column


    @OBIWrapper.checkIsActive
    def add_to_view(self,
                    View view,
                    object column_name=None) :
        '''
        Inserts this already existing column (same name and version) into a view.

        @param view: the (active) view in which the column is inserted
        @param column_name: alias of the column in that view, defaulting to its current alias
        @raise OBIDeactivatedInstanceError: if the view is not active
        @raise RuntimeError: if the column can not be inserted
        '''
        cdef bytes alias
        cdef OBIDMS_column_p column_p = self.pointer()

        if not view.active() :
            raise OBIDeactivatedInstanceError()

        if (column_name is None):
            alias = self._alias
        else:
            alias = tobytes(column_name)

        if (obi_view_add_column(view = view.pointer(),
                                column_name = column_p.header.name,
                                version_number = column_p.header.version,
                                alias = alias,
                                data_type = <obitype_t>0,
                                nb_lines = -1,
                                nb_elements_per_line = -1,
                                elements_names = NULL,
                                elt_names_formatted = False,
                                dict_column = False,
                                tuples = False,
                                to_eval = False,
                                indexer_name = NULL,
                                associated_column_name = NULL,
                                associated_column_version = -1,
                                comments = NULL,
                                create = False) < 0):
            raise RuntimeError("Cannot insert column %s (%s@%d) into view %s" %
                               ( bytes2str(alias),
                                 bytes2str(column_p.header.name),
                                 column_p.header.version,
                                 bytes2str(view.name)
                               ))

        view.register(self)


    @OBIWrapper.checkIsActive
    def __len__(self):
        '''
        implements the len() function for the Column class
        @rtype: `int`
        '''
        return self.lines_used


    @OBIWrapper.checkIsActive
    def __sizeof__(self):
        '''
        returns the size of the C object wrapped by the Column instance
        '''
        cdef OBIDMS_column_header_p header = self.pointer().header
        return header.header_size + header.data_size


    @OBIWrapper.checkIsActive
    def __iter__(self):
        '''
        Iterates over the used lines of the column, yielding the result of get_line() for each.
        '''
        cdef index_t line_nb
        for line_nb in range(self.lines_used):
            yield self.get_line(line_nb)


    # TODO check time efficiency with and without
    @OBIWrapper.checkIsActive
    def __setitem__(self, index_t line_nb, object value):
        self.set_line(line_nb, value)


    @OBIWrapper.checkIsActive
    def __getitem__(self, index_t line_nb):
        return self.get_line(line_nb)


    @OBIWrapper.checkIsActive
    def __str__(self) :
        '''
        Returns the str() of every line of the column, each followed by a newline.
        '''
        cdef object line
        # join() instead of repeated concatenation, which is quadratic in the number of lines
        return "".join([str(line) + "\n" for line in self])


    @OBIWrapper.checkIsActive
    def __repr__(self) :
        '''
        Returns the informations about the column as formatted by obi_column_formatted_infos().
        '''
        cdef str s
        cdef char* sc
        cdef OBIDMS_column_p pointer = self.pointer()
        sc = obi_column_formatted_infos(pointer, False)
        if sc == NULL:
            raise RuntimeError("Error formatting the informations of a column")
        try:
            s = bytes2str(sc)
        finally:
            # The C string is freed even if the conversion fails
            free(sc)
        return s


    @OBIWrapper.checkIsActive
    def repr_longformat(self) :
        '''
        Same as __repr__(), asking obi_column_formatted_infos() for the detailed version.
        '''
        cdef str s
        cdef char* sc
        cdef OBIDMS_column_p pointer = self.pointer()
        sc = obi_column_formatted_infos(pointer, True)
        if sc == NULL:
            raise RuntimeError("Error formatting the informations of a column")
        try:
            s = bytes2str(sc)
        finally:
            # The C string is freed even if the conversion fails
            free(sc)
        return s


    def close(self): # TODO discuss, can't be called bc then bug when closing view that tries to close it in C
        '''
        Unregisters the column from its view and closes the wrapper if it is still active.
        The C column itself is not closed here (see the TODO above).
        '''
        if self.active() :
            self._view.unregister(self)
            OBIWrapper.close(self)


    cdef read_elements_names(self):
        '''
        Reads the elements names of the column and returns them as a list of bytes.

        @raise OBIDeactivatedInstanceError: if the column is not active
        @raise Exception: if obi_get_elements_names() returns NULL
        '''
        cdef char* elts_names_b
        cdef list elts_names_list

        if not self.active() :
            raise OBIDeactivatedInstanceError()

        elts_names_b = obi_get_elements_names(self.pointer())
        if elts_names_b == NULL:
            raise Exception("Error reading the elements names of a column")

        # The names are separated by ';' in the C string
        elts_names_list = elts_names_b.split(b';')
        free(elts_names_b)

        return elts_names_list


    cpdef list keys(self):
        '''
        Returns the elements names read when the column was opened.
        '''
        return self._elements_names


    # Column alias property getter and setter
    @property
    def name(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self._alias

    @name.setter
    def name(self, object new_alias):  # @DuplicatedSignature
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        self._view.rename_column(self._alias, new_alias)

    # elements_names property getter
    @property
    def elements_names(self):
        return self._elements_names

    # nb_elements_per_line property getter
    @property
    def nb_elements_per_line(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.nb_elements_per_line

    # dict_column property getter
    @property
    def dict_column(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.dict_column

    # data_type property getter
    @property
    def data_type(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return name_data_type(self.data_type_int)

    # data_type integer code property getter
    @property
    def data_type_int(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.returned_data_type

    # original_name property getter
    @property
    def original_name(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.name

    # version property getter
    @property
    def version(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.version

    # lines_used property getter
    @property
    def lines_used(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.lines_used

    # tuples property getter
    @property
    def tuples(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.tuples

    # to_eval property getter
    @property
    def to_eval(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.to_eval

    # creation_date property getter
    @property
    def creation_date(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return obi_format_date(self.pointer().header.creation_date)

    # associated_column name property getter and setter
    @property
    def associated_column_name(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.associated_column.column_name

    @associated_column_name.setter
    def associated_column_name(self, object new_name):
        cdef bytes new_name_b
        # Same check as the getter: the header must not be written through a closed column
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        new_name_b = tobytes(new_name)
        # NOTE(review): strcpy() does not check the size of the destination field -- confirm
        # that the names reaching this point always fit in it.
        strcpy(self.pointer().header.associated_column.column_name, new_name_b)

    # associated_column version property getter and setter
    @property
    def associated_column_version(self):
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        return self.pointer().header.associated_column.version

    @associated_column_version.setter
    def associated_column_version(self, int new_version):
        # Same check as the getter: the header must not be written through a closed column
        if not self.active() :
            raise OBIDeactivatedInstanceError()
        self.pointer().header.associated_column.version = new_version

    # comments property getter
    @property
    def comments(self):
        return Column_comments(self)

    @comments.setter
    def comments(self, object value):
        # Building the Column_comments object writes the given values (see Column_comments.update())
        Column_comments(self, value)
cdef class Column_comments(dict):    # Not thread safe
    '''
    Dictionary giving access to the comments of a column, stored as JSON in the
    header of the column. Keys given as str are converted to bytes.
    '''

    def __init__(self, Column column, value=None) :
        '''
        @param column: the (active) column whose comments are accessed
        @param value: optional mapping of comments to write in the column
        @raise OBIDeactivatedInstanceError: if the column is not active
        '''
        if not column.active() :
            raise OBIDeactivatedInstanceError()
        self._column = column
        if value is not None:
            self.update(value)  # TODO test and discuss not overwriting (could use replace bool)
        self._update_from_file()


    def _update_from_file(self):
        '''
        Loads the comments currently stored in the column header into this dictionary.

        @raise OBIDeactivatedInstanceError: if the column is not active
        '''
        cdef bytes comments_json
        cdef str comments_json_str
        cdef OBIDMS_column_p column_p
        cdef Column column

        if not self._column.active() :
            raise OBIDeactivatedInstanceError()

        column = self._column
        column_p = <OBIDMS_column_p>(column.pointer())
        comments_json = <bytes> column_p.header.comments
        comments_json_str = bytes2str(comments_json)
        comments_dict = json.loads(comments_json_str)
        # NOTE(review): the returned value is not used, so this presumably converts in place -- confirm
        str2bytes_object(comments_dict)
        # dict.update() is called directly: self.update() would write the comments back
        super(Column_comments, self).update(comments_dict)


    def __getitem__(self, object key):
        '''
        Returns the comment stored under key, after reloading the comments from the column.
        '''
        if not self._column.active() :
            raise OBIDeactivatedInstanceError()
        if isinstance(key, str):
            key = str2bytes(key)
        self._update_from_file()
        return super(Column_comments, self).__getitem__(key)


    def __setitem__(self, object key, object value):
        '''
        Stores value under key and writes all the comments in the column.
        Nothing is done if value is None or empty.

        @raise OBIDeactivatedInstanceError: if the column is not active
        @raise Exception: if the comments can not be written
        '''
        cdef OBIDMS_column_p column_p
        cdef Column column

        if not self._column.active() :
            raise OBIDeactivatedInstanceError()

        column = self._column
        column_p = <OBIDMS_column_p>(column.pointer())

        # A None value is never added: no need to clean it first
        if value is None:
            return

        # Remove virtually empty values from the object # TODO discuss
        clean_empty_values_from_object(value)

        # If value is virtually empty, don't add it # TODO discuss
        # Values without a length (e.g. numbers) can not be empty
        if hasattr(value, '__len__') and len(value) == 0:
            return

        # Convert to bytes
        if isinstance(key, str):
            key = str2bytes(key)
        value_bytes = str2bytes_object(value)

        # Update dict with comments already written in file
        self._update_from_file()

        # Add new element # TODO don't overwrite?
        super(Column_comments, self).__setitem__(key, value_bytes)

        # Convert to str because json library doesn't like bytes
        dict_str = bytes2str_object(dict(self.items()))

        # Convert to json string
        comments_json = json.dumps(dict_str)

        # Write new comments
        if obi_column_write_comments(column_p, tobytes(comments_json)) < 0:
            raise Exception("Could not write column comments: %s" % comments_json)


    def update(self, value):
        '''
        Stores every item of the mapping value through __setitem__(), so that each one is written.
        '''
        for k,v in value.items():
            self[k] = v


    def __contains__(self, key):
        return super(Column_comments, self).__contains__(tobytes(key))


    def __str__(self):
        '''
        Returns the comments as they are stored in the column header.
        '''
        if not self._column.active() :
            raise OBIDeactivatedInstanceError()
        return bytes2str(self._column.pointer().header.comments)
######################################################################################################
cdef class Column_multi_elts(Column) :
    '''
    Column whose lines are accessed element by element through Column_line objects.
    '''

    @OBIWrapper.checkIsActive
    def __getitem__(self, index_t line_nb):
        return Column_line(self, line_nb)


    cpdef set_line(self, index_t line_nb, object values):
        '''
        Sets the elements of a line.

        @param line_nb: index of the line
        @param values: None, an empty object or a NA Column_line to set all the elements
                       of the line to None; otherwise a list (elements referred to by
                       their index) or an object iterated to get the references of the
                       elements, values[reference] being the value
        @raise OBIDeactivatedInstanceError: if the column is not active
        '''
        cdef object value_ref
        cdef object values_iter

        if not self.active() :
            raise OBIDeactivatedInstanceError()

        if values is None or len(values) == 0 or (isinstance(values, Column_line) and values.is_NA()):
            for element_name in self._elements_names :
                self.set_item(line_nb, element_name, None)
        else :
            # range() and not the Python 2 only xrange(): the module is compiled with language_level=3
            values_iter = range(len(values)) if isinstance(values, list) else values
            for value_ref in values_iter :
                self.set_item(line_nb, value_ref, values[value_ref])
######################################################################################################
cdef class Column_line :
    '''
    Gives access to the elements of one line of a column: every access is
    forwarded to the column with the index of the line.
    '''

    def __init__(self, Column column, index_t line_nb) :
        self._index = line_nb
        self._column = column


    def __getitem__(self, object elt_id) :
        return self._column.get_item(self._index, elt_id)


    def __setitem__(self, object elt_id, object value):
        self._column.set_item(self._index, elt_id, value)


    def get(self, object elt_id, object default=None):   # TODO returns default if None???
        '''
        Returns the element referred to by elt_id, or default if elt_id is not in the line.
        '''
        if elt_id in self:
            return self._column.get_item(self._index, elt_id)
        else:
            return default


    def __len__(self):
        return self._column.nb_elements_per_line


    def __contains__(self, object elt_id):
        '''
        An int is in the line if it is lower than the number of elements per line,
        any other reference if it is one of the elements names of the column.
        '''
        if type(elt_id) == int:
            return elt_id < self._column.nb_elements_per_line
        else:
            return (tobytes(elt_id) in self._column._elements_names)


    def __repr__(self) :
        return str(self._column.get_line(self._index))


    def is_NA(self):
        '''
        Returns True if get_line() returns None for this line.
        '''
        return self._column.get_line(self._index) is None


    def __iter__(self) :
        '''
        Iterates over the references returned by get_line() for this line (nothing for a NA line).
        '''
        cdef bytes element_name
        line = self._column.get_line(self._index)
        # A NA line is None and can not be iterated
        if line is None:
            return
        for element_name in line :
            yield element_name


    cpdef list keys(self):
        '''
        Returns the list of the references returned by get_line() for this line (empty for a NA line).
        '''
        # TODO just returning all element names of the column would be faster but
        # potentially lots of keys associated with None values on that given line.
        # get_line() methods don't return None values
        line = self._column.get_line(self._index)
        # A NA line is None and can not be iterated
        if line is None:
            return []
        return list(line)


    cpdef list items(self):
        '''
        Returns the list of the (key, value) pairs of the line.
        '''
        return [(k, self[k]) for k in self.keys()]


    cpdef dict dict(self):
        '''
        Returns the items of the line as a new dictionary.
        '''
        return {k: v for k, v in self.items()}


    def __str__(self):
        '''
        Returns the str() of the dictionary of the line with keys and bytes values
        converted to str, or an empty string for a NA line.
        '''
        cdef dict d
        if self.is_NA():
            return ""
        d = {}
        for k,v in self.items():
            d[bytes2str(k)] = bytes2str(v) if isinstance(v, bytes) else v
        return str(d)


    cpdef bytes bytes(self):
        '''
        Returns the bytes version of __str__().
        '''
        return str2bytes(self.__str__())


    # column property getter
    @property
    def column(self):
        return self._column

    # index property getter
    @property
    def index(self):
        return self._index


    cpdef update(self, object data):
        '''
        Sets the elements of the line from a dictionary or an iterable of (key, value)
        pairs; keys that are not in the line are ignored.
        '''
        if isinstance(data, dict):
            data=data.items()
        for key,value in data:
            if key in self:
                self[key]=value
######################################################################################################
cdef register_column_class(obitype_t obitype,
                           bint multi_elts,
                           bint tuples,
                           type obiclass,
                           type python_type):
    '''
    Records obiclass and python_type in the registry of column classes under the
    (obitype, multi_elts, tuples) key. Each subclass of `Column` has to be
    registered this way after its declaration; obiclass must be a subclass of `Column`.
    '''
    assert issubclass(obiclass, Column)

    # Item assignment on the module level dictionary: no global statement needed
    registry_key = (obitype, multi_elts, tuples)
    __OBIDMS_COLUMN_CLASS__[registry_key] = (obiclass, python_type)
cdef register_all_column_classes() :
    '''
    Imports every module found under the typed_column package, then calls the
    register_class() function of each of them.
    '''
    modules_prefix = "obitools3.dms.column.typed_column."

    # All the modules are imported before any registration is done
    typed_modules = []
    for module_info in list(pkgutil.walk_packages(typed_column.__path__, prefix=modules_prefix)) :
        # The second field of the items yielded by walk_packages() is the module name
        typed_modules.append(importlib.import_module(module_info[1]))

    for typed_module in typed_modules :
        typed_module.register_class()


# The column classes are registered when this module is imported
register_all_column_classes()
|