1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982
|
package Bio::DB::SeqFeature::Store::DBI::Pg;
use DBD::Pg qw(:pg_types);
use MIME::Base64;
# $Id: Pg.pm 14656 2008-04-14 15:05:37Z lstein $
=head1 NAME
Bio::DB::SeqFeature::Store::DBI::Pg -- PostgreSQL implementation of Bio::DB::SeqFeature::Store
=head1 SYNOPSIS
use Bio::DB::SeqFeature::Store;
# Open the sequence database
my $db = Bio::DB::SeqFeature::Store->new(-adaptor => 'DBI::Pg',
-dsn => 'dbi:Pg:test');
# get a feature from somewhere
my $feature = Bio::SeqFeature::Generic->new(...);
# store it
$db->store($feature) or die "Couldn't store!";
# primary ID of the feature is changed to indicate its primary ID
# in the database...
my $id = $feature->primary_id;
# get the feature back out
my $f = $db->fetch($id);
# change the feature and update it
$f->start(100);
$db->update($f) or die "Couldn't update!";
# searching...
# ...by id
my @features = $db->fetch_many(@list_of_ids);
# ...by name
@features = $db->get_features_by_name('ZK909');
# ...by alias
@features = $db->get_features_by_alias('sma-3');
# ...by type
@features = $db->get_features_by_name('gene');
# ...by location
@features = $db->get_features_by_location(-seq_id=>'Chr1',-start=>4000,-end=>600000);
# ...by attribute
@features = $db->get_features_by_attribute({description => 'protein kinase'})
# ...by the GFF "Note" field
@result_list = $db->search_notes('kinase');
# ...by arbitrary combinations of selectors
@features = $db->features(-name => $name,
-type => $types,
-seq_id => $seqid,
-start => $start,
-end => $end,
-attributes => $attributes);
# ...using an iterator
my $iterator = $db->get_seq_stream(-name => $name,
-type => $types,
-seq_id => $seqid,
-start => $start,
-end => $end,
-attributes => $attributes);
while (my $feature = $iterator->next_seq) {
# do something with the feature
}
# ...limiting the search to a particular region
my $segment = $db->segment('Chr1',5000=>6000);
my @features = $segment->features(-type=>['mRNA','match']);
# getting & storing sequence information
# Warning: this returns a string, and not a PrimarySeq object
$db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
# what feature types are defined in the database?
my @types = $db->types;
# create a new feature in the database
my $feature = $db->new_feature(-primary_tag => 'mRNA',
-seq_id => 'chr3',
-start => 10000,
-end => 11000);
=head1 DESCRIPTION
Bio::DB::SeqFeature::Store::Pg is the Pg adaptor for
Bio::DB::SeqFeature::Store. You will not create it directly, but
instead use Bio::DB::SeqFeature::Store-E<gt>new() to do so.
See L<Bio::DB::SeqFeature::Store> for complete usage instructions.
=head2 Using the Pg adaptor
Before you can use the adaptor, you must use the Pgadmin tool to
create a database and establish a user account with write
permission. In order to use "fast" loading, the user account must have
"file" privileges.
To establish a connection to the database, call
Bio::DB::SeqFeature::Store-E<gt>new(-adaptor=E<gt>'DBI::Pg',@more_args). The
additional arguments are as follows:
Argument name Description
------------- -----------
-dsn The database name. You can abbreviate
"dbi:Pg:foo" as "foo" if you wish.
-user Username for authentication.
-pass Password for authentication.
-namespace Creates a SCHEMA for the tables. This allows you
to have several virtual databases in the same
physical database.
-temp Boolean flag. If true, a temporary database
will be created and destroyed as soon as
the Store object goes out of scope. (synonym -temporary)
-autoindex Boolean flag. If true, features in the database will be
reindexed every time they change. This is the default.
-tmpdir Directory in which to place temporary files during "fast" loading.
Defaults to File::Spec->tmpdir(). (synonyms -dump_dir, -dumpdir, -tmp)
-dbi_options A hashref to pass to DBI->connect's 4th argument, the "attributes."
(synonyms -options, -dbi_attr)
-write Pass true to open database for writing or updating.
If successful, a new instance of
Bio::DB::SeqFeature::Store::DBI::Pg will be returned.
In addition to the standard methods supported by all well-behaved
Bio::DB::SeqFeature::Store databases, several following
adaptor-specific methods are provided. These are described in the next
sections.
=cut
use strict;
use base 'Bio::DB::SeqFeature::Store::DBI::mysql';
use Bio::DB::SeqFeature::Store::DBI::Iterator;
use DBI;
use Memoize;
use Cwd 'abs_path';
use Bio::DB::GFF::Util::Rearrange 'rearrange';
use File::Copy;
use File::Spec;
use constant DEBUG=>0;
use constant MAX_INT => 2_147_483_647;
use constant MIN_INT => -2_147_483_648;
use constant MAX_BIN => 1_000_000_000; # size of largest feature = 1 Gb
use constant MIN_BIN => 1000; # smallest bin we'll make - on a 100 Mb chromosome, there'll be 100,000 of these
###
# object initialization
#
# NOTE: most of this code can be refactored and inherited from DBI or DBI::mysql adapter
#
sub init {
my $self = shift;
my ($dsn,
$is_temporary,
$autoindex,
$namespace,
$dump_dir,
$user,
$pass,
$dbi_options,
$writeable,
$create,
$schema,
) = rearrange(['DSN',
['TEMP','TEMPORARY'],
'AUTOINDEX',
'NAMESPACE',
['DUMP_DIR','DUMPDIR','TMP','TMPDIR'],
'USER',
['PASS','PASSWD','PASSWORD'],
['OPTIONS','DBI_OPTIONS','DBI_ATTR'],
['WRITE','WRITEABLE'],
'CREATE',
'SCHEMA'
],@_);
$dbi_options ||= {pg_server_prepare => 0};
$writeable = 1 if $is_temporary or $dump_dir;
$dsn or $self->throw("Usage: ".__PACKAGE__."->init(-dsn => \$dbh || \$dsn)");
my $dbh;
if (ref $dsn) {
$dbh = $dsn;
} else {
$dsn = "dbi:Pg:$dsn" unless $dsn =~ /^dbi:/;
$dbh = DBI->connect($dsn,$user,$pass,$dbi_options) or $self->throw($DBI::errstr);
}
$dbh->do('set client_min_messages=warning') if $dbh;
$self->{'original_arguments'} = {
'dsn' => $dsn,
'user' => $user,
'pass' => $pass,
'dbh_options' => $dbi_options,
};
$self->{dbh} = $dbh;
$self->{dbh}->{InactiveDestroy} = 1;
$self->{is_temp} = $is_temporary;
$self->{writeable} = $writeable;
$self->{namespace} = $namespace || $schema || 'public';
$self->schema($self->{namespace});
$self->default_settings;
$self->autoindex($autoindex) if defined $autoindex;
$self->dumpdir($dump_dir) if $dump_dir;
if ($self->is_temp) {
# warn "creating a temp database isn't supported";
#$self->init_tmp_database();
$self->init_database('erase');
} elsif ($create) {
$self->init_database('erase');
}
}
sub table_definitions {
my $self = shift;
return {
feature => <<END,
(
id serial primary key,
typeid int not null,
seqid int,
start int,
"end" int,
strand int default 0,
tier int,
bin int,
indexed int default 1,
object bytea not null
);
CREATE INDEX feature_stuff ON feature(seqid,tier,bin,typeid);
CREATE INDEX feature_typeid ON feature(typeid);
END
locationlist => <<END,
(
id serial primary key,
seqname text not null
); CREATE INDEX locationlist_seqname ON locationlist(seqname);
END
typelist => <<END,
(
id serial primary key,
tag text not null
); CREATE INDEX typelist_tab ON typelist(tag);
END
name => <<END,
(
id int not null,
name text not null,
display_name int default 0
);
CREATE INDEX name_id ON name( id );
CREATE INDEX name_name ON name( name );
CREATE INDEX name_lcname ON name( lower(name) );
CREATE INDEX name_lcname_varchar_patt_ops ON name USING BTREE (lower(name) varchar_pattern_ops);
END
attribute => <<END,
(
id int not null,
attribute_id int not null,
attribute_value text
);
CREATE INDEX attribute_id ON attribute(id);
CREATE INDEX attribute_id_val ON attribute(attribute_id,SUBSTR(attribute_value, 1, 10));
END
attributelist => <<END,
(
id serial primary key,
tag text not null
);
CREATE INDEX attributelist_tag ON attributelist(tag);
END
parent2child => <<END,
(
id int not null,
child int not null
);
CREATE UNIQUE INDEX parent2child_id_child ON parent2child(id,child);
END
meta => <<END,
(
name text primary key,
value text not null
)
END
sequence => <<END,
(
id int not null,
"offset" int not null,
sequence text,
primary key(id,"offset")
)
END
interval_stats => <<END,
(
typeid int not null,
seqid int not null,
bin int not null,
cum_count int not null
);
CREATE UNIQUE INDEX interval_stats_id ON interval_stats(typeid,seqid,bin);
END
};
}
sub schema {
my ($self, $schema) = @_;
$self->{'schema'} = $schema if defined($schema);
if ($schema) {
$self->_check_for_namespace();
$self->dbh->do("SET search_path TO " . $self->{'schema'} );
} else {
$self->dbh->do("SET search_path TO public");
}
return $self->{'schema'};
}
###
# wipe database clean and reinstall schema
#
sub _init_database {
my $self = shift;
my $erase = shift;
my $dbh = $self->dbh;
my $namespace = $self->namespace;
my $tables = $self->table_definitions;
my $temporary = $self->is_temp ? 'TEMPORARY' : '';
foreach (keys %$tables) {
next if $_ eq 'meta'; # don't get rid of meta data!
my $table = $self->_qualify($_);
$dbh->do("DROP TABLE IF EXISTS $table") if $erase;
my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = '$table' AND schemaname = '$self->namespace'");
if (!scalar(@table_exists)) {
my $query = "CREATE $temporary TABLE $table $tables->{$_}";
$dbh->do($query) or $self->throw($dbh->errstr);
}
}
$self->subfeatures_are_indexed(1) if $erase;
1;
}
sub maybe_create_meta {
my $self = shift;
return unless $self->writeable;
my $namespace = $self->namespace;
my $table = $self->_qualify('meta');
my $tables = $self->table_definitions;
my $temporary = $self->is_temp ? 'TEMPORARY' : '';
my @table_exists = $self->dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename = 'meta' AND schemaname = '$namespace'");
$self->dbh->do("CREATE $temporary TABLE $table $tables->{meta}")
unless @table_exists;
}
###
# check if the namespace/schema exists, if not create it
#
sub _check_for_namespace {
my $self = shift;
my $namespace = $self->namespace;
return if $namespace eq 'public';
my $dbh = $self->dbh;
my @schema_exists = $dbh->selectrow_array("SELECT * FROM pg_namespace WHERE nspname = '$namespace'");
if (!scalar(@schema_exists)) {
my $query = "CREATE SCHEMA $namespace";
$dbh->do($query) or $self->throw($dbh->errstr);
# if temp parameter is set and schema created for this process then enable removal in remove_namespace()
if ($self->is_temp) {
$self->{delete_schema} = 1;
}
}
}
###
# Overiding inherited mysql _qualify (We do not need to qualify for PostgreSQL as we have set the search_path above)
#
sub _qualify {
my $self = shift;
my $table_name = shift;
return $table_name;
}
###
# when is_temp is set and the schema did not exist beforehand then we are able to remove it
#
sub remove_namespace {
my $self = shift;
if ($self->{delete_schema}) {
my $namespace = $self->namespace;
$self->dbh->do("DROP SCHEMA $namespace") or $self->throw($self->dbh->errstr);
}
}
####Overiding the inherited mysql function _prepare
sub _prepare {
my $self = shift;
my $query = shift;
my $dbh = $self->dbh;
my $schema = $self->{namespace};
if ($schema) {
$self->_check_for_namespace();
$dbh->do("SET search_path TO " . $self->{'schema'} );
} else {
$dbh->do("SET search_path TO public");
}
my $sth = $dbh->prepare_cached($query, {}, 3) or
$self->throw($dbh->errstr);
$sth;
}
sub _finish_bulk_update {
my $self = shift;
my $dbh = $self->dbh;
my $dir = $self->{dumpdir} || '.';
for my $table ('feature',$self->index_tables) {
my $fh = $self->dump_filehandle($table);
my $path = $self->dump_path($table);
$fh->close;
my $qualified_table = $self->_qualify($table);
copy($path, "$path.bak");
# Get stuff from file into STDIN so we don't have to be superuser
open my $FH, '<', $path or $self->throw("Could not read file '$path': $!");
print STDERR "Loading file $path\n";
$dbh->do("COPY $qualified_table FROM STDIN CSV QUOTE '''' DELIMITER '\t'") or $self->throw($dbh->errstr);
while (my $line = <$FH>) {
$dbh->pg_putline($line);
}
$dbh->pg_endcopy() or $self->throw($dbh->errstr);
close $FH;
#unlink $path;
}
delete $self->{bulk_update_in_progress};
delete $self->{filehandles};
}
###
# Add a subparts to a feature. Both feature and all subparts must already be in database.
#
sub _add_SeqFeature {
my $self = shift;
# special purpose method for case when we are doing a bulk update
return $self->_dump_add_SeqFeature(@_) if $self->{bulk_update_in_progress};
my $parent = shift;
my @children = @_;
my $dbh = $self->dbh;
local $dbh->{RaiseError} = 1;
my $child_table = $self->_parent2child_table();
my $count = 0;
my $querydel = "DELETE FROM $child_table WHERE id = ? AND child = ?";
my $query = "INSERT INTO $child_table (id,child) VALUES (?,?)";
my $sthdel = $self->_prepare($querydel);
my $sth = $self->_prepare($query);
my $parent_id = (ref $parent ? $parent->primary_id : $parent)
or $self->throw("$parent should have a primary_id");
$self->begin_work or $self->throw($dbh->errstr);
eval {
for my $child (@children) {
my $child_id = ref $child ? $child->primary_id : $child;
defined $child_id or die "no primary ID known for $child";
$sthdel->execute($parent_id, $child_id);
$sth->execute($parent_id,$child_id);
$count++;
}
};
if ($@) {
warn "Transaction aborted because $@";
$self->rollback;
}
else {
$self->commit;
}
$sth->finish;
$count;
}
# because this is a reserved word in postgresql
###
# get primary sequence between start and end
#
sub _fetch_sequence {
my $self = shift;
my ($seqid,$start,$end) = @_;
# backward compatibility to the old days when I liked reverse complementing
# dna by specifying $start > $end
my $reversed;
if (defined $start && defined $end && $start > $end) {
$reversed++;
($start,$end) = ($end,$start);
}
$start-- if defined $start;
$end-- if defined $end;
my $offset1 = $self->_offset_boundary($seqid,$start || 'left');
my $offset2 = $self->_offset_boundary($seqid,$end || 'right');
my $sequence_table = $self->_sequence_table;
my $locationlist_table = $self->_locationlist_table;
my $sth = $self->_prepare(<<END);
SELECT sequence,"offset"
FROM $sequence_table as s,$locationlist_table as ll
WHERE s.id=ll.id
AND ll.seqname= ?
AND "offset" >= ?
AND "offset" <= ?
ORDER BY "offset"
END
my $seq = '';
$sth->execute($seqid,$offset1,$offset2) or $self->throw($sth->errstr);
while (my($frag,$offset) = $sth->fetchrow_array) {
substr($frag,0,$start-$offset) = '' if defined $start && $start > $offset;
$seq .= $frag;
}
substr($seq,$end-$start+1) = '' if defined $end && $end-$start+1 < length($seq);
if ($reversed) {
$seq = reverse $seq;
$seq =~ tr/gatcGATC/ctagCTAG/;
}
$sth->finish;
$seq;
}
sub _offset_boundary {
my $self = shift;
my ($seqid,$position) = @_;
my $sequence_table = $self->_sequence_table;
my $locationlist_table = $self->_locationlist_table;
my $sql;
$sql = $position eq 'left' ? "SELECT min(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
:$position eq 'right' ? "SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=?"
:"SELECT max(\"offset\") FROM $sequence_table as s,$locationlist_table as ll WHERE s.id=ll.id AND ll.seqname=? AND \"offset\"<=?";
my $sth = $self->_prepare($sql);
my @args = $position =~ /^-?\d+$/ ? ($seqid,$position) : ($seqid);
$sth->execute(@args) or $self->throw($sth->errstr);
my $boundary = $sth->fetchall_arrayref->[0][0];
$sth->finish;
return $boundary;
}
sub _name_sql {
my $self = shift;
my ($name,$allow_aliases,$join) = @_;
my $name_table = $self->_name_table;
my $from = "$name_table as n";
my ($match,$string) = $self->_match_sql($name);
my $where = "n.id=$join AND lower(n.name) $match";
$where .= " AND n.display_name>0" unless $allow_aliases;
return ($from,$where,'',$string);
}
sub _search_attributes {
my $self = shift;
my ($search_string,$attribute_names,$limit) = @_;
my @words = map {quotemeta($_)} split /\s+/,$search_string;
my $name_table = $self->_name_table;
my $attribute_table = $self->_attribute_table;
my $attributelist_table = $self->_attributelist_table;
my $type_table = $self->_type_table;
my $typelist_table = $self->_typelist_table;
my @tags = @$attribute_names;
my $tag_sql = join ' OR ',("al.tag=?") x @tags;
my $perl_regexp = join '|',@words;
my @wild_card_words = map { "%$_%" } @words;
my $sql_regexp = join ' OR ',("a.attribute_value SIMILAR TO ?") x @words;
my $sql = <<END;
SELECT name,attribute_value,tl.tag,n.id
FROM $name_table as n,$attribute_table as a,$attributelist_table as al,$type_table as t,$typelist_table as tl
WHERE n.id=a.id
AND al.id=a.attribute_id
AND n.id=t.id
AND t.typeid=tl.id
AND n.display_name=1
AND ($tag_sql)
AND ($sql_regexp)
END
$sql .= "LIMIT $limit" if defined $limit;
$self->_print_query($sql,@tags,@wild_card_words) if DEBUG || $self->debug;
my $sth = $self->_prepare($sql);
$sth->execute(@tags,@wild_card_words) or $self->throw($sth->errstr);
my @results;
while (my($name,$value,$type,$id) = $sth->fetchrow_array) {
my (@hits) = $value =~ /$perl_regexp/ig;
my @words_in_row = split /\b/,$value;
my $score = int(@hits*100/@words/@words_in_row);
push @results,[$name,$value,$score,$type,$id];
}
$sth->finish;
@results = sort {$b->[2]<=>$a->[2]} @results;
return @results;
}
# overridden here because the mysql adapter uses
# a non-standard query hint
sub _attributes_sql {
my $self = shift;
my ($attributes,$join) = @_;
my ($wf,@bind_args) = $self->_make_attribute_where('a','al',$attributes);
my ($group_by,@group_args)= $self->_make_attribute_group('a',$attributes);
my $attribute_table = $self->_attribute_table;
my $attributelist_table = $self->_attributelist_table;
my $from = "$attribute_table as a, $attributelist_table as al";
my $where = <<END;
a.id=$join
AND a.attribute_id=al.id
AND ($wf)
END
my $group = $group_by;
my @args = (@bind_args,@group_args);
return ($from,$where,$group,@args);
}
sub _match_sql {
my $self = shift;
my $name = shift;
my ($match,$string);
if ($name =~ /(?:^|[^\\])[*?]/) {
$name =~ s/(^|[^\\])([%_])/$1\\$2/g;
$name =~ s/(^|[^\\])\*/$1%/g;
$name =~ s/(^|[^\\])\?/$1_/g;
$match = "LIKE ?";
$string = lc($name);
} else {
$match = "= lower(?)";
$string = lc($name);
}
return ($match,$string);
}
# overridden because of differences between LIKE behavior in mysql and postgres
# as well as case-sensitivity of matches
sub _types_sql {
my $self = shift;
my ($types,$type_table) = @_;
my ($primary_tag,$source_tag);
my @types = ref $types eq 'ARRAY' ? @$types : $types;
my $typelist = $self->_typelist_table;
my $from = "$typelist AS tl";
my (@matches,@args);
for my $type (@types) {
if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
$primary_tag = $type->method;
$source_tag = $type->source;
} else {
($primary_tag,$source_tag) = split ':',$type,2;
}
if ($source_tag) {
push @matches,"lower(tl.tag)=lower(?)";
push @args,"$primary_tag:$source_tag";
} else {
push @matches,"tl.tag ILIKE ?";
push @args,"$primary_tag:%";
}
}
my $matches = join ' OR ',@matches;
my $where = <<END;
tl.id=$type_table.typeid
AND ($matches)
END
return ($from,$where,'',@args);
}
# overridden because mysql adapter uses the non-standard REPLACE syntax
sub setting {
my $self = shift;
my ($variable_name,$value) = @_;
my $meta = $self->_meta_table;
if (defined $value && $self->writeable) {
my $querydel = "DELETE FROM $meta WHERE name = ?";
my $query = "INSERT INTO $meta (name,value) VALUES (?,?)";
my $sthdel = $self->_prepare($querydel);
my $sth = $self->_prepare($query);
$sthdel->execute($variable_name);
$sth->execute($variable_name,$value) or $self->throw($sth->errstr);
$sth->finish;
$self->{settings_cache}{$variable_name} = $value;
}
else {
return $self->{settings_cache}{$variable_name} if exists $self->{settings_cache}{$variable_name};
my $query = "SELECT value FROM $meta as m WHERE m.name=?";
my $sth = $self->_prepare($query);
# $sth->execute($variable_name) or $self->throw($sth->errstr);
unless ($sth->execute($variable_name)) {
my $errstr = $sth->errstr;
$sth = $self->_prepare("SHOW search_path");
$sth->execute();
$errstr .= "With search_path " . $sth->fetchrow_arrayref->[0] . "\n";
$self->throw($errstr);
}
my ($value) = $sth->fetchrow_array;
$sth->finish;
return $self->{settings_cache}{$variable_name} = $value;
}
}
# overridden because of use of REPLACE in mysql adapter
###
# Replace Bio::SeqFeatureI into database.
#
sub replace {
my $self = shift;
my $object = shift;
my $index_flag = shift || undef;
# ?? shouldn't need to do this
# $self->_load_class($object);
my $id = $object->primary_id;
my $features = $self->_feature_table;
my $query = "INSERT INTO $features (id,object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?,?)";
my $query_noid = "INSERT INTO $features (object,indexed,seqid,start,\"end\",strand,tier,bin,typeid) VALUES (?,?,?,?,?,?,?,?,?)";
my $querydel = "DELETE FROM $features WHERE id = ?";
my $sthdel = $self->_prepare($querydel);
my $sth = $self->_prepare($query);
my $sth_noid = $self->_prepare($query_noid);
my @location = $index_flag ? $self->_get_location_and_bin($object) : (undef)x6;
my $primary_tag = $object->primary_tag;
my $source_tag = $object->source_tag || '';
$primary_tag .= ":$source_tag";
my $typeid = $self->_typeid($primary_tag,1);
if ($id) {
$sthdel->execute($id);
$sth->execute($id,encode_base64($self->freeze($object), ''),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
} else {
$sth_noid->execute(encode_base64($self->freeze($object), ''),$index_flag||0,@location,$typeid) or $self->throw($sth->errstr);
}
my $dbh = $self->dbh;
$object->primary_id($dbh->last_insert_id(undef, undef, undef, undef, {sequence=>$features."_id_seq"})) unless defined $id;
$self->flag_for_indexing($dbh->last_insert_id(undef, undef, undef, undef, {sequence=>$features."_id_seq"})) if $self->{bulk_update_in_progress};
}
=head2 types
Title : types
Usage : @type_list = $db->types
Function: Get all the types in the database
Returns : array of Bio::DB::GFF::Typename objects
Args : none
Status : public
=cut
# overridden because "offset" is reserved in postgres
###
# Insert a bit of DNA or protein into the database
#
sub _insert_sequence {
my $self = shift;
my ($seqid,$seq,$offset) = @_;
my $id = $self->_locationid($seqid);
my $seqtable = $self->_sequence_table;
my $sthdel = $self->_prepare("DELETE FROM $seqtable WHERE id = ? AND \"offset\" = ?");
my $sth = $self->_prepare(<<END);
INSERT INTO $seqtable (id,"offset",sequence) VALUES (?,?,?)
END
$sthdel->execute($id,$offset);
$sth->execute($id,$offset,$seq) or $self->throw($sth->errstr);
}
# overridden because of mysql adapter's use of REPLACE
###
# This subroutine flags the given primary ID for later reindexing
#
sub flag_for_indexing {
my $self = shift;
my $id = shift;
my $needs_updating = $self->_update_table;
my $querydel = "DELETE FROM $needs_updating WHERE id = ?";
my $query = "INSERT INTO $needs_updating VALUES (?)";
my $sthdel = $self->_prepare($querydel);
my $sth = $self->_prepare($query);
$sthdel->execute($id);
$sth->execute($id) or $self->throw($self->dbh->errstr);
}
# overridden because of the different ways that mysql and postgres
# handle id sequences
sub _genericid {
my $self = shift;
my ($table,$namefield,$name,$add_if_missing) = @_;
my $qualified_table = $self->_qualify($table);
my $sth = $self->_prepare(<<END);
SELECT id FROM $qualified_table WHERE $namefield=?
END
$sth->execute($name) or die $sth->errstr;
my ($id) = $sth->fetchrow_array;
$sth->finish;
return $id if defined $id;
return unless $add_if_missing;
$sth = $self->_prepare(<<END);
INSERT INTO $qualified_table ($namefield) VALUES (?)
END
$sth->execute($name) or die $sth->errstr;
my $dbh = $self->dbh;
return $dbh->last_insert_id(undef, undef, undef, undef, {sequence=>$qualified_table."_id_seq"});
}
# overridden because of differences in binding between mysql and postgres adapters
# given a statement handler that is expected to return rows of (id,object)
# unthaw each object and return a list of 'em
sub _sth2objs {
my $self = shift;
my $sth = shift;
my @result;
my ($id, $o);
$sth->bind_col(1, \$id);
$sth->bind_col(2, \$o, { pg_type => PG_BYTEA});
#while (my ($id,$o) = $sth->fetchrow_array) {
while ($sth->fetch) {
my $obj = $self->thaw(decode_base64($o) ,$id);
push @result,$obj;
}
$sth->finish;
return @result;
}
# given a statement handler that is expected to return rows of (id,object)
# unthaw each object and return a list of 'em
sub _sth2obj {
my $self = shift;
my $sth = shift;
my ($id,$o) = $sth->fetchrow_array;
return unless $o;
my $obj = $self->thaw(decode_base64($o) ,$id);
$obj;
}
####################################################################################################
# SQL Fragment generators
####################################################################################################
# overridden because of base64 encoding needed here
###
# special-purpose store for bulk loading - write to a file rather than to the db
#
sub _dump_store {
my $self = shift;
my $indexed = shift;
my $count = 0;
my $store_fh = $self->dump_filehandle('feature');
my $dbh = $self->dbh;
my $autoindex = $self->autoindex;
for my $obj (@_) {
my $id = $self->next_id;
my ($seqid,$start,$end,$strand,$tier,$bin) = $indexed ? $self->_get_location_and_bin($obj) : (undef)x6;
my $primary_tag = $obj->primary_tag;
my $source_tag = $obj->source_tag || '';
$primary_tag .= ":$source_tag";
my $typeid = $self->_typeid($primary_tag,1);
my $frozen_object = encode_base64($self->freeze($obj), '');
# TODO: Fix this, why does frozen object start with quote but not end with one
print $store_fh join("\t",$id,$typeid,$seqid,$start,$end,$strand,$tier,$bin,$indexed,$frozen_object),"\n";
$obj->primary_id($id);
$self->_update_indexes($obj) if $indexed && $autoindex;
$count++;
}
# remember whether we are have ever stored a non-indexed feature
unless ($indexed or $self->{indexed_flag}++) {
$self->subfeatures_are_indexed(0);
}
$count;
}
sub _enable_keys { } # nullop
sub _disable_keys { } # nullop
sub _add_interval_stats_table {
my $self = shift;
my $tables = $self->table_definitions;
my $interval_stats = $self->_interval_stats_table;
##check to see if it exists yet; if it does, just return because
##there is a drop from in the next step
my $dbh = $self->dbh;
my @table_exists = $dbh->selectrow_array("SELECT * FROM pg_tables WHERE tablename
= '$interval_stats' AND schemaname = '".$self->namespace."'");
if (!scalar(@table_exists)) {
my $query = "CREATE TABLE $interval_stats $tables->{interval_stats}";
$dbh->do($query) or $self->throw($dbh->errstr);
}
}
sub _fetch_indexed_features_sql {
my $self = shift;
my $features = $self->_feature_table;
return <<END;
SELECT typeid,seqid,start-1,"end"
FROM $features as f
WHERE f.indexed=1
ORDER BY typeid,seqid,start
END
}
1;
|