File: DataBase.pm

package info (click to toggle)
gbrowse 2.56%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 13,112 kB
  • ctags: 4,436
  • sloc: perl: 50,765; sh: 249; sql: 62; makefile: 45; ansic: 27
file content (162 lines) | stat: -rw-r--r-- 3,558 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package Bio::Graphics::Browser2::DataBase;

# This module maintains a cache of opened genome databases
# keyed by the database module name and the parameters
# passed to new(). It is intended to improve performance
# on in-memory databases and other databases that have
# a relatively slow startup time.

=head1 NAME

Bio::Graphics::Browser2::DataBase -- A simple cache for database handles

=head1 SYNOPSIS


=head1 DESCRIPTION

=head2 METHODS

=cut

use strict;
use warnings;
use Data::Dumper 'Dumper';   # Dumper() serializes adaptor + args into the cache key
use constant DEBUG=>0;

# Cache this many databases in a LRU cache.
# If you are getting too many open files errors, then set this
# lower.
use constant CACHE_SIZE => 100;  

# NOTE: LRUCache is defined at the bottom of this file; the method call
# is resolved at runtime, so the forward reference is safe.
my $CACHE = LRUCache->new(CACHE_SIZE);

# open_database($adaptor_class, @constructor_args)
#
# Return a (possibly cached) database handle.  The cache key is the
# Dumper() serialization of the adaptor class name plus its constructor
# arguments, so identical open requests within this process share one
# handle.  Returns undef (empty list) if the database cannot be opened.
sub open_database {
  my $self  = shift;
  my ($adaptor,@argv) = @_;

  my $key   = Dumper($adaptor,@argv);
  my $db    = $CACHE->get($key);
  return $db if defined $db;

  my @caller = caller(1);
  warn "[$$] open database @argv from @caller" if DEBUG;
  $db = eval {$adaptor->new(@argv)};
  # Capture $@ immediately: it is a global and may be clobbered by any
  # intervening eval before we report the failure below.
  my $error = $@;

  # EMFILE surfaces as "Too many open files" (capitalization varies with
  # how the adaptor embeds the strerror text), so match case-insensitively.
  if (!$db && $error =~ /too many open files/i) {
      warn "Too many open databases. Clearing and trying again.\n";
      warn "You may wish to adjust the CACHE_SIZE constant in Bio/Graphics/Browser2/DataBase.pm";
      $CACHE->clear();  # last ditch attempt to free filehandles
      $db = eval {$adaptor->new(@argv)};
      $error = $@;
  }
  unless ($db) {
      warn "Could not open database: $error";
      return;
  }

  $db->strict_bounds_checking(1) if $db->can('strict_bounds_checking');
  $db->absolute(1)               if $db->can('absolute');
  $CACHE->set($key,$db);
  $db;
}

# delete_database($adaptor_class, @constructor_args)
#
# Evict a previously opened handle from the cache.  The key is rebuilt
# with the same Dumper() serialization that open_database() uses.
sub delete_database {
    my $self = shift;
    $CACHE->delete( Dumper(@_) );
}

=over

=item Bio::Graphics::Browser2::DataBase->clone_databases()

Call this after a fork in the child process to make sure that all open
databases have had a chance to clone themselves if they need
to. Otherwise you will get random database failures.

=back

=cut

sub clone_databases {
    my $self = shift;
    # Give every cached handle a chance to clone itself after a fork.
    # Not all adaptors implement clone(), so failures are deliberately
    # swallowed (best-effort).
    for my $db ($CACHE->values) {
        eval { $db->clone() };
    }
}


package LRUCache;

# A minimal bounded in-memory cache.  Each get() bumps a per-key use
# counter; when the cache is at capacity, set() evicts the least-used
# third of the entries.  (Strictly speaking this is an LFU policy, but
# it serves the stated LRU purpose here.)

# LRUCache->new($maxopen) -- create a cache holding at most $maxopen items.
sub new {
    my $self     = shift;
    my $maxopen  = shift || 20;
    return bless {maxopen   => $maxopen,  # capacity before eviction kicks in
		  curopen   => 0,         # number of entries currently cached
		  cacheseq  => {},        # key => use count
		  cachedata => {},        # key => cached object
    },ref $self || $self;
}

# Remove $key from the cache.  A no-op when the key is absent: the
# original decremented curopen unconditionally, which let the counter
# drift negative and skewed the eviction bookkeeping.
sub delete {
    my $self = shift;
    my $key  = shift;
    return unless exists $self->{cachedata}{$key};
    delete $self->{cachedata}{$key};
    delete $self->{cacheseq}{$key};
    $self->{curopen}--;
}

# Fetch the object stored under $key (undef on a miss) and record the use.
sub get {
    my $self = shift;
    my $key  = shift;
    my $obj  = $self->{cachedata}{$key};
    return unless $obj;
    $self->{cacheseq}{$key}++;
    return $obj;
}

# Store $obj under $key.  Overwriting an existing key does not change
# the entry count; inserting a new key into a full cache first evicts
# the least-used third of the entries.
sub set {
    my $self       = shift;
    my ($key,$obj) = @_;

    if (exists $self->{cachedata}{$key}) {
	$self->{cachedata}{$key} = $obj;
	$self->{cacheseq}{$key}  = 1;
	return;
    }

    if ($self->{curopen} >= $self->{maxopen}) {
	# Sort keys by ascending use count, then truncate @lru to the
	# least-used maxopen/3 entries and drop those from the cache.
	my @lru = sort {$self->{cacheseq}{$a} <=> $self->{cacheseq}{$b}} 
	     keys %{$self->{cachedata}};
	splice(@lru, $self->{maxopen} / 3);
	$self->{curopen} -= @lru;
	foreach (@lru) {
	    delete $self->{cachedata}{$_};
	    delete $self->{cacheseq}{$_};
	}

	# Guard the DEBUG lookup with can() so this package also works
	# when the parent module has not been loaded (e.g. under tests).
	warn "garbage collecting done, values = ",join ' ',$self->values 
	    if Bio::Graphics::Browser2::DataBase->can('DEBUG')
	       && Bio::Graphics::Browser2::DataBase::DEBUG;
    }

    $self->{cacheseq}{$key}=1;
    $self->{curopen}++;
    $self->{cachedata}{$key} = $obj;
}

# All keys currently cached.
sub keys {
    my $self = shift;
    return keys %{$self->{cachedata}};
}

# All cached objects.
sub values {
    my $self = shift;
    return values %{$self->{cachedata}};
}

# Empty the cache.  curopen is reset as well: the original left it
# untouched, so a clear()ed cache still believed it was full and ran a
# pointless eviction pass on every subsequent set().
sub clear {
    my $self = shift;
    $self->{curopen}   = 0;
    $self->{cacheseq}  = {};
    $self->{cachedata} = {};
}

1;