File: sam.pm

package info (click to toggle)
gbrowse 2.56%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 13,112 kB
  • ctags: 4,436
  • sloc: perl: 50,765; sh: 249; sql: 62; makefile: 45; ansic: 27
file content (125 lines) | stat: -rw-r--r-- 3,393 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package Bio::Graphics::Browser2::DataLoader::sam;

# $Id: bam.pm 22336 2009-12-07 22:18:43Z lstein $

use strict;
use base 'Bio::Graphics::Browser2::DataLoader::bam';
use Bio::DB::Sam;

# go back to line-at-a-time loading
sub load {
    my $self                = shift;
    $self->Bio::Graphics::Browser2::DataLoader::load(@_);
}

# We do nothing here; the base class
# makes a copy of the data file in SOURCES
sub load_line { }

sub finish_load {
    my $self = shift;
    my $source_file = File::Spec->catfile($self->sources_path,$self->track_name);

    # sort out the file names
    my $track_name = $self->track_name;
    $track_name    =~ s/\..+$//;  # get rid of any extension

    my $base       = File::Spec->catfile($self->data_path,$track_name);
    my $bam        = $base . '.bam';
    my $sorted     = $base . '_sorted';

    # turn SAM file into a BAM file
    $self->set_status('converting SAM to BAM');
    $self->do_strip_prefix($source_file);

    $self->sam2bam($source_file,$bam);

    # sort it
    $self->set_status('sorting BAM file');
    Bio::DB::Bam->sort_core(0,$bam,$sorted,250*1e6); # use 200 meg core maximum

    # no need to keep sorted and unsorted copies
    rename "$sorted.bam",$bam;

    $self->set_status('indexing BAM file');

    Bio::DB::Bam->index_build($bam);

    my $bigwig_exists = 0;

    if ($self->has_bigwig) {
	$self->set_status('creating BigWig coverage file');
	$bigwig_exists = $self->create_big_wig();
    }

    $self->set_status('creating conf file');
    $self->create_conf_file($bam,$bigwig_exists);

    $self->set_status('recompressing source');
    system ("gzip $source_file");
}

sub do_strip_prefix {
    my $self = shift;
    my $source_file = shift;
    my $prefix = $self->strip_prefix or return;
    my $dest   = "$source_file.new";
    open my $i,'<',$source_file  or die "Can't open $source_file: $!";
    open my $o,">",$dest         or die "Can't open $dest: $!";
    while (<$i>) {
	next if /^\@/;
	s/^([^\t]+)\t([^\t]+)\t$prefix([^\t]+)/$1\t$2\t$3/;
    } continue {
	print $o $_;
    }
    close $o;
    close $i;
    rename $dest,$source_file;
}

sub sam2bam {
    my $self = shift;
    my ($sampath,$bampath) = @_;

    my $tam = Bio::DB::Tam->open($sampath)
	or die "Could not open SAM file for reading: $!";

    # get chromosome names
    my $chroms = $self->chromosome_list;

    my $header = eval {$tam->header_read};
    unless ($header) {
	warn "Bio::DB::Sam version 1.20 or greater required for SAM conversion to work properly";
    }

    unless ($header && $header->n_targets > 0) {
	my $fasta      = $self->get_fasta_file;
	$fasta 
	    or die "Could not find a suitable reference FASTA file for indexing this SAM file";

	my $fai = Bio::DB::Sam::Fai->load($fasta)
	    or die "Could not load reference FASTA file for indexing this SAM file: $!";

	$header = $tam->header_read2($fasta.".fai");
    }

    my $targets = $header->target_name;

    my $bam = Bio::DB::Bam->open($bampath,'w')
	or die "Could not open BAM file for writing: $!";

    $bam->header_write($header);
    my $alignment = Bio::DB::Bam::Alignment->new();
    my $lines = 0;

    while ($tam->read1($header,$alignment) > 0) {
	next if $chroms && !$chroms->{$targets->[$alignment->tid]};
	$bam->write1($alignment);
	$self->set_status("converted $lines lines...") if $lines++%1000 == 0;
    }
    
    undef $tam;
    undef $bam;
}

1;