File: make_primers.pl

package info (click to toggle)
bioperl 1.7.8-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid, trixie
  • size: 35,788 kB
  • sloc: perl: 94,019; xml: 14,811; makefile: 20
file content (168 lines) | stat: -rw-r--r-- 5,302 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/perl
# Author: cckim@stanford.edu

# Description: This program designs primers for constructing knockouts
# of genes by transformation of PCR products (ref: Datsenko & Wanner,
# PNAS 2000).  A tab-delimited file containing ORF START STOP is read,
# and primers flanking the start & stop coordinates are designed based
# on the user-designated sequence file.  In addition, primers flanking
# the knockout regions are chosen for PCR screening purposes once the
# knockout is generated.  The script uses Bioperl in order to
# determine the primer sequences, which requires getting subsequences
# and reverse complementing some of the objects.

# make_primers.pl
# Purpose: Design primers for the Wanner method of PCR product-based knockouts
# Input: FASTA sequence file, tab-delimited coordinates file
# Output: Primer output file
# July 4, 2001
# Charles C. Kim

###########
# MODULES #
###########
use Bio::Seq;
use Getopt::Std;

#############
# VARIABLES #
#############
# Extra nucleotides to delete beyond the ORF: $upgap upstream of the 5' end,
# $downgap downstream of the 3' end.
($upgap, $downgap) = (0, 0);

# Length of the homologous region carried by each knockout primer.
$oligolength = 40;

# Leave these filenames empty unless the program should always run on one
# fixed set of files; the -s, -c and -o switches fill them in otherwise.
($seqfile, $coordfile, $outfile) = ('', '', '');

# Fixed sequences appended after the homologous region of the 5' and 3'
# knockout primers.  The keys are the labels printed in the output file.
%fiveprime_primers = (
    'P1' => 'GTGTAGGCTGGAGCTGCTTC',
);
%threeprime_primers = (
    'P2' => 'CATATGAATATCCTCCTTAG',
    'P4' => 'ATTCCGGGGATCCGTCGACC',
);

#########
# FILES #
#########
# Command-line switches: -s sequence file, -c coordinates file, -o output
# file.  getopts() returns false when it meets an unknown switch or a switch
# that is missing its argument; stop with a usage line in that case instead
# of silently carrying on to the interactive filename prompts.
getopts('s:c:o:')
    or die "Usage: $0 [-s sequence_file] [-c coordinate_file] [-o output_file]\n";

# A filename given on the command line replaces the configured default.
$seqfile   = $opt_s if $opt_s;
$coordfile = $opt_c if $opt_c;
$outfile   = $opt_o if $opt_o;

# Attach the three bareword handles used by the main section.  Each helper
# asks for a filename on STDIN when the one passed in is empty.
open_readfile(*SEQFILE, 'sequence', $seqfile);
open_readfile(*COORDFILE, 'coordinate', $coordfile);
open_writefile(*PRIMERFILE, 'output', $outfile);

########
# MAIN #
########

# Length of each flanking PCR screening primer (5'pcr / 3'pcr in the output).
my $pcrlength = 20;

# Read the sequence file into one string.  Header lines (those containing
# '>') are skipped, and a second header is a fatal error because the
# coordinates are only meaningful against a single sequence.
my $rawseq  = '';
my $records = 0;
while (my $line = <SEQFILE>) {
    if ($line =~ />/) {
        $records++;
        die "More than one sequence present in the input file\n" if $records > 1;
        next;
    }
    chomp $line;
    $line =~ tr/gatc/GATC/;
    $rawseq .= $line;
}
close SEQFILE;

my $genome = Bio::Seq->new('-seq' => $rawseq);

# Return the reverse complement of the region $from..$to of the genome as a
# plain string.
my $revcom_of = sub {
    my ($from, $to) = @_;
    my $fragment = $genome->subseq($from, $to);
    return Bio::Seq->new('-seq' => $fragment)->revcom()->seq();
};

# One knockout per non-empty line of the coordinates file: NAME<TAB>START<TAB>STOP.
# START < STOP is treated as a forward-strand ORF, START > STOP as a
# reverse-strand ORF; equal coordinates are rejected.
while (my $line = <COORDFILE>) {
    chomp $line;
    next if !$line;
    my ($name, $start, $stop) = split /\t/, $line;
    my ($upprimer, $downprimer, $uppcr, $downpcr);

    if ($start < $stop) {
        # Homology arms sit immediately outside the deleted region; the
        # screening primers sit immediately outside the homology arms.
        my $up_from = $start - $oligolength - $upgap;
        my $down_to = $stop + $oligolength + $downgap;
        $upprimer   = $genome->subseq($up_from, $start - 1 - $upgap);
        $downprimer = $revcom_of->($stop + 1 + $downgap, $down_to);
        $uppcr      = $genome->subseq($up_from - $pcrlength, $up_from - 1);
        $downpcr    = $revcom_of->($down_to + 1, $down_to + $pcrlength);
    }
    elsif ($start > $stop) {
        # Mirror image of the forward case: the "up" side now lies at higher
        # coordinates and is the one that gets reverse-complemented.
        my $up_to     = $start + $oligolength + $upgap;
        my $down_from = $stop - $oligolength - $downgap;
        $upprimer   = $revcom_of->($start + $upgap + 1, $up_to);
        $downprimer = $genome->subseq($down_from, $stop - 1 - $downgap);
        $uppcr      = $revcom_of->($up_to + 1, $up_to + $pcrlength);
        $downpcr    = $genome->subseq($down_from - $pcrlength, $down_from - 1);
    }
    else {
        die "Problem with start and stop coordinates\n";
    }

    # Distance from the outer end of one screening primer to the outer end
    # of the other.  With the default settings (40-nt homology, 20-nt
    # screening primers, no gaps) this is abs(start - stop) + 121.
    my $wildtype_size = abs($start - $stop) + 1
                      + 2 * ($oligolength + $pcrlength)
                      + $upgap + $downgap;

    print PRIMERFILE "$name\n";
    print PRIMERFILE "5'pcr\t$uppcr\n";
    print PRIMERFILE "3'pcr\t$downpcr\n";
    print PRIMERFILE "\tExpected wildtype product size: ", $wildtype_size, " bp\n";
    foreach my $entry (sort keys %fiveprime_primers) {
        print PRIMERFILE "5'+$entry\t$upprimer$fiveprime_primers{$entry}\n";
    }
    foreach my $entry (sort keys %threeprime_primers) {
        print PRIMERFILE "3'+$entry\t$downprimer$threeprime_primers{$entry}\n";
    }
    print PRIMERFILE "\n";
}
close COORDFILE;

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so check it explicitly.
close PRIMERFILE or die "Could not close output file: $!\n";


###############
# SUBROUTINES #
###############

# open_readfile(*HANDLE, $filetype, $filename)
#
# Opens $filename for reading on the typeglob passed as the first argument.
#   *HANDLE   - typeglob of the filehandle to open (e.g. *SEQFILE)
#   $filetype - short description used in the prompt ("sequence", ...)
#   $filename - file to open; when empty, the name is read from STDIN
# Dies if no filename can be obtained, if the file does not exist, or if it
# cannot be opened.
sub open_readfile {
    # Plain unpacking: "my $x = ... if COND" has undefined behaviour in Perl
    # and can leak a value from one call into the next.
    my ($filehandle, $filetype, $filename) = @_;

    unless ($filename) {
        print "Enter $filetype filename: ";
        $filename = <STDIN>;
        # STDIN at end-of-file: there is nothing to open.
        die "No $filetype filename given\n" unless defined $filename;
        chomp $filename;
    }
    unless (-e $filename) { die "$filename not found\n"; }
    open $filehandle, '<', $filename or die "Could not read file '$filename': $!\n";
    return;
}

# open_writefile(*HANDLE, $filetype, $filename)
#
# Opens $filename for writing on the typeglob passed as the first argument.
#   *HANDLE   - typeglob of the filehandle to open (e.g. *PRIMERFILE)
#   $filetype - short description used in the prompt ("output", ...)
#   $filename - file to create; when empty, the name is read from STDIN
# If the file already exists the user must confirm the overwrite with a
# single Y or N (case-insensitive).  Dies if no filename can be obtained, if
# the overwrite is declined (or STDIN ends before an answer is given), or if
# the file cannot be opened.
sub open_writefile {
    # Plain unpacking: "my $x = ... if COND" has undefined behaviour in Perl
    # and can leak a value from one call into the next.
    my ($filehandle, $filetype, $filename) = @_;

    unless ($filename) {
        print "Enter $filetype filename: ";
        $filename = <STDIN>;
        # STDIN at end-of-file: there is nothing to open.
        die "No $filetype filename given\n" unless defined $filename;
        chomp $filename;
    }

    if (-e $filename) {
        print "$filename already exists!  Overwrite (Y/N)? ";
        # A lexical is used for the answer so the caller's $_ is untouched.
        my $answer = <STDIN>;
        chomp $answer if defined $answer;
        # Keep asking until the reply is exactly Y or N; an empty reply must
        # not count as consent to overwrite.
        while (defined $answer && $answer !~ /\A[yn]\z/i) {
            print 'Y or N, please: ';
            $answer = <STDIN>;
            chomp $answer if defined $answer;
        }
        # End-of-file without an answer is treated as "no".
        if (!defined $answer || lc($answer) eq 'n') {
            die "$filename not overwritten.\n";
        }
    }

    open $filehandle, '>', $filename or die "Could not write file '$filename': $!\n";
    return;
}