#!/usr/bin/perl
# Author: cckim@stanford.edu
# Description: This program designs primers for constructing knockouts
# of genes by transformation of PCR products (ref: Datsenko & Wanner,
# PNAS 2000). A tab-delimited file containing ORF START STOP is read,
# and primers flanking the start & stop coordinates are designed based
# on the user-designated sequence file. In addition, primers flanking
# the knockout regions are chosen for PCR screening purposes once the
# knockout is generated. The script uses Bioperl in order to
# determine the primer sequences, which requires getting subsequences
# and reverse complementing some of the objects.
# make_primers.pl
# Purpose: Design primers for the Wanner method of PCR product-based knockouts
# Input: FASTA sequence file, tab-delimited coordinates file
# Output: Primer output file
# July 4, 2001
# Charles C. Kim
###########
# MODULES #
###########
use Bio::Seq;
use Getopt::Std;
#############
# VARIABLES #
#############
# Deletion geometry. The gaps widen the deleted region beyond the ORF
# coordinates given in the coordinates file.
our $upgap       = 0;     # nt upstream of the 5' end to include in the deletion
our $downgap     = 0;     # nt downstream of the 3' end to include in the deletion
our $oligolength = 40;    # length of the homologous region on each primer

# Default filenames. Leave these empty unless the program should always
# run on the same files.
our $seqfile   = '';
our $coordfile = '';
our $outfile   = '';

# Fixed priming sequences appended after the upstream homology arm
# (5' primers) and after the downstream homology arm (3' primers).
our %fiveprime_primers = ( 'P1' => 'GTGTAGGCTGGAGCTGCTTC' );
our %threeprime_primers = (
    'P2' => 'CATATGAATATCCTCCTTAG',
    'P4' => 'ATTCCGGGGATCCGTCGACC',
);
#########
# FILES #
#########
# Command-line switches: -s sequence file, -c coordinates file,
# -o output file. A switch, when given, replaces the corresponding
# filename variable; a filename that is still empty afterwards is asked
# for interactively by open_readfile / open_writefile.
getopts('s:c:o:');
if ($opt_s) { $seqfile   = $opt_s; }
if ($opt_c) { $coordfile = $opt_c; }
if ($opt_o) { $outfile   = $opt_o; }

# Open the two inputs and the output on the bareword handles used by the
# main section.
open_readfile(*SEQFILE, 'sequence', $seqfile);
open_readfile(*COORDFILE, 'coordinate', $coordfile);
open_writefile(*PRIMERFILE, 'output', $outfile);
########
# MAIN #
########
# Length (nt) of each PCR screening primer. The screening primers sit
# immediately outside the homology arms on either side of the deletion.
my $screen_length = 20;

# Reverse-complement a plain sequence string via Bioperl.
my $revcom = sub {
    my ($bases) = @_;
    return Bio::Seq->new('-seq' => $bases)->revcom()->seq();
};

# Read the FASTA file into one string. Exactly one record is allowed.
my $bases   = '';
my $records = 0;
while (my $line = <SEQFILE>) {
    $line =~ s/[\r\n]+\z//;    # accept DOS as well as Unix line endings
    if ($line =~ /^\s*>/) {
        $records++;
        if ($records > 1) {
            die "More than one sequence present in the input file\n";
        }
        next;
    }
    $line =~ tr/gatc/GATC/;
    $bases .= $line;
}
close SEQFILE;

my $genome        = Bio::Seq->new('-seq' => $bases);
my $genome_length = $genome->length();

# One knockout per line of the coordinates file: NAME <tab> START <tab> STOP.
# START > STOP means the ORF lies on the reverse strand.
while (my $record = <COORDFILE>) {
    $record =~ s/[\r\n]+\z//;
    next if !$record;
    my ($name, $start, $stop) = split(/\t/, $record);
    if (!defined $start || !defined $stop || $start == $stop) {
        die "Problem with start and stop coordinates\n";
    }

    # Every primer lies within [$lowest, $highest]; check that window once
    # so an out-of-range ORF is reported by name instead of surfacing as a
    # Bioperl subseq exception.
    my $forward = $start < $stop;
    my ($left, $left_gap, $right, $right_gap)
        = $forward ? ($start, $upgap, $stop, $downgap)
        :            ($stop, $downgap, $start, $upgap);
    my $lowest  = $left - $left_gap - $oligolength - $screen_length;
    my $highest = $right + $right_gap + $oligolength + $screen_length;
    if ($lowest < 1 || $highest > $genome_length) {
        die "$name: primers would span $lowest-$highest, outside the "
            . "sequence (1-$genome_length)\n";
    }

    my ($upprimer, $downprimer, $uppcr, $downpcr);
    if ($forward) {
        $upprimer = $genome->subseq(
            $start - $oligolength - $upgap,
            $start - 1 - $upgap
        );
        $downprimer = $revcom->($genome->subseq(
            $stop + 1 + $downgap,
            $stop + $oligolength + $downgap
        ));
        $uppcr = $genome->subseq(
            $start - $oligolength - $upgap - $screen_length,
            $start - 1 - $upgap - $oligolength
        );
        $downpcr = $revcom->($genome->subseq(
            $stop + 1 + $downgap + $oligolength,
            $stop + $oligolength + $downgap + $screen_length
        ));
    }
    else {
        # Reverse-strand ORF: the "up" side is at the higher coordinate, so
        # the upstream primers are the ones that get reverse-complemented.
        $upprimer = $revcom->($genome->subseq(
            $start + $upgap + 1,
            $start + $oligolength + $upgap
        ));
        $downprimer = $genome->subseq(
            $stop - $oligolength - $downgap,
            $stop - 1 - $downgap
        );
        $uppcr = $revcom->($genome->subseq(
            $start + $oligolength + $upgap + 1,
            $start + $oligolength + $upgap + $screen_length
        ));
        $downpcr = $genome->subseq(
            $stop - $oligolength - $downgap - $screen_length,
            $stop - 1 - $downgap - $oligolength
        );
    }

    # Distance between the outer ends of the two screening primers on the
    # wild-type template. This is 121 + |start - stop| for the default
    # settings (40 nt arms, no gaps) and stays correct if they are changed.
    my $wildtype_size = abs($start - $stop) + 1
        + $upgap + $downgap
        + 2 * ($oligolength + $screen_length);

    print PRIMERFILE "$name\n";
    print PRIMERFILE "5'pcr\t$uppcr\n";
    print PRIMERFILE "3'pcr\t$downpcr\n";
    print PRIMERFILE "\tExpected wildtype product size: ", $wildtype_size, " bp\n";
    foreach my $entry (sort keys %fiveprime_primers) {
        print PRIMERFILE "5'+$entry\t$upprimer$fiveprime_primers{$entry}\n";
    }
    foreach my $entry (sort keys %threeprime_primers) {
        print PRIMERFILE "3'+$entry\t$downprimer$threeprime_primers{$entry}\n";
    }
    print PRIMERFILE "\n";
}
close COORDFILE;

# Buffered write errors (e.g. a full disk) only surface at close.
close PRIMERFILE or die "Could not finish writing primer output: $!\n";
###############
# SUBROUTINES #
###############
# open_readfile(GLOB, TYPE, FILENAME)
#
# Opens FILENAME for reading on the filehandle named by GLOB (a typeglob
# such as *SEQFILE). If FILENAME is empty or missing, the user is asked on
# STDOUT/STDIN for a filename; TYPE is the word used in that prompt.
#
# Dies with "<file> not found" if the file does not exist, with
# "Could not read file ..." if it cannot be opened, and with
# "No <TYPE> filename given" if STDIN ends before a name is entered.
sub open_readfile {
    # Plain list unpacking: "my $x = ... if COND" has undefined behaviour
    # and can leak a value from one call into the next.
    my ($filehandle, $filetype, $filename) = @_;
    $filetype = '' unless defined $filetype;

    unless ($filename) {
        print "Enter $filetype filename: ";
        $filename = <STDIN>;
        die "No $filetype filename given\n" unless defined $filename;
        chomp $filename;
    }

    unless (-e $filename) { die "$filename not found\n"; }
    open $filehandle, '<', $filename
        or die "Could not read file '$filename': $!\n";
    return;
}
# open_writefile(GLOB, TYPE, FILENAME)
#
# Opens FILENAME for writing (truncating it) on the filehandle named by
# GLOB (a typeglob such as *PRIMERFILE). If FILENAME is empty or missing,
# the user is asked on STDOUT/STDIN for a filename; TYPE is the word used
# in that prompt.
#
# If the file already exists the user must confirm with Y or N. Anything
# else, including an empty line, repeats the question; N or end of input
# dies with "<file> not overwritten." so nothing is overwritten without an
# explicit Y.
#
# Dies with "Could not write file ..." if the file cannot be opened and
# with "No <TYPE> filename given" if STDIN ends before a name is entered.
sub open_writefile {
    # Plain list unpacking: "my $x = ... if COND" has undefined behaviour
    # and can leak a value from one call into the next.
    my ($filehandle, $filetype, $filename) = @_;
    $filetype = '' unless defined $filetype;

    unless ($filename) {
        print "Enter $filetype filename: ";
        $filename = <STDIN>;
        die "No $filetype filename given\n" unless defined $filename;
        chomp $filename;
    }

    if (-e $filename) {
        print "$filename already exists! Overwrite (Y/N)? ";
        # A lexical keeps the caller's $_ intact.
        my $answer = <STDIN>;
        while (defined $answer && $answer !~ /\A\s*[yn]\s*\z/i) {
            print 'Y or N, please: ';
            $answer = <STDIN>;
        }
        if (!defined $answer || $answer =~ /n/i) {
            die "$filename not overwritten.\n";
        }
    }

    open $filehandle, '>', $filename
        or die "Could not write file '$filename': $!\n";
    return;
}
|