1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
|
#!/usr/bin/env perl
use strict;
use warnings;
use lib("/usr/lib/trinityrnaseq/PerlLib");
use Fasta_reader;
use List::Util qw(min max);
use Data::Dumper;
# Command line: <transcriptome.fasta> <polymorphRatePercentage>
# Both arguments are required; a missing (or false) value prints the usage
# message and exits.
my $usage = "usage: $0 transcriptome.fasta polymorphRatePercentage\n\n";

my $transcriptome = $ARGV[0] or die $usage;
my $poly_rate     = $ARGV[1] or die $usage;

# The rate is used arithmetically as a percentage of each sequence length, so
# it must be a plain number.  Values <= 0 would yield no SNPs for any sequence,
# and values > 100 would request more SNP sites than a sequence has bases.
my $number_re = qr{ \A \s* [+]? (?: \d+ (?: [.] \d* )? | [.] \d+ ) (?: [eE] [-+]? \d+ )? \s* \z }x;
unless ($poly_rate =~ $number_re && $poly_rate > 0 && $poly_rate <= 100) {
    die "Error, polymorphRatePercentage must be a number greater than 0 and at most 100 (got: $poly_rate)\n\n$usage";
}

# A value below 1 is legal but often means a fraction was given instead of a
# percentage, so flag it without aborting.
if ($poly_rate < 1) {
    print STDERR "\n\n** WARNING: polymorphRatePercentage expects a percentage, and your input value is quite small: $poly_rate ** \n\n";
}

# Tally of substitution types; only referenced by commented-out debugging code.
my %mutations;
# Driver: read every record of the input FASTA file and, for each sequence that
# receives at least one SNP at the requested rate, print two records to STDOUT:
#   aleA;<accession>  - the sequence exactly as read
#   aleB;<accession>  - the copy returned by mutate()
# Sequences whose rounded SNP count is below 1 are skipped entirely (neither
# allele is printed).  A running count of emitted pairs is shown on STDERR.
main: {
    my $fasta_reader = Fasta_reader->new($transcriptome);
    my $counter      = 0;

    while (my $seq_obj = $fasta_reader->next()) {
        my $acc      = $seq_obj->get_accession();
        my $sequence = $seq_obj->get_sequence();

        # Percentage of the sequence length, rounded to the nearest integer.
        my $num_snps = int($poly_rate / 100 * length($sequence) + 0.5);
        next if $num_snps < 1;    # exclude since not helpful here.

        print_fasta("aleA;$acc", $sequence);
        print_fasta("aleB;$acc", mutate($sequence, $num_snps));

        $counter++;
        print STDERR "\r[$counter] ";    # \r keeps the progress counter on one line
    }

    print STDERR "\n\n";
    #print STDERR Dumper(\%mutations);

    exit(0);
}
####
# Print a single FASTA record to the currently selected output handle.
#
#   $acc      - text placed after '>' on the header line
#   $sequence - sequence text; a newline is inserted after every run of 60
#               non-whitespace characters
#
# Returns nothing.
sub print_fasta {
    my ($acc, $sequence) = @_;

    # Wrap a copy of the sequence so the argument itself is left untouched.
    (my $wrapped = $sequence) =~ s/(\S{60})/$1\n/g;

    # When the length is an exact multiple of 60 the wrap leaves a trailing
    # newline; drop it so the record never ends with a blank line.
    chomp $wrapped;

    print ">$acc\n$wrapped\n";

    return;
}
####
# Introduce single-base substitutions at distinct, randomly chosen positions.
#
#   $sequence - nucleotide string; it is upper-cased before mutating
#   $num_snps - number of substitutions requested
#
# Returns the upper-cased sequence in which every mutated position holds a
# lower-case base (g, a, t or c) that differs from the base originally at that
# position.  A character outside G/A/T/C may be replaced by any of the four.
#
# The request is capped at the number of positions in the sequence: distinct
# sites are drawn by rejection sampling, which could never finish if more
# sites were requested than exist (this also covers an empty sequence).
sub mutate {
    my ($sequence, $num_snps) = @_;

    my @chars   = qw(G A T C);
    my @seq     = split(//, uc $sequence);
    my $seq_len = scalar @seq;

    # Cannot mutate more distinct sites than the sequence contains.
    $num_snps = $seq_len if $num_snps > $seq_len;

    my %seen;
    for (1 .. $num_snps) {

        # select unique sites (sampling w/o replacement)
        my $pos;
        do {
            $pos = int(rand($seq_len));
        } while ($seen{$pos});
        $seen{$pos} = 1;

        # Choose uniformly among the bases that differ from the current one.
        my $nuc    = $seq[$pos];
        my @others = grep { $_ ne $nuc } @chars;

        # Lower case marks the position as mutated in the output.
        $seq[$pos] = lc $others[ int(rand(scalar @others)) ];
    }

    return join("", @seq);
}
|