1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
|
prelude: |
bm_so_fasta = <<'EOS'
# The Computer Language Shootout
# http://shootout.alioth.debian.org/
# Contributed by Sokolov Yura
$last = 42.0
def gen_random(max, im=139968, ia=3877, ic=29573)
(max * ($last = ($last * ia + ic) % im)) / im
end
alu =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"+
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"+
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"+
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"+
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"+
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"+
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
iub = [
["a", 0.27],
["c", 0.12],
["g", 0.12],
["t", 0.27],
["B", 0.02],
["D", 0.02],
["H", 0.02],
["K", 0.02],
["M", 0.02],
["N", 0.02],
["R", 0.02],
["S", 0.02],
["V", 0.02],
["W", 0.02],
["Y", 0.02],
]
homosapiens = [
["a", 0.3029549426680],
["c", 0.1979883004921],
["g", 0.1975473066391],
["t", 0.3015094502008],
]
def make_repeat_fasta(id, desc, src, n)
puts ">#{id} #{desc}"
v = nil
width = 60
l = src.length
s = src * ((n / l) + 1)
s.slice!(n, l)
puts(s.scan(/.{1,#{width}}/).join("\n"))
end
def make_random_fasta(id, desc, table, n)
puts ">#{id} #{desc}"
rand, v = nil,nil
width = 60
chunk = 1 * width
prob = 0.0
table.each{|v| v[1]= (prob += v[1])}
for i in 1..(n/width)
puts((1..width).collect{
rand = gen_random(1.0)
table.find{|v| v[1]>rand}[0]
}.join)
end
if n%width != 0
puts((1..(n%width)).collect{
rand = gen_random(1.0)
table.find{|v| v[1]>rand}[0]
}.join)
end
end
n = (ARGV[0] or 250_000).to_i
make_repeat_fasta('ONE', 'Homo sapiens alu', alu, n*2)
make_random_fasta('TWO', 'IUB ambiguity codes', iub, n*3)
make_random_fasta('THREE', 'Homo sapiens frequency', homosapiens, n*5)
EOS
benchmark:
- name: so_reverse_complement
prelude: |
script = File.join(File.dirname($0), 'bm_so_fasta.rb')
File.write(script, bm_so_fasta)
def prepare_fasta_output n
filebase = File.join(File.dirname($0), 'fasta.output')
script = File.join(File.dirname($0), 'bm_so_fasta.rb')
file = "#{filebase}.#{n}"
unless FileTest.exist?(file)
STDERR.puts "preparing #{file}"
open(file, 'w'){|f|
ARGV[0] = n
$stdout = f
load script
$stdout = STDOUT
}
end
end
prepare_fasta_output(2_500_000)
script: |
# The Great Computer Language Shootout
# http://shootout.alioth.debian.org/
#
# Contributed by Peter Bjarke Olsen
# Modified by Doug King
seq=Array.new
def revcomp(seq)
seq.reverse!.tr!('wsatugcyrkmbdhvnATUGCYRKMBDHVN','WSTAACGRYMKVHDBNTAACGRYMKVHDBN')
stringlen=seq.length
0.step(stringlen-1,60) {|x| print seq.slice(x,60) , "\n"}
end
input = open(File.join(File.dirname($0), 'fasta.output.2500000'), 'rb')
while input.gets
if $_ =~ />/
if seq.length != 0
revcomp(seq.join)
seq=Array.new
end
puts $_
else
$_.sub(/\n/,'')
seq.push $_
end
end
revcomp(seq.join)
loop_count: 1
|