File: convert2myersformat.rb

package info (click to toggle)
genometools 1.6.1%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 50,412 kB
  • sloc: ansic: 271,241; ruby: 30,339; python: 4,880; sh: 3,193; makefile: 1,194; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (35 lines) | stat: -rwxr-xr-x 952 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env ruby

require_relative "fasta"

def print_wildcard_replace(seq, linelength, fp=STDOUT)
  pos = 0
  while pos < seq.length do
    fp.puts replacewildcards(seq[pos..pos+linelength-1])
    pos += linelength
  end
end

def convert2myersformat(header, length, id)
  new_header = ">m000000_000000_00000_c1898213712391273/#{id}/0_#{length}"
  return header.sub(/^\S+/, new_header)
end

# Replace all non-DNA characters by randomly chosen DNA characters (a,c,g,t).
def replacewildcards(seq)
  bases = ['a', 'c', 'g', 't']
  finished = seq.sub!(/[^acgtACGT]/, bases.sample).nil? until finished
  return seq
end

# read all files and output them verbatim on lines of width 70
if __FILE__ == $0
  ARGV.each do |filename|
    id = 0
    Fasta.read_multi_file(filename) do |entry|
      puts "#{convert2myersformat(entry.get_header(), entry.get_seqlength(), id)}"
      print_wildcard_replace(entry.get_sequence(),70)
      id += 1
    end
  end
end