File: add_revcmp.rb

package info (click to toggle)
genometools 1.6.1%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 50,412 kB
  • sloc: ansic: 271,241; ruby: 30,339; python: 4,880; sh: 3,193; makefile: 1,194; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (83 lines) | stat: -rwxr-xr-x 1,699 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env ruby
class Sequence
	attr_reader :sequence, :length, :fasta_id
	attr_writer :sequence, :length, :fasta_id

	def initialize()
		@length = 0
		@sequence = ""
		@fasta_id = ""
	end

	def read_sequence(file)
		if !File.exists?(file)
			raise "The specified file #{file} does not exist."
		end
		File.open(file) do |line|
			#read first line
			str = line.gets.chomp
			if (str =~ /^>.*$/) then
				#FASTA file, read header properly
				@fasta_id = str
			else	
				#no FASTA file, but plain sequence
				@sequence = str
				@length = str.length
			end
			#read the rest of the file
			while nextline = line.gets
				nextline.chomp!
				@sequence = @sequence + nextline
				@length += nextline.length
			end
		end
	end
end

def revcomp(seq)
  seq.tr("aAcCgGtTnN","tTgGcCaAnN").reverse
end

class MSA
  attr_reader :seqs, :length, :cons

  def initialize(file)
    if !File.exists?(file)
      raise "The specified file #{file} does not exist."
    end
    @file = file
    @length = 0
    @seqs = []
    self.read
  end

  def read
    text = File.read(@file)
    text = text.split('>')
    text[1..text.length-1].each do |item|
      seq_components = item.split(/\n/)
      fasta_id=seq_components[0]
      sequence = seq_components[1..seq_components.length-1]
      seqObj = Sequence.new()
      seqObj.fasta_id = fasta_id
      seqObj.sequence = sequence.join
      seqObj.length = seqObj.sequence.length
      @seqs.push(seqObj)
    end
    @length = @seqs[0].length
  end
end

m = MSA.new(ARGV[0])
l = 0
m.seqs.each do |s|
  puts ">#{s.fasta_id}"
  puts "#{s.sequence}"
  l += s.length
end
m.seqs.reverse.each do |s|
  puts ">#{s.fasta_id}"
  puts "#{revcomp(s.sequence)}"
end

puts