File: cutsequences.rb

package info (click to toggle)
genometools 1.6.6%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 50,576 kB
  • sloc: ansic: 271,876; ruby: 29,930; python: 5,106; sh: 3,083; makefile: 1,213; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (61 lines) | stat: -rwxr-xr-x 1,502 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env ruby

require_relative "fasta"
require_relative "print_sequence"

# Cuts +sequence+ into consecutive pieces of at most +maxlength+ characters
# and writes every piece to standard output as its own record: a ">header"
# line followed by the piece, formatted by print_sequence with +linelength+.
# All pieces carry the same +header+.
#
# Cutting stops as soon as one of the following holds:
# * +maxnumber+ pieces have been written,
# * the whole sequence has been written,
# * the part not yet written is shorter than +minlength+ (that tail is
#   dropped).
#
# +maxnumber+ may be any value comparable with an Integer (for example
# Float::INFINITY for "no limit"). A +maxnumber+ of 0 or less writes nothing.
#
# Returns the number of pieces written.
# Raises ArgumentError if +maxlength+ is not positive, because the cutting
# position could then never advance and the loop would not terminate.
def cutsinglesequence(sequence,header,maxnumber,minlength,
                      maxlength,linelength)
  if maxlength <= 0
    raise ArgumentError, "maxlength must be positive, got #{maxlength}"
  end
  pos = 0
  count = 0
  # sequence.length - pos is the number of characters not yet written.
  while count < maxnumber && pos < sequence.length &&
        sequence.length - pos >= minlength
    puts ">#{header}"
    # String#[start, length] clips at the end of the string, so the final
    # piece may be shorter than maxlength.
    print_sequence(sequence[pos, maxlength], linelength)
    pos += maxlength
    count += 1
  end
  return count
end

# Reads every entry of the multi-FASTA file +inputfile+ (via
# Fasta.read_multi_file) and writes at most +maxnumber+ records to standard
# output, each formatted with a line length of 70:
#
# * entries shorter than +minlength+ are skipped and do not count,
# * entries of length +minlength+..+maxlength+ are written unchanged and
#   count as one record,
# * longer entries are cut by cutsinglesequence into pieces of at most
#   +maxlength+ characters; every piece counts as one record.
#
# +maxnumber+ may be any value comparable with an Integer (for example
# Float::INFINITY for "no limit"). With a +maxnumber+ of 0 or less nothing
# is written and the input is not read.
def cutsequences(inputfile,maxnumber,minlength,maxlength)
  linelength = 70
  count = 0
  return if maxnumber <= 0
  Fasta.read_multi_file(inputfile) do |curr_entry|
    len = curr_entry.get_seqlength()
    next if len < minlength
    sequence = curr_entry.get_sequence()
    header = curr_entry.get_header()
    if len > maxlength
      # Hand over only the budget that is still left, so the total over all
      # entries cannot exceed maxnumber.
      count += cutsinglesequence(sequence,header,maxnumber - count,minlength,
                                 maxlength,linelength)
    else
      puts ">#{header}"
      print_sequence(sequence,linelength)
      # Records written whole consume budget as well.
      count += 1
    end
    break if count >= maxnumber
  end
end

# Command-line entry point; runs only when this file is executed directly.
#
# Arguments: <inputfile> <maxnumber|all> <minlength> <maxlength>
if __FILE__ == $0
  if ARGV.length != 4
    STDERR.puts "Usage: #{$0} <inputfile> <maxnumber|all> <minlength> <maxlength>"
    exit 1
  end
  inputfile = ARGV[0]
  # "all" means no limit. Float::INFINITY compares greater than every Integer
  # count, so the `count >= maxnumber` checks in the cutting code never fire.
  maxnumber = ARGV[1] == "all" ? Float::INFINITY : ARGV[1].to_i
  minlength = ARGV[2].to_i
  maxlength = ARGV[3].to_i
  # A non-positive piece length cannot advance through a sequence.
  if maxlength <= 0
    STDERR.puts "#{$0}: <maxlength> must be a positive integer"
    exit 1
  end
  # cutsequences takes four arguments; maxnumber must be passed along.
  cutsequences(inputfile, maxnumber, minlength, maxlength)
end