File: spmfilter.rb

package info (click to toggle)
genometools 1.6.1%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 50,412 kB
  • sloc: ansic: 271,241; ruby: 30,339; python: 4,880; sh: 3,193; makefile: 1,194; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (53 lines) | stat: -rwxr-xr-x 1,107 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env ruby

require 'gtruby'

if ARGV.length != 2
  STDERR.puts "Usage #{$0} <indexname> <matchfile>"
  exit 1
end

indexname=ARGV[0]
matchfile=ARGV[1]

el = GT::EncseqLoader.new
encseq = el.load(indexname)
mirrored=false
File.new(indexname + ".prj","r").each_line do |line|
  m = line.match(/^mirrored=(\d+)/)
  if m
    if m[1].to_i == 0
      mirrored=false
    else
      mirrored=true
    end
  end
end

if mirrored
  encseq.mirror()
end

File.new(matchfile,"r").each_line do |line|
  if not line.match(/^#/)
    m = line.split 
    seqnum1 = m[1].to_i
    relpos1 = m[2].to_i
    seqnum2 = m[5].to_i
    relpos2 = m[6].to_i
    matchlen = m[0].to_i
    #puts "sn1=#{seqnum1} rp1=#{relpos1} " +
         #"sn2=#{seqnum2} rp2=#{relpos2} ml=#{matchlen}"
    if relpos1 == 0
      seqlen2 = encseq.seqlength(seqnum2)
      if relpos2 + matchlen == seqlen2
        puts "#{seqnum2} #{seqnum1} #{matchlen}"
      end
    elsif relpos2 == 0
      seqlen1 = encseq.seqlength(seqnum1)
      if relpos1 + matchlen == seqlen1
        puts "#{seqnum1} #{seqnum2} #{matchlen}"
      end
    end
  end
end