File: sample_encseq.lua

package info (click to toggle)
genometools 1.6.6%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 50,576 kB
  • sloc: ansic: 271,876; ruby: 29,930; python: 5,106; sh: 3,083; makefile: 1,213; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (32 lines) | stat: -rw-r--r-- 824 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
-- Seed the pseudo-random generator from the current time as reported by
-- os.time(), so the draws made below are not the same on every run.
local seed = os.time()
math.randomseed(seed)

--- Print the command-line synopsis to stderr and terminate the process.
-- Exits with status 1, so control never returns to the caller.
function usage()
  -- string.format("%s", nil) raises in Lua 5.1; fall back to the script's
  -- file name when the host did not provide arg[0].
  local prog = (arg and arg[0]) or "sample_encseq.lua"
  io.stderr:write(string.format("Usage: %s indexname minlen maxlen n_substr\n", prog))
  -- Name the count exactly as the synopsis line above does (n_substr).
  io.stderr:write("Extract <n_substr> random substrings from a GtEncseq.\n")
  os.exit(1)
end

-- Script body: with exactly four command-line arguments, load the encoded
-- sequence collection named by the first one and print `n_substr` randomly
-- chosen substrings whose lengths are drawn uniformly from [minlen, maxlen].
-- Each substring is printed as a ">start-stop (length len)" header line
-- followed by the decoded characters. Any other argument count prints the
-- usage text and exits.
if #arg == 4 then
  -- Parse a command-line value as an integer >= lowest; returns nil when the
  -- text is not numeric, not integral (this also rejects NaN), or too small.
  local function parse_int(text, lowest)
    local n = tonumber(text)
    if n == nil or n % 1 ~= 0 or n < lowest then
      return nil
    end
    return n
  end

  local idxname = arg[1]
  -- A zero-length substring would give stop < start, so lengths start at 1.
  local minlen = parse_int(arg[2], 1)
  local maxlen = parse_int(arg[3], 1)
  local nsubstr = parse_int(arg[4], 0)

  -- math.random(m, n) raises on an empty interval, so reject maxlen < minlen
  -- together with unparsable values. usage() exits the process.
  if not (minlen and maxlen and nsubstr) or maxlen < minlen then
    usage()
  end

  local el = gt.encseq_loader_new()
  local es = el:load(idxname)
  local nseq = es:num_of_sequences()

  -- The sampling loop below only terminates if some sequence can hold a
  -- substring of at least minlen characters; check that once up front
  -- instead of spinning forever.
  local longest = 0
  for s = 0, nseq - 1 do
    longest = math.max(longest, es:seqlength(s))
  end
  if nsubstr > 0 and longest < minlen then
    io.stderr:write(string.format(
      "%s: no sequence of length >= %d (longest is %d)\n",
      idxname, minlen, longest))
    os.exit(1)
  end

  local produced = 0
  while produced < nsubstr do
    local len = math.random(minlen, maxlen)
    -- math.random(n) is 1-based, sequence numbers are 0-based.
    local seqno = math.random(nseq) - 1
    local seqlen = es:seqlength(seqno)
    -- Rejection sampling: redraw when the chosen sequence is too short.
    -- ">=" lets a substring cover the whole sequence.
    if seqlen >= len then
      -- Valid offsets inside the sequence are 0 .. seqlen-len inclusive;
      -- drawing from 1 would never sample the first position.
      local start = es:seqstartpos(seqno) + math.random(0, seqlen - len)
      local stop = start + len - 1
      print(">"..start.."-"..stop.." (length "..len..")")
      print(es:extract_decoded(start, stop))
      produced = produced + 1
    end
  end
else
  usage()
end