File: check_linesorting.lua

package info (click to toggle)
genometools 1.6.6%2Bds-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 50,576 kB
  • sloc: ansic: 271,876; ruby: 29,930; python: 5,106; sh: 3,083; makefile: 1,213; perl: 219; pascal: 159; haskell: 37; sed: 5
file content (48 lines) | stat: -rw-r--r-- 1,097 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
-- Print a short usage message to stderr and terminate the script with
-- exit status 1. The program name is taken from arg[0].
function usage()
  local synopsis = string.format("Usage: %s file\n", arg[0])
  local summary = "Checks a GFF file for line-sortedness.\n"
  io.stderr:write(synopsis, summary)
  os.exit(1)
end

-- Split `str` into the fields delimited by the literal separator `sep`.
--
-- str: the string to split.
-- sep: non-empty separator; it is matched as plain text, so characters that
--      are magic in Lua patterns (".", "%", "-", ...) need no escaping.
--
-- Returns an array of fields. The text after the last separator is always
-- included (possibly as an empty string), so a string containing n
-- separators yields n + 1 fields.
-- Raises an error if `sep` is the empty string.
function split(str, sep)
  if sep == "" then
    -- An empty separator would match at every position and never advance.
    error("split: separator must not be empty", 2)
  end
  local fields = {}
  local start = 1
  while true do
    -- Fourth argument `true` requests a plain (non-pattern) search.
    local first, last = str:find(sep, start, true)
    if not first then
      -- No further separator: the remainder is the final field.
      fields[#fields + 1] = str:sub(start)
      break
    end
    fields[#fields + 1] = str:sub(start, first - 1)
    start = last + 1
  end
  return fields
end

-- Main script: verify that the feature lines of a GFF file are sorted by
-- start position within each run of consecutive lines sharing a sequence ID.
-- Exits with status 1 and a message on stderr at the first violation or
-- malformed line; finishes silently if the file passes.

-- Exactly one command-line argument (the GFF file) is required.
local gfffile
if #arg == 1 then
  gfffile = arg[1]
else
  usage()
end

local cur_seqid = nil   -- sequence ID of the previous feature line
local cur_pos = 0       -- start position of the previous feature line
local file = assert(io.open(gfffile, "r"))

-- Write the given messages to stderr, close the input and exit with status 1.
local function fail(...)
  io.stderr:write(...)
  file:close()
  os.exit(1)
end

local lineno = 0
for line in file:lines() do
  lineno = lineno + 1
  -- Everything after the ##FASTA directive is sequence data, not features.
  if line:match("^##FASTA") then
    break
  end
  -- Lines starting with '#' are comments or directives and are skipped.
  if not line:match("^#") then
    local f = split(line, "\t")
    if #f < 5 then
      fail("Not enough fields in line " .. lineno .. "\n")
    end
    local seqid, startfield = f[1], f[4]
    -- tonumber returns nil for non-numeric text; report that explicitly
    -- instead of failing on a nil comparison below.
    local startpos = tonumber(startfield)
    if not startpos then
      fail("Error: invalid start position '" .. startfield
           .. "' (line " .. lineno .. ")\n", line .. "\n")
    end
    -- A change of sequence ID starts a new run; positions restart at 0.
    if seqid ~= cur_seqid then
      cur_seqid = seqid
      cur_pos = 0
    end
    if startpos < cur_pos then
      fail("Error: " .. startfield .. " < "
           .. cur_pos .. " (line " .. lineno ..")\n", line .. "\n")
    end
    cur_pos = startpos
  end
end
file:close()