File: filterBy.py

package info (click to toggle)
snpeff 5.2.f%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 701,384 kB
  • sloc: java: 62,547; perl: 2,279; sh: 1,185; python: 744; xml: 507; makefile: 50
file content (37 lines) | stat: -rwxr-xr-x 987 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python3

#-------------------------------------------------------------------------------
#
# Filter a TXT file
#
# Print only the lines of 'input.txt' whose tab-separated column number
# 'colNum' (1-based) matches one entry from 'ids.txt'
#
#
#
#																Pablo Cingolani
#-------------------------------------------------------------------------------

import sys

def _readIds(idsFileName):
	"""Return the set of whitespace-stripped lines read from 'idsFileName'."""
	with open(idsFileName) as idsFile:
		return set(line.strip() for line in idsFile)


def _filterRows(lines, idSet, colNum):
	"""Yield every row whose zero-based column 'colNum' is a member of 'idSet'.

	'lines' is any iterable of text lines. Rows are yielded without their
	line terminator; rows with too few columns are skipped.
	"""
	for line in lines:
		# Remove only the line terminator: otherwise the last column would
		# carry a trailing '\n' and never match, and stripping all whitespace
		# would drop empty leading/trailing columns from the output.
		row = line.rstrip('\n')
		fields = row.split('\t')
		if len(fields) > colNum and fields[colNum] in idSet:
			yield row


def main(argv):
	"""Filter a tab-separated file by a set of IDs.

	'argv' is the full argument vector: program name, IDs file name, input
	file name and the one-based column number to match. Matching rows are
	printed to stdout; progress and error messages go to stderr.

	Returns the process exit status: 0 on success, 1 on a usage error.
	"""
	# Command line parsing
	progName = argv[0] if argv else 'filterBy.py'
	usage = f"Usage:{progName} ids.txt input.txt colNum\n"
	# Three arguments are required, so argv must hold at least four items
	if len(argv) < 4:
		print(usage, file=sys.stderr)
		return 1

	idsFileName = argv[1]
	inputFileName = argv[2]
	try:
		colNum = int(argv[3]) - 1
	except ValueError:
		colNum = -1
	# A negative index would silently select a column counted from the end
	if colNum < 0:
		print(f"Error: colNum must be a positive integer, got '{argv[3]}'", file=sys.stderr)
		print(usage, file=sys.stderr)
		return 1

	# Read IDs
	idSet = _readIds(idsFileName)
	print(f"Read {len(idSet)} IDs from file '{idsFileName}'", file=sys.stderr)

	# Read input file and filter
	print(f"Reading '{inputFileName}'", file=sys.stderr)
	with open(inputFileName) as inFile:
		for row in _filterRows(inFile, idSet, colNum):
			print(row)
	return 0


if __name__ == '__main__':
	sys.exit(main(sys.argv))