File: table_filter.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (72 lines) | stat: -rwxr-xr-x 2,217 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/python3

"""
Tool for filtering a tabular data file. Fields are separated by tabs, the
header line is denoted by a '#' in the first byte, comments are denoted by
a '#' at the start of any subsequent line.

Expressions can use column names as well as numbers. The -c option allows
cutting, again using field name or numbers.

usage: %prog expression < table
    -H, --header:       keep header in output
    -C, --comments:     keep comments in output
    --force-header:     assume the first line is a header even if it does not start with "#"
    -c, --cols=1,2:     names or indexes of columns to keep
"""

import sys

import bx.tabular.io
from bx.cookbook import doc_optparse


def __main__():
    """Filter, and optionally cut, a tab-separated table read from stdin.

    Options and the optional filter expression are taken from the command
    line, using the usage section of the module docstring as the option
    specification. Each element produced by the table reader is handled by
    type:

    * header  -- printed only when the header option is set
    * comment -- printed only when the comments option is set
    * anything else (a data row) -- printed when no expression was given or
      when the expression, evaluated with the row bound to the name ``row``,
      is truthy

    When columns were requested, only those columns are printed for headers
    and rows, joined by tabs. Output goes to standard output.
    """
    # Parse command line arguments
    options, args = doc_optparse.parse(__doc__)
    try:
        keep_header = bool(options.header)
        keep_comments = bool(options.comments)
        # Columns to keep: entries that parse as integers are used as
        # indexes, everything else is kept as a string (a column name).
        cols = []
        if options.cols:
            for c in options.cols.split(","):
                try:
                    cols.append(int(c))
                except ValueError:
                    cols.append(c)
        # Treat an empty expression exactly like a missing one. Previously an
        # empty string skipped compilation but was still handed to eval() for
        # every row, which failed with a SyntaxError on the first data row.
        expr = args[0] if args and args[0] else None
        if options.force_header:
            force_header = bx.tabular.io.FIRST_LINE_IS_HEADER
        else:
            force_header = None
    except Exception:
        # NOTE(review): presumably reports the problem and exits; the code
        # below relies on every name assigned above being defined -- confirm.
        doc_optparse.exception()

    # Compile the expression once up front so it is not re-parsed per row.
    # NOTE: the expression comes straight from the command line and is run
    # through eval(); only use this tool with expressions you trust.
    if expr is not None:
        expr = compile(expr, "<expr arg>", "eval")

    for element in bx.tabular.io.TableReader(sys.stdin, force_header=force_header):
        if isinstance(element, bx.tabular.io.Header):
            if keep_header:
                if cols:
                    # Re-create the header line with only the kept columns
                    print("#" + "\t".join(element[c] for c in cols))
                else:
                    print(element)
        elif isinstance(element, bx.tabular.io.Comment):
            if keep_comments:
                print(element)
        elif expr is None or eval(expr, {"row": element}):
            # Data row that passed the filter (or no filter was given)
            if cols:
                print("\t".join(element[c] for c in cols))
            else:
                print(element)


# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()