File: create_feather.py

package info (click to toggle)
python-nanoget 1.19.3-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 25,876 kB
sloc: python: 643; sh: 25; makefile: 9
file content (123 lines) | stat: -rw-r--r-- 4,997 bytes
parent folder | download | duplicates (2)
#! /usr/bin/env python
# wdecoster

from argparse import ArgumentParser
from nanoget import get_input
import os


def main():
    '''
    Entry point of the script
    -parses the command line arguments
    -leaves an existing output file alone unless --force was given
    -reads the selected input through nanoget's get_input
    -stores the result at args.output by calling its to_feather method
    '''
    args = get_args()

    # Guard clause: never overwrite an existing file without --force.
    if os.path.isfile(args.output) and not args.force:
        print("Output file {} already exists.".format(args.output))
        return

    # One entry per supported input type; an option that was not given on the
    # command line holds a falsy value and is filtered out below.
    candidates = {
        "fastq": args.fastq,
        "bam": args.bam,
        "cram": args.cram,
        "fastq_rich": args.fastq_rich,
        "fastq_minimal": args.fastq_minimal,
        "summary": args.summary,
        "fasta": args.fasta,
        "ubam": args.ubam,
    }
    # Keep the source name and its file list together so both always come
    # from the same dictionary entry; the first populated entry wins.
    provided = [(kind, paths) for kind, paths in candidates.items() if paths]
    source_type, input_files = provided[0]

    # NOTE(review): get_input presumably returns a pandas DataFrame - confirm
    # against the nanoget documentation.
    data = get_input(
        source=source_type,
        files=input_files,
        threads=args.threads,
        readtype=args.readtype,
        combine="simple",
        barcoded=args.barcoded,
        huge=args.huge,
        keep_supp=not args.no_supplementary,
    )
    data.to_feather(args.output)


def get_args(argv=None):
    '''
    Build the command line parser and parse the arguments

    argv: optional list of argument strings to parse instead of the
          process command line; with the default None argparse falls
          back to sys.argv[1:]

    Returns the argparse namespace holding
    -the general options: threads, huge, output, readtype, barcoded,
     no_supplementary and force
    -one attribute per input source: fastq, fasta, fastq_rich,
     fastq_minimal, summary, bam, ubam and cram. The sources form a
     required mutually exclusive group, so exactly one of them is a
     list of file names and the others are None

    argparse terminates the program (SystemExit) on invalid arguments
    and after printing the help.
    '''
    parser = ArgumentParser(
        # The description used to advertise plotting, which this script
        # does not do: it only converts the input into a feather file.
        description="Creates a feather file from long read sequencing data.".upper(),
        # The default -h/--help is disabled so that it can be re-added
        # below inside the 'General options' group.
        add_help=False)
    general = parser.add_argument_group(
        title='General options')
    general.add_argument("-h", "--help",
                         action="help",
                         help="show the help and exit")
    general.add_argument("-t", "--threads",
                         help="Set the allowed number of threads to be used by the script",
                         default=4,
                         type=int)
    general.add_argument("--huge",
                         help="Input data is one very large file.",
                         action="store_true")
    general.add_argument("-o", "--output",
                         help="Specify name of feather file.",
                         default="NanoPlot-data.feather")
    general.add_argument("--readtype",
                         help="Which read type to extract information about from summary. \
                                 Options are 1D, 2D, 1D2",
                         default="1D",
                         choices=['1D', '2D', '1D2'])
    general.add_argument("--barcoded",
                         help="Use if you want to split the summary file by barcode",
                         action="store_true")
    general.add_argument("--no_supplementary",
                         help="Use if you want to remove supplementary alignments",
                         action="store_true",
                         default=False)
    general.add_argument("--force",
                         help="Overwrite existing feather files",
                         action="store_true",
                         default=False)
    target = parser.add_argument_group(
        title="Input data sources, one of these is required.")
    # Exactly one input type has to be selected per invocation.
    mtarget = target.add_mutually_exclusive_group(
        required=True)
    mtarget.add_argument("--fastq",
                         help="Data is in one or more default fastq file(s).",
                         nargs='+',
                         metavar="file")
    mtarget.add_argument("--fasta",
                         help="Data is in one or more fasta file(s).",
                         nargs='+',
                         metavar="file")
    mtarget.add_argument("--fastq_rich",
                         help="Data is in one or more fastq file(s) generated by albacore, \
                               MinKNOW or guppy with additional information \
                               concerning channel and time.",
                         nargs='+',
                         metavar="file")
    mtarget.add_argument("--fastq_minimal",
                         help="Data is in one or more fastq file(s) generated by albacore, \
                               MinKNOW or guppy with additional information concerning channel \
                               and time. Is extracted swiftly without elaborate checks.",
                         nargs='+',
                         metavar="file")
    mtarget.add_argument("--summary",
                         help="Data is in one or more summary file(s) generated by albacore \
                               or guppy.",
                         nargs='+',
                         metavar="file")
    mtarget.add_argument("--bam",
                         help="Data is in one or more sorted bam file(s).",
                         nargs='+',
                         metavar="file")
    mtarget.add_argument("--ubam",
                         help="Data is in one or more unmapped bam file(s).",
                         nargs='+',
                         metavar="file")
    mtarget.add_argument("--cram",
                         help="Data is in one or more sorted cram file(s).",
                         nargs='+',
                         metavar="file")
    return parser.parse_args(argv)


# Run the conversion only when this file is executed as a script; importing
# it as a module does not call main().
if __name__ == '__main__':
    main()