1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
|
import glob
import os.path as op
import sys
import click
from .. import util
from . import cli
from ._util import exit_on_broken_pipe
@cli.command()
@click.argument(
"chromsizes",
metavar="CHROMSIZES_PATH"
)
@click.argument(
"fasta",
metavar="FASTA_PATH"
)
@click.argument(
"enzyme",
metavar="ENZYME"
)
@click.option(
"--out", "-o",
help="Output file (defaults to stdout)"
)
@click.option(
"--header",
"-H",
help="Print the header of column names as the first row.",
is_flag=True,
default=False,
show_default=True,
)
@click.option(
"--rel-ids",
"-i",
type=click.Choice(["0", "1"]),
help="Include a column of relative bin IDs for each chromosome. "
"Choose whether to report them as 0- or 1-based.",
)
@exit_on_broken_pipe(1)
def digest(chromsizes, fasta, enzyme, out, header, rel_ids):
"""
Generate fragment-delimited genomic bins.
Output a genome segmentation of restriction fragments as a BED file.
CHROMSIZES_PATH : UCSC-like chromsizes file, with chromosomes in desired
order.
FASTA_PATH : Genome assembly FASTA file or folder containing FASTA files
(uncompressed).
ENZYME : Name of restriction enzyme
"""
chromsizes = util.read_chromsizes(chromsizes, all_names=True)
chroms = list(chromsizes.keys())
# Load sequences
if op.isdir(fasta):
filepaths = glob.glob(op.join(fasta, "*.fa"))
filepaths.extend(glob.glob(op.join(fasta, "*.fasta")))
else:
filepaths = [fasta]
fasta_records = util.load_fasta(chroms, *filepaths)
# Digest sequences
frags = util.digest(fasta_records, enzyme)
if rel_ids is not None:
frags["id"] = frags.groupby("chrom").cumcount()
if int(rel_ids) == 1:
frags["id"] += 1
# Write output
if out is None:
f = sys.stdout
else:
f = open(out, "w")
if header:
frags[0:0].to_csv(f, sep="\t", index=False, header=True)
frags.to_csv(f, sep="\t", index=False, header=False)
f.flush()
|