#===========================================================================
#
# PUBLIC DOMAIN NOTICE
# National Center for Biotechnology Information
#
# This software/database is a "United States Government Work" under the
# terms of the United States Copyright Act. It was written as part of
# the author's official duties as a United States Government employee and
# thus cannot be copyrighted. This software/database is freely available
# to the public for use. The National Library of Medicine and the U.S.
# Government have not placed any restriction on its use or reproduction.
#
# Although all reasonable efforts have been taken to ensure the accuracy
# and reliability of the software and data, the NLM and the U.S.
# Government do not and cannot warrant the performance or results that
# may be obtained by using this software or data. The NLM and the U.S.
# Government disclaim all warranties, express or implied, including
# warranties of performance, merchantability or fitness for any particular
# purpose.
#
# Please cite the author in any work or product based on this material.
#
#===========================================================================
#
import sys
import traceback
from ngs import NGS
from ngs.ErrorMsg import ErrorMsg
from ngs.ReadCollection import ReadCollection
from ngs.Read import Read
from ngs.ReadIterator import ReadIterator
def _read_window(read_count, splitNum, splitNo):
    """Return ``(first, count)`` for chunk ``splitNo`` out of ``splitNum``.

    ``first`` is the zero-based offset of the chunk's first read within
    ``read_count`` reads and ``count`` is the number of reads in the chunk.
    Boundaries are rounded to the nearest read, so neighbouring chunks share
    the same boundary value and never overlap.
    """
    # float() forces true division on Python 2 as well; with floor division
    # the chunk size is truncated and the trailing reads fall in no chunk.
    chunk = float(read_count) / splitNum
    first = int(round(chunk * (splitNo - 1)))
    # Clamp in case rounding pushes the upper boundary past the last read.
    next_first = min(int(round(chunk * splitNo)), read_count)
    return first, next_first - first


def run(acc, splitNum, splitNo):
    """Print the reads and fragments of one chunk of an accession.

    The reads of ``acc`` are divided into ``splitNum`` consecutive chunks of
    (nearly) equal size and chunk number ``splitNo`` is iterated.  For every
    read its id is printed, followed by one line per non-empty fragment with
    the fragment bases and whether the fragment is aligned.  A summary line
    with the number of reads visited is printed at the end.

    Args:
        acc: accession to open with ``NGS.openReadCollection``.
        splitNum: total number of chunks; must be at least 1.
        splitNo: 1-based number of the chunk to process; must lie in
            ``1..splitNum``.

    Raises:
        ValueError: if ``splitNum`` or ``splitNo`` is out of range.
    """
    # Reject bad chunk arguments before opening anything: they would
    # otherwise cause a division by zero or a negative offset/count.
    if splitNum < 1:
        raise ValueError(
            "NumChunks must be at least 1, got {}".format(splitNum))
    if not 1 <= splitNo <= splitNum:
        raise ValueError(
            "ChunkNo must be between 1 and {}, got {}".format(splitNum, splitNo))

    # open requested accession using SRA implementation of the API
    with NGS.openReadCollection(acc) as collection:
        run_name = collection.getName()

        # compute window to iterate through
        first, count = _read_window(collection.getReadCount(), splitNum, splitNo)

        # start iterator on reads; `first` is a zero-based offset, hence the +1
        with collection.getReadRange(first + 1, count, Read.all) as it:
            i = 0
            while it.nextRead():
                i += 1

                print (it.getReadId())

                # iterate through fragments
                while it.nextFragment():
                    bases = it.getFragmentBases()
                    if bases:
                        print ("\t{} - {}".format(bases, "aligned" if it.isAligned() else "unaligned"))

                print ("\n")

            print ("Read {} spots for {}".format(i, run_name))
# Script driver: expects exactly three arguments (accession, number of
# chunks, 1-based chunk number) and exits with status 1 on any failure.
if len(sys.argv) != 4:
    print ("Usage: FragTest accession NumChunks ChunkNo\n")
    # sys.exit rather than the site-provided exit(), which is meant for the
    # interactive shell and is not defined when site is disabled.
    sys.exit(1)
else:
    try:
        run(sys.argv[1], int(sys.argv[2]), int(sys.argv[3]))
    except ErrorMsg as x:
        # Print the ErrorMsg itself first, then the Python traceback.
        print (x)
        traceback.print_exc()
        # x.printStackTrace - not implemented
        sys.exit(1)
    except BaseException:
        # Covers everything else, including non-integer chunk arguments and
        # KeyboardInterrupt: report the traceback and exit with status 1.
        traceback.print_exc()
        sys.exit(1)
|