File: SummarizeAssemblies.py

package info (click to toggle)
shasta 0.14.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 29,636 kB
  • sloc: cpp: 82,262; python: 2,348; makefile: 222; sh: 143
file content (68 lines) | stat: -rwxr-xr-x 2,100 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python3

import argparse
import glob
import json
import collections


helpMessage = """
Creates a single csv file containing summaries for the Shasta assemblies
present in the current directory.
"""

# The script defines no options of its own, so the parser only provides
# --help and rejects unexpected arguments.
parser = argparse.ArgumentParser(description = helpMessage)
arguments = parser.parse_args()

# Find the json files containing summaries for the individual assemblies.
# Each one is expected one directory level below the current directory.
# The names are sorted because glob does not guarantee an order, and the
# order decides both the column order of the output and which assembly
# is used below to define the rows.
jsonFileNames = sorted(glob.glob('*/AssemblySummary.json'))
if not jsonFileNames:
    print('No Shasta assemblies found in the current directory.\n'
        'Invoke this script from a directory containing one or more\n'
        'Shasta assemblies.')
    # Raise SystemExit directly instead of calling exit(), which is a
    # helper installed by the site module for interactive sessions.
    raise SystemExit(1)

# Gather the jsons for all the assemblies.
# OrderedDict keeps sections and items in the order they appear in the
# files, which is the order the rows are written in.
jsons = []
for jsonFileName in jsonFileNames:
    with open(jsonFileName, 'r') as jsonFile:
        jsons.append(json.load(jsonFile, object_pairs_hook=collections.OrderedDict))

# The first assembly defines which sections and items become rows.
# Every other assembly is looked up with the same keys, so a key that is
# missing from another assembly raises KeyError.
firstAssemblyJson = jsons[0]

# Open the output file; the with block guarantees it is flushed and
# closed even if one of the lookups below fails.
with open('AssembliesSummary.csv', 'w') as out:

    # Write the header line: one column per assembly, named after the
    # directory that contains its AssemblySummary.json.
    out.write('Assembly,')
    for jsonFileName in jsonFileNames:
        assemblyName = jsonFileName[:jsonFileName.find('/')]
        out.write('%s,' % assemblyName)
    out.write('\n')

    # Loop over sections.
    for sectionName in firstAssemblyJson:
        if sectionName == 'Comment':
            continue
        firstAssemblySection = firstAssemblyJson[sectionName]

        # Special treatment of section "Reads discarded on input"
        # which contains reads and bases for each item.
        if sectionName == 'Reads discarded on input':
            for itemName in firstAssemblySection:
                # The sub-item names come from the first assembly, like
                # every other row label. (Do not name a loop variable
                # "json": that would shadow the json module.)
                for x in firstAssemblySection[itemName]:
                    out.write('"%s: %s: %s",' % (sectionName, itemName, x))
                    for assemblyJson in jsons:
                        out.write('%s,' % assemblyJson[sectionName][itemName][x])
                    out.write('\n')
            continue

        # Loop over items in this section.
        for itemName in firstAssemblySection:
            out.write('"%s: %s",' % (sectionName, itemName))

            # Write the values for all the assemblies.
            for assemblyJson in jsons:
                out.write('%s,' % assemblyJson[sectionName][itemName])

            out.write('\n')