1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
"""
Print which fields of FAST5 files are static (constant) or variable within the
same experiment, in a hierarchical format.
The input is the h5dump output of any number of FAST5 files.
Pass the -c flag to also show the values of the constant fields.
Usage: python3 $0 [h5dump_FAST5_output...] [-c]
"""
from sys import argv
import re
# Class for a hdf5 group
class Group:
    """A node of the group hierarchy rebuilt from h5dump output.

    ``attrs`` keeps the group's children in insertion order. A child is
    either another ``Group`` (a nested group) or any other object, which is
    treated as the name of an attribute/dataset when printing.
    """

    def __init__(self, name, prev_group=None):
        """Create an empty group.

        Args:
            name: Name printed as the heading of this group.
            prev_group: The enclosing (parent) group, or None if not set.
        """
        self.name = name
        self.prev_group = prev_group
        self.attrs = []

    def add_attr(self, attr):
        """Append a child (a nested ``Group`` or an attribute name)."""
        self.attrs.append(attr)

    def print_hier(self, prefix=''):
        """Print every child of this group, recursing into nested groups.

        Args:
            prefix: Text printed before each entry; one extra space is
                appended for every nesting level.
        """
        for attr in self.attrs:
            if isinstance(attr, Group):
                print(f"{prefix}{attr.name}:")
                attr.print_hier(prefix + ' ')
            else:
                print(f"{prefix}{attr}")

    def print_hier_if_in(self, arr, show_val=False, prefix=''):
        """Print the hierarchy, keeping only attributes contained in ``arr``.

        Group headings are always printed, even when none of their
        attributes are contained in ``arr``.

        Args:
            arr: Container tested with ``in`` for each attribute name. When
                it is a dict (or dict subclass) it also supplies the values.
            show_val: If true and ``arr`` is a dict, print ``name: value``
                instead of just the name.
            prefix: Text printed before each entry; one extra space is
                appended for every nesting level.
        """
        # isinstance (rather than an exact type comparison) so that dict
        # subclasses such as OrderedDict also have their values shown.
        with_values = show_val and isinstance(arr, dict)
        for attr in self.attrs:
            if isinstance(attr, Group):
                print(f"{prefix}{attr.name}:")
                attr.print_hier_if_in(arr, show_val, prefix + ' ')
            elif attr in arr:
                if with_values:
                    print(f"{prefix}{attr}: {arr[attr]}")
                else:
                    print(f"{prefix}{attr}")

    def __str__(self):
        """Return the string form of the list of children."""
        return str(self.attrs)

    def __repr__(self):
        """Return the string form of the list of children."""
        return str(self.attrs)
# Command-line handling: every argument except the optional -c flag is the
# path of an h5dump text dump.
args = argv[1:]
show_const = "-c" in args
if show_const:
    args.remove("-c")

attrs = {}                 # attribute/dataset name -> set of "(0):" values seen
root_group = Group("/")    # hierarchy rebuilt from the first dump
var = []                   # names that did not have exactly one distinct value
const = {}                 # name -> its single value

bracket_pos = 0            # current brace nesting depth while building the hierarchy
group_pos = []             # brace depth at which each currently open group started
struct_made = False        # True once the hierarchy has been recorded
curr_group = root_group
curr_attr = None           # most recent ATTRIBUTE/DATASET name, if any
first_read = True
read_group_re = re.compile('"read_.+"')

for fname in args:
    # The context manager closes each dump once it has been read.
    with open(fname) as f:
        for raw_line in f:
            line = raw_line.split()
            # Blank lines carry no information and have no first token.
            if not line:
                continue
            keyword = line[0]

            # Add group to structure
            if not struct_made and keyword == "GROUP" and line[1] != '"/"':
                # Don't repeat the structure for the 'multi' type: the
                # second "read_..." group marks the hierarchy as complete.
                if read_group_re.match(line[1]):
                    if first_read:
                        first_read = False
                    else:
                        struct_made = True
                if not struct_made:
                    group_pos.append(bracket_pos)
                    new_group = Group(line[1][1:-1])
                    curr_group.add_attr(new_group)
                    new_group.prev_group = curr_group
                    curr_group = new_group

            if keyword == "ATTRIBUTE" or keyword == "DATASET":
                # Drop the surrounding quotes from the name.
                curr_attr = line[1][1:-1]
                # Create empty set for attribute if not already there
                attrs.setdefault(curr_attr, set())
                # Add attribute to structure
                if not struct_made:
                    curr_group.add_attr(curr_attr)
            # Data follows a (0); ignore it if no attribute has been seen yet.
            elif keyword == "(0):" and curr_attr is not None:
                attrs[curr_attr].add(" ".join(line[1:]))

            # Track brace depth only while the hierarchy is being built.
            if not struct_made and "{" in line:
                bracket_pos += 1
            if not struct_made and "}" in line:
                bracket_pos -= 1
                # Check if group is finished: the depth is back to where
                # the innermost open group started.
                if group_pos and bracket_pos == group_pos[-1]:
                    group_pos.pop()
                    curr_group = curr_group.prev_group

    # The hierarchy is taken from the first dump only.
    struct_made = True

# Decide what's constant and variable
for prop, values in attrs.items():
    if len(values) == 1:
        const[prop] = next(iter(values))
    else:
        var.append(prop)

# Print properties which are constant and variable
print("CONSTANT")
root_group.print_hier_if_in(const, show_const)
print("\nVARIABLE")
root_group.print_hier_if_in(var, show_const)
|