1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Copyright (c) 2011-2013, The BIOM Format Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
# -----------------------------------------------------------------------------
from __future__ import division
from pyqi.core.command import (Command, CommandIn, CommandOut,
ParameterCollection)
from numpy import std
from operator import itemgetter
from biom.util import compute_counts_per_sample_stats
# Module-level authorship, licensing and contact metadata.
__author__ = "Greg Caporaso"
__copyright__ = "Copyright 2011-2013, The BIOM Format Development Team"
__credits__ = ["Greg Caporaso", "Daniel McDonald", "Jose Antonio Navas Molina"]
__license__ = "BSD"
__maintainer__ = "Greg Caporaso"
__email__ = "gregcaporaso@gmail.com"
class TableSummarizer(Command):
    """Produce a plain-text summary of a BIOM table.

    The summary is returned as a list of lines covering the number of
    samples and observations, count statistics per sample, the metadata
    categories present on each axis, and a per-sample breakdown sorted by
    ascending count.

    Example usage:
        from biom.commands.table_summarizer import TableSummarizer
        from biom.parse import parse_biom_table
        c = TableSummarizer()
        table_f = open("table.biom")
        t = parse_biom_table(table_f)
        table_f.seek(0)
        result = c(table=(t,None))
        result = c(table=(t,None),qualitative=True)
        result = c(table=(t,table_f),qualitative=True)
        table_f.close()
    """
    BriefDescription = "Summarize sample or observation data in a BIOM table"
    LongDescription = ("Provides details on the observation counts per sample,"
                       " including summary statistics, as well as metadata "
                       "categories associated with samples and observations.")

    CommandIns = ParameterCollection([
        CommandIn(Name='table',
                  DataType=tuple,
                  Description='the input BIOM table',
                  Required=True),
        CommandIn(Name='qualitative',
                  DataType=bool,
                  Description=('Present counts as number of unique '
                               'observation ids per sample, rather than '
                               'counts of observations per sample.'),
                  Required=False,
                  Default=False),
        CommandIn(Name='observations',
                  DataType=bool,
                  Default=False,
                  Description=('Summarize over observations'))
    ])

    CommandOuts = ParameterCollection([
        CommandOut(Name='biom_summary',
                   DataType=list,
                   Description='The table summary')
    ])

    def run(self, **kwargs):
        """Summarize the table and return the report lines.

        Keyword arguments (all read by key from ``kwargs``):
            table: a 2-tuple whose first element is the table object; the
                second element is accepted but not used here.
            qualitative: forwarded to ``compute_counts_per_sample_stats``
                and used to pick the section headings.
            observations: when true, the table is transposed before
                summarizing and the sample/observation labels are swapped
                back so the report still describes the original table.

        Returns:
            dict with a single key ``'biom_summary'`` mapping to the list
            of summary lines (strings).
        """
        qualitative = kwargs['qualitative']
        by_observations = kwargs['observations']
        # Only the table object is needed; the tuple's second slot is ignored.
        table, _ = kwargs['table']

        if by_observations:
            table = table.transpose()

        (min_counts, max_counts, median_counts, mean_counts,
         counts_per_samp) = compute_counts_per_sample_stats(table,
                                                            qualitative)

        # Materialize the values: on Python 3 ``.values()`` is a view, which
        # numpy.std cannot treat as a numeric sequence.
        counts_per_sample_values = list(counts_per_samp.values())

        sample_md = table.metadata()
        if sample_md is None:
            sample_md_keys = ["None provided"]
        else:
            sample_md_keys = sample_md[0].keys()

        observation_md = table.metadata(axis='observation')
        if observation_md is None:
            observation_md_keys = ["None provided"]
        else:
            observation_md_keys = observation_md[0].keys()

        num_samples = len(table.ids())
        num_observations = len(table.ids(axis='observation'))

        if by_observations:
            # The table was transposed above, so its "sample" axis is the
            # caller's observation axis; swap so labels match the input.
            num_samples, num_observations = num_observations, num_samples
            sample_md_keys, observation_md_keys = (observation_md_keys,
                                                   sample_md_keys)

        lines = [
            'Num samples: %d' % num_samples,
            'Num observations: %d' % num_observations,
        ]

        if not qualitative:
            total_count = sum(counts_per_sample_values)
            lines.append('Total count: %d' % total_count)
            # NOTE(review): density is emitted only together with the total
            # count -- confirm this nesting against the upstream source.
            lines.append('Table density (fraction of non-zero values): %1.3f' %
                         table.get_table_density())
        lines.append('')

        if qualitative:
            if by_observations:
                lines.append('Sample/observations summary:')
            else:
                lines.append('Observations/sample summary:')
        else:
            lines.append('Counts/sample summary:')

        lines.append(' Min: %r' % min_counts)
        lines.append(' Max: %r' % max_counts)
        lines.append(' Median: %1.3f' % median_counts)
        lines.append(' Mean: %1.3f' % mean_counts)
        lines.append(' Std. dev.: %1.3f' % std(counts_per_sample_values))

        lines.append(' Sample Metadata Categories: %s' %
                     '; '.join(sample_md_keys))
        lines.append(' Observation Metadata Categories: %s' %
                     '; '.join(observation_md_keys))
        lines.append('')

        if qualitative:
            lines.append('Observations/sample detail:')
        else:
            lines.append('Counts/sample detail:')

        # Per-sample listing, ordered by ascending count.
        for k, v in sorted(counts_per_samp.items(), key=itemgetter(1)):
            lines.append(' %s: %r' % (k, v))

        return {'biom_summary': lines}
# Module-level alias for the command class defined in this file.
# NOTE(review): presumably the name the pyqi framework looks up to
# instantiate the command -- confirm against pyqi's loader.
CommandConstructor = TableSummarizer
|