1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
# Copyright 2001 by Katharine Lindner. All rights reserved.
# Copyright 2006 by PeterC. All rights reserved.
# Copyright 2007 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Parser for files from NCBI's Gene Expression Omnibus (GEO).
http://www.ncbi.nlm.nih.gov/geo/
"""
from . import Record
def _read_key_value(line):
words = line[1:].split("=", 1)
try:
key, value = words
value = value.strip()
except ValueError:
key = words[0]
value = ""
key = key.strip()
return key, value
def parse(handle):
    """Iterate over the records found in *handle*, yielding them one by one.

    *handle* is any iterable of text lines.  Lines are dispatched on their
    first character:

    - ``^`` starts a new ``Record.Record``; its key/value pair is stored
      as ``entity_type`` / ``entity_id``.  The previous record, if any,
      is yielded first.
    - ``!`` adds an entry to ``entity_attributes``.  A key that occurs
      more than once is collected into a list of values.  The table
      begin/end marker lines are skipped.
    - ``#`` adds a column definition to ``col_defs``.
    - anything else is split on tabs and appended to ``table_rows``.

    Empty lines are ignored.  Nothing is yielded when the input contains
    no ``^`` line at all.

    Raises:
        ValueError: if an attribute, column definition or table row
            appears before the first ``^`` line.
    """
    table_markers = (
        "!Sample_table_begin",
        "!Sample_table_end",
        "!Platform_table_begin",
        "!Platform_table_end",
    )
    record = None
    for line in handle:
        line = line.strip("\n").strip("\r")
        if not line:
            continue  # Ignore empty lines
        c = line[0]
        if c == "^":
            # A new entity begins: hand the finished record to the caller.
            if record is not None:
                yield record
            record = Record.Record()
            record.entity_type, record.entity_id = _read_key_value(line)
            continue
        if c == "!" and line in table_markers:
            continue
        if record is None:
            # Without this check the code below would fail with an
            # opaque AttributeError on None.
            raise ValueError(
                "Line found before the first '^' entity line: %r" % line
            )
        if c == "!":
            key, value = _read_key_value(line)
            attributes = record.entity_attributes
            if key not in attributes:
                attributes[key] = value
            elif isinstance(attributes[key], list):
                attributes[key].append(value)
            else:
                # Second occurrence of the key: promote the scalar to a list.
                attributes[key] = [attributes[key], value]
        elif c == "#":
            key, value = _read_key_value(line)
            # Kept as an assert so the exception type seen by callers
            # does not change; note that it is skipped under ``python -O``.
            assert key not in record.col_defs
            record.col_defs[key] = value
        else:
            row = line.split("\t")
            record.table_rows.append(row)
    # Only yield if at least one record was started; otherwise an empty
    # input would produce a spurious None.
    if record is not None:
        yield record
|