1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
|
# Copyright 2004 by Harry Zuzan. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""
Classes for accessing the information in Affymetrix cel files.
Functions:
read Read a cel file and store its contents in a Record
Classes:
Record Contains the information from a cel file
"""
# We use print in the doctests
from __future__ import print_function
try:
import numpy
except ImportError:
from Bio import MissingPythonDependencyError
raise MissingPythonDependencyError(
"Install NumPy if you want to use Bio.Affy.CelFile")
class Record(object):
"""Stores the information in a cel file
Example usage:
>>> from Bio.Affy import CelFile
>>> with open('Affy/affy_v3_example.CEL') as handle:
... c = CelFile.read(handle)
...
>>> print(c.ncols, c.nrows)
5 5
>>> print(c.intensities)
[[ 234. 170. 22177. 164. 22104.]
[ 188. 188. 21871. 168. 21883.]
[ 188. 193. 21455. 198. 21300.]
[ 188. 182. 21438. 188. 20945.]
[ 193. 20370. 174. 20605. 168.]]
>>> print(c.stdevs)
[[ 24. 34.5 2669. 19.7 3661.2]
[ 29.8 29.8 2795.9 67.9 2792.4]
[ 29.8 88.7 2976.5 62. 2914.5]
[ 29.8 76.2 2759.5 49.2 2762. ]
[ 38.8 2611.8 26.6 2810.7 24.1]]
>>> print(c.npix)
[[25 25 25 25 25]
[25 25 25 25 25]
[25 25 25 25 25]
[25 25 25 25 25]
[25 25 25 25 25]]
"""
def __init__(self):
self.version = None
self.GridCornerUL = None
self.GridCornerUR = None
self.GridCornerLR = None
self.GridCornerLL = None
self.DatHeader = None
self.Algorithm = None
self.AlgorithmParameters = None
self.NumberCells = None
self.intensities = None
self.stdevs = None
self.npix = None
self.nrows = None
self.ncols = None
self.nmask = None
self.mask = None
self.noutliers = None
self.outliers = None
self.modified = None
def read(handle):
"""
Read the information in a cel file, and store it in a Record.
"""
# Needs error handling.
# Needs to know the chip design.
record = Record()
section = ""
for line in handle:
if not line.strip():
continue
# Set current section
if line[:5] == "[CEL]":
section = "CEL"
elif line[:8] == "[HEADER]":
section = "HEADER"
elif line[:11] == "[INTENSITY]":
section = "INTENSITY"
record.intensities = numpy.zeros((record.nrows, record.ncols))
record.stdevs = numpy.zeros((record.nrows, record.ncols))
record.npix = numpy.zeros((record.nrows, record.ncols), int)
elif line[:7] == "[MASKS]":
section = "MASKS"
record.mask = numpy.zeros((record.nrows, record.ncols))
elif line[:10] == "[OUTLIERS]":
section = "OUTLIERS"
record.outliers = numpy.zeros((record.nrows, record.ncols))
elif line[:10] == "[MODIFIED]":
section = "MODIFIED"
record.modified = numpy.zeros((record.nrows, record.ncols))
elif line[0] == "[":
# This would be an unknown section
section = ""
elif section == "CEL":
keyword, value = line.split("=", 1)
if keyword == 'Version':
record.version = int(value)
elif section == "HEADER":
# Set record.ncols and record.nrows, remaining data goes into
# record.header dict
keyword, value = line.split("=", 1)
if keyword == "Cols":
record.ncols = int(value)
elif keyword == "Rows":
record.nrows = int(value)
elif keyword == 'GridCornerUL':
x, y = value.split()
record.GridCornerUL = (int(x), int(y))
elif keyword == 'GridCornerUR':
x, y = value.split()
record.GridCornerUR = (int(x), int(y))
elif keyword == 'GridCornerLR':
x, y = value.split()
record.GridCornerLR = (int(x), int(y))
elif keyword == 'GridCornerLL':
x, y = value.split()
record.GridCornerLL = (int(x), int(y))
elif keyword == 'DatHeader':
record.DatHeader = value.strip('\n\r')
elif keyword == 'Algorithm':
record.Algorithm = value.strip('\n\r')
elif keyword == 'AlgorithmParameters':
record.AlgorithmParameters = value.strip('\n\r')
elif section == "INTENSITY":
if "NumberCells" in line:
record.NumberCells = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.intensities[x, y] = float(words[2])
record.stdevs[x, y] = float(words[3])
record.npix[x, y] = int(words[4])
elif section == "MASKS":
if "NumberCells" in line:
record.nmask = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.mask[x, y] = int(1)
elif section == "OUTLIERS":
if "NumberCells" in line:
record.noutliers = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.outliers[x, y] = int(1)
elif section == "MODIFIED":
if "NumberCells" in line:
record.nmodified = int(line.split("=", 1)[1])
elif "CellHeader" in line:
pass
else:
words = line.split()
y = int(words[0])
x = int(words[1])
record.modified[x, y] = float(words[2])
else:
continue
return record
if __name__ == "__main__":
from Bio._utils import run_doctest
run_doctest()
|