1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
|
'''
Code for reading and managing ECOSTRESS spectral library data.
'''
from __future__ import absolute_import, division, print_function, unicode_literals
import itertools
import logging
from spectral.utilities.python23 import IS_PYTHON3
from .aster import AsterDatabase, Signature
if IS_PYTHON3:
readline = lambda fin: fin.readline()
open_file = lambda filename: open(filename, encoding='iso-8859-1')
else:
readline = lambda fin: fin.readline().decode('iso-8859-1')
open_file = lambda filename: open(filename)
def read_ecostress_file(filename):
'''Reads an ECOSTRESS v1 spectrum file.'''
logger = logging.getLogger('spectral')
lines = open_file(filename).readlines()
if not IS_PYTHON3:
lines = [line.decode('iso-8859-1') for line in lines]
metaline_to_pair = lambda line: [x.strip() for x in line.split(':', 1)]
s = Signature()
# Read sample metadata
for i in itertools.count():
if lines[i].strip().startswith('Measurement'):
break
pair = metaline_to_pair(lines[i])
try:
s.sample[pair[0].lower()] = pair[1]
except:
logger.error('Failed to parse line: {}: {}'.format(i, lines[i]))
raise
# Read measurment metadata
for j in itertools.count(i):
if len(lines[j].strip()) == 0:
break
pair = metaline_to_pair(lines[j])
s.measurement[pair[0].lower()] = pair[1]
# Read signature spectrum
pairs = []
for line in lines[j:]:
line = line.strip()
if len(line) == 0:
continue
pair = line.split()
nItems = len(pair)
# Try to handle invalid values on signature lines
if nItems == 1:
logger.info('Skipping single item (%s) on signature line for %s',
pair[0], filename)
continue
elif nItems > 2:
logger.info('Skipping more than 2 values on signature line for %s',
filename)
continue
try:
x = float(pair[0])
except:
logger.info('Corrupt signature line in file %s', filename)
if x == 0:
logger.info('Skipping zero wavelength value in file %s', filename)
continue
elif x < 0:
logger.info('Skipping negative wavelength value in file %s',
filename)
continue
pairs.append(pair)
[x, y] = [list(v) for v in zip(*pairs)]
# Make sure wavelengths are ascending
if float(x[0]) > float(x[-1]):
x.reverse()
y.reverse()
s.x = [float(val) for val in x]
s.y = [float(val) for val in y]
s.measurement['first x value'] = x[0]
s.measurement['last x value'] = x[-1]
s.measurement['number of x values'] = len(x)
return s
class EcostressDatabase(AsterDatabase):
'''A relational database to manage ECOSTRESS spectral library data.'''
@classmethod
def create(cls, filename, data_dir=None):
'''Creates an ECOSTRESS relational database by parsing ECOSTRESS data files.
Arguments:
`filename` (str):
Name of the new sqlite database file to create.
`data_dir` (str):
Path to the directory containing ECOSTRESS library data files. If
this argument is not provided, no data will be imported.
Returns:
An :class:`~spectral.database.EcostressDatabase` object.
Example::
>>> EcostressDatabase.create("ecostress.db", "./eco_data_ver1/")
This is a class method (it does not require instantiating an
EcostressDatabase object) that creates a new database by parsing all of the
files in the ECOSTRESS library data directory. Normally, this should only
need to be called once. Subsequently, a corresponding database object
can be created by instantiating a new EcostressDatabase object with the
path the database file as its argument. For example::
>>> from spectral.database.ecostress import EcostressDatabase
>>> db = EcostressDatabase("~/ecostress.db")
'''
import os
if os.path.isfile(filename):
raise Exception('Error: Specified file already exists.')
db = cls()
db._connect(filename)
for schema in cls.schemas:
db.cursor.execute(schema)
if data_dir:
db._import_files(data_dir)
return db
def read_file(self, filename):
return read_ecostress_file(filename)
def _import_files(self, data_dir, ignore=None):
'''Import each file from the ECOSTRESS library into the database.'''
from glob import glob
import numpy
import os
logger = logging.getLogger('spectral')
if not os.path.isdir(data_dir):
raise Exception('Error: Invalid directory name specified.')
if ignore is not None:
filesToIgnore = [data_dir + '/' + f for f in ignore]
else:
filesToIgnore = []
numFiles = 0
numIgnored = 0
sigID = 1
class Sig:
pass
sigs = []
for f in glob(data_dir + '/*spectrum.txt'):
if f in filesToIgnore:
numIgnored += 1
continue
logger.info('Importing ECOSTRESS file %s.', f)
numFiles += 1
sig = self.read_file(f)
s = sig.sample
if 'particle size' in s:
if s['particle size'].lower == 'liquid':
phase = 'liquid'
else:
phase = 'solid'
else:
phase = 'unknown'
s['particle size'] = 'none'
if 'sample no.' in s:
sampleNum = s['sample no.']
else:
sampleNum = ''
subclass = s.get('subclass', 'none')
if subclass == 'none' and 'genus' in s:
subclass = s['genus']
id = self._add_sample(s['name'], s['type'], s['class'], subclass,
s['particle size'], sampleNum, s['owner'],
s['origin'], phase, s['description'])
instrument = os.path.basename(f).split('.')[1]
environment = 'lab'
m = sig.measurement
# Correct numerous mispellings of "reflectance" and "transmittance"
yUnit = m['y units']
if yUnit.find('reflectence') > -1:
yUnit = 'reflectance (percent)'
elif yUnit.find('trans') == 0:
yUnit = 'transmittance (percent)'
measurement = m['measurement']
if measurement[0] == 't':
measurement = 'transmittance'
self._add_signature(id, -1, instrument, environment, measurement,
m['x units'], yUnit, m['first x value'],
m['last x value'], sig.x, sig.y)
if numFiles == 0:
logger.warning('No ECOSTRESS data files were found in "%s".',
data_dir)
else:
logger.info('Processed %d ECOSTRESS files.', numFiles)
if numIgnored > 0:
msg = 'Ignored the following %d bad files:' % (numIgnored)
for f in filesToIgnore:
msg += '\n\t' + f
logger.info(msg)
return sigs
|