File: _dicom.py

python-imageio 2.4.1-3
# -*- coding: utf-8 -*-
# imageio is distributed under the terms of the (new) BSD License.

""" Plugin for reading DICOM files.
"""

# todo: Use pydicom:
# * Note: is not py3k ready yet
# * Allow reading the full meta info
# I think we can more or less replace the SimpleDicomReader with a
# pydicom.Dataset. For series, only need to read the full info from one
# file: speed still high
# * Perhaps allow writing?

from __future__ import absolute_import, print_function, division

import sys
import os
import struct

import numpy as np

from ..core import string_types


# Determine endianity of system
sys_is_little_endian = sys.byteorder == "little"

# Define a dictionary that contains the tags that we would like to know
MINIDICT = {
    (0x7FE0, 0x0010): ("PixelData", "OB"),
    # Date and time
    (0x0008, 0x0020): ("StudyDate", "DA"),
    (0x0008, 0x0021): ("SeriesDate", "DA"),
    (0x0008, 0x0022): ("AcquisitionDate", "DA"),
    (0x0008, 0x0023): ("ContentDate", "DA"),
    (0x0008, 0x0030): ("StudyTime", "TM"),
    (0x0008, 0x0031): ("SeriesTime", "TM"),
    (0x0008, 0x0032): ("AcquisitionTime", "TM"),
    (0x0008, 0x0033): ("ContentTime", "TM"),
    # With what, where, by whom?
    (0x0008, 0x0060): ("Modality", "CS"),
    (0x0008, 0x0070): ("Manufacturer", "LO"),
    (0x0008, 0x0080): ("InstitutionName", "LO"),
    # Descriptions
    (0x0008, 0x1030): ("StudyDescription", "LO"),
    (0x0008, 0x103E): ("SeriesDescription", "LO"),
    # UID's
    (0x0008, 0x0016): ("SOPClassUID", "UI"),
    (0x0008, 0x0018): ("SOPInstanceUID", "UI"),
    (0x0020, 0x000D): ("StudyInstanceUID", "UI"),
    (0x0020, 0x000E): ("SeriesInstanceUID", "UI"),
    (0x0008, 0x0117): ("ContextUID", "UI"),
    # Numbers
    (0x0020, 0x0011): ("SeriesNumber", "IS"),
    (0x0020, 0x0012): ("AcquisitionNumber", "IS"),
    (0x0020, 0x0013): ("InstanceNumber", "IS"),
    (0x0020, 0x0014): ("IsotopeNumber", "IS"),
    (0x0020, 0x0015): ("PhaseNumber", "IS"),
    (0x0020, 0x0016): ("IntervalNumber", "IS"),
    (0x0020, 0x0017): ("TimeSlotNumber", "IS"),
    (0x0020, 0x0018): ("AngleNumber", "IS"),
    (0x0020, 0x0019): ("ItemNumber", "IS"),
    (0x0020, 0x0020): ("PatientOrientation", "CS"),
    (0x0020, 0x0030): ("ImagePosition", "CS"),
    (0x0020, 0x0032): ("ImagePositionPatient", "CS"),
    (0x0020, 0x0035): ("ImageOrientation", "CS"),
    (0x0020, 0x0037): ("ImageOrientationPatient", "CS"),
    # Patient information
    (0x0010, 0x0010): ("PatientName", "PN"),
    (0x0010, 0x0020): ("PatientID", "LO"),
    (0x0010, 0x0030): ("PatientBirthDate", "DA"),
    (0x0010, 0x0040): ("PatientSex", "CS"),
    (0x0010, 0x1010): ("PatientAge", "AS"),
    (0x0010, 0x1020): ("PatientSize", "DS"),
    (0x0010, 0x1030): ("PatientWeight", "DS"),
    # Image specific (required to construct numpy array)
    (0x0028, 0x0002): ("SamplesPerPixel", "US"),
    (0x0028, 0x0008): ("NumberOfFrames", "IS"),
    (0x0028, 0x0100): ("BitsAllocated", "US"),
    (0x0028, 0x0101): ("BitsStored", "US"),
    (0x0028, 0x0102): ("HighBit", "US"),
    (0x0028, 0x0103): ("PixelRepresentation", "US"),
    (0x0028, 0x0010): ("Rows", "US"),
    (0x0028, 0x0011): ("Columns", "US"),
    (0x0028, 0x1052): ("RescaleIntercept", "DS"),
    (0x0028, 0x1053): ("RescaleSlope", "DS"),
    # Image specific (for the user)
    (0x0028, 0x0030): ("PixelSpacing", "DS"),
    (0x0018, 0x0088): ("SliceSpacing", "DS"),
}

# Define some special tags:
# See PS 3.5-2008 section 7.5 (p.40)
ItemTag = (0xFFFE, 0xE000)  # start of Sequence Item
ItemDelimiterTag = (0xFFFE, 0xE00D)  # end of Sequence Item
SequenceDelimiterTag = (0xFFFE, 0xE0DD)  # end of Sequence of undefined length

# Define set of groups that we're interested in (so we can quickly skip others)
GROUPS = set([key[0] for key in MINIDICT.keys()])
VRS = set([val[1] for val in MINIDICT.values()])
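
# Lookup sketch: a data element with tag (0x0008, 0x0060) maps to
# ("Modality", "CS") in MINIDICT, so after the matching VR converter has run
# its value ends up in info["Modality"]; elements whose group is not in
# GROUPS are skipped without consulting MINIDICT.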


class NotADicomFile(Exception):
    pass


class CompressedDicom(RuntimeError):
    pass


class SimpleDicomReader(object):
    """ 
    This class provides reading of pixel data from DICOM files. It is 
    focussed on getting the pixel data, not the meta info.
    
    To use, first create an instance of this class (giving it 
    a file object or filename). Next use the info attribute to
    get a dict of the meta data. The loading of pixel data is
    deferred until get_numpy_array() is called.
    
    Comparison with Pydicom
    -----------------------
    
    This code focusses on getting the pixel data out, which allows some
    shortcuts, resulting in the code being much smaller.
    
    Since the processing of data elements is much cheaper (it skips a lot
    of tags), this code is about 3x faster than pydicom (except for the
    deflated DICOM files).
    
    This class does borrow some code (and ideas) from the pydicom
    project, and (to the best of our knowledge) has the same limitations
    as pydicom with regard to the type of files that it can handle.
    
    Limitations
    -----------

    For more advanced DICOM processing, please check out pydicom.
    
      * Only a predefined subset of data elements (meta information) is read.
      * This is a reader; it can not write DICOM files.
      * (just like pydicom) it can handle none of the compressed DICOM
        formats except for "Deflated Explicit VR Little Endian"
        (1.2.840.10008.1.2.1.99). 
    
    """

    def __init__(self, file):
        # Open file if filename given
        if isinstance(file, string_types):
            self._filename = file
            self._file = open(file, "rb")
        else:
            self._filename = "<unknown file>"
            self._file = file
        # Init variable to store position and size of pixel data
        self._pixel_data_loc = None
        # The meta header is always explicit and little endian
        self.is_implicit_VR = False
        self.is_little_endian = True
        self._unpackPrefix = "<"
        # Dict to store data elements of interest in
        self._info = {}
        # VR Conversion
        self._converters = {
            # Numbers
            "US": lambda x: self._unpack("H", x),
            "UL": lambda x: self._unpack("L", x),
            # Numbers encoded as strings
            "DS": lambda x: self._splitValues(x, float, "\\"),
            "IS": lambda x: self._splitValues(x, int, "\\"),
            # strings
            "AS": lambda x: x.decode("ascii", "ignore").strip("\x00"),
            "DA": lambda x: x.decode("ascii", "ignore").strip("\x00"),
            "TM": lambda x: x.decode("ascii", "ignore").strip("\x00"),
            "UI": lambda x: x.decode("ascii", "ignore").strip("\x00"),
            "LO": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
            "CS": lambda x: self._splitValues(x, float, "\\"),
            "PN": lambda x: x.decode("utf-8", "ignore").strip("\x00").rstrip(),
        }
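        # Conversion sketch: a DS (decimal string) value of b"0.75\\0.75"
        # becomes the tuple (0.75, 0.75), while a single value such as b"2.5"
        # becomes the float 2.5 (see _splitValues below).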

        # Initiate reading
        self._read()

    @property
    def info(self):
        return self._info

    def _splitValues(self, x, type, splitter):
        s = x.decode("ascii").strip("\x00")
        try:
            if splitter in s:
                return tuple([type(v) for v in s.split(splitter) if v.strip()])
            else:
                return type(s)
        except ValueError:
            return s

    def _unpack(self, fmt, value):
        return struct.unpack(self._unpackPrefix + fmt, value)[0]

    # Really only so we need minimal changes to _pixel_data_numpy
    def __iter__(self):
        return iter(self._info.keys())

    def __getattr__(self, key):
        info = object.__getattribute__(self, "_info")
        if key in info:
            return info[key]
        return object.__getattribute__(self, key)  # pragma: no cover

    def _read(self):
        f = self._file
        # Check prefix after preamble
        f.seek(128)
        if f.read(4) != b"DICM":
            raise NotADicomFile("Not a valid DICOM file.")
        # Read
        self._read_header()
        self._read_data_elements()
        self._get_shape_and_sampling()
        # Close if done, reopen if necessary to read pixel data
        if os.path.isfile(self._filename):
            self._file.close()
            self._file = None

    def _readDataElement(self):
        f = self._file
        # Get group  and element
        group = self._unpack("H", f.read(2))
        element = self._unpack("H", f.read(2))
        # Get value length
        if self.is_implicit_VR:
            vl = self._unpack("I", f.read(4))
        else:
            vr = f.read(2)
            if vr in (b"OB", b"OW", b"SQ", b"UN"):
                reserved = f.read(2)  # noqa
                vl = self._unpack("I", f.read(4))
            else:
                vl = self._unpack("H", f.read(2))
        # Get value
        if group == 0x7FE0 and element == 0x0010:
            here = f.tell()
            self._pixel_data_loc = here, vl
            f.seek(here + vl)
            return group, element, b"Deferred loading of pixel data"
        else:
            if vl == 0xFFFFFFFF:
                value = self._read_undefined_length_value()
            else:
                value = f.read(vl)
            return group, element, value

    def _read_undefined_length_value(self, read_size=128):
        """ Copied (in compacted form) from PyDicom
        Copyright Darcy Mason.
        """
        fp = self._file
        # data_start = fp.tell()
        search_rewind = 3
        bytes_to_find = struct.pack(
            self._unpackPrefix + "HH", SequenceDelimiterTag[0], SequenceDelimiterTag[1]
        )

        found = False
        value_chunks = []
        while not found:
            chunk_start = fp.tell()
            bytes_read = fp.read(read_size)
            if len(bytes_read) < read_size:
                # try again,
                # if still don't get required amount, this is last block
                new_bytes = fp.read(read_size - len(bytes_read))
                bytes_read += new_bytes
                if len(bytes_read) < read_size:
                    raise EOFError(
                        "End of file reached before sequence " "delimiter found."
                    )
            index = bytes_read.find(bytes_to_find)
            if index != -1:
                found = True
                value_chunks.append(bytes_read[:index])
                fp.seek(chunk_start + index + 4)  # rewind to end of delimiter
                length = fp.read(4)
                if length != b"\0\0\0\0":
                    print("Expected 4 zero bytes after undefined length " "delimiter")
            else:
                fp.seek(fp.tell() - search_rewind)  # rewind a bit
                # accumulate the bytes read (not including the rewind)
                value_chunks.append(bytes_read[:-search_rewind])

        # if get here then have found the byte string
        return b"".join(value_chunks)

    def _read_header(self):
        f = self._file
        TransferSyntaxUID = None

        # Read all elements, store transferSyntax when we encounter it
        try:
            while True:
                fp_save = f.tell()
                # Get element
                group, element, value = self._readDataElement()
                if group == 0x02:
                    if element == 0x10:
                        TransferSyntaxUID = value.decode("ascii").strip("\x00")
                else:
                    # No more group 2: rewind and break
                    # (don't trust group length)
                    f.seek(fp_save)
                    break
        except (EOFError, struct.error):  # pragma: no cover
            raise RuntimeError("End of file reached while still in header.")

        # Handle transfer syntax
        self._info["TransferSyntaxUID"] = TransferSyntaxUID
        #
        if TransferSyntaxUID is None:
            # Assume ExplicitVRLittleEndian
            is_implicit_VR, is_little_endian = False, True
        elif TransferSyntaxUID == "1.2.840.10008.1.2.1":
            # ExplicitVRLittleEndian
            is_implicit_VR, is_little_endian = False, True
        elif TransferSyntaxUID == "1.2.840.10008.1.2.2":
            # ExplicitVRBigEndian
            is_implicit_VR, is_little_endian = False, False
        elif TransferSyntaxUID == "1.2.840.10008.1.2":
            # implicit VR little endian
            is_implicit_VR, is_little_endian = True, True
        elif TransferSyntaxUID == "1.2.840.10008.1.2.1.99":
            # DeflatedExplicitVRLittleEndian:
            is_implicit_VR, is_little_endian = False, True
            self._inflate()
        else:
            # http://www.dicomlibrary.com/dicom/transfer-syntax/
            t, extra_info = TransferSyntaxUID, ""
            if "1.2.840.10008.1.2.4.50" <= t < "1.2.840.10008.1.2.4.99":
                extra_info = " (JPEG)"
            if "1.2.840.10008.1.2.4.90" <= t < "1.2.840.10008.1.2.4.99":
                extra_info = " (JPEG 2000)"
            if t == "1.2.840.10008.1.2.5":
                extra_info = " (RLE)"
            if t == "1.2.840.10008.1.2.6.1":
                extra_info = " (RFC 2557)"
            raise CompressedDicom(
                "The dicom reader can only read files with "
                "uncompressed image data - not %r%s. You "
                "can try using dcmtk or gdcm to convert the "
                "image." % (t, extra_info)
            )

        # From hereon, use implicit/explicit big/little endian
        self.is_implicit_VR = is_implicit_VR
        self.is_little_endian = is_little_endian
        self._unpackPrefix = "><"[is_little_endian]

    def _read_data_elements(self):
        info = self._info
        try:
            while True:
                # Get element
                group, element, value = self._readDataElement()
                # Is it a group we are interested in?
                if group in GROUPS:
                    key = (group, element)
                    name, vr = MINIDICT.get(key, (None, None))
                    # Is it an element we are interested in?
                    if name:
                        # Store value
                        converter = self._converters.get(vr, lambda x: x)
                        info[name] = converter(value)
        except (EOFError, struct.error):
            pass  # end of file ...

    def get_numpy_array(self):
        """ Get numpy arra for this DICOM file, with the correct shape,
        and pixel values scaled appropriately.
        """
        # Is there pixel data at all?
        if "PixelData" not in self:
            raise TypeError("No pixel data found in this dataset.")

        # Load it now if it was not already loaded
        if self._pixel_data_loc and len(self.PixelData) < 100:
            # Reopen file?
            close_file = False
            if self._file is None:
                close_file = True
                self._file = open(self._filename, "rb")
            # Read data
            self._file.seek(self._pixel_data_loc[0])
            if self._pixel_data_loc[1] == 0xFFFFFFFF:
                value = self._read_undefined_length_value()
            else:
                value = self._file.read(self._pixel_data_loc[1])
            # Close file
            if close_file:
                self._file.close()
                self._file = None
            # Overwrite
            self._info["PixelData"] = value

        # Get data
        data = self._pixel_data_numpy()
        data = self._apply_slope_and_offset(data)

        # Remove data again to preserve memory
        # Note that the data for the original file is loaded twice ...
        self._info["PixelData"] = (
            b"Data converted to numpy array, " + b"raw data removed to preserve memory"
        )
        return data

    def _get_shape_and_sampling(self):
        """ Get shape and sampling without actuall using the pixel data.
        In this way, the user can get an idea what's inside without having
        to load it.
        """
        # Get shape (in the same way that pydicom does)
        if "NumberOfFrames" in self and self.NumberOfFrames > 1:
            if self.SamplesPerPixel > 1:
                shape = (
                    self.SamplesPerPixel,
                    self.NumberOfFrames,
                    self.Rows,
                    self.Columns,
                )
            else:
                shape = self.NumberOfFrames, self.Rows, self.Columns
        elif "SamplesPerPixel" in self:
            if self.SamplesPerPixel > 1:
                if self.BitsAllocated == 8:
                    shape = self.SamplesPerPixel, self.Rows, self.Columns
                else:
                    raise NotImplementedError(
                        "DICOM plugin only handles "
                        "SamplesPerPixel > 1 if Bits "
                        "Allocated = 8"
                    )
            else:
                shape = self.Rows, self.Columns
        else:
            raise RuntimeError(
                "DICOM file has no SamplesPerPixel " "(perhaps this is a report?)"
            )

        # Try getting sampling between pixels
        if "PixelSpacing" in self:
            sampling = float(self.PixelSpacing[0]), float(self.PixelSpacing[1])
        else:
            sampling = 1.0, 1.0
        if "SliceSpacing" in self:
            sampling = (abs(self.SliceSpacing),) + sampling

        # Ensure that sampling has as many elements as shape
        sampling = (1.0,) * (len(shape) - len(sampling)) + sampling[-len(shape) :]

        # Set shape and sampling
        self._info["shape"] = shape
        self._info["sampling"] = sampling

    def _pixel_data_numpy(self):
        """Return a NumPy array of the pixel data.
        """
        # Taken from pydicom
        # Copyright (c) 2008-2012 Darcy Mason

        if "PixelData" not in self:
            raise TypeError("No pixel data found in this dataset.")

        # determine the type used for the array
        need_byteswap = self.is_little_endian != sys_is_little_endian

        # Make NumPy format code, e.g. "uint16", "int32" etc
        # from two pieces of info:
        #    self.PixelRepresentation -- 0 for unsigned, 1 for signed;
        #    self.BitsAllocated -- 8, 16, or 32
        format_str = "%sint%d" % (
            ("u", "")[self.PixelRepresentation],
            self.BitsAllocated,
        )
        try:
            numpy_format = np.dtype(format_str)
        except TypeError:  # pragma: no cover
            raise TypeError(
                "Data type not understood by NumPy: format='%s', "
                " PixelRepresentation=%d, BitsAllocated=%d"
                % (format_str, self.PixelRepresentation, self.BitsAllocated)
            )

        # Have correct Numpy format, so create the NumPy array
        arr = np.frombuffer(self.PixelData, numpy_format).copy()

        # XXX byte swap - may later handle this in read_file!!?
        if need_byteswap:
            arr.byteswap(True)  # True means swap in-place, don't make new copy

        # Note the following reshape operations return a new *view* onto arr,
        # but don't copy the data
        arr = arr.reshape(*self._info["shape"])
        return arr

    def _apply_slope_and_offset(self, data):
        """ 
        If RescaleSlope and RescaleIntercept are present in the data,
        apply them. The data type of the data is changed if necessary.
        """
        # Obtain slope and offset
        slope, offset = 1, 0
        needFloats, needApplySlopeOffset = False, False
        if "RescaleSlope" in self:
            needApplySlopeOffset = True
            slope = self.RescaleSlope
        if "RescaleIntercept" in self:
            needApplySlopeOffset = True
            offset = self.RescaleIntercept
        if int(slope) != slope or int(offset) != offset:
            needFloats = True
        if not needFloats:
            slope, offset = int(slope), int(offset)

        # Apply slope and offset
        if needApplySlopeOffset:
            # Maybe we need to change the datatype?
            if data.dtype in [np.float32, np.float64]:
                pass
            elif needFloats:
                data = data.astype(np.float32)
            else:
                # Determine required range
                minReq, maxReq = data.min(), data.max()
                minReq = min([minReq, minReq * slope + offset, maxReq * slope + offset])
                maxReq = max([maxReq, minReq * slope + offset, maxReq * slope + offset])

                # Determine required datatype from that
                dtype = None
                if minReq < 0:
                    # Signed integer type
                    maxReq = max([-minReq, maxReq])
                    if maxReq < 2 ** 7:
                        dtype = np.int8
                    elif maxReq < 2 ** 15:
                        dtype = np.int16
                    elif maxReq < 2 ** 31:
                        dtype = np.int32
                    else:
                        dtype = np.float32
                else:
                    # Unsigned integer type
                    if maxReq < 2 ** 8:
                        dtype = np.uint8
                    elif maxReq < 2 ** 16:
                        dtype = np.uint16
                    elif maxReq < 2 ** 32:
                        dtype = np.uint32
                    else:
                        dtype = np.float32
                # Change datatype
                if dtype != data.dtype:
                    data = data.astype(dtype)

            # Apply slope and offset
            data *= slope
            data += offset

        # Done
        return data

    def _inflate(self):
        # Taken from pydicom
        # Copyright (c) 2008-2012 Darcy Mason
        import zlib
        from io import BytesIO

        # See PS3.6-2008 A.5 (p 71) -- when written, the entire dataset
        #   following the file metadata was prepared the normal way,
        #   then "deflate" compression applied.
        #  All that is needed here is to decompress and then
        #      use as normal in a file-like object
        zipped = self._file.read()
        # -MAX_WBITS part is from comp.lang.python answer:
        # groups.google.com/group/comp.lang.python/msg/e95b3b38a71e6799
        unzipped = zlib.decompress(zipped, -zlib.MAX_WBITS)
        self._file = BytesIO(unzipped)  # a file-like object


class DicomSeries(object):
    """ DicomSeries
    This class represents a series of DICOM files (SimpleDicomReader
    objects) that belong together. If there are multiple files, they
    represent the slices of a volume (like for CT or MRI).
    """

    def __init__(self, suid, progressIndicator):
        # Init dataset list and the callback
        self._entries = []

        # Init props
        self._suid = suid
        self._info = {}
        self._progressIndicator = progressIndicator

    def __len__(self):
        return len(self._entries)

    def __iter__(self):
        return iter(self._entries)

    def __getitem__(self, index):
        return self._entries[index]

    @property
    def suid(self):
        return self._suid

    @property
    def shape(self):
        """ The shape of the data (nz, ny, nx). """
        return self._info["shape"]

    @property
    def sampling(self):
        """ The sampling (voxel distances) of the data (dz, dy, dx). """
        return self._info["sampling"]

    @property
    def info(self):
        """ A dictionary containing the information as present in the
        first DICOM file of this series. None if there are no entries. """
        return self._info

    @property
    def description(self):
        """ A description of the dicom series. Used fields are
        PatientName, shape of the data, SeriesDescription, and
        ImageComments.
        """
        info = self.info

        # If no info available, return simple description
        if not info:  # pragma: no cover
            return "DicomSeries containing %i images" % len(self)

        fields = []
        # Give patient name
        if "PatientName" in info:
            fields.append("" + info["PatientName"])
        # Also add dimensions
        if self.shape:
            tmp = [str(d) for d in self.shape]
            fields.append("x".join(tmp))
        # Try adding more fields
        if "SeriesDescription" in info:
            fields.append("'" + info["SeriesDescription"] + "'")
        if "ImageComments" in info:
            fields.append("'" + info["ImageComments"] + "'")

        # Combine
        return " ".join(fields)

    def __repr__(self):
        adr = hex(id(self)).upper()
        return "<DicomSeries with %i images at %s>" % (len(self), adr)

    def get_numpy_array(self):
        """ Get (load) the data that this DicomSeries represents, and return
        it as a numpy array. If this series contains multiple images, the
        resulting array is 3D, otherwise it's 2D.
        """

        # It's easy if no file or if just a single file
        if len(self) == 0:
            raise ValueError("Serie does not contain any files.")
        elif len(self) == 1:
            return self[0].get_numpy_array()

        # Check info
        if self.info is None:
            raise RuntimeError("Cannot return volume if series not finished.")

        # Init data (using what the dicom package produces as a reference)
        slice = self[0].get_numpy_array()
        vol = np.zeros(self.shape, dtype=slice.dtype)
        vol[0] = slice

        # Fill volume
        self._progressIndicator.start("loading data", "", len(self))
        for z in range(1, len(self)):
            vol[z] = self[z].get_numpy_array()
            self._progressIndicator.set_progress(z + 1)
        self._progressIndicator.finish()

        # Done
        import gc

        gc.collect()
        return vol

    def _append(self, dcm):
        self._entries.append(dcm)

    def _sort(self):
        self._entries.sort(key=lambda k: k.InstanceNumber)

    def _finish(self):
        """
        Evaluate the series of dicom files. Together they should make up
        a volumetric dataset. This means the files should meet certain
        conditions. Also some additional information has to be calculated,
        such as the distance between the slices. This method sets the
        attributes for "shape", "sampling" and "info".

        This method checks:
          * that there are no missing files
          * that the dimensions of all images match
          * that the pixel spacing of all images match
        """

        # The datasets list should be sorted by instance number
        L = self._entries
        if len(L) == 0:
            return
        elif len(L) == 1:
            self._info = L[0].info
            return

        # Get previous
        ds1 = L[0]
        # Init measures to calculate average of
        distance_sum = 0.0
        # Init measures to check (these are in 2D)
        dimensions = ds1.Rows, ds1.Columns
        # sampling = float(ds1.PixelSpacing[0]), float(ds1.PixelSpacing[1])
        sampling = ds1.info["sampling"][:2]  # row, column

        for index in range(len(L)):
            # The first round ds1 and ds2 will be the same, for the
            # distance calculation this does not matter
            # Get current
            ds2 = L[index]
            # Get positions
            pos1 = float(ds1.ImagePositionPatient[2])
            pos2 = float(ds2.ImagePositionPatient[2])
            # Update distance_sum to calculate distance later
            distance_sum += abs(pos1 - pos2)
            # Test measures
            dimensions2 = ds2.Rows, ds2.Columns
            # sampling2 = float(ds2.PixelSpacing[0]), float(ds2.PixelSpacing[1])
            sampling2 = ds2.info["sampling"][:2]  # row, column
            if dimensions != dimensions2:
                # We cannot produce a volume if the dimensions do not match
                raise ValueError("Dimensions of slices do not match.")
            if sampling != sampling2:
                # We can still produce a volume, but we should notify the user
                self._progressIndicator.write("Warn: sampling does not match.")
            # Store previous
            ds1 = ds2

        # Finish calculating average distance
        # (Note that there are len(L)-1 distances)
        distance_mean = distance_sum / (len(L) - 1)

        # Set info dict
        self._info = L[0].info.copy()

        # Store information that is specific to this series
        self._info["shape"] = (len(L),) + ds2.info["shape"]
        self._info["sampling"] = (distance_mean,) + ds2.info["sampling"]


def list_files(files, path):
    """List all files in the directory, recursively. """
    for item in os.listdir(path):
        item = os.path.join(path, item)
        if os.path.isdir(item):
            list_files(files, item)
        elif os.path.isfile(item):
            files.append(item)


def process_directory(request, progressIndicator, readPixelData=False):
    """
    Reads dicom files and returns a list of DicomSeries objects, which
    contain information about the data, and can be used to load the
    image or volume data.
    
    If readPixelData is True, the pixel data of all series is read. By
    default the loading of pixel data is deferred until it is requested
    using the DicomSeries.get_numpy_array() method. In general, both
    approaches should be equally fast.
    """
    # Get directory to examine
    if os.path.isdir(request.filename):
        path = request.filename
    elif os.path.isfile(request.filename):
        path = os.path.dirname(request.filename)
    else:  # pragma: no cover - tested earlier
        raise ValueError(
            "Dicom plugin needs a valid filename to examine " "the directory"
        )

    # Check files
    files = []
    list_files(files, path)  # Find files recursively

    # Gather file data and put in DicomSeries
    series = {}
    count = 0
    progressIndicator.start("examining files", "files", len(files))
    for filename in files:
        # Show progress (note that we always start with a 0.0)
        count += 1
        progressIndicator.set_progress(count)
        # Skip DICOMDIR files
        if filename.count("DICOMDIR"):  # pragma: no cover
            continue
        # Try loading dicom ...
        try:
            dcm = SimpleDicomReader(filename)
        except NotADicomFile:
            continue  # skip non-dicom file
        except Exception as why:  # pragma: no cover
            progressIndicator.write(str(why))
            continue
        # Get SUID and register the file with an existing or new series object
        try:
            suid = dcm.SeriesInstanceUID
        except AttributeError:  # pragma: no cover
            continue  # some other kind of dicom file
        if suid not in series:
            series[suid] = DicomSeries(suid, progressIndicator)
        series[suid]._append(dcm)

    # Finish progress
    # progressIndicator.finish('Found %i series.' % len(series))

    # Make a list and sort, so that the order is deterministic
    series = list(series.values())
    series.sort(key=lambda x: x.suid)

    # Split series if necessary
    for serie in reversed([serie for serie in series]):
        splitSerieIfRequired(serie, series, progressIndicator)

    # Finish all series
    # progressIndicator.start('analyse series', '', len(series))
    series_ = []
    for i in range(len(series)):
        try:
            series[i]._finish()
            series_.append(series[i])
        except Exception as err:  # pragma: no cover
            progressIndicator.write(str(err))
            pass  # Skip serie (probably report-like file without pixels)
        # progressIndicator.set_progress(i+1)
    progressIndicator.finish("Found %i correct series." % len(series_))

    # Done
    return series_


def splitSerieIfRequired(serie, series, progressIndicator):
    """ 
    Split the serie in multiple series if this is required. The choice
    is based on examing the image position relative to the previous
    image. If it differs too much, it is assumed that there is a new
    dataset. This can happen for example in unspitted gated CT data.
    """

    # Sort the original list and get local name
    serie._sort()
    L = serie._entries
    # Init previous slice
    ds1 = L[0]
    # Check whether we can do this
    if "ImagePositionPatient" not in ds1:
        return
    # Initialize a list of new lists
    L2 = [[ds1]]
    # Init slice distance estimate
    distance = 0

    for index in range(1, len(L)):
        # Get current slice
        ds2 = L[index]
        # Get positions
        pos1 = float(ds1.ImagePositionPatient[2])
        pos2 = float(ds2.ImagePositionPatient[2])
        # Get distances
        newDist = abs(pos1 - pos2)
        # deltaDist = abs(firstPos-pos2)
        # If the distance deviates more than 2x from what we've seen,
        # we can agree it's a new dataset.
        if distance and newDist > 2.1 * distance:
            L2.append([])
            distance = 0
        else:
            # Test missing file
            if distance and newDist > 1.5 * distance:
                progressIndicator.write(
                    "Warning: missing file after %r" % ds1._filename
                )
            distance = newDist
        # Add to last list
        L2[-1].append(ds2)
        # Store previous
        ds1 = ds2

    # Split if we should
    if len(L2) > 1:
        # At what position are we now?
        i = series.index(serie)
        # Create new series
        series2insert = []
        for L in L2:
            newSerie = DicomSeries(serie.suid, progressIndicator)
            newSerie._entries = L
            series2insert.append(newSerie)
        # Insert series and remove self
        for newSerie in reversed(series2insert):
            series.insert(i, newSerie)
        series.remove(serie)