File: ReliablePy.py

# -*- coding: utf-8 -*-
"""
Python Utilities for Relion

Created on Tue Dec  1 14:26:13 2015
@author: Robert A. McLeod
@email: robbmcleod@gmail.com OR robert.mcleod@unibas.ch

This is primarily a general parser for Relion star files.  It creates a two-level dictionary, with the 
"data_*" level at the top and the "_rln*" level at the second. Use the star.keys() function to see which keys 
the dictionary holds, e.g.
    
    rln.star.keys()

and then

    rln.star[b'data_whatever'].keys()

Example usage:

    rln = ReliablePy()
    # Wildcards can be loaded
    rln.load( 'PostProcess*.star' )
    
    # Plot the Fourier Shell Correlation
    plt.figure()
    plt.plot( rln.star[b'data_fsc'][b'Resolution'], rln.star[b'data_fsc'][b'FourierShellCorrelationUnmaskedMaps'], '.-' )
    plt.xlabel( "Resolution" )
    plt.ylabel( "FSC" )
    
Note that all Relion strings are byte-strings (char1) rather than UTF encoded, so the dictionary keys are 
bytes objects (e.g. b'data_fsc') rather than str.

"""
from __future__ import division, print_function, absolute_import


from . import ioDM, ioMRC

import numpy as np
import os, os.path
import glob
import time

from collections import OrderedDict

# The following are not requirements of python-mrcz, only ReliablePy:
import matplotlib.pyplot as plt
import scipy.ndimage # permissiveMask below uses scipy.ndimage
import pandas

# Static variable decorator
def static_var(varname, value):
    def decorate(func):
        setattr(func, varname, value)
        return func
    return decorate
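
# For example, the decorator above lets a function carry persistent state between
# calls (a sketch; `tick` and `counter` are hypothetical names):
#
#     @static_var( 'counter', 0 )
#     def tick():
#         tick.counter += 1
#         return tick.counter
#
# IceFilter below uses this pattern to cache its band-pass filter between calls.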
    
    
def apodization( name = 'butter.32', shape= [2048,2048], radius=None ):
    """ apodization( name = 'butter.32', size = [2048,2048], radius=None )
    Provides a 2-D filter or apodization window for Fourier filtering or image clamping.
        Radius = None defaults to shape/2
    
    Valid names are: 
        'hann' - von Hann cosine window on radius
        'hann_square' as above but on X-Y
        'hamming' - good for apodization, nonsense as a filter
        'butter.X' Butterworth multi-order filter where X is the order of the Lorentzian
        'butter_square.X' Butterworth in X-Y
        'gauss_trunc' - truncated gaussian, higher performance (smaller PSF) than hann filter
        'gauss' - regular gaussian
    NOTE: There are windows in scipy.signal for 1D-filtering...
    WARNING: doesn't work properly for odd image dimensions
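    
    Example (a minimal sketch; `image` is assumed to be an even-dimensioned 2-D 
    numpy array, and the radius shown is arbitrary):
        
        win = apodization( name='butter.32', shape=image.shape, radius=[64,64] )
        imageFFT = np.fft.fft2( image )
        filtered = np.real( np.fft.ifft2( imageFFT * np.fft.fftshift( win ) ) )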
    """
    # Make meshes
    shape = np.asarray( shape )
    if radius is None:
        radius = shape/2.0
    else:
        radius = np.asarray( radius, dtype='float' )
    # DEBUG: Doesn't work right for odd numbers
    [xmesh,ymesh] = np.meshgrid( np.arange(-shape[1]/2,shape[1]/2), np.arange(-shape[0]/2,shape[0]/2) )
    r2mesh = xmesh*xmesh/( np.double(radius[1])**2 ) + ymesh*ymesh/( np.double(radius[0])**2 ) # radius[1] is x, radius[0] is y
    
    name = name.lower()
    try:
        [name, order] = name.split('.')
        order = np.double(order)
    except ValueError:
        order = 1
        
    if name == 'butter':
        window =  np.sqrt( 1.0 / (1.0 + r2mesh**order ) )
    elif name == 'butter_square':
        window = np.sqrt( 1.0 / (1.0 + (xmesh/radius[1])**order))*np.sqrt(1.0 / (1.0 + (ymesh/radius[0])**order) )
    elif name == 'hann':
        cropwin = ((xmesh/radius[1])**2.0 + (ymesh/radius[0])**2.0) <= 1.0
        window = cropwin.astype('float') * 0.5 * ( 1.0 + np.cos( 1.0*np.pi*np.sqrt( (xmesh/radius[1])**2.0 + (ymesh/radius[0])**2.0  )  ) )
    elif name == 'hann_square':
        window = ( (0.5 + 0.5*np.cos( np.pi*( xmesh/radius[1]) ) ) *
            (0.5 + 0.5*np.cos( np.pi*( ymesh/radius[0] )  ) ) )
    elif name == 'hamming':
        cropwin = ((xmesh/radius[1])**2.0 + (ymesh/radius[0])**2.0) <= 1.0
        window = cropwin.astype('float') *  ( 0.54 + 0.46*np.cos( 1.0*np.pi*np.sqrt( (xmesh/radius[1])**2.0 + (ymesh/radius[0])**2.0  )  ) )
    elif name == 'hamming_square':
        window = ( (0.54 + 0.46*np.cos( np.pi*( xmesh/radius[1]) ) ) *
            (0.54 + 0.46*np.cos( np.pi*( ymesh/radius[0] )  ) ) )
    elif name == 'gauss' or name == 'gaussian':
        window = np.exp( -(xmesh/radius[1])**2.0 - (ymesh/radius[0])**2.0 )
    elif name == 'gauss_trunc':
        cropwin = ((0.5*xmesh/radius[1])**2.0 + (0.5*ymesh/radius[0])**2.0) <= 1.0
        window = cropwin.astype('float') * np.exp( -(xmesh/radius[1])**2.0 - (ymesh/radius[0])**2.0 )
    elif name == 'lanczos':
        print( "TODO: Implement Lanczos window" )
        return
    else:
        print( "Error: unknown filter name passed into apodization" )
        return
    return window
 
def pyFFTWPlanner( realMage, fouMage=None, wisdomFile = None, effort = 'FFTW_MEASURE', n_threads = None, doForward = True, doReverse = True ):
    """
    Appends an FFTW plan for the given realMage to a pickle file stored in the same
    directory as this module, which is then re-imported on future calls.
    
    NOTE: realMage should be typecast to 'complex64' normally.
    
    NOTE: planning pickle files are hardware dependent, so don't copy them from one 
    machine to another. wisdomFile allows you to specify a .pkl file with the wisdom
    tuple written to it.  The wisdomFile is never updated, whereas the default 
    wisdom _is_ updated with each call. For multiprocessing, it's important to 
    let FFTW generate its plan from an ideal processor state.
    
    TODO: implement real, half-space fourier transforms rfft2 and irfft2 as built
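    
    Example (a sketch, assuming pyfftw is installed and `mage` is a 2-D 'complex64' 
    numpy array):
        
        FFT2, IFFT2 = pyFFTWPlanner( mage, n_threads=4 )
        FFT2.update_arrays( mage, FFT2.get_output_array() )
        FFT2.execute()
        mageFFT = FFT2.get_output_array()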
    """
    
    import pyfftw
    import pickle
    import os.path
    from multiprocessing import cpu_count
    
    utilpath = os.path.dirname(os.path.realpath(__file__))
    
    # First import whatever we already have
    if wisdomFile is None:
        wisdomFile = os.path.join( utilpath, "pyFFTW_wisdom.pkl" )


    if os.path.isfile(wisdomFile):
        try:
            with open( wisdomFile, 'rb' ) as fh:
                pyfftw.import_wisdom( pickle.load( fh ) )
        except IOError:
            print( "Util: pyFFTW wisdom plan file: " + str(wisdomFile) + " invalid/unreadable" )
        except Exception:
            # This is not normally a problem; the wisdom file may simply be empty
            print( "Util: pickle failed to import FFTW wisdom" )

    else:
        # Touch the file
        os.umask(0000) # Everyone should be able to delete scratch files
        with open( wisdomFile, 'wb') as fh:
            pass
    
    # The fouMage array has to be smaller to do the real -> complex FFT
    if fouMage is None:
        if realMage.dtype.name == 'float32':
            print( "pyFFTW is recommended to work on purely complex data" )
            fouShape = np.array( realMage.shape )
            fouShape[-1] = realMage.shape[-1]//2 + 1
            fouDtype = 'complex64'
            fouMage = np.empty( fouShape, dtype=fouDtype )
        elif realMage.dtype.name == 'float64': 
            print( "pyFFTW is recommended to work on purely complex data" )
            fouShape = np.array( realMage.shape )
            fouShape[-1] = realMage.shape[-1]//2 + 1
            fouDtype = 'complex128'
            fouMage = np.empty( fouShape, dtype=fouDtype )
        else: # Assume dtype is complexXX
            fouDtype = realMage.dtype.name
            fouMage = np.zeros( realMage.shape, dtype=fouDtype )
            
    if n_threads is None:
        n_threads = cpu_count()
    print( "FFTW using " + str(n_threads) + " threads" )
    
    if bool(doForward):
        #print( "Planning forward pyFFTW for shape: " + str( realMage.shape ) )
        FFT2 = pyfftw.builders.fft2( realMage, planner_effort=effort, 
                                    threads=n_threads, auto_align_input=True )
    else:
        FFT2 = None
    if bool(doReverse):
        #print( "Planning reverse pyFFTW for shape: " + str( realMage.shape ) )
        IFFT2 = pyfftw.builders.ifft2( fouMage, planner_effort=effort, 
                                      threads=n_threads, auto_align_input=True )
    else: 
        IFFT2 = None

    # Setup so that we can call .execute on each one without re-copying arrays
    # if FFT2 is not None and IFFT2 is not None:
    #    FFT2.update_arrays( FFT2.get_input_array(), IFFT2.get_input_array() )
    #    IFFT2.update_arrays( IFFT2.get_input_array(), FFT2.get_input_array() )
    # Something is different in the builders compared to FFTW directly. 
    # Can also repeat this for pyfftw.builders.rfft2 and .irfft2 if desired, but 
    # generally it seems slower.
    # Opening a file for writing is supposed to truncate it
    # if bool(savePlan):
    #if wisdomFile is None:
    # with open( utilpath + "/pyFFTW_wisdom.pkl", 'wb') as fh:
    with open( wisdomFile, 'wb' ) as fh:
        pickle.dump( pyfftw.export_wisdom(), fh )
            
    return FFT2, IFFT2
    
# TODO: put IceFilter in a ReliablePy utility function file
@static_var( "bpFilter", -1 )
@static_var( "mageShape", np.array([0,0]) )
@static_var( "ps", -42 )
@static_var( "FFT2", -42 )
@static_var( "IFFT2", -42 )
def IceFilter( mage, pixelSize=1.0, filtRad = 8.0 ):
    """
    IceFilter applies a band-pass filter to mage that passes the first 3 
    water ice rings, and then returns the result.
        pixelSize is in ANGSTROMS because this is bio.  Program uses this to 
        calculate the width of the band-pass filter.
        filtRad is the radius (in pixels) of the Gaussian filter applied after the 
        Fourier filtration, which smooths out the periodic artifacts caused by 
        multiple defocus zeros falling within the band.
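    
    Example (a sketch; the filename and pixel size are hypothetical, and a single 
    2-D micrograph is assumed):
        
        mage, header = ioMRC.readMRC( 'micrograph.mrc', pixelunits=u'nm' )
        iceMap = IceFilter( mage, pixelSize=1.32 )
        print( "Mean ice-band score: %g" % iceMap.mean() )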
    """
    
    # First water ring is at 3.897 Angstroms
    # Second is at 3.669 Angstroms
    # Third is at 3.441 Angstroms
    # And of course there is strain, so go from about 4 to 3.3 Angstroms in the mesh
    # Test for existance of pyfftw
    try:
        import pyfftw
        pyfftwFound = True
    except ImportError:
        pyfftwFound = False

    # Check to see if we have to update our static variables
    if ( (IceFilter.mageShape != mage.shape).any() ) or (IceFilter.bpFilter.size == 1) or (IceFilter.ps != pixelSize):
        # Make a new IceFilter.bpFilter
        IceFilter.mageShape = np.array( mage.shape )
        IceFilter.ps = pixelSize
        
        bpMin = pixelSize / 4.0  # spatial frequency (cycles/pixel) of the 4.0 Angstrom spacing
        bpMax = pixelSize / 3.3  # spatial frequency (cycles/pixel) of the 3.3 Angstrom spacing
        
        # So pixel frequency is -0.5 to +0.5 with shape steps
        # And we want a bandpass from 1.0/bpMin to 1.0/bpMax, which is different on each axis for rectangular images
        pixFreqX = 1.0 / mage.shape[1]
        pixFreqY = 1.0 / mage.shape[0]
        bpRangeX = np.round( np.array( [ bpMin/pixFreqX, bpMax/pixFreqX ] ) )
        bpRangeY = np.round( np.array( [ bpMin/pixFreqY, bpMax/pixFreqY ] ) )
        IceFilter.bpFilter = np.fft.fftshift( 
            (1.0 - apodization( name='butter.64', shape=mage.shape, radius=[ bpRangeY[0],bpRangeX[0] ] )) 
            * apodization( name='butter.64', shape=mage.shape, radius=[ bpRangeY[1],bpRangeX[1] ] ) )
        IceFilter.bpFilter = IceFilter.bpFilter.astype( 'float32' ) 
        
        if pyfftwFound: [IceFilter.FFT2, IceFilter.IFFT2] = pyFFTWPlanner( mage.astype('complex64') )
        pass
    
    # Apply band-pass filter
    if pyfftwFound:
        IceFilter.FFT2.update_arrays( mage.astype('complex64'), IceFilter.FFT2.get_output_array() )
        IceFilter.FFT2.execute()
        IceFilter.IFFT2.update_arrays( IceFilter.FFT2.get_output_array() * IceFilter.bpFilter, IceFilter.IFFT2.get_output_array() )
        IceFilter.IFFT2.execute()
        bpMage = IceFilter.IFFT2.get_output_array() / mage.size
    else:
        FFTmage = np.fft.fft2( mage )
        bpMage = np.fft.ifft2( FFTmage * IceFilter.bpFilter )

    from scipy.ndimage import gaussian_filter
    bpGaussMage = gaussian_filter( np.abs(bpMage), filtRad )
    # So if I don't want to build a mask here, and if I'm just doing band-pass
    # intensity scoring I don't need it, I don't need to make a thresholded mask
    
    # Should we normalize the bpGaussMage by the mean and std of the mage?
    return bpGaussMage

class ReliablePy(object):
    
    def __init__( self, *inputs ) :
        self.verbose = 1
        self.inputs = list( inputs )
        
        # _data.star file dicts
        self.star = OrderedDict()
        
        self.par = []
        self.pcol = OrderedDict()

        self.box = [] # Each box file loaded is indexed by its load order; a dict could also be used if more convenient.
        
        # Particle/class data
        self.mrc = []
        self.mrc_header = []
        if inputs:
            self.load( *inputs )
        pass
    
    def load( self, *input_names ):
        # input_names arrives as a tuple of one or more filenames and/or glob patterns
        new_files = []
        for item in input_names:
            new_files.extend( glob.glob( item ) )
        
        for filename in new_files:
            [fileFront, fileExt] = os.path.splitext( filename )
            
            if fileExt == '.mrc' or fileExt == '.mrcs':
                self.inputs.append(filename)
                self.__loadMRC( filename )
            elif fileExt == '.star':
                self.inputs.append(filename)
                self.__loadStar( filename )
            elif fileExt == '.par':
                self.inputs.append(filename)
                self.__loadPar( filename )
            elif fileExt == '.box':
                self.inputs.append(filename)
                self.__loadBox( filename )
            else:
                print( "Unknown file extension passed in: " + filename )
                
    def plotFSC( self ):
        # Do error checking?  Or no?
        plt.rc('lines', linewidth=2.0, markersize=12.0 )
        plt.figure()
        plt.plot( self.star[b'data_fsc'][b'Resolution'], 0.143*np.ones_like(self.star[b'data_fsc'][b'Resolution']), 
                 '-', color='firebrick', label="Resolution criterion" )
        try:
            plt.plot( self.star[b'data_fsc'][b'Resolution'], self.star[b'data_fsc'][b'FourierShellCorrelationUnmaskedMaps'], 
                 'k.-', label="Unmasked FSC" )
        except KeyError: pass
        try:
            plt.plot( self.star[b'data_fsc'][b'Resolution'], self.star[b'data_fsc'][b'FourierShellCorrelationMaskedMaps'], 
                 '.-', color='royalblue', label="Masked FSC" )
        except KeyError: pass
        try:
            plt.plot( self.star[b'data_fsc'][b'Resolution'], self.star[b'data_fsc'][b'FourierShellCorrelationCorrected'], 
                 '.-', color='forestgreen', label="Corrected FSC" )
        except KeyError: pass
        try:
            plt.plot( self.star[b'data_fsc'][b'Resolution'], self.star[b'data_fsc'][b'CorrectedFourierShellCorrelationPhaseRandomizedMaskedMaps'], 
                 '.-', color='goldenrod', label="Random-phase corrected FSC" )
        except KeyError: pass
        plt.xlabel( r"Resolution ($\AA^{-1}$)" )
        plt.ylabel( "Fourier Shell Correlation" )
        plt.legend( loc='upper right', fontsize=16 )
        plt.xlim( np.min(self.star[b'data_fsc'][b'Resolution']), np.max(self.star[b'data_fsc'][b'Resolution']) )
        print( "Final resolution (unmasked): %.2f A" % self.star[b'data_general'][b'FinalResolution'] )
        print( "B-factor applied: %.1f" % self.star[b'data_general'][b'BfactorUsedForSharpening'] )
        
    def plotSSNR( self ):
        """
        Pulls the SSNR from each class in a _model.star file and plots them, for assessing which class is the 
        'best' class 
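        
        Example (a sketch with a hypothetical filename):
            
            rln = ReliablePy( 'run1_it025_model.star' )
            rln.plotSSNR()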
        """
        
        N_particles = np.sum( self.star[b'data_model_groups'][b'GroupNrParticles'] )
        N_classes = self.star[b'data_model_general'][b'NrClasses']
            
        plt.figure()
        for K in np.arange( N_classes ):
            Resolution = self.star[b'data_model_class_%d'%(K+1)][b'Resolution']
            SSNR = self.star[b'data_model_class_%d'%(K+1)][b'SsnrMap']
            plt.semilogy( Resolution, SSNR+1.0, 
                     label="Class %d: %d" %(K+1,N_particles*self.star[b'data_model_classes'][b'ClassDistribution'][K]) )
        plt.legend( loc = 'best' )
        plt.xlabel( r"Resolution ($\AA^{-1}$)" )
        plt.ylabel( "Spectral Signal-to-Noise Ratio" )
        # Let's also display the class distributions in the legend
        
    
    def pruneParticlesNearImageEdge( self, box = None, shapeImage = [3838,3710] ):
        """
        Removes any particles near the image edge. Relion's default behavior is to replicate-pad these, 
        which often leads to it crashing.
        
        box is the bounding box size for the particle, in pixels.  If a _model.star file is loaded 
        it is automatically detected.  Otherwise it must be provided.
        
        The image size is not stored anywhere obvious in Relion, so it must be passed in as 
        its shape in [y,x].
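        
        Example (a sketch with hypothetical filenames):
            
            rln = ReliablePy( 'particles.star', 'run1_model.star' )
            rln.pruneParticlesNearImageEdge( shapeImage=[3838,3710] )
            rln.saveDataStar( 'particles_pruned.star' )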
        """
        if box is None:
            try: 
                box = self.star[b'data_model_general'][b'OriginalImageSize']
            except KeyError:
                print( "No box shape found in metadata, load a *_model.star file or provide box dimension" )
                return
            
        partCount = len( self.star[b'data_'][b'CoordinateX'] )
        
        # Hmm... removing a row is a little painful because I index by keys in columnar format.
        box2 = box/2
        CoordX = self.star[b'data_'][b'CoordinateX']
        CoordY = self.star[b'data_'][b'CoordinateY']
        keepElements = ~((CoordX < box2)|(CoordY < box2)|(CoordX > shapeImage[1]-box2)|(CoordY > shapeImage[0]-box2))
        for key, store in self.star[b'data_'].items():
            self.star[b'data_'][key] = store[keepElements]
        print( "Deleted %d"%(partCount-len(self.star[b'data_'][b'CoordinateX']) ) + 
                " particles too close to image edge" )
        pass
    
    def permissiveMask( self, volumeThres, gaussSigma = 5.0, gaussRethres = 0.07, smoothSigma=1.5 ):
        """
        Given a (tight) volumeThres(hold) measured in Chimera or IMS, this function generates a 
        Gaussian dilated mask that is then smoothed.  Everything is done with Gaussian operations 
        so the Fourier-space representation of the mask should be relatively smooth as well, 
        and hence exhibit less ringing.
        
        Expects self.mrc to be loaded (a list of volumes; the most recently loaded is used).  Populates self.mask.
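        
        Example (a sketch; the filename is hypothetical, and the threshold shown is 
        arbitrary and should be measured in Chimera):
            
            rln = ReliablePy( 'run1_class001.mrc' )
            rln.permissiveMask( 0.025 )
            ioMRC.writeMRC( rln.mask, 'mask_class001.mrc' )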
        
        """
        thres = ( self.mrc[-1] > volumeThres ).astype('float32') # threshold the most recently loaded volume
        
        gaussThres = scipy.ndimage.gaussian_filter( thres, gaussSigma )
        rethres = gaussThres > gaussRethres; rethres = rethres.astype('float32')
        
        self.mask = scipy.ndimage.gaussian_filter( rethres, smoothSigma )
        print( "permissive mask complete, use ioMRC.writeMRC(self.mrc, 'maskname.mrc') to save" )
        pass
    
    def box2star( self, directory = "." ):
        """
        Converts all EMAN .box files in a directory to the associated .star files. Relion cannot successfully 
        rescale particles if they come in .box format.  Also does box pruning if they are too close to an edge.
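        
        Example (a sketch; the directory name is hypothetical):
        
            rln = ReliablePy()
            rln.box2star( directory='micrographs' )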
        """

        boxList = glob.glob( os.path.join( directory, "*.box") )

        starHeader = """
        data_
        
        loop_
        _rlnCoordinateX #1
        _rlnCoordinateY #2
        """
        
        shapeImage = [3838,3710]
        for boxFile in boxList:
            print( "Loading %s" % boxFile )
            boxData = np.loadtxt(boxFile)
            
            xCoord = boxData[:,0]
            yCoord = boxData[:,1]
            boxX = boxData[:,2]/2
            boxY = boxData[:,3]/2
            
            keepElements = ~((xCoord < boxX)|(yCoord < boxY)|(xCoord > shapeImage[1]-boxX)|(yCoord> shapeImage[0]-boxY))
            xCoord = xCoord[keepElements]
            yCoord = yCoord[keepElements]
            boxX = boxX[keepElements]
            boxY = boxY[keepElements]
            
            starFilename = os.path.splitext( boxFile )[0] + ".star"
            with open( starFilename, 'w' ) as sh:
                sh.write( starHeader )
                for J in np.arange(0,len(xCoord)):
                    # EMAN box files store the lower-left corner; star files store the particle center
                    sh.write( "%.1f %.1f\n" % (xCoord[J]+boxX[J], yCoord[J]+boxY[J] ) )
                sh.write( "\n" )
            
    def regroupKmeans( self, partPerGroup = 100, miniBatch=True ):
        """
        Does a 3-D k-means clustering on DefocusU, DefocusV, and GroupScaleCorrection
        
        partPerGroup is a suggestion; the number of groups is the # of particles / partPerGroup, 
        so outlier groups will tend to have far fewer particle counts than those in the bulk of the data.
        
        miniBatch=True is faster for very large sets (>100,000 particles), but somewhat less accurate
        miniBatch=False is faster for smaller sets, and better overall
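        
        Example (a sketch with hypothetical filenames; requires scikit-learn):
        
            rln = ReliablePy( 'run1_data.star', 'run1_model.star' )
            rln.regroupKmeans( partPerGroup=200, miniBatch=False )
            rln.saveDataStar( 'run1_data_regrouped.star' )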
        """
        # K-means clustering
        import sklearn.cluster
        import sklearn.preprocessing
        
        # We need to make an array for all particles that has the GroupScaleCorrection
        P = len( self.star[b'data_'][b'DefocusU'] )
        n_clusters = int( P / partPerGroup )
        
        DefocusU = self.star[b'data_'][b'DefocusU']
        DefocusV = self.star[b'data_'][b'DefocusV']
        DefocusMean = 0.5* (DefocusU + DefocusV)
        
        if b'data_model_groups' in self.star:
            SCALE_CORR_PRESENT = True
            part_GroupScaleCorrection = np.zeros_like( self.star[b'data_'][b'DefocusU'] )
        
            # Build a GroupScaleCorrection vector
            for J, groupNr in enumerate( self.star[b'data_'][b'GroupNumber'] ):
                part_GroupScaleCorrection[J] = self.star[b'data_model_groups'][b'GroupScaleCorrection'][  np.argwhere(self.star[b'data_model_groups'][b'GroupNumber'] == groupNr)[0] ]
        else:
            print( "No _model.star loaded, not using scale correction" )
            SCALE_CORR_PRESENT = False
            
        ##################
        # K-means clustering:
        ##################
        print( "Running K-means clustering analysis for " + str(P) + " particles into " + str(n_clusters) + " clusters" )
        t0 = time.time()
        if bool(miniBatch):
            print( "TODO: determine number of jobs for K-means" )
            k_means = sklearn.cluster.MiniBatchKMeans( n_clusters=n_clusters, init_size=3*n_clusters+1 )
        else: 
            k_means = sklearn.cluster.KMeans( n_clusters=n_clusters, n_jobs=12 )
        #Kmeans_in = np.vstack( [DefocusMean, part_GroupScaleCorrection]).transpose()
        if SCALE_CORR_PRESENT:
            Kmeans_in = np.vstack( [DefocusU,DefocusV, part_GroupScaleCorrection]).transpose()
        else:
            Kmeans_in = np.vstack( [DefocusU,DefocusV]).transpose()
        Kmeans_in = sklearn.preprocessing.robust_scale( Kmeans_in )
        k_predict = k_means.fit_predict( Kmeans_in  )
        t1 = time.time()
        print( "Cluster analysis finished in (s): " + str(t1-t0) )
        
        if self.verbose >= 2 and SCALE_CORR_PRESENT:
            plt.figure()
            plt.scatter( DefocusMean, part_GroupScaleCorrection, c=k_predict)
            plt.xlabel( r"Defocus ($\AA$)" )
            plt.ylabel( "Group scale correction (a.u.)" )
            plt.title("K-means on Defocus")
        
        ##################
        # Save the results in a new particles .star file:
        ##################
        # Replace, add one to group number because Relion starts counting from 1

        particleKey = b"data_"
        
        # Add the GroupName field to the star file
        self.star[particleKey][b'GroupName'] = [""] * len( self.star[particleKey][b'GroupNumber'] )
        for J, groupName in enumerate( k_predict ):
            self.star[particleKey][b'GroupName'][J] = b'G' + str(groupName + 1)
            
        # Build a new group number count
        groupCount = np.zeros_like( self.star[particleKey][b'GroupNumber'] )
        for J in np.arange(0,len(groupCount)):
            groupCount[J] = np.sum( self.star[particleKey][b'GroupNumber'] == J )
        self.star[particleKey][b'GroupNumber'] = groupCount
            
        # Recalculate number of particles in each group (ACTUALLY THIS SEEMS NOT NECESSARY)
        #GroupNr = np.zeros( np.max( k_predict )+1 )
        #for J in xrange( np.min( k_predict), np.max( k_predict ) ):
        #    GroupNr[J] = np.sum( k_predict == J )
        #    pass
        #
        #for J in xrange(0, len(rln.star[particleKey]['GroupNumber']) ):
        #    rln.star[particleKey]['GroupNumber'][J] = GroupNr[ k_predict[J] ]
            
    def saveDataStar( self, outputName, particleKey = b"data_" ):
        """
        Outputs a relion ..._data.star file that has been pruned, regrouped, etc. to outputName
        """
        
        if outputName is None:
            # Need to store input star names, and figure out which was the last loaded particles.star file.
            # [outFront, outExt] = os.path.splitext()
            raise IOError( "Default filenames for saveDataStar not implemented yet" )
            
        # TODO: more general star file output    
        # Let's just hack this
        fh = open( outputName, 'wb' )
        fh.write( b"\ndata_\n\nloop_\n")
        
        # Since we made self.star an OrderedDict we don't need to keep track of index ordering
        headerKeys = list( self.star[particleKey].keys() )
        
        for J, key in enumerate(headerKeys):
            # Star file column indices count from 1
            fh.write( b"_rln" + key + (" #%d\n" % (J+1)).encode() )
        
        P = len( self.star[particleKey][ headerKeys[0] ] )
        for I in np.arange(0,P):
            fh.write( b"    ")
            for J, key in enumerate(headerKeys):
                value = self.star[particleKey][key][I]
                if not isinstance( value, bytes ):
                    value = str( value ).encode()
                fh.write( value )
                fh.write( b"   " )
            fh.write( b"\n" )
        fh.close()
        
    def saveDataAsPar( self, outputPrefix, N_classes = 1, mag = None, pixelsize=None, particleKey = "data_" ):
        """
        Saves a Relion .star file as a Frealign .par meta-data file.  Also goes through all the particles in the 
        Relion .star and generates an appropriate meta-MRC particle file for Frealign.
        
        Usage:
            
            saveDataAsPar( self, outputPrefix, N_classes = 1, mag = None, pixelsize=None, particleKey = "data_" )
        
            outputPrefix will be appended with "_1_rX.par", where X is the class number.
            
            N_classes will generate N classes with random occupancy, or 100.0 % occupancy for one class.
            
            mag will change the Relion magnification to the given integer.
            
            pixelsize is also optional.  Relion tends to have round-off error in the pixelsize.
        
        Use 'relion_stack_create --i particles.star --o forFrealign' to generate the associated mrc file.
        
        Also no comment lines are written to the .par file.
        """
        

        
        partCount = len( self.star[b'data_'][b'MicrographName'] )
        # Need AnglePsi, AngleTilt, and AngleRot
        if not b'AnglePsi' in self.star[b'data_']:
            self.star[b'data_'][b'AnglePsi'] = np.zeros( partCount, dtype='float32' )
        if not b'AngleTilt' in self.star[b'data_']:
            self.star[b'data_'][b'AngleTilt'] = np.zeros( partCount, dtype='float32' )
        if not b'AngleRot' in self.star[b'data_']:
            self.star[b'data_'][b'AngleRot'] = np.zeros( partCount, dtype='float32' )
        if not b'OriginY' in self.star[b'data_']:
            self.star[b'data_'][b'OriginY'] = np.zeros( partCount, dtype='float32' )
        if not b'OriginX' in self.star[b'data_']:
            self.star[b'data_'][b'OriginX'] = np.zeros( partCount, dtype='float32' )
        if not b'Magnification' in self.star[b'data_']:
            self.star[b'data_'][b'Magnification'] = np.zeros( partCount, dtype='float32' )     
        if not b'GroupNumber' in self.star[b'data_']:
            self.star[b'data_'][b'GroupNumber'] = np.zeros( partCount, dtype='uint16' )    
        if not b'DefocusU' in self.star[b'data_']:
            self.star[b'data_'][b'DefocusU'] = np.zeros( partCount, dtype='float32' )
        if not b'DefocusV' in self.star[b'data_']:
            self.star[b'data_'][b'DefocusV'] = np.zeros( partCount, dtype='float32' )
        if not b'DefocusAngle' in self.star[b'data_']:
            self.star[b'data_'][b'DefocusAngle'] = np.zeros( partCount, dtype='float32' )
            

        # Frealign expects shifts in Angstroms.  Pixelsize is sort of sloppily 
        # kept track of in Relion with Magnification and DetectorPixelSize (which
        # defaults to 14.0)
        
        if pixelsize is None:
            # Detector pixel size in um, we need pixelsize in Angstrom
            pixelsize = self.star[b'data_'][b'DetectorPixelSize'][0]*1E4 / self.star[b'data_'][b'Magnification'][0]
            print( "Found pixelsize of %.2f" % pixelsize )
        if mag is None:
            print( "Using Relion magnification of %.f and DSTEP=%.1f" % ( self.star[b'data_'][b'Magnification'][0], self.star[b'data_'][b'DetectorPixelSize'][0]) )
            print( "For a K2 (DSTEP=5.0) the appropriate magnification would be %.0f" % (50000.0/pixelsize) )
        else:
            self.star[b'data_'][b'Magnification'] = mag * np.ones_like( self.star[b'data_'][b'Magnification'] )
        
        logP = int(-500)
        sigma = 1.0
        score = 20.0
        change = 0.0 
        for K in np.arange( 1, N_classes+1 ):
            outputName = outputPrefix + "_1_r%d.par" % K
            if N_classes > 1:
                # Add random occupancy
                occupancy = np.random.uniform( low=0.0, high=100.0, size=len(self.star[b'data_'][b'DefocusU']) )
            else:
                occupancy = 100.0* np.ones_like( self.star[b'data_'][b'DefocusU'] )
            
            with open( outputName, 'w' ) as fh:
                # Frealign is very picky about the number of digits, see card10.f, line 163
                #READ(LINE,*,ERR=99,IOSTAT=CNT)ILIST(NANG),
                #     +                  PSI(NANG),THETA(NANG),PHI(NANG),SHX(NANG),
                #     +                  SHY(NANG),ABSMAGP,FILM(NANG),DFMID1(NANG),
                #     +                  DFMID2(NANG),ANGAST(NANG),OCC(NANG),
                #     +                  LGP,SIG(NANG),PRESA(NANG)
                #7011                FORMAT(I7,3F8.2,2F10.2,I8,I6,2F9.1,2F8.2,I10,F11.4,
                #     +                     F8.2)
                for J in np.arange(partCount):
                    fh.write( "%7d"%(J+1) 
                                + " %8.2f"%self.star[b'data_'][b'AnglePsi'][J] 
                                + " %8.2f"%self.star[b'data_'][b'AngleTilt'][J]  
                                + " %8.2f"%self.star[b'data_'][b'AngleRot'][J] 
                                + " %8.2f"%(self.star[b'data_'][b'OriginX'][J] * pixelsize)
                                + " %8.2f"%(self.star[b'data_'][b'OriginY'][J] * pixelsize)
                                + " %8.0f"%self.star[b'data_'][b'Magnification'][J] 
                                + " %6d"%self.star[b'data_'][b'GroupNumber'][J] 
                                + " %9.1f"%self.star[b'data_'][b'DefocusU'][J]
                                + " %9.1f"%self.star[b'data_'][b'DefocusV'][J] 
                                + " %8.2f"%self.star[b'data_'][b'DefocusAngle'][J] 
                                + " %8.2f"%occupancy[J] 
                                + " %10d"%logP
                                + " %11.4f"%sigma
                                + " %8.2f"%score
                                + " %8.2f"%change + "\n")
                pass
        
        # Ok and now we need to make a giant particles file?
        #mrcName, _= os.path.splitext( outputName ) 
        #mrcName = mrcName + ".mrc"
        
        #imageNames = np.zeros_like( self.star[b'data_'][b'ImageName'] )
        #for J, name in enumerate( self.star[b'data_'][b'ImageName'] ):
        #    imageNames[J] = name.split('@')[1]
        #uniqueNames = np.unique( imageNames ) # Ordering is preserved, thankfully!
        

        # It would be much better if we could write to a memory-mapped file rather than building the entire array in memory
        # However this is a little buggy in numpy.
        # https://docs.python.org/2/library/mmap.html instead?
        
        #particleList = []
        #for uniqueName in uniqueNames:
        #   particleList.extend( ioMRC.readMRC(uniqueName)[0] )
            
        #print( "DONE building particle list!" )
        #print( len(particleList) )
        
        #particleArray = np.array( particleList )
        # del particleList
        
        # We do have the shape parameter that we can pass in to pre-pad the array with all zeros.
        #ioMRC.writeMRC( particleArray, mrcName, shape=None ) # TODO: no pixelsize
        pass
        
    def saveCtfImagesStar( self, outputName, zorroList = "*.dm4.log", physicalPixelSize=5.0, amplitudeContrast=0.08 ):
        """
        Given a glob pattern, generate a list of zorro logs, or alternatively one can pass in a list.  For each 
        zorro log, load it and extract the pertinent info (defocus, etc.).  The output is a .star file ready for 
        particle extraction, with embedded CTF information.
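        
        Example (a sketch, assuming zorro is installed and logs match the default glob):
        
            rln = ReliablePy()
            rln.saveCtfImagesStar( 'micrographs_ctf.star', zorroList='*.dm4.log' )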
        """
        import zorro
        
        if isinstance( zorroList, str ):
            zorroList = glob.glob( zorroList )

        headerDict = { b'MicrographName':1, b'CtfImage':2, b'DefocusU':3, b'DefocusV':4, b'DefocusAngle':5, 
                      b'Voltage':6, b'SphericalAberration':7, b'AmplitudeContrast':8, b'Magnification':9, 
                      b'DetectorPixelSize':10, b'CtfFigureOfMerit': 11 }
        lookupDict = dict( zip( headerDict.values(), headerDict.keys() ) )   
        data = OrderedDict()
        for header in headerDict:      
            data[header] = [None]*len(zorroList)
            
        zorroReg = zorro.ImageRegistrator()    
        for J, zorroLog in enumerate(zorroList):
            zorroReg.loadConfig( zorroLog, loadData=False )
            
            data[b'MicrographName'][J] = zorroReg.files['sum']
            data[b'CtfImage'][J] = os.path.splitext( zorroReg.files['sum'] )[0] + ".ctf:mrc"
            # CTF4Results = [Micrograph number, DF1, DF2, Azimuth, Additional Phase shift, CC, max spacing fit-to]
            data[b'DefocusU'][J] = zorroReg.CTF4Results[1]
            data[b'DefocusV'][J] = zorroReg.CTF4Results[2]
            data[b'DefocusAngle'][J] = zorroReg.CTF4Results[3]
            data[b'CtfFigureOfMerit'][J] = zorroReg.CTF4Results[5]
            data[b'Voltage'][J] = zorroReg.voltage
            data[b'SphericalAberration'][J] = zorroReg.C3
            data[b'AmplitudeContrast'][J] = amplitudeContrast
            data[b'DetectorPixelSize'][J] = physicalPixelSize
            data[b'Magnification'][J] = physicalPixelSize / (zorroReg.pixelsize * 1E-3)
            
        with open( outputName, 'wb' ) as fh:

            fh.write( b"\ndata_\n\nloop_\n")
            for J in sorted(lookupDict.keys()):
                fh.write( b"_rln" + lookupDict[J] + (" #%d\n" % J).encode() )
            
            lCnt = len( lookupDict ) 
            for I in np.arange(0,len(zorroList)):
                fh.write( b"    ")
                for J in np.arange(0,lCnt):
                    fh.write( str( data[lookupDict[J+1]][I] ).encode() )
                    fh.write( b"   " )
                fh.write( b"\n" )
            
    def gctfHistogramFilter( self, defocusThreshold = 40000, astigThreshold = 800, 
                            fomThreshold = 0.0, resThreshold = 6.0, 
                            starName = "micrographs_all_gctf.star", outName = "micrographs_pruned_gctf.star" ):
        """
        gctfHistogramFilter( self, defocusThreshold = 40000, astigThreshold = 800, 
                            fomThreshold = 0.0, resThreshold = 6.0, 
                            starName = "micrographs_all_gctf.star", outName = "micrographs_pruned_gctf.star" )
                            
        Calculates histograms of defocus, astigmatism, figure-of-merit (Pearson correlation coefficient),
        and resolution limit, and applies the thresholds as specified in the keyword arguments.  
        Plots are generated showing the threshold level.  
        
        The output star file `outName` rejects all micrographs that fail any of the thresholds.
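        
        Example (a sketch; the thresholds shown are hypothetical):
        
            rln = ReliablePy()
            rln.gctfHistogramFilter( defocusThreshold=30000, astigThreshold=500 )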
        """
        self.load( starName )
        
        defocusU = self.star[b'data_'][b'DefocusU']
        defocusV = self.star[b'data_'][b'DefocusV']
        finalResolution = self.star[b'data_'][b'FinalResolution']
        ctfFoM = self.star[b'data_'][b'CtfFigureOfMerit']
        
        defocusMean = 0.5 * defocusU + 0.5 * defocusV
        astig = np.abs( defocusU - defocusV )
        
        [hDefocus, cDefocus] = np.histogram( defocusMean,  
            bins=np.arange(np.min(defocusMean),np.max(defocusMean),500.0) )
        hDefocus = hDefocus.astype('float32')
        cDefocus = cDefocus[:-1] +500.0/2
        
        [hAstig, cAstig] = np.histogram( astig, 
            bins=np.arange(0, np.max(astig), 500.0) )
        hAstig = hAstig.astype('float32')
        cAstig = cAstig[:-1] +500.0/2
        
        [hFoM, cFoM] = np.histogram( ctfFoM,  
            bins=np.arange(0.0,np.max(ctfFoM),0.002) )
        hFoM = hFoM.astype('float32')
        cFoM = cFoM[:-1] +0.002/2.0
        
        [hRes, cRes] = np.histogram( finalResolution,  
            bins=np.arange(np.min(finalResolution),np.max(finalResolution),0.20) )
        hRes = hRes.astype('float32')
        cRes = cRes[:-1] +0.20/2.0
        
        plt.figure()
        plt.fill_between( cDefocus, hDefocus, np.zeros(len(hDefocus)), facecolor='steelblue', alpha=0.5 )
        plt.plot( [defocusThreshold, defocusThreshold], [0, np.max(hDefocus)], "--", color='firebrick' )
        plt.xlabel( r"Defocus, $C_1 (\AA)$" )
        plt.ylabel( "Histogram counts" )
        
        plt.figure()
        plt.fill_between( cAstig, hAstig, np.zeros(len(hAstig)), facecolor='forestgreen', alpha=0.5 )
        plt.plot( [astigThreshold, astigThreshold], [0, np.max(hAstig)], "--", color='firebrick' )
        plt.xlabel( r"Astigmatism, $A_1 (\AA)$" )
        plt.ylabel( "Histogram counts" )
        
        plt.figure()
        plt.fill_between( cFoM, hFoM, np.zeros(len(hFoM)), facecolor='darkorange', alpha=0.5 )
        plt.plot( [fomThreshold, fomThreshold], [0, np.max(hFoM)], "--", color='firebrick' )
        plt.xlabel( "Figure of Merit, $R^2$" )
        plt.ylabel( "Histogram counts" )
        
        plt.figure()
        plt.fill_between( cRes, hRes, np.zeros(len(hRes)), facecolor='purple', alpha=0.5 )
        plt.plot( [resThreshold, resThreshold], [0, np.max(hRes)], "--", color='firebrick' )
        plt.xlabel( r"Fitted Resolution, $r (\AA)$" )
        plt.ylabel( "Histogram counts" )
        
        
        #keepIndices = np.ones( len(defocusU), dtype='bool' )
        keepIndices = ( ( defocusMean < defocusThreshold) & (astig < astigThreshold) &
                        (ctfFoM > fomThreshold ) & (finalResolution < resThreshold) )
        
        print( "KEEPING %d of %d micrographs" %(np.sum(keepIndices), defocusU.size) )
        
        for key in self.star[b'data_']:
            self.star[b'data_'][key] = self.star[b'data_'][key][keepIndices]
            
        self.saveDataStar( outName )
        
    def __loadPar( self, parname ):
        """
        Frealign files normally have 16 columns, with any number of comment lines that start with 'C'
        """
        # Ergh, cannot have trailing comments with np.loadtxt?  
        self.parCol = [b"N", b"PSI", b"THETA", b"PHI", b"SHX", b"SHY", b"MAG", b"FILM", b"DF1", b"DF2", \
                     b"ANGAST", b"OCC", b"LogP", b"SIGMA", b"SCORE", b"CHANGE" ]
                     
        self.par = pandas.read_csv( parname, engine='c', sep=' ', header=None, names=self.parCol, quotechar='C' )
        #self.par.append( np.loadtxt( parname, comments=b'C' ) )
        # TODO: split into a dictionary?  
        # TODO: read comments as well
        # TODO: use pandas instead?
        #self.parCol = {b"N":0, b"PSI":1, b"THETA":2, b"PHI":3, b"SHX":4, b"SHY":5, b"MAG":6, b"FILM":7, b"DF1":8, b"DF2":9, 
        #             b"ANGAST":10, b"OCC":11, b"LogP":12, b"SIGMA":13, b"SCORE":14, b"CHANGE":15 }
        #self.parComments = np.loadtxt( parname, comments=b' ' )
        
               
    def __loadStar( self, starname ):
        with open( starname, 'rb' ) as starFile:
            starLines = starFile.readlines()
        
            # Remove any lines that are blank (the file is read as bytes)
            blankLines = [I for I, line in enumerate(starLines) if not line.strip()]
            for blank in sorted( blankLines, reverse=True ):
                del starLines[blank]
        
            # Top-level keys all start with data_
            headerTags = []; headerIndices = []
            for J, line in enumerate(starLines):
                if line.startswith( b"data_" ): # New headerTag
                    headerTags.append( line.strip() )
                    headerIndices.append( J )
            # for end-of-file
            headerIndices.append(-1)
            
            # Build dict keys
            for K, tag in enumerate( headerTags ):
                self.star[tag] = OrderedDict()
                # Read in _rln lines and assign them as dict keys
                
                lastHeaderIndex = 0
                foundLoop = False
                
                if headerIndices[K+1] == -1: # a stop index of -1 would drop the final line, so slice to the end
                    slicedLines = starLines[headerIndices[K]:]
                else:
                    slicedLines = starLines[headerIndices[K]:headerIndices[K+1]] 
                    
                for J, line in enumerate( slicedLines ):
                    if line.startswith( b"loop_" ):
                        foundLoop = True
                    elif line.startswith( b"_rln" ):
                        lastHeaderIndex = J
                        # Find all the keys that start with _rln, they are sub-dict keys
                        newKey = line.split()[0][4:]
                        try:
                            newValue = line.split()[1]
                            # If newValue starts with a #, strip it
                            newValue = newValue.lstrip( b'#' )
                        except:
                            # Some really old Relion star files don't have the column numbers, so assume it's ordered
                            newValue = J
                            
                        # Try to make newValue an int or float, or leave it as a string if that fails
                        try:
                            self.star[tag][newKey] = int( newValue )
                        except ValueError:
                            try: 
                                self.star[tag][newKey] = float( newValue )
                            except ValueError: # leave as a string
                                self.star[tag][newKey] = newValue
        
                # Now run again starting at lastHeaderIndex
                if foundLoop: 
                    # Need to check to make sure it's not an empty dict
                    if self.star[tag] == OrderedDict():
                        continue
                    
                    # Sometimes we have an empty line on the end.
                    for J in np.arange(len(slicedLines)-1,0,-1):
                        if bool( slicedLines[J].strip() ):
                            break
                        slicedLines = slicedLines[:J]

                    endIndex = len(slicedLines)
                    
                    # Reverse the sub-dictionary so we can determine which column goes to which key
                    lookup = dict( zip( self.star[tag].values(), self.star[tag].keys() ) )
                    # Pre-allocate, we can determine types later.   
                    itemCount = endIndex - lastHeaderIndex - 1
                    testSplit = slicedLines[lastHeaderIndex+1].split()
                    for K, test in enumerate( testSplit ):
                        self.star[tag][lookup[K+1]] = [None] * itemCount
        
                    # Loop through and parse items
                    for J, line in enumerate( slicedLines[lastHeaderIndex+1:endIndex] ):
                        for K, item in enumerate( line.split() ):
                            self.star[tag][lookup[K+1]][J] = item
                    pass
                
                    # Try to convert to int, then float, otherwise leave as a string
                    for key in self.star[tag].keys():
                        try:
                            self.star[tag][key] = np.asarray( self.star[tag][key], dtype='int' )
                        except ValueError:
                            try:
                                self.star[tag][key] = np.asarray( self.star[tag][key], dtype='float' )
                            except ValueError: # leave as strings
                                self.star[tag][key] = np.asarray( self.star[tag][key] )

    def __loadMRC( self, mrcname ):
        mrcimage, mrcheader  = ioMRC.readMRC( mrcname, pixelunits=u'nm' )
        self.mrc.append( mrcimage )
        self.mrc_header.append( mrcheader )
        
    def __loadBox( self, boxname ):
        self.box.append( np.loadtxt( boxname ) )

# End of ReliablePy class