File: CdfReader.java

package info (click to toggle)
jcdf 1.2.5%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 572 kB
  • sloc: java: 5,315; makefile: 198; sh: 98
file content (306 lines) | stat: -rw-r--r-- 11,471 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
package uk.ac.bristol.star.cdf;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.logging.Logger;
import uk.ac.bristol.star.cdf.record.Buf;
import uk.ac.bristol.star.cdf.record.Bufs;
import uk.ac.bristol.star.cdf.record.CdfDescriptorRecord;
import uk.ac.bristol.star.cdf.record.CompressedCdfRecord;
import uk.ac.bristol.star.cdf.record.CompressedParametersRecord;
import uk.ac.bristol.star.cdf.record.Compression;
import uk.ac.bristol.star.cdf.record.NumericEncoding;
import uk.ac.bristol.star.cdf.record.Pointer;
import uk.ac.bristol.star.cdf.record.Record;
import uk.ac.bristol.star.cdf.record.RecordFactory;

/**
 * Examines a CDF file and provides methods to access its records.
 *
 * <p>Constructing an instance of this class reads enough of a file
 * to identify it as a CDF and work out how to access its records.
 * Most of the actual contents are only read from the data buffer
 * as required.
 * Although only the magic numbers and CDR are read during construction,
 * in the case of a file-compressed CDF the whole thing is uncompressed,
 * so it could still be an expensive operation.
 *
 * <p>For low-level access to the CDF internal records, use the
 * {@link #getCdr} method to get the CdfDescriptorRecord and use that
 * in conjunction with knowledge of the internal format of CDF files
 * as a starting point to chase pointers around the file constructing
 * other records.  When you have a pointer to another record, you can
 * use the record factory got from {@link #getRecordFactory} to turn
 * it into a typed Record object.
 *
 * @author   Mark Taylor
 * @since    19 Jun 2013
 */
public class CdfReader {

    /** First magic number identifying a version 3 CDF. */
    private static final int MAGIC1_V3 = 0xcdf30001;

    /** First magic number identifying a version 2.6/2.7 CDF. */
    private static final int MAGIC1_V26 = 0xcdf26002;

    /** First magic number identifying a pre-version 2.6 CDF. */
    private static final int MAGIC1_PRE_V26 = 0x0000ffff;

    /** Second magic number indicating no whole-file compression. */
    private static final int MAGIC2_UNCOMPRESSED = 0x0000ffff;

    /** Second magic number indicating whole-file compression. */
    private static final int MAGIC2_COMPRESSED = 0xcccc0001;

    /** Number of bytes occupied by the two magic numbers. */
    private static final int MAGIC_LENG = 8;

    private final CdfDescriptorRecord cdr_;
    private final Buf buf_;
    private final RecordFactory recordFactory_;

    private static final Logger logger_ =
        Logger.getLogger( CdfReader.class.getName() );

    /**
     * Constructs a CdfReader from a buffer containing its byte data.
     *
     * <p>Note that the supplied buffer is reconfigured by this constructor:
     * its 64-bit flag is set according to the detected CDF variant,
     * and, unless the CDF uses whole-file compression (in which case a
     * new uncompressed buffer is configured instead), so is its encoding.
     *
     * @param   buf  buffer containing CDF file
     * @throws  CdfFormatException  if the magic numbers are not recognised,
     *          if the CDF is not a single-file CDF, or if its numeric
     *          encoding is not supported
     * @throws  IOException  if reading from the buffer fails
     */
    public CdfReader( Buf buf ) throws IOException {
        Pointer ptr = new Pointer( 0 );

        // Read the CDF magic number bytes; the pointer is left at the
        // offset of whatever follows them.
        int magic1 = buf.readInt( ptr );
        int magic2 = buf.readInt( ptr );
        int offsetRec0 = (int) ptr.get();

        // Work out from that what variant (if any) of the CDF format
        // this file implements.
        CdfVariant variant = decodeMagic( magic1, magic2 );
        if ( variant == null ) {
            String msg = "Unrecognised magic numbers: "
                       + "0x" + Integer.toHexString( magic1 )
                       + ", "
                       + "0x" + Integer.toHexString( magic2 );
            throw new CdfFormatException( msg );
        }
        logger_.config( "CDF magic number for " + variant.label_ );
        logger_.config( "Whole file compression: " + variant.compressed_ );

        // The length of the pointers and sizes used in CDF files are
        // dependent on the CDF file format version.
        // Notify the buffer which regime is in force for this file.
        // Note that no operations for which this makes a difference have
        // yet taken place.
        buf.setBit64( variant.bit64_ );

        // The lengths of some fields differ according to CDF version.
        // Construct a record factory that does it right.
        recordFactory_ = new RecordFactory( variant.nameLeng_ );

        // Locate the buffer holding the internal records.  This may be
        // the submitted buffer itself, or the records may be stored in a
        // compressed form which has to be uncompressed into a new buffer.
        Buf recordBuf = variant.compressed_
                      ? uncompressRecords( recordFactory_, buf, offsetRec0 )
                      : buf;

        // Read the CDF Descriptor Record.
        cdr_ = recordFactory_.createRecord( recordBuf, offsetRec0,
                                            CdfDescriptorRecord.class );

        // Interrogate CDR for required information.
        boolean isSingleFile = Record.hasBit( cdr_.flags, 1 );
        if ( ! isSingleFile ) {
            throw new CdfFormatException( "Multi-file CDFs not supported" );
        }
        NumericEncoding encoding =
            NumericEncoding.getEncoding( cdr_.encoding );
        Boolean bigEndian = encoding.isBigendian();
        if ( bigEndian == null ) {
            throw new CdfFormatException( "Unsupported encoding " + encoding );
        }
        recordBuf.setEncoding( bigEndian.booleanValue() );
        buf_ = recordBuf;
    }

    /**
     * Constructs a CdfReader from a readable file containing its byte data.
     *
     * @param  file  CDF file
     * @throws  IOException  if the file cannot be read or is not
     *          a supported CDF
     */
    public CdfReader( File file ) throws IOException {
        this( Bufs.createBuf( file, true, true ) );
    }

    /**
     * Returns the buffer containing the uncompressed record stream for
     * this reader's CDF file.
     * This will be the buffer originally submitted at construction time
     * only if the CDF does not use whole-file compression.
     *
     * @return   buffer containing CDF records
     */
    public Buf getBuf() {
        return buf_;
    }

    /**
     * Returns a RecordFactory that can be applied to this reader's Buf
     * to construct CDF Record objects.
     *
     * @return  record factory
     */
    public RecordFactory getRecordFactory() {
        return recordFactory_;
    }

    /**
     * Returns the CDF Descriptor Record object for this reader's CDF.
     *
     * @return  CDF Descriptor Record
     */
    public CdfDescriptorRecord getCdr() {
        return cdr_;
    }

    /**
     * Examines a byte array to see if it looks like the start of a CDF file.
     *
     * @param   intro  byte array, at least 8 bytes if available;
     *                 a null or shorter array is never a match
     * @return  true iff the first 8 bytes of <code>intro</code> are
     *          a CDF magic number
     */
    public static boolean isMagic( byte[] intro ) {
        if ( intro == null || intro.length < MAGIC_LENG ) {
            return false;
        }
        return decodeMagic( readInt( intro, 0 ), readInt( intro, 4 ) ) != null;
    }

    /**
     * Uncompresses the records of a whole-file compressed CDF into
     * a new buffer.
     *
     * <p>The compressed data is the data record of the Compressed CDF
     * Record (CCR).
     * When uncompressed it can be treated just like the whole of
     * an uncompressed CDF file, except that it doesn't have the
     * magic numbers (8 bytes) prepended to it.
     * Note however that any file offsets recorded within the file
     * are given as if the magic numbers are present - this is not
     * very clear from the Internal Format Description document,
     * but it appears to be the case from reverse engineering
     * whole-file compressed files.  To work round this, we hack
     * the compression to prepend a dummy 8-byte block to the
     * uncompressed stream it provides.
     *
     * @param  recordFactory  factory used to read the CCR and the
     *                        Compressed Parameters Record
     * @param  buf   buffer containing the compressed CDF file
     * @param  offsetRec0  offset into <code>buf</code> of the first record,
     *                     that is the length of the magic number block
     * @return   new buffer containing padding followed by the
     *           uncompressed records
     * @throws  IOException  if the records cannot be read or uncompressed
     */
    private static Buf uncompressRecords( RecordFactory recordFactory,
                                          Buf buf, int offsetRec0 )
            throws IOException {

        // Work out compression type and location of compressed data.
        CompressedCdfRecord ccr =
            recordFactory.createRecord( buf, offsetRec0,
                                        CompressedCdfRecord.class );
        CompressedParametersRecord cpr =
            recordFactory.createRecord( buf, ccr.cprOffset,
                                        CompressedParametersRecord.class );
        final Compression compress =
            Compression.getCompression( cpr.cType );

        // Wrap the compression so that its output starts with a block
        // of dummy bytes standing in for the missing magic numbers.
        final int prepad = offsetRec0;
        assert prepad == 8;
        Compression padCompress =
                new Compression( "Padded " + compress.getName() ) {
            public InputStream uncompressStream( InputStream in )
                    throws IOException {
                InputStream in1 =
                    new ByteArrayInputStream( new byte[ prepad ] );
                InputStream in2 = compress.uncompressStream( in );
                return new SequenceInputStream( in1, in2 );
            }
        };
        return Bufs.uncompress( padCompress, buf, ccr.getDataOffset(),
                                ccr.uSize + prepad );
    }

    /**
     * Reads a 4-byte big-endian integer from a byte array.
     *
     * @param  b  byte array
     * @param  ioff   index into <code>b</code> of integer start
     * @return   int value
     */
    private static int readInt( byte[] b, int ioff ) {
        // Mask each byte to avoid sign extension before shifting.
        return ( b[ ioff + 0 ] & 0xff ) << 24
             | ( b[ ioff + 1 ] & 0xff ) << 16
             | ( b[ ioff + 2 ] & 0xff ) <<  8
             | ( b[ ioff + 3 ] & 0xff ) <<  0;
    }

    /**
     * Interprets two integer values as the magic number sequence at the
     * start of a CDF file, and returns an object encoding the information
     * about CDF encoding specifics.
     *
     * <p>The first number identifies the format version, and the second
     * indicates whether whole-file compression is in use.
     * The compressed marker is only accepted for the V3 and V2.6/2.7
     * variants.
     *
     * @param   magic1  big-endian int at CDF file offset 0x00
     * @param   magic2  big-endian int at CDF file offset 0x04
     * @return  object describing CDF encoding specifics,
     *          or null if this is not a recognised CDF magic number
     */
    private static CdfVariant decodeMagic( int magic1, int magic2 ) {
        final boolean isPlain = magic2 == MAGIC2_UNCOMPRESSED;
        final boolean isCompressed = magic2 == MAGIC2_COMPRESSED;
        switch ( magic1 ) {
            case MAGIC1_V3:  // version 3.0 - 3.4 (3.*?)
                return ( isPlain || isCompressed )
                     ? new CdfVariant( "V3", true, 256, isCompressed )
                     : null;
            case MAGIC1_V26:  // version 2.6/2.7
                return ( isPlain || isCompressed )
                     ? new CdfVariant( "V2.6/2.7", false, 64, isCompressed )
                     : null;
            case MAGIC1_PRE_V26:  // pre-version 2.6
                // Name length of 64 is true as far as I know.
                return isPlain
                     ? new CdfVariant( "pre-V2.6", false, 64, false )
                     : null;
            default:
                return null;
        }
    }

    /**
     * Encapsulates CDF encoding details as determined from the magic number.
     */
    private static class CdfVariant {
        final String label_;
        final boolean bit64_;
        final int nameLeng_;
        final boolean compressed_;

        /**
         * Constructor.
         *
         * @param  label  short string indicating CDF format version number
         * @param  bit64  true for 8-byte (64-bit) pointers,
         *                false for 4-byte (32-bit) pointers
         * @param  nameLeng  number of bytes used for attribute and variable
         *                   names
         * @param  compressed true iff the CDF file uses whole-file compression
         */
        CdfVariant( String label, boolean bit64, int nameLeng,
                    boolean compressed ) {
            label_ = label;
            bit64_ = bit64;
            nameLeng_ = nameLeng;
            compressed_ = compressed;
        }
    }
}