File: filetype.c

package info (click to toggle)
cf-python 1.3.2%2Bdfsg1-4
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 7,996 kB
  • sloc: python: 51,733; ansic: 2,736; makefile: 78; sh: 2
file content (200 lines) | stat: -rw-r--r-- 6,536 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
  /* Routines for auto-determining file type from the start of the file 
   * contents.
   * 
   * =================
   * Fields file tests
   * =================
   *
   * These are done first. We test the second word, which should be the
   * submodel ID - is this 1, 2 or 4?
   *
   * ==> Could the fields file test give a false positive with a PP file?
   *
   *     Test for fields file only true if the first 16 bytes, when viewed 
   *     as 4 32-bit integers, are one of the following:
   *
   *     bytes 1-4    bytes 5-8    bytes 9-12   bytes 13-16
   *     ---------------------------------------------------
   *         any         any          0         1/2/4(BE)   <-- 64-bit BE FF
   *         any         any      1/2/4(LE)        0        <-- 64-bit LE FF
   *         any      1/2/4(BE)      any          any       <-- 32-bit BE FF
   *         any      1/2/4(LE)      any          any       <-- 32-bit LE FF
   *   
   *     For PP files, we in fact have:
   *   
   *          0       512/1024(BE)    0          lbyr(BE)  <--- 64-bit BE PP
   *      512/1024(LE)      0      lbyr(LE)         0      <--- 64-bit LE PP
   *       256/512(BE)  lbyr(BE)   lbmon(BE)    lbdat(BE)  <--- 32-bit BE PP
   *       256/512(LE)  lbyr(LE)   lbmon(LE)    lbdat(LE)  <--- 32-bit LE PP
   *   
   *     Possible false positives:
   *
   *     - any PP with lbyr=1/2/4 looks like FF of same length and endianness
   *     - 32-bit BE PP with lbmon=0, lbdat=1/2/4 looks like 64-bit BE FF
   *     - 32-bit LE PP with lbmon=1/2/4, lbdat=0 looks like 64-bit LE FF
   *   
   *      Do we care about these cases?
   *          lbyr=1/2/4: probably NO
   *          lbmon=0, lbdat non-zero: probably NO
   *          lbmon=1/2/4, lbdat=0 - possible monthly climatology? <== YES
   *
   *      There is always the option for the user to force the file type, but:
   *        **FIXME**: an additional test could help.
   *
   * ========
   * PP tests
   * ========
   * 
   * If the fields-file test is false, then test for types of plain PP file.
   * Here we test the first word, which should be the record length (put there
   * by Fortran).
   *
   * Check first for a 64-bit PP file, but in addition to the first word being
   * a valid possibility, this must also pass the stringent test of every
   * other 32-bit value being zero throughout the first 14 64-bit words,
   * although because of endianness issues we accept the sequence of
   * alternating zeros to start either at the first or second 32-bit value.
   * The point is that for a true 64-bit file, these should all be small
   * integers, so the most significant bytes will be 0.  (The first possibly
   * large integer is the 15th: LBLREC.)  However, for a 32-bit file this test
   * will span the first 28 elements.  Even if the date elements (first 12
   * words) are all 0, LBROW (18) and LBNPT (19) should both be non-zero, so
   * both the set of even-positioned integers and the set of odd-positioned
   * integers will each contain at least one non-zero value, and the test will
   * fail. 
   * 
   * If the 64-bit tests fail, try 32-bit.
   */

#include <sys/types.h>
#include <unistd.h>
#include <string.h>

#include "umfileint.h"

/* values passed to valid_um_word2 and valid_pp_word1 could be 32 or
 * 64-bit.  Declare as longer of these two (int64_t), and shorter will be
 * accommodated also.
 */

static int valid_um_word2(int64_t val)
{
  /* The second word of the fixed length header carries the model ID; the
   * only values accepted here are 1, 2 and 4.  Returns 1 for an accepted
   * value and 0 for anything else.
   */
  switch (val)
    {
    case 1:
    case 2:
    case 4:
      return 1;
    default:
      return 0;
    }
}

static int valid_pp_word1(int64_t val, int wsize)
{
  /* The first word should be the integer written by Fortran giving the
   * length of the header record.  Two lengths are accepted: 64 * wsize and
   * 128 * wsize.  Returns 1 if val equals either of them, otherwise 0.
   */
  const int len_64 = 64 * wsize;
  const int len_128 = 128 * wsize;

  if (val == len_64)
    return 1;

  return val == len_128;
}

/* Tests whether a sequence of 32-bit integers has every other value equal
 * to zero, checking only the positions vals[0], vals[2], ...,
 * vals[2 * (num_pairs - 1)].  The values in between are not inspected.
 *
 * Returns 1 if all inspected values are zero (including the case
 * num_pairs <= 0, where nothing is inspected), and 0 as soon as a non-zero
 * value is found.
 *
 * Elements are reached by index rather than by stepping a pointer, so that
 * no pointer beyond the end of the caller's array is ever formed when the
 * caller passes an address part-way into its buffer.
 */
static int is_alternating_zeros_without_offset(const int32_t *vals, int num_pairs)
{
  int i;

  for (i = 0; i < num_pairs; i++)
    {
      if (vals[2 * i] != 0) return 0;
    }
  return 1;
}

/* Tests whether a sequence of 32-bit integers has every other value equal
 * to zero, where the run of zeros may begin at either the first or the
 * second value.  Between the two attempts, elements vals[0] up to
 * vals[2 * num_pairs - 1] may be read.
 *
 * Returns 1 if either alignment matches, otherwise 0.
 */
static int is_alternating_zeros(int32_t *vals, int num_pairs)
{
  /* zeros at positions 0, 2, 4, ... */
  if (is_alternating_zeros_without_offset(vals, num_pairs))
    return 1;

  /* otherwise try zeros at positions 1, 3, 5, ... */
  return is_alternating_zeros_without_offset(vals + 1, num_pairs) ? 1 : 0;
}

#define N_PAIRS 14

/* Auto-detect the format, word size and byte ordering of the file open on
 * descriptor fd from the first 8 * N_PAIRS bytes of its contents, and store
 * the result in *file_type.
 *
 * The tests are tried in the following order and the first match wins:
 *   fields file: 32-bit native, 64-bit native, 32-bit swapped, 64-bit swapped
 *   plain PP:    64-bit native, 64-bit swapped, 32-bit native, 32-bit swapped
 *
 * Returns 0 if a type was identified.  Returns 1, leaving *file_type
 * unmodified, if the file could not be repositioned to its start, if fewer
 * than 8 * N_PAIRS bytes could be read, or if none of the tests matched.
 */
int detect_file_type_(int fd, File_type *file_type)
{
  int32_t data4[2 * N_PAIRS], data4s[2];
  int64_t data8[2], data8s[2];

  /* read and store the first 2 * N_PAIRS 4-byte words, then store the first
   * two integers of this according to possible suppositions of 4- or
   * 8-byte, and of native or swapped byte ordering
   */

  /* if the rewind fails, the read below would start at an arbitrary offset
   * and the type would be guessed from the wrong bytes, so give up instead
   */
  if (lseek(fd, 0, SEEK_SET) == (off_t) -1) return 1;
  if (read(fd, data4, sizeof data4) != (ssize_t) sizeof data4) return 1;

  /* first two 8-byte words, as stored in the file */
  memcpy(data8, data4, sizeof data8);

  /* first two 4-byte words; swap_bytes_sgl is declared elsewhere and is
   * presumably an in-place byte swap of 4-byte values - TODO confirm
   */
  memcpy(data4s, data4, sizeof data4s);
  swap_bytes_sgl(data4s, 2);

  /* first two 8-byte words; swap_bytes_dbl is declared elsewhere and is
   * presumably an in-place byte swap of 8-byte values - TODO confirm
   */
  memcpy(data8s, data4, sizeof data8s);
  swap_bytes_dbl(data8s, 2);


  /* --- Fields file cases: the second word must be a valid model ID -- */

  if (valid_um_word2(data4[1]))
    {
      file_type->format = fields_file;
      file_type->byte_ordering = NATIVE_ORDERING;
      file_type->word_size = 4;
    }
  else if (valid_um_word2(data8[1]))
    {
      file_type->format = fields_file;
      file_type->byte_ordering = NATIVE_ORDERING;
      file_type->word_size = 8;
    }
  else if (valid_um_word2(data4s[1]))
    {
      file_type->format = fields_file;
      file_type->byte_ordering = REVERSE_ORDERING;
      file_type->word_size = 4;
    }
  else if (valid_um_word2(data8s[1]))
    {
      file_type->format = fields_file;
      file_type->byte_ordering = REVERSE_ORDERING;
      file_type->word_size = 8;
    }

  /* --- Plain PP cases: the first word must be a valid record length -- */

  /* the 64-bit cases additionally require every other 32-bit value in the
   * data that was read to be zero; that check is made on the unswapped
   * data for both byte orderings
   */
  else if (valid_pp_word1(data8[0], 8) && is_alternating_zeros(data4, N_PAIRS))
    {
      file_type->format = plain_pp;
      file_type->byte_ordering = NATIVE_ORDERING;
      file_type->word_size = 8;
    }
  else if (valid_pp_word1(data8s[0], 8) && is_alternating_zeros(data4, N_PAIRS))
    {
      file_type->format = plain_pp;
      file_type->byte_ordering = REVERSE_ORDERING;
      file_type->word_size = 8;
    }
  else if (valid_pp_word1(data4[0], 4))
    {
      file_type->format = plain_pp;
      file_type->byte_ordering = NATIVE_ORDERING;
      file_type->word_size = 4;
    }
  else if (valid_pp_word1(data4s[0], 4))
    {
      file_type->format = plain_pp;
      file_type->byte_ordering = REVERSE_ORDERING;
      file_type->word_size = 4;
    }
  else
    {
      /* type not identified */
      return 1;
    }
  return 0;
}