
// Automatically generated by m214003 at 2024-10-30, do not edit

// CGRIBEXLIB_VERSION="2.3.1"

#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#pragma GCC diagnostic warning "-Wstrict-overflow"
#endif

#ifdef _ARCH_PWR6
#pragma options nostrict
#include <ppu_intrinsics.h>
#endif

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdbool.h>
#include <sys/types.h>
#include <limits.h>
#include <inttypes.h>

#include "file.h"
#include "dmemory.h"
#include "julian_date.h"

#ifndef CGRIBEX_TEMPLATES_H
#define CGRIBEX_TEMPLATES_H

/*
 * Name-mangling helpers for "template"-style code.
 *
 * CAT(X,Y) pastes its two arguments together with an underscore in between,
 * producing the single identifier X_Y.  Because ## suppresses macro expansion
 * of its operands, TEMPLATE(X,Y) adds one level of indirection: its arguments
 * are fully macro-expanded first and only then handed to CAT.
 */
// clang-format off
#define CAT(X,Y)      X##_##Y
#define TEMPLATE(X,Y) CAT(X,Y)
// clang-format on

#endif
#ifndef GRIB_INT_H
#define GRIB_INT_H

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <float.h>

// clang-format off

#ifndef  CGRIBEX_H
#include "cgribex.h"
#endif
#ifndef  ERROR_H
#include "error.h"
#endif

/* Shorthand for the byte type; may be pre-defined by the including code. */
#ifndef UCHAR
#define  UCHAR  unsigned char
#endif


/*
 * VECTORCODE is switched on for the compilers/platforms that define CRAY,
 * SX or __uxpch__.
 * NOTE(review): the loop pragmas further down in this file test __uxp__,
 * not __uxpch__ - confirm which spelling is intended.
 */
#if defined (CRAY) || defined (SX) || defined (__uxpch__)
#define VECTORCODE 1
#endif


/*
 * GRIBPACK is the element type of the working buffer used while packing.
 * With VECTORCODE it is a 32-bit word and PACK_GRIB / UNPACK_GRIB name the
 * packInt32 / unpackInt32 conversion routines; otherwise it is a plain byte
 * and PACK_GRIB / UNPACK_GRIB are left undefined.
 */
#ifdef VECTORCODE
#  define  GRIBPACK     uint32_t
#  define  PACK_GRIB    packInt32
#  define  UNPACK_GRIB  unpackInt32
#else
#  define  GRIBPACK     unsigned char
#endif

/*
 * HOST_ENDIANNESS evaluates to the first byte, in memory order, of the
 * 32-bit constant 0x00030201: that byte is 0x01 when the least significant
 * byte is stored first (little endian) and 0x00 when the most significant
 * byte is stored first (big endian).
 *
 * The C branch reads it from a compound literal; the C++ branch uses a
 * file-static array instead (presumably because compound literals are not
 * part of standard C++).
 */
#ifndef HOST_ENDIANNESS
#ifdef __cplusplus
static const uint32_t HOST_ENDIANNESS_temp[1] = { UINT32_C(0x00030201) };
#define HOST_ENDIANNESS (((const unsigned char *)HOST_ENDIANNESS_temp)[0])
#else
#define HOST_ENDIANNESS (((const unsigned char *)&(const uint32_t[1]){UINT32_C(0x00030201)})[0])
#endif
#endif

/* True when the first stored byte is the most significant one. */
#define  IS_BIGENDIAN()  (HOST_ENDIANNESS == 0)

/*
 * DBL_IS_NAN(x): non-zero when x is a NaN.  A definition supplied earlier
 * by the including code always wins (every branch is under #ifndef).
 *
 *  - IBM xlC: always use the self-comparison (x) != (x); the original
 *    comment cites performance problems with the alternatives.
 *  - otherwise: isnan() if HAVE_DECL_ISNAN is defined, else fpclassify()
 *    if <math.h> provides FP_NAN as a macro, else the self-comparison.
 *
 * NOTE(review): the test is defined(HAVE_DECL_ISNAN), which is also true
 * when the build system defines the macro to 0 - confirm how config.h
 * sets it.
 */
#if defined (__xlC__) /* performance problems on IBM */
#ifndef DBL_IS_NAN
#  define DBL_IS_NAN(x)     ((x) != (x))
#endif
#else
#ifndef DBL_IS_NAN
#if  defined  (HAVE_DECL_ISNAN)
#  define DBL_IS_NAN(x)     (isnan(x))
#elif  defined  (FP_NAN)
#  define DBL_IS_NAN(x)     (fpclassify(x) == FP_NAN)
#else
#  define DBL_IS_NAN(x)     ((x) != (x))
#endif
#endif
#endif

/*
 * Equality tests built only from '<', so no '==' on floating-point operands
 * appears in the expansion.
 *
 * IS_NOT_EQUAL(x,y) is true when x is smaller or larger than y;
 * IS_EQUAL(x,y) is its negation.  If either operand is a NaN both '<'
 * comparisons are false, so IS_EQUAL reports true in that case.
 *
 * Each argument is evaluated twice, so do not pass expressions with side
 * effects.  The arguments are parenthesized so that operators of lower
 * precedence than '<' (e.g. ?:, &, |) inside an argument keep their meaning.
 */
#ifndef IS_EQUAL
#  define IS_NOT_EQUAL(x,y) ((x) < (y) || (y) < (x))
#  define IS_EQUAL(x,y)     (!IS_NOT_EQUAL(x,y))
#endif

/* dummy use of unused parameters to silence compiler warnings */
#ifndef UNUSED
#define  UNUSED(x) (void)(x)
#endif

/* All-ones bit masks: the low 24 bits and the low 23 bits respectively. */
#define  JP24SET    0xFFFFFF  /* 2**24 - 1 (---> 16777215) */
#define  JP23SET    0x7FFFFF  /* 2**23 - 1 (--->  8388607) */

#define  POW_2_M24  0.000000059604644775390625  /* pow(2.0, -24.0) */

#ifdef __cplusplus
extern "C" {
#endif

#define intpow2(x) (ldexp(1.0, (x)))

/*
 * Decode the 3-byte GRIB record length held in the bytes b1, b2, b3
 * (most significant first).
 *
 * The low 7 bits of b1 together with b2 and b3 form a 23-bit count.  When
 * the top bit of b1 is set, that count is expressed in units of 120 bytes
 * rather than in bytes: a workaround for the 24-bit limit of the length
 * field that relies on such products being rounded to multiples of 120.
 */
static inline int
gribrec_len(unsigned b1, unsigned b2, unsigned b3)
{
  const unsigned count = ((b1 & 0x7Fu) << 16) + (b2 << 8) + b3;
  const int length = (int) count;

  // top bit of the first byte selects the 120-byte unit
  return (b1 & 0x80u) ? length * 120 : length;
}

unsigned correct_bdslen(unsigned bdslen, long recsize, long gribpos);

/* CDI converter routines */

/* param format:  DDDCCCNNN */

void    cdiDecodeParam(int param, int *pnum, int *pcat, int *pdis);
int     cdiEncodeParam(int pnum, int pcat, int pdis);

/* date format:  YYYYMMDD */
/* time format:  hhmmss   */

void    cdiDecodeDate(int date, int *year, int *month, int *day);
int     cdiEncodeDate(int year, int month, int day);

void    cdiDecodeTime(int time, int *hour, int *minute, int *second);
int     cdiEncodeTime(int hour, int minute, int second);

/* CALENDAR types */

#define  CALENDAR_STANDARD        0  /* don't change this value (used also in cgribexlib)! */
#define  CALENDAR_GREGORIAN       1
#define  CALENDAR_PROLEPTIC       2
#define  CALENDAR_360DAYS         3
#define  CALENDAR_365DAYS         4
#define  CALENDAR_366DAYS         5
#define  CALENDAR_NONE            6

extern FILE *grprsm;

extern int  CGRIBEX_Debug, CGRIBEX_Fix_ZSE, CGRIBEX_Const;
extern int  CGRIBEX_grib_calendar;

void   gprintf(const char *caller, const char *fmt, ...);

void   grsdef(void);

void   prtbin(int kin, int knbit, int *kout, int *kerr);
void   confp3(double pval, int *kexp, int *kmant, int kbits, int kround);
double decfp2(int kexp, int kmant);
void   ref2ibm(double *pref, int kbits);

void   scale_complex_double(double *fpdata, int pcStart, int pcScale, int trunc, int inv);
void   scale_complex_float(float *fpdata, int pcStart, int pcScale, int trunc, int inv);
void   scatter_complex_double(double *fpdata, int pcStart, int trunc, int nsp);
void   scatter_complex_float(float *fpdata, int pcStart, int trunc, int nsp);
void   gather_complex_double(double *fpdata, size_t pcStart, size_t trunc, size_t nsp);
void   gather_complex_float(float *fpdata, size_t pcStart, size_t trunc, size_t nsp);

int    qu2reg2(double *pfield, int *kpoint, int klat, int klon,
	       double *ztemp, double msval, int *kret);
int    qu2reg3_double(double *pfield, int *kpoint, int klat, int klon,
		      double msval, int *kret, int omisng, int operio, int oveggy);
int    qu2reg3_float(float *pfield, int *kpoint, int klat, int klon,
		     float msval, int *kret, int omisng, int operio, int oveggy);

long   packInt32(uint32_t *up, unsigned char *cp, long bc, long tc);
long   packInt64(uint64_t *up, unsigned char *cp, long bc, long tc);
long   unpackInt32(const unsigned char *cp, uint32_t *up, long bc, long tc);
long   unpackInt64(const unsigned char *cp, uint64_t *up, long bc, long tc);

void  grib_encode_double(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
			 double *fsec3, int *isec4, double *fsec4, int klenp, int *kgrib,
			 int kleng, int *kword, int efunc, int *kret);
void  grib_encode_float(int *isec0, int *isec1, int *isec2, float *fsec2, int *isec3,
			float *fsec3, int *isec4, float *fsec4, int klenp, int *kgrib,
			int kleng, int *kword, int efunc, int *kret);

void  grib_decode_double(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
			 double *fsec3, int *isec4, double *fsec4, int klenp, int *kgrib,
			 int kleng, int *kword, int dfunc, int *kret);
void  grib_decode_float(int *isec0, int *isec1, int *isec2, float *fsec2, int *isec3,
			float *fsec3, int *isec4, float *fsec4, int klenp, int *kgrib,
			int kleng, int *kword, int dfunc, int *kret);


int grib1Sections(unsigned char *gribbuffer, long gribbufsize, unsigned char **pdsp,
		  unsigned char **gdsp, unsigned char **bmsp, unsigned char **bdsp, long *gribrecsize);
int grib2Sections(unsigned char *gribbuffer, long gribbufsize, unsigned char **idsp,
		  unsigned char **lusp, unsigned char **gdsp, unsigned char **pdsp,
		  unsigned char **drsp, unsigned char **bmsp, unsigned char **bdsp);

#ifdef  __cplusplus
}
#endif

// clang-format on

#endif /* GRIB_INT_H */
#ifndef GRIBDECODE_H
#define GRIBDECODE_H

// clang-format off

#define  UNDEFINED          9.999e20


/*
 * Byte-wise integer decoders; arguments are given most significant byte
 * first and are expected to be byte values (0..255).
 *
 * GET_INTn: sign-and-magnitude.  Bit 7 of the first byte is the sign
 * ((a & 128) >> 6 is 2 when set, giving a factor of 1-2 = -1), the
 * remaining bits are the magnitude.
 *
 * Every argument is parenthesized in the expansion so that expressions such
 * as `x | y` can be passed safely.  Arguments may be evaluated more than
 * once.
 */
#define  GET_INT3(a,b,c)    ((1-(int) ((unsigned) ((a) & 128) >> 6)) * (int) ((((a) & 127) << 16)+((b)<<8)+(c)))
#define  GET_INT2(a,b)      ((1-(int) ((unsigned) ((a) & 128) >> 6)) * (int) ((((a) & 127) << 8) + (b)))
#define  GET_INT1(a)        ((1-(int) ((unsigned) ((a) & 128) >> 6)) * (int) ((a)&127))

/*
 * GET_UINTn: plain unsigned big-endian values.  Each byte is converted to
 * unsigned before it is shifted, so a first byte >= 0x80 in GET_UINT4 no
 * longer overflows a signed int.  This still requires unsigned to be at
 * least 32 bits wide.
 */
#define  GET_UINT4(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
#define  GET_UINT3(a,b,c)   (((unsigned) (a) << 16) + ((unsigned) (b) << 8)  + (unsigned) (c))
#define  GET_UINT2(a,b)     (((unsigned) (a) << 8)  + (unsigned) (b))
#define  GET_UINT1(a)       ((unsigned)  (a))

/*
 * Four-character marker tests on a byte pointer s.  The argument is
 * parenthesized so pointer expressions such as `buf + offset` can be passed;
 * it is evaluated several times.
 */
#define  BUDG_START(s)      ((s)[0]=='B' && (s)[1]=='U' && (s)[2]=='D' && (s)[3]=='G')
#define  TIDE_START(s)      ((s)[0]=='T' && (s)[1]=='I' && (s)[2]=='D' && (s)[3]=='E')
#define  GRIB_START(s)      ((s)[0]=='G' && (s)[1]=='R' && (s)[2]=='I' && (s)[3]=='B')
#define  GRIB_FIN(s)        ((s)[0]=='7' && (s)[1]=='7' && (s)[2]=='7' && (s)[3]=='7')

/* GRIB1 Section 0: Indicator Section (IS) */

/* 3-byte length at offsets 4..6 and 1-byte edition number at offset 7. */
#define  GRIB1_SECLEN(s)     GET_UINT3((s)[ 4], (s)[ 5], (s)[ 6])
#define  GRIB_EDITION(s)     GET_UINT1((s)[ 7])

/* GRIB1 Section 1: Product Definition Section (PDS) */

/*
 * All PDS_* macros read from a byte pointer named `pds` that must be in
 * scope at the point of use; the index is the zero-based byte offset within
 * the section.  None of them checks `pds` for NULL or the section length.
 */
#define  PDS_Len             GET_UINT3(pds[ 0], pds[ 1], pds[ 2])
#define  PDS_CodeTable       GET_UINT1(pds[ 3])
#define  PDS_CenterID        GET_UINT1(pds[ 4])
#define  PDS_ModelID         GET_UINT1(pds[ 5])
#define  PDS_GridDefinition  GET_UINT1(pds[ 6])
#define  PDS_Sec2Or3Flag     GET_UINT1(pds[ 7])
/* Bits 7 and 6 of the flag byte select the optional GDS and BMS sections. */
#define  PDS_HAS_GDS         ((pds[7] & 128) != 0)
#define  PDS_HAS_BMS         ((pds[7] &  64) != 0)
#define  PDS_Parameter       GET_UINT1(pds[ 8])
#define  PDS_LevelType       GET_UINT1(pds[ 9])
/* The two level bytes, individually and combined into one 16-bit value. */
#define  PDS_Level1          (pds[10])
#define  PDS_Level2	     (pds[11])
#define  PDS_Level	     GET_UINT2(pds[10], pds[11])
/* The year byte is decoded as sign-and-magnitude (GET_INT1), unlike the other date fields. */
#define  PDS_Year            GET_INT1(pds[12])
#define  PDS_Month           GET_UINT1(pds[13])
#define  PDS_Day             GET_UINT1(pds[14])
#define  PDS_Hour            GET_UINT1(pds[15])
#define  PDS_Minute          GET_UINT1(pds[16])
/* Packed decimal forms: year*10000 + month*100 + day, and hour*100 + minute. */
/* PDS_Date is built from PDS_Year only; PDS_Century is not folded in here. */
#define  PDS_Date            (PDS_Year*10000+PDS_Month*100+PDS_Day)
#define  PDS_Time            (PDS_Hour*100+PDS_Minute)
#define  PDS_TimeUnit        GET_UINT1(pds[17])
#define  PDS_TimePeriod1     GET_UINT1(pds[18])
#define  PDS_TimePeriod2     GET_UINT1(pds[19])
#define  PDS_TimeRange       GET_UINT1(pds[20])
#define  PDS_AvgNum          GET_UINT2(pds[21], pds[22])
#define  PDS_AvgMiss         GET_UINT1(pds[23])
#define  PDS_Century         GET_UINT1(pds[24])
#define  PDS_Subcenter       GET_UINT1(pds[25])
/* Signed (sign-and-magnitude) 16-bit value. */
#define  PDS_DecimalScale    GET_INT2(pds[26],pds[27])


/* GRIB1 Section 2: Grid Description Section (GDS) */

/*
 * All GDS_* macros read from a byte pointer named `gds` that must be in
 * scope at the point of use; the index is the zero-based byte offset within
 * the section.  Only GDS_Len tolerates gds == NULL (it then yields 0); every
 * other macro dereferences gds unconditionally.
 */
#define  GDS_Len             ((gds) == NULL ? 0 : GET_UINT3(gds[0], gds[1], gds[2]))
#define  GDS_NV              GET_UINT1(gds[ 3])
#define  GDS_PVPL            GET_UINT1(gds[ 4])
/* GDS_PV: -1 when byte 3 (GDS_NV) is 0, otherwise byte 4 minus 1. */
#define  GDS_PV	             ((gds[3] ==    0) ? -1 : (int) gds[4] - 1)
/* GDS_PL: -1 when byte 4 is 0xFF, otherwise 4 * byte 3 + byte 4 - 1. */
#define  GDS_PL	             ((gds[4] == 0xFF) ? -1 : (int) gds[3] * 4 + (int) gds[4] - 1)
#define  GDS_GridType        GET_UINT1(gds[ 5])


/* GRIB1 Triangular grid of DWD */
#define  GDS_GME_NI2         GET_UINT2(gds[ 6], gds[ 7])
#define  GDS_GME_NI3         GET_UINT2(gds[ 8], gds[ 9])
#define  GDS_GME_ND          GET_UINT3(gds[10], gds[11], gds[12])
#define  GDS_GME_NI          GET_UINT3(gds[13], gds[14], gds[15])
#define  GDS_GME_AFlag       GET_UINT1(gds[16])
#define  GDS_GME_LatPP       GET_INT3(gds[17], gds[18], gds[19])
#define  GDS_GME_LonPP       GET_INT3(gds[20], gds[21], gds[22])
#define  GDS_GME_LonMPL      GET_INT3(gds[23], gds[24], gds[25])
#define  GDS_GME_BFlag       GET_UINT1(gds[27])

/* GRIB1 Spectral */
#define  GDS_PentaJ          GET_UINT2(gds[ 6], gds[ 7])
#define  GDS_PentaK          GET_UINT2(gds[ 8], gds[ 9])
#define  GDS_PentaM          GET_UINT2(gds[10], gds[11])
#define  GDS_RepType         GET_UINT1(gds[12])
#define  GDS_RepMode         GET_UINT1(gds[13])

/* GRIB1 Regular grid */
#define  GDS_NumLon          GET_UINT2(gds[ 6], gds[ 7])
#define  GDS_NumLat          GET_UINT2(gds[ 8], gds[ 9])
#define  GDS_FirstLat        GET_INT3(gds[10], gds[11], gds[12])
#define  GDS_FirstLon        GET_INT3(gds[13], gds[14], gds[15])
#define  GDS_ResFlag         GET_UINT1(gds[16])
#define  GDS_LastLat         GET_INT3(gds[17], gds[18], gds[19])
#define  GDS_LastLon         GET_INT3(gds[20], gds[21], gds[22])
#define  GDS_LonIncr         GET_UINT2(gds[23], gds[24])
/* GDS_LatIncr and GDS_NumPar decode the same two bytes (25, 26). */
#define  GDS_LatIncr         GET_UINT2(gds[25], gds[26])
#define  GDS_NumPar          GET_UINT2(gds[25], gds[26])
#define  GDS_ScanFlag        GET_UINT1(gds[27])
#define  GDS_LatSP           GET_INT3(gds[32], gds[33], gds[34])
#define  GDS_LonSP           GET_INT3(gds[35], gds[36], gds[37])
/* GET_Real is not defined in this part of the file; it must be provided before GDS_RotAngle is used. */
#define  GDS_RotAngle        (GET_Real(&(gds[38])))

/* GRIB1 Lambert */
/*
 * Accessors for the Lambert conformal grid description.  Like the other
 * GDS_* macros they read from a byte pointer named `gds` in the caller's
 * scope, using zero-based byte offsets.  The 3-byte fields are decoded as
 * sign-and-magnitude integers (GET_INT3) and are laid out back to back:
 * LatS1 28..30, LatS2 31..33, LatSP 34..36, LonSP 37..39.
 */
#define  GDS_Lambert_Lov     GET_INT3(gds[17], gds[18], gds[19])
#define  GDS_Lambert_dx      GET_INT3(gds[20], gds[21], gds[22])
#define  GDS_Lambert_dy      GET_INT3(gds[23], gds[24], gds[25])
#define  GDS_Lambert_ProjFlag GET_UINT1(gds[26])
#define  GDS_Lambert_LatS1   GET_INT3(gds[28], gds[29], gds[30])
#define  GDS_Lambert_LatS2   GET_INT3(gds[31], gds[32], gds[33])
#define  GDS_Lambert_LatSP   GET_INT3(gds[34], gds[35], gds[36])
/* Three consecutive bytes; previously byte 37 was used for all three. */
#define  GDS_Lambert_LonSP   GET_INT3(gds[37], gds[38], gds[39])

/* GRIB1 Section 3: Bit Map Section (BMS) */

/*
 * These macros read from a byte pointer named `bms` in the caller's scope.
 * BMS_Len and BMS_Bitmap tolerate bms == NULL (yielding 0 and NULL);
 * BMS_UnusedBits and BMS_BitmapSize dereference bms unconditionally.
 *
 * BMS_Bitmap points 6 bytes into the section.  BMS_BitmapSize is the number
 * of bits that follow: (section length - 6) * 8 minus the value of byte 3.
 */
#define  BMS_Len	     ((bms) == NULL ? 0 : GET_UINT3(bms[0], bms[1], bms[2]))
#define  BMS_UnusedBits      (bms[3])
#define  BMS_Bitmap	     ((bms) == NULL ? NULL : (bms)+6)
#define  BMS_BitmapSize      (((((bms[0]<<16)+(bms[1]<<8)+bms[2]) - 6)<<3) - bms[3])

/* GRIB1 Section 4: Binary Data Section (BDS) */

/*
 * These macros read from a byte pointer named `bds` in the caller's scope.
 * BDS_RealCoef, BDS_PackData and BDS_Power additionally use an integer
 * `zoff` from the caller's scope as an extra byte offset; note that
 * BDS_RealCoef and BDS_PackData/BDS_Power decode overlapping bytes
 * (zoff+11 .. zoff+14) in different ways.  BDS_Z reads byte 13 without zoff.
 *
 * BDS_RefValue and BDS_RealCoef pass a 1-byte exponent and a 3-byte mantissa
 * to decfp2(), which is declared elsewhere in this file.
 */
#define  BDS_Len	    GET_UINT3(bds[0], bds[1], bds[2])
#define  BDS_Flag	    (bds[3])
#define  BDS_BinScale       GET_INT2(bds[ 4], bds[ 5])
#define  BDS_RefValue       (decfp2((int)bds[ 6], (int)(GET_UINT3(bds[7], bds[8], bds[9]))))
#define  BDS_NumBits        ((int) bds[10])
#define  BDS_RealCoef       (decfp2((int)bds[zoff+11], (int)(GET_UINT3(bds[zoff+12], bds[zoff+13], bds[zoff+14]))))
#define  BDS_PackData       ((int) ((bds[zoff+11]<<8) + bds[zoff+12]))
#define  BDS_Power          GET_INT2(bds[zoff+13], bds[zoff+14])
#define  BDS_Z              (bds[13])

/* GRIB1 Section 5: End Section (ES) */

/* GRIB2 */

/*
 * GRIB2 section header: a 4-byte big-endian length at offsets 0..3 followed
 * by the 1-byte section number at offset 4.  `section` is parenthesized so
 * that pointer expressions such as `buf + pos` can be passed; it is
 * evaluated several times.
 */
#define  GRIB2_SECLEN(section)   (GET_UINT4((section)[0], (section)[1], (section)[2], (section)[3]))
#define  GRIB2_SECNUM(section)   (GET_UINT1((section)[4]))

// clang-format on

#endif /* GRIBDECODE_H */
#ifndef CGRIBEX_GRIB_ENCODE_H
#define CGRIBEX_GRIB_ENCODE_H

#include <limits.h>

// clang-format off

/*
 * PutnZero(n): write n zero elements into the output buffer `lGrib`
 * starting at the current write position `z`, then advance z by n.
 * Both `lGrib` and `z` are taken from the caller's scope.  A negative z is
 * clamped to 0 for the start of the fill only.
 *
 * n is parenthesized in the expansion so that an expression argument
 * (e.g. `cond ? 2 : 3`) is added to z as a whole; it is evaluated twice.
 * The expansion is a brace block, as before, so existing call sites with or
 * without a trailing semicolon keep compiling.
 */
#define PutnZero(n) \
{ \
  for ( size_t i___ = z >= 0 ? (size_t)z : 0; i___ < (size_t)(z+(n)); i___++ ) lGrib[i___] = 0; \
  z += (n); \
}

/*
 * PutNByte(Value): append the low N bytes of Value to the output buffer
 * `lGrib`, most significant byte first, advancing the write position `z`
 * by N.  `lGrib` and `z` come from the caller's scope; each stored element
 * is converted to GRIBPACK.  Value is evaluated once per byte written.
 */
#define Put1Byte(Value)  (lGrib[z++] = (GRIBPACK)(Value))
#define Put2Byte(Value) ((lGrib[z++] = (GRIBPACK)((Value) >>  8)),      \
                         (lGrib[z++] = (GRIBPACK)(Value)))
#define Put3Byte(Value) ((lGrib[z++] = (GRIBPACK)((Value) >> 16)),      \
                         (lGrib[z++] = (GRIBPACK)((Value) >>  8)),      \
                         (lGrib[z++] = (GRIBPACK)(Value)))
#define Put4Byte(Value) ((lGrib[z++] = (GRIBPACK)((Value) >> 24)),      \
                         (lGrib[z++] = (GRIBPACK)((Value) >> 16)),      \
                         (lGrib[z++] = (GRIBPACK)((Value) >>  8)),      \
                         (lGrib[z++] = (GRIBPACK)(Value)))

/*
 * PutNInt(Value): append a signed value in sign-and-magnitude form.
 * Value is first copied into `ival` (a variable from the caller's scope);
 * a negative ival is replaced by (sign bit of the N-byte field) - ival,
 * i.e. the sign bit plus the magnitude, and the result is written with
 * PutNByte.  These expand to brace blocks, not expressions.
 */
#define Put1Int(Value)  {ival = Value; if ( ival < 0 ) ival =     0x80 - ival; Put1Byte(ival);}
#define Put2Int(Value)  {ival = Value; if ( ival < 0 ) ival =   0x8000 - ival; Put2Byte(ival);}
#define Put3Int(Value)  {ival = Value; if ( ival < 0 ) ival = 0x800000 - ival; Put3Byte(ival);}

/* Width of an int in bits on the build platform. */
enum {
  BitsPerInt = (int) (sizeof(int) * CHAR_BIT),
};


/*
 * Put1Real(Value): convert Value with confp3() into `exponent` and
 * `mantissa` (both variables from the caller's scope), then append them as
 * one exponent byte followed by three mantissa bytes.  BitsPerInt and 1 are
 * passed as confp3's kbits and kround arguments; their exact effect is
 * defined by confp3, which is not visible in this part of the file.
 */
#define Put1Real(Value)          \
{                                \
  confp3(Value, &exponent, &mantissa, BitsPerInt, 1); \
  Put1Byte(exponent);            \
  Put3Byte(mantissa);            \
}

// clang-format on

#endif /* CGRIBEX_GRIB_ENCODE_H */
#ifndef CODEC_COMMON_H
#define CODEC_COMMON_H
/*
 * Swap the two bytes of a 16-bit value.  The argument is expected to be a
 * uint16_t (or a value that fits in 16 bits); the result is truncated to
 * uint16_t.  The argument is parenthesized so that expressions such as
 * `a | b` are shifted as a whole; it is evaluated twice.
 */
#define gribSwapByteOrder_uint16(ui16) ((uint16_t) (((ui16) << 8) | ((ui16) >> 8)))
#endif /* CODEC_COMMON_H */
/*
icc -g -Wall -O3 -march=native -std=c99 -qopt-report=5 -DTEST_MINMAXVAL -qopenmp -DOMP_SIMD minmax_val.c
 result on hama2 (icc 16.0.0):
     float:
minmax_val: fmin: -500000  fmax: 499999  time:   1.22s
simd      : fmin: -500000  fmax: 499999  time:   1.20s
    double:
minmax_val: fmin: -500000  fmax: 499999  time:   2.86s
orig      : fmin: -500000  fmax: 499999  time:   2.74s
simd      : fmin: -500000  fmax: 499999  time:   2.70s
avx       : fmin: -500000  fmax: 499999  time:   2.99s

gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL -fopenmp -DOMP_SIMD -Wa,-q minmax_val.c
 result on thunder5 (gcc 6.1.0):
float:
minmax_val: fmin: -500000  fmax: 499999  time:   8.25s
  simd    : fmin: -500000  fmax: 499999  time:   1.24s
double:
minmax_val: fmin: -500000  fmax: 499999  time:   2.73s
  orig    : fmin: -500000  fmax: 499999  time:   9.24s
  simd    : fmin: -500000  fmax: 499999  time:   2.78s
  avx     : fmin: -500000  fmax: 499999  time:   2.90s

gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL minmax_val.c
 result on bailung (gcc 4.8.2):
  orig    : fmin: -500000  fmax: 499999  time:   4.82s
  sse2    : fmin: -500000  fmax: 499999  time:   4.83s

gcc -g -Wall -O3 -march=native -std=c99 -DTEST_MINMAXVAL -fopenmp -DOMP_SIMD -Wa,-q minmax_val.c
 result on thunder5 (gcc 4.8.2):
  orig    : fmin: -500000  fmax: 499999  time:   3.10s
  simd    : fmin: -500000  fmax: 499999  time:   3.10s # omp simd in gcc 4.9
  avx     : fmin: -500000  fmax: 499999  time:   2.84s

icc -g -Wall -O3 -march=native -std=c99 -qopt-report=5 -DTEST_MINMAXVAL -openmp -DOMP_SIMD minmax_val.c
 result on thunder5 (icc 14.0.2):
  orig    : fmin: -500000  fmax: 499999  time:   2.83s
  simd    : fmin: -500000  fmax: 499999  time:   2.83s
  avx     : fmin: -500000  fmax: 499999  time:   2.92s

xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_MINMAXVAL minmax_val.c
 result on blizzard (xlc 12):
  orig    : fmin: -500000  fmax: 499999  time:   7.26s
  pwr6u6  : fmin: -500000  fmax: 499999  time:   5.92s
*/
#ifdef _ARCH_PWR6
#pragma options nostrict
#endif

#ifdef OMP_SIMD
#include <omp.h>
#endif

#include <stdlib.h>

// #undef _GET_X86_COUNTER
// #undef _GET_IBM_COUNTER
// #undef _GET_MACH_COUNTER
// #undef _ARCH_PWR6

#if defined(_GET_IBM_COUNTER)
#include <libhpc.h>
#elif defined(_GET_X86_COUNTER)
#include <x86intrin.h>
#elif defined(_GET_MACH_COUNTER)
#include <mach/mach_time.h>
#endif

/*
 * GNUC_PUSH_POP marks compilers on which the "#pragma GCC push_options /
 * optimize / pop_options" blocks further down are enabled: genuine GCC
 * (not icc, not clang) of version 4.4 or newer.
 *
 * The version test compares major and minor together; testing
 * "major >= 4 && minor >= 4" would wrongly reject every later release whose
 * minor number is below 4 (5.1, 9.3, 12.2, ...).
 */
#if defined(__GNUC__) && !defined(__ICC) && !defined(__clang__)
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#define GNUC_PUSH_POP
#endif
#endif

/*
 * Selection of the hand-written SIMD kernels for min/max.
 *
 * Step 1: unless the user already defined DISABLE_SIMD, define it for every
 * compiler other than GCC >= 4, icc >= 11.00 or clang.
 */
#ifndef DISABLE_SIMD
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
#else
#define DISABLE_SIMD
#endif
#endif

/* Step 2: a global SIMD ban also bans the min/max kernels. */
#ifdef DISABLE_SIMD
#define DISABLE_SIMD_MINMAXVAL
#endif

/* Step 3: outside the TEST_MINMAXVAL benchmark build the kernels are off by default. */
#ifndef TEST_MINMAXVAL
#define DISABLE_SIMD_MINMAXVAL
#endif

/*
 * Step 4a: when disabled, a kernel is compiled only if explicitly requested
 * through ENABLE_AVX / ENABLE_SSE2.
 */
#ifdef DISABLE_SIMD_MINMAXVAL
#ifdef ENABLE_AVX
#define _ENABLE_AVX
#endif
#ifdef ENABLE_SSE2
#define _ENABLE_SSE2
#endif
#endif

/*
 * Step 4b: when not disabled, follow the instruction sets the compiler
 * reports through __AVX__ / __SSE2__.  Where both _ENABLE_AVX and
 * _ENABLE_SSE2 end up defined, the #if/#elif chains below pick AVX.
 */
#ifndef DISABLE_SIMD_MINMAXVAL
#ifdef __AVX__
#define _ENABLE_AVX
#endif
#ifdef __SSE2__
#define _ENABLE_SSE2
#endif
#endif

#include <float.h>
#include <stdint.h>
#include <inttypes.h>

#if defined(_ENABLE_AVX)
#include <immintrin.h>
#elif defined(_ENABLE_SSE2)
#include <emmintrin.h>
#endif

#if defined(_ENABLE_AVX)

/*
 * Fold the `nframes` doubles starting at `buf` into the running extrema
 * *min and *max using 256-bit AVX min/max instructions.
 *
 * On entry *min / *max hold the starting values (callers in this file seed
 * them with data[0]); on return they hold the minimum / maximum over the
 * starting value and all elements of buf.  With nframes == 0 the values
 * are written back unchanged.
 *
 * The input is processed in four phases: a scalar head until buf is
 * 32-byte aligned, an unrolled loop of 16 values with prefetch, a loop of
 * 4 values, and a scalar tail.
 */
static void
avx_minmax_val_double(const double *restrict buf, size_t nframes, double *min, double *max)
{
  double fmin[4], fmax[4];
  __m256d current_max, current_min, work;

  // load max and min values into all four slots of the YMM registers
  current_min = _mm256_set1_pd(*min);
  current_max = _mm256_set1_pd(*max);

  // Work input until "buf" reaches 32 byte alignment (required by the
  // aligned loads below).  uintptr_t is wide enough for a pointer on every
  // ABI, unlike unsigned long.
  while (((uintptr_t) buf) % 32 != 0 && nframes > 0)
  {
    // Load the next double into the work buffer
    work = _mm256_set1_pd(*buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // main loop: 16 values (four aligned vectors) per iteration
  while (nframes >= 16)
  {
    (void) _mm_prefetch((const char *) (buf + 8), _MM_HINT_NTA);

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;

    (void) _mm_prefetch((const char *) (buf + 8), _MM_HINT_NTA);

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;

    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;
    nframes -= 16;
  }

  // work through the remaining aligned vectors
  while (nframes >= 4)
  {
    work = _mm256_load_pd(buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf += 4;
    nframes -= 4;
  }

  // work through the remaining values
  while (nframes > 0)
  {
    work = _mm256_set1_pd(*buf);
    current_min = _mm256_min_pd(current_min, work);
    current_max = _mm256_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // Horizontal reduction: first combine the two values inside each 128-bit
  // half (shuffle mask 5 swaps them), then combine the two halves
  // (permute2f128 with selector 1 swaps them).  Afterwards every lane holds
  // the overall result.
  work = current_min;
  work = _mm256_shuffle_pd(work, work, 5);
  work = _mm256_min_pd(work, current_min);
  current_min = work;
  work = _mm256_permute2f128_pd(work, work, 1);
  work = _mm256_min_pd(work, current_min);
  _mm256_storeu_pd(fmin, work);

  work = current_max;
  work = _mm256_shuffle_pd(work, work, 5);
  work = _mm256_max_pd(work, current_max);
  current_max = work;
  work = _mm256_permute2f128_pd(work, work, 1);
  work = _mm256_max_pd(work, current_max);
  _mm256_storeu_pd(fmax, work);

  *min = fmin[0];
  *max = fmax[0];
}

#elif defined(_ENABLE_SSE2)

/*
 * Fold the `nframes` doubles starting at `buf` into the running extrema
 * *min and *max using 128-bit SSE2 min/max instructions.
 *
 * On entry *min / *max hold the starting values (callers in this file seed
 * them with data[0]); on return they hold the minimum / maximum over the
 * starting value and all elements of buf.  With nframes == 0 the values
 * are written back unchanged.
 *
 * The input is processed in four phases: a scalar head until buf is
 * 16-byte aligned, an unrolled loop of 8 values, a loop of 2 values, and a
 * scalar tail.
 */
static void
sse2_minmax_val_double(const double *restrict buf, size_t nframes, double *min, double *max)
{
  __m128d current_max, current_min, work;

  // load starting max and min values into all slots of the XMM registers
  current_min = _mm_set1_pd(*min);
  current_max = _mm_set1_pd(*max);

  // Work on input until buf reaches 16 byte alignment (required by the
  // aligned loads below).  uintptr_t is wide enough for a pointer on every
  // ABI, unlike unsigned long.
  while (((uintptr_t) buf) % 16 != 0 && nframes > 0)
  {
    // load one double and replicate
    work = _mm_set1_pd(*buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // main loop: 8 values (four aligned vectors) per iteration
  while (nframes >= 8)
  {
    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;
    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;
    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;
    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;
    nframes -= 8;
  }

  // work through smaller chunks of aligned buffers
  while (nframes >= 2)
  {
    work = _mm_load_pd(buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf += 2;
    nframes -= 2;
  }

  // work through the remaining value
  while (nframes > 0)
  {
    // load the last double and replicate
    work = _mm_set1_pd(*buf);
    current_min = _mm_min_pd(current_min, work);
    current_max = _mm_max_pd(current_max, work);
    buf++;
    nframes--;
  }

  // Horizontal reduction: swap the two lanes, combine with the unswapped
  // vector, and store the low lane as the final result.
  work = current_min;
  work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1));
  work = _mm_min_pd(work, current_min);
  _mm_store_sd(min, work);
  work = current_max;
  work = _mm_shuffle_pd(work, work, _MM_SHUFFLE2(0, 1));
  work = _mm_max_pd(work, current_max);
  _mm_store_sd(max, work);
}

#endif  // SIMD

#if defined(_ARCH_PWR6)
/*
 * Fold the `datasize` doubles in `data` into the running extrema *fmin and
 * *fmax, using the __fsel intrinsic instead of branches and six
 * independent accumulator pairs so that consecutive selects do not depend
 * on each other.
 *
 * The selects are written on the assumption that __fsel(a, b, c) yields b
 * when a >= 0 and c otherwise - TODO confirm against the platform's
 * <ppu_intrinsics.h>.
 *
 * With datasize == 0 the function returns immediately and leaves *fmin and
 * *fmax untouched (the accumulators are seeded from data[0], which must not
 * be read for an empty input).
 */
static void
pwr6_minmax_val_double_unrolled6(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  // number of independent accumulator pairs; to allow pipelining we have to unroll
  enum { UNROLL_DEPTH = 6 };

  if (datasize == 0) return;

  const size_t residual = datasize % UNROLL_DEPTH;
  const size_t ofs = datasize - residual;  // start of the incomplete last group
  double dmin[UNROLL_DEPTH];
  double dmax[UNROLL_DEPTH];

  for (size_t j = 0; j < UNROLL_DEPTH; ++j)
  {
    dmin[j] = data[0];
    dmax[j] = data[0];
  }

  // complete groups of UNROLL_DEPTH values
  for (size_t i = 0; i < ofs; i += UNROLL_DEPTH)
  {
    for (size_t j = 0; j < UNROLL_DEPTH; ++j)
    {
      dmin[j] = __fsel(dmin[j] - data[i + j], data[i + j], dmin[j]);
      dmax[j] = __fsel(data[i + j] - dmax[j], data[i + j], dmax[j]);
    }
  }

  // left-over values of the last, incomplete group
  for (size_t j = 0; j < residual; ++j)
  {
    dmin[j] = __fsel(dmin[j] - data[ofs + j], data[ofs + j], dmin[j]);
    dmax[j] = __fsel(data[ofs + j] - dmax[j], data[ofs + j], dmax[j]);
  }

  // merge the accumulators into the caller's running extrema
  for (size_t j = 0; j < UNROLL_DEPTH; ++j)
  {
    *fmin = __fsel(*fmin - dmin[j], dmin[j], *fmin);
    *fmax = __fsel(dmax[j] - *fmax, dmax[j], *fmax);
  }
}
#endif

// In the TEST_MINMAXVAL benchmark build with a GNU-compatible compiler, the
// scalar variants are declared noinline (presumably so that each timed call
// in the benchmark stays a real function call).
// NOTE(review): the _simd and _omp variants are only defined further down
// when OMP_SIMD is set, and minmax_val_float's second parameter is spelled
// `datasize` here but `idatasize` in its definition.
// clang-format off
#if defined(TEST_MINMAXVAL) && defined(__GNUC__)
static void minmax_val_double_orig(const double *restrict data, size_t datasize, double *fmin, double *fmax) __attribute__((noinline));
static void minmax_val_double_simd(const double *restrict data, size_t datasize, double *fmin, double *fmax) __attribute__((noinline));
static void minmax_val_double_omp(const double *restrict data, size_t datasize, double *fmin, double *fmax) __attribute__((noinline));
static void minmax_val_float(const float *restrict data, long datasize, float *fmin, float *fmax) __attribute__((noinline));
static void minmax_val_float_simd(const float *restrict data, size_t datasize, float *fmin, float *fmax) __attribute__((noinline));
#endif
// clang-format on

#if defined(GNUC_PUSH_POP) && defined __OPTIMIZE__
#pragma GCC push_options
#pragma GCC optimize("O3", "fast-math")
#endif
/*
 * Plain scalar reference implementation: fold the `datasize` doubles in
 * `data` into the running extrema *fmin and *fmax.
 *
 * *fmin / *fmax are read once on entry as the starting values and written
 * once on exit; with datasize == 0 they are written back unchanged.
 */
static void
minmax_val_double_orig(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  double lo = *fmin;
  double hi = *fmax;

  // vendor-specific hints telling the compiler the loop carries no dependence
#if defined(CRAY)
#pragma _CRI ivdep
#elif defined(SX)
#pragma vdir nodep
#elif defined(__uxp__)
#pragma loop novrec
#elif defined(__ICC)
#pragma ivdep
#endif
  for (size_t k = 0; k < datasize; k++)
  {
    const double v = data[k];
    lo = (lo < v) ? lo : v;
    hi = (hi > v) ? hi : v;
  }

  *fmin = lo;
  *fmax = hi;
}

/*
 * Fold the `idatasize` floats in `data` into the running extrema *fmin and
 * *fmax.
 *
 * *fmin / *fmax are read on entry as the starting values and overwritten
 * with the result.  A length of zero or less leaves them untouched, as
 * minmax_val_double does; without this guard a negative length would be
 * converted to a huge size_t and the loop would read far past the buffer.
 */
static void
minmax_val_float(const float *restrict data, long idatasize, float *fmin, float *fmax)
{
  if (idatasize < 1) return;

  size_t datasize = (size_t) idatasize;
  float dmin = *fmin, dmax = *fmax;

  // vendor-specific hints telling the compiler the loop carries no dependence
#if defined(CRAY)
#pragma _CRI ivdep
#elif defined(SX)
#pragma vdir nodep
#elif defined(__uxp__)
#pragma loop novrec
#elif defined(__ICC)
#pragma ivdep
#endif
  for (size_t i = 0; i < datasize; ++i)
  {
    dmin = (dmin < data[i]) ? dmin : data[i];
    dmax = (dmax > data[i]) ? dmax : data[i];
  }

  *fmin = dmin;
  *fmax = dmax;
}
#if defined(GNUC_PUSH_POP) && defined __OPTIMIZE__
#pragma GCC pop_options
#endif

// TEST
#if defined(OMP_SIMD)

#if defined(GNUC_PUSH_POP) && defined __OPTIMIZE__
#pragma GCC push_options
#pragma GCC optimize("O3", "fast-math")
#endif
/*
 * Multi-threaded variant: fold the `datasize` doubles in `data` into the
 * running extrema *fmin and *fmax.  When compiled with OpenMP the loop is
 * a combined "parallel for simd" with min/max reductions; without OpenMP
 * it is an ordinary sequential loop.
 */
static void
minmax_val_double_omp(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  double lo = *fmin;
  double hi = *fmax;

#ifdef _OPENMP
#pragma omp parallel for simd reduction(min : lo) reduction(max : hi)
#endif
  for (size_t k = 0; k < datasize; k++)
  {
    const double v = data[k];
    lo = (lo < v) ? lo : v;
    hi = (hi > v) ? hi : v;
  }

  *fmin = lo;
  *fmax = hi;
}

/*
 * Single-threaded vectorized variant: fold the `datasize` doubles in `data`
 * into the running extrema *fmin and *fmax.  When compiled with OpenMP the
 * loop carries an "omp simd" directive with min/max reductions; without
 * OpenMP it is an ordinary loop.
 */
static void
minmax_val_double_simd(const double *restrict data, size_t datasize, double *fmin, double *fmax)
{
  double lo = *fmin;
  double hi = *fmax;

#if defined(_OPENMP)
#pragma omp simd reduction(min : lo) reduction(max : hi)
#endif
  for (size_t k = 0; k < datasize; k++)
  {
    const double v = data[k];
    lo = (lo < v) ? lo : v;
    hi = (hi > v) ? hi : v;
  }

  *fmin = lo;
  *fmax = hi;
}

/*
 * Single-precision counterpart of minmax_val_double_simd: fold the
 * `datasize` floats in `data` into the running extrema *fmin and *fmax.
 * When compiled with OpenMP the loop carries an "omp simd" directive with
 * min/max reductions.
 */
static void
minmax_val_float_simd(const float *restrict data, size_t datasize, float *fmin, float *fmax)
{
  float lo = *fmin;
  float hi = *fmax;

#ifdef _OPENMP
#pragma omp simd reduction(min : lo) reduction(max : hi)
#endif
  for (size_t k = 0; k < datasize; k++)
  {
    const float v = data[k];
    lo = (lo < v) ? lo : v;
    hi = (hi > v) ? hi : v;
  }

  *fmin = lo;
  *fmax = hi;
}
#if defined(GNUC_PUSH_POP) && defined __OPTIMIZE__
#pragma GCC pop_options
#endif
#endif

/*
 * Entry point for the double-precision min/max scan: fold the `idatasize`
 * doubles in `data` into the running extrema *fmin and *fmax.
 *
 * A length below 1 returns immediately and leaves *fmin / *fmax untouched.
 * Otherwise the work is handed to exactly one kernel, chosen at compile
 * time: the AVX kernel, else the SSE2 kernel, else the POWER6 fsel kernel,
 * else the plain scalar loop.
 *
 * When _GET_X86_COUNTER or _GET_MACH_COUNTER is defined the call is timed
 * and the elapsed counter value plus the resulting extrema are printed;
 * with _GET_IBM_COUNTER the non-SIMD kernels are bracketed by
 * hpmStart/hpmStop.
 */
static void
minmax_val_double(const double *restrict data, long idatasize, double *fmin, double *fmax)
{
#if defined(_GET_X86_COUNTER) || defined(_GET_MACH_COUNTER)
  uint64_t start_minmax, end_minmax;
#endif

  // nothing to scan
  if (idatasize < 1) return;

  const size_t datasize = (size_t) idatasize;

#ifdef _GET_X86_COUNTER
  start_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
  start_minmax = mach_absolute_time();
#endif

#if defined(_ENABLE_AVX)

  avx_minmax_val_double(data, datasize, fmin, fmax);

#elif defined(_ENABLE_SSE2)

  sse2_minmax_val_double(data, datasize, fmin, fmax);

#elif defined(_ARCH_PWR6)
#define __UNROLL_DEPTH_1 6

  // unrolled fsel kernel, see pwr6_minmax_val_double_unrolled6

#ifdef _GET_IBM_COUNTER
  hpmStart(1, "minmax fsel");
#endif

  pwr6_minmax_val_double_unrolled6(data, datasize, fmin, fmax);

#ifdef _GET_IBM_COUNTER
  hpmStop(1);
#endif

#undef __UNROLL_DEPTH_1

#else  // plain scalar loop

#ifdef _GET_IBM_COUNTER
  hpmStart(1, "minmax base");
#endif

  minmax_val_double_orig(data, datasize, fmin, fmax);

#ifdef _GET_IBM_COUNTER
  hpmStop(1);
#endif

#endif  // kernel selection

#if defined(_GET_X86_COUNTER) || defined(_GET_MACH_COUNTER)
#ifdef _GET_X86_COUNTER
  end_minmax = _rdtsc();
#endif
#ifdef _GET_MACH_COUNTER
  end_minmax = mach_absolute_time();
#endif
  // report which kernel ran, how long it took and what it found
#if defined(_ENABLE_AVX)
  printf("AVX minmax cycles:: %" PRIu64 "\n", end_minmax - start_minmax);
  fprintf(stderr, "AVX min: %lf max: %lf\n", *fmin, *fmax);
#elif defined(_ENABLE_SSE2)
  printf("SSE2 minmax cycles:: %" PRIu64 "\n", end_minmax - start_minmax);
  fprintf(stderr, "SSE2 min: %lf max: %lf\n", *fmin, *fmax);
#else
  printf("loop minmax cycles:: %" PRIu64 "\n", end_minmax - start_minmax);
  fprintf(stderr, "loop min: %lf max: %lf\n", *fmin, *fmax);
#endif
#endif
}

#if defined(TEST_MINMAXVAL)

#include <stdio.h>
#include <sys/time.h>

/*
 * Return the current wall-clock time from gettimeofday() as seconds
 * (including the microsecond fraction) in a double.
 *
 * The timeval is zero-initialized so that a failing gettimeofday() call
 * yields 0.0 rather than an indeterminate value.
 */
static double
dtime(void)
{
  struct timeval now = { 0, 0 };

  (void) gettimeofday(&now, NULL);

  return (double) now.tv_sec + (double) now.tv_usec * 1.0e-6;
}

#define NRUN 10000

/*
 * Benchmark driver (TEST_MINMAXVAL builds only).
 *
 * Fills a float and a double array of `datasize` elements with the values
 * 0 .. datasize/2-1 followed by -datasize/2 .. -1, then runs every min/max
 * variant compiled into this build NRUN times and prints the resulting
 * extrema together with the elapsed wall-clock time.
 *
 * Returns 0 on success and EXIT_FAILURE if a test array cannot be
 * allocated.
 */
int
main(void)
{
  long datasize = 1000000;
  double t_begin, t_end;

  printf("datasize %ld\n", datasize);
#if defined(_OPENMP)
  printf("_OPENMP=%d\n", _OPENMP);
#endif

#if defined(__ICC)
  printf("icc\n");
#elif defined(__clang__)
  printf("clang\n");
#elif defined(__GNUC__)
  printf("gcc\n");
#endif

  // single precision
  {
    float fmin, fmax;
    float *data_sp = malloc((size_t) datasize * sizeof *data_sp);
    if (data_sp == NULL)
    {
      fprintf(stderr, "cannot allocate %ld floats\n", datasize);
      return EXIT_FAILURE;
    }

    for (long i = 0; i < datasize / 2; ++i) data_sp[i] = (float) (i);
    for (long i = datasize / 2; i < datasize; ++i) data_sp[i] = (float) (-datasize + i);

    printf("float:\n");

    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_sp[0];
      minmax_val_float(data_sp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("minmax_val: fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);

#if defined(OMP_SIMD)
    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_sp[0];
      minmax_val_float_simd(data_sp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("simd      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);
#endif

    free(data_sp);
  }

  // double precision
  {
    double fmin, fmax;
    double *data_dp = malloc((size_t) datasize * sizeof *data_dp);
    if (data_dp == NULL)
    {
      fprintf(stderr, "cannot allocate %ld doubles\n", datasize);
      return EXIT_FAILURE;
    }

    for (long i = 0; i < datasize / 2; ++i) data_dp[i] = (double) (i);
    for (long i = datasize / 2; i < datasize; ++i) data_dp[i] = (double) (-datasize + i);

    printf("double:\n");

    // dispatcher: whichever kernel this build selects
    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_dp[0];
      minmax_val_double(data_dp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("minmax_val: fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);

    // plain scalar loop
    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_dp[0];
      minmax_val_double_orig(data_dp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("orig      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);

#if defined(OMP_SIMD)
    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_dp[0];
      minmax_val_double_simd(data_dp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("simd      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);

    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_dp[0];
      minmax_val_double_omp(data_dp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("openmp %d  : fmin: %ld  fmax: %ld  time: %6.2fs\n", omp_get_max_threads(), (long) fmin, (long) fmax, t_end - t_begin);
#endif

#if defined(_ENABLE_AVX)
    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_dp[0];
      avx_minmax_val_double(data_dp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("avx       : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);
#elif defined(_ENABLE_SSE2)
    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_dp[0];
      sse2_minmax_val_double(data_dp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("sse2      : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);
#endif
#if defined(_ARCH_PWR6)
    t_begin = dtime();
    for (int i = 0; i < NRUN; ++i)
    {
      fmin = fmax = data_dp[0];
      pwr6_minmax_val_double_unrolled6(data_dp, datasize, &fmin, &fmax);
    }
    t_end = dtime();
    printf("pwr6u6  : fmin: %ld  fmax: %ld  time: %6.2fs\n", (long) fmin, (long) fmax, t_end - t_begin);
#endif
    free(data_dp);
  }

  return 0;
}
#endif  // TEST_MINMAXVAL

#undef DISABLE_SIMD_MINMAXVAL
#undef _ENABLE_AVX
#undef _ENABLE_SSE2
#undef GNUC_PUSH_POP
/*
### new version with gribSwapByteOrder_uint16()
icc -g -Wall -O3 -march=native -std=c99 -qopt-report=5 -DTEST_ENCODE encode_array.c
 result on hama2 (icc 16.0.2):
   float:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 1.8731s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 2.0898s
  double:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 3.68089s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 4.30798s
     avx: val1: 1  val2: 1  val3: 2  valn: 66  time: 4.23864s

gcc -g -Wall -O3 -march=native -Wa,-q -std=c99 -DTEST_ENCODE encode_array.c
 result on hama2 (gcc 6.1.0):
float:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 2.22871s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 2.30281s
double:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 4.2669s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 4.81643s
     avx: val1: 1  val2: 1  val3: 2  valn: 66  time: 3.98415s

###
icc -g -Wall -O3 -march=native -std=c99 -qopt-report=5 -DTEST_ENCODE encode_array.c
 result on hama2 (icc 16.0.0):
   float:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 9.10691s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 8.63584s
  double:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 13.5768s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 9.17742s
     avx: val1: 1  val2: 1  val3: 2  valn: 66  time: 3.9488s

gcc -g -Wall -O3 -std=c99 -DTEST_ENCODE encode_array.c
 result on hama2 (gcc 5.2.0):
   float:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 5.32775s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 7.87125s
  double:
    orig: val1: 1  val2: 1  val3: 2  valn: 66  time: 7.85873s
unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time: 12.9979s

###
gcc -g -Wall -O3 -march=native -std=c99 -DTEST_ENCODE encode_array.c
 result on bailung (gcc 4.7):
  orig    : val1: 1  val2: 1  val3: 2  valn: 66  time: 8.4166s
  sse41   : val1: 1  val2: 1  val3: 2  valn: 66  time: 7.1522s

gcc -g -Wall -O3 -march=native -std=c99 -DTEST_ENCODE encode_array.c
 result on thunder5 (gcc 4.7):
  orig    : val1: 1  val2: 1  val3: 2  valn: 66  time: 6.21976s
  avx     : val1: 1  val2: 1  val3: 2  valn: 66  time: 4.54485s

icc -g -Wall -O3 -march=native -std=c99 -vec-report=1 -DTEST_ENCODE encode_array.c
 result on thunder5 (icc 13.2):
  orig    : val1: 1  val2: 1  val3: 2  valn: 66  time: 14.6279s
  avx     : val1: 1  val2: 1  val3: 2  valn: 66  time:  4.9776s

xlc_r -g -O3 -qhot -q64 -qarch=auto -qtune=auto -qreport -DTEST_ENCODE encode_array.c
 result on blizzard (xlc 12):
  orig    : val1: 1  val2: 1  val3: 2  valn: 66  time: 132.25s
  unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time:  27.202s
  orig    : val1: 1  val2: 1  val3: 2  valn: 66  time: 106.627s  // without -qhot
  unrolled: val1: 1  val2: 1  val3: 2  valn: 66  time:  39.929s  // without -qhot
*/
#ifdef _ARCH_PWR6
#pragma options nostrict
#include <ppu_intrinsics.h>
#endif

#ifdef TEST_ENCODE
#include <stdio.h>
#include <stdlib.h>
#define GRIBPACK unsigned char

#ifndef HOST_ENDIANNESS
#ifdef __cplusplus
static const uint32_t HOST_ENDIANNESS_temp[1] = { UINT32_C(0x00030201) };
#define HOST_ENDIANNESS (((const unsigned char *) HOST_ENDIANNESS_temp)[0])
#else
#define HOST_ENDIANNESS (((const unsigned char *) &(const uint32_t[1]){ UINT32_C(0x00030201) })[0])
#endif
#endif

#define IS_BIGENDIAN() (HOST_ENDIANNESS == 0)
#define Error(x, y)
#endif

// #undef _GET_X86_COUNTER
// #undef _GET_MACH_COUNTER
// #undef _GET_IBM_COUNTER
// #undef _ARCH_PWR6

#if defined _GET_IBM_COUNTER
#include <libhpc.h>
#elif defined _GET_X86_COUNTER
#include <x86intrin.h>
#elif defined _GET_MACH_COUNTER
#include <mach/mach_time.h>
#endif

#include <stdint.h>
#include <math.h>

#ifndef DISABLE_SIMD
#if defined(__GNUC__) && (__GNUC__ >= 4)
#elif defined(__ICC) && (__ICC >= 1100)
#elif defined(__clang__)
#else
#define DISABLE_SIMD
#endif
#endif

#ifdef DISABLE_SIMD
#define DISABLE_SIMD_ENCODE
#endif

// #define DISABLE_SIMD_ENCODE

#ifdef DISABLE_SIMD_ENCODE
#ifdef ENABLE_AVX
#define _ENABLE_AVX
#endif
#ifdef ENABLE_SSE4_1
#define _ENABLE_SSE4_1
#endif
#endif

#ifndef DISABLE_SIMD_ENCODE
#ifdef __AVX__
#define _ENABLE_AVX
#endif
#ifdef __SSE4_1__
#define _ENABLE_SSE4_1
#endif
#endif

#if defined _ENABLE_AVX
#include <immintrin.h>
#elif defined _ENABLE_SSE4_1
#include <smmintrin.h>
#endif

#if defined _ENABLE_AVX

/*
 * avx_encode_array_2byte_double
 *
 * Packs `datasize` doubles from `data` into 16-bit big-endian integers and
 * writes them to `lGrib` starting at byte offset `*gz`.  Every value is
 * transformed as (data[k] - zref) * factor + 0.5 and then truncated.
 * On return `*gz` has been advanced by 2 * datasize bytes.
 *
 * The main loop handles 16 values per iteration with AVX/SSE intrinsics;
 * the remaining (datasize % 16) values are handled by a scalar tail loop.
 *
 * NOTE(review): the vector path narrows with _mm_packus_epi32 (unsigned
 * saturation to 16 bit), whereas the scalar tail uses a plain uint16_t cast,
 * so out-of-range values are presumably not treated identically in both
 * paths - confirm that callers guarantee the 0..65535 range.
 *
 * The forward declaration only exists to attach the GCC `optimize(2)`
 * attribute to the definition that follows.
 */
static void avx_encode_array_2byte_double(size_t datasize, unsigned char *restrict lGrib, const double *restrict data, double zref,
                                          double factor, size_t *gz) __attribute__((optimize(2)));
static void
avx_encode_array_2byte_double(size_t datasize, unsigned char *restrict lGrib, const double *restrict data, double zref,
                              double factor, size_t *gz)
{
  // Read cursor into the input and 16-byte write cursor into the output.
  const double *dval = data;
  __m128i *sgrib = (__m128i *) (lGrib + (*gz));

  // Byte shuffle mask that swaps the two bytes of every 16-bit lane (host little-endian -> big-endian).
  const __m128i swap = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

  // Broadcast constants: reference value, scale factor and the 0.5 rounding offset.
  const __m256d c0 = _mm256_set1_pd(zref);
  const __m256d c1 = _mm256_set1_pd(factor);
  const __m256d c2 = _mm256_set1_pd(0.5);

  __m256d d0, d3, d2, d1;
  __m128i i0, i1, i2, i3;
  __m128i s0, s1;

  // Number of trailing values that do not fill a complete group of 16.
  size_t residual = datasize % 16;

  size_t i;
  for (i = 0; i < (datasize - residual); i += 16)
  {
    (void) _mm_prefetch((const char *) (dval + 8), _MM_HINT_NTA);
    //_____________________________________________________________________________
    // values 0..3: scale, add 0.5, truncate to 32-bit integers

    d0 = _mm256_loadu_pd(dval);
    d0 = _mm256_sub_pd(d0, c0);
    d0 = _mm256_mul_pd(d0, c1);
    d0 = _mm256_add_pd(d0, c2);

    i0 = _mm256_cvttpd_epi32(d0);

    //_____________________________________________________________________________
    // values 4..7

    d1 = _mm256_loadu_pd(dval + 4);
    d1 = _mm256_sub_pd(d1, c0);
    d1 = _mm256_mul_pd(d1, c1);
    d1 = _mm256_add_pd(d1, c2);

    i1 = _mm256_cvttpd_epi32(d1);

    //_____________________________________________________________________________
    // narrow 8 x int32 to 8 x uint16 (saturating), byte-swap, store 16 bytes unaligned

    s0 = _mm_packus_epi32(i0, i1);
    s0 = _mm_shuffle_epi8(s0, swap);
    (void) _mm_storeu_si128(sgrib, s0);

    //_____________________________________________________________________________

    (void) _mm_prefetch((const char *) (dval + 16), _MM_HINT_NTA);

    //_____________________________________________________________________________
    // values 8..11

    d2 = _mm256_loadu_pd(dval + 8);
    d2 = _mm256_sub_pd(d2, c0);
    d2 = _mm256_mul_pd(d2, c1);
    d2 = _mm256_add_pd(d2, c2);

    i2 = _mm256_cvttpd_epi32(d2);

    //_____________________________________________________________________________
    // values 12..15

    d3 = _mm256_loadu_pd(dval + 12);
    d3 = _mm256_sub_pd(d3, c0);
    d3 = _mm256_mul_pd(d3, c1);
    d3 = _mm256_add_pd(d3, c2);

    i3 = _mm256_cvttpd_epi32(d3);

    //_____________________________________________________________________________
    // second group of 8 packed values goes to the next 16 output bytes

    s1 = _mm_packus_epi32(i2, i3);
    s1 = _mm_shuffle_epi8(s1, swap);
    (void) _mm_storeu_si128(sgrib + 1, s1);

    //_____________________________________________________________________________
    // advance by 16 input values / 32 output bytes

    dval += 16;
    sgrib += 2;
  }

  // Scalar tail: encode the remaining values one by one, high byte first.
  if (i != datasize)
  {
    uint16_t ui16;
    for (size_t j = i; j < datasize; ++j)
    {
      ui16 = (uint16_t) ((data[j] - zref) * factor + 0.5);
      lGrib[*gz + 2 * j] = ui16 >> 8;
      lGrib[*gz + 2 * j + 1] = ui16;
    }
  }

  // Report the number of bytes written back to the caller.
  *gz += 2 * datasize;

  return;
}

#define grib_encode_array_2byte_double avx_encode_array_2byte_double

#elif defined _ENABLE_SSE4_1

/*
 * sse41_encode_array_2byte_double
 *
 * SSE4.1 variant of the 2-byte encoder: packs `datasize` doubles from `data`
 * into 16-bit big-endian integers written to `lGrib` starting at byte offset
 * `*gz`.  Every value is transformed as (data[k] - zref) * factor + 0.5 and
 * then truncated.  On return `*gz` has been advanced by 2 * datasize bytes.
 *
 * The main loop handles 16 values per iteration, two doubles per 128-bit
 * register; the remaining (datasize % 16) values are handled by a scalar
 * tail loop.
 *
 * NOTE(review): the vector path narrows with _mm_packus_epi32 (unsigned
 * saturation to 16 bit), whereas the scalar tail uses a plain uint16_t cast,
 * so out-of-range values are presumably not treated identically in both
 * paths - confirm that callers guarantee the 0..65535 range.
 */
static void
sse41_encode_array_2byte_double(size_t datasize, unsigned char *restrict lGrib, const double *restrict data, double zref,
                                double factor, size_t *gz)
{
  // Read cursor into the input and 16-byte write cursor into the output.
  const double *dval = data;
  __m128i *sgrib = (__m128i *) (lGrib + (*gz));

  // Byte shuffle mask that swaps the two bytes of every 16-bit lane (host little-endian -> big-endian).
  const __m128i swap = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);

  // Broadcast constants: reference value, scale factor and the 0.5 rounding offset.
  const __m128d c0 = _mm_set1_pd(zref);
  const __m128d c1 = _mm_set1_pd(factor);
  const __m128d c2 = _mm_set1_pd(0.5);

  // d4/i4 are scratch registers for the second pair of each group of four values.
  __m128d d0, d4, d3, d2, d1;
  __m128i i0, i1, i2, i3, i4;
  __m128i s0, s1;

  // Number of trailing values that do not fill a complete group of 16.
  size_t residual = datasize % 16;

  size_t i;
  for (i = 0; i < (datasize - residual); i += 16)
  {
    (void) _mm_prefetch((const char *) (dval + 8), _MM_HINT_NTA);
    //_____________________________________________________________________________
    // values 0..3: two pairs are converted separately, then their low 64-bit halves are joined into 4 x int32

    d0 = _mm_loadu_pd(dval);
    d0 = _mm_sub_pd(d0, c0);
    d0 = _mm_mul_pd(d0, c1);
    d0 = _mm_add_pd(d0, c2);

    d4 = _mm_loadu_pd(dval + 2);
    d4 = _mm_sub_pd(d4, c0);
    d4 = _mm_mul_pd(d4, c1);
    d4 = _mm_add_pd(d4, c2);

    i0 = _mm_cvttpd_epi32(d0);
    i4 = _mm_cvttpd_epi32(d4);
    i0 = _mm_unpacklo_epi64(i0, i4);

    //_____________________________________________________________________________
    // values 4..7

    d1 = _mm_loadu_pd(dval + 4);
    d1 = _mm_sub_pd(d1, c0);
    d1 = _mm_mul_pd(d1, c1);
    d1 = _mm_add_pd(d1, c2);

    d4 = _mm_loadu_pd(dval + 6);
    d4 = _mm_sub_pd(d4, c0);
    d4 = _mm_mul_pd(d4, c1);
    d4 = _mm_add_pd(d4, c2);

    i1 = _mm_cvttpd_epi32(d1);
    i4 = _mm_cvttpd_epi32(d4);
    i1 = _mm_unpacklo_epi64(i1, i4);

    //_____________________________________________________________________________
    // narrow 8 x int32 to 8 x uint16 (saturating), byte-swap, store 16 bytes unaligned

    s0 = _mm_packus_epi32(i0, i1);
    s0 = _mm_shuffle_epi8(s0, swap);
    (void) _mm_storeu_si128(sgrib, s0);

    //_____________________________________________________________________________

    (void) _mm_prefetch((const char *) (dval + 16), _MM_HINT_NTA);

    //_____________________________________________________________________________
    // values 8..11

    d2 = _mm_loadu_pd(dval + 8);
    d2 = _mm_sub_pd(d2, c0);
    d2 = _mm_mul_pd(d2, c1);
    d2 = _mm_add_pd(d2, c2);

    d4 = _mm_loadu_pd(dval + 10);
    d4 = _mm_sub_pd(d4, c0);
    d4 = _mm_mul_pd(d4, c1);
    d4 = _mm_add_pd(d4, c2);

    i2 = _mm_cvttpd_epi32(d2);
    i4 = _mm_cvttpd_epi32(d4);
    i2 = _mm_unpacklo_epi64(i2, i4);

    //_____________________________________________________________________________
    // values 12..15

    d3 = _mm_loadu_pd(dval + 12);
    d3 = _mm_sub_pd(d3, c0);
    d3 = _mm_mul_pd(d3, c1);
    d3 = _mm_add_pd(d3, c2);

    d4 = _mm_loadu_pd(dval + 14);
    d4 = _mm_sub_pd(d4, c0);
    d4 = _mm_mul_pd(d4, c1);
    d4 = _mm_add_pd(d4, c2);

    i3 = _mm_cvttpd_epi32(d3);
    i4 = _mm_cvttpd_epi32(d4);
    i3 = _mm_unpacklo_epi64(i3, i4);

    //_____________________________________________________________________________
    // second group of 8 packed values goes to the next 16 output bytes

    s1 = _mm_packus_epi32(i2, i3);
    s1 = _mm_shuffle_epi8(s1, swap);
    (void) _mm_storeu_si128(sgrib + 1, s1);

    //_____________________________________________________________________________
    // advance by 16 input values / 32 output bytes

    dval += 16;
    sgrib += 2;
  }

  // Scalar tail: encode the remaining values one by one, high byte first.
  if (i != datasize)
  {
    uint16_t ui16;
    for (size_t j = i; j < datasize; ++j)
    {
      ui16 = (uint16_t) ((data[j] - zref) * factor + 0.5);
      lGrib[*gz + 2 * j] = ui16 >> 8;
      lGrib[*gz + 2 * j + 1] = ui16;
    }
  }

  // Report the number of bytes written back to the caller.
  *gz += 2 * datasize;

  return;
}

#define grib_encode_array_2byte_double sse41_encode_array_2byte_double

#else

#define grib_encode_array_2byte_double encode_array_2byte_double

#endif  // SIMD variants

#ifdef TEST_ENCODE

// clang-format off
#define CAT(X,Y)      X##_##Y
#define TEMPLATE(X,Y) CAT(X,Y)

#ifdef T
#undef T
#endif
#define T double

#ifdef T
#undef T
#endif
#define T float
// clang-format on

#include <sys/time.h>

/*
 * dtime - wall-clock time in seconds (with microsecond fraction) as reported
 * by gettimeofday().  Used by the benchmark driver to time encoder runs.
 *
 * The timeval is zero-initialised so that a failing gettimeofday() yields
 * 0.0 instead of reading indeterminate memory.
 */
static double
dtime(void)
{
  struct timeval now = { 0, 0 };
  (void) gettimeofday(&now, NULL);
  return (double) now.tv_sec + (double) now.tv_usec * 1.0e-6;
}

/*
 * pout - print one benchmark result line.
 *
 *   name     - label of the encoder variant (callers pass string literals,
 *              hence const-qualified)
 *   s        - stride used to pick three sample bytes: lgrib[s + 1],
 *              lgrib[2 * s + 1] and lgrib[3 * s + 1]
 *   lgrib    - encoded output buffer; must hold at least 2 * datasize bytes
 *              and at least 3 * s + 2 bytes
 *   datasize - number of encoded values; the last byte lgrib[2 * datasize - 1]
 *              is printed as "valn"
 *   tt       - elapsed time in seconds
 */
static void
pout(const char *name, int s, unsigned char *lgrib, long datasize, double tt)
{
  const int val1 = (int) lgrib[s * 1 + 1];
  const int val2 = (int) lgrib[s * 2 + 1];
  const int val3 = (int) lgrib[s * 3 + 1];
  const int valn = (int) lgrib[2 * datasize - 1];

  printf("%8s: val1: %d  val2: %d  val3: %d  valn: %d  time: %gs\n", name, val1, val2, val3, valn, tt);
}

/*
 * Benchmark driver (only built with -DTEST_ENCODE).
 *
 * Fills a float and a double array with the ramp -datasize/2 .. datasize/2-1,
 * encodes each array NRUN times with every available 2-byte encoder variant
 * and prints a few sample output bytes plus the elapsed wall-clock time per
 * variant.
 *
 * Returns 0 on success and EXIT_FAILURE if the work buffers cannot be
 * allocated.  All buffers are released before returning.
 */
int
main(void)
{
  enum
  {
    datasize = 1000000,
    NRUN = 10000,
  };

  double t_begin, t_end;

  float *dataf = (float *) malloc(datasize * sizeof(float));
  double *data = (double *) malloc(datasize * sizeof(double));
  unsigned char *lgrib = (unsigned char *) malloc(2 * datasize * sizeof(unsigned char));

  // Without this check the fill loops below would dereference NULL on allocation failure.
  if (dataf == NULL || data == NULL || lgrib == NULL)
  {
    fprintf(stderr, "encode benchmark: out of memory\n");
    free(lgrib);
    free(data);
    free(dataf);
    return EXIT_FAILURE;
  }

  for (long i = 0; i < datasize; ++i) dataf[i] = (float) (-datasize / 2 + i);
  for (long i = 0; i < datasize; ++i) data[i] = (double) (-datasize / 2 + i);

  int PackStart = 0;
  int nbpv = 16;
  double zref = data[0];
  size_t z;
  double factor = 0.00390625;
  int s = 256;

  // Never executed; only references the generic encoders so that they do not trigger unused-function warnings.
  if (0)
  {
    encode_array_float(0, 0, 0, NULL, NULL, 0, 0, NULL);
    encode_array_double(0, 0, 0, NULL, NULL, 0, 0, NULL);
  }

#if defined(__ICC)
  printf("icc\n");
#elif defined(__clang__)
  printf("clang\n");
#elif defined(__GNUC__)
  printf("gcc\n");
#endif

  printf("float:\n");

  t_begin = dtime();
  for (int i = 0; i < NRUN; ++i)
  {
    z = 0;
    encode_array_2byte_float(datasize, lgrib, dataf, (float) zref, (float) factor, &z);
  }
  t_end = dtime();
  pout("orig", s, lgrib, datasize, t_end - t_begin);

  t_begin = dtime();
  for (int i = 0; i < NRUN; ++i)
  {
    z = 0;
    encode_array_unrolled_float(nbpv, PackStart, datasize, lgrib, dataf, (float) zref, (float) factor, &z);
  }
  t_end = dtime();
  pout("unrolled", s, lgrib, datasize, t_end - t_begin);

  printf("double:\n");

  t_begin = dtime();
  for (int i = 0; i < NRUN; ++i)
  {
    z = 0;
    encode_array_2byte_double(datasize, lgrib, data, zref, factor, &z);
  }
  t_end = dtime();
  pout("orig", s, lgrib, datasize, t_end - t_begin);

  t_begin = dtime();
  for (int i = 0; i < NRUN; ++i)
  {
    z = 0;
    encode_array_unrolled_double(nbpv, PackStart, datasize, lgrib, data, zref, factor, &z);
  }
  t_end = dtime();
  pout("unrolled", s, lgrib, datasize, t_end - t_begin);

#if defined _ENABLE_AVX
  t_begin = dtime();
  for (int i = 0; i < NRUN; ++i)
  {
    z = 0;
    avx_encode_array_2byte_double(datasize, lgrib, data, zref, factor, &z);
  }
  t_end = dtime();
  pout("avx", s, lgrib, datasize, t_end - t_begin);
#elif defined _ENABLE_SSE4_1
  t_begin = dtime();
  for (int i = 0; i < NRUN; ++i)
  {
    z = 0;
    sse41_encode_array_2byte_double(datasize, lgrib, data, zref, factor, &z);
  }
  t_end = dtime();
  pout("sse41", s, lgrib, datasize, t_end - t_begin);
#endif

  // Release the work buffers (previously leaked until process exit).
  free(lgrib);
  free(data);
  free(dataf);

  return 0;
}
#endif  // TEST_ENCODE

#undef DISABLE_SIMD_ENCODE
#undef _ENABLE_AVX
#undef _ENABLE_SSE4_1

/*
  confp3_mantissa - mantissa computation shared by the first attempt and the
  exponent-adjustment retries of confp3().

  Scales the magnitude zref by 16**(70 - iexp) (done with ldexp, i.e. a shift
  by 4 * (70 - iexp) binary digits) and converts the result to an integer:

    iround != 0 : nearest integer (lround).
    iround == 0 : truncation when isign == 0 (non-negative input), otherwise
                  lround(scaled + 0.5).
*/
static int
confp3_mantissa(double zref, int iexp, int isign, int iround)
{
  const double scaled = ldexp(zref, 4 * -(iexp - 70));

  if (iround != 0) return (int) lround(scaled);

  return (isign == 0) ? (int) scaled : (int) lround(scaled + 0.5);
}

/*
  confp3 - convert a floating point number from machine representation to
  the GRIB representation (sign + base-16 exponent, integer mantissa).

  Input:
    pval    Floating point number to be converted.
    kbits   Number of bits in computer word; only selects the small epsilon
            added to the exponent estimate (1.0e-8 for 32, 1.0e-12 otherwise).
    kround  Conversion type.
              0 : closest number in GRIB format less than the original number
              1 : closest number in GRIB format to the original number
            Any other value is reported through Error() and then treated as 1.

  Output:
    kexp    Exponent clamped to 0..127, plus 128 when pval is negative.
    kmant   Mantissa; the exponent is raised until it does not exceed
            16777215 (2**24 - 1).

  A value of zero yields kexp = 0 and kmant = 0.  If the exponent would have
  to grow beyond 127, the condition is reported through Message()/Error() and
  both outputs are set to zero.

  Reference: WMO Manual on Codes re GRIB representation.

  History:
    John Hennessy     ECMWF   18.06.91    original EMOS routine
    Uwe Schulzweida   MPIfM   01/04/2001  converted to C from EMOS library version 130
    Uwe Schulzweida   MPIfM   02/08/2002  speed up of the power-of-16 scaling
*/
void
confp3(double pval, int *kexp, int *kmant, int kbits, int kround)
{
  enum
  {
    CONFP3_MANT_MAX = 16777215, /* 2**24 - 1 */
    CONFP3_EXP_MAX = 127
  };

  /* Section 1: validate the conversion type. */
  int iround = kround;
  if (!(iround == 0 || iround == 1))
  {
    Error("Invalid conversion type = %d", iround);

    // If Error() returns, fall back to rounding to nearest.
    iround = 1;
  }

  /* Section 2: zero converts to an all-zero representation. */
  if (fabs(pval) <= 0)
  {
    *kexp = 0;
    *kmant = 0;
    return;
  }

  /* Section 3: every other value. */
  const double zeps = (kbits == 32) ? 1.0e-8 : 1.0e-12;
  const int isign = (pval >= 0.0) ? 0 : 128;
  const double zref = fabs(pval);

  // Base-16 exponent with bias; zeps compensates for log() landing just below an integer.
  int iexp = (int) (log(zref) / log(16.0) + 65.0 + zeps);
  if (iexp < 0) iexp = 0;
  if (iexp > CONFP3_EXP_MAX) iexp = CONFP3_EXP_MAX;

  *kmant = confp3_mantissa(zref, iexp, isign, iround);

  // While the mantissa needs more than 24 bits, raise the exponent and recompute it.
  while (*kmant > CONFP3_MANT_MAX)
  {
    ++iexp;

    if (iexp > CONFP3_EXP_MAX)
    {
      Message("Exponent overflow");
      Message("Original number = %30.20f", pval);
      Message("Sign = %3d, Exponent = %3d, Mantissa = %12d", isign, iexp, *kmant);

      Error("Exponent overflow");

      // If Error() returns, the value is arbitrarily set to zero.
      Message("Value arbitrarily set to zero.");
      *kexp = 0;
      *kmant = 0;
      return;
    }

    *kmant = confp3_mantissa(zref, iexp, isign, iround);
  }

  // Fold the sign bit into the exponent.
  *kexp = iexp + isign;
} /* confp3 */
#include <math.h>

/*
  decfp2 - convert the GRIB representation of a floating point number
  (sign + base-16 exponent, integer mantissa) to a machine double.

  Input:
    kexp    Exponent; values of 128 and above carry the sign bit
            (negative number), the remaining value is biased by 64.
    kmant   Mantissa, scaled by POW_2_M24.

  Returns:
    isign * kmant * POW_2_M24 * 16**(exponent - 64), or 0.0 when kexp is
    0, 128 or 255 (a sign bit on a zero value is ignored).

  Reference: WMO Manual on Codes re GRIB representation.

  History:
    John Hennessy     ECMWF   18.06.91    original EMOS routine (rewritten from DECFP)
    Uwe Schulzweida   MPIfM   01/04/2001  converted to C from EMOS library version 130
    Uwe Schulzweida   MPIfM   02/08/2002  speed up of the power computations
*/
double
decfp2(int kexp, int kmant)
{
  /* Section 1: exponents that decode to 0.0. */
  switch (kexp)
  {
  case 0:
  case 128:
  case 255: return 0.0;
  default: break;
  }

  /* Section 2: split off the sign bit and remove the exponent bias. */
  const int negative = (kexp >= 128);
  const int isign = negative ? -1 : 1;
  const int iexp = (negative ? kexp - 128 : kexp) - 64;

  // 16**iexp is formed exactly as 2**(4 * iexp).
  return ldexp(1.0, 4 * iexp) * isign * POW_2_M24 * kmant;
}
#include <stdarg.h>
#include <stdint.h>

/*
 * Decodes the reference date stored in Section 1 into year, month and day.
 *
 * The year is assembled from ISEC1_Year (year of century) and ISEC1_Century;
 * a negative ISEC1_Century yields a negative year.  Special cases handled:
 *   - year-of-century 100 is the first year of the following century,
 *   - year-of-century 255 yields year 1.
 * Month and day are copied from ISEC1_Month / ISEC1_Day unchanged.
 */
static void
gribDecodeRefDate(const int *isec1, int *year, int *month, int *day)
{
  const int rawCentury = ISEC1_Century;
  int century = ((rawCentury < 0) ? -rawCentury : rawCentury) - 1;
  int ryear = ISEC1_Year;

  // NOTE(review): century cannot be below -1 at this point, so this branch looks unreachable - confirm intent.
  if (century == -255 && ryear == 127) { ryear = 0; }
  else if (ryear == 255) { ryear = 1; }
  else
  {
    if (ryear == 100)
    {
      ryear = 0;
      century += 1;
    }

    ryear += century * 100;
    if (rawCentury < 0) ryear = -ryear;
  }

  *year = ryear;
  *month = ISEC1_Month;
  *day = ISEC1_Day;
}

/*
 * Returns the reference date of Section 1 as a single integer produced by
 * cdiEncodeDate() from the decoded year, month and day.
 */
int
gribRefDate(const int *isec1)
{
  int refYear = 0;
  int refMonth = 0;
  int refDay = 0;

  gribDecodeRefDate(isec1, &refYear, &refMonth, &refDay);

  return (int) cdiEncodeDate(refYear, refMonth, refDay);
}

/*
 * Decodes the reference time of day stored in Section 1.
 *
 * Hour and minute are read through the ISEC1_Hour / ISEC1_Minute accessors;
 * the second is always reported as 0.
 */
static void
gribDecodeRefTime(const int *isec1, int *hour, int *minute, int *second)
{
  *hour = ISEC1_Hour;
  *minute = ISEC1_Minute;
  *second = 0;
}

/*
 * Returns the reference time of Section 1 as a single integer produced by
 * cdiEncodeTime() from the decoded hour, minute and second.
 */
int
gribRefTime(const int *isec1)
{
  int refHour = 0;
  int refMinute = 0;
  int refSecond = 0;

  gribDecodeRefTime(isec1, &refHour, &refMinute, &refSecond);

  return cdiEncodeTime(refHour, refMinute, refSecond);
}

/*
 * Tells whether Section 1 describes a forecast time.
 *
 * Returns true when the time period is positive, ISEC1_Day is positive and
 * the time range indicator is 0 or 10.  For time range 10 the period is the
 * 16-bit value (ISEC1_TimePeriod1 << 8) + ISEC1_TimePeriod2, otherwise it is
 * ISEC1_TimePeriod1.
 */
bool
gribTimeIsFC(const int *isec1)
{
  const int timeRange = ISEC1_TimeRange;

  int timePeriod = ISEC1_TimePeriod1;
  if (timeRange == 10) timePeriod = (timePeriod << 8) + ISEC1_TimePeriod2;

  if (timePeriod <= 0 || ISEC1_Day <= 0) return false;

  return (timeRange == 0 || timeRange == 10);
}

/*
 * Maps an ISEC1_TABLE4_* time unit to its length in seconds.
 *
 * Returns 0 for any other unit; the first such call additionally reports
 * the unsupported unit through gprintf(), later calls stay silent.
 */
static int
getTimeUnitFactor(int timeUnit)
{
  static bool lprint = true;  // cleared after the first "unsupported" message
  int seconds = 0;

  // clang-format off
  switch (timeUnit)
    {
    case ISEC1_TABLE4_MINUTE:    seconds =    60; break;
    case ISEC1_TABLE4_QUARTER:   seconds =   900; break;
    case ISEC1_TABLE4_30MINUTES: seconds =  1800; break;
    case ISEC1_TABLE4_HOUR:      seconds =  3600; break;
    case ISEC1_TABLE4_3HOURS:    seconds = 10800; break;
    case ISEC1_TABLE4_6HOURS:    seconds = 21600; break;
    case ISEC1_TABLE4_12HOURS:   seconds = 43200; break;
    case ISEC1_TABLE4_DAY:       seconds = 86400; break;
    default:
      if (lprint)
        {
          gprintf(__func__, "Time unit %d unsupported", timeUnit);
          lprint = false;
        }
      break;
    }
  // clang-format on

  return seconds;
}

/*
 * Computes the verification date/time of a GRIB record and, for time ranges
 * 2..5, also the start date/time of the interval.
 *
 *   isec1      decoded Section 1
 *   date, time receive the reference date/time advanced by the time period
 *              (or the plain reference date/time when no period applies)
 *   startDate, startTime
 *              receive the reference date/time advanced by ISEC1_TimePeriod1
 *              for time ranges 2..5 when that period is positive (seconds
 *              forced to 0); otherwise both are 0
 *
 * Period selection by ISEC1_TimeRange:
 *   10   : (ISEC1_TimePeriod1 << 8) + ISEC1_TimePeriod2
 *   2..5 : ISEC1_TimePeriod2, interval start from ISEC1_TimePeriod1
 *   0    : ISEC1_TimePeriod1
 * Periods are converted to seconds with getTimeUnitFactor(ISEC1_TimeUnit).
 */
void
gribDateTimeX(int *isec1, int *date, int *time, int *startDate, int *startTime)
{
  *startDate = 0;
  *startTime = 0;

  CdiDateTime rDateTime = cdiDateTime_set(gribRefDate(isec1), gribRefTime(isec1));

  int64_t time_period = 0;
  int64_t time_period_x = 0;

  switch (ISEC1_TimeRange)
  {
  case 10: time_period = (ISEC1_TimePeriod1 << 8) + ISEC1_TimePeriod2; break;
  case 2:
  case 3:
  case 4:
  case 5:
    time_period_x = ISEC1_TimePeriod1;
    time_period = ISEC1_TimePeriod2;
    break;
  case 0: time_period = ISEC1_TimePeriod1; break;
  default: break;
  }

  if (time_period > 0 && rDateTime.date.day > 0)
  {
    const JulianDate refJulian = julianDate_encode(CGRIBEX_grib_calendar, rDateTime);
    const int timeUnitFactor = getTimeUnitFactor(ISEC1_TimeUnit);

    if (time_period_x > 0)
    {
      // Start of the interval: reference time plus the first period.
      const JulianDate startJulian = julianDate_add_seconds(refJulian, timeUnitFactor * time_period_x);
      CdiDateTime sDateTime = julianDate_decode(CGRIBEX_grib_calendar, startJulian);
      sDateTime.time.second = 0;
      *startDate = (int) cdiDate_get(sDateTime.date);
      *startTime = cdiTime_get(sDateTime.time);
    }

    // Verification time: reference time plus the selected period.
    const JulianDate endJulian = julianDate_add_seconds(refJulian, timeUnitFactor * time_period);
    rDateTime = julianDate_decode(CGRIBEX_grib_calendar, endJulian);
  }

  *date = (int) cdiDate_get(rDateTime.date);
  *time = cdiTime_get(rDateTime.time);
}

/*
 * Convenience wrapper around gribDateTimeX() for callers that only need the
 * verification date and time; the interval start values are discarded.
 */
void
gribDateTime(int *isec1, int *date, int *time)
{
  int ignoredStartDate;
  int ignoredStartTime;

  gribDateTimeX(isec1, date, time, &ignoredStartDate, &ignoredStartTime);
}

/*
 * printf-style message output to the grprsm stream.
 *
 * Writes the caller name left-justified in an 18 character field followed by
 * " : ", then the formatted message and a newline.  A NULL grprsm is
 * reported through Error() before anything is written.
 */
void
gprintf(const char *caller, const char *fmt, ...)
{
  if (grprsm == NULL) Error("GRIBEX initialization missing!");

  fprintf(grprsm, "%-18s : ", caller);

  va_list ap;
  va_start(ap, fmt);
  vfprintf(grprsm, fmt, ap);
  va_end(ap);

  fputc('\n', grprsm);
}

// clang-format off
/*
 * Double precision entry point of the GRIBEX interface.
 *
 * Dispatches on the first character of hoper:
 *   'C'           encode via grib_encode_double()
 *   'D', 'J', 'R' decode via grib_decode_double()
 *   'V'           print the library version to stderr
 * Any other character is reported through Error() and *kret is set to -9.
 * All remaining arguments are forwarded unchanged to the encoder/decoder.
 */
void
gribExDP(int *isec0, int *isec1, int *isec2, double *fsec2, int *isec3,
	 double *fsec3, int *isec4, double *fsec4, int klenp, int *kgrib,
	 int kleng, int *kword, const char *hoper, int *kret)
{
  const int yfunc = *hoper;

  switch (yfunc)
    {
    case 'C':
      grib_encode_double(isec0, isec1, isec2, fsec2, isec3, fsec3, isec4, fsec4,
                         klenp, kgrib, kleng, kword, yfunc, kret);
      break;

    case 'D':
    case 'J':
    case 'R':
      grib_decode_double(isec0, isec1, isec2, fsec2, isec3, fsec3, isec4, fsec4,
                         klenp, kgrib, kleng, kword, yfunc, kret);
      break;

    case 'V':
      fprintf(stderr, "  cgribex: Version is %s\n", cgribexLibraryVersion());
      break;

    default:
      Error("oper %c unsupported!", yfunc);
      *kret = -9;
      break;
    }
}


/*
 * Single precision entry point of the GRIBEX interface.
 *
 * Dispatches on the first character of hoper:
 *   'C'           encode via grib_encode_float()
 *   'D', 'J', 'R' decode via grib_decode_float()
 *   'V'           print the library version to stderr
 * Any other character is reported through Error() and *kret is set to -9.
 * All remaining arguments are forwarded unchanged to the encoder/decoder.
 */
void
gribExSP(int *isec0, int *isec1, int *isec2, float *fsec2, int *isec3,
	 float *fsec3, int *isec4, float *fsec4, int klenp, int *kgrib,
	 int kleng, int *kword, const char *hoper, int *kret)
{
  const int yfunc = *hoper;

  switch (yfunc)
    {
    case 'C':
      grib_encode_float(isec0, isec1, isec2, fsec2, isec3, fsec3, isec4, fsec4,
                        klenp, kgrib, kleng, kword, yfunc, kret);
      break;

    case 'D':
    case 'J':
    case 'R':
      grib_decode_float(isec0, isec1, isec2, fsec2, isec3, fsec3, isec4, fsec4,
                        klenp, kgrib, kleng, kword, yfunc, kret);
      break;

    case 'V':
      fprintf(stderr, " cgribex: Version is %s\n", cgribexLibraryVersion());
      break;

    default:
      Error("oper %c unsupported!", yfunc);
      *kret = -9;
      break;
    }
}
// clang-format on

// Library-wide option flags; all default to 0 (off) and are changed through the setter functions below.
int CGRIBEX_Fix_ZSE = 0; /* 1: Fix ZeroShiftError of simple packed spherical harmonics (set by gribFixZSE) */
int CGRIBEX_Const = 0;   /* 1: Don't pack constant fields on regular grids (set by gribSetConst) */
int CGRIBEX_Debug = 0;   /* 1: Debugging (set by gribSetDebug) */

/*
 * Sets the global debug level CGRIBEX_Debug and, when the new level is
 * non-zero, reports it through Message().
 */
void
gribSetDebug(int debug)
{
  CGRIBEX_Debug = debug;

  if (debug != 0)
  {
    Message("debug level %d", debug);
  }
}

/*
 * Stores `flag` in CGRIBEX_Fix_ZSE (1 enables the ZeroShiftError fix for
 * simple packed spherical harmonics).  The change is reported through
 * Message() while debugging is enabled.
 */
void
gribFixZSE(int flag)
{
  CGRIBEX_Fix_ZSE = flag;

  if (CGRIBEX_Debug != 0)
  {
    Message("Fix ZeroShiftError set to %d", flag);
  }
}

/*
 * Stores `flag` in CGRIBEX_Const (1: constant fields on regular grids are
 * not packed).  The change is reported through Message() while debugging is
 * enabled.
 */
void
gribSetConst(int flag)
{
  CGRIBEX_Const = flag;

  if (CGRIBEX_Debug != 0)
  {
    Message("Const set to %d", flag);
  }
}

/*
 * No-op: the rounding setting is accepted and ignored (the argument is only
 * passed to UNUSED()).
 * NOTE(review): presumably retained for compatibility with the GRIBEX API - confirm.
 */
void
gribSetRound(int round)
{
  UNUSED(round);
}

/*
 * No-op: the double precision reference value is accepted and ignored (the
 * argument is only passed to UNUSED()).
 * NOTE(review): presumably retained for compatibility with the GRIBEX API - confirm.
 */
void
gribSetRefDP(double refval)
{
  UNUSED(refval);
}

/*
 * Single precision counterpart of gribSetRefDP(): widens the reference value
 * to double and forwards it.
 */
void
gribSetRefSP(float refval)
{
  const double widened = (double) refval;

  gribSetRefDP(widened);
}

/*
 * No-op: the value-check setting is accepted and ignored (the argument is
 * only passed to UNUSED()).
 * NOTE(review): presumably retained for compatibility with the GRIBEX API - confirm.
 */
void
gribSetValueCheck(int vcheck)
{
  UNUSED(vcheck);
}
#include <string.h>
#include <math.h>

void
gribPrintSec0(int *isec0)
{
  /*

    Write the decoded Indicator Section (Section 0) of a GRIB
    message to the print stream grprsm.

    Input Parameters:

       isec0 - Array of decoded integers from Section 0

    Output consists of a three line heading followed by the message
    length (ISEC0_GRIB_Len) and the edition number (ISEC0_GRIB_Version).


    Converted from EMOS routine GRPRS0.

       Uwe Schulzweida   MPIfM   01/04/2001

  */

  // Called before any output, as in the other print routines
  grsdef();

  // Heading: fixed text without conversions
  fputs(" \n", grprsm);
  fputs(" Section 0 - Indicator Section.       \n", grprsm);
  fputs(" -------------------------------------\n", grprsm);

  // Section content
  fprintf(grprsm, " Length of GRIB message (octets).     %9d\n", ISEC0_GRIB_Len);
  fprintf(grprsm, " GRIB Edition Number.                 %9d\n", ISEC0_GRIB_Version);
}

void
gribPrintSec1(int *isec0, int *isec1)
{
  /*

    Print the information in the Product Definition
    Section (Section 1) of decoded GRIB data.

    Input Parameters
       isec0 - Array of decoded integers from Section 0

       isec1 - Array of decoded integers from Section 1

    Comments:

       When decoding data from Experimental Edition or Edition 0,
       routine GRIBEX adds the additional fields available in
       Edition 1.

       All output goes to the print stream grprsm.

       The common part of Section 1 is always printed. Local
       extensions are printed depending on the centre/sub-centre and
       the local usage identifier isec1[36]. The ECMWF local
       definitions 1, 2, 3, 4, 5 and 8 return immediately after
       printing, so nothing after them is printed; all other
       definitions fall through to the end of the routine.


    Converted from EMOS routine GRPRS1.

       Uwe Schulzweida   MPIfM   01/04/2001

  */

  int iprev, icurr, ioffset;
  int ibit, ierr, iout, iyear;
  int jiloop;
  float value;
  char hversion[9];

  grsdef();

  /*
    -----------------------------------------------------------------
    Section 0 . Print required information.
    -----------------------------------------------------------------
  */

  fprintf(grprsm, " \n");
  fprintf(grprsm, " Section 1 - Product Definition Section.\n");
  fprintf(grprsm, " ---------------------------------------\n");

  fprintf(grprsm, " Code Table 2 Version Number.         %9d\n", isec1[0]);
  fprintf(grprsm, " Originating centre identifier.       %9d\n", isec1[1]);
  fprintf(grprsm, " Model identification.                %9d\n", isec1[2]);
  fprintf(grprsm, " Grid definition.                     %9d\n", isec1[3]);

  // The flag is printed as 8 binary digits; prtbin delivers them in iout
  ibit = 8;
  prtbin(isec1[4], ibit, &iout, &ierr);
  fprintf(grprsm, " Flag (Code Table 1)                   %8.8d\n", iout);
  fprintf(grprsm, " Parameter identifier (Code Table 2). %9d\n", isec1[5]);

  // Parameter 127 is labelled as satellite data instead of level information
  if (isec1[5] != 127)
  {
    fprintf(grprsm, " Type of level (Code Table 3).        %9d\n", isec1[6]);
    fprintf(grprsm, " Value 1 of level (Code Table 3).     %9d\n", isec1[7]);
    fprintf(grprsm, " Value 2 of level (Code Table 3).     %9d\n", isec1[8]);
  }
  else
  {
    fprintf(grprsm, " Satellite identifier.                %9d\n", isec1[6]);
    fprintf(grprsm, " Spectral band.                       %9d\n", isec1[7]);
  }

  iyear = isec1[9];
  if (iyear != 255)
  {
    // Show the raw year together with the full year taken from gribDateTime
    int date, time;
    gribDateTime(isec1, &date, &time);
    iyear = date / 10000;
    fprintf(grprsm, " Year of reference time of data.      %9d  (%4d)\n", isec1[9], iyear);
  }
  else { fprintf(grprsm, " Year of reference time of data MISSING  (=255)\n"); }

  fprintf(grprsm, " Month of reference time of data.     %9d\n", isec1[10]);
  fprintf(grprsm, " Day of reference time of data.       %9d\n", isec1[11]);
  fprintf(grprsm, " Hour of reference time of data.      %9d\n", isec1[12]);
  fprintf(grprsm, " Minute of reference time of data.    %9d\n", isec1[13]);
  fprintf(grprsm, " Time unit (Code Table 4).            %9d\n", isec1[14]);
  fprintf(grprsm, " Time range one.                      %9d\n", isec1[15]);
  fprintf(grprsm, " Time range two.                      %9d\n", isec1[16]);
  fprintf(grprsm, " Time range indicator (Code Table 5)  %9d\n", isec1[17]);
  fprintf(grprsm, " Number averaged.                     %9d\n", isec1[18]);
  fprintf(grprsm, " Number missing from average.         %9d\n", isec1[19]);
  /*
     All ECMWF data in GRIB Editions before Edition 1 is decoded
     as 20th century data. Other centres are decoded as missing.
  */
  if (isec0[1] < 1 && isec1[1] != 98)
    fprintf(grprsm, " Century of reference time of data.   Not given\n");
  else
    fprintf(grprsm, " Century of reference time of data.   %9d\n", isec1[20]);

  //   Print sub-centre
  fprintf(grprsm, " Sub-centre identifier.               %9d\n", ISEC1_SubCenterID);

  //   Decimal scale factor
  fprintf(grprsm, " Units decimal scaling factor.        %9d\n", isec1[22]);

  /*
    -----------------------------------------------------------------
    Section 1 . Print local DWD information.
    -----------------------------------------------------------------
  */
  if ((ISEC1_CenterID == 78 || ISEC1_CenterID == 215 || ISEC1_CenterID == 250) && (isec1[36] == 253 || isec1[36] == 254))
  {
    fprintf(grprsm, " DWD local usage identifier.          %9d\n", isec1[36]);
    if (isec1[36] == 253) fprintf(grprsm, " (Database labelling and ensemble forecast)\n");
    if (isec1[36] == 254) fprintf(grprsm, " (Database labelling)\n");

    fprintf(grprsm, " Year of database entry                     %3d  (%4d)\n", isec1[43], 1900 + isec1[43]);
    fprintf(grprsm, " Month of database entry                    %3d\n", isec1[44]);
    fprintf(grprsm, " Day of database entry                      %3d\n", isec1[45]);
    fprintf(grprsm, " Hour of database entry                     %3d\n", isec1[46]);
    fprintf(grprsm, " Minute of database entry                   %3d\n", isec1[47]);
    fprintf(grprsm, " DWD experiment number                %9d\n", isec1[48]);
    fprintf(grprsm, " DWD run type                         %9d\n", isec1[49]);
    if (isec1[36] == 253)
    {
      fprintf(grprsm, " User id                              %9d\n", isec1[50]);
      fprintf(grprsm, " Experiment identifier                %9d\n", isec1[51]);
      fprintf(grprsm, " Ensemble identification type         %9d\n", isec1[52]);
      fprintf(grprsm, " Number of ensemble members           %9d\n", isec1[53]);
      fprintf(grprsm, " Actual number of ensemble member     %9d\n", isec1[54]);
      fprintf(grprsm, " Model version                            %2d.%2.2d\n", isec1[55], isec1[56]);
    }
  }

  /*
    -----------------------------------------------------------------
    Section 2 . Print local ECMWF information.
    -----------------------------------------------------------------
  */
  /*
    Regular MARS labelling, or reformatted Washington EPS products.
  */
  if ((ISEC1_CenterID == 98 && ISEC1_LocalFLag == 1) || (ISEC1_SubCenterID == 98 && ISEC1_LocalFLag == 1)
      || (ISEC1_CenterID == 7 && ISEC1_SubCenterID == 98))
  {
    /*   Parameters common to all definitions.  */

    fprintf(grprsm, " ECMWF local usage identifier.        %9d\n", isec1[36]);
    if (isec1[36] == 1) fprintf(grprsm, " (Mars labelling or ensemble forecast)\n");
    if (isec1[36] == 2) fprintf(grprsm, " (Cluster means and standard deviations)\n");
    if (isec1[36] == 3) fprintf(grprsm, " (Satellite image data)\n");
    if (isec1[36] == 4) fprintf(grprsm, " (Ocean model data)\n");
    if (isec1[36] == 5) fprintf(grprsm, " (Forecast probability data)\n");
    if (isec1[36] == 6) fprintf(grprsm, " (Surface temperature data)\n");
    if (isec1[36] == 7) fprintf(grprsm, " (Sensitivity data)\n");
    if (isec1[36] == 8) fprintf(grprsm, " (ECMWF re-analysis data)\n");
    if (isec1[36] == 9) fprintf(grprsm, " (Singular vectors and ensemble perturbations)\n");
    if (isec1[36] == 10) fprintf(grprsm, " (EPS tubes)\n");
    if (isec1[36] == 11) fprintf(grprsm, " (Supplementary data used by analysis)\n");
    if (isec1[36] == 13) fprintf(grprsm, " (Wave 2D spectra direction and frequency)\n");

    fprintf(grprsm, " Class.                               %9d\n", isec1[37]);
    fprintf(grprsm, " Type.                                %9d\n", isec1[38]);
    fprintf(grprsm, " Stream.                              %9d\n", isec1[39]);
    // The bytes of isec1[40] are printed as characters. The precision limits the
    // read to the first 4 bytes, so no terminating NUL inside the int is required.
    snprintf(hversion, sizeof(hversion), "%4.4s", (char *) &isec1[40]);
    hversion[4] = 0;
    fprintf(grprsm, " Version number or Experiment identifier.  %4s\n", hversion);
    /*
      ECMWF Local definition 1.
      (MARS labelling or ensemble forecast data)
    */
    if (isec1[36] == 1)
    {
      fprintf(grprsm, " Forecast number.                     %9d\n", isec1[41]);
      if (isec1[39] != 1090) fprintf(grprsm, " Total number of forecasts.           %9d\n", isec1[42]);

      return;
    }
    /*
      ECMWF Local definition 2.
      (Cluster means and standard deviations)
    */
    if (isec1[36] == 2)
    {
      fprintf(grprsm, " Cluster number.                      %9d\n", isec1[41]);
      fprintf(grprsm, " Total number of clusters.            %9d\n", isec1[42]);
      fprintf(grprsm, " Clustering method.                   %9d\n", isec1[43]);
      fprintf(grprsm, " Start time step when clustering.     %9d\n", isec1[44]);
      fprintf(grprsm, " End time step when clustering.       %9d\n", isec1[45]);
      fprintf(grprsm, " Northern latitude of domain.         %9d\n", isec1[46]);
      fprintf(grprsm, " Western longitude of domain.         %9d\n", isec1[47]);
      fprintf(grprsm, " Southern latitude of domain.         %9d\n", isec1[48]);
      fprintf(grprsm, " Eastern longitude of domain.         %9d\n", isec1[49]);
      fprintf(grprsm, " Operational forecast in cluster      %9d\n", isec1[50]);
      fprintf(grprsm, " Control forecast in cluster          %9d\n", isec1[51]);
      fprintf(grprsm, " Number of forecasts in cluster.      %9d\n", isec1[52]);

      for (int jloop = 0; jloop < isec1[52]; jloop++)
        fprintf(grprsm, " Forecast number                      %9d\n", isec1[jloop + 53]);

      return;
    }
    /*
      ECMWF Local definition 3.
      (Satellite image data)
    */
    if (isec1[36] == 3)
    {
      fprintf(grprsm, " Satellite spectral band.             %9d\n", isec1[41]);
      fprintf(grprsm, " Function code.                       %9d\n", isec1[42]);
      return;
    }
    /*
      ECMWF Local definition 4.
      (Ocean model data)
    */
    if (isec1[36] == 4)
    {
      // NOTE(review): the two labels below are the same as in definition 3 (satellite
      // image data); confirm that they are the intended labels for ocean model data.
      fprintf(grprsm, " Satellite spectral band.             %9d\n", isec1[41]);
      if (isec1[39] != 1090) fprintf(grprsm, " Function code.                       %9d\n", isec1[42]);
      fprintf(grprsm, " Coordinate structure definition.\n");
      fprintf(grprsm, " Fundamental spatial reference system.%9d\n", isec1[43]);
      fprintf(grprsm, " Fundamental time reference.          %9d\n", isec1[44]);
      fprintf(grprsm, " Space unit flag.                     %9d\n", isec1[45]);
      fprintf(grprsm, " Vertical coordinate definition.      %9d\n", isec1[46]);
      fprintf(grprsm, " Horizontal coordinate definition.    %9d\n", isec1[47]);
      fprintf(grprsm, " Time unit flag.                      %9d\n", isec1[48]);
      fprintf(grprsm, " Time coordinate definition.          %9d\n", isec1[49]);
      fprintf(grprsm, " Position definition.     \n");
      fprintf(grprsm, " Mixed coordinate field flag.         %9d\n", isec1[50]);
      fprintf(grprsm, " Coordinate 1 flag.                   %9d\n", isec1[51]);
      fprintf(grprsm, " Averaging flag.                      %9d\n", isec1[52]);
      fprintf(grprsm, " Position of level 1.                 %9d\n", isec1[53]);
      fprintf(grprsm, " Position of level 2.                 %9d\n", isec1[54]);
      fprintf(grprsm, " Coordinate 2 flag.                   %9d\n", isec1[55]);
      fprintf(grprsm, " Averaging flag.                      %9d\n", isec1[56]);
      fprintf(grprsm, " Position of level 1.                 %9d\n", isec1[57]);
      fprintf(grprsm, " Position of level 2.                 %9d\n", isec1[58]);
      fprintf(grprsm, " Grid Definition.\n");
      fprintf(grprsm, " Coordinate 3 flag (x-axis)           %9d\n", isec1[59]);
      fprintf(grprsm, " Coordinate 4 flag (y-axis)           %9d\n", isec1[60]);
      fprintf(grprsm, " Coordinate 4 of first grid point.    %9d\n", isec1[61]);
      fprintf(grprsm, " Coordinate 3 of first grid point.    %9d\n", isec1[62]);
      fprintf(grprsm, " Coordinate 4 of last grid point.     %9d\n", isec1[63]);
      fprintf(grprsm, " Coordinate 3 of last grid point.     %9d\n", isec1[64]);
      fprintf(grprsm, " i - increment.                       %9d\n", isec1[65]);
      fprintf(grprsm, " j - increment.                       %9d\n", isec1[66]);
      fprintf(grprsm, " Flag for irregular grid coordinates. %9d\n", isec1[67]);
      fprintf(grprsm, " Flag for normal or staggered grids.  %9d\n", isec1[68]);
      fprintf(grprsm, " Further information.\n");
      fprintf(grprsm, " Further information flag.            %9d\n", isec1[69]);
      fprintf(grprsm, " Auxiliary information.\n");
      fprintf(grprsm, " No. entries in horizontal coordinate %9d\n", isec1[70]);
      fprintf(grprsm, " No. entries in mixed coordinate defn.%9d\n", isec1[71]);
      fprintf(grprsm, " No. entries in grid coordinate list. %9d\n", isec1[72]);
      fprintf(grprsm, " No. entries in auxiliary array.      %9d\n", isec1[73]);
      /*
        The four variable length lists follow each other starting at
        isec1[74]; each offset is 74 plus the lengths of the preceding lists.
      */
      /*
        Horizontal coordinate supplement.
      */
      fprintf(grprsm, " Horizontal coordinate supplement.\n");
      if (isec1[70] == 0) { fprintf(grprsm, "(None).\n"); }
      else
      {
        fprintf(grprsm, "Number of items = %d\n", isec1[70]);
        for (int jloop = 0; jloop < isec1[70]; jloop++) fprintf(grprsm, "         %12d\n", isec1[74 + jloop]);
      }
      /*
        Mixed coordinate definition.
      */
      fprintf(grprsm, " Mixed coordinate definition.\n");
      if (isec1[71] == 0) { fprintf(grprsm, "(None).\n"); }
      else
      {
        fprintf(grprsm, "Number of items = %d\n", isec1[71]);
        ioffset = 74 + isec1[70];
        for (int jloop = 0; jloop < isec1[71]; jloop++) fprintf(grprsm, "         %12d\n", isec1[ioffset + jloop]);
      }
      /*
        Grid coordinate list.
      */
      fprintf(grprsm, " Grid coordinate list. \n");
      if (isec1[72] == 0) { fprintf(grprsm, "(None).\n"); }
      else
      {
        fprintf(grprsm, "Number of items = %d\n", isec1[72]);
        ioffset = 74 + isec1[70] + isec1[71];
        for (int jloop = 0; jloop < isec1[72]; jloop++) fprintf(grprsm, "         %12d\n", isec1[ioffset + jloop]);
      }
      /*
        Auxiliary array.
      */
      fprintf(grprsm, " Auxiliary array.      \n");
      if (isec1[73] == 0) { fprintf(grprsm, "(None).\n"); }
      else
      {
        fprintf(grprsm, "Number of items = %d\n", isec1[73]);
        ioffset = 74 + isec1[70] + isec1[71] + isec1[72];
        for (int jloop = 0; jloop < isec1[73]; jloop++) fprintf(grprsm, "         %12d\n", isec1[ioffset + jloop]);
      }
      /*
        Post-auxiliary array.
      */
      fprintf(grprsm, " Post-auxiliary array. \n");
      ioffset = 74 + isec1[70] + isec1[71] + isec1[72] + isec1[73];
      if (isec1[ioffset] == 0) { fprintf(grprsm, "(None).\n"); }
      else
      {
        fprintf(grprsm, "Number of items = %d\n", isec1[ioffset]);
        // NOTE(review): prints isec1[ioffset] - 1 entries; presumably the item count
        // includes the count word itself - confirm against the encoder.
        for (int jloop = 1; jloop < isec1[ioffset]; jloop++) fprintf(grprsm, "         %12d\n", isec1[ioffset + jloop]);
      }

      return;
    }
    /*
      ECMWF Local definition 5.
      (Forecast probability data)
    */
    if (isec1[36] == 5)
    {
      fprintf(grprsm, " Forecast probability number          %9d\n", isec1[41]);
      fprintf(grprsm, " Total number of forecast probabilities %7d\n", isec1[42]);
      fprintf(grprsm, " Threshold units decimal scale factor %9d\n", isec1[43]);
      fprintf(grprsm, " Threshold indicator(1=lower,2=upper,3=both) %2d\n", isec1[44]);
      if (isec1[44] != 2) fprintf(grprsm, " Lower threshold value                %9d\n", isec1[45]);
      if (isec1[44] != 1) fprintf(grprsm, " Upper threshold value                %9d\n", isec1[46]);
      return;
    }
    /*
      ECMWF Local definition 6.
      (Surface temperature data)

      The SST type and the ICE field list belong to this definition only,
      so they are printed inside this block.
    */
    if (isec1[36] == 6)
    {
      iyear = isec1[43];
      if (iyear > 100)
      {
        // Values below 19000000 get 19000000 added before printing
        if (iyear < 19000000) iyear = iyear + 19000000;
        fprintf(grprsm, " Date of SST field used               %9d\n", iyear);
      }
      else
        fprintf(grprsm, "Date of SST field used               Not given\n");

      if (isec1[44] == 0) fprintf(grprsm, " Type of SST field (= climatology)    %9d\n", isec1[44]);
      if (isec1[44] == 1) fprintf(grprsm, " Type of SST field (= 1/1 degree)     %9d\n", isec1[44]);
      if (isec1[44] == 2) fprintf(grprsm, " Type of SST field (= 2/2 degree)     %9d\n", isec1[44]);

      fprintf(grprsm, " Number of ICE fields used:           %9d\n", isec1[45]);

      // Each ICE field occupies two entries (date, satellite number) starting at isec1[46]
      for (int jloop = 1; jloop <= isec1[45]; jloop++)
      {
        iyear = isec1[44 + (jloop * 2)];
        if (iyear > 100)
        {
          if (iyear < 19000000) iyear = iyear + 19000000;
          fprintf(grprsm, " Date of ICE field%3d                 %9d\n", jloop, iyear);
          fprintf(grprsm, " Satellite number (ICE field%3d)      %9d\n", jloop, isec1[45 + (jloop * 2)]);
        }
        else
          fprintf(grprsm, "Date of SST field used               Not given\n");
      }
    }
    /*
      ECMWF Local definition 7.
      (Sensitivity data)
    */
    if (isec1[36] == 7)
    {
      if (isec1[38] == 51) fprintf(grprsm, " Forecast number                      %9d\n", isec1[41]);
      if (isec1[38] != 51) fprintf(grprsm, " Iteration number                     %9d\n", isec1[41]);
      if (isec1[38] != 52) fprintf(grprsm, " Total number of diagnostics          %9d\n", isec1[42]);
      if (isec1[38] == 52) fprintf(grprsm, " No.interations in diag. minimisation %9d\n", isec1[42]);
      fprintf(grprsm, " Domain(0=Global,1=Europe,2=N.Hem.,3=S.Hem.) %2d\n", isec1[43]);
      fprintf(grprsm, " Diagnostic number                    %9d\n", isec1[44]);
    }
    /*
      ECMWF Local definition 8.
      (ECMWF re-analysis data)
    */
    if (isec1[36] == 8)
    {
      // The printed octet number is the loop value plus 8; the value comes from isec1[jloop - 1]
      if ((isec1[39] == 1043) || (isec1[39] == 1070) || (isec1[39] == 1071))
      {
        fprintf(grprsm, " Interval between reference times     %9d\n", isec1[41]);
        for (int jloop = 43; jloop <= 54; jloop++)
        {
          jiloop = jloop + 8;
          fprintf(grprsm, " ERA section 1 octet %2d.              %9d\n", jiloop, isec1[jloop - 1]);
        }
      }
      else
      {
        for (int jloop = 42; jloop <= 54; jloop++)
        {
          jiloop = jloop + 8;
          fprintf(grprsm, " ERA section 1 octet %2d.              %9d\n", jiloop, isec1[jloop - 1]);
        }
      }
      return;
    }

    if (isec1[38] > 4 && isec1[38] < 9)
    {
      fprintf(grprsm, " Simulation number.                   %9d\n", isec1[41]);
      fprintf(grprsm, " Total number of simulations.         %9d\n", isec1[42]);
    }
    /*
      ECMWF Local definition 9.
      (Singular vectors and ensemble perturbations)
    */
    if (isec1[36] == 9)
    {
      if (isec1[38] == 60) fprintf(grprsm, " Perturbed ensemble forecast number   %9d\n", isec1[41]);
      if (isec1[38] == 61) fprintf(grprsm, " Initial state perturbation number    %9d\n", isec1[41]);
      if (isec1[38] == 62) fprintf(grprsm, " Singular vector number               %9d\n", isec1[41]);
      if (isec1[38] == 62)
      {
        fprintf(grprsm, " Number of iterations                 %9d\n", isec1[42]);
        fprintf(grprsm, " Number of singular vectors computed  %9d\n", isec1[43]);
        fprintf(grprsm, " Norm used at initial time            %9d\n", isec1[44]);
        fprintf(grprsm, " Norm used at final time              %9d\n", isec1[45]);
        fprintf(grprsm, " Multiplication factor                %9d\n", isec1[46]);
        fprintf(grprsm, " Latitude of north-west corner        %9d\n", isec1[47]);
        fprintf(grprsm, " Longitude of north-west corner       %9d\n", isec1[48]);
        fprintf(grprsm, " Latitude of south-east corner        %9d\n", isec1[49]);
        fprintf(grprsm, " Longitude of south-east corner       %9d\n", isec1[50]);
        fprintf(grprsm, " Accuracy                             %9d\n", isec1[51]);
        fprintf(grprsm, " Number of singular vectors evolved   %9d\n", isec1[52]);
        fprintf(grprsm, " Ritz number one                      %9d\n", isec1[53]);
        fprintf(grprsm, " Ritz number two                      %9d\n", isec1[54]);
      }
    }
    /*
      ECMWF Local definition 10.
      (EPS tubes)
    */
    if (isec1[36] == 10)
    {
      fprintf(grprsm, " Tube number                          %9d\n", isec1[41]);
      fprintf(grprsm, " Total number of tubes                %9d\n", isec1[42]);
      fprintf(grprsm, " Central cluster definition           %9d\n", isec1[43]);
      fprintf(grprsm, " Parameter                            %9d\n", isec1[44]);
      fprintf(grprsm, " Type of level                        %9d\n", isec1[45]);
      fprintf(grprsm, " Northern latitude of domain of tubing%9d\n", isec1[46]);
      fprintf(grprsm, " Western longitude of domain of tubing%9d\n", isec1[47]);
      fprintf(grprsm, " Southern latitude of domain of tubing%9d\n", isec1[48]);
      fprintf(grprsm, " Eastern longitude of domain of tubing%9d\n", isec1[49]);
      fprintf(grprsm, " Tube number of operational forecast  %9d\n", isec1[50]);
      fprintf(grprsm, " Tube number of control forecast      %9d\n", isec1[51]);
      fprintf(grprsm, " Height/pressure of level             %9d\n", isec1[52]);
      fprintf(grprsm, " Reference step                       %9d\n", isec1[53]);
      fprintf(grprsm, " Radius of central cluster            %9d\n", isec1[54]);
      fprintf(grprsm, " Ensemble standard deviation          %9d\n", isec1[55]);
      fprintf(grprsm, " Dist.of tube extreme to ensemble mean%9d\n", isec1[56]);
      fprintf(grprsm, " Number of forecasts in the tube      %9d\n", isec1[57]);

      fprintf(grprsm, " List of ensemble forecast numbers:\n");
      for (int jloop = 1; jloop <= isec1[57]; jloop++) fprintf(grprsm, "    %9d\n", isec1[57 + jloop]);
    }
    /*
      ECMWF Local definition 11.
      (Supplementary data used by the analysis)
    */
    if (isec1[36] == 11)
    {
      fprintf(grprsm, " Details of analysis which used the supplementary data:\n");
      fprintf(grprsm, "   Class                              %9d\n", isec1[41]);
      fprintf(grprsm, "   Type                               %9d\n", isec1[42]);
      fprintf(grprsm, "   Stream                             %9d\n", isec1[43]);
      /*
      snprintf(hversion, sizeof(hversion), "%8d", isec1[44]);
      fprintf(grprsm, "   Version number/experiment identifier:   %4s\n", &hversion[4]);
      */
      // Two digit year: values above 50 are printed as 19xx, all others as 20xx
      iyear = isec1[45];
      iyear = iyear + ((iyear > 50) ? 1900 : 2000);

      fprintf(grprsm, "   Year                               %9d\n", iyear);
      fprintf(grprsm, "   Month                              %9d\n", isec1[46]);
      fprintf(grprsm, "   Day                                %9d\n", isec1[47]);
      fprintf(grprsm, "   Hour                               %9d\n", isec1[48]);
      fprintf(grprsm, "   Minute                             %9d\n", isec1[49]);
      fprintf(grprsm, "   Century                            %9d\n", isec1[50]);
      fprintf(grprsm, "   Originating centre                 %9d\n", isec1[51]);
      fprintf(grprsm, "   Sub-centre                         %9d\n", isec1[52]);
    }
    /*
      ECMWF Local definition 12.
    */
    if (isec1[36] == 12)
    {
      fprintf(grprsm, " (Mean, average, etc)\n");
      fprintf(grprsm, " Start date of the period              %8d\n", isec1[41]);
      fprintf(grprsm, " Start time of the period                  %4.4d\n", isec1[42]);
      fprintf(grprsm, " Finish date of the period             %8d\n", isec1[43]);
      fprintf(grprsm, " Finish time of the period                 %4.4d\n", isec1[44]);
      fprintf(grprsm, " Verifying date of the period          %8d\n", isec1[45]);
      fprintf(grprsm, " Verifying time of the period              %4.4d\n", isec1[46]);
      fprintf(grprsm, " Code showing method                   %8d\n", isec1[47]);
      fprintf(grprsm, " Number of different time intervals used  %5d\n", isec1[48]);
      fprintf(grprsm, " List of different time intervals used:\n");
      // Run-length summary: consecutive equal intervals are reported as one line
      iprev = isec1[49];
      unsigned icount = 0;
      for (int jloop = 1; jloop <= isec1[48]; jloop++)
      {
        icurr = isec1[48 + jloop];
        if (icurr != iprev)
        {
          if (icount == 1) fprintf(grprsm, "  - interval %5.4d used       once\n", iprev);
          if (icount == 2) fprintf(grprsm, "  - interval %5.4d used       twice\n", iprev);
          if (icount > 2) fprintf(grprsm, "  - interval %5.4d used %5u times\n", iprev, icount);
          iprev = icurr;
          icount = 1;
        }
        else
          icount = icount + 1;
      }
      // Report the last run
      if (icount == 1) fprintf(grprsm, "  - interval %5.4d used       once\n", iprev);
      if (icount == 2) fprintf(grprsm, "  - interval %5.4d used       twice\n", iprev);
      if (icount > 2) fprintf(grprsm, "  - interval %5.4d used %5u times\n", iprev, icount);
    }
    /*
      ECMWF Local definition 13.
      (Wave 2D spectra direction and frequency)
    */
    if (isec1[36] == 13)
    {
      fprintf(grprsm, " Direction number                     %9d\n", isec1[43]);
      fprintf(grprsm, " Frequency number                     %9d\n", isec1[44]);
      fprintf(grprsm, " Total number of directions           %9d\n", isec1[45]);
      fprintf(grprsm, " Total number of frequencies          %9d\n", isec1[46]);
      fprintf(grprsm, " Scale factor applied to directions   %9d\n", isec1[47]);
      fprintf(grprsm, " Scale factor applied to frequencies  %9d\n", isec1[48]);
      fprintf(grprsm, " List of directions:\n");
      for (int jloop = 1; jloop <= isec1[45]; jloop++)
      {
        value = (float) (isec1[48 + jloop]) / (float) (isec1[47]);
        if (isec1[43] == jloop)
          fprintf(grprsm, " %2.2d:%15.7f   <-- this field value\n", jloop, value);
        else
          fprintf(grprsm, "%2.2d:%15.7f\n", jloop, value);
      }
      fprintf(grprsm, " List of frequencies:\n");
      for (int jloop = 1; jloop <= isec1[46]; jloop++)
      {
        // The frequencies are stored directly behind the isec1[45] directions
        value = (float) (isec1[48 + isec1[45] + jloop]) / (float) (isec1[48]);
        if (isec1[44] == jloop)
          fprintf(grprsm, " %2.2d:%15.7f   <-- this field value\n", jloop, value);
        else
          fprintf(grprsm, "%2.2d:%15.7f\n", jloop, value);
      }

      // System/method number follow both lists and are printed once, if present
      if (isec1[49 + isec1[45] + isec1[46]] != 0)
      {
        fprintf(grprsm, " System number (65535 = missing)      %9d\n", isec1[49 + isec1[45] + isec1[46]]);
        fprintf(grprsm, " Method number (65535 = missing)      %9d\n", isec1[50 + isec1[45] + isec1[46]]);
      }
    }
    /*
      ECMWF Local definition 14.
      (Brightness temperature)
    */
    if (isec1[36] == 14)
    {
      fprintf(grprsm, " Channel number                       %9d\n", isec1[43]);
      fprintf(grprsm, " Scale factor applied to frequencies  %9d\n", isec1[44]);
      fprintf(grprsm, " Total number of frequencies          %9d\n", isec1[45]);
      fprintf(grprsm, " List of frequencies:\n");
      for (int jloop = 1; jloop <= isec1[45]; jloop++)
      {
        value = (float) (isec1[45 + jloop]) / (float) (isec1[44]);
        if (isec1[43] == jloop)
          fprintf(grprsm, " %3d:%15.9f   <-- this channel\n", jloop, value);
        else
          fprintf(grprsm, " %3d:%15.9f\n", jloop, value);
      }
    }
    /*
      ECMWF Local definition 15.
      (Ocean ensemble seasonal forecast)
    */
    if (isec1[36] == 15)
    {
      fprintf(grprsm, " Ensemble member number               %9d\n", isec1[41]);
      fprintf(grprsm, " System number                        %9d\n", isec1[42]);
      fprintf(grprsm, " Method number                        %9d\n", isec1[43]);
    }
    /*
      ECMWF Local definition 16.
      (Seasonal forecast monthly mean atmosphere data)
    */
    if (isec1[36] == 16)
    {
      fprintf(grprsm, " Ensemble member number               %9d\n", isec1[41]);
      fprintf(grprsm, " System number                        %9d\n", isec1[43]);
      fprintf(grprsm, " Method number                        %9d\n", isec1[44]);
      fprintf(grprsm, " Verifying month                      %9d\n", isec1[45]);
      fprintf(grprsm, " Averaging period                     %9d\n", isec1[46]);
    }
    /*
      ECMWF Local definition 17.
      (Sst or sea-ice used by analysis)
    */
    if (isec1[36] == 17)
    {
      iyear = isec1[43];
      if (iyear > 100)
      {
        if (iyear < 19000000) iyear = iyear + 19000000;
        fprintf(grprsm, " Date of sst/ice field used           %9d\n", iyear);
      }
      else
        fprintf(grprsm, " Date of sst/ice field used           Not given\n");

      if (isec1[44] == 0) fprintf(grprsm, " Type of sst/ice field (= climatology)%9d\n", isec1[44]);
      if (isec1[44] == 1) fprintf(grprsm, " Type of sst/ice field (= 1/1 degree) %9d\n", isec1[44]);
      if (isec1[44] == 2) fprintf(grprsm, " Type of sst/ice field (= 2/2 degree) %9d\n", isec1[44]);

      fprintf(grprsm, " Number of ICE fields used:           %9d\n", isec1[45]);

      // Same layout as in definition 6: all isec1[45] fields are listed, including the last
      for (int jloop = 1; jloop <= isec1[45]; jloop++)
      {
        iyear = isec1[44 + (jloop * 2)];
        if (iyear > 100)
        {
          if (iyear < 19000000) iyear = iyear + 19000000;
          fprintf(grprsm, " Date of ICE field%3d                 %9d\n", jloop, iyear);
          fprintf(grprsm, " Satellite number (ICE field%3d)      %9d\n", jloop, isec1[45 + (jloop * 2)]);
        }
        else
          fprintf(grprsm, "Date of sst/ice field used           Not given\n");
      }
    }
  }
  /*
    -----------------------------------------------------------------
    Section 3 . Print Washington ensemble product information.
    -----------------------------------------------------------------
  */
  /*
    Washington EPS products (but not reformatted Washington EPS
    products.
  */
  if ((isec1[1] == 7 && isec1[23] == 1) && (!(ISEC1_SubCenterID == 98)))
  { /*   CALL KWPRS1 (iSEC0,iSEC1)*/
  }
  /*
    -----------------------------------------------------------------
    Section 4 . Print local MPIM information.
    -----------------------------------------------------------------
  */
  if (isec1[1] == 252 && isec1[36] == 1)
  {
    fprintf(grprsm, " MPIM local usage identifier.         %9d\n", isec1[36]);
    fprintf(grprsm, " Type of ensemble forecast            %9d\n", isec1[37]);
    fprintf(grprsm, " Individual ensemble member           %9d\n", isec1[38]);
    fprintf(grprsm, " Number of forecasts in ensemble      %9d\n", isec1[39]);
  }
}

static void
printQuasi(int *isec2)
{
  /*

    Print the quasi-regular grid information held in the Grid
    Description Section (Section 2) of decoded GRIB data.

    Input Parameters:

       isec2 - Array of decoded integers from Section 2.
               isec2[2]  is used as the number of latitude rows,
               isec2[10] holds the scanning mode flags,
               isec2[22] onwards holds the number of points per row.

    Output:

       One line per run of neighbouring rows that share the same
       number of points, written to the stream grprsm.

    Converted from EMOS routine PTQUASI.

       Uwe Schulzweida   MPIfM   01/04/2001

  */
  enum
  {
    PAD_LENGTH = 11
  };

  char label[64];

  fprintf(grprsm, "  Number of points along a parallel varies.\n");

  // The scanning mode flags (modulo 128) tell whether rows run north->south or south->north.
  const bool northToSouth = (isec2[10] % 128) < 64;
  const char *direction = northToSouth ? "(North to South)" : "(South to North)";
  fprintf(grprsm, "  Number of points.   Parallel. %s\n", direction);

  // Walk over all rows, collapsing runs of equal point counts into one line.
  const int nrows = isec2[2];
  int row = 0;
  memset(label, ' ', (size_t) PAD_LENGTH);

  for (int pass = 0; pass < nrows; ++pass)
  {
    ++row;
    snprintf(label, sizeof(label), "%4d", row);

    // All rows already reported.
    if (row > nrows) break;

    // The last row has no successor to compare against.
    if (row == nrows)
    {
      fprintf(grprsm, " %5d                %-12s\n", isec2[row + 21], label);
      break;
    }

    // Extend the run for as long as the following row has the same number of points.
    unsigned runLength = 0;
    while (isec2[row + 21 + 1] == isec2[row + 21])
    {
      ++runLength;
      ++row;
      if (row >= nrows) break;
    }

    // A run of several rows is shown as 'nn to mm'.
    if (runLength >= 1) snprintf(label + 4, sizeof(label) - 4, "to %5d", row);
    fprintf(grprsm, " %5d                %-12s\n", isec2[row + 21], label);
    memset(label, ' ', (size_t) PAD_LENGTH);
  }
}

void
gribPrintSec2DP(int *isec0, int *isec2, double *fsec2)
{
  /*

    Print the information in the Grid Description
    Section (Section 2) of decoded GRIB data.

    Input Parameters:

       isec0  - Array of decoded integers from Section 0
                (isec0[1] is read as the GRIB edition number)

       isec2  - Array of decoded integers from Section 2
                (isec2[0] is the data representation type that
                selects which of the layouts below is printed)

       fsec2  - Array of decoded floats from Section 2
                (fsec2[0] angle of rotation, fsec2[1] stretching
                factor, fsec2[10] onwards vertical coordinate
                parameters)

    Output:

       Everything is written to the stream grprsm; nothing is returned.

    Comments:

       Data representation types catered for are spectral
       (50/60/70/80), Gaussian grid (4/14/24/34), latitude/longitude
       grid (0/10/20/30), polar stereographic (5), Lambert conformal
       (3), space view perspective (90) and triangular (192).
       Any other type only produces a "not catered for" message.


    Converted from EMOS routine GRPRS2.

       Uwe Schulzweida   MPIfM   01/04/2001

  */

  int ibit, iedit, ierr, iout, iresol;

  grsdef();
  /*
    -----------------------------------------------------------------
    Section 1 . Print GRIB Edition number.
    -----------------------------------------------------------------
  */
  iedit = isec0[1];
  fprintf(grprsm, " \n");
  fprintf(grprsm, " Section 2 - Grid Description Section.\n");
  fprintf(grprsm, " -------------------------------------\n");
  /*
    -----------------------------------------------------------------
    Section 2 . Print spherical harmonic data.
    -----------------------------------------------------------------
  */
  if (isec2[0] == 50 || isec2[0] == 60 || isec2[0] == 70 || isec2[0] == 80)
  {
    fprintf(grprsm, " Data represent type = spectral     (Table 6) %9d\n", isec2[0]);
    fprintf(grprsm, " J - Pentagonal resolution parameter.         %9d\n", isec2[1]);
    fprintf(grprsm, " K - Pentagonal resolution parameter.         %9d\n", isec2[2]);
    fprintf(grprsm, " M - Pentagonal resolution parameter.         %9d\n", isec2[3]);
    fprintf(grprsm, " Representation type (Table 9)                %9d\n", isec2[4]);
    fprintf(grprsm, " Representation mode (Table 10).              %9d\n", isec2[5]);
    // The loop index is 1-based (Fortran heritage): this prints isec2[6] .. isec2[10].
    for (int i = 7; i <= 11; ++i) fprintf(grprsm, " Not used.                                    %9d\n", isec2[i - 1]);
    fprintf(grprsm, " Number of vertical coordinate parameters.    %9d\n", isec2[11]);
    goto LABEL800;
  }
  /*
    -----------------------------------------------------------------
    Section 3 . Print Gaussian grid data.
    -----------------------------------------------------------------
  */
  if (isec2[0] == 4 || isec2[0] == 14 || isec2[0] == 24 || isec2[0] == 34)
  {
    fprintf(grprsm, " (Southern latitudes and Western longitudes are negative.)\n");
    fprintf(grprsm, " Data represent type = gaussian     (Table 6) %9d\n", isec2[0]);
    /*
      Quasi-regular grids introduced in Edition 1.
      isec2[16] != 0 selects the per-row listing done by printQuasi().
    */
    if (isec2[16] == 0 || iedit < 1)
      fprintf(grprsm, " Number of points along a parallel.           %9d\n", isec2[1]);
    else
      printQuasi(isec2);

    fprintf(grprsm, " Number of points along a meridian.           %9d\n", isec2[2]);
    fprintf(grprsm, " Latitude of first grid point.                %9d\n", isec2[3]);
    fprintf(grprsm, " Longitude of first grid point.               %9d\n", isec2[4]);

    // The flag value is the sum of three separately stored components; prtbin()
    // delivers it in iout, which is then printed as an 8-digit number.
    ibit = 8;
    iresol = isec2[5] + isec2[17] + isec2[18];
    prtbin(iresol, ibit, &iout, &ierr);

    fprintf(grprsm, " Resolution and components flag.               %8.8d\n", iout);
    fprintf(grprsm, " Latitude of last grid point.                 %9d\n", isec2[6]);
    fprintf(grprsm, " Longitude of last grid point.                %9d\n", isec2[7]);
    /*
      Print increment if given.
    */
    if (isec2[5] == 128)
      fprintf(grprsm, " i direction (East-West) increment.           %9d\n", isec2[8]);
    else
      fprintf(grprsm, " i direction (East-West) increment            Not given\n");

    fprintf(grprsm, " Number of parallels between pole and equator.%9d\n", isec2[9]);

    ibit = 8;
    prtbin(isec2[10], ibit, &iout, &ierr);

    fprintf(grprsm, " Scanning mode flags (Code Table 8)            %8.8d\n", iout);
    fprintf(grprsm, " Number of vertical coordinate parameters.    %9d\n", isec2[11]);
    goto LABEL800;
  }
  /*
    -----------------------------------------------------------------
    Section 4 . Print Latitude / longitude grid data.
    -----------------------------------------------------------------
  */
  if (isec2[0] == 0 || isec2[0] == 10 || isec2[0] == 20 || isec2[0] == 30)
  {
    fprintf(grprsm, " (Southern latitudes and Western longitudes are negative.)\n");
    fprintf(grprsm, " Data represent type = lat/long     (Table 6) %9d\n", isec2[0]);
    /*
      Quasi-regular lat/long grids also possible.
    */
    if (isec2[16] == 0)
      fprintf(grprsm, " Number of points along a parallel.           %9d\n", isec2[1]);
    else
      printQuasi(isec2);

    fprintf(grprsm, " Number of points along a meridian.           %9d\n", isec2[2]);
    fprintf(grprsm, " Latitude of first grid point.                %9d\n", isec2[3]);
    fprintf(grprsm, " Longitude of first grid point.               %9d\n", isec2[4]);

    ibit = 8;
    iresol = isec2[5] + isec2[17] + isec2[18];
    prtbin(iresol, ibit, &iout, &ierr);

    fprintf(grprsm, " Resolution and components flag.               %8.8d\n", iout);
    fprintf(grprsm, " Latitude of last grid point.                 %9d\n", isec2[6]);
    fprintf(grprsm, " Longitude of last grid point.                %9d\n", isec2[7]);
    /*
      Print increment if given.
      Unlike the Gaussian branch, a negative value (not isec2[5]) marks "not given" here.
    */
    if (isec2[8] < 0)
      fprintf(grprsm, " i direction (East-West) increment            Not given\n");
    else
      fprintf(grprsm, " i direction (East-West) increment.           %9d\n", isec2[8]);

    if (isec2[9] < 0)
      fprintf(grprsm, " j direction (North-South) increment          Not given\n");
    else
      fprintf(grprsm, " j direction (North-South) increment.         %9d\n", isec2[9]);

    ibit = 8;
    prtbin(isec2[10], ibit, &iout, &ierr);

    fprintf(grprsm, " Scanning mode flags (Code Table 8)            %8.8d\n", iout);
    fprintf(grprsm, " Number of vertical coordinate parameters.    %9d\n", isec2[11]);
    goto LABEL800;
  }
  /*
    -----------------------------------------------------------------
    Section 5 . Print polar stereographic data.
    -----------------------------------------------------------------
  */
  if (isec2[0] == 5)
  {
    fprintf(grprsm, " (Southern latitudes and Western longitudes are negative.)\n");
    fprintf(grprsm, " Data represent type = polar stereo (Table 6) %9d\n", isec2[0]);
    fprintf(grprsm, " Number of points along X axis.               %9d\n", isec2[1]);
    fprintf(grprsm, " Number of points along Y axis.               %9d\n", isec2[2]);
    fprintf(grprsm, " Latitude of first grid point.                %9d\n", isec2[3]);
    fprintf(grprsm, " Longitude of first grid point.               %9d\n", isec2[4]);
    // NOTE(review): isec2[5] is not part of the sum here, unlike the Lambert branch - confirm intended.
    ibit = 8;
    iresol = isec2[17] + isec2[18];
    prtbin(iresol, ibit, &iout, &ierr);
    fprintf(grprsm, " Resolution and components flag.               %8.8d\n", iout);
    fprintf(grprsm, " Orientation of the grid.                     %9d\n", isec2[6]);
    fprintf(grprsm, " X direction increment.                       %9d\n", isec2[8]);
    fprintf(grprsm, " Y direction increment.                       %9d\n", isec2[9]);
    ibit = 8;
    prtbin(isec2[10], ibit, &iout, &ierr);
    fprintf(grprsm, " Scanning mode flags (Code Table 8)            %8.8d\n", iout);
    fprintf(grprsm, " Number of vertical coordinate parameters.    %9d\n", isec2[11]);
    fprintf(grprsm, " Projection centre flag.                      %9d\n", isec2[12]);
    goto LABEL800;
  }
  /*
    -----------------------------------------------------------------
    Section 6 . Print Lambert conformal data.
    -----------------------------------------------------------------
  */
  if (isec2[0] == 3)
  {
    fprintf(grprsm, " (Southern latitudes and Western longitudes are negative.)\n");
    fprintf(grprsm, " Data represent type = Lambert      (Table 6) %9d\n", isec2[0]);
    fprintf(grprsm, " Number of points along X axis.               %9d\n", isec2[1]);
    fprintf(grprsm, " Number of points along Y axis.               %9d\n", isec2[2]);
    fprintf(grprsm, " Latitude of first grid point.                %9d\n", isec2[3]);
    fprintf(grprsm, " Longitude of first grid point.               %9d\n", isec2[4]);
    ibit = 8;
    iresol = isec2[17] + isec2[18] + isec2[5];
    prtbin(iresol, ibit, &iout, &ierr);
    fprintf(grprsm, " Resolution and components flag.               %8.8d\n", iout);
    fprintf(grprsm, " Orientation of the grid.                     %9d\n", isec2[6]);
    fprintf(grprsm, " X direction increment.                       %9d\n", isec2[8]);
    fprintf(grprsm, " Y direction increment.                       %9d\n", isec2[9]);
    ibit = 8;
    prtbin(isec2[10], ibit, &iout, &ierr);
    fprintf(grprsm, " Scanning mode flags (Code Table 8)            %8.8d\n", iout);
    fprintf(grprsm, " Number of vertical coordinate parameters.    %9d\n", isec2[11]);
    fprintf(grprsm, " Projection centre flag.                      %9d\n", isec2[12]);
    fprintf(grprsm, " Latitude intersection 1 - Latin 1 -.         %9d\n", isec2[13]);
    fprintf(grprsm, " Latitude intersection 2 - Latin 2 -.         %9d\n", isec2[14]);
    fprintf(grprsm, " Latitude of Southern Pole.                   %9d\n", isec2[19]);
    fprintf(grprsm, " Longitude of Southern Pole.                  %9d\n", isec2[20]);
    goto LABEL800;
  }
  /*
    -----------------------------------------------------------------
    Section 7 . Print space view perspective or orthographic data.
    -----------------------------------------------------------------
  */
  if (isec2[0] == 90)
  {
    fprintf(grprsm, " (Southern latitudes and Western longitudes are negative.)\n");
    fprintf(grprsm, " Data represent type = space/ortho  (Table 6) %9d\n", isec2[0]);
    fprintf(grprsm, " Number of points along X axis.               %9d\n", isec2[1]);
    fprintf(grprsm, " Number of points along Y axis.               %9d\n", isec2[2]);
    fprintf(grprsm, " Latitude of sub-satellite point.             %9d\n", isec2[3]);
    fprintf(grprsm, " Longitude of sub-satellite point.            %9d\n", isec2[4]);
    // iresol = isec2[17] + isec2[18];
    // NOTE(review): isec2[6] is printed twice in this branch, once as the earth
    // diameter and once as the grid orientation below - confirm the indices.
    fprintf(grprsm, " Diameter of the earth in x direction.        %9d\n", isec2[6]);
    fprintf(grprsm, " Y coordinate of sub-satellite point.         %9d\n", isec2[9]);
    ibit = 8;
    prtbin(isec2[10], ibit, &iout, &ierr);
    fprintf(grprsm, " Scanning mode flags (Code Table 8)            %8.8d\n", iout);
    fprintf(grprsm, " Number of vertical coordinate parameters.    %9d\n", isec2[11]);
    fprintf(grprsm, " Orientation of the grid.                     %9d\n", isec2[6]);
    fprintf(grprsm, " Altitude of the camera.                      %9d\n", isec2[13]);
    fprintf(grprsm, " Y coordinate of origin of sector image.      %9d\n", isec2[14]);
    fprintf(grprsm, " X coordinate of origin of sector image.      %9d\n", isec2[15]);
    goto LABEL800;
  }
  /*
    -----------------------------------------------------------------
    Section 7.5 . Print ocean data
    -----------------------------------------------------------------
  */
  /*
  if ( isec2[0] == 192 && ISEC1_CenterID == 98 )
    {
      fprintf(grprsm, " Data represent type = ECMWF ocean  (Table 6) %9d\n", isec2[0]);
      if ( isec2[1] ==  32767 )
        fprintf(grprsm, " Number of points along the first axis.       Not used\n");
      else
        fprintf(grprsm, " Number of points along the first axis.       %9d\n", isec2[1]);

      if ( isec2[2] ==  32767 )
        fprintf(grprsm, " Number of points along the second axis.      Not used\n");
      else
        fprintf(grprsm, " Number of points along the second axis.      %9d\n", isec2[2]);

      ibit = 8;
      prtbin(isec2[10], ibit, &iout, &ierr);
      fprintf(grprsm, " Scanning mode flags (Code Table 8)            %8.8d\n", iout);
      goto LABEL800;
    }
    */
  /*
    -----------------------------------------------------------------
    Section 7.6 . Print triangular data
    -----------------------------------------------------------------
  */
  // Type 192 is always treated as triangular here; the ocean variant above is disabled.
  if (isec2[0] == 192 /* && ISEC1_CenterID == 78 */)
  {
    fprintf(grprsm, " Data represent type = triangular   (Table 6) %9d\n", isec2[0]);
    fprintf(grprsm, " Number of factor 2 in factorisation of Ni.   %9d\n", isec2[1]);
    fprintf(grprsm, " Number of factor 3 in factorisation of Ni.   %9d\n", isec2[2]);
    fprintf(grprsm, " Number of diamonds (Nd).                     %9d\n", isec2[3]);
    fprintf(grprsm, " Number of triangular subdivisions of the\n");
    fprintf(grprsm, "           icosahedron (Ni).                  %9d\n", isec2[4]);
    fprintf(grprsm, " Flag for orientation of diamonds (Table A).  %9d\n", isec2[5]);
    fprintf(grprsm, " Latitude of pole point.                      %9d\n", isec2[6]);
    fprintf(grprsm, " Longitude of pole point.                     %9d\n", isec2[7]);
    fprintf(grprsm, " Longitude of the first diamond.              %9d\n", isec2[8]);
    fprintf(grprsm, " Flag for storage sequence (Table B).         %9d\n", isec2[9]);
    fprintf(grprsm, " Number of vertical coordinate parameters.    %9d\n", isec2[11]);
    goto LABEL800;
  }
  /*
    -----------------------------------------------------------------
    Drop through to here => representation type not catered for.
    -----------------------------------------------------------------
  */
  fprintf(grprsm, "GRPRS2 :Data representation type not catered for -%d\n", isec2[0]);

  // Unknown type: skip the common trailer (vertical coordinates, rotation, stretching).
  goto LABEL900;
  /*
    -----------------------------------------------------------------
    Section 8 . Print vertical coordinate parameters,
                rotated grid information,
                stretched grid information, if any.
    -----------------------------------------------------------------
  */
LABEL800:;
  /*
    Vertical coordinate parameters ...
    isec2[11] values are taken from fsec2, starting at fsec2[10].
  */
  if (isec2[11] != 0)
  {
    fprintf(grprsm, " \n");
    fprintf(grprsm, " Vertical Coordinate Parameters.\n");
    fprintf(grprsm, " -------------------------------\n");
    for (int i = 10; i < isec2[11] + 10; ++i) fprintf(grprsm, "    %20.12f\n", fsec2[i]);
  }
  /*
    Rotated and stretched grids introduced in Edition 1.
  */
  if (iedit < 1) goto LABEL900;
  /*
    Rotated grid information ...
    NOTE(review): isec2[0] == 30 is tested twice in this condition; the
    second test is redundant - confirm no other type was intended.
  */
  if (isec2[0] == 10 || isec2[0] == 30 || isec2[0] == 14 || isec2[0] == 34 || isec2[0] == 60 || isec2[0] == 80 || isec2[0] == 30)
  {
    fprintf(grprsm, " \n");
    fprintf(grprsm, " Latitude of southern pole of rotation.       %9d\n", isec2[12]);
    fprintf(grprsm, " Longitude of southern pole of rotation.      %9d\n", isec2[13]);
    fprintf(grprsm, " Angle of rotation.                     %20.10f\n", fsec2[0]);
  }
  /*
    Stretched grid information ...
  */
  if (isec2[0] == 20 || isec2[0] == 30 || isec2[0] == 24 || isec2[0] == 34 || isec2[0] == 70 || isec2[0] == 80)
  {
    fprintf(grprsm, " \n");
    fprintf(grprsm, " Latitude of pole of stretching.              %9d\n", isec2[14]);
    fprintf(grprsm, " Longitude of pole of stretching.             %9d\n", isec2[15]);
    fprintf(grprsm, " Stretching factor.                     %20.10f\n", fsec2[1]);
  }

LABEL900:;

  return;
}

void
gribPrintSec2SP(int *isec0, int *isec2, float *fsec2sp)
{
  /*
    Single precision front end of gribPrintSec2DP().

    The first 10 + isec2[11] values of fsec2sp are widened into a
    temporary double precision array, which is handed to
    gribPrintSec2DP() and released again afterwards.
  */
  const int nvalues = isec2[11] + 10;

  double *fsec2dp = (double *) Malloc(sizeof(double) * (size_t) nvalues);
  if (fsec2dp == NULL) SysError("No Memory!");

  for (int k = 0; k < nvalues; ++k)
  {
    fsec2dp[k] = (double) fsec2sp[k];
  }

  gribPrintSec2DP(isec0, isec2, fsec2dp);

  Free(fsec2dp);
}

void
gribPrintSec3DP(int *isec0, int *isec3, double *fsec3)
{
  /*

    Print the information in the Bit-Map Section
    (Section 3) of decoded GRIB data to the stream grprsm.

    Input Parameters:

       isec0  - Array of decoded integers from Section 0 (not used)

       isec3  - Array of decoded integers from Section 3
                (isec3[0] predetermined bit-map number, 0 if none;
                 isec3[1] missing data value for integer data)

       fsec3  - Array of decoded floats from Section 3
                (fsec3[1] missing data value for real data)


    Converted from EMOS routine GRPRS3.

       Uwe Schulzweida   MPIfM   01/04/2001

  */

  UNUSED(isec0);

  grsdef();

  const int bitmapNumber = isec3[0];
  const int missvalInt = isec3[1];
  const double missvalReal = fsec3[1];

  fprintf(grprsm, " \n");
  fprintf(grprsm, " Section 3 - Bit-map Section.\n");
  fprintf(grprsm, " -------------------------------------\n");

  if (bitmapNumber == 0)
  {
    fprintf(grprsm, " No predetermined bit-map.\n");
  }
  else
  {
    fprintf(grprsm, " Predetermined bit-map number.                %9d\n", bitmapNumber);
  }

  fprintf(grprsm, " Missing data value for integer data.    %14d\n", missvalInt);

  fprintf(grprsm, " Missing data value for real data. %20.6g\n", missvalReal);
}

void
gribPrintSec3SP(int *isec0, int *isec3, float *fsec3sp)
{
  /*
    Single precision front end of gribPrintSec3DP(): the two floats
    of fsec3sp are widened to double and passed on.
  */
  double fsec3dp[2] = { fsec3sp[0], fsec3sp[1] };

  gribPrintSec3DP(isec0, isec3, fsec3dp);
}

void
gribPrintSec4DP(int *isec0, int *isec4, double *fsec4)
{
  /*

    Print the information in the Binary Data Section
    (Section 4) of decoded GRIB data to the stream grprsm.

    Input Parameters:

       isec0  - Array of decoded integers from Section 0 (not used)

       isec4  - Array of decoded integers from Section 4

       fsec4  - Array of decoded floats from Section 4; at most the
                first 20 values (or |isec4[0]| if that is smaller)
                are read


    Converted from EMOS routine GRPRS4.

       Uwe Schulzweida   MPIfM   01/04/2001

  */
  UNUSED(isec0);

  grsdef();

  /*
    -----------------------------------------------------------------
    Section 1 . Print integer information from isec4.
    -----------------------------------------------------------------
  */
  fprintf(grprsm, " \n");
  fprintf(grprsm, " Section 4 - Binary Data  Section.\n");
  fprintf(grprsm, " -------------------------------------\n");

  fprintf(grprsm, " Number of data values coded/decoded.         %9d\n", isec4[0]);
  fprintf(grprsm, " Number of bits per data value.               %9d\n", isec4[1]);
  fprintf(grprsm, " Type of data       (0=grid pt, 128=spectral).%9d\n", isec4[2]);
  fprintf(grprsm, " Type of packing    (0=simple, 64=complex).   %9d\n", isec4[3]);
  fprintf(grprsm, " Type of data       (0=float, 32=integer).    %9d\n", isec4[4]);
  fprintf(grprsm, " Additional flags   (0=none, 16=present).     %9d\n", isec4[5]);
  fprintf(grprsm, " Reserved.                                    %9d\n", isec4[6]);
  fprintf(grprsm, " Number of values   (0=single, 64=matrix).    %9d\n", isec4[7]);
  fprintf(grprsm, " Secondary bit-maps (0=none, 32=present).     %9d\n", isec4[8]);
  fprintf(grprsm, " Values width       (0=constant, 16=variable).%9d\n", isec4[9]);

  // Extra fields exist only for complex packing; they differ for spectral and grid point data.
  const bool complexPacking = (isec4[3] == 64);
  const bool spectralData = (isec4[2] == 128);

  if (complexPacking && spectralData)
  {
    fprintf(grprsm, " Byte offset of start of packed data (N).     %9d\n", isec4[15]);
    fprintf(grprsm, " Power (P * 1000).                            %9d\n", isec4[16]);
    fprintf(grprsm, " Pentagonal resolution parameter J for subset.%9d\n", isec4[17]);
    fprintf(grprsm, " Pentagonal resolution parameter K for subset.%9d\n", isec4[18]);
    fprintf(grprsm, " Pentagonal resolution parameter M for subset.%9d\n", isec4[19]);
  }
  else if (complexPacking)
  {
    fprintf(grprsm, " Bits number of 2nd order values    (none=>0).%9d\n", isec4[10]);
    fprintf(grprsm, " General extend. 2-order packing (0=no,8=yes).%9d\n", isec4[11]);
    fprintf(grprsm, " Boustrophedonic ordering        (0=no,4=yes).%9d\n", isec4[12]);
    fprintf(grprsm, " Spatial differencing order          (0=none).%9d\n", isec4[13] + isec4[14]);
  }

  // Number of non-missing values, reported only when non-zero.
  if (isec4[20] != 0) fprintf(grprsm, " Number of non-missing values                 %9d\n", isec4[20]);

  // Information on matrix of values, if present.
  if (isec4[7] == 64)
  {
    fprintf(grprsm, " First dimension (rows) of each matrix.       %9d\n", isec4[49]);
    fprintf(grprsm, " Second dimension (columns) of each matrix.   %9d\n", isec4[50]);
    fprintf(grprsm, " First dimension coordinate values definition.%9d\n", isec4[51]);
    fprintf(grprsm, " (Code Table 12)\n");
    fprintf(grprsm, " NC1 - Number of coefficients for 1st dimension.%7d\n", isec4[52]);
    fprintf(grprsm, " Second dimension coordinate values definition.%8d\n", isec4[53]);
    fprintf(grprsm, " (Code Table 12)\n");
    fprintf(grprsm, " NC2 - Number of coefficients for 2nd dimension.%7d\n", isec4[54]);
    fprintf(grprsm, " 1st dimension physical signifance (Table 13). %8d\n", isec4[55]);
    fprintf(grprsm, " 2nd dimension physical signifance (Table 13).%8d\n", isec4[56]);
  }

  /*
    -----------------------------------------------------------------
    Section 2. Print values from fsec4.
    -----------------------------------------------------------------
  */
  // The value count may be stored negated; print at most the first 20 values.
  int nprint = (isec4[0] < 0) ? -isec4[0] : isec4[0];
  if (nprint > 20) nprint = 20;

  fprintf(grprsm, " \n");
  fprintf(grprsm, " First %4d data values.\n", nprint);

  // Integer data cannot be printed by this routine.
  if (isec4[4] != 0)
  {
    fprintf(grprsm, " Print of integer values not supported\n");
    return;
  }

  // Real data: the output format is chosen by magnitude.
  for (int k = 0; k < nprint; ++k)
  {
    const double value = fsec4[k];
    const double magnitude = fabs(value);

    if (!(magnitude > 0))
    {
      fprintf(grprsm, " %#16.0f    \n", magnitude);
    }
    else if (magnitude >= 0.1 && magnitude <= 1.e8)
    {
      fprintf(grprsm, " %#16.8G    \n", value);
    }
    else
    {
      fprintf(grprsm, " %#20.8E\n", value);
    }
  }
}

void
gribPrintSec4SP(int *isec0, int *isec4, float *fsec4sp)
{
  /*
    Single precision front end of gribPrintSec4DP().

    gribPrintSec4DP() prints at most the first 20 data values, so
    only that many values of fsec4sp are widened to double here.
  */
  enum
  {
    MAX_PRINTED_VALUES = 20
  };
  double fsec4dp[MAX_PRINTED_VALUES];

  int ncopy = (isec4[0] < 0) ? -isec4[0] : isec4[0];
  if (ncopy > MAX_PRINTED_VALUES) ncopy = MAX_PRINTED_VALUES;

  for (int k = 0; k < ncopy; ++k)
  {
    fsec4dp[k] = (double) fsec4sp[k];
  }

  gribPrintSec4DP(isec0, isec4, fsec4dp);
}

/* Print one wave coordinate coefficient whose float bit pattern is stored in an int. */
static void
gribPrintSec4WaveCoeff(int bits)
{
  // Copy the bytes instead of casting the pointer: reading an int object
  // through a float lvalue violates the aliasing rules.
  float coeff;
  memcpy(&coeff, &bits, sizeof(coeff));
  fprintf(grprsm, "%20.10f\n", coeff);
}

void
gribPrintSec4Wave(int *isec4)
{
  /*

    Print the wave coordinate information in the Binary Data
    Section (Section 4) of decoded GRIB data to the stream grprsm.

    Input Parameters:

       isec4 - Array of decoded integers from Section 4
               isec4[52]  number of first dimension coefficients
               isec4[54]  number of second dimension coefficients
               isec4[59]  onwards: first dimension coefficients,
                          directly followed by the second dimension ones

    Comments:

       Wave coordinate information held in isec4 are 32-bit floats
       stored in integer elements; each one is reinterpreted as a
       float before printing.

       The coefficients are read straight from isec4, so there is no
       fixed upper limit on the number of coefficients.


    Converted from EMOS routine GRPRS4W.

       Uwe Schulzweida   MPIfM   01/04/2001

  */
  grsdef();

  const int ncoeff1 = isec4[52];
  const int ncoeff2 = isec4[54];

  /*
    -----------------------------------------------------------------
    Section 1 . Print integer information from isec4.
    -----------------------------------------------------------------
  */
  fprintf(grprsm, " Coefficients defining first dimension coordinates:\n");
  for (int jloop = 0; jloop < ncoeff1; jloop++) gribPrintSec4WaveCoeff(isec4[59 + jloop]);

  fprintf(grprsm, " Coefficients defining second dimension coordinates:\n");
  for (int jloop = 0; jloop < ncoeff2; jloop++) gribPrintSec4WaveCoeff(isec4[59 + ncoeff1 + jloop]);
}
#ifdef HAVE_CONFIG_H
#endif

#include <string.h>
#include <ctype.h>

/*
  Open a GRIB file via fileOpen() and return its file ID.

  On Solaris (__sun) a file successfully opened for reading is
  switched to the FILE_BUFTYPE_MMAP buffer type.
*/
int
gribOpen(const char *filename, const char *mode)
{
  const int fileID = fileOpen(filename, mode);

#if defined(__sun)
  const bool openedForReading = (fileID != FILE_UNDEFID) && (tolower(*mode) == 'r');
  if (openedForReading) fileSetBufferType(fileID, FILE_BUFTYPE_MMAP);
#endif

  return fileID;
}

/* Close a GRIB file; simply forwards the file ID to fileClose(). */
void
gribClose(int fileID)
{
  (void) fileClose(fileID);
}

/* Return the current position of the file, as reported by fileGetPos(). */
off_t
gribGetPos(int fileID)
{
  const off_t currentPos = fileGetPos(fileID);

  return currentPos;
}

/*
  Search for the next "GRIB" marker and read the four bytes following it.

  offset  - receives the value reported by gribFileSeek()
  version - receives the fourth byte read after the marker, or -1 if the
            marker was not found or fewer than four bytes could be read

  Returns the status of gribFileSeek() unchanged.
*/
int
gribCheckSeek(int fileID, long *offset, int *version)
{
  const int status = gribFileSeek(fileID, offset);

  *version = -1;
  if (status != 0) return status;

  char header[4];
  const bool headerComplete = (fileRead(fileID, header, 4) == 4);
  if (headerComplete) *version = header[3];

  return status;
}

/*
  Scan forward, byte by byte, until the four characters "GRIB" have been read.

  offset - receives the number of bytes consumed before the byte that
           completed the marker

  Returns  0 if the marker was found (the file is then positioned right
             after it),
          -1 if the end of file was reached first,
           1 if no marker was found within 4096 * 4096 bytes.
*/
int
gribFileSeek(int fileID, long *offset)
{
  // position file pointer after GRIB
  const uint32_t GRIB = UINT32_C(0x47524942);
  // Rolling window over the last four bytes. An unsigned 32-bit type drops
  // the oldest byte by wrap-around; a signed long would overflow in the
  // shift on platforms where long has only 32 bits.
  uint32_t code = 0;
  int retry = 4096 * 4096;

  *offset = 0;

  void *fileptr = filePtr(fileID);

  while (retry--)
  {
    int ch = filePtrGetc(fileptr);
    if (ch == EOF) return -1;

    code = (code << 8) + (uint32_t) ch;
    if (code == GRIB)
    {
      if (CGRIBEX_Debug) Message("record offset = %ld", *offset);
      return 0;
    }

    (*offset)++;
  }

  if (CGRIBEX_Debug) Message("record offset = %ld", *offset);

  return 1;
}

/* Read the next three bytes of the file and combine them, first byte first, with GET_UINT3. */
static inline unsigned
read3ByteMSBFirst(void *fileptr)
{
  unsigned octet[3];

  for (int i = 0; i < 3; ++i) octet[i] = (unsigned) (filePtrGetc(fileptr));

  return GET_UINT3(octet[0], octet[1], octet[2]);
}

/*
  Determine the size in bytes of the GRIB record at the current file position.

  The callers in this file (gribGetSize, gribRead) invoke this right after
  gribFileSeek(), i.e. with the file positioned directly behind the "GRIB"
  marker. The file position found on entry is restored before returning.

  Returns the record size, or 0 if the GRIB version is not supported or the
  end of file was reached while reading the headers.
*/
size_t
gribReadSize(int fileID)
{
  size_t rgribsize = 0;
  void *fileptr = filePtr(fileID);
  off_t pos = fileGetPos(fileID);  // remembered so the position can be restored at the end

  // The first three bytes are taken as a length, the fourth as the GRIB version.
  unsigned gribsize = read3ByteMSBFirst(fileptr);

  int gribversion = filePtrGetc(fileptr);

  // A length of 24 with a version byte that is neither 1 nor 2 is handled as
  // version 0; the length just read is then used as the PDS size below.
  if (gribsize == 24 && gribversion != 1 && gribversion != 2) gribversion = 0;

  if (CGRIBEX_Debug) Message("gribversion = %d", gribversion);

  if (gribversion == 0)
  {
    // Version 0: the total size is the sum of the individual section sizes.
    unsigned gdssize = 0, bmssize = 0;
    unsigned issize = 4, essize = 4;

    unsigned pdssize = gribsize;
    fileSetPos(fileID, (off_t) 3, SEEK_CUR);
    if (CGRIBEX_Debug) Message("pdssize     = %u", pdssize);
    int flag = filePtrGetc(fileptr);
    if (CGRIBEX_Debug) Message("flag        = %d", flag);

    // Eight bytes of the PDS have been consumed so far; skip the remainder.
    fileSetPos(fileID, (off_t) pdssize - 8, SEEK_CUR);

    // Bit 128 of the flag: a GDS follows. Read its length and skip it.
    if (flag & 128)
    {
      gdssize = read3ByteMSBFirst(fileptr);
      fileSetPos(fileID, (off_t) gdssize - 3, SEEK_CUR);
      if (CGRIBEX_Debug) Message("gdssize     = %u", gdssize);
    }

    // Bit 64 of the flag: a BMS follows. Read its length and skip it.
    if (flag & 64)
    {
      bmssize = read3ByteMSBFirst(fileptr);
      fileSetPos(fileID, (off_t) bmssize - 3, SEEK_CUR);
      if (CGRIBEX_Debug) Message("bmssize     = %u", bmssize);
    }

    unsigned bdssize = read3ByteMSBFirst(fileptr);
    if (CGRIBEX_Debug) Message("bdssize     = %u", bdssize);

    gribsize = issize + pdssize + gdssize + bmssize + bdssize + essize;
    rgribsize = (size_t) gribsize;
  }
  else if (gribversion == 1)
  {
    // Version 1: the length read above is the record size, unless it exceeds
    // JP23SET, in which case the sections are walked to derive the real size.
    if (gribsize > JP23SET)  // Large GRIB record
    {
      unsigned pdssize = read3ByteMSBFirst(fileptr);
      if (CGRIBEX_Debug) Message("pdssize     = %u", pdssize);

      // Read five more bytes; only the last one (the flag byte) is kept.
      int flag = 0;
      for (int i = 0; i < 5; ++i) flag = filePtrGetc(fileptr);
      if (CGRIBEX_Debug) Message("flag        = %d", flag);

      // Eight bytes of the PDS have been consumed so far; skip the remainder.
      fileSetPos(fileID, (off_t) pdssize - 8, SEEK_CUR);

      unsigned gdssize = 0;
      if (flag & 128)
      {
        gdssize = read3ByteMSBFirst(fileptr);
        fileSetPos(fileID, (off_t) gdssize - 3, SEEK_CUR);
        if (CGRIBEX_Debug) Message("gdssize     = %u", gdssize);
      }

      unsigned bmssize = 0;
      if (flag & 64)
      {
        bmssize = read3ByteMSBFirst(fileptr);
        fileSetPos(fileID, (off_t) bmssize - 3, SEEK_CUR);
        if (CGRIBEX_Debug) Message("bmssize     = %u", bmssize);
      }

      unsigned bdssize = read3ByteMSBFirst(fileptr);
      if (CGRIBEX_Debug) Message("bdssize     = %u", bdssize);
      // A stored BDS length of at most 120 is not used as is: the record size
      // is rescaled (masked with JP23SET, times 120) and correct_bdslen()
      // supplies the BDS length used for the final sum.
      if (bdssize <= 120)
      {
        enum
        {
          issize = 4
        };
        gribsize &= JP23SET;
        gribsize *= 120;
        bdssize = correct_bdslen(bdssize, gribsize, issize + pdssize + gdssize + bmssize);
        if (CGRIBEX_Debug) Message("bdssize     = %u", bdssize);

        gribsize = issize + pdssize + gdssize + bmssize + bdssize + 4;
      }
    }
    rgribsize = (size_t) gribsize;
  }
  else if (gribversion == 2)
  {
    /* we set gribsize the following way because it doesn't matter then
       whether int is 4 or 8 bytes long - we don't have to care if the size
       really fits: if it does not, the record can not be read at all */
    rgribsize = 0;
    enum
    {
      g2size_bytes = 8
    };
    // Version 2: the next eight bytes hold the record size, most significant byte first.
    unsigned char g2size[g2size_bytes];
    filePtrRead(fileptr, g2size, g2size_bytes);
    for (int i = 0; i < g2size_bytes; ++i) rgribsize = (rgribsize << 8) | g2size[i];
  }
  else
  {
    rgribsize = 0;
    Warning("GRIB version %d unsupported!", gribversion);
  }

  // Hitting the end of file while reading the headers invalidates the result.
  if (filePtrEOF(fileptr)) rgribsize = 0;

  if (CGRIBEX_Debug) Message("gribsize = %zu", rgribsize);

  // Restore the position found on entry.
  fileSetPos(fileID, pos, SEEK_SET);

  return rgribsize;
}

/*
  Find the next GRIB record and return its size in bytes.

  Returns 0 if the end of file was reached or no "GRIB" marker was found
  (the latter also issues a warning). When a record is found the file is
  moved back by four bytes after gribReadSize() has run, which undoes the
  four marker bytes consumed by gribFileSeek().
*/
size_t
gribGetSize(int fileID)
{
  long offset;
  const int status = gribFileSeek(fileID, &offset);  // position file pointer after GRIB

  if (status == -1) return 0;

  if (status > 0)
  {
    Warning("GRIB record not found!");
    return 0;
  }

  const size_t recSize = gribReadSize(fileID);

  if (CGRIBEX_Debug) Message("recsize = %zu", recSize);

  fileSetPos(fileID, (off_t) -4, SEEK_CUR);

  return recSize;
}

/*
  Read the next GRIB record of the file into buffer.

  buffer     - destination for the record, starting with the "GRIB" marker
  buffersize - on entry the capacity of buffer in bytes; on return the size
               of the record (0 if no record was found)

  Returns  0 on success,
          -1 if the end of file was reached while searching for a record,
          -2 if no "GRIB" marker was found,
          -3 if buffer is too small (only the part that fits is stored),
           1 if the record size could not be determined or fewer bytes
             than expected could be read.

  At most the entry value of *buffersize bytes are ever written to buffer.
*/
int
gribRead(int fileID, void *buffer, size_t *buffersize)
{
  const size_t markerLen = 4;  // length of "GRIB"

  long offset;
  int ierr = gribFileSeek(fileID, &offset);  // position file pointer after GRIB
  if (ierr > 0)
  {
    Warning("GRIB record not found!");
    *buffersize = 0;
    return -2;
  }

  if (ierr == -1)
  {
    *buffersize = 0;
    return -1;
  }

  size_t recSize = gribReadSize(fileID);
  size_t readSize = recSize;

  if (readSize > *buffersize)
  {
    readSize = *buffersize;
    ierr = -3;  // Tell the caller that the buffer was insufficient.
  }

  *buffersize = recSize;  // Inform the caller about the record size.

  // A record shorter than its own marker means gribReadSize() could not
  // determine the size (it returns 0 then). Without this check the
  // subtraction below would wrap around and request a huge read.
  if (recSize < markerLen) return 1;

  // The buffer cannot even hold the marker: store what fits and report -3
  // instead of writing past the end of the buffer.
  if (readSize < markerLen)
  {
    if (readSize > 0) memcpy(buffer, "GRIB", readSize);
    return ierr;
  }

  // Write the stuff to the buffer that has already been read in gribFileSeek().
  memcpy(buffer, "GRIB", markerLen);

  readSize -= markerLen;
  // Read the rest of the record into the buffer.
  size_t nread = fileRead(fileID, (char *) buffer + markerLen, readSize);

  if (nread != readSize) ierr = 1;

  return ierr;
}

/*
  Write buffersize bytes of buffer to the file via fileWrite().

  Returns the number of bytes written (as int), or -1 after reporting
  through perror() when that number differs from the requested size.
*/
int
gribWrite(int fileID, void *buffer, size_t buffersize)
{
  const int expected = (int) buffersize;
  const int nwritten = (int) (fileWrite(fileID, buffer, buffersize));

  if (nwritten == expected) return nwritten;

  perror(__func__);
  return -1;
}
#include <string.h>
#include <ctype.h>

// Output stream that the gribPrintSec* routines in this file write to.
FILE *grprsm = NULL;
// Calendar used for GRIB data; -1 means not set yet, in which case grsdef() assigns a default.
int CGRIBEX_grib_calendar = -1;

/*
 * Select the calendar explicitly. A value other than -1 stops grsdef() from
 * applying its default / GRIB_CALENDAR based choice.
 */
void
gribSetCalendar(int calendar)
{
  CGRIBEX_grib_calendar = calendar;
}

void
grsdef(void)
{
  /*
    GRSDEF - Initial (default) setting of common area variables
             for the GRIBEX package.

    Purpose:
    --------
    Sets initial values for the settings shared by the routines of the
    GRIBEX package, if not already done. Only the first call does any
    work; every later call returns immediately.

    Settings established on the first call:

      CGRIBEX_grib_calendar  kept if already set (see gribSetCalendar()),
                             otherwise CALENDAR_PROLEPTIC, or the calendar
                             named by the environment variable GRIB_CALENDAR
                             (standard, proleptic, 360days, 365days,
                             366days, none; only the leading characters
                             are compared).
      CGRIBEX_Const          set to 0 if GRIB_GRIBEX_MODE_ON is 1.
      grprsm                 stdout, unless GRPRS_STREAM selects another
                             stream: 2 = stderr, 6 = stdout, another number
                             in 1..99 = file "unit.NN", any other non-empty
                             text = file of that name.

    Input / output parameters: none.

    Reference:
    ----------
    See subroutine GRIBEX.

    Author:
    -------
    J. Clochard, Meteo France, for ECMWF - March 1998.

    Modifications:
    --------------
    J. Clochard, Meteo France, for ECMWF - June 1999.
    Add variable NSUBCE.
    Use a static variable to determine if initialisation has already
    been done. NUSER removed.
    Reverse defaults for NEXT2O and NLOC2O, for consistency with
    version 13.023 of software.
  */
  static bool lfirst = true;
  extern int CGRIBEX_Const;

  if (!lfirst) return;

  // Set GRIB calendar, unless the user has already chosen one.
  if (CGRIBEX_grib_calendar == -1)
  {
    CGRIBEX_grib_calendar = CALENDAR_PROLEPTIC;

    const char *envString = getenv("GRIB_CALENDAR");
    if (envString)
    {
      if (strncmp(envString, "standard", 8) == 0)
        CGRIBEX_grib_calendar = CALENDAR_STANDARD;
      else if (strncmp(envString, "proleptic", 9) == 0)
        CGRIBEX_grib_calendar = CALENDAR_PROLEPTIC;
      else if (strncmp(envString, "360days", 7) == 0)
        CGRIBEX_grib_calendar = CALENDAR_360DAYS;
      else if (strncmp(envString, "365days", 7) == 0)
        CGRIBEX_grib_calendar = CALENDAR_365DAYS;
      else if (strncmp(envString, "366days", 7) == 0)
        CGRIBEX_grib_calendar = CALENDAR_366DAYS;
      else if (strncmp(envString, "none", 4) == 0)
        CGRIBEX_grib_calendar = CALENDAR_NONE;
    }
  }

  // Set GRIBEX compatibility mode.
  const char *envMode = getenv("GRIB_GRIBEX_MODE_ON");
  if (envMode != NULL && atoi(envMode) == 1) CGRIBEX_Const = 0;

  // See if output stream needs changing
  grprsm = stdout;
  const char *env_stream = getenv("GRPRS_STREAM");
  if (env_stream)
  {
    // <ctype.h> functions require a value representable as unsigned char.
    if (isdigit((unsigned char) env_stream[0]))
    {
      const int unit = atoi(env_stream);
      if (unit < 1 || unit > 99)
        Warning("Invalid number for GRPRS_STREAM: %d", unit);
      else if (unit == 2)
        grprsm = stderr;
      else if (unit == 6)
        grprsm = stdout;
      else
      {
        // Build "unit.NN"; the prefix has to be part of the format, otherwise
        // the two digits replace the whole name.
        char filename[sizeof("unit.00")];
        snprintf(filename, sizeof(filename), "unit.%2.2d", unit);
        grprsm = fopen(filename, "w");
        if (!grprsm) SysError("GRPRS_STREAM = %d", unit);
      }
    }
    else if (env_stream[0])
    {
      grprsm = fopen(env_stream, "w");
      if (!grprsm) SysError("GRPRS_STREAM = %s", env_stream);
    }
  }

  // Mark common area values set by user.
  lfirst = false;
}

// clang-format off

/* pack 8-bit bytes from 64-bit words to a packed buffer */
/* same as : for (int i = 0; i < bc; ++i) cp[i] = (unsigned char) up[i]; */
/*
 * up : source, one byte value per 64-bit word (only the low 8 bits are stored)
 * cp : destination byte buffer
 * bc : number of bytes to pack
 * tc : if not -1, stored as one additional byte right after the packed bytes
 *
 * Returns bc, incremented by one when tc was appended. On CRAY the work is
 * delegated to _pack() and bc is returned unchanged.
 *
 * NOTE: if the bytes after the aligned part do not fill a whole word, that
 * complete destination word is zeroed before the remaining bytes are stored,
 * so cp has to be writable up to the next 8-byte boundary.
 */

long packInt64(uint64_t *up, unsigned char *cp, long bc, long tc)
{
#if defined (CRAY)
  (void) _pack(up, cp, bc, tc);
#else
  unsigned char *cp0;
  uint64_t upi, *up0, *ip0, *ip1, *ip2, *ip3, *ip4, *ip5, *ip6, *ip7;
  long ipack = sizeof(int64_t);
  
  // Bytes until first word boundary in destination buffer

  long head = ( (long) cp ) & (ipack-1);
  if ( head != 0 ) head = ipack - head;
  // Never treat more bytes as "head" than there are bytes to pack; otherwise
  // the word count below becomes negative (same guard as in unpackInt64).
  if ( head > bc ) head = bc;

  long inner = bc - head;

  // Trailing bytes which do not make a full word

  long trail = inner & (ipack-1);

  // Number of bytes/words to be processed in fast loop

  inner -= trail;
  inner /= ipack;

  ip0 = up + head;
  ip1 = ip0 + 1;
  ip2 = ip0 + 2;
  ip3 = ip0 + 3;
  ip4 = ip0 + 4;
  ip5 = ip0 + 5;
  ip6 = ip0 + 6;
  ip7 = ip0 + 7;

  up0 = (uint64_t *)(void *)(cp + head);

  /* Process any bytes until the first word boundary
   * of our destination buffer, so that unaligned
   * output buffers are filled completely
   */
  for (long i = 0; i < head; ++i) cp[i] = (unsigned char) up[i];

  long j = 0;

  if ( IS_BIGENDIAN() )
    {
#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
      for (long i = 0; i < inner; ++i)
	{
	  upi =             (   ip0[j]          << 56 ) 
	                 |  ( ( ip1[j] & 0xFF ) << 48 )
	                 |  ( ( ip2[j] & 0xFF ) << 40 )
	                 |  ( ( ip3[j] & 0xFF ) << 32 )
	                 |  ( ( ip4[j] & 0xFF ) << 24 ) ;
	  up0[i] = upi   |  ( ( ip5[j] & 0xFF ) << 16 )
	                 |  ( ( ip6[j] & 0xFF ) <<  8 )
	                 |    ( ip7[j] & 0xFF ) ;
	  j += ipack;
	}
    }
  else
    {
      for (long i = 0; i < inner; ++i)
	{
	  upi =             (   ip7[j]          << 56 ) 
	                 |  ( ( ip6[j] & 0xFF ) << 48 )
                         |  ( ( ip5[j] & 0xFF ) << 40 )
                         |  ( ( ip4[j] & 0xFF ) << 32 )
                         |  ( ( ip3[j] & 0xFF ) << 24 ) ;
	  up0[i] = upi   |  ( ( ip2[j] & 0xFF ) << 16 )
                         |  ( ( ip1[j] & 0xFF ) <<  8 )
                         |    ( ip0[j] & 0xFF ) ;
	  j += ipack;
	}
    }

  // cp0 points just behind the bytes written by the fast loop
  cp0 = (unsigned char *) ( up0 + inner );
  if ( trail > 0 )
    {
      // Zero the whole last word, then store the remaining bytes one by one
      up0[inner] = 0;
      for (long i = 0; i < trail ; ++i)
	{
	  *cp0 = (unsigned char) ip0[ipack*inner+i];
	  cp0++;
	}
    }

  // Optional terminating byte directly after the packed data
  if ( tc != -1 )
    {
      bc++;
      *cp0 = (unsigned char) tc;
    }
#endif
  return (bc);
}

/* unpack 8-bit bytes from a packed buffer with 64-bit words */
/* same as : for (int i = 0; i < bc; ++i) up[i] = (int64_t) cp[i]; */
/*
 * cp : source byte buffer
 * up : destination, receives one byte value per 64-bit word
 * bc : number of bytes to unpack
 * tc : unused
 *
 * Returns bc.
 */

long unpackInt64(const unsigned char *cp, uint64_t *up, long bc, long tc)
{
  const long wordBytes = sizeof(int64_t);

  UNUSED(tc);

  // Bytes until first word boundary in source buffer (at most bc)

  long lead = ( (long) cp ) & (wordBytes-1);
  if ( lead != 0 ) lead = wordBytes - lead;
  if ( lead > bc ) lead = bc;

  // Split the remainder into whole words for the fast loop and
  // trailing bytes which do not make a full word

  const long rest   = bc - lead;
  const long tail   = rest & (wordBytes-1);
  const long nwords = (rest - tail) / wordBytes;

  const uint64_t *words = (const uint64_t *)(const void *)(cp + lead);
  uint64_t *out = up + lead;

  // Leading bytes are copied one at a time
  for (long i = 0; i < lead; ++i) up[i] = (uint64_t) cp[i];

  if ( IS_BIGENDIAN() )
    {
#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
      for (long i = 0; i < nwords; ++i)
        {
          // Most significant byte of the word is the first byte in memory
          const uint64_t w = words[i];
          uint64_t *dst = out + i*wordBytes;

          dst[0] = (w >> 56) & 0xFF;
          dst[1] = (w >> 48) & 0xFF;
          dst[2] = (w >> 40) & 0xFF;
          dst[3] = (w >> 32) & 0xFF;
          dst[4] = (w >> 24) & 0xFF;
          dst[5] = (w >> 16) & 0xFF;
          dst[6] = (w >>  8) & 0xFF;
          dst[7] = (w)       & 0xFF;
        }
    }
  else
    {
      for (long i = 0; i < nwords; ++i)
        {
          // Least significant byte of the word is the first byte in memory
          const uint64_t w = words[i];
          uint64_t *dst = out + i*wordBytes;

          dst[0] = (w)       & 0xFF;
          dst[1] = (w >>  8) & 0xFF;
          dst[2] = (w >> 16) & 0xFF;
          dst[3] = (w >> 24) & 0xFF;
          dst[4] = (w >> 32) & 0xFF;
          dst[5] = (w >> 40) & 0xFF;
          dst[6] = (w >> 48) & 0xFF;
          dst[7] = (w >> 56) & 0xFF;
        }
    }

  // Bytes behind the last complete word
  const long done = lead + wordBytes*nwords;
  for (long i = 0; i < tail; ++i) up[done+i] = (uint64_t) cp[done+i];

  return bc;
}

/* pack 8-bit bytes from 32-bit words to a packed buffer */
/* same as : for (int i = 0; i < bc; ++i) cp[i] = (char) up[i]; */
/*
 * up : source, one byte value per 32-bit word (only the low 8 bits are stored)
 * cp : destination byte buffer
 * bc : number of bytes to pack
 * tc : if not -1, stored as one additional byte right after the packed bytes
 *
 * Returns bc, incremented by one when tc was appended.
 *
 * NOTE: if the bytes after the aligned part do not fill a whole word, that
 * complete destination word is zeroed before the remaining bytes are stored,
 * so cp has to be writable up to the next 4-byte boundary.
 */

long packInt32(uint32_t  *up, unsigned char *cp, long bc, long tc)
{
  unsigned char *cp0;
  uint32_t *up0, *ip0, *ip1, *ip2, *ip3;
  long ipack = sizeof(int32_t);
  
  // Bytes until first word boundary in destination buffer

  long head = ( (long) cp ) & (ipack-1);
  if ( head != 0 ) head = ipack - head;
  // Never treat more bytes as "head" than there are bytes to pack; otherwise
  // the word count below becomes negative (same guard as in unpackInt32).
  if ( head > bc ) head = bc;

  long inner = bc - head;

  // Trailing bytes which do not make a full word

  long trail = inner & (ipack-1);

  // Number of bytes/words to be processed in fast loop

  inner -= trail;
  inner /= ipack;

  ip0 = up + head;
  ip1 = ip0 + 1;
  ip2 = ip0 + 2;
  ip3 = ip0 + 3;

  up0 = (uint32_t *)(void *)(cp + head);

  /* Process any bytes until the first word boundary
   * of our destination buffer, so that unaligned
   * output buffers are filled completely
   */
  for (long i = 0; i < head; ++i) cp[i] = (unsigned char) up[i];

  long j = 0;

  if ( IS_BIGENDIAN() )
    {
#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
      for (long i = 0; i < inner; ++i)
	{
	  up0[i] =          (   ip0[j]          << 24 ) 
	                 |  ( ( ip1[j] & 0xFF ) << 16 )
	                 |  ( ( ip2[j] & 0xFF ) <<  8 )
	                 |    ( ip3[j] & 0xFF ) ;
	  j += ipack;
	}
    }
  else
    {
      for (long i = 0; i < inner; ++i)
	{
	  up0[i] =          (   ip3[j]          << 24 ) 
	                 |  ( ( ip2[j] & 0xFF ) << 16 )
                         |  ( ( ip1[j] & 0xFF ) <<  8 )
                         |    ( ip0[j] & 0xFF ) ;
	  j += ipack;
	}
    }

  // cp0 points just behind the bytes written by the fast loop
  cp0 = (unsigned char *) ( up0 + inner );
  if ( trail > 0 )
    {
      // Zero the whole last word, then store the remaining bytes one by one
      up0[inner] = 0;
      for (long i = 0; i < trail; ++i)
	{
	  *cp0 = (unsigned char) ip0[ipack*inner+i];
	  cp0++;
	}
    }

  // Optional terminating byte directly after the packed data
  if ( tc != -1 )
    {
      bc++;
      *cp0 = (unsigned char) tc;
    }

  return (bc);
}

/* unpack 8-bit bytes from a packed buffer with 32-bit words */
/* same as : for (int i = 0; i < bc; ++i) up[i] = (int32_t) cp[i]; */
/*
 * cp : source byte buffer
 * up : destination, receives one byte value per 32-bit word
 * bc : number of bytes to unpack
 * tc : unused
 *
 * Returns bc.
 */

long unpackInt32(const unsigned char *cp, uint32_t *up, long bc, long tc)
{
  const long wordBytes = sizeof(int32_t);

  UNUSED(tc);

  // Bytes until first word boundary in source buffer (at most bc)

  long lead = ( (long) cp ) & (wordBytes-1);
  if ( lead != 0 ) lead = wordBytes - lead;
  if ( lead > bc ) lead = bc;

  // Split the remainder into whole words for the fast loop and
  // trailing bytes which do not make a full word

  const long rest   = bc - lead;
  const long tail   = rest & (wordBytes-1);
  const long nwords = (rest - tail) / wordBytes;

  const uint32_t *words = (const uint32_t *)(const void *)(cp + lead);
  uint32_t *out = up + lead;

  // Leading bytes are copied one at a time
  for (long i = 0; i < lead; ++i) up[i] = (uint32_t) cp[i];

  if ( IS_BIGENDIAN() )
    {
#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
      for (long i = 0; i < nwords; ++i)
        {
          // Most significant byte of the word is the first byte in memory
          const uint32_t w = words[i];
          uint32_t *dst = out + i*wordBytes;

          dst[0] = (w >> 24) & 0xFF;
          dst[1] = (w >> 16) & 0xFF;
          dst[2] = (w >>  8) & 0xFF;
          dst[3] = (w)       & 0xFF;
        }
    }
  else
    {
      for (long i = 0; i < nwords; ++i)
        {
          // Least significant byte of the word is the first byte in memory
          const uint32_t w = words[i];
          uint32_t *dst = out + i*wordBytes;

          dst[0] = (w)       & 0xFF;
          dst[1] = (w >>  8) & 0xFF;
          dst[2] = (w >> 16) & 0xFF;
          dst[3] = (w >> 24) & 0xFF;
        }
    }

  // Bytes behind the last complete word
  const long done = lead + wordBytes*nwords;
  for (long i = 0; i < tail; ++i) up[done+i] = (uint32_t) cp[done+i];

  return (bc);
}

// clang-format on
#include <stdio.h>

void
prtbin(int kin, int knbit, int *kout, int *kerr)
{
  /*

    Produces a decimal number with ones and zeroes
    corresponding to the ones and zeroes of the input
    binary number.
    eg input number 1011 binary, output number 1011 decimal.


    Input Parameters:

       kin   - Integer variable containing binary number.

       knbit - Number of bits in binary number.

    Output Parameters:

       kout  - Integer variable containing decimal value
               with ones and zeroes corresponding to those of
               the input binary number.
               Set to 0 when the result does not fit (see kerr);
               left untouched when knbit itself is out of range.

       kerr  - 0, If no error.
               1, Number of bits in binary number exceeds
                  maximum allowed or is less than 1, or a bit is
                  set whose decimal weight does not fit in an int.


    Converted from EMOS routine PRTBIN.

       Uwe Schulzweida   MPIfM   01/04/2001

  */

  /*
    The limit of 14 bits stems from the original target, a Cray 48 bit
    integer. A 32 bit int holds only 10 decimal digits, so bits beyond
    that are additionally checked while the number is generated.
  */
  if (knbit < 1 || knbit > 14)
  {
    *kerr = 1;
    printf(" prtbin : Error in binary number length - %3d bits.\n", knbit);
    return;
  }

  *kerr = 0;
  /*
    -----------------------------------------------------------------
    Section 1. Generate required number.
    -----------------------------------------------------------------
  */
  *kout = 0;

  int ik = kin;
  int idec = 1;          // decimal weight (power of ten) of the current bit
  bool idecFits = true;  // false once that power of ten no longer fits in an int

  for (int j = 0; j < knbit; ++j)
  {
    const int itemp = ik % 2;  // lowest remaining binary digit
    if (itemp != 0)
    {
      if (!idecFits)
      {
        // The digit cannot be represented; report it instead of overflowing.
        *kout = 0;
        *kerr = 1;
        printf(" prtbin : Error in binary number length - %3d bits.\n", knbit);
        return;
      }
      *kout += itemp * idec;
    }

    ik /= 2;

    // Advance the weight only while the multiplication cannot overflow.
    if (idecFits)
    {
      if (idec <= INT_MAX / 10)
        idec *= 10;
      else
        idecFits = false;
    }
  }
}

void
ref2ibm(double *pref, int kbits)
{
  /*

    Purpose:
    --------

    Code and check reference value in IBM format

    Input Parameters:
    -----------------

    pref       - Reference value
    kbits      - Number of bits per computer word.

    Output Parameters:
    ------------------

    pref       - Reference value as it is represented in IBM format;
                 never greater than the input value unless the
                 consistency check below fails, in which case the
                 input value is restored.

    Method:
    -------

    Codes in IBM format, then decodes to ensure that reference
    value used for packing is not different from that stored
    because of packing differences.

    Externals.
    ----------

    confp3    - Encode into IBM floating point format.
    decfp2    - Decode from IBM floating point format.

    Reference:
    ----------

    None.

    Comments:
    --------

    None.

    Author:
    -------

    J.D.Chambers     ECMWF      17:05:94

    Modifications:
    --------------

    Uwe Schulzweida   MPIfM   01/04/2001

    Convert to C from EMOS library version 130

  */

  int kexp, kmant;
  extern int CGRIBEX_Debug;

  /* ----------------------------------------------------------------- */
  /*   Section 1. Convert to and from IBM format.                      */
  /* ----------------------------------------------------------------- */

  /*  Convert floating point reference value to IBM representation. */

  const double ztemp = *pref;
  int itrnd = 1;
  confp3(ztemp, &kexp, &kmant, kbits, itrnd);

  /*  Exponent and mantissa both zero: nothing to adjust. */

  if (kexp == 0 && kmant == 0) return;

  /*  Set reference value to that actually stored in the GRIB code. */

  *pref = decfp2(kexp, kmant);

  /*  If the nearest number which can be represented in */
  /*  GRIB format is greater than the reference value,  */
  /*  find the nearest number in GRIB format lower      */
  /*  than the reference value.                         */

  if (ztemp < *pref)
  {
    /*  Convert floating point to GRIB representation */
    /*  using truncation to ensure that the converted */
    /*  number is smaller than the original one.      */

    itrnd = 0;
    confp3(ztemp, &kexp, &kmant, kbits, itrnd);

    /*  Set reference value to that stored in the GRIB code. */

    *pref = decfp2(kexp, kmant);

    if (ztemp < *pref)
    {
      if (CGRIBEX_Debug)
      {
        Message("Reference value error.");
        Message("Notify Met.Applications Section.");
        /*  Print the values themselves: the format needs a conversion  */
        /*  and PREF has to be dereferenced.                            */
        Message("ZTEMP = %.17g", ztemp);
        Message("PREF = %.17g", *pref);
      }
      *pref = ztemp;
    }
  }

  return;
} /* ref2ibm */
#include <math.h>
#include <string.h>

/*
 * Return the effective length of section 4 (BDS).
 *
 * For a very large product the section 4 length field holds the number of
 * bytes in the product after section 4 up to the end of the padding bytes.
 * This works around the 24-bit limit of the length fields and is only
 * possible because the (default) rounding for GRIB products is 120 bytes.
 *
 * bdslen  - length as stored in the section
 * recsize - size of the whole record
 * gribpos - position of the section within the record
 *
 * Such a product is recognised by recsize > JP23SET together with
 * bdslen <= 120; every other length is returned unchanged.
 */
unsigned
correct_bdslen(unsigned bdslen, long recsize, long gribpos)
{
  const bool isLargeProduct = (recsize > JP23SET) && (bdslen <= 120);

  if (!isLargeProduct) return bdslen;

  return (unsigned) (recsize - gribpos - bdslen);
}

/*
 * Locate the sections of a GRIB edition 0/1 message held in gribbuffer.
 *
 * gribbuffer  - message, expected to start with "GRIB"
 * gribbufsize - number of bytes available in gribbuffer
 * pdsp, gdsp, bmsp, bdsp - receive pointers into gribbuffer to the PDS, GDS,
 *               BMS and BDS; gdsp/bmsp are NULL if the PDS does not flag them
 * gribrecsize - receives the message size computed from the section lengths
 *
 * Returns
 *    0  message is consistent
 *    1  computed size exceeds gribbufsize (section pointers are still set)
 *   -1  wrong indicator section or unsupported edition (outputs stay NULL/0)
 *   -2  end section "7777" not found at the computed position
 *
 * NOTE: the PDS_/GDS_/BMS_/BDS_ macros are defined elsewhere in this file;
 * they presumably read from the local variables pds, gds, bms and bds, so
 * those names and the statement order must be kept.
 */
int
grib1Sections(unsigned char *gribbuffer, long gribbufsize, unsigned char **pdsp, unsigned char **gdsp, unsigned char **bmsp,
              unsigned char **bdsp, long *gribrecsize)
{
  *gribrecsize = 0;
  *pdsp = NULL;
  *gdsp = NULL;
  *bmsp = NULL;
  *bdsp = NULL;

  unsigned char *section = gribbuffer;
  unsigned char *is = gribbuffer;
  if (!GRIB_START(section))
  {
    fprintf(stderr, "Wrong GRIB indicator section: found >%c%c%c%c<\n", section[0], section[1], section[2], section[3]);
    return -1;
  }

  // 3-byte value following the "GRIB" marker, used below as record size
  unsigned recsize = GET_UINT3(section[4], section[5], section[6]);

  int gribversion = GRIB_EDITION(section);
  if (gribversion != 0 && gribversion != 1)
  {
    fprintf(stderr, "Error while decoding GRIB1 sections: GRIB edition %d records not supported!\n", gribversion);
    return -1;
  }

  // For edition 1 the PDS starts 8 bytes into the message, for edition 0 after 4 bytes
  unsigned grib1offset = (gribversion == 1) ? 4 : 0;

  unsigned char *pds = is + 4 + grib1offset;
  unsigned char *bufpointer = pds + PDS_Len;
  unsigned gribsize = 4 + grib1offset + PDS_Len;

  // Optional grid description section
  unsigned char *gds = NULL;
  if (PDS_HAS_GDS)
  {
    gds = bufpointer;
    bufpointer += GDS_Len;
    gribsize += GDS_Len;
  }

  // Optional bit map section
  unsigned char *bms = NULL;
  if (PDS_HAS_BMS)
  {
    bms = bufpointer;
    bufpointer += BMS_Len;
    gribsize += BMS_Len;
  }

  unsigned char *bds = bufpointer;
  unsigned bdslen = BDS_Len;
  // Very large product (see correct_bdslen()): the masked record size counts
  // units of 120 bytes and the BDS length has to be derived from it.
  if (recsize > JP23SET && bdslen <= 120)
  {
    recsize &= JP23SET;
    recsize *= 120;
    bdslen = correct_bdslen(bdslen, recsize, gribsize);
  }
  bufpointer += bdslen;
  gribsize += bdslen;
  gribsize += 4;  // the end section

  *pdsp = pds;
  *gdsp = gds;
  *bmsp = bms;
  *bdsp = bds;

  *gribrecsize = gribsize;
  // Checked before looking at the end section, which would lie outside the buffer
  if (gribbufsize < gribsize)
  {
    fprintf(stderr, "Inconsistent length of GRIB message (grib_buffer_size=%ld < grib_record_size=%u)!\n", gribbufsize, gribsize);
    return 1;
  }

  if (!GRIB_FIN(bufpointer))  // end section - "7777" in ASCII
  {
    fprintf(stderr, "Missing GRIB end section: found >%c%c%c%c<\n", bufpointer[0], bufpointer[1], bufpointer[2], bufpointer[3]);
    return -2;
  }

  return 0;
}

/*
 * Locate the sections of the first field of a GRIB edition 2 message.
 *
 * gribbuffer  - message, expected to start with "GRIB"
 * gribbufsize - unused
 * idsp, lusp, gdsp, pdsp, drsp, bmsp, bdsp - receive pointers into gribbuffer
 *               to sections 1 to 7; lusp stays NULL when the message has no
 *               section 2. Pointers of sections that were not reached
 *               before an error stay NULL.
 *
 * Sections of further fields in the same message are skipped; a note is
 * printed for every additional section 7.
 *
 * Returns
 *    0  all sections and the end section were found
 *   -1  wrong indicator section, wrong edition or unexpected section number
 *   -2  end section "7777" not found
 */
int
grib2Sections(unsigned char *gribbuffer, long gribbufsize, unsigned char **idsp, unsigned char **lusp, unsigned char **gdsp,
              unsigned char **pdsp, unsigned char **drsp, unsigned char **bmsp, unsigned char **bdsp)
{
  UNUSED(gribbufsize);

  *idsp = NULL;
  *lusp = NULL;
  *gdsp = NULL;
  *pdsp = NULL;
  *drsp = NULL;
  *bmsp = NULL;
  *bdsp = NULL;

  unsigned char *section = gribbuffer;
  unsigned sec_len = 16;  // length of section 0

  if (!GRIB_START(section))
  {
    fprintf(stderr, "wrong indicator section >%c%c%c%c<\n", section[0], section[1], section[2], section[3]);
    return -1;
  }

  int gribversion = GRIB_EDITION(section);
  if (gribversion != 2)
  {
    fprintf(stderr, "wrong GRIB version %d\n", gribversion);
    return -1;
  }

  // Total message length: 8 bytes, most significant first. Kept in 64 bits so
  // that lengths beyond 4 GB are not truncated.
  uint64_t gribsize = 0;
  for (int i = 0; i < 8; ++i) gribsize = (gribsize << 8) | section[8 + i];

  uint64_t grib_len = sec_len;
  section += sec_len;

  /* section 1 */
  sec_len = GRIB2_SECLEN(section);
  int sec_num = GRIB2_SECNUM(section);
  // fprintf(stderr, "ids %d %ld\n", sec_num, sec_len);

  if (sec_num != 1)
  {
    fprintf(stderr, "Unexpected section1 number %d\n", sec_num);
    return -1;
  }

  *idsp = section;

  grib_len += sec_len;
  section += sec_len;

  /* section 2 and 3 */
  sec_len = GRIB2_SECLEN(section);
  sec_num = GRIB2_SECNUM(section);
  // fprintf(stderr, "lus %d %ld\n", sec_num, sec_len);

  if (sec_num == 2)
  {
    *lusp = section;

    grib_len += sec_len;
    section += sec_len;

    /* section 3 */
    sec_len = GRIB2_SECLEN(section);
    sec_num = GRIB2_SECNUM(section);
    // fprintf(stderr, "gds %d %ld\n", sec_num, sec_len);

    // The section following the local use section has to be section 3;
    // do not hand out some other section as grid definition.
    if (sec_num != 3)
    {
      fprintf(stderr, "Unexpected section3 number %d\n", sec_num);
      return -1;
    }

    *gdsp = section;
  }
  else if (sec_num == 3) { *gdsp = section; }
  else
  {
    fprintf(stderr, "Unexpected section3 number %d\n", sec_num);
    return -1;
  }

  grib_len += sec_len;
  section += sec_len;

  /* section 4 */
  sec_len = GRIB2_SECLEN(section);
  sec_num = GRIB2_SECNUM(section);
  // fprintf(stderr, "pds %d %ld\n", sec_num, sec_len);

  if (sec_num != 4)
  {
    fprintf(stderr, "Unexpected section4 number %d\n", sec_num);
    return -1;
  }

  *pdsp = section;

  grib_len += sec_len;
  section += sec_len;

  /* section 5 */
  sec_len = GRIB2_SECLEN(section);
  sec_num = GRIB2_SECNUM(section);
  // fprintf(stderr, "drs %d %ld\n", sec_num, sec_len);

  if (sec_num != 5)
  {
    fprintf(stderr, "Unexpected section5 number %d\n", sec_num);
    return -1;
  }

  *drsp = section;

  grib_len += sec_len;
  section += sec_len;

  /* section 6 */
  sec_len = GRIB2_SECLEN(section);
  sec_num = GRIB2_SECNUM(section);
  // fprintf(stderr, "bms %d %ld\n", sec_num, sec_len);

  if (sec_num != 6)
  {
    fprintf(stderr, "Unexpected section6 number %d\n", sec_num);
    return -1;
  }

  *bmsp = section;

  grib_len += sec_len;
  section += sec_len;

  /* section 7 */
  sec_len = GRIB2_SECLEN(section);
  sec_num = GRIB2_SECNUM(section);
  // fprintf(stderr, "bds %d %ld\n", sec_num, sec_len);

  if (sec_num != 7)
  {
    fprintf(stderr, "Unexpected section7 number %d\n", sec_num);
    return -1;
  }

  *bdsp = section;

  grib_len += sec_len;
  section += sec_len;

  /* skip multi GRIB sections */
  int msec = 1;
  while (!GRIB_FIN(section))
  {
    sec_len = GRIB2_SECLEN(section);
    sec_num = GRIB2_SECNUM(section);

    if (sec_num < 1 || sec_num > 7) break;

    // A zero-length section would never advance the pointer and loop forever.
    if (sec_len == 0) break;

    if (sec_num == 7) fprintf(stderr, "Skipped unsupported multi GRIB section %d!\n", ++msec);

    // Do not step beyond the length announced in section 0.
    if ((grib_len + sec_len) > gribsize) break;

    grib_len += sec_len;
    section += sec_len;
  }

  /* end section - "7777" in ASCII */
  if (!GRIB_FIN(section))
  {
    fprintf(stderr, "Missing end section >%2x %2x %2x %2x<\n", section[0], section[1], section[2], section[3]);
    return -2;
  }

  return 0;
}

/*
 * Extract positions and scaling information from a GRIB edition 0/1 record.
 *
 * recpos     - position of the record, added to the offsets stored in bignum
 * recsize    - size of the record in gribbuffer
 * gribbuffer - the record, expected to start with "GRIB"
 * intnum     - intnum[0] receives BDS_NumBits
 * fltnum     - fltnum[0] receives 10^PDS_DecimalScale, fltnum[1] receives
 *              2^(binary scale), fltnum[2] receives BDS_RefValue
 * bignum     - bignum[0] receives recpos + offset of the BDS + 11,
 *              bignum[1] receives recpos + offset of the BMS + 6,
 *              or -999 if the record has no BMS
 *
 * Returns 0 on success, -1 if the indicator section is wrong and 1 if the
 * section lengths add up to more than recsize. A missing end section is
 * only reported on stderr.
 *
 * NOTE: the PDS_/GDS_/BMS_/BDS_ macros are defined elsewhere in this file;
 * they presumably read from the local variables pds, gds, bms and bds, so
 * those names and the statement order must be kept.
 */
int
grib_info_for_grads(off_t recpos, long recsize, unsigned char *gribbuffer, int *intnum, float *fltnum, off_t *bignum)
{
  long gribsize = 0;
  off_t bpos = 0;

  unsigned char *section = gribbuffer;
  unsigned char *is = gribbuffer;
  if (!GRIB_START(section))
  {
    fprintf(stderr, "wrong indicator section >%c%c%c%c<\n", section[0], section[1], section[2], section[3]);
    return -1;
  }

  int gribversion = GRIB_EDITION(section);
  // NOTE(review): this assignment has no effect (0 is assigned to a value that is already 0)
  if (recsize == 24 && gribversion == 0) gribversion = 0;

  // For edition 1 the PDS starts 8 bytes into the record, otherwise after 4 bytes
  unsigned grib1offset = (gribversion == 1) ? 4 : 0;

  unsigned char *pds = is + 4 + grib1offset;
  unsigned char *bufpointer = pds + PDS_Len;
  gribsize += 4 + grib1offset + PDS_Len;

  unsigned char *gds = NULL;
  if (PDS_HAS_GDS)
  {
    gds = bufpointer;
    bufpointer += GDS_Len;
    gribsize += GDS_Len;
  }

  unsigned char *bms = NULL;
  if (PDS_HAS_BMS)
  {
    bms = bufpointer;
    bufpointer += BMS_Len;
    // gribsize is still the offset of the BMS here
    bpos = recpos + gribsize + 6;
    gribsize += BMS_Len;
  }

  unsigned char *bds = bufpointer;

  // gribsize is the offset of the BDS at this point
  off_t dpos = recpos + gribsize + 11;

  unsigned bdslen = BDS_Len;
  bdslen = correct_bdslen(bdslen, recsize, bds - gribbuffer);
  bufpointer += bdslen;
  gribsize += bdslen;
  gribsize += 4;  // the end section

  if (gribsize > recsize)
  {
    fprintf(stderr, "GRIB buffer size %ld too small! Min size = %ld\n", recsize, gribsize);
    return 1;
  }

  /* end section - "7777" in ascii */
  if (!GRIB_FIN(bufpointer))
  {
    // Only reported; processing continues
    fprintf(stderr, "Missing end section >%2x %2x %2x %2x<\n", bufpointer[0], bufpointer[1], bufpointer[2], bufpointer[3]);
  }

  int bs = BDS_BinScale;
  // NOTE(review): looks like a sign-magnitude correction of the binary scale factor - confirm against BDS_BinScale
  if (bs > 32767) bs = 32768 - bs;
  float bsf = ldexpf(1.0f, bs);

  bignum[0] = dpos;
  bignum[1] = bms ? bpos : -999;
  intnum[0] = BDS_NumBits;

  /*  fltnum[0] = 1.0; */
  fltnum[0] = powf(10.0f, (float) PDS_DecimalScale);
  fltnum[1] = bsf;
  fltnum[2] = (float) BDS_RefValue;
  /*
  printf("intnum %d %d %d\n", intnum[0], intnum[1], intnum[2]);
  printf("fltnum %g %g %g\n", fltnum[0], fltnum[1], fltnum[2]);
  */
  return 0;
}

/*
 * Return the level of the field described by the given PDS:
 * PDS_Level times 100 for level type 100, PDS_Level itself for
 * level types 99 and 109, and PDS_Level1 for every other type.
 */
static int
get_level(unsigned char *pds)
{
  if (PDS_LevelType == 100) return (int) (PDS_Level) *100;

  if (PDS_LevelType == 99 || PDS_LevelType == 109) return (int) (PDS_Level);

  const int level = PDS_Level1;
  return level;
}

/*
 * Return the ratio of two 3-byte unsigned integers: the one stored in the
 * first three bytes of w1 divided by the one stored in the first three
 * bytes of w2.
 */
static double
get_cr(unsigned char *w1, unsigned char *w2)
{
  const unsigned numerator = GET_UINT3(w1[0], w1[1], w1[2]);
  const unsigned denominator = GET_UINT3(w2[0], w2[1], w2[2]);

  return ((double) numerator) / denominator;
}

/*
 * Print a one-line summary of a GRIB edition 0/1 record to stdout: record
 * number, offset, position, size, edition, section lengths, code, level,
 * level type, grid type, the "CR" ratio and a T/F flag for large records.
 * The column header is printed once, before the first record.
 *
 * If grib1Sections() fails with a negative code only an error line is
 * printed; a positive code adds a "GRIB data corrupted" marker to the line.
 *
 * NOTE: the PDS_/GDS_/BMS_/BDS_ macros are defined elsewhere in this file;
 * they presumably read from the local variables pds, gds, bms and bds, so
 * those names must be kept.
 */
static void
grib1PrintALL(int nrec, long offset, long recpos, long recsize, unsigned char *gribbuffer)
{
  static bool header = true;
  unsigned char *is = NULL, *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;

  if (header)
  {
    fprintf(stdout, "  Rec : Off Position   Size : V PDS  GDS    BMS    BDS : Code Level :  LType GType: CR LL\n");
    //               ----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+
    header = false;
  }

  is = gribbuffer;

  // 3-byte value following the "GRIB" marker
  unsigned gribsize = GET_UINT3(is[4], is[5], is[6]);

  long gribrecsize;
  int nerr = grib1Sections(gribbuffer, recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "%5d :%4ld %8ld %6ld : GRIB message error\n", nrec, offset, recpos, recsize);
    return;
  }

  // -1 marks a record without grid description section
  int GridType = (gds == NULL) ? -1 : (int) GDS_GridType;

  int level = get_level(pds);

  unsigned bdslen = BDS_Len;

  // Same condition as used by correct_bdslen() to detect very large products
  bool llarge = (gribsize > JP23SET && bdslen <= 120);

  bdslen = correct_bdslen(bdslen, recsize, bds - gribbuffer);

  // Ratio of the 3-byte value at bds[14] to the 3-byte value at gribbuffer[4];
  // only computed if bit 4 of BDS_Flag is set and BDS_Z is 128 or 130, else 1.
  // NOTE(review): presumably the compression ratio of compressed records - confirm
  double cr = (((BDS_Flag >> 4) & 1) && (BDS_Z == 128 || BDS_Z == 130)) ? get_cr(&bds[14], &gribbuffer[4]) : 1;

  fprintf(stdout, "%5d :%4ld %8ld %6ld :%2d%4d%5d %6d %6d : %3d %6d : %5d %5d %6.4g  %c", nrec, offset, recpos, recsize,
          GRIB_EDITION(is), PDS_Len, GDS_Len, BMS_Len, bdslen, PDS_Parameter, level, PDS_LevelType, GridType, cr,
          llarge ? 'T' : 'F');

  if (nerr > 0) fprintf(stdout, " <-- GRIB data corrupted!");
  fprintf(stdout, "\n");
}

/*
 * Print a one-line summary of a GRIB edition 2 record to stdout: record
 * number, offset, position, size, edition, the lengths of sections 1 to 7,
 * the parameter as "number.category.discipline", first level, first level
 * type and grid type. The column header is printed once, before the first
 * record. If grib2Sections() reports an error only an error line is printed.
 */
static void
grib2PrintALL(int nrec, long offset, long recpos, long recsize, unsigned char *gribbuffer)
{
  static bool header = true;

  if (header)
  {
    header = false;
    fprintf(stdout, "  Rec : Off Position   Size : V IDS LUS GDS PDS  DRS    BMS    BDS : Parameter   Level :  LType GType: CR\n");
    //       ----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+
  }

  unsigned char *is = gribbuffer;
  unsigned char *ids = NULL, *lus = NULL, *gds = NULL, *pds = NULL, *drs = NULL, *bms = NULL, *bds = NULL;

  if (grib2Sections(gribbuffer, recsize, &ids, &lus, &gds, &pds, &drs, &bms, &bds))
  {
    fprintf(stdout, "%5d :%4ld %8ld %6ld : error\n", nrec, offset, recpos, recsize);
    return;
  }

  // Length of each section; 0 for a section that is not present
  long ids_len = 0, lus_len = 0, gds_len = 0, pds_len = 0, drs_len = 0, bms_len = 0, bds_len = 0;
  if (ids != NULL) ids_len = GRIB2_SECLEN(ids);
  if (lus != NULL) lus_len = GRIB2_SECLEN(lus);
  if (gds != NULL) gds_len = GRIB2_SECLEN(gds);
  if (pds != NULL) pds_len = GRIB2_SECLEN(pds);
  if (drs != NULL) drs_len = GRIB2_SECLEN(drs);
  if (bms != NULL) bms_len = GRIB2_SECLEN(bms);
  if (bds != NULL) bds_len = GRIB2_SECLEN(bds);

  // No ratio is computed for edition 2; the column always shows 1
  const double cr = 1;

  // Discipline from section 0, the remaining values from sections 3 and 4
  const int dis = GET_UINT1(is[6]);
  const int gridtype = (int) (GET_UINT2(gds[12], gds[13]));
  const int paramcat = GET_UINT1(pds[9]);
  const int paramnum = GET_UINT1(pds[10]);
  const int level1type = GET_UINT1(pds[22]);
  const int level1 = (int) (GET_UINT4(pds[24], pds[25], pds[26], pds[27]));

  char paramstr[16];
  snprintf(paramstr, sizeof(paramstr), "%d.%d.%d", paramnum, paramcat, dis);

  fprintf(stdout, "%5d :%4ld %8ld %6ld :%2d %3ld %3ld %3ld %3ld %4ld %6ld %6ld : %-9s %7d : %5d %5d %6.4g\n", nrec, offset, recpos,
          recsize, GRIB_EDITION(is), ids_len, lus_len, gds_len, pds_len, drs_len, bms_len, bds_len, paramstr, level1, level1type,
          gridtype, cr);
}

/*
 * Print a one-line summary of a GRIB record to stdout, using the edition
 * specific routine for editions 0/1 and 2, or a notice for any other
 * edition reported by gribVersion().
 */
void
gribPrintALL(int nrec, long offset, long recpos, long recsize, unsigned char *gribbuffer)
{
  const int gribversion = gribVersion(gribbuffer, (size_t) recsize);

  switch (gribversion)
  {
    case 0:
    case 1: grib1PrintALL(nrec, offset, recpos, recsize, gribbuffer); break;
    case 2: grib2PrintALL(nrec, offset, recpos, recsize, gribbuffer); break;
    default:
      fprintf(stdout, "%5d :%4ld%9ld%7ld : GRIB version %d unsupported\n", nrec, offset, recpos, recsize, gribversion);
      break;
  }
}

/*
 * Print one line with the contents of the product definition section of a
 * GRIB edition 0/1 record to stdout. The column header is printed once,
 * before the first record. recpos is unused.
 *
 * If grib1Sections() fails with a negative code only an error line is
 * printed; a positive code adds a "GRIB data corrupted" marker to the line.
 * An edition other than 0 or 1 terminates the program.
 *
 * NOTE: the PDS_ macros are defined elsewhere in this file; they presumably
 * read from the local variable pds, so that name must be kept.
 */
static void
grib1PrintPDS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  static int header = 1;
  unsigned char *is = NULL, *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  int century, subcenter, decimalscale;
  int fc_num = 0;
  int year = 0, date;

  UNUSED(recpos);

  if (header)
  {
    fprintf(stdout, "  Rec : PDS Tab Cen Sub Ver Grid Code LTyp Level1 Level2    Date  Time P1 P2 TU TR NAVE Scale FCnum CT\n");
    //               ----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+
    header = 0;
  }

  is = gribbuffer;

  long gribrecsize;
  int nerr = grib1Sections(gribbuffer, recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "%5d : GRIB message error\n", nrec);
    return;
  }

  switch (GRIB_EDITION(is))
  {
    case 0:
      // Edition 0: year taken from pds[12]; century, subcenter and scale get fixed values
      year = GET_UINT1(pds[12]);
      century = 1;
      subcenter = 0;
      decimalscale = 0;
      break;
    case 1:
      year = PDS_Year;
      century = PDS_Century;
      subcenter = PDS_Subcenter;
      decimalscale = PDS_DecimalScale;
      break;
    default: fprintf(stderr, "Grib version %d not supported!", GRIB_EDITION(is)); exit(EXIT_FAILURE);
  }

  // FCnum column: pds[49], read only if the PDS is longer than 28 bytes, center or
  // subcenter is 98 and pds[40] is 1.
  // NOTE(review): pds[40] and pds[49] are read although only PDS_Len > 28 is checked - confirm
  if (PDS_Len > 28)
    if (PDS_CenterID == 98 || PDS_Subcenter == 98 || (PDS_CenterID == 7 && PDS_Subcenter == 98))
      if (pds[40] == 1) fc_num = GET_UINT1(pds[49]);

  // Date is printed as YYYYMMDD; a negative year is moved into the sign of the century
  if (year < 0)
  {
    date = (-year) * 10000 + (int) PDS_Month * 100 + (int) PDS_Day;
    century = -century;
  }
  else { date = year * 10000 + (int) PDS_Month * 100 + (int) PDS_Day; }

  fprintf(stdout, "%5d :%4d%4d%4d%4d%4d %4d %4d%4d%7d%7d %8d%6d%3d%3d%3d%3d%5d%6d%5d%4d", nrec, PDS_Len, PDS_CodeTable,
          PDS_CenterID, subcenter, PDS_ModelID, PDS_GridDefinition, PDS_Parameter, PDS_LevelType, PDS_Level1, PDS_Level2, date,
          PDS_Time, PDS_TimePeriod1, PDS_TimePeriod2, PDS_TimeUnit, PDS_TimeRange, PDS_AvgNum, decimalscale, fc_num, century);

  if (nerr > 0) fprintf(stdout, " <-- GRIB data corrupted!");
  fprintf(stdout, "\n");
}

/*
 * Print the product definition section of a GRIB record to stdout.
 * Only editions 0 and 1 are handled; for every other edition reported by
 * gribVersion() a notice is printed instead.
 */
void
gribPrintPDS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  const int gribversion = gribVersion(gribbuffer, (size_t) recsize);
  const bool isGrib1 = (gribversion == 0 || gribversion == 1);

  if (!isGrib1)
  {
    fprintf(stdout, "%5d :%4ld%9ld%7ld : GRIB version %d unsupported\n", nrec, 0L, recpos, recsize, gribversion);
    return;
  }

  grib1PrintPDS(nrec, recpos, recsize, gribbuffer);
}

// Print one line describing the Grid Description Section of a GRIB1 record.
// The column header is written once, ahead of the first record line.
// recpos is accepted for signature symmetry with the other printers and is not used.
static void
grib1PrintGDS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  static int needHeader = 1;
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  long gribrecsize;

  UNUSED(recpos);

  if (needHeader)
  {
    needHeader = 0;
    fputs("  Rec : GDS  NV PVPL Typ : xsize ysize   Lat1   Lon1   Lat2   Lon2    dx    dy\n", stdout);
  }

  // A negative result means the message could not be split into sections at all;
  // a positive one means the sections were located but the data looks damaged.
  const int status = grib1Sections(gribbuffer, recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (status < 0)
  {
    fprintf(stdout, "%5d : GRIB message error\n", nrec);
    return;
  }

  fprintf(stdout, "%5d :", nrec);

  if (gds == NULL)
  {
    fputs(" Grid Description Section not defined", stdout);
  }
  else
  {
    fprintf(stdout, "%4d%4d%4d %4d :%6d%6d%7d%7d%7d%7d%6d%6d", GDS_Len, GDS_NV, GDS_PVPL, GDS_GridType, GDS_NumLon, GDS_NumLat,
            GDS_FirstLat, GDS_FirstLon, GDS_LastLat, GDS_LastLon, GDS_LonIncr, GDS_LatIncr);
  }

  if (status > 0) fputs(" <-- GRIB data corrupted!", stdout);
  fputc('\n', stdout);
}

// Print the Grid Description Section summary line of one GRIB record.
// Editions 0 and 1 are handed to grib1PrintGDS(); every other edition only
// produces an "unsupported" line (no GRIB2 printer is wired in here).
void
gribPrintGDS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  const int edition = gribVersion(gribbuffer, (size_t) recsize);

  switch (edition)
  {
    case 0:
    case 1: grib1PrintGDS(nrec, recpos, recsize, gribbuffer); break;
    default:
      fprintf(stdout, "%5d :%4ld%9ld%7ld : GRIB version %d unsupported\n", nrec, 0L, recpos, recsize, edition);
      break;
  }
}

// Print one line describing the Bit Map Section of a GRIB1 record:
// parameter code, level, and (when a bitmap exists) BMS length and bitmap size.
// The column header is written once, ahead of the first record line.
static void
grib1PrintBMS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  static int needHeader = 1;
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  long gribrecsize;

  UNUSED(recpos);

  if (needHeader)
  {
    needHeader = 0;
    fputs("  Rec : Code Level     BMS    Size\n", stdout);
  }

  const int status = grib1Sections(gribbuffer, recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (status < 0)
  {
    fprintf(stdout, "%5d : GRIB message error\n", nrec);
    return;
  }

  const int level = get_level(pds);

  fprintf(stdout, "%5d :", nrec);

  if (bms == NULL)
  {
    fprintf(stdout, "%4d%7d Bit Map Section not defined", PDS_Parameter, level);
  }
  else
  {
    fprintf(stdout, "%4d%7d %7d %7d", PDS_Parameter, level, BMS_Len, BMS_BitmapSize);
  }

  if (status > 0) fputs(" <-- GRIB data corrupted!", stdout);
  fputc('\n', stdout);
}

// Print the Bit Map Section summary line of one GRIB record.
// Editions 0 and 1 are handed to grib1PrintBMS(); every other edition only
// produces an "unsupported" line (no GRIB2 printer is wired in here).
void
gribPrintBMS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  const int edition = gribVersion(gribbuffer, (size_t) recsize);

  switch (edition)
  {
    case 0:
    case 1: grib1PrintBMS(nrec, recpos, recsize, gribbuffer); break;
    default:
      fprintf(stdout, "%5d :%4ld%9ld%7ld : GRIB version %d unsupported\n", nrec, 0L, recpos, recsize, edition);
      break;
  }
}

// Print one line describing the Binary Data Section of a GRIB1 record:
// parameter code, level, BDS length, flag byte, packing scale, reference value,
// bits per value and compression ratio.
// The column header is written once, ahead of the first record line.
// recpos is accepted for signature symmetry with the other printers and is not used.
static void
grib1PrintBDS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  static int header = 1;
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;

  UNUSED(recpos);

  if (header)
  {
    fprintf(stdout, "  Rec : Code Level     BDS Flag     Scale   RefValue Bits  CR\n");
    //               ----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+
    header = 0;
  }

  long gribrecsize;
  int nerr = grib1Sections(gribbuffer, recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "%5d : GRIB message error\n", nrec);
    return;
  }

  fprintf(stdout, "%5d :", nrec);

  if (bds)
  {
    // The BDS_* values and bds[17]/bds[20] are read through bds, so none of them
    // may be decoded before bds is known to be set.
    int level = get_level(pds);

    // Compression ratio is only stored when the flag bit 4 is set and BDS_Z == 128; otherwise report 1.
    double cr = (((BDS_Flag >> 4) & 1) && BDS_Z == 128) ? get_cr(&bds[17], &bds[20]) : 1;

    double refval = BDS_RefValue;

    // scale = 2^BinScale
    double scale;
    if (BDS_BinScale < 0)
      scale = 1.0 / pow(2.0, (double) -BDS_BinScale);
    else
      scale = pow(2.0, (double) BDS_BinScale);

    // Fold the decimal scale factor 10^(-DecimalScale) into both printed values.
    if (PDS_DecimalScale != 0)
    {
      double decscale = pow(10.0, (double) -PDS_DecimalScale);
      refval *= decscale;
      scale *= decscale;
    }

    fprintf(stdout, "%4d%7d %7d %4d %8.5g %11.5g%4d %6.4g", PDS_Parameter, level, BDS_Len, BDS_Flag, scale, refval, BDS_NumBits,
            cr);
  }
  else
    fprintf(stdout, " Binary Data Section not defined");

  if (nerr > 0) fprintf(stdout, " <-- GRIB data corrupted!");
  fprintf(stdout, "\n");
}

// Print the Binary Data Section summary line of one GRIB record.
// Editions 0 and 1 are handed to grib1PrintBDS(); every other edition only
// produces an "unsupported" line (no GRIB2 printer is wired in here).
void
gribPrintBDS(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  const int edition = gribVersion(gribbuffer, (size_t) recsize);

  switch (edition)
  {
    case 0:
    case 1: grib1PrintBDS(nrec, recpos, recsize, gribbuffer); break;
    default:
      fprintf(stdout, "%5d :%4ld%9ld%7ld : GRIB version %d unsupported\n", nrec, 0L, recpos, recsize, edition);
      break;
  }
}

// Report GRIB1 records whose data are packed with 24 bits per value and a
// compression ratio of exactly 1 (the same condition gribRepair1() acts on).
// Records that cannot be parsed or look corrupted only get a diagnostic line.
void
gribCheck1(int nrec, long recpos, long recsize, unsigned char *gribbuffer)
{
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  long gribrecsize;

  UNUSED(recpos);

  const int status = grib1Sections(gribbuffer, recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (status != 0)
  {
    if (status < 0)
      fprintf(stdout, "%5d : GRIB message error\n", nrec);
    else
      fprintf(stdout, "%5d : <-- GRIB data corrupted!\n", nrec);
    return;
  }

  const int level = get_level(pds);

  // A stored ratio exists only when flag bit 4 is set and BDS_Z == 128; otherwise it counts as 1.
  double cr = 1;
  if (((BDS_Flag >> 4) & 1) && BDS_Z == 128) cr = get_cr(&bds[17], &bds[20]);

  if (IS_EQUAL(cr, 1) && BDS_NumBits == 24)
  {
    fprintf(stdout, "GRIB record %5d : code = %4d   level = %7d\n", nrec, PDS_Parameter, level);
  }
}

static void
repair1(unsigned char *gbuf, long gbufsize)
{
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  /* int recLen; */
  size_t bds_nbits;
  int bds_flag, lspherc, lcomplex /*, lcompress */;
  enum
  {
    bds_head = 11
  };
  size_t bds_ext = 0, bds_ubits;

  long gribrecsize;
  int nerr = grib1Sections(gbuf, gbufsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "GRIB message error\n");
    return;
  }

  if (nerr > 0)
  {
    fprintf(stdout, "GRIB data corrupted!\n");
    return;
  }

  unsigned bds_len = BDS_Len;
  bds_nbits = BDS_NumBits;
  bds_flag = BDS_Flag;
  bds_ubits = (size_t) bds_flag & 15;
  lspherc = bds_flag >> 7;
  lcomplex = (bds_flag >> 6) & 1;
  /* lcompress = (bds_flag >> 4)&1; */

  if (lspherc)
  {
    if (lcomplex)
    {
      size_t jup, ioff;
      jup = (size_t) bds[15];
      ioff = (jup + 1) * (jup + 2);
      bds_ext = 4 + 3 + 4 * ioff;
    }
    else { bds_ext = 4; }
  }

  size_t datstart = bds_head + bds_ext;

  unsigned char *source = bds + datstart;

  size_t sourceLen = ((((bds_len - datstart) * 8 - bds_ubits) / bds_nbits) * bds_nbits) / 8;

  if (bds_nbits == 24)
  {
    unsigned char *pbuf = (unsigned char *) Malloc(sourceLen);
    ;
    size_t nelem = sourceLen / 3;
    for (size_t i = 0; i < nelem; ++i)
    {
      pbuf[3 * i] = source[i];
      pbuf[3 * i + 1] = source[nelem + i];
      pbuf[3 * i + 2] = source[2 * nelem + i];
    }
    memcpy(source, pbuf, sourceLen);
    Free(pbuf);
  }
}

// Repair a GRIB1 record in place when its data are packed with 24 bits per
// value and the compression ratio is exactly 1; such records are announced on
// stdout and passed to repair1(). All other records are left untouched.
void
gribRepair1(int nrec, long recsize, unsigned char *gribbuffer)
{
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  long gribrecsize;

  const int status = grib1Sections(gribbuffer, recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (status != 0)
  {
    if (status < 0)
      fprintf(stdout, "%5d : GRIB message error\n", nrec);
    else
      fprintf(stdout, "%5d : <-- GRIB data corrupted!\n", nrec);
    return;
  }

  const int level = get_level(pds);

  // A stored ratio exists only when flag bit 4 is set and BDS_Z == 128; otherwise it counts as 1.
  double cr = 1;
  if (((BDS_Flag >> 4) & 1) && BDS_Z == 128) cr = get_cr(&bds[17], &bds[20]);

  if (!(IS_EQUAL(cr, 1) && BDS_NumBits == 24)) return;

  fprintf(stdout, "Repair GRIB record %5d : code = %4d   level = %7d\n", nrec, PDS_Parameter, level);
  repair1(gribbuffer, recsize);
}
#include <stdio.h>
#include <string.h>

#if defined(HAVE_CONFIG_H)
#endif

#if defined(HAVE_LIBSZ)
#if defined(__cplusplus)
extern "C"
{
#endif
#include <szlib.h>
#ifdef __cplusplus
}
#endif

// clang-format off

#define OPTIONS_MASK        (SZ_RAW_OPTION_MASK | SZ_MSB_OPTION_MASK | SZ_NN_OPTION_MASK)

#define PIXELS_PER_BLOCK    (8)
#define PIXELS_PER_SCANLINE (PIXELS_PER_BLOCK*128)

#define MIN_COMPRESS        (0.95)
#define MIN_SIZE            (256)
#endif

// Value of BDS_Z that marks an szip-compressed Binary Data Section
// (written by gribZip(), tested by gribGetZip()).
#define  Z_SZIP  128

#if  defined (HAVE_LIBSZ) || defined (HAVE_LIBAEC)
// SetLen3 / SetLen4 store the low 24 / 32 bits of `value` into
// var[offset] .. var[offset+2] / var[offset+3], most significant byte first.
// NOTE(review): the arguments are expanded without parentheses and `value` is
// evaluated once per byte, so pass simple side-effect-free expressions only.
#define SetLen3(var, offset, value) ((var[offset+0] = 0xFF & (value >> 16)), \
				     (var[offset+1] = 0xFF & (value >>  8)), \
				     (var[offset+2] = 0xFF & (value      )))
#define SetLen4(var, offset, value) ((var[offset+0] = 0xFF & (value >> 24)), \
				     (var[offset+1] = 0xFF & (value >> 16)), \
				     (var[offset+2] = 0xFF & (value >>  8)), \
				     (var[offset+3] = 0xFF & (value      )))
#endif

// clang-format on

// Query the compression state of a GRIB record.
//
//   recsize     size of gribbuffer in bytes
//   gribbuffer  the GRIB record
//   urecsize    out: for szip-compressed GRIB1 data, the 3-byte length stored at
//               bds[14..16] (gribZip() writes the original record length there);
//               0 in every other case, including all error returns
//
// Returns the value of BDS_Z when the compression bit (bit 4) of the BDS flag
// is set, otherwise 0. GRIB2 records and records that cannot be parsed also
// yield 0.
int
gribGetZip(size_t recsize, unsigned char *gribbuffer, size_t *urecsize)
{
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;

  // Give the out-parameter a defined value on every return path, so callers
  // never read an indeterminate size after an early return.
  *urecsize = 0;

  int gribversion = gribVersion(gribbuffer, recsize);
  if (gribversion == 2) return 0;

  long gribrecsize;
  int nerr = grib1Sections(gribbuffer, (long) recsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "GRIB message error\n");
    return 0;
  }

  if (nerr > 0)
  {
    fprintf(stdout, "GRIB data corrupted!\n");
    return 0;
  }

  int bds_flag = BDS_Flag;
  int lcompress = (bds_flag >> 4) & 1;
  if (!lcompress) return 0;

  int compress = BDS_Z;
  if (compress == Z_SZIP) *urecsize = (size_t) GET_UINT3(bds[14], bds[15], bds[16]);

  return compress;
}

/*
 * gribZip - try to szip-compress the Binary Data Section (BDS) of a GRIB1
 * record in place.
 *
 *   dbuf      GRIB record; rewritten in place when compression pays off
 *   dbufsize  size handed to grib1Sections() as the length of dbuf
 *   sbuf      scratch buffer that receives the compressed data
 *   sbufsize  capacity of sbuf
 *
 * Returns a record length in bytes:
 *   - on a section parse error, the gribrecsize value left by grib1Sections();
 *   - for "large" records (length field above JP23SET/JP24SET), the decoded
 *     length, without attempting compression;
 *   - for unsupported bit widths or data smaller than MIN_SIZE, the length
 *     read from the record header;
 *   - otherwise the (possibly new) record length after zero padding.
 *
 * Without HAVE_LIBSZ nothing is compressed; a warning is printed once and the
 * record is only padded.
 */
int
gribZip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbufsize)
{
#if !defined(HAVE_LIBSZ)
  static int libszwarn = 1;
#endif
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  bool llarge = false;

  // Total record length as stored in bytes 4..6 of the record.
  unsigned gribLen = GET_UINT3(dbuf[4], dbuf[5], dbuf[6]);

  int rec_len = (int) gribLen;

  long gribrecsize;
  int nerr = grib1Sections(dbuf, dbufsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "GRIB message error\n");
    return (int) gribrecsize;
  }

  if (nerr > 0)
  {
    fprintf(stdout, "GRIB data corrupted!\n");
    return (int) gribrecsize;
  }

  // Number of bytes inserted between the BDS header and the compressed data.
  int bds_zoffset = 12;

  unsigned bds_len = BDS_Len;
  // A length field above JP23SET together with a short BDS length is taken as
  // the "very large product" encoding: the length is stored in units of 120 bytes.
  if (gribLen > JP23SET && bds_len <= 120)
  {
    gribLen &= JP23SET;
    gribLen *= 120;
    bds_len = correct_bdslen(bds_len, gribLen, bds - dbuf);
    llarge = true;
    bds_zoffset += 2;
  }

  // Large records are returned as they are.
  // NOTE(review): because of this return, llarge is always false below, so the
  // llarge branches inside the HAVE_LIBSZ block cannot be reached.
  if (gribLen > JP24SET || llarge) return (int) gribLen;

#if defined(HAVE_LIBSZ)
  {
    int bds_zstart = 14;  // offset in the BDS where the three length fields are written
    unsigned gribLenOld = 0;
    int bds_head = 11;
    int bds_ext = 0;
    unsigned char *pbuf = NULL;

    int bds_nbits = BDS_NumBits;
    int bds_flag = BDS_Flag;
    int bds_ubits = bds_flag & 15;  // low nibble: unused bits at the end of the section
    int lspherc = bds_flag >> 7;
    int lcomplex = (bds_flag >> 6) & 1;
    /* lcompress = (bds_flag >> 4)&1; */

    // Only whole-byte sample widths are handed to szip; the message is printed
    // once and not at all for a bit width of 0.
    if (bds_nbits != 8 && bds_nbits != 16 && bds_nbits != 24 && bds_nbits != 32)
    {
      static bool linfo = true;
      if (linfo && bds_nbits != 0)
      {
        linfo = false;
        fprintf(stderr, "GRIB szip supports only 8, 16, 24 and 32 bit data!\n");
      }
      return rec_len;
    }

    // 24-bit values are compressed as three planes of 8-bit samples (see below).
    int bits_per_sample = (bds_nbits == 24) ? 8 : bds_nbits;

    SZ_com_t sz_param; /* szip parameter block */
    sz_param.options_mask = OPTIONS_MASK;
    sz_param.bits_per_pixel = bits_per_sample;
    sz_param.pixels_per_block = PIXELS_PER_BLOCK;
    sz_param.pixels_per_scanline = PIXELS_PER_SCANLINE;

    // Extra header bytes that precede the packed data for spherical harmonics.
    if (lspherc)
    {
      bds_ext = 4;
      if (lcomplex)
      {
        int jup = bds[15];
        int ioff = (jup + 1) * (jup + 2);
        bds_ext += 3 + 4 * ioff;
      }
    }

    size_t datstart = bds_head + bds_ext;

    // Bytes occupied by whole packed values, excluding the unused trailing bits.
    size_t datsize = ((((bds_len - datstart) * 8 - bds_ubits) / bds_nbits) * bds_nbits) / 8;

    // Too little data to be worth compressing.
    if (datsize < MIN_SIZE) return rec_len;
    /*
    fprintf(stderr, "%d %d %d %d\n", bds_len, datstart, bds_len - datstart, datsize);
    */
    size_t sourceLen = datsize;
    size_t destLen = sbufsize;

    unsigned char *source = bds + datstart;
    unsigned char *dest = sbuf;

    // Split 24-bit values into three byte planes (all first bytes, then all
    // second bytes, then all third bytes) in a temporary buffer.
    if (bds_nbits == 24)
    {
      long nelem = sourceLen / 3;
      pbuf = (unsigned char *) Malloc(sourceLen);
      for (long i = 0; i < nelem; ++i)
      {
        pbuf[i] = source[3 * i];
        pbuf[nelem + i] = source[3 * i + 1];
        pbuf[2 * nelem + i] = source[3 * i + 2];
      }
      source = pbuf;
    }

    // NOTE(review): a failing status only produces a warning (none at all for
    // SZ_OUTBUFF_FULL); destLen is still used below - confirm what szlib leaves
    // in destLen on failure.
    int status = SZ_BufftoBuffCompress(dest, &destLen, source, sourceLen, &sz_param);
    if (status != SZ_OK)
    {
      if (status == SZ_NO_ENCODER_ERROR)
        Warning("SZ_NO_ENCODER_ERROR code %3d level %3d", PDS_Parameter, PDS_Level2);
      else if (status == SZ_PARAM_ERROR)
        Warning("SZ_PARAM_ERROR code %3d level %3d", PDS_Parameter, PDS_Level2);
      else if (status == SZ_MEM_ERROR)
        Warning("SZ_MEM_ERROR code %3d level %3d", PDS_Parameter, PDS_Level2);
      else if (status == SZ_OUTBUFF_FULL)
        /*Warning("SZ_OUTBUFF_FULL code %3d level %3d", PDS_Parameter, PDS_Level2)*/;
      else
        Warning("SZ ERROR: %d code %3d level %3d", status, PDS_Parameter, PDS_Level2);
    }

    if (pbuf) Free(pbuf);
    /*
    fprintf(stderr, "sourceLen, destLen %d %d\n", sourceLen, destLen);
    */
    // Keep the compressed form only if it is smaller than MIN_COMPRESS times the original.
    if (destLen < MIN_COMPRESS * sourceLen)
    {
      // Compressed data goes bds_zoffset bytes behind the original data start,
      // leaving room for the extra header fields written below.
      source = bds + datstart + bds_zoffset;
      memcpy(source, dest, destLen);

      /* ----++++ number of unused bits at end of section) */

      // Clear the unused-bits nibble of the flag byte.
      BDS_Flag -= bds_ubits;

      gribLenOld = gribLen;

      // Shift the extended header up by bds_zoffset bytes, copying backwards
      // because source and destination overlap.
      if (bds_ext)
        for (long i = bds_ext - 1; i >= 0; --i) bds[bds_zoffset + bds_head + i] = bds[bds_head + i];

      /*
      fprintf(stderr, "destLen, datsize, datstart %d %d %d\n", destLen, datsize, datstart);
      */
      /* memcpy(bds + datstart + bds_zoffset, source, destLen); */
      /*
        fprintf(stderr, "z>>> %d %d %d %d <<<\n", (int) bds[0+datstart+bds_zoffset],
          (int)bds[1+datstart+bds_zoffset], (int)bds[2+datstart+bds_zoffset], (int)bds[3+datstart+bds_zoffset]);
      */
      // Header fields starting at bds[14]: original record length, uncompressed
      // data length, compressed data length.
      if (llarge)
      {
        if (gribLenOld % 120)
        {
          fprintf(stderr, "Internal problem, record length not multiple of 120!");
          while (gribLenOld % 120) gribLenOld++;
        }
        // gribLenOld = gribLenOld / (-120);
        // gribLenOld = JP23SET - gribLenOld + 1;

        SetLen3(bds, bds_zstart, gribLenOld);
        SetLen4(bds, bds_zstart + 3, sourceLen);
        SetLen4(bds, bds_zstart + 7, destLen);
      }
      else
      {
        SetLen3(bds, bds_zstart, gribLenOld);
        SetLen3(bds, bds_zstart + 3, sourceLen);
        SetLen3(bds, bds_zstart + 6, destLen);
      }

      int bdsLen = datstart + bds_zoffset + destLen;

      bds[11] = 0;
      bds[12] = 0;

      // Mark the section as szip-compressed.
      BDS_Z = Z_SZIP;

      // Set the compression bit (16); for an odd section length add one zero
      // pad byte and record 8 unused bits in the flag.
      BDS_Flag += 16;
      if ((bdsLen % 2) == 1)
      {
        BDS_Flag += 8;
        bds[bdsLen++] = 0;
      }

      SetLen3(bds, 0, bdsLen);

      gribLen = (bds - dbuf) + bdsLen;

      // End section "7777".
      dbuf[gribLen++] = '7';
      dbuf[gribLen++] = '7';
      dbuf[gribLen++] = '7';
      dbuf[gribLen++] = '7';

      if (llarge)
      {
        long bdslen = gribLen - 4;

        /*
          If a very large product, the section 4 length field holds
          the number of bytes in the product after section 4 upto
          the end of the padding bytes.
          This is a fixup to get round the restriction on product lengths
          due to the count being only 24 bits. It is only possible because
          the (default) rounding for GRIB products is 120 bytes.
        */
        while (gribLen % 120) dbuf[gribLen++] = 0;

        long itemp = gribLen / (-120);
        itemp = JP23SET - itemp + 1;

        SetLen3(dbuf, 4, itemp);

        bdslen = gribLen - bdslen;

        SetLen3(bds, 0, bdslen);
      }
      else { SetLen3(dbuf, 4, gribLen); }
    }
    else {}
    /*
    fprintf(stderr, "%3d %3d griblen in %6d  out %6d  CR %g   slen %6d dlen %6d  CR %g\n",
            PDS_Parameter, PDS_Level1, gribLenOld, gribLen,
            ((double)gribLenOld)/gribLen, sourceLen, destLen,
            ((double)sourceLen)/destLen);
    */
  }

#else

  UNUSED(sbuf);
  UNUSED(sbufsize);

  // Warn only once per process.
  if (libszwarn)
  {
    Warning("Compression disabled, szlib not available!");
    libszwarn = 0;
  }
#endif

  // Zero-pad the record in dbuf to a multiple of 8 bytes (120 for large records).
  if (llarge)
    while (gribLen % 120) dbuf[gribLen++] = 0;
  else
    while (gribLen & 7) dbuf[gribLen++] = 0;

  rec_len = (int) gribLen;

  return rec_len;
}

/*
 * gribUnzip - decompress the szip-compressed Binary Data Section (BDS) of a
 * GRIB1 record.
 *
 *   dbuf      destination record; its sections are parsed and its BDS is
 *             rewritten in place
 *             (presumably the caller has already copied the record, at least
 *             up to the BDS header, into dbuf - confirm against callers)
 *   dbufsize  not used
 *   sbuf      compressed source record
 *   sbufsize  size handed to grib1Sections() for both sbuf and dbuf
 *
 * Returns the length of the uncompressed record in dbuf including zero
 * padding, or 0 when a record cannot be parsed. Without HAVE_LIBSZ nothing is
 * decompressed, a warning is printed once and 0 is returned.
 */
int
gribUnzip(unsigned char *dbuf, long dbufsize, unsigned char *sbuf, long sbufsize)
{
#if !defined(HAVE_LIBSZ)
  static int libszwarn = 1;
#endif
  unsigned char *pds = NULL, *gds = NULL, *bms = NULL, *bds = NULL;
  size_t gribLen = 0;
  size_t destLen, sourceLen;
  enum
  {
    bds_head = 11
  };
  int bds_ext = 0;

  UNUSED(dbufsize);

  // First pass: locate the sections of the compressed source record.
  long gribrecsize;
  int nerr = grib1Sections(sbuf, sbufsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "GRIB message error\n");
    return 0;
  }

  if (nerr > 0)
  {
    fprintf(stdout, "GRIB data corrupted!\n");
    return 0;
  }

  // unsigned bds_len = BDS_Len;
  // NOTE(review): llarge is never set to true in this function, so every
  // llarge branch below is dead code.
  bool llarge = false;

  // Number of bytes between the BDS header and the compressed data.
  int bds_zoffset = 12;
  if (llarge) bds_zoffset += 2;

  int bds_nbits = BDS_NumBits;
  int bds_flag = BDS_Flag;
  int lspherc = bds_flag >> 7;
  int lcomplex = (bds_flag >> 6) & 1;
  /* lcompress = (bds_flag >> 4)&1; */

  // Extra header bytes for spherical harmonics; in the compressed layout the
  // extended header sits bds_zoffset bytes further back.
  if (lspherc)
  {
    if (lcomplex)
    {
      int jup = bds[bds_zoffset + 15];
      int ioff = (jup + 1) * (jup + 2);
      bds_ext = 4 + 3 + 4 * ioff;
    }
    else { bds_ext = 4; }
  }

  size_t datstart = bds_head + (size_t) bds_ext;

  // Compressed data and its length (stored at bds[20..22]) come from the source record.
  unsigned char *source = bds + datstart + bds_zoffset;
  if (llarge)
    sourceLen = ((size_t) ((bds[21] << 24) + (bds[22] << 16) + (bds[23] << 8) + bds[24]));
  else
    sourceLen = ((size_t) ((bds[20] << 16) + (bds[21] << 8) + bds[22]));

  // Second pass: from here on pds/gds/bms/bds point into the destination record.
  // NOTE(review): dbuf is parsed with sbufsize; dbufsize is never consulted.
  nerr = grib1Sections(dbuf, sbufsize, &pds, &gds, &bms, &bds, &gribrecsize);
  if (nerr < 0)
  {
    fprintf(stdout, "GRIB message error\n");
    return 0;
  }

  if (nerr > 0)
  {
    fprintf(stdout, "GRIB data corrupted!\n");
    return 0;
  }

  // Uncompressed data length is stored at bds[17..19].
  unsigned char *dest = bds + datstart;
  if (llarge)
    destLen = ((size_t) ((bds[17] << 24) + (bds[18] << 16) + (bds[19] << 8) + bds[20]));
  else
    destLen = ((size_t) ((bds[17] << 16) + (bds[18] << 8) + bds[19]));

  // Clear the compression bit (16) in the destination flag byte.
  // NOTE(review): this happens even when HAVE_LIBSZ is not defined and no data
  // is decompressed.
  BDS_Flag = (unsigned char) (BDS_Flag - 16);

  size_t bdsLen = datstart + destLen;

#if defined(HAVE_LIBSZ)
  {
    // Original record length is stored at bds[14..16].
    int bds_zstart = 14;
    unsigned recLen = GET_UINT3(bds[bds_zstart], bds[bds_zstart + 1], bds[bds_zstart + 2]);

    // 24-bit data was compressed as three planes of 8-bit samples.
    int bits_per_sample = (bds_nbits == 24) ? 8 : bds_nbits;

    SZ_com_t sz_param; /* szip parameter block */
    sz_param.options_mask = OPTIONS_MASK;
    sz_param.bits_per_pixel = bits_per_sample;
    sz_param.pixels_per_block = PIXELS_PER_BLOCK;
    sz_param.pixels_per_scanline = PIXELS_PER_SCANLINE;

    // Move the extended header back to its uncompressed position, directly
    // after the fixed BDS header.
    if (bds_ext)
      for (long i = 0; i < bds_ext; ++i) bds[bds_head + i] = bds[bds_zoffset + bds_head + i];

    /*    fprintf(stderr, "gribUnzip: sourceLen %ld; destLen %ld\n", (long)sourceLen, (long)destLen);
    fprintf(stderr, "gribUnzip: sourceOff %d; destOff %d\n", bds[12], bds[11]);
    fprintf(stderr, "gribUnzip: reclen %d; bdslen %d\n", recLen, bdsLen);
    */

    size_t tmpLen = destLen;

    // NOTE(review): a failing status only produces a warning; processing continues.
    int status = SZ_BufftoBuffDecompress(dest, &tmpLen, source, sourceLen, &sz_param);
    if (status != SZ_OK)
    {
      if (status == SZ_NO_ENCODER_ERROR)
        Warning("SZ_NO_ENCODER_ERROR code %3d level %3d", PDS_Parameter, PDS_Level2);
      else if (status == SZ_PARAM_ERROR)
        Warning("SZ_PARAM_ERROR code %3d level %3d", PDS_Parameter, PDS_Level2);
      else if (status == SZ_MEM_ERROR)
        Warning("SZ_MEM_ERROR code %3d level %3d", PDS_Parameter, PDS_Level2);
      else if (status == SZ_OUTBUFF_FULL)
        Warning("SZ_OUTBUFF_FULL code %3d level %3d", PDS_Parameter, PDS_Level2);
      else
        Warning("SZ ERROR: %d code %3d level %3d", status, PDS_Parameter, PDS_Level2);
    }
    /*
    fprintf(stderr, "gribUnzip: sl = %ld  dl = %ld   tl = %ld\n",
            (long)sourceLen, (long)destLen,(long) tmpLen);
    */
    if (tmpLen != destLen)
      Warning("unzip size differ: code %3d level %3d  ibuflen %ld ubuflen %ld", PDS_Parameter, PDS_Level2, (long) destLen,
              (long) tmpLen);

    // Re-interleave the three byte planes into 3-byte values.
    if (bds_nbits == 24)
    {
      long nelem = tmpLen / 3;
      unsigned char *pbuf = (unsigned char *) Malloc(tmpLen);
      for (long i = 0; i < nelem; ++i)
      {
        pbuf[3 * i] = dest[i];
        pbuf[3 * i + 1] = dest[nelem + i];
        pbuf[3 * i + 2] = dest[2 * nelem + i];
      }
      memcpy(dest, pbuf, tmpLen);
      Free(pbuf);
    }

    // Clear the unused-bits nibble of the flag byte.
    int bds_ubits = BDS_Flag & 15;
    BDS_Flag -= bds_ubits;

    // For an odd section length add one zero pad byte and record 8 unused bits.
    if ((bdsLen % 2) == 1)
    {
      BDS_Flag += 8;
      bds[bdsLen++] = 0;
    }

    SetLen3(bds, 0, bdsLen);

    gribLen = (bds - dbuf) + bdsLen;

    // End section "7777".
    dbuf[gribLen++] = '7';
    dbuf[gribLen++] = '7';
    dbuf[gribLen++] = '7';
    dbuf[gribLen++] = '7';

    if (llarge)
    {
      long itemp;
      bdsLen = gribLen - 4;
      /*
        If a very large product, the section 4 length field holds
        the number of bytes in the product after section 4 upto
        the end of the padding bytes.
        This is a fixup to get round the restriction on product lengths
        due to the count being only 24 bits. It is only possible because
        the (default) rounding for GRIB products is 120 bytes.
      */
      while (gribLen % 120) dbuf[gribLen++] = 0;

      if (gribLen != (size_t) recLen) fprintf(stderr, "Internal problem, recLen and gribLen differ!\n");

      itemp = gribLen / (-120);
      itemp = JP23SET - itemp + 1;

      SetLen3(dbuf, 4, itemp);

      bdsLen = gribLen - bdsLen;

      SetLen3(bds, 0, bdsLen);
    }
    // Restore the original record length in bytes 4..6 of the record.
    else { SetLen3(dbuf, 4, recLen); }
    /*
    fprintf(stderr, "recLen, gribLen, bdsLen %d %d %d\n", recLen, gribLen, bdsLen);
    */
    // Zero-pad the record to a multiple of 8 bytes (120 for large records).
    if (llarge)
      while (gribLen % 120) dbuf[gribLen++] = 0;
    else
      while (gribLen & 7) dbuf[gribLen++] = 0;
    /*
    fprintf(stderr, "recLen, gribLen, bdsLen %d %d %d\n", recLen, gribLen, bdsLen);
    */
  }
#else
  UNUSED(bds_nbits);
  UNUSED(sourceLen);
  UNUSED(source);
  UNUSED(bdsLen);
  UNUSED(dest);

  // Warn only once per process.
  if (libszwarn)
  {
    Warning("Decompression disabled, szlib not available!");
    libszwarn = 0;
  }
#endif

  return (int) gribLen;
}
#include <stdio.h>
#include <math.h>

// clang-format off


static void
scm0_double(double *pdl, double *pdr, double *pfl, double *pfr, int klg);


/*
 * ROWINA2 - Interpolation of a row of values.
 *
 * Input parameters:
 *   p      - Row of values to be interpolated; overwritten with the result.
 *            Holds ki values on input and ko values on output.
 *   ko     - Number of values required.
 *   ki     - Number of values in p on input.
 *   pw     - Working array, used as three columns of ko+3 doubles each,
 *            i.e. dimensioned (0:ko+2, 3).
 *   kcode  - Interpolation required: 1 = linear, 3 = cubic.
 *   msval  - Value used as missing data indicator.
 *
 * Output parameters:
 *   p      - Now contains ko values.
 *   kret   - Return code: 0 = OK,
 *            1 = cubic interpolation requested for a row with missing data,
 *            2 = invalid interpolation code.
 *
 * The function result itself is always 0; errors are reported via kret only.
 *
 * Original author: J.D.Chambers, ECMWF, 22.07.94
 *
 * The machine-translated version shifted p and pw to emulate 1-based Fortran
 * indexing, which forms pointers in front of the arrays (undefined behaviour
 * in C). This version addresses the same elements with 0-based column
 * pointers; the arithmetic is unchanged.
 */
static
int rowina2(double *p, int ko, int ki, double *pw,
	    int kcode, double msval, int *kret)
{
  /* Column stride of the work array and one pointer per column */
  const int pw_dim1 = ko + 3;
  double *w1 = pw;                 /* copy of the input row with wrap-around points */
  double *w2 = pw + pw_dim1;       /* cubic coefficients, column 2 */
  double *w3 = pw + 2 * pw_dim1;   /* cubic coefficients, column 3 */

  double zwt1, zrdi, zpos;
  int ip;
  double zdo, zwt;

  *kret = 0;

  if ( kcode == 1 )
    {
      /*     ********************************    */
      /*     Section 1.  Linear interpolation .. */
      /*     ********************************    */

      /*    Move input values to work array (rows 1..ki) */
      for (int ji = 0; ji < ki; ++ji)
        w1[ji + 1] = p[ji];

      /*    Arrange wrap-around value in work array */
      w1[ki + 1] = p[0];

      /*    Set up constants to be used to figure out weighting for */
      /*    values in interpolation. */
      zrdi = (double) ki;
      zdo = 1.0 / (double) ko;

      /*    Loop through the output points */
      for (int jo = 0; jo < ko; ++jo)
        {
          /*    Calculate weight from the start of row */
          zpos = jo * zdo;
          zwt = zpos * zrdi;

          /*    Get the current array position(minus 1) from the weight - */
          /*    note the implicit truncation. */
          ip = (int) zwt;

          /*    If the left value is missing, use the right value */
          if ( IS_EQUAL(w1[ip + 1], msval) )
            {
              p[jo] = w1[ip + 2];
            }
          /*    If the right value is missing, use the left value */
          else if ( IS_EQUAL(w1[ip + 2], msval) )
            {
              p[jo] = w1[ip + 1];
            }
          /*    If neither missing, interpolate ... */
          else
            {
              /*       Adjust the weight to range (0.0 to 1.0) */
              zwt -= ip;

              /*       Interpolate using the weighted values on either side */
              /*       of the output point position */
              p[jo] = (1.0 - zwt) * w1[ip + 1] +
                zwt * w1[ip + 2];
            }
        }
    }
  else if ( kcode == 3 )
    {
      /*     *******************************    */
      /*     Section 2.  Cubic interpolation .. */
      /*     *******************************    */

      /*    Move input values to work array; missing data is not supported here */
      for (int ji = 0; ji < ki; ++ji)
        {
          if ( IS_EQUAL(p[ji], msval) )
            {
              fprintf(stderr," ROWINA2: ");
              fprintf(stderr," Cubic interpolation not supported");
              fprintf(stderr," for fields containing missing data.\n");
              *kret = 1;
              return 0;
            }
          w1[ji + 1] = p[ji];
        }

      /*    Wrap-around values: one point before the row, two points after it */
      w1[0] = p[ki - 1];
      w1[ki + 1] = p[0];
      w1[ki + 2] = p[1];

      for (int jl = 1; jl <= ki; ++jl)
        {
          w2[jl] =
                - w1[jl - 1] / 3.0 -
                  w1[jl    ] * 0.5 +
                  w1[jl + 1] - w1[jl + 2] / 6.0;
          w3[jl + 1] =
                  w1[jl - 1] / 6.0 -
                  w1[jl    ] +
                  w1[jl + 1] * 0.5 +
                  w1[jl + 2] / 3.0;
        }

      scm0_double(&w2[1], &w3[2],
                  &w1[1], &w1[2], ki);

      zrdi = (double) ki;
      zdo = 1.0 / (double) ko;
      for (int jo = 0; jo < ko; ++jo)
        {
          zpos = jo * zdo;
          zwt = zpos * zrdi;
          ip = (int) zwt + 1;
          zwt = zwt + 1.0 - ip;
          zwt1 = 1.0 - zwt;
          p[jo] = ((3.0 - zwt1 * 2.0) * w1[ip] +
                  zwt * w2[ip]) * zwt1 * zwt1 +
                  ((3.0 - zwt * 2.0) * w1[ip + 1] -
                  zwt1 * w3[ip + 1]) * zwt * zwt;
        }
    }
  else
    {
      /*    **************************************    */
      /*    Section 3.  Invalid interpolation code .. */
      /*    **************************************    */
      fprintf(stderr," ROWINA2:");
      fprintf(stderr," Invalid interpolation code = %2d\n",kcode);
      *kret = 2;
    }

  return 0;
} /* rowina2 */



/*
 * QU2REG2 - Convert quasi-regular grid data to regular.
 *
 * Input parameters:
 *   pfield - Array containing the quasi-regular grid data; overwritten with
 *            the regular grid data (klon*klat values) on success.
 *   kpoint - Array containing the number of points on each line of the
 *            quasi-regular grid.
 *   klat   - Number of latitude lines.
 *   klon   - Number of longitude lines.
 *   ztemp  - Temporary array that receives the klon*klat regular grid values.
 *   msval  - Value used as missing data indicator.
 *
 * Output parameters:
 *   kret   - Return code: 0 = OK, non-zero indicates a fatal error
 *            (pfield is then left unchanged).
 *
 * The interpolation type is fixed to 1 in this routine (linear, data
 * quasi-regular on latitude lines); the other codes of the original
 * interface (3, 11, 13) are still recognised by the code below.
 *
 * The function result itself is always 0; errors are reported via kret only.
 *
 * Original author: J.D.Chambers, ECMWF, 22.07.94 / 13.09.94
 *
 * The machine-translated version decremented pfield and kpoint to emulate
 * 1-based Fortran indexing, which forms pointers in front of the arrays
 * (undefined behaviour in C). This version uses 0-based indexing instead.
 */
int qu2reg2(double *pfield, int *kpoint, int klat, int klon,
	    double *ztemp, double msval, int *kret)
{
   const int kcode = 1;

   double *zline = (double*) Malloc(2*(size_t)klon*sizeof(double));
   if ( zline == NULL ) SysError("No Memory!");

   double *zwork = (double*) Malloc(3*(2*(size_t)klon+3)*sizeof(double));
   if ( zwork == NULL ) SysError("No Memory!");

/* ------------------------------ */
/* Section 1. Set initial values. */
/* ------------------------------ */

   *kret = 0;

   if (kcode != 1 && kcode != 3 && kcode != 11 && kcode != 13) {
      fprintf(stderr," QU2REG :");
      fprintf(stderr," Invalid interpolation type code = %2d\n",kcode);
      *kret = 1;
   } else {

/*    Establish values of loop parameters: codes above 10 mean the data is */
/*    quasi-regular along longitude lines, otherwise along latitude lines. */

      const int iquano = (kcode > 10) ? klon : klat;        /* number of quasi-regular lines */
      const int iregno = (kcode > 10) ? klat : klon;        /* points per regular line */
      const int icode  = (kcode > 10) ? kcode - 10 : kcode; /* interpolation passed to rowina2 */

      int ilii = 0;   /* next value to read from pfield */
      int ilio = 0;   /* next value to write to ztemp */

/*    -------------------------------------------------------- */
/*    Section 2. Interpolate field from quasi to regular grid. */
/*    -------------------------------------------------------- */

      for (int jline = 0; jline < iquano; ++jline) {

         const int npts = kpoint[jline];

         if (iregno != npts) {

/*          Line length differs from the regular one, so extract */
/*          the quasi-regular grid values for this line ... */

            for (int j = 0; j < npts; ++j) zline[j] = pfield[ilii++];

/*          ... interpolate the line ... */

            rowina2(zline, iregno, npts, zwork, icode, msval, kret);
            if (*kret != 0) break;

/*          ... and add its regular grid values to the temporary array. */

            for (int j = 0; j < iregno; ++j) ztemp[ilio++] = zline[j];

         } else {

/*          Line contains the required number of values, so add */
/*          this line to the temporary array unchanged. */

            for (int j = 0; j < iregno; ++j) ztemp[ilio++] = pfield[ilii++];
         }
      }

/*    Copy temporary array to user array, unless a line failed to interpolate. */

      if (*kret == 0) {
         const int nvals = klon * klat;
         for (int j = 0; j < nvals; ++j) pfield[j] = ztemp[j];
      }
   }

/* --------------------------------------------- */
/* Section 9. Release work arrays and return.    */
/* --------------------------------------------- */

   Free(zline);
   Free(zwork);

   return 0;
} /* qu2reg2 */



#ifdef T
#undef T
#endif
#define T double
#ifdef T

/*
 * calculate_pfactor: source code from grib_api-1.8.0
 *
 * Fits a power law to the magnitudes of the spectral coefficients and
 * returns the negated slope of that fit.
 *
 * spectralField    - (real, imaginary) pairs stored consecutively, ordered
 *                    m = 0..fieldTruncation with n = m..fieldTruncation
 *                    running fastest.
 * fieldTruncation  - largest m and n present in spectralField.
 * subsetTruncation - only coefficients with n >= subsetTruncation
 *                    contribute to the norms.
 *
 * norms[n] is the largest absolute real or imaginary part found for n.
 * A weighted least-squares line is fitted through
 * (log(n*(n+1)), log(norms[n])) for n = subsetTruncation+1..fieldTruncation+1.
 *
 * Returns -slope, limited to [-9999.9, 9999.9].
 */
double TEMPLATE(calculate_pfactor,T)(const T *spectralField, long fieldTruncation, long subsetTruncation)
{
  const double zeps = 1.0e-15;
  const long ismin = subsetTruncation + 1, ismax = fieldTruncation + 1;
  double weightedSumOverX = 0.0, weightedSumOverY = 0.0, sumOfWeights = 0.0;
  double numerator = 0.0, denominator = 0.0;

  // Setup the weights: the weight decreases as 1/(distance from ismin + 1)

  const double range = (double) (ismax - ismin + 1);

  double *weights = (double*) Malloc(((size_t)ismax+1)*sizeof(double));
  for (long loop = ismin; loop <= ismax; loop++)
    weights[loop] = range / (double) (loop-ismin+1);

  // Compute norms
  // Handle values 2 at a time (real and imaginary parts).
  double *norms = (double*) Malloc(((size_t)ismax+1)*sizeof(double));

  for (long loop = 0; loop < ismax+1; loop++) norms[loop] = 0.0;

  // Form norms for the rows which contain part of the unscaled subset.
  // index is advanced BEFORE each pair is read, so it always addresses the
  // real part of the current (m, n) coefficient.

  long index = -2;
  for (long m = 0; m < subsetTruncation; m++)
    for (long n = m; n <= fieldTruncation; n++) {
      index += 2;
      if (n >= subsetTruncation) {
        const double re = fabs((double) spectralField[index]);
        const double im = fabs((double) spectralField[index+1]);
        norms[n] = norms[n] > re ? norms[n] : re;
        norms[n] = norms[n] > im ? norms[n] : im;
      }
    }

  // Form norms for the rows which do not contain part of the unscaled subset.
  // The increment has to come first here as well: reading before advancing
  // would pair the previous coefficient's real part with the current
  // imaginary part and would touch spectralField[-2] for subsetTruncation 0.

  for (long m = subsetTruncation; m <= fieldTruncation; m++)
    for (long n = m; n <= fieldTruncation; n++) {
      index += 2;
      const double re = fabs((double) spectralField[index]);
      const double im = fabs((double) spectralField[index+1]);
      norms[n] = norms[n] > re ? norms[n] : re;
      norms[n] = norms[n] > im ? norms[n] : im;
    }

  // Ensure the norms have a value which is not too small in case of problems with math functions (e.g. LOG).
  // Every entry used by the fit is clamped (indexed by loop, not by the
  // left-over n of the loops above); clamped entries get a negligible weight.

  for (long loop = ismin; loop <= ismax; loop++) {
    norms[loop] = norms[loop] > zeps ? norms[loop] : zeps;
    if (IS_EQUAL(norms[loop], zeps)) weights[loop] = 100.0 * zeps;
  }

  // Do linear fit to find the slope: first the weighted means of x and y

  for (long loop = ismin; loop <= ismax; loop++) {
    const double x = log( (double) (loop*(loop+1)) );
    const double y = log( norms[loop] );
    weightedSumOverX += x * weights[loop];
    weightedSumOverY += y * weights[loop];
    sumOfWeights = sumOfWeights + weights[loop];
  }
  weightedSumOverX /= sumOfWeights;
  weightedSumOverY /= sumOfWeights;

  // Perform a least square fit for the equation

  for (long loop = ismin; loop <= ismax; loop++) {
    const double x = log( (double)(loop*(loop+1)) );
    const double y = log( norms[loop] );
    numerator += weights[loop] * (y-weightedSumOverY) * (x-weightedSumOverX);
    denominator += weights[loop] * ((x-weightedSumOverX) * (x-weightedSumOverX));
  }
  const double slope = numerator / denominator;

  Free(weights);
  Free(norms);

  double pFactor = -slope;
  if( pFactor < -9999.9 ) pFactor = -9999.9;
  if( pFactor > 9999.9 )  pFactor = 9999.9;

  return pFactor;
}

void TEMPLATE(scale_complex,T)(T *fpdata, int pcStart, int pcScale, int trunc, int inv)
{
  /*
   * Multiply the spectral coefficients with n >= pcStart by (n*(n+1))^p,
   * p = pcScale/1000, or by the reciprocal of that factor when inv is set.
   *
   * fpdata  - (real, imaginary) pairs, ordered m = 0..trunc with
   *           n = m..trunc running fastest; modified in place.
   * pcStart - coefficients with n < pcStart are left untouched.
   * pcScale - power times 1000; must lie in [-10000, 10000], otherwise a
   *           message is printed and nothing is changed. 0 is a no-op.
   * trunc   - truncation of the field.
   * inv     - non-zero: divide instead of multiply.
   */
  if ( pcScale < -10000 || pcScale > 10000 )
    {
      fprintf(stderr, " %s: Invalid power given %6d\n", __func__, pcScale);
      return;
    }

  if ( pcScale == 0 ) return;

  // Table of scaling factors, one per n; entry 0 stays 1

  double *factor = (double*) Malloc(((size_t)trunc+1)*sizeof(double));
  const double exponent = (double) pcScale / 1000.;
  const int needPow = (pcScale != 1000);

  factor[0] = 1.0;
  for (int n = 1; n <= trunc; ++n)
    {
      if ( needPow )
        factor[n] = pow((double) (n*(n+1)), exponent);
      else
        factor[n] = (double) (n*(n+1));

      if ( inv ) factor[n] = 1.0 / factor[n];
    }

  // Walk the field pair by pair

  T *pair = fpdata;

  // Rows that start inside the unscaled subset: skip the leading n < pcStart
  for (int m = 0; m < pcStart; ++m)
    for (int n = m; n <= trunc; ++n)
      {
        if ( n >= pcStart )
          {
            pair[0] = (T)(pair[0] * factor[n]);
            pair[1] = (T)(pair[1] * factor[n]);
          }
        pair += 2;
      }

  // Remaining rows are scaled completely
  for (int m = pcStart; m <= trunc; ++m)
    for (int n = m; n <= trunc; ++n)
      {
        pair[0] = (T)(pair[0] * factor[n]);
        pair[1] = (T)(pair[1] * factor[n]);
        pair += 2;
      }

  Free(factor);
}


void TEMPLATE(scatter_complex,T)(T *fpdata, int pcStart, int trunc, int nsp)
{
  /*
   * Reorder fpdata in place from the "subset first" layout into the
   * row-by-row layout (m = 0..trunc, n = m..trunc, two values per n).
   *
   * On input the values with n <= pcStart of every row m <= pcStart come
   * first, followed by the remaining values of every row.
   * Negative pcStart or trunc are treated as 0; nsp is the number of
   * values copied back into fpdata.
   */
  const size_t total = (size_t)nsp;
  T *unpacked = (T*) Malloc(total*sizeof(T));
  const size_t sub = pcStart >= 0 ? (size_t)pcStart : 0U;
  const size_t top = trunc >= 0 ? (size_t)trunc : 0U;
  size_t src = 0;   // read position in fpdata, runs through both passes
  size_t dst = 0;   // write position in unpacked, restarted for each pass

  // Pass 1: put the subset part at the front of every row m <= sub
  for (size_t m = 0; m <= sub; ++m)
    {
      const size_t count = (sub <= top) ? 2 * (sub + 1 - m) : 0;
      if ( count > 0 ) memcpy(unpacked + dst, fpdata + src, count * sizeof(T));
      src += count;
      if ( m <= top ) dst += 2 * (top - m + 1);
    }

  // Pass 2: fill in what follows the subset part in every row
  dst = 0;
  for (size_t m = 0; m <= top; ++m)
    {
      // step over the subset part written by pass 1
      if ( m <= sub ) dst += 2 * (sub - m + 1);

      const size_t first = (m > sub) ? m : sub + 1;
      const size_t count = (first <= top) ? 2 * (top - first + 1) : 0;
      if ( count > 0 ) memcpy(unpacked + dst, fpdata + src, count * sizeof(T));
      src += count;
      dst += count;
    }

  if ( total > 0 ) memcpy(fpdata, unpacked, total * sizeof(T));

  Free(unpacked);
}


void TEMPLATE(gather_complex,T)(T *fpdata, size_t pcStart, size_t trunc, size_t nsp)
{
  /*
   * Reorder fpdata in place from the row-by-row layout (m = 0..trunc,
   * n = m..trunc, two values per n) into the "subset first" layout:
   * all pairs with n <= pcStart of the rows m <= pcStart, followed by
   * all pairs with n > pcStart. nsp values are copied back into fpdata.
   */
  T *restrict packed = (T*) Malloc(nsp*sizeof(T));
  size_t nout = 0;   // number of values written to packed so far
  size_t src;        // read position in fpdata, restarted for each pass

  // Pass 1: collect the subset
  src = 0;
  for (size_t m = 0; m <= pcStart; ++m)
    for (size_t n = m; n <= trunc; ++n, src += 2)
      {
        if ( n > pcStart ) continue;
        packed[nout++] = fpdata[src];
        packed[nout++] = fpdata[src+1];
      }

  // Pass 2: collect everything outside the subset
  src = 0;
  for (size_t m = 0; m <= trunc; ++m)
    for (size_t n = m; n <= trunc; ++n, src += 2)
      {
        if ( n <= pcStart ) continue;
        packed[nout++] = fpdata[src];
        packed[nout++] = fpdata[src+1];
      }

  if ( nsp > 0 ) memcpy(fpdata, packed, nsp * sizeof(T));

  Free(packed);
}


static void TEMPLATE(scm0,T)(T *pdl, T *pdr, T *pfl, T *pfr, int klg)
{
  /*
   * SCM0 - limit the derivative estimates of klg intervals.
   *
   * pdl, pdr - in: derivative at the left / right edge of each interval
   *            out: the limited derivatives
   * pfl, pfr - function value at the left / right edge of each interval
   * klg      - number of intervals
   *
   * With d = pfr - pfl: both derivatives are set to 0 when |d| does not
   * exceed 1e-12. Otherwise a derivative whose ratio to d is <= 0 becomes 0
   * and one whose ratio exceeds 3*(1 - 1e-12) is cut back to that limit
   * times d.
   */
  const double tiny = 1.0e-12;
  const double ratioMax = (1.0 - tiny) * 3.0;

  for (int i = 0; i < klg; ++i)
    {
      const double rise = pfr[i] - pfl[i];

      if ( !(fabs(rise) > tiny) )
        {
          /* flat (or undefined) interval */
          pdl[i] = 0.0;
          pdr[i] = 0.0;
          continue;
        }

      const double ratioLeft  = pdl[i] / (pfr[i] - pfl[i]);
      const double ratioRight = pdr[i] / (pfr[i] - pfl[i]);

      if ( ratioLeft  <= 0.0 ) pdl[i] = 0.0;
      if ( ratioRight <= 0.0 ) pdr[i] = 0.0;
      if ( ratioLeft  > ratioMax ) pdl[i] = (T)(ratioMax * (pfr[i] - pfl[i]));
      if ( ratioRight > ratioMax ) pdr[i] = (T)(ratioMax * (pfr[i] - pfl[i]));
    }
} /* scm0 */

static
int TEMPLATE(rowina3,T)(T *p, int ko, int ki, T *pw,
			int kcode, T msval, int *kret, int omisng, int operio, int oveggy)
{
  /*
   * ROWINA3 - interpolate one row from ki to ko values, in place.
   *
   * p      - in: ki values; out: ko values.
   * ko     - number of values required.
   * ki     - number of values in p on input.
   * pw     - work array, used as three consecutive rows of ko+3 elements.
   * kcode  - 1: linear, 3: cubic; anything else sets *kret = 2.
   * msval  - missing data indicator. Linear: when one neighbour equals
   *          msval the other neighbour is taken. Cubic: any input equal
   *          to msval sets *kret = 1 and p is left unchanged.
   * kret   - out: 0 on success, non-zero on error.
   * omisng - not used.
   * operio - linear only: non-zero treats the row as periodic (the first
   *          value follows the last one); otherwise the last value is
   *          repeated.
   * oveggy - linear only: non-zero selects the nearer neighbour instead
   *          of interpolating.
   *
   * The function result is always 0; the status is reported through kret.
   *
   * Original routine: J.D.Chambers, ECMWF, 22.07.94; OMISNG and OPERIO
   * added by J. Clochard, Meteo France, January 1998.
   */
  UNUSED(omisng);

  const int rowLen = ko + 3;

  *kret = 0;

  switch ( kcode )
    {
    case 1:
      {
        /* val[1..ki] holds the input, val[ki+1] the value after the end */
        T *const val = pw;
        double zrdi, zdo;

        for (int i = 0; i < ki; ++i) val[i + 1] = p[i];

        if ( operio )
          {
            /* wrap around */
            val[ki + 1] = p[0];
            zrdi = (double) ki;
            zdo = 1.0 / (double) ko;
          }
        else
          {
            /* repeat the last value, needed because of the truncation below */
            val[ki + 1] = p[ki - 1];
            zrdi = (double) (ki-1);
            zdo = 1.0 / (double) (ko-1);
          }

        for (int i = 0; i < ko; ++i)
          {
            /* position of output point i in units of input intervals */
            const double zpos = i * zdo;
            double zwt = zpos * zrdi;

            /* left neighbour is val[ip+1]; zwt becomes the fraction in [0,1) */
            const int ip = (int) zwt;
            zwt -= ip;

            if ( oveggy )
              {
                if ( zwt < 0.5 )
                  p[i] = val[ip + 1];
                else
                  p[i] = val[ip + 2];
              }
            else if ( IS_EQUAL(val[ip + 1], msval) )
              {
                /* left value missing: take the right one */
                p[i] = val[ip + 2];
              }
            else if ( IS_EQUAL(val[ip + 2], msval) )
              {
                /* right value missing: take the left one */
                p[i] = val[ip + 1];
              }
            else
              {
                p[i] = (T)((1.0 - zwt) * val[ip + 1]
                           + zwt * val[ip + 2]);
              }
          }
      }
      break;

    case 3:
      {
        /* val: values with periodic halo, dl/dr: left/right derivatives */
        T *const val = pw;
        T *const dl  = pw + rowLen;
        T *const dr  = pw + 2 * rowLen;

        for (int i = 0; i < ki; ++i)
          {
            if ( IS_EQUAL(p[i], msval) )
              {
                fprintf(stderr," ROWINA3: ");
                fprintf(stderr," Cubic interpolation not supported");
                fprintf(stderr," for fields containing missing data.\n");
                *kret = 1;
                return 0;
              }
            val[i + 1] = p[i];
          }
        val[0]      = p[ki - 1];
        val[ki + 1] = p[0];
        val[ki + 2] = p[1];

        for (int j = 1; j <= ki; ++j)
          {
            dl[j] =
              (T)(- val[j - 1] / 3.0 -
                  val[j    ] * 0.5 +
                  val[j + 1] - val[j + 2] / 6.0);
            dr[j + 1] =
              (T)(val[j - 1] / 6.0 -
                  val[j    ] +
                  val[j + 1] * 0.5 +
                  val[j + 2] / 3.0);
          }

        TEMPLATE(scm0,T)(&dl[1], &dr[2], &val[1], &val[2], ki);

        const double zrdi = (double) ki;
        const double zdo = 1.0 / (double) ko;
        for (int i = 0; i < ko; ++i)
          {
            const double zpos = i * zdo;
            double zwt = zpos * zrdi;
            const int ip = (int) zwt + 1;
            zwt = zwt + 1.0 - ip;
            const double zwt1 = 1.0 - zwt;
            p[i] = (T)(((3.0 - zwt1 * 2.0) * val[ip] +
                        zwt * dl[ip]) * zwt1 * zwt1 +
                       ((3.0 - zwt * 2.0) * val[ip + 1] -
                        zwt1 * dr[ip + 1]) * zwt * zwt);
          }
      }
      break;

    default:
      fprintf(stderr," ROWINA3:");
      fprintf(stderr," Invalid interpolation code = %2d\n",kcode);
      *kret = 2;
      break;
    }

  return 0;
} /* rowina3 */


int TEMPLATE(qu2reg3,T)(T *pfield, int *kpoint, int klat, int klon,
			T msval, int *kret, int omisng, int operio, int oveggy)
{
  /*
   * QU2REG3 - convert quasi-regular grid data to regular, in place.
   *
   * pfield - in: the quasi-regular rows stored one after the other;
   *          out: klon*klat regular grid values (the caller's array has
   *          to provide room for that many values).
   * kpoint - number of points of every quasi-regular row.
   * klat   - number of latitude lines.
   * klon   - number of longitude lines.
   * msval  - missing data indicator, handed on to rowina3.
   * kret   - out: 0 = OK, non-zero = error (1 for an invalid
   *          interpolation code, otherwise the value set by rowina3).
   * omisng, operio, oveggy - handed on to rowina3 unchanged.
   *
   * The interpolation code is fixed to 1 (linear, quasi-regular along
   * latitude lines). Rows whose point count differs from the regular row
   * length are interpolated with rowina3, all other rows are copied.
   * On error pfield is left unchanged.
   *
   * The function result is always 0; the status is reported through kret.
   *
   * Original routine: J.D.Chambers, ECMWF, 22.07.94; OMISNG and OPERIO
   * added by J. Clochard, Meteo France, January 1998.
   */
   int kcode = 1;
   int icode, iregno, iquano;

   /* Positions and sizes are kept in size_t: klon*klat values do not
      necessarily fit into an int. */
   size_t nin = 0;    /* values consumed from pfield */
   size_t nout = 0;   /* values written to ztemp     */
   const size_t nregular = (size_t)klon*(size_t)klat;

   T *ztemp = (T*) Malloc(nregular*sizeof(T));
   T *zline = (T*) Malloc(2*(size_t)klon*sizeof(T));
   T *zwork = (T*) Malloc(3*(2*(size_t)klon+3)*sizeof(T));

/* ------------------------------ */
/* Section 1. Set initial values. */
/* ------------------------------ */

   *kret = 0;

/* Check input parameters. */

   if (kcode != 1 && kcode != 3 && kcode != 11 && kcode != 13) {
      fprintf(stderr," QU2REG :");
      fprintf(stderr," Invalid interpolation type code = %2d\n",kcode);
      *kret = 1;
      goto L900;
   }

/* Establish values of loop parameters. */

   if (kcode > 10) {
      /* Quasi-regular along longitude lines. */
      iquano = klon;
      iregno = klat;
      icode = kcode - 10;
   } else {
      /* Quasi-regular along latitude lines. */
      iquano = klat;
      iregno = klon;
      icode = kcode;
   }

/*     -------------------------------------------------------- */
/**    Section 2. Interpolate field from quasi to regular grid. */
/*     -------------------------------------------------------- */

   for (int row = 0; row < iquano; ++row) {
      const int npts = kpoint[row];

      if (npts != iregno) {

         /* Row length differs from the regular one: extract the row, */
         for (int i = 0; i < npts; ++i) zline[i] = pfield[nin++];

         /* interpolate it, */
         TEMPLATE(rowina3,T)(zline, iregno, npts, zwork, icode, msval, kret, omisng, operio , oveggy);
         if (*kret != 0) goto L900;

         /* and append the regular row to the temporary array. */
         for (int i = 0; i < iregno; ++i) ztemp[nout++] = zline[i];

      } else {

         /* Row already has the required number of values: copy it. */
         for (int i = 0; i < iregno; ++i) ztemp[nout++] = pfield[nin++];
      }
   }

/* Copy temporary array to user array. */

   for (size_t i = 0; i < nregular; ++i) pfield[i] = ztemp[i];

/* -------------------------------------------------------- */
/* Section 9. Return to calling routine.                    */
/* -------------------------------------------------------- */

L900:

   Free(zwork);
   Free(zline);
   Free(ztemp);

   return 0;
} /* qu2reg3 */

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * c-file-style: "Java"
 * c-basic-offset: 2
 * indent-tabs-mode: nil
 * show-trailing-whitespace: t
 * require-trailing-newline: t
 * End:
 */

#ifdef T
#undef T
#endif
#define T float
#ifdef T

/*
 * calculate_pfactor: source code from grib_api-1.8.0
 *
 * Fits a power law to the magnitudes of the spectral coefficients and
 * returns the negated slope of that fit.
 *
 * spectralField    - (real, imaginary) pairs stored consecutively, ordered
 *                    m = 0..fieldTruncation with n = m..fieldTruncation
 *                    running fastest.
 * fieldTruncation  - largest m and n present in spectralField.
 * subsetTruncation - only coefficients with n >= subsetTruncation
 *                    contribute to the norms.
 *
 * norms[n] is the largest absolute real or imaginary part found for n.
 * A weighted least-squares line is fitted through
 * (log(n*(n+1)), log(norms[n])) for n = subsetTruncation+1..fieldTruncation+1.
 *
 * Returns -slope, limited to [-9999.9, 9999.9].
 */
double TEMPLATE(calculate_pfactor,T)(const T *spectralField, long fieldTruncation, long subsetTruncation)
{
  const double zeps = 1.0e-15;
  const long ismin = subsetTruncation + 1, ismax = fieldTruncation + 1;
  double weightedSumOverX = 0.0, weightedSumOverY = 0.0, sumOfWeights = 0.0;
  double numerator = 0.0, denominator = 0.0;

  // Setup the weights: the weight decreases as 1/(distance from ismin + 1)

  const double range = (double) (ismax - ismin + 1);

  double *weights = (double*) Malloc(((size_t)ismax+1)*sizeof(double));
  for (long loop = ismin; loop <= ismax; loop++)
    weights[loop] = range / (double) (loop-ismin+1);

  // Compute norms
  // Handle values 2 at a time (real and imaginary parts).
  double *norms = (double*) Malloc(((size_t)ismax+1)*sizeof(double));

  for (long loop = 0; loop < ismax+1; loop++) norms[loop] = 0.0;

  // Form norms for the rows which contain part of the unscaled subset.
  // index is advanced BEFORE each pair is read, so it always addresses the
  // real part of the current (m, n) coefficient.

  long index = -2;
  for (long m = 0; m < subsetTruncation; m++)
    for (long n = m; n <= fieldTruncation; n++) {
      index += 2;
      if (n >= subsetTruncation) {
        const double re = fabs((double) spectralField[index]);
        const double im = fabs((double) spectralField[index+1]);
        norms[n] = norms[n] > re ? norms[n] : re;
        norms[n] = norms[n] > im ? norms[n] : im;
      }
    }

  // Form norms for the rows which do not contain part of the unscaled subset.
  // The increment has to come first here as well: reading before advancing
  // would pair the previous coefficient's real part with the current
  // imaginary part and would touch spectralField[-2] for subsetTruncation 0.

  for (long m = subsetTruncation; m <= fieldTruncation; m++)
    for (long n = m; n <= fieldTruncation; n++) {
      index += 2;
      const double re = fabs((double) spectralField[index]);
      const double im = fabs((double) spectralField[index+1]);
      norms[n] = norms[n] > re ? norms[n] : re;
      norms[n] = norms[n] > im ? norms[n] : im;
    }

  // Ensure the norms have a value which is not too small in case of problems with math functions (e.g. LOG).
  // Every entry used by the fit is clamped (indexed by loop, not by the
  // left-over n of the loops above); clamped entries get a negligible weight.

  for (long loop = ismin; loop <= ismax; loop++) {
    norms[loop] = norms[loop] > zeps ? norms[loop] : zeps;
    if (IS_EQUAL(norms[loop], zeps)) weights[loop] = 100.0 * zeps;
  }

  // Do linear fit to find the slope: first the weighted means of x and y

  for (long loop = ismin; loop <= ismax; loop++) {
    const double x = log( (double) (loop*(loop+1)) );
    const double y = log( norms[loop] );
    weightedSumOverX += x * weights[loop];
    weightedSumOverY += y * weights[loop];
    sumOfWeights = sumOfWeights + weights[loop];
  }
  weightedSumOverX /= sumOfWeights;
  weightedSumOverY /= sumOfWeights;

  // Perform a least square fit for the equation

  for (long loop = ismin; loop <= ismax; loop++) {
    const double x = log( (double)(loop*(loop+1)) );
    const double y = log( norms[loop] );
    numerator += weights[loop] * (y-weightedSumOverY) * (x-weightedSumOverX);
    denominator += weights[loop] * ((x-weightedSumOverX) * (x-weightedSumOverX));
  }
  const double slope = numerator / denominator;

  Free(weights);
  Free(norms);

  double pFactor = -slope;
  if( pFactor < -9999.9 ) pFactor = -9999.9;
  if( pFactor > 9999.9 )  pFactor = 9999.9;

  return pFactor;
}

void TEMPLATE(scale_complex,T)(T *fpdata, int pcStart, int pcScale, int trunc, int inv)
{
  /*
   * Multiply the spectral coefficients with n >= pcStart by (n*(n+1))^p,
   * p = pcScale/1000, or by the reciprocal of that factor when inv is set.
   *
   * fpdata  - (real, imaginary) pairs, ordered m = 0..trunc with
   *           n = m..trunc running fastest; modified in place.
   * pcStart - coefficients with n < pcStart are left untouched.
   * pcScale - power times 1000; must lie in [-10000, 10000], otherwise a
   *           message is printed and nothing is changed. 0 is a no-op.
   * trunc   - truncation of the field.
   * inv     - non-zero: divide instead of multiply.
   */
  if ( pcScale < -10000 || pcScale > 10000 )
    {
      fprintf(stderr, " %s: Invalid power given %6d\n", __func__, pcScale);
      return;
    }

  if ( pcScale == 0 ) return;

  // Table of scaling factors, one per n; entry 0 stays 1

  double *factor = (double*) Malloc(((size_t)trunc+1)*sizeof(double));
  const double exponent = (double) pcScale / 1000.;
  const int needPow = (pcScale != 1000);

  factor[0] = 1.0;
  for (int n = 1; n <= trunc; ++n)
    {
      if ( needPow )
        factor[n] = pow((double) (n*(n+1)), exponent);
      else
        factor[n] = (double) (n*(n+1));

      if ( inv ) factor[n] = 1.0 / factor[n];
    }

  // Walk the field pair by pair

  T *pair = fpdata;

  // Rows that start inside the unscaled subset: skip the leading n < pcStart
  for (int m = 0; m < pcStart; ++m)
    for (int n = m; n <= trunc; ++n)
      {
        if ( n >= pcStart )
          {
            pair[0] = (T)(pair[0] * factor[n]);
            pair[1] = (T)(pair[1] * factor[n]);
          }
        pair += 2;
      }

  // Remaining rows are scaled completely
  for (int m = pcStart; m <= trunc; ++m)
    for (int n = m; n <= trunc; ++n)
      {
        pair[0] = (T)(pair[0] * factor[n]);
        pair[1] = (T)(pair[1] * factor[n]);
        pair += 2;
      }

  Free(factor);
}


void TEMPLATE(scatter_complex,T)(T *fpdata, int pcStart, int trunc, int nsp)
{
  /*
   * Reorder fpdata in place from the "subset first" layout into the
   * row-by-row layout (m = 0..trunc, n = m..trunc, two values per n).
   *
   * On input the values with n <= pcStart of every row m <= pcStart come
   * first, followed by the remaining values of every row.
   * Negative pcStart or trunc are treated as 0; nsp is the number of
   * values copied back into fpdata.
   */
  const size_t total = (size_t)nsp;
  T *unpacked = (T*) Malloc(total*sizeof(T));
  const size_t sub = pcStart >= 0 ? (size_t)pcStart : 0U;
  const size_t top = trunc >= 0 ? (size_t)trunc : 0U;
  size_t src = 0;   // read position in fpdata, runs through both passes
  size_t dst = 0;   // write position in unpacked, restarted for each pass

  // Pass 1: put the subset part at the front of every row m <= sub
  for (size_t m = 0; m <= sub; ++m)
    {
      const size_t count = (sub <= top) ? 2 * (sub + 1 - m) : 0;
      if ( count > 0 ) memcpy(unpacked + dst, fpdata + src, count * sizeof(T));
      src += count;
      if ( m <= top ) dst += 2 * (top - m + 1);
    }

  // Pass 2: fill in what follows the subset part in every row
  dst = 0;
  for (size_t m = 0; m <= top; ++m)
    {
      // step over the subset part written by pass 1
      if ( m <= sub ) dst += 2 * (sub - m + 1);

      const size_t first = (m > sub) ? m : sub + 1;
      const size_t count = (first <= top) ? 2 * (top - first + 1) : 0;
      if ( count > 0 ) memcpy(unpacked + dst, fpdata + src, count * sizeof(T));
      src += count;
      dst += count;
    }

  if ( total > 0 ) memcpy(fpdata, unpacked, total * sizeof(T));

  Free(unpacked);
}


void TEMPLATE(gather_complex,T)(T *fpdata, size_t pcStart, size_t trunc, size_t nsp)
{
  /*
   * Reorder fpdata in place from the row-by-row layout (m = 0..trunc,
   * n = m..trunc, two values per n) into the "subset first" layout:
   * all pairs with n <= pcStart of the rows m <= pcStart, followed by
   * all pairs with n > pcStart. nsp values are copied back into fpdata.
   */
  T *restrict packed = (T*) Malloc(nsp*sizeof(T));
  size_t nout = 0;   // number of values written to packed so far
  size_t src;        // read position in fpdata, restarted for each pass

  // Pass 1: collect the subset
  src = 0;
  for (size_t m = 0; m <= pcStart; ++m)
    for (size_t n = m; n <= trunc; ++n, src += 2)
      {
        if ( n > pcStart ) continue;
        packed[nout++] = fpdata[src];
        packed[nout++] = fpdata[src+1];
      }

  // Pass 2: collect everything outside the subset
  src = 0;
  for (size_t m = 0; m <= trunc; ++m)
    for (size_t n = m; n <= trunc; ++n, src += 2)
      {
        if ( n <= pcStart ) continue;
        packed[nout++] = fpdata[src];
        packed[nout++] = fpdata[src+1];
      }

  if ( nsp > 0 ) memcpy(fpdata, packed, nsp * sizeof(T));

  Free(packed);
}


static void TEMPLATE(scm0,T)(T *pdl, T *pdr, T *pfl, T *pfr, int klg)
{
  /*
   * SCM0 - limit the derivative estimates of klg intervals.
   *
   * pdl, pdr - in: derivative at the left / right edge of each interval
   *            out: the limited derivatives
   * pfl, pfr - function value at the left / right edge of each interval
   * klg      - number of intervals
   *
   * With d = pfr - pfl: both derivatives are set to 0 when |d| does not
   * exceed 1e-12. Otherwise a derivative whose ratio to d is <= 0 becomes 0
   * and one whose ratio exceeds 3*(1 - 1e-12) is cut back to that limit
   * times d.
   */
  const double tiny = 1.0e-12;
  const double ratioMax = (1.0 - tiny) * 3.0;

  for (int i = 0; i < klg; ++i)
    {
      const double rise = pfr[i] - pfl[i];

      if ( !(fabs(rise) > tiny) )
        {
          /* flat (or undefined) interval */
          pdl[i] = 0.0;
          pdr[i] = 0.0;
          continue;
        }

      const double ratioLeft  = pdl[i] / (pfr[i] - pfl[i]);
      const double ratioRight = pdr[i] / (pfr[i] - pfl[i]);

      if ( ratioLeft  <= 0.0 ) pdl[i] = 0.0;
      if ( ratioRight <= 0.0 ) pdr[i] = 0.0;
      if ( ratioLeft  > ratioMax ) pdl[i] = (T)(ratioMax * (pfr[i] - pfl[i]));
      if ( ratioRight > ratioMax ) pdr[i] = (T)(ratioMax * (pfr[i] - pfl[i]));
    }
} /* scm0 */

static
int TEMPLATE(rowina3,T)(T *p, int ko, int ki, T *pw,
			int kcode, T msval, int *kret, int omisng, int operio, int oveggy)
{
  /*
   * ROWINA3 - interpolate one row from ki to ko values, in place.
   *
   * p      - in: ki values; out: ko values.
   * ko     - number of values required.
   * ki     - number of values in p on input.
   * pw     - work array, used as three consecutive rows of ko+3 elements.
   * kcode  - 1: linear, 3: cubic; anything else sets *kret = 2.
   * msval  - missing data indicator. Linear: when one neighbour equals
   *          msval the other neighbour is taken. Cubic: any input equal
   *          to msval sets *kret = 1 and p is left unchanged.
   * kret   - out: 0 on success, non-zero on error.
   * omisng - not used.
   * operio - linear only: non-zero treats the row as periodic (the first
   *          value follows the last one); otherwise the last value is
   *          repeated.
   * oveggy - linear only: non-zero selects the nearer neighbour instead
   *          of interpolating.
   *
   * The function result is always 0; the status is reported through kret.
   *
   * Original routine: J.D.Chambers, ECMWF, 22.07.94; OMISNG and OPERIO
   * added by J. Clochard, Meteo France, January 1998.
   */
  UNUSED(omisng);

  const int rowLen = ko + 3;

  *kret = 0;

  switch ( kcode )
    {
    case 1:
      {
        /* val[1..ki] holds the input, val[ki+1] the value after the end */
        T *const val = pw;
        double zrdi, zdo;

        for (int i = 0; i < ki; ++i) val[i + 1] = p[i];

        if ( operio )
          {
            /* wrap around */
            val[ki + 1] = p[0];
            zrdi = (double) ki;
            zdo = 1.0 / (double) ko;
          }
        else
          {
            /* repeat the last value, needed because of the truncation below */
            val[ki + 1] = p[ki - 1];
            zrdi = (double) (ki-1);
            zdo = 1.0 / (double) (ko-1);
          }

        for (int i = 0; i < ko; ++i)
          {
            /* position of output point i in units of input intervals */
            const double zpos = i * zdo;
            double zwt = zpos * zrdi;

            /* left neighbour is val[ip+1]; zwt becomes the fraction in [0,1) */
            const int ip = (int) zwt;
            zwt -= ip;

            if ( oveggy )
              {
                if ( zwt < 0.5 )
                  p[i] = val[ip + 1];
                else
                  p[i] = val[ip + 2];
              }
            else if ( IS_EQUAL(val[ip + 1], msval) )
              {
                /* left value missing: take the right one */
                p[i] = val[ip + 2];
              }
            else if ( IS_EQUAL(val[ip + 2], msval) )
              {
                /* right value missing: take the left one */
                p[i] = val[ip + 1];
              }
            else
              {
                p[i] = (T)((1.0 - zwt) * val[ip + 1]
                           + zwt * val[ip + 2]);
              }
          }
      }
      break;

    case 3:
      {
        /* val: values with periodic halo, dl/dr: left/right derivatives */
        T *const val = pw;
        T *const dl  = pw + rowLen;
        T *const dr  = pw + 2 * rowLen;

        for (int i = 0; i < ki; ++i)
          {
            if ( IS_EQUAL(p[i], msval) )
              {
                fprintf(stderr," ROWINA3: ");
                fprintf(stderr," Cubic interpolation not supported");
                fprintf(stderr," for fields containing missing data.\n");
                *kret = 1;
                return 0;
              }
            val[i + 1] = p[i];
          }
        val[0]      = p[ki - 1];
        val[ki + 1] = p[0];
        val[ki + 2] = p[1];

        for (int j = 1; j <= ki; ++j)
          {
            dl[j] =
              (T)(- val[j - 1] / 3.0 -
                  val[j    ] * 0.5 +
                  val[j + 1] - val[j + 2] / 6.0);
            dr[j + 1] =
              (T)(val[j - 1] / 6.0 -
                  val[j    ] +
                  val[j + 1] * 0.5 +
                  val[j + 2] / 3.0);
          }

        TEMPLATE(scm0,T)(&dl[1], &dr[2], &val[1], &val[2], ki);

        const double zrdi = (double) ki;
        const double zdo = 1.0 / (double) ko;
        for (int i = 0; i < ko; ++i)
          {
            const double zpos = i * zdo;
            double zwt = zpos * zrdi;
            const int ip = (int) zwt + 1;
            zwt = zwt + 1.0 - ip;
            const double zwt1 = 1.0 - zwt;
            p[i] = (T)(((3.0 - zwt1 * 2.0) * val[ip] +
                        zwt * dl[ip]) * zwt1 * zwt1 +
                       ((3.0 - zwt * 2.0) * val[ip + 1] -
                        zwt1 * dr[ip + 1]) * zwt * zwt);
          }
      }
      break;

    default:
      fprintf(stderr," ROWINA3:");
      fprintf(stderr," Invalid interpolation code = %2d\n",kcode);
      *kret = 2;
      break;
    }

  return 0;
} /* rowina3 */


/*
 * QU2REG3 - convert quasi-regular grid data to a regular grid, in place.
 *
 * Adaptation of QU2REG that allows missing data values (and hence bit mapped
 * fields).  Original author: J.D.Chambers, ECMWF, 22.07.94; OMISNG/OPERIO
 * arguments added by J. Clochard, Meteo France, January 1998.
 *
 * Parameters:
 *   pfield  - in:  quasi-regular grid data, line after line
 *             out: regular grid data (klon*klat values)
 *   kpoint  - number of points on each line of the quasi-regular grid
 *   klat    - number of latitude lines
 *   klon    - number of longitude lines
 *   msval   - value used as missing data indicator
 *   kret    - out: return code, 0 = OK, non-zero indicates a fatal error
 *   omisng  - true if missing values are present in the field
 *   operio  - true if the input field is periodic
 *   oveggy  - true if 'nearest neighbour' processing must be used
 *
 * The interpolation code (kcode) is fixed inside this routine:
 *    1 , linear - data quasi-regular on latitude lines.
 *    3 , cubic  - data quasi-regular on latitude lines.
 *   11 , linear - data quasi-regular on longitude lines.
 *   13 , cubic  - data quasi-regular on longitude lines.
 *
 * Method: every line is expanded (through rowina3 when it is shorter or
 * longer than the regular line length) into a temporary array, which is
 * then copied back over the caller's array.
 *
 * The function value itself is always 0; errors are reported through *kret.
 */
int TEMPLATE(qu2reg3,T)(T *pfield, int *kpoint, int klat, int klon,
                        T msval, int *kret, int omisng, int operio, int oveggy)
{
   const int kcode = 1;   /* interpolation selector, see table above */
   int nlines;            /* number of quasi-regular lines           */
   int npoints;           /* number of points on a regular line      */
   int rowcode;           /* interpolation code handed to rowina3    */
   int src = 0;           /* read cursor into pfield                 */
   int dst = 0;           /* write cursor into the temporary field   */
   int total;

   /* Scratch space: the complete regular field, a single line, and the
      work array used by rowina3. */
   T *regular = (T*) Malloc((size_t)klon*(size_t)klat*sizeof(T));
   T *line    = (T*) Malloc(2*(size_t)klon*sizeof(T));
   T *work    = (T*) Malloc(3*(2*(size_t)klon+3)*sizeof(T));

   *kret = 0;

   /* Reject unknown interpolation codes. */
   if (!(kcode == 1 || kcode == 3 || kcode == 11 || kcode == 13)) {
      fprintf(stderr," QU2REG :");
      fprintf(stderr," Invalid interpolation type code = %2d\n",kcode);
      *kret = 1;
      goto cleanup;
   }

   /* Codes above 10 mean "quasi-regular along longitude lines". */
   if (kcode > 10) {
      nlines  = klon;
      npoints = klat;
      rowcode = kcode - 10;
   } else {
      nlines  = klat;
      npoints = klon;
      rowcode = kcode;
   }

   /* Expand the field line by line. */
   for (int row = 0; row < nlines; ++row) {
      const int nin = kpoint[row];

      if (nin == npoints) {
         /* Line already has the regular length: plain copy. */
         for (int k = 0; k < npoints; ++k)
            regular[dst++] = pfield[src++];
         continue;
      }

      /* Extract the quasi-regular values of this line ... */
      for (int k = 0; k < nin; ++k)
         line[k] = pfield[src++];

      /* ... interpolate them to the regular length ... */
      TEMPLATE(rowina3,T)(line, npoints, nin, work, rowcode, msval, kret, omisng, operio , oveggy);
      if (*kret != 0) goto cleanup;

      /* ... and append the result to the temporary field. */
      for (int k = 0; k < npoints; ++k)
         regular[dst++] = line[k];
   }

   /* Copy the temporary field back over the caller's array. */
   total = klon * klat;
   for (int k = 0; k < total; ++k)
      pfield[k] = regular[k];

cleanup:

   Free(work);
   Free(line);
   Free(regular);

   return 0;
} /* qu2reg3 */

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * c-file-style: "Java"
 * c-basic-offset: 2
 * indent-tabs-mode: nil
 * show-trailing-whitespace: t
 * require-trailing-newline: t
 * End:
 */

// clang-format on
#include <string.h>

/*
 * Return the GRIB edition number of the message starting at `is`.
 *
 * The edition is obtained with GRIB_EDITION(is); a buffer shorter than
 * 8 bytes is reported through Error() first.
 */
int
gribVersion(unsigned char *is, size_t buffersize)
{
  const bool tooSmall = (buffersize < 8);
  if (tooSmall)
    {
      Error("Buffer too small (current size %d)!", (int) buffersize);
    }

  const int edition = GRIB_EDITION(is);
  return edition;
}

/*
 * Decode a 4-byte packed real at `grib`: byte 0 is handed to decfp2() as
 * the exponent, bytes 1..3 (combined by GET_UINT3) as the mantissa.
 */
static double
GET_Real(unsigned char *grib)
{
  const int exponent = GET_UINT1(grib[0]);
  const int mantissa = (int) (GET_UINT3(grib[1], grib[2], grib[3]));
  const double value = decfp2(exponent, mantissa);

  return value;
}

static size_t
decodeIS(unsigned char *is, int *isec0, int *iret)
{
  // Octets 1 - 4 : The letters G R I B. Four 8 bit fields.

  // Check letters -> GRIB, BUDG or TIDE.

  // Check that 'GRIB' is found where expected.
  bool lgrib = GRIB_START(is);

  // ECMWF pseudo-grib data uses 'BUDG' and 'TIDE'.
  bool lbudg = BUDG_START(is);
  bool ltide = TIDE_START(is);

  // Data is not GRIB or pseudo-grib.
  if (lgrib == false && lbudg == false && ltide == false)
  {
    *iret = 305;
    gprintf(__func__, "Input data is not GRIB or pseudo-grib.");
    gprintf(__func__, "Return code = %d", *iret);
  }
  if (lbudg || ltide)
  {
    *iret = 305;
    gprintf(__func__, "Pseudo-grib data unsupported.");
    gprintf(__func__, "Return code = %d", *iret);
  }

  // Octets 5 - 7 : Length of message. One 24 bit field.
  ISEC0_GRIB_Len = (int) (GRIB1_SECLEN(is));

  // Octet 8 : GRIB Edition Number. One 8 bit field.
  ISEC0_GRIB_Version = GRIB_EDITION(is);

  if (ISEC0_GRIB_Version > 1) Error("GRIB version %d unsupported!", ISEC0_GRIB_Version);

  int grib1offset = ISEC0_GRIB_Version * 4;

  size_t isLen = 4 + (size_t) grib1offset;

  return isLen;
}

/*
 * Decode ECMWF local extension 1: copies pds[40..50] into isec1[36..42].
 */
static void
decodePDS_ECMWF_local_Extension_1(unsigned char *pds, int *isec1)
{
  /* isec1[36..38]: extension identifier, class, type (one byte each) */
  for (int k = 0; k < 3; ++k)
    {
      isec1[36 + k] = GET_UINT1(pds[40 + k]);
    }

  /* Stream (two bytes) */
  isec1[39] = (int) (GET_UINT2(pds[43], pds[44]));

  /* Four bytes pds[45..48] are stored verbatim, without any byte-order conversion */
  memcpy(&isec1[40], &pds[45], 4);

  /* Forecast number */
  isec1[41] = GET_UINT1(pds[49]);

  /* Total number of forecasts */
  isec1[42] = GET_UINT1(pds[50]);
}

/*
 * Decode DWD local extension 254: pds[40..53] into isec1[36..49].
 */
static void
decodePDS_DWD_local_Extension_254(unsigned char *pds, int *isec1)
{
  /* isec1[36] is the extension identifier, isec1[37..47] are eleven
     further single-byte entries: twelve consecutive bytes in total. */
  for (int k = 0; k < 12; ++k)
    {
      isec1[36 + k] = GET_UINT1(pds[40 + k]);
    }

  /* The 16 bit value in pds[52..53] is split into its low 15 bits and
     the remaining top part. */
  const int packed = (int) (GET_UINT2(pds[52], pds[53]));

  isec1[48] = packed % 0x8000; /* DWD experiment identifier            */
  isec1[49] = packed >> 15;    /* DWD run type (0=main, 2=ass, 3=test) */
}

/*
 * Decode DWD local extension 253: pds[40..64] into isec1[36..56].
 */
static void
decodePDS_DWD_local_Extension_253(unsigned char *pds, int *isec1)
{
  /* isec1[36] is the extension identifier, isec1[37..47] are eleven
     further single-byte entries: twelve consecutive bytes in total. */
  for (int k = 0; k < 12; ++k)
    {
      isec1[36 + k] = GET_UINT1(pds[40 + k]);
    }

  /* The 16 bit value in pds[52..53] is split into its low 15 bits and
     the remaining top part. */
  const int packed = (int) (GET_UINT2(pds[52], pds[53]));

  isec1[48] = packed % 0x8000; /* DWD experiment identifier            */
  isec1[49] = packed >> 15;    /* DWD run type (0=main, 2=ass, 3=test) */

  /* User id, specified by table */
  isec1[50] = GET_UINT1(pds[54]);

  /* Four consecutive two-byte entries:
       isec1[51]  Experiment identifier
       isec1[52]  Ensemble identification by table
       isec1[53]  Number of ensemble members
       isec1[54]  Actual number of ensemble member */
  for (int k = 0; k < 4; ++k)
    {
      isec1[51 + k] = (int) (GET_UINT2(pds[55 + 2 * k], pds[56 + 2 * k]));
    }

  isec1[55] = GET_UINT1(pds[63]); /* Model major version number */
  isec1[56] = GET_UINT1(pds[64]); /* Model minor version number */
}

/*
 * Decode MPIM local extension 1: pds[40..45] into isec1[36..39].
 */
static void
decodePDS_MPIM_local_Extension_1(unsigned char *pds, int *isec1)
{
  /* Two single-byte entries: extension identifier, type of ensemble forecast */
  for (int k = 0; k < 2; ++k)
    {
      isec1[36 + k] = GET_UINT1(pds[40 + k]);
    }

  /* Two two-byte entries: individual ensemble member, number of forecasts in ensemble */
  for (int k = 0; k < 2; ++k)
    {
      isec1[38 + k] = (int) (GET_UINT2(pds[42 + 2 * k], pds[43 + 2 * k]));
    }
}

/*
 * Decode the product definition section (PDS) starting at `pds` into isec1.
 *
 *   pds   - start of the PDS; read through the PDS_* accessor macros
 *   isec0 - already decoded section 0; only the GRIB edition is consulted
 *   isec1 - receives the decoded values (through the ISEC1_* macros and,
 *           for local extensions, by direct indexing)
 *
 * Returns the PDS length as given by PDS_Len.
 */
static size_t
decodePDS(unsigned char *pds, int *isec0, int *isec1)
{
  size_t pdsLen = PDS_Len;

  // clang-format off
  ISEC1_CodeTable      = PDS_CodeTable;
  ISEC1_CenterID       = PDS_CenterID;
  ISEC1_ModelID        = PDS_ModelID;
  ISEC1_GridDefinition = PDS_GridDefinition;
  ISEC1_Sec2Or3Flag    = PDS_Sec2Or3Flag;
  ISEC1_Parameter      = PDS_Parameter;
  ISEC1_LevelType      = PDS_LevelType;

  /* For every level type NOT listed here the two level values are taken
     separately (PDS_Level1 / PDS_Level2); for the listed types the single
     value PDS_Level is stored as Level1 and Level2 is set to 0. */
  if ( (ISEC1_LevelType !=  20) && 
       (ISEC1_LevelType != GRIB1_LTYPE_99)           && 
       (ISEC1_LevelType != GRIB1_LTYPE_ISOBARIC)     && 
       (ISEC1_LevelType != GRIB1_LTYPE_ISOBARIC_PA)  && 
       (ISEC1_LevelType != GRIB1_LTYPE_ALTITUDE)     && 
       (ISEC1_LevelType != GRIB1_LTYPE_HEIGHT)       && 
       (ISEC1_LevelType != GRIB1_LTYPE_SIGMA)        && 
       (ISEC1_LevelType != GRIB1_LTYPE_HYBRID)       && 
       (ISEC1_LevelType != GRIB1_LTYPE_LANDDEPTH)    && 
       (ISEC1_LevelType != GRIB1_LTYPE_ISENTROPIC)   && 
       (ISEC1_LevelType != 115) && 
       (ISEC1_LevelType != 117) && 
       (ISEC1_LevelType != 125) && 
       (ISEC1_LevelType != 127) && 
       (ISEC1_LevelType != GRIB1_LTYPE_SEADEPTH)     && 
       (ISEC1_LevelType != 210) )
    {
      ISEC1_Level1 = PDS_Level1;
      ISEC1_Level2 = PDS_Level2;
    }
  else
    {
      ISEC1_Level1 = (int)(PDS_Level);
      ISEC1_Level2 = 0;
    }

  /* ISEC1_Year        = PDS_Year; */
  ISEC1_Month          = PDS_Month;
  ISEC1_Day            = PDS_Day;
  ISEC1_Hour           = PDS_Hour;
  ISEC1_Minute         = PDS_Minute;
  ISEC1_TimeUnit       = PDS_TimeUnit;
  ISEC1_TimePeriod1    = PDS_TimePeriod1;
  ISEC1_TimePeriod2    = PDS_TimePeriod2;
  ISEC1_TimeRange      = PDS_TimeRange;
  ISEC1_AvgNum         = (int)(PDS_AvgNum);
  ISEC1_AvgMiss        = PDS_AvgMiss;

  /* Year, century, sub-centre and decimal scale factor are read directly
     only for edition 1; for any other edition the century is derived from
     the single year byte pds[12] and the other two values are zeroed. */
  if ( ISEC0_GRIB_Version == 1 )
    {
      ISEC1_Year           = PDS_Year;
      ISEC1_Century        = PDS_Century;
      ISEC1_SubCenterID    = PDS_Subcenter;
      ISEC1_DecScaleFactor = PDS_DecimalScale;
    }
  else
    {
      int year             = GET_UINT1(pds[12]);
      if ( year <= 100 )
	{
	  ISEC1_Year       = year;
	  ISEC1_Century    = 1;
	}
      else
	{
	  ISEC1_Year       = year%100;
	  ISEC1_Century    = 1 + (year-ISEC1_Year)/100;
	}
      ISEC1_SubCenterID    = 0;
      ISEC1_DecScaleFactor = 0;
    }

  /* A negative year is stored as a positive year with a negated century. */
  if ( ISEC1_Year < 0 )
    {
      ISEC1_Year    = -ISEC1_Year;
      ISEC1_Century = -ISEC1_Century;
    }

  /* Everything beyond the first 28 bytes is treated as a local extension. */
  ISEC1_LocalFLag = 0;
  if ( pdsLen > 28 )
    {
      size_t localextlen = pdsLen-28;

      if ( localextlen > 4000 )
	{
	  /* Extension is skipped; the local flag stays 0. */
	  Warning("PDS larger than 4000 bytes not supported!");
	}
      else
	{
	  ISEC1_LocalFLag = 1;

	  /* NOTE(review): pds[40] is read below whenever pdsLen > 28, with no
	     check that the PDS is at least 41 bytes long - confirm callers
	     guarantee this. */

	  /* Centres 78, 215 and 250 are dispatched to the DWD decoders. */
	  if ( ISEC1_CenterID == 78 || ISEC1_CenterID == 215 || ISEC1_CenterID == 250 )
	    {
	      if ( pds[40] == 254 ) 
                decodePDS_DWD_local_Extension_254(pds, isec1);
	      else if ( pds[40] == 253 )
                decodePDS_DWD_local_Extension_253(pds, isec1);
	    }
	  /* Centre or sub-centre 98 is dispatched to the ECMWF decoder
	     (ISEC1_LocalFLag is always 1 at this point). */
	  else if ( (ISEC1_CenterID    == 98 && ISEC1_LocalFLag ==  1) ||
		    (ISEC1_SubCenterID == 98 && ISEC1_LocalFLag ==  1) ||
		    (ISEC1_CenterID    ==  7 && ISEC1_SubCenterID == 98) )
	    {
	      if ( pds[40] == 1 )
		decodePDS_ECMWF_local_Extension_1(pds, isec1);
	    }
	  /* Centre 252 is dispatched to the MPIM decoder. */
	  else if ( ISEC1_CenterID    == 252 && ISEC1_LocalFLag ==  1 )
	    {
	      if ( pds[40] == 1 )
		decodePDS_MPIM_local_Extension_1(pds, isec1);	      
	    }
	  else
	    {
	      /* Any other centre: copy the extension bytes one per int,
	         starting at isec1[24]. */
	      for ( size_t i = 0; i < localextlen; i++ )
                isec1[24+i] = pds[28+i];
	    }
	}
    }
  // clang-format on

  return pdsLen;
}

/* Double precision variant of the section 2 printer: forwards to gribPrintSec2DP(). */
static void
gribPrintSec2_double(int *isec0, int *isec2, double *fsec2)
{
  gribPrintSec2DP(isec0, isec2, fsec2);
}
/* Double precision variant of the section 3 printer: forwards to gribPrintSec3DP(). */
static void
gribPrintSec3_double(int *isec0, int *isec3, double *fsec3)
{
  gribPrintSec3DP(isec0, isec3, fsec3);
}
/* Double precision variant of the section 4 printer: forwards to gribPrintSec4DP(). */
static void
gribPrintSec4_double(int *isec0, int *isec4, double *fsec4)
{
  gribPrintSec4DP(isec0, isec4, fsec4);
}
/* Single precision variant of the section 2 printer: forwards to gribPrintSec2SP(). */
static void
gribPrintSec2_float(int *isec0, int *isec2, float *fsec2)
{
  gribPrintSec2SP(isec0, isec2, fsec2);
}
/* Single precision variant of the section 3 printer: forwards to gribPrintSec3SP(). */
static void
gribPrintSec3_float(int *isec0, int *isec3, float *fsec3)
{
  gribPrintSec3SP(isec0, isec3, fsec3);
}
/* Single precision variant of the section 4 printer: forwards to gribPrintSec4SP(). */
static void
gribPrintSec4_float(int *isec0, int *isec4, float *fsec4)
{
  gribPrintSec4SP(isec0, isec4, fsec4);
}

// clang-format off

#ifdef T
#undef T
#endif
#define T double
#ifdef T

#include <inttypes.h>

/*
 * Unpack jlend unsigned integers of NumBits bits each from the bit stream
 * at igrib (most significant bit first) and store fmin + zscale*value in
 * fpdata[0..jlend-1].
 *
 *   igrib   - packed input bytes
 *   jlend   - number of values to unpack
 *   NumBits - width of one packed value in bits; the 32 bit accumulator
 *             holds up to 7 left-over bits plus the bytes read for one
 *             value, which is why the caller only uses this routine for
 *             widths up to 25
 *   fmin    - reference value added to every scaled value
 *   zscale  - scale factor applied to every unpacked integer
 *   fpdata  - output array
 *
 * The unpacked integer is converted to T directly.  Going through `float`
 * (as an earlier version of this routine did) rounds 25 bit values above
 * 2^24 when T is double, because float carries only a 24 bit significand.
 */
static
void TEMPLATE(decode_array_common,T)(const unsigned char *restrict igrib, long jlend, int NumBits,
				     T fmin, T zscale, T *restrict fpdata)
{
  /* code from wgrib routine BDS_unpack */
  const unsigned char *bits = igrib;
  unsigned int tbits = 0;   /* bit accumulator                      */
  int n_bits = NumBits;
  int t_bits = 0;           /* number of unread bits held in tbits  */

  const unsigned jmask = (1U << n_bits) - 1U;
  for (long i = 0; i < jlend; ++i)
    {
      /* More than one byte is missing: fetch two bytes at once. */
      if (n_bits - t_bits > 8)
	{
	  tbits = (tbits << 16) | ((unsigned)bits[0] << 8) | ((unsigned)bits[1]);
	  bits += 2;
	  t_bits += 16;
	}

      /* Top up byte by byte until a full value is available. */
      while ( t_bits < n_bits )
	{
	  tbits = (tbits * 256) + *bits++;
	  t_bits += 8;
	}
      t_bits -= n_bits;
      /* Convert straight to T so that no precision is lost for T == double. */
      fpdata[i] = (T)((tbits >> t_bits) & jmask);
    }
  // at least this vectorizes :)
  for (long i = 0; i < jlend; ++i)
    fpdata[i] = fmin + zscale*fpdata[i];
}

/*
 * Bit-stream unpacker that accumulates each value in a double (code derived
 * from the wgrib routine BDS_unpack; older unoptimized variant).
 *
 * Reads jlend values of NumBits bits each from igrib, most significant bit
 * first, and stores (T)(fmin + zscale*value) in fpdata[0..jlend-1].
 */
static
void TEMPLATE(decode_array_common2,T)(const unsigned char *restrict igrib, long jlend, int NumBits,
				      T fmin, T zscale, T *restrict fpdata)
{
  const unsigned char *cursor = igrib;  /* byte currently being consumed  */
  int avail = 8;                        /* unread low bits left in *cursor */

  for (long k = 0; k < jlend; ++k)
    {
      double acc = 0.0;    /* value assembled so far        */
      int need = NumBits;  /* bits still missing for value  */

      /* Swallow the rest of the current byte for as long as all of its
         unread bits belong to this value; afterwards a fresh byte with
         8 unread bits is current. */
      for (; avail <= need; avail = 8)
        {
          const unsigned lowmask = (1U << avail) - 1U;
          acc = (acc * (double) (1U << avail)) + (double) (*cursor & lowmask);
          ++cursor;
          need -= avail;
        }

      /* Take the remaining bits from the top of the unread part of the
         current byte, leaving its lower bits for the next value. */
      if (need != 0)
        {
          const unsigned lowmask = (1U << need) - 1U;
          avail -= need;
          acc = (acc * (double) (1U << need)) + (double) (((unsigned)*cursor >> avail) & lowmask);
        }

      fpdata[k] = (T)(fmin + zscale*acc);
    }
}

/*
 * Decode jlend 16 bit big-endian unsigned integers from igrib and store
 * fmin + zscale*value in fpdata[0..jlend-1].
 *
 * The two bytes of every value are combined explicitly instead of reading
 * the byte buffer through a uint16_t pointer: igrib carries no alignment
 * guarantee, so a uint16_t load could be misaligned, and the byte-wise form
 * yields the same value on hosts of either endianness.
 */
static
void TEMPLATE(decode_array_2byte,T)(size_t jlend, const unsigned char *restrict igrib,
                                    T *fpdata, T fmin, T zscale)
{
  for (size_t i = 0; i < jlend; ++i)
    {
      const unsigned hi = igrib[2*i];
      const unsigned lo = igrib[2*i+1];
      const uint16_t ui16 = (uint16_t)((hi << 8) | lo);
      fpdata[i] = fmin + zscale * ui16;
    }
}

/*
 * Unpack jlend packed values of numBits bits each from igrib and store
 * fmin + zscale*value in fpdata[0..jlend-1].
 *
 * Dispatch on numBits:
 *    0              every output value is fmin
 *    8, 16, 24, 32  byte aligned values, combined most significant byte first
 *    other <= 25    decode_array_common
 *    26 .. 31       decode_array_common2
 *    anything else  reported through Error()
 *
 * With VECTORCODE defined, byte aligned input is first expanded with
 * UNPACK_GRIB into a temporary GRIBPACK array that is released before
 * returning.
 */
static 
void TEMPLATE(decode_array,T)(const unsigned char *restrict igrib, long jlend, int numBits, 
			      T fmin, T zscale, T *restrict fpdata)
{
  /* NOTE(review): these counters are declared but not used anywhere in this function. */
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER 
  uint64_t start_decode, end_decode;
#endif

#ifdef VECTORCODE
  GRIBPACK *lgrib = NULL;

  /* Byte aligned widths: expand the packed bytes into lgrib, one byte per element. */
  if ( numBits%8 == 0 )
    {
      long jlenc = jlend * numBits / 8;
      if ( jlenc > 0 ) 
	{
	  lgrib = (GRIBPACK*) Malloc(jlenc*sizeof(GRIBPACK));
	  if ( lgrib == NULL ) SysError("No Memory!");

	  (void) UNPACK_GRIB(igrib, lgrib, jlenc, -1L);
	}
    }

  if ( numBits ==  0 )
    {
      for (long i = 0; i < jlend; ++i)
	fpdata[i] = fmin;
    }
  else if ( numBits ==  8 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (int)lgrib[i];
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 16 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (((int)lgrib[2*i  ] <<  8) +  (int)lgrib[2*i+1]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 24 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (((int)lgrib[3*i  ] << 16) + ((int)lgrib[3*i+1] <<  8) +
	  	 (int)lgrib[3*i+2]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 32 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (((unsigned int)lgrib[4*i  ] << 24) + ((unsigned int)lgrib[4*i+1] << 16) +
		((unsigned int)lgrib[4*i+2] <<  8) +  (unsigned int)lgrib[4*i+3]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits <= 25 )
    {
      TEMPLATE(decode_array_common,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else if ( numBits > 25 && numBits < 32 )
    {
      TEMPLATE(decode_array_common2,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else
    {
      Error("Unimplemented packing factor %d!", numBits);
    }

  if ( lgrib ) Free(lgrib);

#else
  if ( numBits ==  0 )
    {
      /* No packed data: every value equals the reference value. */
      for (long i = 0; i < jlend; ++i)
	fpdata[i] = fmin;
    }
  else if ( numBits ==  8 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (int)igrib[i];
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 16 )
    {
      TEMPLATE(decode_array_2byte,T)((size_t) jlend, igrib, fpdata, fmin, zscale);
    }
  else if ( numBits == 24 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (T)(((int)igrib[3*i  ] << 16) + ((int)igrib[3*i+1] <<  8) +
                     (int)igrib[3*i+2]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 32 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (T)(((unsigned int)igrib[4*i  ] << 24) + ((unsigned int)igrib[4*i+1] << 16) +
                     ((unsigned int)igrib[4*i+2] <<  8) +  (unsigned int)igrib[4*i+3]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits <= 25 )
    {
      /* Widths that are not a multiple of 8 and fit the 32 bit accumulator. */
      TEMPLATE(decode_array_common,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else if ( numBits > 25 && numBits < 32 )
    {
      TEMPLATE(decode_array_common2,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else
    {
      Error("Unimplemented packing factor %d!", numBits);
    }
#endif
}

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * End:
 */

#ifdef T
#undef T
#endif
#define T float
#ifdef T

#include <inttypes.h>

/*
 * Unpack jlend unsigned integers of NumBits bits each from the bit stream
 * at igrib (most significant bit first) and store fmin + zscale*value in
 * fpdata[0..jlend-1].  Derived from the wgrib routine BDS_unpack.
 *
 * The raw integers are written to fpdata in a first pass; scaling is done
 * in a separate second pass over fpdata.
 */
static
void TEMPLATE(decode_array_common,T)(const unsigned char *restrict igrib, long jlend, int NumBits,
                                     T fmin, T zscale, T *restrict fpdata)
{
  const unsigned char *cursor = igrib;             /* next unread input byte      */
  const unsigned valmask = (1U << NumBits) - 1U;   /* keeps the low NumBits bits  */
  unsigned int window = 0;                         /* bit accumulator             */
  int pending = 0;                                 /* unread bits held in window  */

  /* Pass 1: extract the raw integers. */
  for (long k = 0; k < jlend; ++k)
    {
      /* More than one byte is missing: fetch two bytes at once. */
      if (NumBits - pending > 8)
        {
          window = (window << 16) | ((unsigned)cursor[0] << 8) | ((unsigned)cursor[1]);
          cursor += 2;
          pending += 16;
        }

      /* Top up byte by byte until a full value is available. */
      for (; pending < NumBits; pending += 8)
        {
          window = (window * 256) + *cursor++;
        }

      pending -= NumBits;
      /* T is float for this instantiation. */
      fpdata[k] = (T)((window >> pending) & valmask);
    }

  /* Pass 2: apply reference value and scale factor. */
  for (long k = 0; k < jlend; ++k)
    {
      fpdata[k] = fmin + zscale*fpdata[k];
    }
}

/*
 * Bit-stream unpacker that accumulates each value in a double (code derived
 * from the wgrib routine BDS_unpack; older unoptimized variant).
 *
 * Reads jlend values of NumBits bits each from igrib, most significant bit
 * first, and stores (T)(fmin + zscale*value) in fpdata[0..jlend-1].
 */
static
void TEMPLATE(decode_array_common2,T)(const unsigned char *restrict igrib, long jlend, int NumBits,
				      T fmin, T zscale, T *restrict fpdata)
{
  const unsigned char *cursor = igrib;  /* byte currently being consumed  */
  int avail = 8;                        /* unread low bits left in *cursor */

  for (long k = 0; k < jlend; ++k)
    {
      double acc = 0.0;    /* value assembled so far        */
      int need = NumBits;  /* bits still missing for value  */

      /* Swallow the rest of the current byte for as long as all of its
         unread bits belong to this value; afterwards a fresh byte with
         8 unread bits is current. */
      for (; avail <= need; avail = 8)
        {
          const unsigned lowmask = (1U << avail) - 1U;
          acc = (acc * (double) (1U << avail)) + (double) (*cursor & lowmask);
          ++cursor;
          need -= avail;
        }

      /* Take the remaining bits from the top of the unread part of the
         current byte, leaving its lower bits for the next value. */
      if (need != 0)
        {
          const unsigned lowmask = (1U << need) - 1U;
          avail -= need;
          acc = (acc * (double) (1U << need)) + (double) (((unsigned)*cursor >> avail) & lowmask);
        }

      fpdata[k] = (T)(fmin + zscale*acc);
    }
}

/*
 * Decode jlend 16 bit big-endian unsigned integers from igrib and store
 * fmin + zscale*value in fpdata[0..jlend-1].
 *
 * The two bytes of every value are combined explicitly instead of reading
 * the byte buffer through a uint16_t pointer: igrib carries no alignment
 * guarantee, so a uint16_t load could be misaligned, and the byte-wise form
 * yields the same value on hosts of either endianness.
 */
static
void TEMPLATE(decode_array_2byte,T)(size_t jlend, const unsigned char *restrict igrib,
                                    T *fpdata, T fmin, T zscale)
{
  for (size_t i = 0; i < jlend; ++i)
    {
      const unsigned hi = igrib[2*i];
      const unsigned lo = igrib[2*i+1];
      const uint16_t ui16 = (uint16_t)((hi << 8) | lo);
      fpdata[i] = fmin + zscale * ui16;
    }
}

/*
 * Unpack jlend packed values of numBits bits each from igrib and store
 * fmin + zscale*value in fpdata[0..jlend-1].
 *
 * Dispatch on numBits:
 *    0              every output value is fmin
 *    8, 16, 24, 32  byte aligned values, combined most significant byte first
 *    other <= 25    decode_array_common
 *    26 .. 31       decode_array_common2
 *    anything else  reported through Error()
 *
 * With VECTORCODE defined, byte aligned input is first expanded with
 * UNPACK_GRIB into a temporary GRIBPACK array that is released before
 * returning.
 */
static 
void TEMPLATE(decode_array,T)(const unsigned char *restrict igrib, long jlend, int numBits, 
			      T fmin, T zscale, T *restrict fpdata)
{
  /* NOTE(review): these counters are declared but not used anywhere in this function. */
#if defined _GET_X86_COUNTER || defined _GET_MACH_COUNTER 
  uint64_t start_decode, end_decode;
#endif

#ifdef VECTORCODE
  GRIBPACK *lgrib = NULL;

  /* Byte aligned widths: expand the packed bytes into lgrib, one byte per element. */
  if ( numBits%8 == 0 )
    {
      long jlenc = jlend * numBits / 8;
      if ( jlenc > 0 ) 
	{
	  lgrib = (GRIBPACK*) Malloc(jlenc*sizeof(GRIBPACK));
	  if ( lgrib == NULL ) SysError("No Memory!");

	  (void) UNPACK_GRIB(igrib, lgrib, jlenc, -1L);
	}
    }

  if ( numBits ==  0 )
    {
      for (long i = 0; i < jlend; ++i)
	fpdata[i] = fmin;
    }
  else if ( numBits ==  8 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (int)lgrib[i];
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 16 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (((int)lgrib[2*i  ] <<  8) +  (int)lgrib[2*i+1]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 24 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (((int)lgrib[3*i  ] << 16) + ((int)lgrib[3*i+1] <<  8) +
	  	 (int)lgrib[3*i+2]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 32 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (((unsigned int)lgrib[4*i  ] << 24) + ((unsigned int)lgrib[4*i+1] << 16) +
		((unsigned int)lgrib[4*i+2] <<  8) +  (unsigned int)lgrib[4*i+3]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits <= 25 )
    {
      TEMPLATE(decode_array_common,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else if ( numBits > 25 && numBits < 32 )
    {
      TEMPLATE(decode_array_common2,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else
    {
      Error("Unimplemented packing factor %d!", numBits);
    }

  if ( lgrib ) Free(lgrib);

#else
  if ( numBits ==  0 )
    {
      /* No packed data: every value equals the reference value. */
      for (long i = 0; i < jlend; ++i)
	fpdata[i] = fmin;
    }
  else if ( numBits ==  8 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (int)igrib[i];
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 16 )
    {
      TEMPLATE(decode_array_2byte,T)((size_t) jlend, igrib, fpdata, fmin, zscale);
    }
  else if ( numBits == 24 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (T)(((int)igrib[3*i  ] << 16) + ((int)igrib[3*i+1] <<  8) +
                     (int)igrib[3*i+2]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits == 32 )
    for (long i = 0; i < jlend; ++i)
      {
	T dval = (T)(((unsigned int)igrib[4*i  ] << 24) + ((unsigned int)igrib[4*i+1] << 16) +
                     ((unsigned int)igrib[4*i+2] <<  8) +  (unsigned int)igrib[4*i+3]);
	fpdata[i] = fmin + zscale * dval;
      }
  else if ( numBits <= 25 )
    {
      /* Widths that are not a multiple of 8 and fit the 32 bit accumulator. */
      TEMPLATE(decode_array_common,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else if ( numBits > 25 && numBits < 32 )
    {
      TEMPLATE(decode_array_common2,T)(igrib, jlend, numBits, fmin, zscale, fpdata);
    }
  else
    {
      Error("Unimplemented packing factor %d!", numBits);
    }
#endif
}

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * End:
 */


#ifdef T
#undef T
#endif
#define T double
#ifdef T

/*
 * Decode the grid description section (GDS) starting at `gds`.
 *
 *   gds         - start of the GDS; read through the GDS_* accessor macros
 *   isec0       - already decoded section 0; only the GRIB edition is consulted
 *   isec2       - receives the integer grid description (the first 22
 *                 entries are zeroed first)
 *   fsec2       - receives the rotation angle (rotated lat/lon grids) and
 *                 the vertical coordinate parameters, the latter starting
 *                 at fsec2[10]
 *   numGridVals - receives the number of grid values implied by the grid
 *                 description (0 if none could be derived)
 *
 * Returns the GDS length as given by GDS_Len.
 */
static
size_t TEMPLATE(decodeGDS,T)(unsigned char  *gds, int *isec0, int *isec2, T *fsec2, size_t *numGridVals)
{
  // int imisng = 0;
  bool ReducedGrid = false, VertCoorTab = false;
#ifdef VECTORCODE
  unsigned char *igrib;
  GRIBPACK *lgrib = NULL;
  size_t lGribLen = 0;
#endif

  *numGridVals = 0;

  memset(isec2, 0, 22*sizeof(int));

  const unsigned gdsLen = GDS_Len;

  // A PVPL value of 0 is treated exactly like 0xFF: neither flag below gets set.
  unsigned ipvpl = GDS_PVPL;
  if ( ipvpl == 0 ) ipvpl = 0xFF;

  if ( ipvpl != 0xFF )
    { // Either vct or reduced grid
      if ( GDS_NV != 0 )
	{ // we have vct
	  VertCoorTab = true;
	  // Bytes remaining after the 4*NV table bytes are taken as a list of points per line.
	  const unsigned ipl =  4*GDS_NV + ipvpl - 1;
	  if ( ipl < gdsLen ) ReducedGrid = true;
	}
      else
	{
	  VertCoorTab = false;
	  ReducedGrid = true;
	}
      // ReducedGrid = (gdsLen - 32 - 4*GDS_NV);
    }
 
  // NOTE(review): gdsLen is unsigned, so this test is true for every gdsLen != 32, including values below 32 - confirm intent.
  if ( ISEC0_GRIB_Version == 0 ) VertCoorTab = ((gdsLen - 32) > 0);
  
  if ( ReducedGrid )
    {
      // locnl: offset of the list of points per line; jlenl: number of 2-byte entries up to the end of the section.
      const unsigned locnl = GDS_PVPL - 1U + (VertCoorTab * 4U * GDS_NV);
      const unsigned jlenl = (gdsLen - locnl)  >> 1;
      // The list is only accepted when it has exactly one entry per latitude.
      if ( jlenl == GDS_NumLat )
	{
	  ISEC2_Reduced = true;
          size_t accum = 0;
	  for ( size_t i = 0; i < jlenl; ++i )
	    {
              unsigned rpi = GET_UINT2(gds[locnl+2*i], gds[locnl+2*i+1]);
              ISEC2_ReducedPoints(i) = (int)rpi;
              accum += rpi;
	    }
          // Number of grid values = sum of the points of all lines.
          *numGridVals = accum;
	}
      else
	{
	  ReducedGrid = false;
	}
    }

  ISEC2_GridType = GDS_GridType;

  // Gaussian grid definition.

  if ( ISEC2_GridType == GRIB1_GTYPE_LATLON    ||
       ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN  ||
       ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
    {
      ISEC2_NumLat    = (int)(GDS_NumLat);
      // For reduced grids numGridVals was already set from the list of points per line.
      if ( ! ReducedGrid )
	{
	  ISEC2_NumLon = (int)(GDS_NumLon);
	  *numGridVals  = (size_t)ISEC2_NumLon*(size_t)ISEC2_NumLat;
	}
      ISEC2_FirstLat  = GDS_FirstLat;
      ISEC2_FirstLon  = GDS_FirstLon;
      ISEC2_ResFlag   = GDS_ResFlag;
      ISEC2_LastLat   = GDS_LastLat;
      ISEC2_LastLon   = GDS_LastLon;
      ISEC2_LonIncr   = (int)(GDS_LonIncr);

      ISEC2_NumPar    = (int)GDS_NumPar;
      ISEC2_ScanFlag  = GDS_ScanFlag;
      if ( ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
	{
	  ISEC2_LatSP     = GDS_LatSP;
	  ISEC2_LonSP     = GDS_LonSP;
	  FSEC2_RotAngle  = (T)GDS_RotAngle;
	}
      // if ( Lons != Longitudes || Lats != Latitudes ) Error("Latitude/Longitude Conflict");
    }
  // The next two branches decode nothing; the plain GAUSSIAN, LATLON and LATLON_ROT
  // types listed in them were already handled by the first branch above.
  else if ( ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN     ||
	    ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN_ROT ||
	    ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN_STR ||
	    ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN_ROTSTR )
    {
      // iret = decodeGDS_GG(gds, gdspos, isec0, isec2, imisng);
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_LATLON     ||
	    ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT ||
	    ISEC2_GridType == GRIB1_GTYPE_LATLON_STR ||
	    ISEC2_GridType == GRIB1_GTYPE_LATLON_ROTSTR )
    {
      // iret = decodeGDS_LL(gds, gdspos, isec0, isec2, imisng);
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_LCC )
    {
      ISEC2_NumLon    = (int)(GDS_NumLon);
      ISEC2_NumLat    = (int)(GDS_NumLat);
      *numGridVals  = (size_t)ISEC2_NumLon*(size_t)ISEC2_NumLat;
      ISEC2_FirstLat  = GDS_FirstLat;
      ISEC2_FirstLon  = GDS_FirstLon;
      ISEC2_ResFlag   = GDS_ResFlag;
      ISEC2_Lambert_Lov   = GDS_Lambert_Lov;
      ISEC2_Lambert_dx    = GDS_Lambert_dx;
      ISEC2_Lambert_dy    = GDS_Lambert_dy;
      ISEC2_Lambert_LatS1 = GDS_Lambert_LatS1;
      ISEC2_Lambert_LatS2 = GDS_Lambert_LatS2;
      ISEC2_Lambert_LatSP = GDS_Lambert_LatSP;
      ISEC2_Lambert_LonSP = GDS_Lambert_LonSP;
      ISEC2_Lambert_ProjFlag = GDS_Lambert_ProjFlag;
      ISEC2_ScanFlag      = GDS_ScanFlag;
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_SPECTRAL )
    {
      ISEC2_PentaJ  = (int)(GDS_PentaJ); // Truncation
      ISEC2_PentaK  = (int)(GDS_PentaK);
      ISEC2_PentaM  = (int)(GDS_PentaM);
      ISEC2_RepType = GDS_RepType;
      ISEC2_RepMode = GDS_RepMode;
      *numGridVals  = ((size_t)ISEC2_PentaJ+1)*((size_t)ISEC2_PentaJ+2);
      isec2[ 6] = 0;
      isec2[ 7] = 0;
      isec2[ 8] = 0;
      isec2[ 9] = 0;
      isec2[10] = 0;
      // iret = decodeGDS_SH(gds, gdspos, isec0, isec2, imisng);
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_GME )
    {
      ISEC2_GME_NI2    = (int)(GDS_GME_NI2);
      ISEC2_GME_NI3    = (int)(GDS_GME_NI3);
      ISEC2_GME_ND     = (int)(GDS_GME_ND);
      ISEC2_GME_NI     = (int)(GDS_GME_NI);
      ISEC2_GME_AFlag  = GDS_GME_AFlag;
      ISEC2_GME_LatPP  = GDS_GME_LatPP;
      ISEC2_GME_LonPP  = GDS_GME_LonPP;
      ISEC2_GME_LonMPL = GDS_GME_LonMPL;
      ISEC2_GME_BFlag  = GDS_GME_BFlag;
      *numGridVals  = ((size_t)ISEC2_GME_NI+1)*((size_t)ISEC2_GME_NI+1)*10;
      // iret = decodeGDS_TR(gds, gdspos, isec0, isec2, imisng);
    }
  else
    {
      // Unknown grid type: only the dimensions are decoded; the message is issued once per process.
      static bool lwarn = true;
      unsigned nlon = GDS_NumLon, nlat = GDS_NumLat;
      ISEC2_NumLon = (int)nlon;
      ISEC2_NumLat = (int)nlat;
      *numGridVals  = (size_t)nlon*(size_t)nlat;
      if ( lwarn )
        {
          lwarn = false;
          Message("GRIB gridtype %d unsupported", ISEC2_GridType);
        }
    }

  // Vertical coordinate parameters for hybrid levels.
  // Get number of vertical coordinate parameters, if any.

  ISEC2_NumVCP = 0;

  isec2[17] = 0;
  isec2[18] = 0;

  if ( VertCoorTab )
    {
      // locnv: offset of the first vertical coordinate parameter within the GDS.
      int locnv;
      if ( ISEC0_GRIB_Version  == 0 )
	{
	  // Edition 0: the table starts at offset 32 and fills the rest of the section.
	  locnv = 32;
	  ISEC2_NumVCP = ((int)gdsLen - 32) >> 2;
	}
      else
	{
	  locnv = (int)GDS_PVPL - 1;
	  ISEC2_NumVCP = GDS_NV;
	}
#if defined (SX)
      lGribLen = 4*ISEC2_NumVCP;	      
      lgrib    = (GRIBPACK*) Malloc(lGribLen*sizeof(GRIBPACK));

      igrib = &gds[locnv];
      if ( ISEC2_NumVCP > 0 ) (void) UNPACK_GRIB(igrib, lgrib, lGribLen, -1L);
      for (int i = 0; i < ISEC2_NumVCP; ++i)
	{
	  const int iexp  = lgrib[4*i];
	  const int imant = GET_UINT3(lgrib[4*i+1], lgrib[4*i+2], lgrib[4*i+3]);
	  fsec2[10+i] = POW_2_M24 * imant * ldexp(1.0, 4 * (iexp - 64));
	}

      Free(lgrib);
#else
      // Each parameter is a 4-byte packed real: one exponent byte and three mantissa bytes, converted by decfp2().
      for (int i = 0; i < ISEC2_NumVCP; ++i)
	{
	  const int iexp  = gds[locnv+4*i];
	  const int imant = (int)(GET_UINT3(gds[locnv+4*i+1], gds[locnv+4*i+2], gds[locnv+4*i+3]));
	  fsec2[10+i] = (T)decfp2(iexp,imant);
	}
#endif
    }

  return gdsLen;
}

#define ldexp_double ldexp
#define ldexp_float  ldexpf
#define pow_double pow
#define pow_float powf

/*
 * Decode the Binary Data Section (section 4) of a GRIB record.
 *
 * Parameters:
 *   decscale     decimal scale factor; if non-zero, every decoded value is
 *                multiplied by 10^(-decscale)
 *   bds          first octet of the binary data section
 *   isec2        integer section 2 values (ISEC2_PentaJ is read for complex
 *                packed spherical harmonics)
 *   isec4        integer section 4 output; its first 42 entries are cleared
 *   fsec4        receives the decoded values
 *   fsec4len     capacity of fsec4 in elements
 *   dfunc        decoding mode; with 'J' only the isec4 entries are set and
 *                fsec4 is left untouched
 *   bdsLen       length of the section in octets
 *   numGridVals  number of grid values from section 2; needed for constant
 *                fields, which are stored with 0 bits per value
 *   iret         0 on success, otherwise 710 (fsec4 too small), 1999 (second
 *                order packed grid), 2001 (0 bits per value but data present)
 *                or 2002 (constant field without a known grid size)
 */
static
void TEMPLATE(decodeBDS,T)(int decscale, unsigned char *bds, int *isec2, int *isec4,
                           T *fsec4, int fsec4len, int dfunc, size_t bdsLen, size_t numGridVals, int *iret)
{
  size_t ioff = 0;           // number of values stored unpacked in front of the packed data
  enum { bds_head = 11 };    // size of the fixed section header in octets
  T zscale = 0.;
  T fmin = 0.;
  T *fpdata = fsec4;         // next free position in the output array

  *iret = 0;
  unsigned char *igrib = bds;

  memset(isec4, 0, 42*sizeof(int));

  // 4 bit flag / 4 bit count of unused bits at end of block octet.

  const int bds_flag = BDS_Flag;

  // 0------- grid point
  // 1------- spherical harmonics

  const bool lspherc = (bds_flag >> 7)&1;
  if ( lspherc ) isec4[2] = 128;
  else           isec4[2] = 0;

  // -0------  simple packing
  // -1------ complex packing

  const bool lcomplex = (bds_flag >> 6)&1;
  if ( lcomplex ) isec4[3] = 64;
  else            isec4[3] =  0;

  // ---0---- No additional flags
  // ---1---- Additional flags; this decoder then expects a header that is
  //          12 octets longer (zoff) and stores BDS_Z in isec4[6]

  const bool lcompress = (bds_flag >> 4)&1;

  unsigned zoff;
  if ( lcompress )
    { isec4[5] = 16; isec4[6] = BDS_Z; zoff = 12; }
  else
    { isec4[5] =  0; isec4[6] = 0;     zoff =  0; }

  // ----++++ number of unused bits at end of section)

  const unsigned bds_ubits = bds_flag & 0xF;

  // scale factor (2 bytes)
  const int jscale = BDS_BinScale;

  // check for missing data indicators.

  const int iexp  = bds[ 6];
  const int imant = (int)(GET_UINT3(bds[ 7], bds[ 8], bds[ 9]));

  // all bits set in scale factor and reference value mark a field without data
  const int imiss = (jscale == 0xFFFF && iexp == 0xFF && imant == 0xFFFFFF);

  // convert reference value and scale factor.

  if ( ! (dfunc == 'J') && imiss == 0 )
    {
      fmin = (T)BDS_RefValue;
      zscale = TEMPLATE(ldexp,T)((T)1.0, jscale);
    }

  // get number of bits in each data value.

  unsigned dvbits = BDS_NumBits;
  ISEC4_NumBits = BDS_NumBits;

  // octet number of start of packed data calculated from start of block 4 - 1

  size_t locnd = zoff + bds_head;

  // if data is in spherical harmonic form, distinguish  between simple/complex packing (lcomplex = 0/1)

  if ( lspherc )
    {
      if ( !lcomplex )
        {
          // no unpacked binary data present octet number of start of packed data
          // calculated from start of block 4 - 1

          ioff   = 1;
          locnd += 4*ioff;  // RealCoef

          // get real (0,0) coefficient in grib format and convert to floating point.
          if ( dfunc != 'J' )
            {
              if ( imiss ) *fpdata++ = 0.0;
              else         *fpdata++ = (T)BDS_RealCoef;
            }
        }
      else // complex packed spherical harmonics
        {
          isec4[15] = BDS_PackData;
          // scaling factor
          isec4[16] = BDS_Power;

          // pentagonal resolution parameters of the unpacked section of data field

          const int jup = bds[zoff+15];
          const int kup = bds[zoff+16];
          const int mup = bds[zoff+17];

          // NOTE(review): the store index is shifted by zoff while isec4[19] is
          // read back unshifted further down - confirm this is intended for
          // records with the compressed flag set.
          isec4[zoff+17] = jup;
          isec4[zoff+18] = kup;
          isec4[zoff+19] = mup;

          // unpacked binary data

          locnd += 4; // 2 + power
          locnd += 3; // j, k, m
          ioff   = ((size_t)jup+1)*((size_t)jup+2);

          if ( dfunc != 'J' )
            for ( size_t i = 0; i < ioff; ++i )
              {
                if ( imiss )
                  fpdata[i] = 0.0;
                else
                  {
                    const int iexp2  = (int)(bds[locnd+4*i]);
                    const int imant2 = (int)(GET_UINT3(bds[locnd+4*i+1], bds[locnd+4*i+2], bds[locnd+4*i+3]));
                    fpdata[i] = (T)decfp2(iexp2,imant2);
                  }
              }
          fpdata += ioff;
          locnd += 4*ioff;  /* RealCoef */
        }
    }
  else
    {
      if ( lcomplex )
        {
          *iret = 1999;
          gprintf(__func__, " Second order packed grids unsupported!");
          gprintf(__func__, " Return code =  %d", *iret);
          return;
        }
    }

  // Decode data values to floating point and store in fsec4.
  // First calculate the number of data values.
  // Take into account that spherical harmonics can be packed
  // simple (lcomplex = 0) or complex (lcomplex = 1)

  // A section shorter than its own header would make the unsigned
  // subtraction wrap around to a huge value.
  const bool ltrunc = (bdsLen < locnd);
  size_t jlend = ltrunc ? 0 : bdsLen - locnd;

  if ( dvbits == 0 )
    {
      // constant field: at most one padding octet may follow the header
      if ( ltrunc || jlend > 1 )
        {
          *iret = 2001;
          gprintf(__func__, " Number of bits per data value = 0!");
          gprintf(__func__, " Return code =  %d", *iret);
          return;
        }

      if ( numGridVals == 0 )
        {
          *iret = 2002;
          gprintf(__func__, " Constant field unsupported for this grid type!");
          gprintf(__func__, " Return code =  %d", *iret);
          return;
        }

      jlend = numGridVals - ioff;
    }
  else
    {
      // number of complete values in the packed bits; never let the
      // subtraction of the unused bits wrap around
      const size_t nbits = jlend*8;
      jlend = (nbits > bds_ubits) ? (nbits - bds_ubits) / dvbits : 0;
    }

  ISEC4_NumValues        = (int)(jlend + ioff);
  ISEC4_NumNonMissValues = 0;

  // dvbits == 0 is excluded to avoid a division by zero; the value count of a
  // constant field was already derived from numGridVals above.
  if ( lcompress && dvbits != 0 )
    {
      const size_t len = ((size_t) ((bds[17]<<16)+(bds[18]<<8)+bds[19]));

      ISEC4_NumValues = (int)(len*8/dvbits);

      if ( lspherc ) ISEC4_NumValues += lcomplex ? (int)ioff : 1;
    }

  if ( dfunc == 'J' ) return;

  // check length of output array.

  if ( ISEC4_NumValues > fsec4len )
    {
      *iret = 710;
      gprintf(__func__, " Output array too small. Length = %d", fsec4len);
      gprintf(__func__, " Number of values = %d", ISEC4_NumValues);
      gprintf(__func__, " Return code =  %d", *iret);
      return;
    }

  if ( imiss ) memset((char *)fpdata, 0, jlend*sizeof(T));
  else
    {
      igrib += locnd;

      TEMPLATE(decode_array,T)(igrib, (long)jlend, ISEC4_NumBits, fmin, zscale, fpdata);
    }

  if ( lspherc && lcomplex )
    {
      int pcStart = isec4[19], pcScale = isec4[16];
      TEMPLATE(scatter_complex,T)(fsec4, pcStart, ISEC2_PentaJ, ISEC4_NumValues);
      TEMPLATE(scale_complex,T)(fsec4, pcStart, pcScale, ISEC2_PentaJ, 1);
    }

  if ( CGRIBEX_Fix_ZSE )  // Fix ZeroShiftError of simple packed spherical harmonics
    if ( lspherc && !lcomplex )
      {
        // 20100705: Fix ZeroShiftError - Edi Kirk
        // fsec4[1] only exists if at least two values were decoded
        if ( ISEC4_NumValues > 1 && IS_NOT_EQUAL(fsec4[1], 0.0) )
          {
            const T zserr = fsec4[1];
            for (int i = 1; i < ISEC4_NumValues; ++i) fsec4[i] -= zserr;
          }
      }

  if ( decscale )
    {
      const T scale = TEMPLATE(pow,T)((T)10.0, (T)-decscale);
      for (int i = 0; i < ISEC4_NumValues; ++i) fsec4[i] *= scale;
    }
}


/*
 * Decode one GRIB record into the integer/real section arrays.
 *
 * Parameters:
 *   isec0, isec1, isec2, isec3, isec4   integer section arrays (output)
 *   fsec2, fsec3                        real section arrays; FSEC3_MissVal is
 *                                       used as fill value for bitmap gaps
 *   fsec4       receives the decoded data values
 *   fsec4len    capacity of fsec4 in elements
 *   kgrib       the GRIB record; it is read octet by octet
 *   kleng       unused
 *   kword       set to the record length in units of sizeof(int), rounded up
 *   dfunc       decoding mode: 'J' does not write data values, 'R' expands a
 *               reduced grid with qu2reg3, 'L' and 'J' skip the NaN missing
 *               value replacement
 *   iret        0 on success; error codes of the section decoders, 710 if
 *               fsec4 is too small for the bitmap expanded field, -801 if the
 *               reduced grid row sums do not match the number of values
 */
void TEMPLATE(grib_decode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *isec3,
                             T *fsec3, int *isec4, T *fsec4, int fsec4len, int *kgrib,
                             int kleng, int *kword, int dfunc, int *iret)
{
  UCHAR *bms = NULL;
  bool lsect2 = false, lsect3 = false;
  static bool lmissvalinfo = true;  // print the NaN missing value message only once

  UNUSED(kleng);

  *iret = 0;

  grsdef();

  ISEC2_Reduced = false;

  // ----------------------------------------------------------------
  // IS Indicator Section (Section 0)
  // ----------------------------------------------------------------
  UCHAR *is = (UCHAR *) &kgrib[0];
  size_t isLen = decodeIS(is, isec0, iret);

  size_t gribLen = (size_t)ISEC0_GRIB_Len;

  /*
    When decoding or calculating length, previous editions
    of the GRIB code must be taken into account.

    In the table below, covering sections 0 and 1 of the GRIB
    code, octet numbering is from the beginning of the GRIB
    message;
    * indicates that the value is not available in the code edition;
    R indicates reserved, should be set to 0;
    Experimental edition is considered as edition -1.

    GRIB code edition -1 has fixed length of 20 octets for
    section 1, the length not included in the message.
    GRIB code edition 0 has fixed length of 24 octets for
    section 1, the length being included in the message.
    GRIB code edition 1 can have different lengths for section
    1, the minimum being 28 octets, length being included in
    the message.

                                         Octet numbers for code
                                                  editions

                 Contents.                   -1      0      1
                 ---------                ----------------------
       Letters GRIB                          1-4    1-4    1-4
       Total length of GRIB message.          *      *     5-7
       GRIB code edition number               *      *      8
       Length of Section 1.                   *     5-7    9-11
       Reserved octet (R).                    *      8(R)   *
       Version no. of Code Table 2.           *      *     12
       Identification of centre.              5      9     13
       Generating process.                    6     10     14
       Grid definition .                      7     11     15
       Flag (Code Table 1).                   8     12     16
       Indicator of parameter.                9     13     17
       Indicator of type of level.           10     14     18
       Height, pressure etc of levels.      11-12  15-16  19-20
       Year of century.                      13     17     21
       Month.                                14     18     22
       Day.                                  15     19     23
       Hour.                                 16     20     24
       Minute.                               17     21     25
       Indicator of unit of time.            18     22     26
       P1 - Period of time.                  19     23     27
       P2 - Period of time                  20(R)   24     28
       or reserved octet (R).
       Time range indicator.                21(R)   25     29
       or reserved octet (R).
       Number included in average.       22-23(R)  26-27  30-31
       or reserved octet (R).
       Number missing from average.         24(R)  28(R)   32
       or reserved octet (R).
       Century of data.                       *      *     33
       Designates sub-centre if not 0.        *      *     34
       Decimal scale factor.                  *      *    35-36
       Reserved. Set to 0.                    *      *    37-48
       (Need not be present)
       For originating centre use only.       *      *    49-nn
       (Need not be present)

    Identify which GRIB code edition is being decoded.

    In GRIB edition 1, the edition number is in octet 8.
    In GRIB edition 0, octet 8 is reserved and set to 0.
    In GRIB edition -1, octet 8 is a flag field and can have
    a valid value of 0, 1, 2 or 3.

    However, GRIB edition number 0 has a fixed
    length of 24, included in the message, for section 1, so
    if the value extracted from octets 5-7 is 24 and that from
    octet 8 is 0, it is safe to assume edition 0 of the code.

  */

  // Set length of GRIB message to missing data value.
  if ( ISEC0_GRIB_Len == 24 && ISEC0_GRIB_Version == 0 ) ISEC0_GRIB_Len = 0;

  // ----------------------------------------------------------------
  // PDS Product Definition Section (Section 1)
  // ----------------------------------------------------------------
  UCHAR *pds = is + isLen;
  size_t pdsLen = decodePDS(pds, isec0, isec1);

  // ----------------------------------------------------------------
  // GDS Grid Description Section (Section 2)
  // ----------------------------------------------------------------
  size_t numGridVals = 0;
  size_t gdsLen = 0;
  const bool gdsIncluded = ISEC1_Sec2Or3Flag & 128;
  if ( gdsIncluded )
    {
      UCHAR *gds = is + isLen + pdsLen;
      gdsLen = TEMPLATE(decodeGDS,T)(gds, isec0, isec2, fsec2, &numGridVals);
    }

  // ----------------------------------------------------------------
  // BMS Bit-Map Section Section (Section 3)
  // ----------------------------------------------------------------
  isec3[0] = 0;
  size_t bmsLen = 0, bitmapSize = 0, imaskSize = 0;
  const bool bmsIncluded = ISEC1_Sec2Or3Flag & 64;
  if ( bmsIncluded )
    {
      bms = is + isLen + pdsLen + gdsLen;
      bmsLen = BMS_Len;

      // the bitmap follows a 6 octet header; imaskSize counts all of its bits
      imaskSize = (bmsLen > 6) ? (bmsLen - 6)<<3 : 0;
      // Never let the unsigned subtraction wrap around if the section claims
      // more unused bits than it has bitmap bits.
      const size_t unusedBits = (size_t)BMS_UnusedBits;
      bitmapSize = (imaskSize > unusedBits) ? imaskSize - unusedBits : 0;
    }

  // ----------------------------------------------------------------
  // BDS Binary Data Section (Section 4)
  // ----------------------------------------------------------------
  UCHAR *bds = is + isLen + pdsLen + gdsLen + bmsLen;
  unsigned bdsLen = BDS_Len;
  /*
    If a very large product, the section 4 length field holds
    the number of bytes in the product after section 4 upto
    the end of the padding bytes.
    This is a fixup to get round the restriction on product lengths
    due to the count being only 24 bits. It is only possible because
    the (default) rounding for GRIB products is 120 bytes.
  */
  const bool llarge = (gribLen > JP23SET && bdsLen <= 120);
  if ( llarge )
    {
      gribLen &= JP23SET;
      gribLen *= 120;
      ISEC0_GRIB_Len = (int)gribLen;
      bdsLen = correct_bdslen(bdsLen, (int)gribLen, (long)(isLen+pdsLen+gdsLen+bmsLen));
    }

  TEMPLATE(decodeBDS,T)(ISEC1_DecScaleFactor, bds, isec2, isec4, fsec4, fsec4len, dfunc, bdsLen, numGridVals, iret);

  if ( *iret != 0 ) return;

  ISEC4_NumNonMissValues = ISEC4_NumValues;

  if ( bitmapSize > 0 )
    {
      // The expansion below writes bitmapSize elements to fsec4; decodeBDS has
      // only checked the number of packed values against fsec4len.
      if ( dfunc != 'J' && (fsec4len < 0 || bitmapSize > (size_t)fsec4len) )
        {
          *iret = 710;
          gprintf(__func__, " Output array too small. Length = %d", fsec4len);
          gprintf(__func__, " Number of values = %d", (int)bitmapSize);
          gprintf(__func__, " Return code =  %d", *iret);
          return;
        }

      if ( dfunc != 'L' && dfunc != 'J' )
        if ( DBL_IS_NAN(FSEC3_MissVal) && lmissvalinfo )
          {
            lmissvalinfo = false;
            FSEC3_MissVal = (T)GRIB_MISSVAL;
            Message("Missing value = NaN is unsupported, set to %g!", GRIB_MISSVAL);
          }

      // ISEC4_NumNonMissValues = ISEC4_NumValues;
      ISEC4_NumValues = (int)bitmapSize;

      if ( dfunc != 'J' || bitmapSize == (size_t)ISEC4_NumNonMissValues )
        {
          GRIBPACK bitmap;
          /*
          unsigned char *bitmap;
          bitmap = BMS_Bitmap;
          int j = ISEC4_NumNonMissValues;
          for (int i = ISEC4_NumValues-1; i >= 0; --i)
            {
              fsec4[i] = ((bitmap[i/8]>>(7-(i&7)))&1) ? fsec4[--j] : FSEC3_MissVal;
            }
          */

          // one mask element per bitmap bit
          GRIBPACK *imask = (GRIBPACK*) Malloc((size_t)imaskSize*sizeof(GRIBPACK));

#ifdef VECTORCODE
          (void) UNPACK_GRIB(BMS_Bitmap, imask, imaskSize/8, -1L);
          GRIBPACK *pbitmap = imask;
#else
          GRIBPACK *pbitmap = BMS_Bitmap;
#endif

#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
          // Expand every bitmap octet into 8 mask elements. The loop runs
          // backwards because with VECTORCODE pbitmap aliases imask; it stops
          // when the unsigned index wraps around below zero.
          for ( size_t i = imaskSize/8-1; i != (size_t)-1; --i )
            {
              bitmap = pbitmap[i];
              imask[i*8+0] = 1 & (bitmap >> 7);
              imask[i*8+1] = 1 & (bitmap >> 6);
              imask[i*8+2] = 1 & (bitmap >> 5);
              imask[i*8+3] = 1 & (bitmap >> 4);
              imask[i*8+4] = 1 & (bitmap >> 3);
              imask[i*8+5] = 1 & (bitmap >> 2);
              imask[i*8+6] = 1 & (bitmap >> 1);
              imask[i*8+7] = 1 & (bitmap);
            }

          // count the grid points flagged as present
          int j = 0;
          for (int i = 0; i < ISEC4_NumValues; ++i)
            if ( imask[i] ) j++;

          if ( ISEC4_NumNonMissValues != j )
            {
              if ( dfunc != 'J' && ISEC4_NumBits != 0 )
                Warning("Bitmap (%d) and data (%d) section differ, using bitmap section!", j, ISEC4_NumNonMissValues);

              ISEC4_NumNonMissValues = j;
            }

          if ( dfunc != 'J' )
            {
#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
              // Spread the densely stored values over the full field in place,
              // back to front so that no value is overwritten before it moved.
              for (int i = ISEC4_NumValues-1; i >= 0; --i)
                fsec4[i] = imask[i] ? fsec4[--j] : FSEC3_MissVal;
            }

          Free(imask);
        }
    }

  if ( ISEC2_Reduced )
    {
      int nvalues = 0;
      int nlat = ISEC2_NumLat;
      int nlon = ISEC2_ReducedPointsPtr[0];
      // nvalues: sum of points over all rows, nlon: longest row
      for (int ilat = 0; ilat < nlat; ++ilat) nvalues += ISEC2_ReducedPoints(ilat);
      for (int ilat = 1; ilat < nlat; ++ilat)
        if ( ISEC2_ReducedPoints(ilat) > nlon ) nlon = ISEC2_ReducedPoints(ilat);

      // int dlon = ISEC2_LastLon-ISEC2_FirstLon;
      // if ( dlon < 0 ) dlon += 360000;

      if ( nvalues != ISEC4_NumValues ) *iret = -801;

      //printf("nlat %d  nlon %d \n", nlat, nlon);
      //printf("nvalues %d %d\n", nvalues, ISEC4_NumValues);

      if ( dfunc == 'R' && *iret == -801 )
        gprintf(__func__, "Number of values (%d) and sum of lons per row (%d) differ, abort conversion to regular Gaussian grid!",
                ISEC4_NumValues, nvalues);

      if ( dfunc == 'R' && *iret != -801 )
        {
          // NOTE(review): nlon*nlat is not checked against fsec4len before
          // qu2reg3 is called - confirm callers always size fsec4 for the
          // regular grid.
          ISEC2_Reduced = 0;
          ISEC2_NumLon = nlon;
          ISEC4_NumValues = nlon*nlat;

          lsect3 = bitmapSize > 0;
          int lperio = 1;
          int lveggy = (ISEC1_CodeTable == 128) && (ISEC1_CenterID == 98) &&
                      ((ISEC1_Parameter == 27) || (ISEC1_Parameter == 28) ||
                       (ISEC1_Parameter == 29) || (ISEC1_Parameter == 30) ||
                       (ISEC1_Parameter == 39) || (ISEC1_Parameter == 40) ||
                       (ISEC1_Parameter == 41) || (ISEC1_Parameter == 42) ||
                       (ISEC1_Parameter == 43));

          (void) TEMPLATE(qu2reg3,T)(fsec4, ISEC2_ReducedPointsPtr, nlat, nlon, FSEC3_MissVal, iret, lsect3, lperio, lveggy);

          if ( bitmapSize > 0 )
            {
              // recount the non missing values of the expanded field
              int j = 0;
              for (int i = 0; i < ISEC4_NumValues; ++i)
                if ( IS_NOT_EQUAL(fsec4[i], FSEC3_MissVal) ) j++;

              ISEC4_NumNonMissValues = j;
            }
        }
    }

  // Recompute the record length from the section lengths.
  if ( ISEC0_GRIB_Version == 1 ) isLen = 8;
  enum { esLen = 4 };
  gribLen = isLen + pdsLen + gdsLen + bmsLen + bdsLen + esLen;

  if ( !llarge && ISEC0_GRIB_Len && (size_t)ISEC0_GRIB_Len < gribLen )
    Warning("Inconsistent length of GRIB message (grib_message_size=%d < grib_record_size=%zu)!", ISEC0_GRIB_Len, gribLen);

  ISEC0_GRIB_Len = (int)gribLen;

  *kword = (int)((gribLen + sizeof(int) - 1) / sizeof(int));

  // ----------------------------------------------------------------
  // Section 9 . Abort/return to calling routine.
  // ----------------------------------------------------------------
  // ldebug is never set, so the printing below is compiled but not executed
  bool ldebug = false, l_iorj = false;
  if ( ldebug )
    {
      gprintf(__func__, "Section 9.");
      gprintf(__func__, "Output values set -");

      gribPrintSec0(isec0);
      gribPrintSec1(isec0, isec1);
      // Print section 2 if present.
      if ( lsect2 ) TEMPLATE(gribPrintSec2,T)(isec0, isec2, fsec2);

      if ( ! l_iorj )
        {
          // Print section 3 if present.
          if ( lsect3 ) TEMPLATE(gribPrintSec3,T)(isec0, isec3, fsec3);

          TEMPLATE(gribPrintSec4,T)(isec0, isec4, fsec4);
          // Special print for 2D spectra wave field real values in section 4
          if ( (isec1[ 0] ==  140) &&
               (isec1[ 1] ==   98) &&
               (isec1[23] ==    1) &&
               ((isec1[39] == 1045) || (isec1[39] == 1081))  &&
               ((isec1[ 5] ==  250) || (isec1[ 5] ==  251)) )
            gribPrintSec4Wave(isec4);
        }
    }
}

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * End:
 */

#ifdef T
#undef T
#endif
#define T float
#ifdef T

/*
 * Decode the Grid Description Section (section 2) of a GRIB record.
 *
 * Parameters:
 *   gds          first octet of the grid description section
 *   isec0        integer section 0 values (the GRIB edition is read)
 *   isec2        integer section 2 output; its first 22 entries are cleared
 *   fsec2        real section 2 output: rotation angle for rotated lat/lon
 *                grids and the vertical coordinate parameters from index 10
 *   numGridVals  set to the number of grid values implied by the grid
 *                definition, or 0 if it cannot be determined
 *
 * Returns the length of the section in octets.
 */
static
size_t TEMPLATE(decodeGDS,T)(unsigned char  *gds, int *isec0, int *isec2, T *fsec2, size_t *numGridVals)
{
  // int imisng = 0;
  bool ReducedGrid = false, VertCoorTab = false;
#ifdef VECTORCODE
  unsigned char *igrib;
  GRIBPACK *lgrib = NULL;
  size_t lGribLen = 0;
#endif

  *numGridVals = 0;

  memset(isec2, 0, 22*sizeof(int));

  const unsigned gdsLen = GDS_Len;

  // PVPL of 0 is treated like 0xFF: neither a vertical coordinate table
  // nor a list of points per row is present
  unsigned ipvpl = GDS_PVPL;
  if ( ipvpl == 0 ) ipvpl = 0xFF;

  if ( ipvpl != 0xFF )
    { // Either vct or reduced grid
      if ( GDS_NV != 0 )
        { // we have vct
          VertCoorTab = true;
          // octets left after the vct hold the number of points per row
          const unsigned ipl =  4*GDS_NV + ipvpl - 1;
          if ( ipl < gdsLen ) ReducedGrid = true;
        }
      else
        {
          VertCoorTab = false;
          ReducedGrid = true;
        }
      // ReducedGrid = (gdsLen - 32 - 4*GDS_NV);
    }

  // Edition 0: everything after the 32 octet header is the vct. gdsLen is
  // unsigned, so it is compared directly; "(gdsLen - 32) > 0" would wrap around
  // and be true for sections shorter than 32 octets.
  if ( ISEC0_GRIB_Version == 0 ) VertCoorTab = (gdsLen > 32);

  if ( ReducedGrid )
    {
      const unsigned locnl = GDS_PVPL - 1U + (VertCoorTab * 4U * GDS_NV);
      const unsigned jlenl = (gdsLen - locnl)  >> 1;
      // only accept the list if it has exactly one 2 octet entry per latitude
      if ( jlenl == GDS_NumLat )
        {
          ISEC2_Reduced = true;
          size_t accum = 0;
          for ( size_t i = 0; i < jlenl; ++i )
            {
              unsigned rpi = GET_UINT2(gds[locnl+2*i], gds[locnl+2*i+1]);
              ISEC2_ReducedPoints(i) = (int)rpi;
              accum += rpi;
            }
          *numGridVals = accum;
        }
      else
        {
          ReducedGrid = false;
        }
    }

  ISEC2_GridType = GDS_GridType;

  // Gaussian grid definition.

  if ( ISEC2_GridType == GRIB1_GTYPE_LATLON    ||
       ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN  ||
       ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
    {
      ISEC2_NumLat    = (int)(GDS_NumLat);
      if ( ! ReducedGrid )
        {
          ISEC2_NumLon = (int)(GDS_NumLon);
          *numGridVals  = (size_t)ISEC2_NumLon*(size_t)ISEC2_NumLat;
        }
      ISEC2_FirstLat  = GDS_FirstLat;
      ISEC2_FirstLon  = GDS_FirstLon;
      ISEC2_ResFlag   = GDS_ResFlag;
      ISEC2_LastLat   = GDS_LastLat;
      ISEC2_LastLon   = GDS_LastLon;
      ISEC2_LonIncr   = (int)(GDS_LonIncr);

      ISEC2_NumPar    = (int)GDS_NumPar;
      ISEC2_ScanFlag  = GDS_ScanFlag;
      if ( ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
        {
          ISEC2_LatSP     = GDS_LatSP;
          ISEC2_LonSP     = GDS_LonSP;
          FSEC2_RotAngle  = (T)GDS_RotAngle;
        }
      // if ( Lons != Longitudes || Lats != Latitudes ) Error("Latitude/Longitude Conflict");
    }
  // GAUSSIAN is already handled by the first branch; the other Gaussian
  // variants end up here and nothing is decoded for them.
  else if ( ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN     ||
            ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN_ROT ||
            ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN_STR ||
            ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN_ROTSTR )
    {
      // iret = decodeGDS_GG(gds, gdspos, isec0, isec2, imisng);
    }
  // LATLON and LATLON_ROT are already handled by the first branch; the
  // stretched variants end up here and nothing is decoded for them.
  else if ( ISEC2_GridType == GRIB1_GTYPE_LATLON     ||
            ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT ||
            ISEC2_GridType == GRIB1_GTYPE_LATLON_STR ||
            ISEC2_GridType == GRIB1_GTYPE_LATLON_ROTSTR )
    {
      // iret = decodeGDS_LL(gds, gdspos, isec0, isec2, imisng);
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_LCC )
    {
      ISEC2_NumLon    = (int)(GDS_NumLon);
      ISEC2_NumLat    = (int)(GDS_NumLat);
      *numGridVals  = (size_t)ISEC2_NumLon*(size_t)ISEC2_NumLat;
      ISEC2_FirstLat  = GDS_FirstLat;
      ISEC2_FirstLon  = GDS_FirstLon;
      ISEC2_ResFlag   = GDS_ResFlag;
      ISEC2_Lambert_Lov   = GDS_Lambert_Lov;
      ISEC2_Lambert_dx    = GDS_Lambert_dx;
      ISEC2_Lambert_dy    = GDS_Lambert_dy;
      ISEC2_Lambert_LatS1 = GDS_Lambert_LatS1;
      ISEC2_Lambert_LatS2 = GDS_Lambert_LatS2;
      ISEC2_Lambert_LatSP = GDS_Lambert_LatSP;
      ISEC2_Lambert_LonSP = GDS_Lambert_LonSP;
      ISEC2_Lambert_ProjFlag = GDS_Lambert_ProjFlag;
      ISEC2_ScanFlag      = GDS_ScanFlag;
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_SPECTRAL )
    {
      ISEC2_PentaJ  = (int)(GDS_PentaJ); // Truncation
      ISEC2_PentaK  = (int)(GDS_PentaK);
      ISEC2_PentaM  = (int)(GDS_PentaM);
      ISEC2_RepType = GDS_RepType;
      ISEC2_RepMode = GDS_RepMode;
      *numGridVals  = ((size_t)ISEC2_PentaJ+1)*((size_t)ISEC2_PentaJ+2);
      isec2[ 6] = 0;
      isec2[ 7] = 0;
      isec2[ 8] = 0;
      isec2[ 9] = 0;
      isec2[10] = 0;
      // iret = decodeGDS_SH(gds, gdspos, isec0, isec2, imisng);
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_GME )
    {
      ISEC2_GME_NI2    = (int)(GDS_GME_NI2);
      ISEC2_GME_NI3    = (int)(GDS_GME_NI3);
      ISEC2_GME_ND     = (int)(GDS_GME_ND);
      ISEC2_GME_NI     = (int)(GDS_GME_NI);
      ISEC2_GME_AFlag  = GDS_GME_AFlag;
      ISEC2_GME_LatPP  = GDS_GME_LatPP;
      ISEC2_GME_LonPP  = GDS_GME_LonPP;
      ISEC2_GME_LonMPL = GDS_GME_LonMPL;
      ISEC2_GME_BFlag  = GDS_GME_BFlag;
      *numGridVals  = ((size_t)ISEC2_GME_NI+1)*((size_t)ISEC2_GME_NI+1)*10;
      // iret = decodeGDS_TR(gds, gdspos, isec0, isec2, imisng);
    }
  else
    {
      // unknown grid type: keep the dimensions and warn once per process
      static bool lwarn = true;
      unsigned nlon = GDS_NumLon, nlat = GDS_NumLat;
      ISEC2_NumLon = (int)nlon;
      ISEC2_NumLat = (int)nlat;
      *numGridVals  = (size_t)nlon*(size_t)nlat;
      if ( lwarn )
        {
          lwarn = false;
          Message("GRIB gridtype %d unsupported", ISEC2_GridType);
        }
    }

  // Vertical coordinate parameters for hybrid levels.
  // Get number of vertical coordinate parameters, if any.

  ISEC2_NumVCP = 0;

  isec2[17] = 0;
  isec2[18] = 0;

  if ( VertCoorTab )
    {
      int locnv;  // offset of the first vertical coordinate parameter
      if ( ISEC0_GRIB_Version  == 0 )
        {
          locnv = 32;
          ISEC2_NumVCP = ((int)gdsLen - 32) >> 2;
        }
      else
        {
          locnv = (int)GDS_PVPL - 1;
          ISEC2_NumVCP = GDS_NV;
        }
#if defined (SX)
      lGribLen = 4*ISEC2_NumVCP;
      lgrib    = (GRIBPACK*) Malloc(lGribLen*sizeof(GRIBPACK));

      igrib = &gds[locnv];
      if ( ISEC2_NumVCP > 0 ) (void) UNPACK_GRIB(igrib, lgrib, lGribLen, -1L);
      for (int i = 0; i < ISEC2_NumVCP; ++i)
        {
          const int iexp  = lgrib[4*i];
          const int imant = GET_UINT3(lgrib[4*i+1], lgrib[4*i+2], lgrib[4*i+3]);
          fsec2[10+i] = POW_2_M24 * imant * ldexp(1.0, 4 * (iexp - 64));
        }

      Free(lgrib);
#else
      // each parameter takes 4 octets: 1 octet exponent, 3 octets mantissa
      for (int i = 0; i < ISEC2_NumVCP; ++i)
        {
          const int iexp  = gds[locnv+4*i];
          const int imant = (int)(GET_UINT3(gds[locnv+4*i+1], gds[locnv+4*i+2], gds[locnv+4*i+3]));
          fsec2[10+i] = (T)decfp2(iexp,imant);
        }
#endif
    }

  return gdsLen;
}

// TEMPLATE(ldexp,T) and TEMPLATE(pow,T) paste the element type onto the
// function name (ldexp_double, pow_float, ...); these macros map the pasted
// names onto the corresponding C math library functions.
#define ldexp_double ldexp
#define ldexp_float  ldexpf
#define pow_double pow
#define pow_float powf

/*
 * Decode the Binary Data Section (section 4) of a GRIB record.
 *
 * Parameters:
 *   decscale     decimal scale factor; if non-zero, every decoded value is
 *                multiplied by 10^(-decscale)
 *   bds          first octet of the binary data section
 *   isec2        integer section 2 values (ISEC2_PentaJ is read for complex
 *                packed spherical harmonics)
 *   isec4        integer section 4 output; its first 42 entries are cleared
 *   fsec4        receives the decoded values
 *   fsec4len     capacity of fsec4 in elements
 *   dfunc        decoding mode; with 'J' only the isec4 entries are set and
 *                fsec4 is left untouched
 *   bdsLen       length of the section in octets
 *   numGridVals  number of grid values from section 2; needed for constant
 *                fields, which are stored with 0 bits per value
 *   iret         0 on success, otherwise 710 (fsec4 too small), 1999 (second
 *                order packed grid), 2001 (0 bits per value but data present)
 *                or 2002 (constant field without a known grid size)
 */
static
void TEMPLATE(decodeBDS,T)(int decscale, unsigned char *bds, int *isec2, int *isec4,
                           T *fsec4, int fsec4len, int dfunc, size_t bdsLen, size_t numGridVals, int *iret)
{
  size_t ioff = 0;           // number of values stored unpacked in front of the packed data
  enum { bds_head = 11 };    // size of the fixed section header in octets
  T zscale = 0.;
  T fmin = 0.;
  T *fpdata = fsec4;         // next free position in the output array

  *iret = 0;
  unsigned char *igrib = bds;

  memset(isec4, 0, 42*sizeof(int));

  // 4 bit flag / 4 bit count of unused bits at end of block octet.

  const int bds_flag = BDS_Flag;

  // 0------- grid point
  // 1------- spherical harmonics

  const bool lspherc = (bds_flag >> 7)&1;
  if ( lspherc ) isec4[2] = 128;
  else           isec4[2] = 0;

  // -0------  simple packing
  // -1------ complex packing

  const bool lcomplex = (bds_flag >> 6)&1;
  if ( lcomplex ) isec4[3] = 64;
  else            isec4[3] =  0;

  // ---0---- No additional flags
  // ---1---- Additional flags; this decoder then expects a header that is
  //          12 octets longer (zoff) and stores BDS_Z in isec4[6]

  const bool lcompress = (bds_flag >> 4)&1;

  unsigned zoff;
  if ( lcompress )
    { isec4[5] = 16; isec4[6] = BDS_Z; zoff = 12; }
  else
    { isec4[5] =  0; isec4[6] = 0;     zoff =  0; }

  // ----++++ number of unused bits at end of section)

  const unsigned bds_ubits = bds_flag & 0xF;

  // scale factor (2 bytes)
  const int jscale = BDS_BinScale;

  // check for missing data indicators.

  const int iexp  = bds[ 6];
  const int imant = (int)(GET_UINT3(bds[ 7], bds[ 8], bds[ 9]));

  // all bits set in scale factor and reference value mark a field without data
  const int imiss = (jscale == 0xFFFF && iexp == 0xFF && imant == 0xFFFFFF);

  // convert reference value and scale factor.

  if ( ! (dfunc == 'J') && imiss == 0 )
    {
      fmin = (T)BDS_RefValue;
      zscale = TEMPLATE(ldexp,T)((T)1.0, jscale);
    }

  // get number of bits in each data value.

  unsigned dvbits = BDS_NumBits;
  ISEC4_NumBits = BDS_NumBits;

  // octet number of start of packed data calculated from start of block 4 - 1

  size_t locnd = zoff + bds_head;

  // if data is in spherical harmonic form, distinguish  between simple/complex packing (lcomplex = 0/1)

  if ( lspherc )
    {
      if ( !lcomplex )
        {
          // no unpacked binary data present octet number of start of packed data
          // calculated from start of block 4 - 1

          ioff   = 1;
          locnd += 4*ioff;  // RealCoef

          // get real (0,0) coefficient in grib format and convert to floating point.
          if ( dfunc != 'J' )
            {
              if ( imiss ) *fpdata++ = 0.0;
              else         *fpdata++ = (T)BDS_RealCoef;
            }
        }
      else // complex packed spherical harmonics
        {
          isec4[15] = BDS_PackData;
          // scaling factor
          isec4[16] = BDS_Power;

          // pentagonal resolution parameters of the unpacked section of data field

          const int jup = bds[zoff+15];
          const int kup = bds[zoff+16];
          const int mup = bds[zoff+17];

          // NOTE(review): the store index is shifted by zoff while isec4[19] is
          // read back unshifted further down - confirm this is intended for
          // records with the compressed flag set.
          isec4[zoff+17] = jup;
          isec4[zoff+18] = kup;
          isec4[zoff+19] = mup;

          // unpacked binary data

          locnd += 4; // 2 + power
          locnd += 3; // j, k, m
          ioff   = ((size_t)jup+1)*((size_t)jup+2);

          if ( dfunc != 'J' )
            for ( size_t i = 0; i < ioff; ++i )
              {
                if ( imiss )
                  fpdata[i] = 0.0;
                else
                  {
                    const int iexp2  = (int)(bds[locnd+4*i]);
                    const int imant2 = (int)(GET_UINT3(bds[locnd+4*i+1], bds[locnd+4*i+2], bds[locnd+4*i+3]));
                    fpdata[i] = (T)decfp2(iexp2,imant2);
                  }
              }
          fpdata += ioff;
          locnd += 4*ioff;  /* RealCoef */
        }
    }
  else
    {
      if ( lcomplex )
        {
          *iret = 1999;
          gprintf(__func__, " Second order packed grids unsupported!");
          gprintf(__func__, " Return code =  %d", *iret);
          return;
        }
    }

  // Decode data values to floating point and store in fsec4.
  // First calculate the number of data values.
  // Take into account that spherical harmonics can be packed
  // simple (lcomplex = 0) or complex (lcomplex = 1)

  // A section shorter than its own header would make the unsigned
  // subtraction wrap around to a huge value.
  const bool ltrunc = (bdsLen < locnd);
  size_t jlend = ltrunc ? 0 : bdsLen - locnd;

  if ( dvbits == 0 )
    {
      // constant field: at most one padding octet may follow the header
      if ( ltrunc || jlend > 1 )
        {
          *iret = 2001;
          gprintf(__func__, " Number of bits per data value = 0!");
          gprintf(__func__, " Return code =  %d", *iret);
          return;
        }

      if ( numGridVals == 0 )
        {
          *iret = 2002;
          gprintf(__func__, " Constant field unsupported for this grid type!");
          gprintf(__func__, " Return code =  %d", *iret);
          return;
        }

      jlend = numGridVals - ioff;
    }
  else
    {
      // number of complete values in the packed bits; never let the
      // subtraction of the unused bits wrap around
      const size_t nbits = jlend*8;
      jlend = (nbits > bds_ubits) ? (nbits - bds_ubits) / dvbits : 0;
    }

  ISEC4_NumValues        = (int)(jlend + ioff);
  ISEC4_NumNonMissValues = 0;

  // dvbits == 0 is excluded to avoid a division by zero; the value count of a
  // constant field was already derived from numGridVals above.
  if ( lcompress && dvbits != 0 )
    {
      const size_t len = ((size_t) ((bds[17]<<16)+(bds[18]<<8)+bds[19]));

      ISEC4_NumValues = (int)(len*8/dvbits);

      if ( lspherc ) ISEC4_NumValues += lcomplex ? (int)ioff : 1;
    }

  if ( dfunc == 'J' ) return;

  // check length of output array.

  if ( ISEC4_NumValues > fsec4len )
    {
      *iret = 710;
      gprintf(__func__, " Output array too small. Length = %d", fsec4len);
      gprintf(__func__, " Number of values = %d", ISEC4_NumValues);
      gprintf(__func__, " Return code =  %d", *iret);
      return;
    }

  if ( imiss ) memset((char *)fpdata, 0, jlend*sizeof(T));
  else
    {
      igrib += locnd;

      TEMPLATE(decode_array,T)(igrib, (long)jlend, ISEC4_NumBits, fmin, zscale, fpdata);
    }

  if ( lspherc && lcomplex )
    {
      int pcStart = isec4[19], pcScale = isec4[16];
      TEMPLATE(scatter_complex,T)(fsec4, pcStart, ISEC2_PentaJ, ISEC4_NumValues);
      TEMPLATE(scale_complex,T)(fsec4, pcStart, pcScale, ISEC2_PentaJ, 1);
    }

  if ( CGRIBEX_Fix_ZSE )  // Fix ZeroShiftError of simple packed spherical harmonics
    if ( lspherc && !lcomplex )
      {
        // 20100705: Fix ZeroShiftError - Edi Kirk
        // fsec4[1] only exists if at least two values were decoded
        if ( ISEC4_NumValues > 1 && IS_NOT_EQUAL(fsec4[1], 0.0) )
          {
            const T zserr = fsec4[1];
            for (int i = 1; i < ISEC4_NumValues; ++i) fsec4[i] -= zserr;
          }
      }

  if ( decscale )
    {
      const T scale = TEMPLATE(pow,T)((T)10.0, (T)-decscale);
      for (int i = 0; i < ISEC4_NumValues; ++i) fsec4[i] *= scale;
    }
}


/*
 * Decode one GRIB (edition 0/1) record stored in kgrib.
 *
 * The sections are visited in file order: IS (decodeIS), PDS (decodePDS),
 * GDS (decodeGDS, only when bit 128 of ISEC1_Sec2Or3Flag is set), BMS (only
 * when bit 64 is set) and BDS (decodeBDS).  When a bitmap is present the
 * values unpacked into fsec4 are expanded in place to the full bitmap size,
 * with FSEC3_MissVal stored at masked-out points.  For reduced grids and
 * dfunc == 'R' the field is afterwards converted with qu2reg3.
 *
 * isec0..isec4, fsec2..fsec4
 *            section arrays filled by the decoders; the ISECn_ and FSECn_
 *            macros used below presumably index into them (defined elsewhere).
 * fsec4len   capacity of fsec4; only forwarded to decodeBDS here.
 * kgrib      the encoded message, read as a byte stream.
 * kleng      unused.
 * kword      out: record length in units of sizeof(int), rounded up.
 * dfunc      decode mode.  Tested here: 'J' (bitmap is counted but fsec4 is
 *            not expanded), 'L' (skips the NaN missing-value check), 'R'
 *            (convert reduced grid to regular grid).
 * iret       out: reset to 0 on entry; decodeIS, decodeBDS and qu2reg3 may
 *            set it; set to -801 here when the reduced-grid row sums do not
 *            match the number of values.  The function returns early when
 *            decodeBDS reports a non-zero code.
 */
void TEMPLATE(grib_decode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *isec3,
			     T *fsec3, int *isec4, T *fsec4, int fsec4len, int *kgrib,
			     int kleng, int *kword, int dfunc, int *iret)
{
  UCHAR *bms = NULL;
  bool lsect2 = false, lsect3 = false;
  // Function-static so the NaN missing-value message below is printed only once per process.
  static bool lmissvalinfo = true;

  UNUSED(kleng);

  *iret = 0;

  grsdef();

  ISEC2_Reduced = false;

  // ----------------------------------------------------------------
  // IS Indicator Section (Section 0)
  // ----------------------------------------------------------------
  UCHAR *is = (UCHAR *) &kgrib[0];
  size_t isLen = decodeIS(is, isec0, iret);

  // Length as read from the message; may be the coded "large record" form handled below.
  size_t gribLen = (size_t)ISEC0_GRIB_Len;

  /*
    When decoding or calculating length, previous editions
    of the GRIB code must be taken into account.

    In the table below, covering sections 0 and 1 of the GRIB
    code, octet numbering is from the beginning of the GRIB
    message;
    * indicates that the value is not available in the code edition;
    R indicates reserved, should be set to 0;
    Experimental edition is considered as edition -1.

    GRIB code edition -1 has fixed length of 20 octets for
    section 1, the length not included in the message.
    GRIB code edition 0 has fixed length of 24 octets for
    section 1, the length being included in the message.
    GRIB code edition 1 can have different lengths for section
    1, the minimum being 28 octets, length being included in
    the message.

                                         Octet numbers for code
                                                  editions

                 Contents.                   -1      0      1
                 ---------                ----------------------
       Letters GRIB                          1-4    1-4    1-4
       Total length of GRIB message.          *      *     5-7
       GRIB code edition number               *      *      8
       Length of Section 1.                   *     5-7    9-11
       Reserved octet (R).                    *      8(R)   *
       Version no. of Code Table 2.           *      *     12
       Identification of centre.              5      9     13
       Generating process.                    6     10     14
       Grid definition .                      7     11     15
       Flag (Code Table 1).                   8     12     16
       Indicator of parameter.                9     13     17
       Indicator of type of level.           10     14     18
       Height, pressure etc of levels.      11-12  15-16  19-20
       Year of century.                      13     17     21
       Month.                                14     18     22
       Day.                                  15     19     23
       Hour.                                 16     20     24
       Minute.                               17     21     25
       Indicator of unit of time.            18     22     26
       P1 - Period of time.                  19     23     27
       P2 - Period of time                  20(R)   24     28
       or reserved octet (R).
       Time range indicator.                21(R)   25     29
       or reserved octet (R).
       Number included in average.       22-23(R)  26-27  30-31
       or reserved octet (R).
       Number missing from average.         24(R)  28(R)   32
       or reserved octet (R).
       Century of data.                       *      *     33
       Designates sub-centre if not 0.        *      *     34
       Decimal scale factor.                  *      *    35-36
       Reserved. Set to 0.                    *      *    37-48
       (Need not be present)
       For originating centre use only.       *      *    49-nn
       (Need not be present)

    Identify which GRIB code edition is being decoded.

    In GRIB edition 1, the edition number is in octet 8.
    In GRIB edition 0, octet 8 is reserved and set to 0.
    In GRIB edition -1, octet 8 is a flag field and can have
    a valid value of 0, 1, 2 or 3.

    However, GRIB edition number 0 has a fixed
    length of 24, included in the message, for section 1, so
    if the value extracted from octets 5-7 is 24 and that from
    octet 8 is 0, it is safe to assume edition 0 of the code.

  */

  // Set length of GRIB message to missing data value.
  if ( ISEC0_GRIB_Len == 24 && ISEC0_GRIB_Version == 0 ) ISEC0_GRIB_Len = 0;

  // ----------------------------------------------------------------
  // PDS Product Definition Section (Section 1)
  // ----------------------------------------------------------------
  UCHAR *pds = is + isLen;
  size_t pdsLen = decodePDS(pds, isec0, isec1);

  // ----------------------------------------------------------------
  // GDS Grid Description Section (Section 2)
  // ----------------------------------------------------------------
  size_t numGridVals = 0;
  size_t gdsLen = 0;
  const bool gdsIncluded = ISEC1_Sec2Or3Flag & 128;
  if ( gdsIncluded )
    {
      UCHAR *gds = is + isLen + pdsLen;
      gdsLen = TEMPLATE(decodeGDS,T)(gds, isec0, isec2, fsec2, &numGridVals);
    }

  // ----------------------------------------------------------------
  // BMS Bit-Map Section Section (Section 3)
  // ----------------------------------------------------------------
  isec3[0] = 0;
  size_t bmsLen = 0, bitmapSize = 0, imaskSize = 0;
  const bool bmsIncluded = ISEC1_Sec2Or3Flag & 64;
  if ( bmsIncluded )
    {
      bms = is + isLen + pdsLen + gdsLen;
      bmsLen = BMS_Len;

      // Bits available after the 6-octet BMS header, then the count actually used.
      // NOTE(review): both values are size_t; if BMS_UnusedBits exceeds imaskSize
      // (e.g. bmsLen <= 6) the subtraction wraps to a huge value - confirm inputs.
      imaskSize = (bmsLen > 6) ? (bmsLen - 6)<<3 : 0;
      bitmapSize = imaskSize - BMS_UnusedBits;
    }

  // ----------------------------------------------------------------
  // BDS Binary Data Section (Section 4)
  // ----------------------------------------------------------------
  UCHAR *bds = is + isLen + pdsLen + gdsLen + bmsLen;
  unsigned bdsLen = BDS_Len;
  /*
    If a very large product, the section 4 length field holds
    the number of bytes in the product after section 4 upto
    the end of the padding bytes.
    This is a fixup to get round the restriction on product lengths
    due to the count being only 24 bits. It is only possible because
    the (default) rounding for GRIB products is 120 bytes.
  */
  const bool llarge = (gribLen > JP23SET && bdsLen <= 120);
  if ( llarge )
    {
      // Undo the large-record coding: keep the bits selected by JP23SET and scale by 120.
      gribLen &= JP23SET;
      gribLen *= 120;
      ISEC0_GRIB_Len = (int)gribLen;
      bdsLen = correct_bdslen(bdsLen, (int)gribLen, (long)(isLen+pdsLen+gdsLen+bmsLen));
    }

  TEMPLATE(decodeBDS,T)(ISEC1_DecScaleFactor, bds, isec2, isec4, fsec4, fsec4len, dfunc, bdsLen, numGridVals, iret);

  if ( *iret != 0 ) return;

  // Until a bitmap says otherwise, every decoded value counts as non-missing.
  ISEC4_NumNonMissValues = ISEC4_NumValues;

  if ( bitmapSize > 0 )
    {
      if ( dfunc != 'L' && dfunc != 'J' )
	if ( DBL_IS_NAN(FSEC3_MissVal) && lmissvalinfo )
	  {
	    lmissvalinfo = false;
	    FSEC3_MissVal = (T)GRIB_MISSVAL;
	    Message("Missing value = NaN is unsupported, set to %g!", GRIB_MISSVAL);
	  }

      // ISEC4_NumNonMissValues = ISEC4_NumValues;
      // NOTE(review): the expanded size is not compared with fsec4len in this
      // function; confirm callers size fsec4 for the full bitmap.
      ISEC4_NumValues = (int)bitmapSize;

      if ( dfunc != 'J' || bitmapSize == (size_t)ISEC4_NumNonMissValues )
	{
	  GRIBPACK bitmap;
	  /*
	  Earlier variant kept for reference (reads bits straight from the bitmap):
	  unsigned char *bitmap;
	  bitmap = BMS_Bitmap;
	  int j = ISEC4_NumNonMissValues;
	  for (int i = ISEC4_NumValues-1; i >= 0; --i)
	    {
	      fsec4[i] = ((bitmap[i/8]>>(7-(i&7)))&1) ? fsec4[--j] : FSEC3_MissVal;
	    }
	  */

	  // One GRIBPACK entry per bitmap bit (including the unused trailing bits).
	  GRIBPACK *imask = (GRIBPACK*) Malloc((size_t)imaskSize*sizeof(GRIBPACK));

#ifdef VECTORCODE
	  (void) UNPACK_GRIB(BMS_Bitmap, imask, imaskSize/8, -1L);
	  GRIBPACK *pbitmap = imask;
#else
	  GRIBPACK *pbitmap = BMS_Bitmap;
#endif

#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
	  // Spread each bitmap byte into eight 0/1 entries, most significant bit
	  // first.  The loop runs from the last byte down to 0 (the unsigned index
	  // wraps to (size_t)-1 to stop); under VECTORCODE pbitmap aliases imask, and
	  // going backwards writes only entries at index >= i*8, so source bytes
	  // that are still to be read are not overwritten.
	  for ( size_t i = imaskSize/8-1; i != (size_t)-1; --i )
	    {
	      bitmap = pbitmap[i];
	      imask[i*8+0] = 1 & (bitmap >> 7);
	      imask[i*8+1] = 1 & (bitmap >> 6);
	      imask[i*8+2] = 1 & (bitmap >> 5);
	      imask[i*8+3] = 1 & (bitmap >> 4);
	      imask[i*8+4] = 1 & (bitmap >> 3);
	      imask[i*8+5] = 1 & (bitmap >> 2);
	      imask[i*8+6] = 1 & (bitmap >> 1);
	      imask[i*8+7] = 1 & (bitmap);
	    }

	  // Count the set bits within the used part of the bitmap.
	  int j = 0;
	  for (int i = 0; i < ISEC4_NumValues; ++i)
	    if ( imask[i] ) j++;

	  if ( ISEC4_NumNonMissValues != j )
	    {
	      if ( dfunc != 'J' && ISEC4_NumBits != 0 )
		Warning("Bitmap (%d) and data (%d) section differ, using bitmap section!", j, ISEC4_NumNonMissValues);

	      ISEC4_NumNonMissValues = j;
	    }

	  if ( dfunc != 'J' )
	    {
#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
	      // In-place expansion from the back: j counts down through the packed
	      // values while i counts down through the grid, so a packed value is
	      // always read before its slot can be overwritten.
	      for (int i = ISEC4_NumValues-1; i >= 0; --i)
		fsec4[i] = imask[i] ? fsec4[--j] : FSEC3_MissVal;
	    }

	  Free(imask);
	}
    }

  if ( ISEC2_Reduced )
    {
      // nvalues: sum of points over all rows; nlon: largest row length.
      int nvalues = 0;
      int nlat = ISEC2_NumLat;
      int nlon = ISEC2_ReducedPointsPtr[0];
      for (int ilat = 0; ilat < nlat; ++ilat) nvalues += ISEC2_ReducedPoints(ilat);
      for (int ilat = 1; ilat < nlat; ++ilat)
	if ( ISEC2_ReducedPoints(ilat) > nlon ) nlon = ISEC2_ReducedPoints(ilat);

      // int dlon = ISEC2_LastLon-ISEC2_FirstLon;
      // if ( dlon < 0 ) dlon += 360000;
	  
      if ( nvalues != ISEC4_NumValues ) *iret = -801;

      //printf("nlat %d  nlon %d \n", nlat, nlon);
      //printf("nvalues %d %d\n", nvalues, ISEC4_NumValues);

      if ( dfunc == 'R' && *iret == -801 )
	gprintf(__func__, "Number of values (%d) and sum of lons per row (%d) differ, abort conversion to regular Gaussian grid!",
		ISEC4_NumValues, nvalues);
      
      if ( dfunc == 'R' && *iret != -801 )
	{
	  // The field becomes a regular nlon x nlat grid.
	  ISEC2_Reduced = 0;
	  ISEC2_NumLon = nlon;
	  ISEC4_NumValues = nlon*nlat;

	  lsect3 = bitmapSize > 0;
          int lperio = 1;
	  // Flag passed to qu2reg3 for a fixed set of parameters of code table 128, centre 98.
	  int lveggy = (ISEC1_CodeTable == 128) && (ISEC1_CenterID == 98) && 
                      ((ISEC1_Parameter == 27) || (ISEC1_Parameter == 28) || 
                       (ISEC1_Parameter == 29) || (ISEC1_Parameter == 30) ||
                       (ISEC1_Parameter == 39) || (ISEC1_Parameter == 40) ||
                       (ISEC1_Parameter == 41) || (ISEC1_Parameter == 42) ||
                       (ISEC1_Parameter == 43));
	
	  (void) TEMPLATE(qu2reg3,T)(fsec4, ISEC2_ReducedPointsPtr, nlat, nlon, FSEC3_MissVal, iret, lsect3, lperio, lveggy);
	      
	  if ( bitmapSize > 0 )
	    {
	      // Recount non-missing points on the regular grid.
	      int j = 0;	      
	      for (int i = 0; i < ISEC4_NumValues; ++i)
		if ( IS_NOT_EQUAL(fsec4[i], FSEC3_MissVal) ) j++;
		  
	      ISEC4_NumNonMissValues = j;
	    }
	}
    }

  // Recompute the record length from the individual section lengths (edition 1
  // uses an 8-octet IS; the end section is 4 octets) and report it to the caller.
  if ( ISEC0_GRIB_Version == 1 ) isLen = 8;
  enum { esLen = 4 };
  gribLen = isLen + pdsLen + gdsLen + bmsLen + bdsLen + esLen;

  if ( !llarge && ISEC0_GRIB_Len && (size_t)ISEC0_GRIB_Len < gribLen )
    Warning("Inconsistent length of GRIB message (grib_message_size=%d < grib_record_size=%zu)!", ISEC0_GRIB_Len, gribLen);

  ISEC0_GRIB_Len = (int)gribLen;

  *kword = (int)((gribLen + sizeof(int) - 1) / sizeof(int));

  // ----------------------------------------------------------------
  // Section 9 . Abort/return to calling routine.
  // ----------------------------------------------------------------
  // ldebug is hard-wired to false, so the dump below never runs as written;
  // likewise lsect2 and l_iorj are never set to true in this function.
  bool ldebug = false, l_iorj = false;
  if ( ldebug )
    {
      gprintf(__func__, "Section 9.");
      gprintf(__func__, "Output values set -");

      gribPrintSec0(isec0);
      gribPrintSec1(isec0, isec1);
      // Print section 2 if present.
      if ( lsect2 ) TEMPLATE(gribPrintSec2,T)(isec0, isec2, fsec2);

      if ( ! l_iorj )
	{
	  // Print section 3 if present.
	  if ( lsect3 ) TEMPLATE(gribPrintSec3,T)(isec0, isec3, fsec3);

	  TEMPLATE(gribPrintSec4,T)(isec0, isec4, fsec4);
	  // Special print for 2D spectra wave field real values in section 4
	  if ( (isec1[ 0] ==  140) && 
	       (isec1[ 1] ==   98) && 
	       (isec1[23] ==    1) && 
	       ((isec1[39] == 1045) || (isec1[39] == 1081))  && 
	       ((isec1[ 5] ==  250) || (isec1[ 5] ==  251)) )
	    gribPrintSec4Wave(isec4);
	}
    }
}

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * End:
 */
// clang-format on

// GRIB block 0 - indicator block
//
// Stores the four marker characters "GRIB" in lGrib[0..3], emits the edition
// number 1 through Put1Byte with the cursor z preset to 7, and reports a
// section length of 8 via *gribLen.  lGrib[4..6] (total record length) are not
// written here; encodeES() fills them once the record is complete.
static void
encodeIS(GRIBPACK *lGrib, long *gribLen)
{
  static const char marker[4] = { 'G', 'R', 'I', 'B' };
  for (int k = 0; k < 4; ++k) lGrib[k] = (GRIBPACK) marker[k];

  long z = 7;   // cursor presumably consumed by the Put1Byte macro
  Put1Byte(1);  // grib version

  z = 8;
  *gribLen = z;
}

// GRIB block 5 - end block
//
// Appends the "7777" end marker at *gribLen, stores the record length in
// lGrib[4..6], zero-pads the record, and returns the padded length through
// *gribLen.
//
// Normal case (length after the marker <= JP24SET): the length is stored
// directly and the record is padded to a multiple of 8.
//
// Large case: the record is padded to a multiple of 120, a coded length
// (JP23SET + length/120 + 1) goes into lGrib[4..6], and the three octets at
// bdsstart receive the byte count from the start of the end marker up to the
// end of the padding.  Records longer than JP23SET*120 terminate the process.
static void
encodeES(GRIBPACK *lGrib, long *gribLen, long bdsstart)
{
  const long markerStart = *gribLen;
  long end = markerStart;

  for (int k = 0; k < 4; ++k) lGrib[end++] = '7';

  if (end <= JP24SET)
  {
    lGrib[4] = (GRIBPACK) (end >> 16);
    lGrib[5] = (GRIBPACK) (end >> 8);
    lGrib[6] = (GRIBPACK) end;

    for (; end % 8; ++end) lGrib[end] = 0;

    *gribLen = end;
    return;
  }

  /*
    If a very large product, the section 4 length field holds
    the number of bytes in the product after section 4 upto
    the end of the padding bytes.
    This is a fixup to get round the restriction on product lengths
    due to the count being only 24 bits. It is only possible because
    the (default) rounding for GRIB products is 120 bytes.
  */
  for (; end % 120; ++end) lGrib[end] = 0;

  if (end > JP23SET * 120)
  {
    fprintf(stderr, "Abort: GRIB1 record too large (size = %ld; max = %d)!\n", end, JP23SET * 120);
    exit(1);
  }

  const long codedLen = JP23SET - (end / (-120)) + 1;
  lGrib[4] = (GRIBPACK) (codedLen >> 16);
  lGrib[5] = (GRIBPACK) (codedLen >> 8);
  lGrib[6] = (GRIBPACK) codedLen;

  const long tailLen = end - markerStart;
  lGrib[bdsstart] = (GRIBPACK) (tailLen >> 16);
  lGrib[bdsstart + 1] = (GRIBPACK) (tailLen >> 8);
  lGrib[bdsstart + 2] = (GRIBPACK) tailLen;

  *gribLen = end;
}

// GRIB block 1 - product definition block.

// Total number of bytes written for each supported centre-specific local PDS
// extension.  getLocalExtLen() returns one of these values and getPdsLen()
// adds it to the fixed 28-byte part of the PDS.
#define DWD_extension_253_len 38
#define DWD_extension_254_len 26
#define ECMWF_extension_1_len 24
#define MPIM_extension_1_len 18

// Returns the length of the local PDS extension selected by the centre id and
// the extension identifier in isec1[36], or 0 when the local-use flag is not
// set or the centre/identifier combination is not one of the known ones.
static long
getLocalExtLen(int *isec1)
{
  if (!(ISEC1_LocalFLag)) return 0;

  switch (ISEC1_CenterID)
  {
    case 78:
    case 215:
    case 250:
      if (isec1[36] == 254) return DWD_extension_254_len;
      if (isec1[36] == 253) return DWD_extension_253_len;
      break;
    case 98:
      if (isec1[36] == 1) return ECMWF_extension_1_len;
      break;
    case 252:
      if (isec1[36] == 1) return MPIM_extension_1_len;
      break;
    default: break;
  }

  return 0;
}

// Length of the PDS: the fixed 28-byte part plus whatever local extension
// getLocalExtLen() reports for isec1.
static long
getPdsLen(int *isec1)
{
  enum { fixedPdsLen = 28 };

  return fixedPdsLen + getLocalExtLen(isec1);
}

// Emits the DWD local extension 254 starting at cursor *zs and advances *zs.
// All but the last two bytes of the extension are copied one byte per element
// from isec1[24...]; the final two bytes carry isec1[48] combined with
// isec1[49] shifted left by 15 bits.
static void
encodePDS_DWD_local_Extension_254(GRIBPACK *lGrib, long *zs, int *isec1)
{
  long z = *zs;

  const long ncopied = getLocalExtLen(isec1) - 2;
  const int *local = &isec1[24];
  for (long k = 0; k < ncopied; ++k) Put1Byte(local[k]);

  // DWD experiment identifier and run type (0=main, 2=ass, 3=test) in one 2-byte field
  const int isvn = (isec1[49] << 15) | isec1[48];
  Put2Byte(isvn);

  *zs = z;
}

// Emits the DWD local extension 253 starting at cursor *zs and advances *zs.
// The leading part has the same layout as extension 254 (which is why the
// copy count is derived from DWD_extension_254_len): 24 bytes copied from
// isec1[24...] followed by the 2-byte experiment/run-type field.  Twelve more
// bytes with ensemble information follow, giving 38 bytes in total.
static void
encodePDS_DWD_local_Extension_253(GRIBPACK *lGrib, long *zs, int *isec1)
{
  long z = *zs;

  const long ncopied = DWD_extension_254_len - 2;
  const int *local = &isec1[24];
  for (long k = 0; k < ncopied; ++k) Put1Byte(local[k]);

  /* DWD experiment identifier and run type (0=main, 2=ass, 3=test) */
  const int isvn = (isec1[49] << 15) | isec1[48];
  Put2Byte(isvn);

  Put1Byte(isec1[50]); /* 55 User id, specified by table       */
  Put2Byte(isec1[51]); /* 56 Experiment identifier             */
  Put2Byte(isec1[52]); /* 58 Ensemble identification by table  */
  Put2Byte(isec1[53]); /* 60 Number of ensemble members        */
  Put2Byte(isec1[54]); /* 62 Actual number of ensemble member  */
  Put1Byte(isec1[55]); /* 64 Model major version number        */
  Put1Byte(isec1[56]); /* 65 Model minor version number        */
  Put1Byte(0);         /* 66 Blank for even buffer length      */

  *zs = z;
}

// Emits the ECMWF local extension 1 starting at cursor *zs and advances *zs.
// The first (length - 12) bytes are copied from isec1[24...]; the remaining
// 12 bytes are written field by field below.
static void
encodePDS_ECMWF_local_Extension_1(GRIBPACK *lGrib, long *zs, int *isec1)
{
  long z = *zs;

  const long ncopied = getLocalExtLen(isec1) - 12;
  const int *local = &isec1[24];
  for (long k = 0; k < ncopied; ++k) Put1Byte(local[k]);

  Put1Byte(isec1[36]); /* ECMWF local GRIB use definition identifier */
                       /*    1=MARS labelling or ensemble fcst. data */
  Put1Byte(isec1[37]); /* Class                                      */
  Put1Byte(isec1[38]); /* Type                                       */
  Put2Byte(isec1[39]); /* Stream                                     */

  // Version number or experiment identifier: the first four bytes of
  // isec1[40] are emitted in the order they are laid out in memory.
  const unsigned char *expver = (const unsigned char *) &isec1[40];
  for (int k = 0; k < 4; ++k) Put1Byte(expver[k]);

  Put1Byte(isec1[41]); /* Ensemble forecast number                   */
  Put1Byte(isec1[42]); /* Total number of forecasts in ensemble      */
  Put1Byte(0);         /* (Spare)                                    */

  *zs = z;
}

// Emits the MPIM local extension 1 starting at cursor *zs and advances *zs.
// The first (length - 6) bytes are copied from isec1[24...]; the remaining
// 6 bytes are written field by field below.
static void
encodePDS_MPIM_local_Extension_1(GRIBPACK *lGrib, long *zs, int *isec1)
{
  long z = *zs;

  const long ncopied = getLocalExtLen(isec1) - 6;
  const int *local = &isec1[24];
  for (long k = 0; k < ncopied; ++k) Put1Byte(local[k]);

  Put1Byte(isec1[36]); /* MPIM local GRIB use definition identifier  */
                       /*    (extension identifier)                  */
  Put1Byte(isec1[37]); /* type of ensemble forecast                  */
  Put2Byte(isec1[38]); /* individual ensemble member                 */
  Put2Byte(isec1[39]); /* number of forecasts in ensemble            */

  *zs = z;
}

// GRIB BLOCK 1 - PRODUCT DESCRIPTION SECTION
//
// Writes the product definition section into lpds, starting at offset 0.
//
//   lpds    destination buffer for the section
//   pdsLen  section length, stored in the first three bytes
//   isec1   integer section-1 values, read through the ISEC1_ macros and, for
//           the local extensions, by direct index
//
// The Put1Byte/Put2Byte/Put3Byte/Put1Int/Put2Int macros are defined elsewhere;
// they presumably write through the locals lGrib and z (and use ival as
// scratch for the signed variants), which is why those locals exist even
// though the code below never names z or ival directly after initialisation.
static void
encodePDS(GRIBPACK *lpds, long pdsLen, int *isec1)
{
  GRIBPACK *lGrib = lpds;
  long z = 0;
  int ival;

  int century = ISEC1_Century;
  int year = ISEC1_Year;

  // A negative century is stored as a positive century with the sign moved
  // onto the year.
  if (century < 0)
  {
    century = -century;
    year = -year;
  }

  Put3Byte(pdsLen);               /*  0 Length of Block 1        */
  Put1Byte(ISEC1_CodeTable);      /*  3 Local table number       */
  Put1Byte(ISEC1_CenterID);       /*  4 Identification of centre */
  Put1Byte(ISEC1_ModelID);        /*  5 Identification of model  */
  Put1Byte(ISEC1_GridDefinition); /*  6 Grid definition          */
  Put1Byte(ISEC1_Sec2Or3Flag);    /*  7 Block 2 included         */
  Put1Byte(ISEC1_Parameter);      /*  8 Parameter Code           */
  Put1Byte(ISEC1_LevelType);      /*  9 Type of level            */
  // The level types listed here store a single level in two bytes; every
  // other type stores Level1 and Level2 in one byte each.
  // clang-format off
  if ( (ISEC1_LevelType !=  20) &&
       (ISEC1_LevelType != GRIB1_LTYPE_99)           &&
       (ISEC1_LevelType != GRIB1_LTYPE_ISOBARIC)     &&
       (ISEC1_LevelType != GRIB1_LTYPE_ISOBARIC_PA)  &&
       (ISEC1_LevelType != GRIB1_LTYPE_ALTITUDE)     &&
       (ISEC1_LevelType != GRIB1_LTYPE_HEIGHT)       &&
       (ISEC1_LevelType != GRIB1_LTYPE_SIGMA)        &&
       (ISEC1_LevelType != GRIB1_LTYPE_HYBRID)       &&
       (ISEC1_LevelType != GRIB1_LTYPE_LANDDEPTH)    &&
       (ISEC1_LevelType != GRIB1_LTYPE_ISENTROPIC)   &&
       (ISEC1_LevelType != 115) &&
       (ISEC1_LevelType != 117) &&
       (ISEC1_LevelType != 125) &&
       (ISEC1_LevelType != 127) &&
       (ISEC1_LevelType != 160) &&
       (ISEC1_LevelType != 210) )
    {
      Put1Byte(ISEC1_Level1);
      Put1Byte(ISEC1_Level2);
    }
  else
    {
      Put2Byte(ISEC1_Level1);     /* 10 Level                    */    
    }
  // clang-format on

  Put1Int(year);          /* 12 Year of Century          */
  Put1Byte(ISEC1_Month);  /* 13 Month                    */
  Put1Byte(ISEC1_Day);    /* 14 Day                      */
  Put1Byte(ISEC1_Hour);   /* 15 Hour                     */
  Put1Byte(ISEC1_Minute); /* 16 Minute                   */

  Put1Byte(ISEC1_TimeUnit); /* 17 Time unit                */
  // Bytes 18-19: which of the two time periods are stored depends on the
  // time range indicator; unlisted indicators store two zero bytes.
  if (ISEC1_TimeRange == 10)
  {
    Put1Byte(ISEC1_TimePeriod1);
    Put1Byte(ISEC1_TimePeriod2);
  }
  else if (ISEC1_TimeRange == 113 || ISEC1_TimeRange == 0)
  {
    Put1Byte(ISEC1_TimePeriod1);
    Put1Byte(0);
  }
  else if (ISEC1_TimeRange == 5 || ISEC1_TimeRange == 4 || ISEC1_TimeRange == 3 || ISEC1_TimeRange == 2)
  {
    Put1Byte(ISEC1_TimePeriod1);
    Put1Byte(ISEC1_TimePeriod2);
  }
  else
  {
    Put1Byte(0);
    Put1Byte(0);
  }
  Put1Byte(ISEC1_TimeRange); /* 20 Timerange flag           */
  Put2Byte(ISEC1_AvgNum);    /* 21 Average                  */

  Put1Byte(ISEC1_AvgMiss);       /* 23 Missing from averages    */
  Put1Byte(century);             /* 24 Century                  */
  Put1Byte(ISEC1_SubCenterID);   /* 25 Subcenter                */
  Put2Int(ISEC1_DecScaleFactor); /* 26 Decimal scale factor     */

  // Optional centre-specific extension.  The centre/identifier tests mirror
  // getLocalExtLen(), so the bytes written match the length it reports.
  if (ISEC1_LocalFLag)
  {
    if (ISEC1_CenterID == 78 || ISEC1_CenterID == 215 || ISEC1_CenterID == 250)
    {
      if (isec1[36] == 254)
        encodePDS_DWD_local_Extension_254(lGrib, &z, isec1);
      else if (isec1[36] == 253)
        encodePDS_DWD_local_Extension_253(lGrib, &z, isec1);
    }
    else if (ISEC1_CenterID == 98)
    {
      if (isec1[36] == 1) encodePDS_ECMWF_local_Extension_1(lGrib, &z, isec1);
    }
    else if (ISEC1_CenterID == 252)
    {
      if (isec1[36] == 1) encodePDS_MPIM_local_Extension_1(lGrib, &z, isec1);
    }
    else
    {
      // getLocalExtLen() returns 0 for every centre that reaches this branch,
      // so this loop currently copies nothing.
      const long localextlen = getLocalExtLen(isec1);
      for (long i = 0; i < localextlen; i++) Put1Byte(isec1[24 + i]);
    }
  }
}

// clang-format off


#ifdef T
#undef T
#endif
#define T double
#ifdef T


#define CGRIBEX_FPSCALE(data) (((data) - zref) * factor + 0.5)

// Generic bit packer: scales data[packStart..datasize) with CGRIBEX_FPSCALE,
// truncates each result to unsigned int and appends its low numBits bits,
// most significant bit first, to lGrib starting at byte offset *gz.  A
// partially filled last byte is padded with zero bits.  *gz is advanced past
// the bytes written.  (Bit-stream logic derived from gribw's flist2bitstream.)
static
void TEMPLATE(encode_array_common,T)(int numBits, size_t packStart, size_t datasize, GRIBPACK *lGrib,
				     const T *data, T zref, T factor, size_t *gz)
{
  size_t pos = *gz;
  int freeBits = 8;       // bits still unset in the output byte being assembled
  unsigned int acc = 0;   // bits already collected for that byte

  for (size_t idx = packStart; idx < datasize; ++idx)
    {
      // floating point -> unsigned int conversion truncates
      const unsigned int packed = (unsigned int)(CGRIBEX_FPSCALE(data[idx]));
      int pending = numBits;  // bits of 'packed' not yet emitted

      // Emit complete bytes for as long as the pending bits can fill one.
      while ( freeBits <= pending )
	{
	  if ( freeBits != 8 )
	    {
	      // Top up the partially filled byte and flush it.
	      pending -= freeBits;
	      lGrib[pos++] = (GRIBPACK)((acc << freeBits)
                                        + ((packed >> pending) & ((1U << freeBits) - 1)));
	      freeBits = 8;
	      acc = 0;
	    }
	  else
	    {
	      pending -= 8;
	      lGrib[pos++] = (packed >> pending) & 0xFF;
	    }
	}

      // Fewer pending bits than free bits: keep them for the next value.
      if ( pending != 0 )
	{
	  acc = (acc << pending) + (packed & ((1U << pending)-1));
	  freeBits -= pending;
	}
    }

  if ( freeBits != 8 ) lGrib[pos++] = (GRIBPACK)(acc << freeBits);

  *gz = pos;
}


// Packs datasize values as 16-bit big-endian integers at lGrib + *gz and
// advances *gz by 2*datasize.  Each value is scaled with CGRIBEX_FPSCALE and
// truncated to uint16_t; on little-endian hosts the bytes are swapped before
// the store.
static
void TEMPLATE(encode_array_2byte,T)(size_t datasize, GRIBPACK *restrict lGrib,
				    const T *restrict data, T zref, T factor, size_t *gz)
{
  uint16_t *restrict out = (uint16_t *)(void *)(lGrib+*gz);

  if (!IS_BIGENDIAN())
    {
      for (size_t k = 0; k < datasize; ++k)
        {
          const uint16_t hostOrder = (uint16_t) CGRIBEX_FPSCALE(data[k]);
          out[k] = gribSwapByteOrder_uint16(hostOrder);
        }
    }
  else
    {
      for (size_t k = 0; k < datasize; ++k)
        out[k] = (uint16_t) CGRIBEX_FPSCALE(data[k]);
    }

  *gz += 2*datasize;
}

// Packs data[packStart..datasize) into lGrib at byte offset *gz using numBits
// bits per value and advances *gz.  Widths 8, 16, 24 and 32 have dedicated
// byte-wise paths; any other width in 1..32 goes through the generic bit
// packer; 0 writes nothing; everything else is reported through Error().
static
void TEMPLATE(encode_array,T)(int numBits, size_t packStart, size_t datasize, 
			      GRIBPACK *restrict lGrib,
			      const T *restrict data, 
			      T zref, T factor, size_t *gz)
{
  size_t pos = *gz;
  const T *src = data + packStart;
  const size_t count = datasize - packStart;

  switch (numBits)
    {
    case 0:
      break;

    case 8:
      for (size_t k = 0; k < count; ++k)
	lGrib[pos++] = (GRIBPACK) CGRIBEX_FPSCALE(src[k]);
      break;

    case 16:
      // The double instantiation uses the shared (possibly tuned) 2-byte packer.
      if (sizeof(T) == sizeof(double))
	{
	  grib_encode_array_2byte_double(count, lGrib, (const double *)(const void *)src, zref, factor, &pos);
	}
      else
	{
	  TEMPLATE(encode_array_2byte,T)(count, lGrib, src, zref, factor, &pos);
	}
      break;

    case 24:
      for (size_t k = 0; k < count; ++k)
	{
	  const uint32_t packed = (uint32_t) CGRIBEX_FPSCALE(src[k]);
	  lGrib[pos++] = (GRIBPACK)(packed >> 16);
	  lGrib[pos++] = (GRIBPACK)(packed >>  8);
	  lGrib[pos++] = (GRIBPACK)packed;
	}
      break;

    case 32:
      for (size_t k = 0; k < count; ++k)
	{
	  const uint32_t packed = (uint32_t) CGRIBEX_FPSCALE(src[k]);
	  lGrib[pos++] = (GRIBPACK)(packed >> 24);
	  lGrib[pos++] = (GRIBPACK)(packed >> 16);
	  lGrib[pos++] = (GRIBPACK)(packed >>  8);
	  lGrib[pos++] = (GRIBPACK)packed;
	}
      break;

    default:
      if (numBits > 0 && numBits <= 32)
	{
	  TEMPLATE(encode_array_common,T)(numBits, 0, count, lGrib, src, zref, factor, &pos);
	}
      else
	{
	  Error("Unimplemented packing factor %d!", numBits);
	}
      break;
    }

  *gz = pos;
}

// Blocked ("unrolled") variant of encode_array: packs data[packStart..datasize)
// into lGrib at byte offset *gz with numBits bits per value and advances *gz.
//
// For the byte-aligned widths (8, 16, 24, 32) the input is processed in chunks
// of CGRIBEX__UNROLL_DEPTH_2 values: each chunk is first scaled into the
// scratch array dval with CGRIBEX_FPSCALE and then truncated and stored with
// the most significant byte first.  The values left over after the last full
// chunk ("residual", starting at index ofs) are handled the same way.
// Other widths in 1..32 are delegated to encode_array_common, 0 writes
// nothing, anything else is reported through Error().
//
// Fix: the 16-bit and 24-bit paths used to read data[j] instead of
// data[i+j] / data[ofs+j], so every chunk re-encoded the first
// CGRIBEX__UNROLL_DEPTH_2 input values.  They now index the input the same
// way as the 8-bit and 32-bit paths.
static
void TEMPLATE(encode_array_unrolled,T)(int numBits, size_t packStart, size_t datasize, 
				       GRIBPACK *restrict lGrib,
				       const T *restrict data, 
				       T zref, T factor, size_t *gz)
{
  size_t z = *gz;
#ifdef _ARCH_PWR6
  enum { CGRIBEX__UNROLL_DEPTH_2 = 8 };
#else
  enum { CGRIBEX__UNROLL_DEPTH_2 = 128 };
#endif
  size_t residual;
  size_t ofs;
  double dval[CGRIBEX__UNROLL_DEPTH_2];

  data += packStart;
  datasize -= packStart;
  residual =  datasize % CGRIBEX__UNROLL_DEPTH_2;
  ofs = datasize - residual;   // first index not covered by a full chunk

  // reducing FP operations to single FMA is slowing down on pwr6 ...

  if      ( numBits ==  8 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(2, "pack 8 bit unrolled");
#endif
      unsigned char *cgrib = (unsigned char *) (lGrib + z);
      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
	{
	  for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
	    {
	      dval[j] = CGRIBEX_FPSCALE(data[i+j]);
	    }
	  for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
	    {
#ifdef _ARCH_PWR6
	      *cgrib++ =  (unsigned long) dval[j];
#else
	      *cgrib++ =  (unsigned char) dval[j];
#endif
	    }
	  z += CGRIBEX__UNROLL_DEPTH_2;
	}
      for (size_t j = 0; j < residual; ++j) 
	{
	  dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
	}
      for (size_t j = 0; j < residual; ++j) 
	{
#ifdef _ARCH_PWR6
	  *cgrib++ = (unsigned long) dval[j];
#else
	  *cgrib++ = (unsigned char) dval[j];
#endif
	}
      z += residual;

#ifdef _GET_IBM_COUNTER 
      hpmStop(2);
#endif
    }
  else if ( numBits == 16 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(3, "pack 16 bit unrolled");
#endif
#ifdef _ARCH_PWR6
      unsigned long ival;
#else
      uint16_t ival;
#endif
      uint16_t *sgrib = (uint16_t *)(void *)(lGrib+z);

      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
	{
	  for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
	    dval[j] = CGRIBEX_FPSCALE(data[i+j]);
	  if ( IS_BIGENDIAN() )
	    {
	      for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
		{
#ifdef _ARCH_PWR6
		  *sgrib++ = (unsigned long) dval[j];
#else
		  *sgrib++ = (uint16_t) dval[j];
#endif
		}
	      z += 2*CGRIBEX__UNROLL_DEPTH_2;
	    }
	  else
	    {
	      for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
		{
		  ival = (uint16_t) dval[j];
                  *sgrib++ = gribSwapByteOrder_uint16(ival);
		}
	      z += 2*CGRIBEX__UNROLL_DEPTH_2;
	    }
	}
      for (size_t j = 0; j < residual; ++j) 
	{
	  dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
	}
      if ( IS_BIGENDIAN() )
	{
	  for (size_t j = 0; j < residual; ++j) 
	    {
#ifdef _ARCH_PWR6
	      *sgrib++ = (unsigned long) dval[j];
#else
              *sgrib++ = (uint16_t) dval[j];
#endif
	    }
	  z += 2*residual;
	}
      else
	{
	  // Byte-wise stores through lGrib; z already points just past the
	  // chunks written through sgrib above.
	  for (size_t j = 0; j < residual; ++j) 
	    {
              ival = (uint16_t) dval[j];
	      lGrib[z  ] = (GRIBPACK)(ival >>  8);
	      lGrib[z+1] = (GRIBPACK)ival;
	      z += 2;
	    }
	}
#ifdef _GET_IBM_COUNTER 
      hpmStop(3);
#endif
    }
  else if ( numBits == 24 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(4, "pack 24 bit unrolled");
#endif
#ifdef _ARCH_PWR6
      unsigned long ival;
#else
      uint32_t ival;
#endif
      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
	{
	  for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
	    {
	      dval[j] = CGRIBEX_FPSCALE(data[i+j]);
	    }
	  for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
	    {
#ifdef _ARCH_PWR6
	      ival = (unsigned long) dval[j];
#else
	      ival = (uint32_t) dval[j];
#endif
	      lGrib[z  ] =  (GRIBPACK)(ival >> 16);
	      lGrib[z+1] =  (GRIBPACK)(ival >>  8);
	      lGrib[z+2] =  (GRIBPACK)ival;
	      z += 3;
	    }
	}
      for (size_t j = 0; j < residual; ++j) 
	{
	  dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
	}
      for (size_t j = 0; j < residual; ++j) 
	{
	  ival = (uint32_t) dval[j];
	  lGrib[z  ] =  (GRIBPACK)(ival >> 16);
	  lGrib[z+1] =  (GRIBPACK)(ival >>  8);
	  lGrib[z+2] =  (GRIBPACK)ival;
	  z += 3;
	}
#ifdef _GET_IBM_COUNTER 
      hpmStop(4);
#endif
    }
  else if ( numBits == 32 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(5, "pack 32 bit unrolled");
#endif
#ifdef _ARCH_PWR6
      unsigned long ival;
#else
      uint32_t ival;
#endif
      unsigned int *igrib = (unsigned int *)(void *)(lGrib + z);
      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
        {
	  for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j) dval[j] = CGRIBEX_FPSCALE(data[i+j]);

	  if ( IS_BIGENDIAN() )
	    {
	      for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
		{
#ifdef _ARCH_PWR6
		  *igrib = (unsigned long) dval[j];
#else
		  *igrib = (uint32_t) dval[j];
#endif
		  igrib++;
		  z += 4;
		}
	    }
	  else
	    {
	      for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
		{
                  ival = (uint32_t) dval[j];
		  lGrib[z  ] =  (GRIBPACK)(ival >> 24);
		  lGrib[z+1] =  (GRIBPACK)(ival >> 16);
		  lGrib[z+2] =  (GRIBPACK)(ival >>  8);
		  lGrib[z+3] =  (GRIBPACK)ival;
		  z += 4;
		}
	    }
	}
      for (size_t j = 0; j < residual; ++j) 
	{
          dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
	}
      if ( IS_BIGENDIAN() )
	{
	  for (size_t j = 0; j < residual; ++j) 
	    {
#ifdef _ARCH_PWR6
	      *igrib = (unsigned long) dval[j];
#else
	      *igrib = (uint32_t) dval[j];
#endif
	      igrib++;
	      z += 4;
	    }
	}
      else
	{
          for (size_t j = 0; j < residual; ++j) 
	    {
	      ival = (uint32_t) dval[j];
	      lGrib[z  ] =  (GRIBPACK)(ival >> 24);
	      lGrib[z+1] =  (GRIBPACK)(ival >> 16);
	      lGrib[z+2] =  (GRIBPACK)(ival >>  8);
	      lGrib[z+3] =  (GRIBPACK)ival;
	      z += 4;
	    }
	}
#ifdef _GET_IBM_COUNTER 
      hpmStop(5);
#endif
    }
  else if ( numBits > 0 && numBits <= 32 )
    {
      TEMPLATE(encode_array_common,T)(numBits, 0, datasize, lGrib, data, zref, factor, &z);
    }
  else if ( numBits == 0 )
    {
    }
  else
    {
      Error("Unimplemented packing factor %d!", numBits);
    }

  *gz = z;
}

#ifdef CGRIBEX_FPSCALE
#undef CGRIBEX_FPSCALE
#endif

#endif /* T */

#ifdef T
#undef T
#endif
#define T float
#ifdef T


#define CGRIBEX_FPSCALE(data) (((data) - zref) * factor + 0.5)

// Generic bit packer: scales data[packStart..datasize) with CGRIBEX_FPSCALE,
// truncates each result to unsigned int and appends its low numBits bits,
// most significant bit first, to lGrib starting at byte offset *gz.  A
// partially filled last byte is padded with zero bits.  *gz is advanced past
// the bytes written.  (Bit-stream logic derived from gribw's flist2bitstream.)
static
void TEMPLATE(encode_array_common,T)(int numBits, size_t packStart, size_t datasize, GRIBPACK *lGrib,
				     const T *data, T zref, T factor, size_t *gz)
{
  size_t pos = *gz;
  int freeBits = 8;       // bits still unset in the output byte being assembled
  unsigned int acc = 0;   // bits already collected for that byte

  for (size_t idx = packStart; idx < datasize; ++idx)
    {
      // floating point -> unsigned int conversion truncates
      const unsigned int packed = (unsigned int)(CGRIBEX_FPSCALE(data[idx]));
      int pending = numBits;  // bits of 'packed' not yet emitted

      // Emit complete bytes for as long as the pending bits can fill one.
      while ( freeBits <= pending )
	{
	  if ( freeBits != 8 )
	    {
	      // Top up the partially filled byte and flush it.
	      pending -= freeBits;
	      lGrib[pos++] = (GRIBPACK)((acc << freeBits)
                                        + ((packed >> pending) & ((1U << freeBits) - 1)));
	      freeBits = 8;
	      acc = 0;
	    }
	  else
	    {
	      pending -= 8;
	      lGrib[pos++] = (packed >> pending) & 0xFF;
	    }
	}

      // Fewer pending bits than free bits: keep them for the next value.
      if ( pending != 0 )
	{
	  acc = (acc << pending) + (packed & ((1U << pending)-1));
	  freeBits -= pending;
	}
    }

  if ( freeBits != 8 ) lGrib[pos++] = (GRIBPACK)(acc << freeBits);

  *gz = pos;
}


// Packs datasize values as 16-bit big-endian integers at lGrib + *gz and
// advances *gz by 2*datasize.  Each value is scaled with CGRIBEX_FPSCALE and
// truncated to uint16_t; on little-endian hosts the bytes are swapped before
// the store.
static
void TEMPLATE(encode_array_2byte,T)(size_t datasize, GRIBPACK *restrict lGrib,
				    const T *restrict data, T zref, T factor, size_t *gz)
{
  uint16_t *restrict out = (uint16_t *)(void *)(lGrib+*gz);

  if (!IS_BIGENDIAN())
    {
      for (size_t k = 0; k < datasize; ++k)
        {
          const uint16_t hostOrder = (uint16_t) CGRIBEX_FPSCALE(data[k]);
          out[k] = gribSwapByteOrder_uint16(hostOrder);
        }
    }
  else
    {
      for (size_t k = 0; k < datasize; ++k)
        out[k] = (uint16_t) CGRIBEX_FPSCALE(data[k]);
    }

  *gz += 2*datasize;
}

/*
 * Pack data[packStart .. datasize-1] with numBits bits per value into lGrib,
 * starting at byte offset *gz; *gz is updated to the end of the packed data.
 *
 * Widths 8, 16, 24 and 32 are written bytewise (16 bit through the dedicated
 * 2-byte packers), every other width in 1..32 goes through the generic bit
 * packer, width 0 writes nothing and any other width is reported via Error().
 */
static
void TEMPLATE(encode_array,T)(int numBits, size_t packStart, size_t datasize, 
                              GRIBPACK *restrict lGrib,
                              const T *restrict data, 
                              T zref, T factor, size_t *gz)
{
  size_t pos = *gz;

  // from here on only the part that is actually packed is of interest
  data += packStart;
  datasize -= packStart;

  switch (numBits)
    {
    case 0:
      // nothing to pack
      break;

    case 8:
      for (size_t k = 0; k < datasize; ++k)
        lGrib[pos++] = (GRIBPACK) CGRIBEX_FPSCALE(data[k]);
      break;

    case 16:
      if (sizeof(T) == sizeof(double))
        grib_encode_array_2byte_double(datasize, lGrib, (const double *)(const void *)data, zref, factor, &pos);
      else
        TEMPLATE(encode_array_2byte,T)(datasize, lGrib, data, zref, factor, &pos);
      break;

    case 24:
      for (size_t k = 0; k < datasize; ++k)
        {
          const uint32_t packed = (uint32_t) CGRIBEX_FPSCALE(data[k]);
          lGrib[pos  ] = (GRIBPACK)(packed >> 16);
          lGrib[pos+1] = (GRIBPACK)(packed >>  8);
          lGrib[pos+2] = (GRIBPACK)packed;
          pos += 3;
        }
      break;

    case 32:
      for (size_t k = 0; k < datasize; ++k)
        {
          const uint32_t packed = (uint32_t) CGRIBEX_FPSCALE(data[k]);
          lGrib[pos  ] = (GRIBPACK)(packed >> 24);
          lGrib[pos+1] = (GRIBPACK)(packed >> 16);
          lGrib[pos+2] = (GRIBPACK)(packed >>  8);
          lGrib[pos+3] = (GRIBPACK)packed;
          pos += 4;
        }
      break;

    default:
      if (numBits > 0 && numBits <= 32)
        TEMPLATE(encode_array_common,T)(numBits, 0, datasize, lGrib, data, zref, factor, &pos);
      else
        Error("Unimplemented packing factor %d!", numBits);
      break;
    }

  *gz = pos;
}

/*
 * Chunked ("unrolled") variant of encode_array.
 *
 * Packs data[packStart .. datasize-1] with numBits bits per value into lGrib,
 * starting at byte offset *gz; *gz is updated to the end of the packed data.
 * The values are processed in chunks of CGRIBEX__UNROLL_DEPTH_2 elements:
 * a chunk is first scaled with CGRIBEX_FPSCALE into the local buffer dval and
 * then converted and stored. The values left over after the last full chunk
 * (the "residual") are handled the same way afterwards.
 *
 * Widths 8, 16, 24 and 32 have dedicated branches, every other width in 1..32
 * is forwarded to the generic bit packer, width 0 writes nothing and any other
 * width is reported via Error().
 *
 * Fix: the 16-bit and 24-bit branches used to read data[j] for every chunk and
 * for the residual, i.e. they kept re-encoding the first values of the array.
 * They now read data[i+j] and data[ofs+j] like the 8-bit and 32-bit branches.
 *
 * NOTE(review): the 16-bit and 32-bit branches store through uint16_t /
 * unsigned int pointers derived from lGrib + z; this relies on the target
 * tolerating unaligned stores.
 */
static
void TEMPLATE(encode_array_unrolled,T)(int numBits, size_t packStart, size_t datasize, 
                                       GRIBPACK *restrict lGrib,
                                       const T *restrict data, 
                                       T zref, T factor, size_t *gz)
{
  size_t z = *gz;
#ifdef _ARCH_PWR6
  enum { CGRIBEX__UNROLL_DEPTH_2 = 8 };
#else
  enum { CGRIBEX__UNROLL_DEPTH_2 = 128 };
#endif
  double dval[CGRIBEX__UNROLL_DEPTH_2];  // scaled values of the chunk being packed

  data += packStart;
  datasize -= packStart;

  const size_t residual = datasize % CGRIBEX__UNROLL_DEPTH_2;  // values behind the last full chunk
  const size_t ofs = datasize - residual;                      // index of the first residual value

  // reducing FP operations to single FMA is slowing down on pwr6 ...

  if      ( numBits ==  8 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(2, "pack 8 bit unrolled");
#endif
      unsigned char *cgrib = (unsigned char *) (lGrib + z);
      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
        {
          for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
            {
              dval[j] = CGRIBEX_FPSCALE(data[i+j]);
            }
          for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
            {
#ifdef _ARCH_PWR6
              *cgrib++ =  (unsigned long) dval[j];
#else
              *cgrib++ =  (unsigned char) dval[j];
#endif
            }
          z += CGRIBEX__UNROLL_DEPTH_2;
        }
      for (size_t j = 0; j < residual; ++j) 
        {
          dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
        }
      for (size_t j = 0; j < residual; ++j) 
        {
#ifdef _ARCH_PWR6
          *cgrib++ = (unsigned long) dval[j];
#else
          *cgrib++ = (unsigned char) dval[j];
#endif
        }
      z += residual;

#ifdef _GET_IBM_COUNTER 
      hpmStop(2);
#endif
    }
  else if ( numBits == 16 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(3, "pack 16 bit unrolled");
#endif
#ifdef _ARCH_PWR6
      unsigned long ival;
#else
      uint16_t ival;
#endif
      uint16_t *sgrib = (uint16_t *)(void *)(lGrib+z);

      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
        {
          // scale the chunk that starts at index i (was data[j]: always the first chunk)
          for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
            dval[j] = CGRIBEX_FPSCALE(data[i+j]);
          if ( IS_BIGENDIAN() )
            {
              for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
                {
#ifdef _ARCH_PWR6
                  *sgrib++ = (unsigned long) dval[j];
#else
                  *sgrib++ = (uint16_t) dval[j];
#endif
                }
              z += 2*CGRIBEX__UNROLL_DEPTH_2;
            }
          else
            {
              for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
                {
                  ival = (uint16_t) dval[j];
                  *sgrib++ = gribSwapByteOrder_uint16(ival);
                }
              z += 2*CGRIBEX__UNROLL_DEPTH_2;
            }
        }
      // scale the residual values (was data[j]: the first values of the array)
      for (size_t j = 0; j < residual; ++j) 
        {
          dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
        }
      if ( IS_BIGENDIAN() )
        {
          for (size_t j = 0; j < residual; ++j) 
            {
#ifdef _ARCH_PWR6
              *sgrib++ = (unsigned long) dval[j];
#else
              *sgrib++ = (uint16_t) dval[j];
#endif
            }
          z += 2*residual;
        }
      else
        {
          for (size_t j = 0; j < residual; ++j) 
            {
              ival = (uint16_t) dval[j];
              lGrib[z  ] = (GRIBPACK)(ival >>  8);
              lGrib[z+1] = (GRIBPACK)ival;
              z += 2;
            }
        }
#ifdef _GET_IBM_COUNTER 
      hpmStop(3);
#endif
    }
  else if ( numBits == 24 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(4, "pack 24 bit unrolled");
#endif
#ifdef _ARCH_PWR6
      unsigned long ival;
#else
      uint32_t ival;
#endif
      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
        {
          // scale the chunk that starts at index i (was data[j]: always the first chunk)
          for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
            {
              dval[j] = CGRIBEX_FPSCALE(data[i+j]);
            }
          for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
            {
#ifdef _ARCH_PWR6
              ival = (unsigned long) dval[j];
#else
              ival = (uint32_t) dval[j];
#endif
              lGrib[z  ] =  (GRIBPACK)(ival >> 16);
              lGrib[z+1] =  (GRIBPACK)(ival >>  8);
              lGrib[z+2] =  (GRIBPACK)ival;
              z += 3;
            }
        }
      // scale the residual values (was data[j]: the first values of the array)
      for (size_t j = 0; j < residual; ++j) 
        {
          dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
        }
      for (size_t j = 0; j < residual; ++j) 
        {
          ival = (uint32_t) dval[j];
          lGrib[z  ] =  (GRIBPACK)(ival >> 16);
          lGrib[z+1] =  (GRIBPACK)(ival >>  8);
          lGrib[z+2] =  (GRIBPACK)ival;
          z += 3;
        }
#ifdef _GET_IBM_COUNTER 
      hpmStop(4);
#endif
    }
  else if ( numBits == 32 )
    {
#ifdef _GET_IBM_COUNTER 
      hpmStart(5, "pack 32 bit unrolled");
#endif
#ifdef _ARCH_PWR6
      unsigned long ival;
#else
      uint32_t ival;
#endif
      unsigned int *igrib = (unsigned int *)(void *)(lGrib + z);
      for (size_t i = 0; i < ofs; i += CGRIBEX__UNROLL_DEPTH_2)
        {
          for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j) dval[j] = CGRIBEX_FPSCALE(data[i+j]);

          if ( IS_BIGENDIAN() )
            {
              for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
                {
#ifdef _ARCH_PWR6
                  *igrib = (unsigned long) dval[j];
#else
                  *igrib = (uint32_t) dval[j];
#endif
                  igrib++;
                  z += 4;
                }
            }
          else
            {
              for (size_t j = 0; j < CGRIBEX__UNROLL_DEPTH_2; ++j)
                {
                  ival = (uint32_t) dval[j];
                  lGrib[z  ] =  (GRIBPACK)(ival >> 24);
                  lGrib[z+1] =  (GRIBPACK)(ival >> 16);
                  lGrib[z+2] =  (GRIBPACK)(ival >>  8);
                  lGrib[z+3] =  (GRIBPACK)ival;
                  z += 4;
                }
            }
        }
      for (size_t j = 0; j < residual; ++j) 
        {
          dval[j] = CGRIBEX_FPSCALE(data[ofs+j]);
        }
      if ( IS_BIGENDIAN() )
        {
          for (size_t j = 0; j < residual; ++j) 
            {
#ifdef _ARCH_PWR6
              *igrib = (unsigned long) dval[j];
#else
              *igrib = (uint32_t) dval[j];
#endif
              igrib++;
              z += 4;
            }
        }
      else
        {
          for (size_t j = 0; j < residual; ++j) 
            {
              ival = (uint32_t) dval[j];
              lGrib[z  ] =  (GRIBPACK)(ival >> 24);
              lGrib[z+1] =  (GRIBPACK)(ival >> 16);
              lGrib[z+2] =  (GRIBPACK)(ival >>  8);
              lGrib[z+3] =  (GRIBPACK)ival;
              z += 4;
            }
        }
#ifdef _GET_IBM_COUNTER 
      hpmStop(5);
#endif
    }
  else if ( numBits > 0 && numBits <= 32 )
    {
      TEMPLATE(encode_array_common,T)(numBits, 0, datasize, lGrib, data, zref, factor, &z);
    }
  else if ( numBits == 0 )
    {
      // nothing to pack
    }
  else
    {
      Error("Unimplemented packing factor %d!", numBits);
    }

  *gz = z;
}

#ifdef CGRIBEX_FPSCALE
#undef CGRIBEX_FPSCALE
#endif

#endif /* T */


#ifdef T
#undef T
#endif
#define T double
#ifdef T

// GRIB BLOCK 2 - GRID DESCRIPTION SECTION
//
// Appends the grid description section to lGrib at offset *gribLen and stores
// the new end offset back into *gribLen.
//
// The section length starts at 32 bytes and grows by 10 bytes for Lambert
// (LCC) and rotated lat/lon grids, by 2 bytes per latitude row for reduced
// grids and by 4 bytes per vertical coordinate parameter. The grid specific
// part is selected by ISEC2_GridType; an unknown type is reported via Error().
// After the grid specific part the ISEC2_NumVCP vertical coordinate parameters
// (fsec2[10], fsec2[11], ...) and, for reduced grids, the number of points per
// latitude row are written.
//
// NOTE(review): the Put* macros are defined elsewhere in this file; they
// presumably write through the locals lGrib/z and use ival, exponent and
// mantissa as scratch variables - confirm before renaming or removing any of
// these locals.
static
void TEMPLATE(encodeGDS,T)(GRIBPACK *lGrib, long *gribLen, int *isec2, T *fsec2)
{
  long z = *gribLen;
  int exponent, mantissa;
  int ival;
  int gdslen = 32;

  if ( ISEC2_GridType == GRIB1_GTYPE_LCC ) gdslen += 10;

  if ( ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )  gdslen += 10;

  // Position of the trailing lists, taken before their sizes are added to
  // gdslen; 0xFF when neither list is present.
  const int pvoffset = (ISEC2_NumVCP || ISEC2_Reduced) ? gdslen + 1 : 0xFF;

  if ( ISEC2_Reduced ) gdslen += 2 * ISEC2_NumLat;

  gdslen += ISEC2_NumVCP * 4;

  Put3Byte(gdslen);             /*  0- 2 Length of Block 2 Byte 0 */
  Put1Byte(ISEC2_NumVCP);       /*  3    NV */
  Put1Byte(pvoffset);           /*  4    PV */
  Put1Byte(ISEC2_GridType);     /*  5    LatLon=0 Gauss=4 Spectral=50 */

  if ( ISEC2_GridType == GRIB1_GTYPE_SPECTRAL )
    {
      Put2Byte(ISEC2_PentaJ);   /*  6- 7 Pentagonal resolution J  */
      Put2Byte(ISEC2_PentaK);   /*  8- 9 Pentagonal resolution K  */
      Put2Byte(ISEC2_PentaM);   /* 10-11 Pentagonal resolution M  */
      Put1Byte(ISEC2_RepType);  /* 12    Representation type      */
      Put1Byte(ISEC2_RepMode);  /* 13    Representation mode      */
      PutnZero(18);             /* 14-31 reserved                 */
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_GME )
    {
      Put2Byte(ISEC2_GME_NI2);         /*  6- 7 */
      Put2Byte(ISEC2_GME_NI3);         /*  8- 9 */
      Put3Byte(ISEC2_GME_ND);          /* 10-12 */
      Put3Byte(ISEC2_GME_NI);          /* 13-15 */
      Put1Byte(ISEC2_GME_AFlag);       /* 16    */
      Put3Int(ISEC2_GME_LatPP);        /* 17-19 */
      Put3Int(ISEC2_GME_LonPP);        /* 20-22 */
      Put3Int(ISEC2_GME_LonMPL);       /* 23-25 */
      Put1Byte(ISEC2_GME_BFlag);       /* 26    */
      PutnZero(5);                     /* 27-31 zero filled              */
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_LCC )
    {
      Put2Byte(ISEC2_NumLon);          /*  6- 7 Longitudes               */

      Put2Byte(ISEC2_NumLat);          /*  8- 9 Latitudes                */
      Put3Int(ISEC2_FirstLat);         /* 10-12 */
      Put3Int(ISEC2_FirstLon);         /* 13-15 */
      Put1Byte(ISEC2_ResFlag);         /* 16    Resolution flag          */
      Put3Int(ISEC2_Lambert_Lov);      /* 17-19 */
      Put3Int(ISEC2_Lambert_dx);       /* 20-22 */
      Put3Int(ISEC2_Lambert_dy);       /* 23-25 */
      Put1Byte(ISEC2_Lambert_ProjFlag);/* 26    Projection flag          */
      Put1Byte(ISEC2_ScanFlag);        /* 27    Scanning mode            */
      Put3Int(ISEC2_Lambert_LatS1);    /* 28-30 */  
      Put3Int(ISEC2_Lambert_LatS2);    /* 31-33 */
      Put3Int(ISEC2_Lambert_LatSP);    /* 34-36 */  
      Put3Int(ISEC2_Lambert_LonSP);    /* 37-39 */
      PutnZero(2);                     /* 40-41 zero filled              */
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_LATLON    ||
	    ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN  ||
	    ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
    {
      // reduced grids have no common number of longitudes: all bits set
      const int numlon = ISEC2_Reduced ? 0xFFFF : ISEC2_NumLon;
      Put2Byte(numlon);                /*  6- 7 Number of Longitudes     */

      Put2Byte(ISEC2_NumLat);          /*  8- 9 Number of Latitudes      */
      Put3Int(ISEC2_FirstLat);         /* 10-12 */
      Put3Int(ISEC2_FirstLon);         /* 13-15 */
      Put1Byte(ISEC2_ResFlag);         /* 16    Resolution flag          */
      Put3Int(ISEC2_LastLat);          /* 17-19 */
      Put3Int(ISEC2_LastLon);          /* 20-22 */
      // a resolution flag of 0 means the increments are written as all bits set
      const unsigned lonIncr = (ISEC2_ResFlag == 0) ? 0xFFFF : (unsigned)ISEC2_LonIncr;
      const unsigned latIncr = (ISEC2_ResFlag == 0) ? 0xFFFF : (unsigned)ISEC2_LatIncr;
      Put2Byte(lonIncr);               /* 23-24 i - direction increment  */
      if ( ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN )
	Put2Byte(ISEC2_NumPar);        /* 25-26 Latitudes Pole->Equator  */
      else
	Put2Byte(latIncr);             /* 25-26 j - direction increment  */

      Put1Byte(ISEC2_ScanFlag);        /* 27    Scanning mode            */
      PutnZero(4);                     /* 28-31 reserved                 */

      if ( ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
	{
	  Put3Int(ISEC2_LatSP);                 /* 32-34 */
	  Put3Int(ISEC2_LonSP);                 /* 35-37 */
	  Put1Real((double)(FSEC2_RotAngle));   /* 38-41 */
	}
    }
  else
    {
      Error("Unsupported grid type %d", ISEC2_GridType);
    }

  // vertical coordinate parameters, 4 bytes each
#if defined (SX)
#pragma vdir novector     /* vectorization gives wrong results on NEC */
#endif
  for (long i = 0; i < ISEC2_NumVCP; ++i)
    {
      Put1Real((double)(fsec2[10+i]));
    }

  // reduced grid: number of points of every latitude row, 2 bytes each
  if ( ISEC2_Reduced )
    for (long i = 0; i < ISEC2_NumLat; ++i) Put2Byte(ISEC2_ReducedPoints(i));

  *gribLen = z;
}

// GRIB BLOCK 3 - BIT MAP SECTION
//
// Writes the bit map section at offset *gribLen of lGrib: a 6 byte header
// followed by one bit per input value (most significant bit first, 1 = value
// present, 0 = value equals the missing value FSEC3_MissVal). The bit map is
// padded with zero bits to a whole number of bytes.
// As a side effect the present values are moved to the front of data; their
// number is returned in *datasize. *gribLen is advanced by the section length.
static
void TEMPLATE(encodeBMS,T)(GRIBPACK *lGrib, long *gribLen, T *fsec3, int *isec4, T *data, long *datasize)
{
  static bool lmissvalinfo = true;  // cleared once the NaN note has been printed
  long z = *gribLen;                // write position used by the Put* macros

  if ( lmissvalinfo && DBL_IS_NAN(FSEC3_MissVal) )
    {
      lmissvalinfo = false;
      Message("Missing value = NaN is unsupported!");
    }

  const long numPoints = ISEC4_NumValues;
  const long paddedBits = ((numPoints+7)>>3)<<3;  // bit map size rounded up to full bytes
  const long numBytes = paddedBits/8;
  GRIBPACK *bitmap = &lGrib[z+6];                 // the bit map follows the 6 byte header
  long numPresent = 0;

#ifdef VECTORCODE
  // one flag per bit first, the flags are combined into bytes afterwards
  unsigned int *imask = (unsigned int*) Malloc(paddedBits*sizeof(unsigned int));
  memset(imask, 0, paddedBits*sizeof(unsigned int));

#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
  for (long i = 0; i < numPoints; ++i)
    {
      if ( IS_NOT_EQUAL(data[i], FSEC3_MissVal) )
        {
          data[numPresent++] = data[i];
          imask[i] = 1;
        }
    }

#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
  for (long ib = 0; ib < numBytes; ++ib)
    {
      const unsigned int *bits = &imask[ib*8];
      bitmap[ib] = (bits[0] << 7) | (bits[1] << 6) |
                   (bits[2] << 5) | (bits[3] << 4) |
                   (bits[4] << 3) | (bits[5] << 2) |
                   (bits[6] << 1) | (bits[7]);
    }

  Free(imask);
#else
  for (long ib = 0; ib < numBytes; ++ib) bitmap[ib] = 0;

  for (long i = 0; i < numPoints; ++i)
    {
      if ( IS_NOT_EQUAL(data[i], FSEC3_MissVal) )
        {
          // keep the value and set its bit, counting from the most significant bit
          data[numPresent++] = data[i];
          bitmap[i/8] |= (GRIBPACK)(1<<(7-(i&7)));
        }
    }
#endif

  const long bmsLen = numBytes + 6;
  const long bmsUnusedBits = paddedBits - numPoints;

  Put3Byte(bmsLen);   /*  0- 2 Length of Block 3 Byte 0 */
  Put1Byte(bmsUnusedBits);
  Put2Byte(0);

  *gribLen += bmsLen;

  *datasize = numPresent;
}

// TEMPLATE(pow,T) expands to pow_double or pow_float; map both names to the
// matching C library function.
#define pow_double pow
#define pow_float powf

// GRIB BLOCK 4 - BINARY DATA SECTION
//
// Writes the binary data section for data at offset *gribLen of lGrib and
// stores the new end offset back into *gribLen.
//
//   decscale  decimal scale exponent; if non-zero every value is multiplied
//             in place by 10^decscale before packing
//   isec2     grid description integers, may be NULL; if present, isec2[0]
//             decides whether the field is spectral and isec4[2] is
//             overwritten accordingly
//   isec4     data section integers (bits per value, packing flags, ...)
//   datasize  number of values in data
//   data      values to encode; modified in place (decimal scaling and, for
//             complex packing, the calls to scale_complex / gather_complex)
//   datstart  out: size of the section header in bytes (11 plus the
//             extension used for spectral data)
//   datsize   out: number of bytes needed for the packed values
//   code      only used in diagnostic messages
//
// Returns 0 on success, 807 if the complex packing flags in isec2 and isec4
// contradict each other and 707 if the minimum value or the binary scale
// factor cannot be represented.
//
// NOTE(review): the Put* macros are defined elsewhere in this file; they
// presumably write through the locals lGrib/z and use ival, exponent and
// mantissa as scratch variables - confirm before renaming or removing any of
// these locals.
static
int TEMPLATE(encodeBDS,T)(GRIBPACK *lGrib, long *gribLen, int decscale, int *isec2, int *isec4, long datasize, T *data,
			  long *datstart, long *datsize, int code)
{
  // Uwe Schulzweida, 11/04/2003 : Check that number of bits per value is not exceeded
  // Uwe Schulzweida,  6/05/2003 : Copy result to fpval to prevent integer overflow

  size_t z = (size_t)*gribLen;
  int numBits;
  int ival;
  long PackStart = 0;             // index of the first value that is bit packed
  int Flag = 0;
  int binscale = 0;
  int bds_head = 11;              // fixed part of the section header in bytes
  int bds_ext = 0;                // additional header bytes for spectral data
  /* ibits = BitsPerInt; */
  int exponent, mantissa;
  bool lspherc = false;
  int isubset = 0, itemp = 0, itrunc = 0;
  T factor = 1, fmin, fmax;
  const double jpepsln = 1.0e-12; // -----> tolerance used to check equality
                                  //        of floating point numbers - needed
		                  //        on some platforms (eg vpp700, linux)
  extern int CGRIBEX_Const;       // 1: Don't pack constant fields on regular grids

  if ( isec2 )
    {
      /* If section 2 is present, it says if data is spherical harmonic */

      lspherc =  ( isec2[0] == 50 || isec2[0] == 60 ||
                   isec2[0] == 70 || isec2[0] == 80 );

      isec4[2] = lspherc ? 128 : 0;
    }
  else
    {
      /* Section 4 says if it's spherical harmonic data.. */

      lspherc = ( isec4[2] == 128 );
    }

  /* Complex packing supported for spherical harmonics. */

  const bool lcomplex = ( lspherc && ( isec4[3] == 64 ) ) ||
                        ( lspherc && isec2 && ( isec2[5] == 2 ) );

  // Check input specification is consistent

  if ( lcomplex && isec2 )
    {
      if ( ( isec4[3] != 64 ) && ( isec2[5] == 2 ) )
	{
	  gprintf(__func__, "  COMPLEX mismatch. isec4[3] = %d\n", isec4[3]);
	  gprintf(__func__, "  COMPLEX mismatch. isec2[5] = %d\n", isec2[5]);
	  return (807);
	}
      else if ( ( isec4[3] == 64 ) && ( isec2[5] != 2 ) )
	{
	  gprintf(__func__, "  COMPLEX mismatch. isec4[3] = %d\n", isec4[3]);
	  gprintf(__func__, "  COMPLEX mismatch. isec2[5] = %d\n", isec2[5]);
	  return (807);
        }
      else if ( lcomplex )
	{
          // Truncation of full spectrum, which is supposed triangular, has to be diagnosed. Define also sub-set truncation.
	  isubset = isec4[17];
	  // When encoding, use the total number of data.
	  itemp   = isec4[0];
	  itrunc  = (int) (sqrt(itemp*4 + 1.) - 3) / 2;
	}
    }

  // apply the decimal scale factor in place
  if ( decscale )
    {
      const T scale = TEMPLATE(pow,T)((T)10.0, (T)decscale);
      for (long i = 0; i < datasize; ++i) data[i] *= scale;
    }

  // spectral data: the leading values are stored unpacked in the header extension
  if ( lspherc )
    {
      if ( lcomplex )
	{
	  const int jup  = isubset;
	  const int ioff = (jup+1)*(jup+2);
	  bds_ext = 4 + 3 + 4*ioff;
	  PackStart = ioff;
	  Flag = 192;
	}
      else
	{
	  bds_ext = 4;
	  PackStart = 1;
	  Flag = 128;
	}
    }

  *datstart = bds_head + bds_ext;

  int nbpv = numBits = ISEC4_NumBits;

  if ( lspherc && lcomplex )
    {
      const int pcStart = isubset;
      const int pcScale = isec4[16];
      TEMPLATE(scale_complex,T)(data, pcStart, pcScale, itrunc, 0);
      TEMPLATE(gather_complex,T)(data, (size_t)pcStart, (size_t)itrunc, (size_t)datasize);
    }

  // minimum and maximum of the values that are bit packed
  fmin = fmax = data[PackStart];

  TEMPLATE(minmax_val,T)(data+PackStart, datasize-PackStart, &fmin, &fmax);

  // the minimum becomes the reference value; it has to be finite
  double zref = (double)fmin;
  if (!(zref < DBL_MAX && zref > -DBL_MAX))
    {
      gprintf(__func__, "Minimum value out of range: %g!", zref);
      return (707);
    }

  // constant non-spectral field: store the reference value only (0 bits per value)
  if ( CGRIBEX_Const && !lspherc )
    {
      if ( IS_EQUAL(fmin, fmax) ) nbpv = 0;
    }

  // section length: header plus packed data, rounded up to an even number of bytes
  long blockLength = (*datstart) + (nbpv*(datasize - PackStart) + 7)/8;
  blockLength += blockLength & 1;

  const long unused_bits = blockLength*8 - (*datstart)*8 - nbpv*(datasize - PackStart);

  Flag += (int)unused_bits;


  // Adjust number of bits per value if full integer length to avoid hitting most significant bit (sign bit).
  // if( nbpv == ibits ) nbpv = nbpv - 1;
  /*
    Calculate the binary scaling factor to spread the range of values over the number of bits per value.
    Limit scaling to 2**-126 to 2**127 (using IEEE 32-bit floats as a guideline).
  */
  const double range = fabs(fmax - fmin);

  if ( fabs(fmin) < FLT_MIN ) fmin = 0;
  /*
    Have to allow tolerance in comparisons on some platforms (eg vpp700 and linux),
    such as 0.9999999999999999 = 1.0, to avoid clipping ranges which are a power of 2.
  */
  if ( range <= jpepsln )
    {
      binscale = 0;
    }
  else if ( IS_NOT_EQUAL(fmin, 0.0) && (fabs(range/fmin) <= jpepsln) )
    {
      binscale = 0;
    }
  else if ( fabs(range-1.0) <= jpepsln )
    {
      binscale = 1 - nbpv;
    }
  else if ( range > 1.0 )
    {
      // smallest jloop with 2^jloop > range (within the tolerance)
      const double rangec = range + jpepsln;
      double p2 = 2.0;
      int jloop = 1;
      while ( jloop < 128 && p2 <= rangec )
        {
          p2 *= 2.0;
          ++jloop;
        }
      if (jloop < 128)
        binscale = jloop - nbpv;
      else
        {
          gprintf(__func__, "Problem calculating binary scale value for encode code %d!", code);
          gprintf(__func__, "> range %g rangec %g fmin %g fmax %g", range, rangec, fmin, fmax);
          return (707);
        }
    }
  else
    {
      // smallest jloop with 2^-jloop < range (within the tolerance)
      const double rangec = range - jpepsln;
      double p05 = 0.5;
      int jloop = 1;
      while ( jloop < 127 && p05 >= rangec )
	{
          p05 *= 0.5;
          jloop++;
	}
      if ( jloop < 127 )
	{
	  binscale = 1 - jloop - nbpv;
	}
      else
	{
	  gprintf(__func__, "Problem calculating binary scale value for encode code %d!", code);
	  gprintf(__func__, "< range %g rangec %g fmin %g fmax %g", range, rangec, fmin, fmax);
	  return (707);
	}
    }

  // largest integer that fits into nbpv bits
  const uint64_t max_nbpv_pow2 = (uint64_t) ((1ULL << nbpv) - 1);

  if ( binscale != 0 )
    {
      // raise the scale until the scaled and rounded range fits into nbpv bits
      while ( (uint64_t)(ldexp(range, -binscale)+0.5) > max_nbpv_pow2 ) binscale++;

      factor = (T)intpow2(-binscale);
    }

  ref2ibm(&zref, BitsPerInt);

  Put3Byte(blockLength);      //  0-2 Length of Block 4
  Put1Byte(Flag);             //  3   Flag & Unused bits
  // a negative scale factor is stored as 32768 plus its magnitude
  if ( binscale < 0 ) binscale = 32768 - binscale;
  Put2Byte(binscale);         //  4-5 Scale factor
  Put1Real(zref);             //  6-9 Reference value
  Put1Byte(nbpv);             //   10 Packing size

  if ( lspherc )
    {
      if ( lcomplex )
	{
	  const int jup = isubset;
	  int ioff = (int)z + bds_ext;
	  // the offset is written with two bytes; larger values are written as 0
	  if ( ioff > 0xFFFF ) ioff = 0;
	  Put2Byte(ioff);
	  Put2Int(isec4[16]);
	  Put1Byte(jup);
	  Put1Byte(jup);
	  Put1Byte(jup);
	  // the leading (jup+1)*(jup+2) values are stored unpacked
	  for (long i = 0; i < ((jup+1)*(jup+2)); ++i) Put1Real((double)(data[i]));
	}
      else
	{
	  // the first value is stored unpacked
	  Put1Real((double)(data[0]));
	}
    }

  *datsize  = ((datasize-PackStart)*nbpv + 7)/8;

#if  defined  (_ARCH_PWR6)
  TEMPLATE(encode_array_unrolled,T)(nbpv, (size_t)PackStart, (size_t)datasize, lGrib, data, (T)zref, factor, &z);
#else
  TEMPLATE(encode_array,T)(nbpv, (size_t)PackStart, (size_t)datasize, lGrib, data, (T)zref, factor, &z);
#endif

  if ( unused_bits >= 8 ) Put1Byte(0);  //  Fillbyte

  *gribLen = (long)z;

  return 0;
}


/*
 * Encode one GRIB edition 1 record into kgrib.
 *
 *   isec0   out: ISEC0_GRIB_Len and ISEC0_GRIB_Version are set on success
 *   isec1   product definition integers; ISEC1_Sec2Or3Flag selects whether a
 *           grid description (bit 128) and a bit map section (bit 64) are written
 *   isec2   grid description integers, fsec2 the matching floating point values
 *   isec3   unused
 *   fsec3   bit map floating point values (missing value)
 *   isec4   data section integers (number of values, bits per value, ...)
 *   fsec4   the field values; modified in place by the bit map and data
 *           section encoders
 *   klenp   number of values, used for the size estimate of the record
 *   kgrib   output buffer, kleng is its size in units of int
 *   kword   out: number of int words of kgrib occupied by the record
 *   efunc   unused
 *   kret    out: 0 on success, otherwise the status of the data section encoder
 *
 * If the data section encoder fails, *kret is set and the function returns
 * without touching isec0 and *kword.
 *
 * Fixes: the buffer size diagnostics printed a long (and a size_t) with %d;
 * the VECTORCODE scratch buffer was not released on the early error return.
 *
 * NOTE(review): the size check of kgrib happens after the record has been
 * written, so it can only report an overflow, not prevent it.
 */
void TEMPLATE(grib_encode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *isec3,
                             T *fsec3, int *isec4, T *fsec4, int klenp, int *kgrib,
                             int kleng, int *kword, int efunc, int *kret)
{
  long gribLen = 0; // Counter of GRIB length for output
  long fsec4size = 0;
  long datstart, datsize;

  UNUSED(isec3);
  UNUSED(efunc);

  grsdef();

  unsigned char *CGrib = (unsigned char *) kgrib;

  const bool gdsIncluded = ISEC1_Sec2Or3Flag & 128;
  const bool bmsIncluded = ISEC1_Sec2Or3Flag & 64;

  // set max header len
  size_t len = 16384;

  // add data len
  const size_t numBytes = (size_t)((ISEC4_NumBits+7)>>3);

  len += numBytes*(size_t)klenp;

  // add bitmap len
  if ( bmsIncluded ) len += (size_t)((klenp+7)>>3);

#ifdef VECTORCODE
  // vector machines pack into a scratch buffer which is copied to kgrib at the end
  GRIBPACK *lGrib = (GRIBPACK*) Malloc(len*sizeof(GRIBPACK));
  if ( lGrib == NULL ) SysError("No Memory!");
#else
  GRIBPACK *lGrib = CGrib;
#endif

  // indicator section (8 bytes) followed by the product definition section
  const long isLen = 8;
  encodeIS(lGrib, &gribLen);
  GRIBPACK *lpds = &lGrib[isLen];
  const long pdsLen = getPdsLen(isec1);

  encodePDS(lpds, pdsLen,  isec1);
  gribLen += pdsLen;
  /*
  if ( ( isec4[3] == 64 ) && ( isec2[5] == 2 ) )
    {
      static bool lwarn_cplx = true;

      if ( lwarn_cplx )
	Message("Complex packing of spectral data unsupported, using simple packing!");

      isec2[5] = 1;
      isec4[3] = 0;

      lwarn_cplx = false;
    }
  */
  if ( gdsIncluded ) TEMPLATE(encodeGDS,T)(lGrib, &gribLen, isec2, fsec2);
  /*
    ----------------------------------------------------------------
    BMS Bit-Map Section Section (Section 3)
    ----------------------------------------------------------------
  */ 
  if ( bmsIncluded )
    {
      // the bit map encoder compacts fsec4 and reports the number of values left
      TEMPLATE(encodeBMS,T)(lGrib, &gribLen, fsec3, isec4, fsec4, &fsec4size);
    }
  else
    {
      fsec4size = ISEC4_NumValues;
    }

  const long bdsstart = gribLen;
  int status = TEMPLATE(encodeBDS,T)(lGrib, &gribLen, ISEC1_DecScaleFactor, isec2,
                                     isec4, fsec4size, fsec4, &datstart, &datsize, ISEC1_Parameter);
  if ( status )
    {
#ifdef VECTORCODE
      Free(lGrib);  // the scratch buffer is owned by this function
#endif
      *kret = status;
      return;
    }

  encodeES(lGrib, &gribLen, bdsstart);

  if ( (size_t) gribLen > (size_t)kleng*sizeof(int) )
    Error("kgrib buffer too small! kleng = %d  gribLen = %ld", kleng, gribLen);

#ifdef VECTORCODE
  if ( (size_t) gribLen > len )
    Error("lGrib buffer too small! len = %zu  gribLen = %ld", len, gribLen);

  (void) PACK_GRIB(lGrib, (unsigned char *)CGrib, gribLen, -1L);

  Free(lGrib);
#endif

  ISEC0_GRIB_Len     = (int)gribLen;
  ISEC0_GRIB_Version = 1;

  // record length in int words, rounded up
  *kword = (int)((gribLen + (long)sizeof(int) - 1) / (long)sizeof(int));

  *kret = status;
}

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * End:
 */

#ifdef T
#undef T
#endif
#define T float
#ifdef T

// GRIB BLOCK 2 - GRID DESCRIPTION SECTION
//
// Appends the grid description section to lGrib at offset *gribLen and stores
// the new end offset back into *gribLen.
//
// The section length starts at 32 bytes and grows by 10 bytes for Lambert
// (LCC) and rotated lat/lon grids, by 2 bytes per latitude row for reduced
// grids and by 4 bytes per vertical coordinate parameter. The grid specific
// part is selected by ISEC2_GridType; an unknown type is reported via Error().
// After the grid specific part the ISEC2_NumVCP vertical coordinate parameters
// (fsec2[10], fsec2[11], ...) and, for reduced grids, the number of points per
// latitude row are written.
//
// NOTE(review): the Put* macros are defined elsewhere in this file; they
// presumably write through the locals lGrib/z and use ival, exponent and
// mantissa as scratch variables - confirm before renaming or removing any of
// these locals.
static
void TEMPLATE(encodeGDS,T)(GRIBPACK *lGrib, long *gribLen, int *isec2, T *fsec2)
{
  long z = *gribLen;
  int exponent, mantissa;
  int ival;
  int gdslen = 32;

  if ( ISEC2_GridType == GRIB1_GTYPE_LCC ) gdslen += 10;

  if ( ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )  gdslen += 10;

  // Position of the trailing lists, taken before their sizes are added to
  // gdslen; 0xFF when neither list is present.
  const int pvoffset = (ISEC2_NumVCP || ISEC2_Reduced) ? gdslen + 1 : 0xFF;

  if ( ISEC2_Reduced ) gdslen += 2 * ISEC2_NumLat;

  gdslen += ISEC2_NumVCP * 4;

  Put3Byte(gdslen);             /*  0- 2 Length of Block 2 Byte 0 */
  Put1Byte(ISEC2_NumVCP);       /*  3    NV */
  Put1Byte(pvoffset);           /*  4    PV */
  Put1Byte(ISEC2_GridType);     /*  5    LatLon=0 Gauss=4 Spectral=50 */

  if ( ISEC2_GridType == GRIB1_GTYPE_SPECTRAL )
    {
      Put2Byte(ISEC2_PentaJ);   /*  6- 7 Pentagonal resolution J  */
      Put2Byte(ISEC2_PentaK);   /*  8- 9 Pentagonal resolution K  */
      Put2Byte(ISEC2_PentaM);   /* 10-11 Pentagonal resolution M  */
      Put1Byte(ISEC2_RepType);  /* 12    Representation type      */
      Put1Byte(ISEC2_RepMode);  /* 13    Representation mode      */
      PutnZero(18);             /* 14-31 reserved                 */
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_GME )
    {
      Put2Byte(ISEC2_GME_NI2);         /*  6- 7 */
      Put2Byte(ISEC2_GME_NI3);         /*  8- 9 */
      Put3Byte(ISEC2_GME_ND);          /* 10-12 */
      Put3Byte(ISEC2_GME_NI);          /* 13-15 */
      Put1Byte(ISEC2_GME_AFlag);       /* 16    */
      Put3Int(ISEC2_GME_LatPP);        /* 17-19 */
      Put3Int(ISEC2_GME_LonPP);        /* 20-22 */
      Put3Int(ISEC2_GME_LonMPL);       /* 23-25 */
      Put1Byte(ISEC2_GME_BFlag);       /* 26    */
      PutnZero(5);                     /* 27-31 zero filled              */
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_LCC )
    {
      Put2Byte(ISEC2_NumLon);          /*  6- 7 Longitudes               */

      Put2Byte(ISEC2_NumLat);          /*  8- 9 Latitudes                */
      Put3Int(ISEC2_FirstLat);         /* 10-12 */
      Put3Int(ISEC2_FirstLon);         /* 13-15 */
      Put1Byte(ISEC2_ResFlag);         /* 16    Resolution flag          */
      Put3Int(ISEC2_Lambert_Lov);      /* 17-19 */
      Put3Int(ISEC2_Lambert_dx);       /* 20-22 */
      Put3Int(ISEC2_Lambert_dy);       /* 23-25 */
      Put1Byte(ISEC2_Lambert_ProjFlag);/* 26    Projection flag          */
      Put1Byte(ISEC2_ScanFlag);        /* 27    Scanning mode            */
      Put3Int(ISEC2_Lambert_LatS1);    /* 28-30 */  
      Put3Int(ISEC2_Lambert_LatS2);    /* 31-33 */
      Put3Int(ISEC2_Lambert_LatSP);    /* 34-36 */  
      Put3Int(ISEC2_Lambert_LonSP);    /* 37-39 */
      PutnZero(2);                     /* 40-41 zero filled              */
    }
  else if ( ISEC2_GridType == GRIB1_GTYPE_LATLON    ||
	    ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN  ||
	    ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
    {
      // reduced grids have no common number of longitudes: all bits set
      const int numlon = ISEC2_Reduced ? 0xFFFF : ISEC2_NumLon;
      Put2Byte(numlon);                /*  6- 7 Number of Longitudes     */

      Put2Byte(ISEC2_NumLat);          /*  8- 9 Number of Latitudes      */
      Put3Int(ISEC2_FirstLat);         /* 10-12 */
      Put3Int(ISEC2_FirstLon);         /* 13-15 */
      Put1Byte(ISEC2_ResFlag);         /* 16    Resolution flag          */
      Put3Int(ISEC2_LastLat);          /* 17-19 */
      Put3Int(ISEC2_LastLon);          /* 20-22 */
      // a resolution flag of 0 means the increments are written as all bits set
      const unsigned lonIncr = (ISEC2_ResFlag == 0) ? 0xFFFF : (unsigned)ISEC2_LonIncr;
      const unsigned latIncr = (ISEC2_ResFlag == 0) ? 0xFFFF : (unsigned)ISEC2_LatIncr;
      Put2Byte(lonIncr);               /* 23-24 i - direction increment  */
      if ( ISEC2_GridType == GRIB1_GTYPE_GAUSSIAN )
	Put2Byte(ISEC2_NumPar);        /* 25-26 Latitudes Pole->Equator  */
      else
	Put2Byte(latIncr);             /* 25-26 j - direction increment  */

      Put1Byte(ISEC2_ScanFlag);        /* 27    Scanning mode            */
      PutnZero(4);                     /* 28-31 reserved                 */

      if ( ISEC2_GridType == GRIB1_GTYPE_LATLON_ROT )
	{
	  Put3Int(ISEC2_LatSP);                 /* 32-34 */
	  Put3Int(ISEC2_LonSP);                 /* 35-37 */
	  Put1Real((double)(FSEC2_RotAngle));   /* 38-41 */
	}
    }
  else
    {
      Error("Unsupported grid type %d", ISEC2_GridType);
    }

  // vertical coordinate parameters, 4 bytes each
#if defined (SX)
#pragma vdir novector     /* vectorization gives wrong results on NEC */
#endif
  for (long i = 0; i < ISEC2_NumVCP; ++i)
    {
      Put1Real((double)(fsec2[10+i]));
    }

  // reduced grid: number of points of every latitude row, 2 bytes each
  if ( ISEC2_Reduced )
    for (long i = 0; i < ISEC2_NumLat; ++i) Put2Byte(ISEC2_ReducedPoints(i));

  *gribLen = z;
}

// GRIB BLOCK 3 - BIT MAP SECTION
//
// Writes the bit map section at offset *gribLen of lGrib: a 6 byte header
// followed by one bit per input value (most significant bit first, 1 = value
// present, 0 = value equals the missing value FSEC3_MissVal). The bit map is
// padded with zero bits to a whole number of bytes.
// As a side effect the present values are moved to the front of data; their
// number is returned in *datasize. *gribLen is advanced by the section length.
static
void TEMPLATE(encodeBMS,T)(GRIBPACK *lGrib, long *gribLen, T *fsec3, int *isec4, T *data, long *datasize)
{
  static bool lmissvalinfo = true;  // cleared once the NaN note has been printed
  long z = *gribLen;                // write position used by the Put* macros

  if ( lmissvalinfo && DBL_IS_NAN(FSEC3_MissVal) )
    {
      lmissvalinfo = false;
      Message("Missing value = NaN is unsupported!");
    }

  const long numPoints = ISEC4_NumValues;
  const long paddedBits = ((numPoints+7)>>3)<<3;  // bit map size rounded up to full bytes
  const long numBytes = paddedBits/8;
  GRIBPACK *bitmap = &lGrib[z+6];                 // the bit map follows the 6 byte header
  long numPresent = 0;

#ifdef VECTORCODE
  // one flag per bit first, the flags are combined into bytes afterwards
  unsigned int *imask = (unsigned int*) Malloc(paddedBits*sizeof(unsigned int));
  memset(imask, 0, paddedBits*sizeof(unsigned int));

#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
  for (long i = 0; i < numPoints; ++i)
    {
      if ( IS_NOT_EQUAL(data[i], FSEC3_MissVal) )
        {
          data[numPresent++] = data[i];
          imask[i] = 1;
        }
    }

#if defined (CRAY)
#pragma _CRI ivdep
#endif
#if defined (SX)
#pragma vdir nodep
#endif
#ifdef __uxpch__
#pragma loop novrec
#endif
  for (long ib = 0; ib < numBytes; ++ib)
    {
      const unsigned int *bits = &imask[ib*8];
      bitmap[ib] = (bits[0] << 7) | (bits[1] << 6) |
                   (bits[2] << 5) | (bits[3] << 4) |
                   (bits[4] << 3) | (bits[5] << 2) |
                   (bits[6] << 1) | (bits[7]);
    }

  Free(imask);
#else
  for (long ib = 0; ib < numBytes; ++ib) bitmap[ib] = 0;

  for (long i = 0; i < numPoints; ++i)
    {
      if ( IS_NOT_EQUAL(data[i], FSEC3_MissVal) )
        {
          // keep the value and set its bit, counting from the most significant bit
          data[numPresent++] = data[i];
          bitmap[i/8] |= (GRIBPACK)(1<<(7-(i&7)));
        }
    }
#endif

  const long bmsLen = numBytes + 6;
  const long bmsUnusedBits = paddedBits - numPoints;

  Put3Byte(bmsLen);   /*  0- 2 Length of Block 3 Byte 0 */
  Put1Byte(bmsUnusedBits);
  Put2Byte(0);

  *gribLen += bmsLen;

  *datasize = numPresent;
}

// TEMPLATE(pow,T) expands to pow_double or pow_float; map both names to the
// matching C library function.
#define pow_double pow
#define pow_float powf

// GRIB BLOCK 4 - BINARY DATA SECTION
/*
 * Packs the values in data[] into the binary data section, written into lGrib
 * starting at byte offset *gribLen.
 *
 *   lGrib     output buffer
 *   gribLen   in: start offset of this section; out: offset just behind it
 *   decscale  decimal scale factor; if non-zero data[] is multiplied in place by 10^decscale
 *   isec2     grid description integers, may be NULL; isec2[0] selects spherical harmonics
 *             (50, 60, 70, 80) and isec2[5] == 2 requests complex packing
 *   isec4     data section integers; isec4[2] is overwritten with 128/0 when isec2 is given,
 *             isec4[3] == 64 requests complex packing, isec4[16]/isec4[17] are the
 *             scaling power and sub-set truncation used for complex packing
 *   datasize  number of values in data[]
 *   data      values to pack; modified in place (decimal scaling, complex scaling/gathering)
 *   datstart  out: size of the section header in bytes (fixed part plus extension)
 *   datsize   out: size of the packed values in bytes
 *   code      parameter code, only used in diagnostics
 *
 * Returns 0 on success, 807 for inconsistent complex packing flags and 707 if the
 * minimum value is not finite or no binary scale factor could be determined.
 *
 * NOTE(review): the Put1Byte/Put2Byte/Put3Byte/Put2Int/Put1Real macros are defined outside
 * this block; they presumably write through lGrib at offset z and use the locals ival,
 * exponent and mantissa - confirm before renaming or removing any of these.
 */
static
int TEMPLATE(encodeBDS,T)(GRIBPACK *lGrib, long *gribLen, int decscale, int *isec2, int *isec4, long datasize, T *data,
			  long *datstart, long *datsize, int code)
{
  // Uwe Schulzweida, 11/04/2003 : Check that number of bits per value is not exceeded
  // Uwe Schulzweida,  6/05/2003 : Copy result to fpval to prevent integer overflow

  size_t z = (size_t)*gribLen;
  int numBits;
  int ival;
  long PackStart = 0;  // index of the first value in data[] that is bit-packed
  int Flag = 0;
  int binscale = 0;
  int bds_head = 11;   // fixed header: length(3) + flag(1) + scale(2) + reference value(4) + bits per value(1)
  int bds_ext = 0;     // additional header bytes for spherical harmonics
  /* ibits = BitsPerInt; */
  int exponent, mantissa;
  bool lspherc = false;
  int isubset = 0, itemp = 0, itrunc = 0;
  T factor = 1, fmin, fmax;
  const double jpepsln = 1.0e-12; // -----> tolerance used to check equality
                                  //        of floating point numbers - needed
		                  //        on some platforms (eg vpp700, linux)
  extern int CGRIBEX_Const;       // 1: Don't pack constant fields on regular grids

  if ( isec2 )
    {
      /* If section 2 is present, it says if data is spherical harmonic */

      lspherc =  ( isec2[0] == 50 || isec2[0] == 60 ||
                   isec2[0] == 70 || isec2[0] == 80 );

      isec4[2] = lspherc ? 128 : 0;
    }
  else
    {
      /* Section 4 says if it's spherical harmonic data.. */

      lspherc = ( isec4[2] == 128 );
    }

  /* Complex packing supported for spherical harmonics. */

  const bool lcomplex = ( lspherc && ( isec4[3] == 64 ) ) ||
                        ( lspherc && isec2 && ( isec2[5] == 2 ) );

  // Check input specification is consistent

  if ( lcomplex && isec2 )
    {
      if ( ( isec4[3] != 64 ) && ( isec2[5] == 2 ) )
	{
	  gprintf(__func__, "  COMPLEX mismatch. isec4[3] = %d\n", isec4[3]);
	  gprintf(__func__, "  COMPLEX mismatch. isec2[5] = %d\n", isec2[5]);
	  return (807);
	}
      else if ( ( isec4[3] == 64 ) && ( isec2[5] != 2 ) )
	{
	  gprintf(__func__, "  COMPLEX mismatch. isec4[3] = %d\n", isec4[3]);
	  gprintf(__func__, "  COMPLEX mismatch. isec2[5] = %d\n", isec2[5]);
	  return (807);
        }
      else if ( lcomplex )
	{
          // Truncation of full spectrum, which is supposed triangular, has to be diagnosed. Define also sub-set truncation.
	  isubset = isec4[17];
	  // When encoding, use the total number of data.
	  itemp   = isec4[0];
	  itrunc  = (int) (sqrt(itemp*4 + 1.) - 3) / 2;
	}
    }

  // Apply the decimal scale factor in place before any range analysis.
  if ( decscale )
    {
      const T scale = TEMPLATE(pow,T)((T)10.0, (T)decscale);
      for (long i = 0; i < datasize; ++i) data[i] *= scale;
    }

  // Spherical harmonics: the leading coefficient(s) are stored unpacked in the header
  // extension and excluded from bit-packing (PackStart).
  if ( lspherc )
    {
      if ( lcomplex )
	{
	  const int jup  = isubset;
	  const int ioff = (jup+1)*(jup+2);
	  bds_ext = 4 + 3 + 4*ioff;
	  PackStart = ioff;
	  Flag = 192;
	}
      else
	{
	  bds_ext = 4;
	  PackStart = 1;
	  Flag = 128;
	}
    }

  *datstart = bds_head + bds_ext;

  int nbpv = numBits = ISEC4_NumBits;

  if ( lspherc && lcomplex )
    {
      const int pcStart = isubset;
      const int pcScale = isec4[16];
      TEMPLATE(scale_complex,T)(data, pcStart, pcScale, itrunc, 0);
      TEMPLATE(gather_complex,T)(data, (size_t)pcStart, (size_t)itrunc, (size_t)datasize);
    }

  // Range of the values that will be bit-packed; the minimum becomes the reference value.
  fmin = fmax = data[PackStart];

  TEMPLATE(minmax_val,T)(data+PackStart, datasize-PackStart, &fmin, &fmax);

  double zref = (double)fmin;
  // Written as a negated range test so that NaN is rejected as well.
  if (!(zref < DBL_MAX && zref > -DBL_MAX))
    {
      gprintf(__func__, "Minimum value out of range: %g!", zref);
      return (707);
    }

  // Constant non-spectral field: store only the reference value (0 bits per value).
  if ( CGRIBEX_Const && !lspherc )
    {
      if ( IS_EQUAL(fmin, fmax) ) nbpv = 0;
    }

  // Section length in bytes, rounded up to an even number.
  long blockLength = (*datstart) + (nbpv*(datasize - PackStart) + 7)/8;
  blockLength += blockLength & 1;

  const long unused_bits = blockLength*8 - (*datstart)*8 - nbpv*(datasize - PackStart);

  Flag += (int)unused_bits;


  // Adjust number of bits per value if full integer length to avoid hitting most significant bit (sign bit).
  // if( nbpv == ibits ) nbpv = nbpv - 1;
  /*
    Calculate the binary scaling factor to spread the range of values over the number of bits per value.
    Limit scaling to 2**-126 to 2**127 (using IEEE 32-bit floats as a guideline).
  */
  const double range = fabs(fmax - fmin);

  if ( fabs(fmin) < FLT_MIN ) fmin = 0;
  /*
    Have to allow tolerance in comparisons on some platforms (eg vpp700 and linux),
    such as 0.9999999999999999 = 1.0, to avoid clipping ranges which are a power of 2.
  */
  if ( range <= jpepsln )
    {
      binscale = 0;
    }
  else if ( IS_NOT_EQUAL(fmin, 0.0) && (fabs(range/fmin) <= jpepsln) )
    {
      binscale = 0;
    }
  else if ( fabs(range-1.0) <= jpepsln )
    {
      binscale = 1 - nbpv;
    }
  else if ( range > 1.0 )
    {
      // Find the smallest power of two above the range (at most 2**127).
      const double rangec = range + jpepsln;
      double p2 = 2.0;
      int jloop = 1;
      while ( jloop < 128 && p2 <= rangec )
        {
          p2 *= 2.0;
          ++jloop;
        }
      if (jloop < 128)
        binscale = jloop - nbpv;
      else
        {
          gprintf(__func__, "Problem calculating binary scale value for encode code %d!", code);
          gprintf(__func__, "> range %g rangec %g fmin %g fmax %g", range, rangec, fmin, fmax);
          return (707);
        }
    }
  else
    {
      // Range below 1: halve until the power of two drops below the range (at most 126 steps).
      const double rangec = range - jpepsln;
      double p05 = 0.5;
      int jloop = 1;
      while ( jloop < 127 && p05 >= rangec )
	{
          p05 *= 0.5;
          jloop++;
	}
      if ( jloop < 127 )
	{
	  binscale = 1 - jloop - nbpv;
	}
      else
	{
	  gprintf(__func__, "Problem calculating binary scale value for encode code %d!", code);
	  gprintf(__func__, "< range %g rangec %g fmin %g fmax %g", range, rangec, fmin, fmax);
	  return (707);
	}
    }

  // Largest integer representable with nbpv bits.
  const uint64_t max_nbpv_pow2 = (uint64_t) ((1ULL << nbpv) - 1);

  if ( binscale != 0 )
    {
      // Increase the scale until the rounded, scaled range fits into nbpv bits.
      while ( (uint64_t)(ldexp(range, -binscale)+0.5) > max_nbpv_pow2 ) binscale++;

      factor = (T)intpow2(-binscale);
    }

  ref2ibm(&zref, BitsPerInt);

  Put3Byte(blockLength);      //  0-2 Length of Block 4
  Put1Byte(Flag);             //  3   Flag & Unused bits
  // A negative scale factor is stored as 32768 plus its magnitude.
  if ( binscale < 0 ) binscale = 32768 - binscale;
  Put2Byte(binscale);         //  4-5 Scale factor
  Put1Real(zref);             //  6-9 Reference value
  Put1Byte(nbpv);             //   10 Packing size

  if ( lspherc )
    {
      if ( lcomplex )
	{
	  const int jup = isubset;
	  int ioff = (int)z + bds_ext;
	  // The offset is written with Put2Byte; values above 0xFFFF are replaced by 0.
	  if ( ioff > 0xFFFF ) ioff = 0;
	  Put2Byte(ioff);
	  Put2Int(isec4[16]);
	  Put1Byte(jup);
	  Put1Byte(jup);
	  Put1Byte(jup);
	  // Unpacked sub-set of coefficients.
	  for (long i = 0; i < ((jup+1)*(jup+2)); ++i) Put1Real((double)(data[i]));
	}
      else
	{
	  // Leading coefficient is stored unpacked.
	  Put1Real((double)(data[0]));
	}
    }

  *datsize  = ((datasize-PackStart)*nbpv + 7)/8;

#if  defined  (_ARCH_PWR6)
  TEMPLATE(encode_array_unrolled,T)(nbpv, (size_t)PackStart, (size_t)datasize, lGrib, data, (T)zref, factor, &z);
#else
  TEMPLATE(encode_array,T)(nbpv, (size_t)PackStart, (size_t)datasize, lGrib, data, (T)zref, factor, &z);
#endif

  if ( unused_bits >= 8 ) Put1Byte(0);  //  Fillbyte

  *gribLen = (long)z;

  return 0;
}


/*
 * Encodes one GRIB message (ISEC0_GRIB_Version is set to 1) into kgrib.
 *
 * The sections are written in order: indicator, product definition, optional grid
 * description, optional bit map, binary data and end section.
 *
 *   isec0   out: receives the message length and version (ISEC0_GRIB_Len, ISEC0_GRIB_Version)
 *   isec1   product definition integers; ISEC1_Sec2Or3Flag bit 128 enables the grid
 *           description section, bit 64 the bit-map section
 *   isec2   grid description integers, passed to encodeGDS and encodeBDS
 *   fsec2   grid description reals, passed to encodeGDS
 *   isec3   unused
 *   fsec3   bit-map section reals, passed to encodeBMS
 *   isec4   data section integers (ISEC4_NumValues, ISEC4_NumBits, packing flags)
 *   fsec4   field values; handed to encodeBMS/encodeBDS, which modify them in place
 *   klenp   number of values, used to size the scratch buffer in the VECTORCODE build
 *   kgrib   output buffer for the encoded message
 *   kleng   size of kgrib in units of int
 *   kword   out: number of ints of kgrib occupied by the message
 *   efunc   unused
 *   kret    out: 0 on success, otherwise the status returned by encodeBDS
 *           (kword and isec0 are not updated in that case)
 *
 * NOTE(review): without VECTORCODE the sections are written directly into kgrib and the
 * size check against kleng happens only afterwards - callers must supply a large enough buffer.
 */
void TEMPLATE(grib_encode,T)(int *isec0, int *isec1, int *isec2, T *fsec2, int *isec3,
			     T *fsec3, int *isec4, T *fsec4, int klenp, int *kgrib,
			     int kleng, int *kword, int efunc, int *kret)
{
  long gribLen = 0; // Counter of GRIB length for output
  long fsec4size = 0;
  long datstart, datsize;

  UNUSED(isec3);
  UNUSED(efunc);

  grsdef();

  unsigned char *CGrib = (unsigned char *) kgrib;

  const bool gdsIncluded = ISEC1_Sec2Or3Flag & 128;
  const bool bmsIncluded = ISEC1_Sec2Or3Flag & 64;

  // set max header len
  size_t len = 16384;

  // add data len
  const size_t numBytes = (size_t)((ISEC4_NumBits+7)>>3);

  len += numBytes*(size_t)klenp;

  // add bitmap len
  if ( bmsIncluded ) len += (size_t)((klenp+7)>>3);

#ifdef VECTORCODE
  // Encode into a scratch buffer of GRIBPACK words; it is packed into kgrib at the end.
  GRIBPACK *lGrib = (GRIBPACK*) Malloc(len*sizeof(GRIBPACK));
  if ( lGrib == NULL ) SysError("No Memory!");
#else
  GRIBPACK *lGrib = CGrib;
#endif

  const long isLen = 8;
  encodeIS(lGrib, &gribLen);
  GRIBPACK *lpds = &lGrib[isLen];
  const long pdsLen = getPdsLen(isec1);

  encodePDS(lpds, pdsLen,  isec1);
  gribLen += pdsLen;
  /*
  if ( ( isec4[3] == 64 ) && ( isec2[5] == 2 ) )
    {
      static bool lwarn_cplx = true;

      if ( lwarn_cplx )
	Message("Complex packing of spectral data unsupported, using simple packing!");

      isec2[5] = 1;
      isec4[3] = 0;

      lwarn_cplx = false;
    }
  */
  if ( gdsIncluded ) TEMPLATE(encodeGDS,T)(lGrib, &gribLen, isec2, fsec2);
  /*
    ----------------------------------------------------------------
    BMS Bit-Map Section Section (Section 3)
    ----------------------------------------------------------------
  */
  if ( bmsIncluded )
    {
      // Also compacts fsec4 to the non-missing values and reports their count.
      TEMPLATE(encodeBMS,T)(lGrib, &gribLen, fsec3, isec4, fsec4, &fsec4size);
    }
  else
    {
      fsec4size = ISEC4_NumValues;
    }

  const long bdsstart = gribLen;
  int status = TEMPLATE(encodeBDS,T)(lGrib, &gribLen, ISEC1_DecScaleFactor, isec2,
                                     isec4, fsec4size, fsec4, &datstart, &datsize, ISEC1_Parameter);
  if ( status )
    {
#ifdef VECTORCODE
      // The scratch buffer is owned by this function; release it on the error path too.
      Free(lGrib);
#endif
      *kret = status;
      return;
    }

  encodeES(lGrib, &gribLen, bdsstart);

  // gribLen is a long and len a size_t: use matching conversion specifiers.
  if ( (size_t) gribLen > (size_t)kleng*sizeof(int) )
    Error("kgrib buffer too small! kleng = %d  gribLen = %ld", kleng, gribLen);

#ifdef VECTORCODE
  if ( (size_t) gribLen > len )
    Error("lGrib buffer too small! len = %zu  gribLen = %ld", len, gribLen);

  (void) PACK_GRIB(lGrib, (unsigned char *)CGrib, gribLen, -1L);

  Free(lGrib);
#endif

  ISEC0_GRIB_Len     = (int)gribLen;
  ISEC0_GRIB_Version = 1;

  // Message length rounded up to whole ints.
  *kword = (int)((gribLen + (long)sizeof(int) - 1) / (long)sizeof(int));

  *kret = status;
}

#endif /* T */

/*
 * Local Variables:
 * mode: c
 * End:
 */
// clang-format on

// Declared here so that the definition below is preceded by a prototype.
void encode_dummy(void);

// Calls the double and float variants of encode_array_unrolled once each,
// with zero sizes and NULL buffers.
// NOTE(review): presumably only present so that both variants count as referenced - confirm.
void encode_dummy(void)
{
  (void) encode_array_unrolled_double(0, 0, 0, NULL, NULL, 0, 0, NULL);
  (void) encode_array_unrolled_float(0, 0, 0, NULL, NULL, 0, 0, NULL);
}
// Version string of this library build.
static const char grb_libvers[] = "2.3.1";

// Returns a pointer to the statically allocated, NUL-terminated version string.
// The caller must not modify or free it.
const char *cgribexLibraryVersion(void)
{
  const char *version = grb_libvers;
  return version;
}

// Restore the diagnostic state saved by "#pragma GCC diagnostic push" at the top of this file.
// The guard has to match the one used for the push, which also accepts clang
// (clang reports a GCC version below 4.6, so the version test alone would skip the pop).
#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 5) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
