File: psycho_I.c

package info (click to toggle)
toolame 02h-3
links: PTS
area: main
in suites: woody
size: 596 kB
ctags: 551
sloc: ansic: 10,332; sh: 111; makefile: 96
file content (668 lines) | stat: -rw-r--r-- 20,360 bytes
#include "common.h"
#include "encoder.h"
#define LONDON                  /* enable "LONDON" modification */
#define MAKE_SENSE              /* enable "MAKE_SENSE" modification */
#define MI_OPTION               /* enable "MI_OPTION" modification */

void hann_fft_pickmax(double sample[FFT_SIZE], mask power[HAN_SIZE], double spike[SBLIMIT], float  energy[FFT_SIZE]);
#define NEW_ADD_DB
void init_add_db(void);
#define DBTAB 1000
double dbtable[DBTAB];

extern void fft2(FLOAT *x_real, FLOAT *energy, int N);

/**********************************************************************
*
*        This module implements the psychoacoustic model I for the
* MPEG encoder layer II. It uses simplified tonal and noise masking
* threshold analysis to generate SMR for the encoder bit allocation
* routine.
*
**********************************************************************/

void psycho_i(short FAR buffer[2][1152],
              double FAR scale[2][SBLIMIT],
              double FAR ltmin[2][SBLIMIT],
              frame_params *fr_ps)
{
  layer *info = fr_ps->header;
  int   stereo = fr_ps->stereo;
  int   sblimit = fr_ps->sblimit;
  int k,i, tone=0, noise=0;
  static char init = 0;
  static int off[2] = {256,256};
  double sample[FFT_SIZE];
  double spike[2][SBLIMIT];
  static D1408 *fft_buf;
  static mask_ptr FAR power;
  static g_ptr FAR ltg;
  float energy[FFT_SIZE];

  /* call functions for critical boundaries, freq. */
  if(!init)
    {  /* bands, bark values, and mapping */
      fft_buf = (D1408 *) mem_alloc((long) sizeof(D1408) * 2, "fft_buf");
      power = (mask_ptr FAR ) mem_alloc(sizeof(mask) * HAN_SIZE, "power");
      if (info->version == MPEG_AUDIO_ID)
        {
          read_cbound(info->lay, info->sampling_frequency);
          read_freq_band(&ltg, info->lay, info->sampling_frequency);
        }
      else
        {
          read_cbound(info->lay, info->sampling_frequency + 4);
          read_freq_band(&ltg, info->lay, info->sampling_frequency + 4);
        }
      make_map(power,ltg);
      for (i=0;i<1408;i++)
        fft_buf[0][i] = fft_buf[1][i] = 0;

      init_add_db(); /* create the add_db table */

      init = 1;
    }
  for(k=0;k<stereo;k++)
    {  /* check pcm input for 3 blocks of 384 samples */
      for(i=0;i<1152;i++)
        fft_buf[k][(i+off[k])%1408]= (double)buffer[k][i]/SCALE;
      for(i=0;i<FFT_SIZE;i++)
        sample[i] = fft_buf[k][(i+1216+off[k])%1408];
      off[k] += 1152;
      off[k] %= 1408;

      hann_fft_pickmax(sample, power, &spike[k][0], energy);

      tonal_label(power, &tone);
      noise_label(power, &noise, ltg, energy);
      subsampling(power, ltg, &tone, &noise);
      threshold(power, ltg, &tone, &noise,
                bitrate[info->version][info->bitrate_index]/stereo);
      minimum_mask(ltg, &ltmin[k][0], sblimit);
      smr(&ltmin[k][0], &spike[k][0], &scale[k][0], sblimit);
    }
}


int crit_band;
int FAR *cbound;
int sub_size;

void read_cbound(lay,freq)  /* this function reads in critical */
int lay, freq;              /* band boundaries                 */
{

    #include "critband.h"
    //static const int FirstCriticalBand[7][27] = {...

	int i,k;

	if ((lay <1) || (lay >2)) {
		printf("Internal error (read_cbound())\n");
		return;
	}
	if ((freq <0) || (freq >6) || (freq ==3)) {
		printf("Internal error (read_cbound())\n");
		return;
	}

	/* read input for critical bands */
	if (lay ==1) {
		crit_band =FirstCriticalBand[freq][0];
		cbound = (int *) mem_alloc(sizeof(int) * crit_band, "cbound");
		for (i=0; i<crit_band; i++) { 
			k = FirstCriticalBand[freq][i+1];
			if (k !=0) {
				cbound[i] = k;
			}
			else {
				printf("Internal error (read_cbound())\n");
				return;
			}
		}
	}
	else {
		crit_band =SecondCriticalBand[freq][0];
		cbound = (int *) mem_alloc(sizeof(int) * crit_band, "cbound");
		for (i=0; i<crit_band; i++) { 
			k = SecondCriticalBand[freq][i+1];
			if (k !=0) {
				cbound[i] = k;
			}
			else {
				printf("Internal error (read_cbound())\n");
				return;
			}
		}
	}

}        

void read_freq_band(ltg,lay,freq)  /* this function reads in   */
int lay, freq;                     /* frequency bands and bark */
g_ptr *ltg;                    /* values                   */
{

#include "freqtable.h"

	int i,k;

	if ((lay <1) || (lay >2)) {
			printf("Internal error (read_freq_band())\n");
			return;
	}
	if ((freq <0) || (freq >6) || (freq ==3)) {
			printf("Internal error (read_freq_band())\n");
			return;
	}

	/* read input for freq. subbands */
	if (lay ==1) {
		sub_size =FirstFreqEntries[freq] +1;
		*ltg = (g_ptr ) mem_alloc(sizeof(g_thres) * sub_size, "ltg");
		(*ltg)[0].line = 0;          /* initialize global masking threshold */
		(*ltg)[0].bark = 0.0;
		(*ltg)[0].hear = 0.0;
		for (i=1; i<sub_size; i++) {
 			k = FirstFreqSubband[freq][i-1].line;
			if (k !=0) {
				(*ltg)[i].line = k;
				(*ltg)[i].bark = FirstFreqSubband[freq][i-1].bark;
				(*ltg)[i].hear = FirstFreqSubband[freq][i-1].hear;
			}
			else {
				printf("Internal error (read_freq_band())\n");
				return;
			}
		}
	}
	else {
		sub_size =SecondFreqEntries[freq] +1;
		*ltg = (g_ptr ) mem_alloc(sizeof(g_thres) * sub_size, "ltg");
		(*ltg)[0].line = 0;          /* initialize global masking threshold */
		(*ltg)[0].bark = 0.0;
		(*ltg)[0].hear = 0.0;
		for (i=1; i<sub_size; i++) {
 			k = SecondFreqSubband[freq][i-1].line;
			if (k !=0) {
				(*ltg)[i].line = k;
				(*ltg)[i].bark = SecondFreqSubband[freq][i-1].bark;
				(*ltg)[i].hear = SecondFreqSubband[freq][i-1].hear;
			}
			else {
				printf("Internal error (read_freq_band())\n");
				return;
			}
		}
	}
}


void make_map(mask FAR power[HAN_SIZE], g_thres FAR *ltg)
/* this function calculates the global masking threshold */
{
  int i,j;

  for(i=1;i<sub_size;i++) for(j=ltg[i-1].line;j<=ltg[i].line;j++)
      power[j].map = i;
}

#ifdef NEW_ADD_DB
void init_add_db(void)
{
  int i;
  double x;
  for (i=0;i<DBTAB;i++)
    {
      x=(double)i/10.0;
      dbtable[i]=10 * log10 (1 + pow(10.0,x/10.0)) - x;
    }
}

INLINE double add_db(double a, double b)
{
  /* MFC - if the difference between a and b is large (>99), then just return the
     largest one. (about 10% of the time)
         - For differences between 0 and 99, return the largest value, but add
           in a pre-calculated difference value.  
         - the value 99 was chosen arbitarily.
  - maximum (a-b) i've seen is 572 */
  int idiff;
  idiff = (int) (10.0*(a-b));

  if (idiff>990)
    {
      return a;
    }
  if (idiff>=0)
    {
      return (a+dbtable[idiff]);
    }
  if (idiff<-990)
    {
      return (b);
    }
  return (b+dbtable[-idiff]);
}
#else
/* use the old add_db code. slooooow */
double add_db(a,b)
double a,b;
{
  a = pow(10.0,a/10.0);
  b = pow(10.0,b/10.0);
  return 10 * log10(a+b);
}
#endif

/****************************************************************
*       Window the samples then, 
*        Fast Fourier transform of the input samples.
* 
*   (  call the FHT-based fft() in fft.c )
*    
*
****************************************************************/
void hann_fft_pickmax(double sample[FFT_SIZE], mask power[HAN_SIZE],
                      double spike[SBLIMIT], float  energy[FFT_SIZE])
{
  float x_real[FFT_SIZE];
  register int i,j;
  register double sqrt_8_over_3;
  static int init = 0;
  static double FAR *window;
  double sum;

  if(!init)
    {  /* calculate window function for the Fourier transform */
      window = (double FAR *) mem_alloc(sizeof(DFFT), "window");
      sqrt_8_over_3 = pow(8.0/3.0, 0.5);
      for(i=0;i<FFT_SIZE;i++)
        {
          /* Hann window formula */
          window[i]=sqrt_8_over_3*0.5*(1-cos(2.0*PI*i/(FFT_SIZE)))/FFT_SIZE;
        }
      init = 1;
    }
  for(i=0;i<FFT_SIZE;i++)
    x_real[i] = (float)(sample[i] * window[i]);

  fft2(x_real, energy, FFT_SIZE);

  for(i=0;i<HAN_SIZE;i++)
    {    /* calculate power density spectrum */
      if (energy[i] < 1E-20)
        power[i].x = -200.0 + POWERNORM;
      else
        power[i].x = 10 * log10(energy[i]) + POWERNORM;
      power[i].next = STOP;
      power[i].type = FALSE;
    }

  /* Combined LONDONs pick_max criteria.
     Not really sure of the difference between LONDON and the other one 
     A bit of tweaking shows that LONDON's 'sum' calculation is just a sum of energy[]
     with the CF factor to take into account POWERNORM */
  /* Calculate the sum of spectral component in each subband from bound 4-16 */

#define CF 1073741824 /* pow(10, 0.1*POWERNORM) */
#define DBM  1E-20 /* pow(10.0, 0.1*DBMIN */
  for(i=0;i<HAN_SIZE;spike[i>>4] = 10.0*log10(sum), i+=16)
    {
      for(j=0, sum = DBM ;j<16;j++)
        sum += CF*energy[i+j];
    }
}

/****************************************************************
*
*        This function labels the tonal component in the power
* spectrum.
*
****************************************************************/

void tonal_label(power, tone)  /* this function extracts (tonal) */
mask FAR power[HAN_SIZE];     /* sinusoidals from the spectrum  */
int *tone;
{
  int i,j, last = LAST, first, run, last_but_one = LAST; /* dpwe */
  double max;

  *tone = LAST;
  for(i=2;i<HAN_SIZE-12;i++)
    {
      if(power[i].x>power[i-1].x && power[i].x>=power[i+1].x)
        {
          power[i].type = TONE;
          power[i].next = LAST;
          if(last != LAST) power[last].next = i;
          else first = *tone = i;
          last = i;
        }
    }
  last = LAST;
  first = *tone;
  *tone = LAST;
  while(first != LAST)
    {               /* the conditions for the tonal          */
      if(first<3 || first>500) run = 0;/* otherwise k+/-j will be out of bounds */
      else if(first<63) run = 2;       /* components in layer II, which         */
      else if(first<127) run = 3;      /* are the boundaries for calc.          */
      else if(first<255) run = 6;      /* the tonal components                  */
      else run = 12;
      max = power[first].x - 7;        /* after calculation of tonal   */
      for(j=2;j<=run;j++)              /* components, set to local max */
        if(max < power[first-j].x || max < power[first+j].x)
          {
            power[first].type = FALSE;
            break;
          }
      if(power[first].type == TONE)
        {   /* extract tonal components */
          int help=first;
          if(*tone==LAST) *tone = first;
          while((power[help].next!=LAST)&&(power[help].next-first)<=run)
            help=power[help].next;
          help=power[help].next;
          power[first].next=help;
          if((first-last)<=run)
            {
              if(last_but_one != LAST) power[last_but_one].next=first;
            }
          if(first>1 && first<500)
            {     /* calculate the sum of the */
              double tmp;                /* powers of the components */
              tmp = add_db(power[first-1].x, power[first+1].x);
              power[first].x = add_db(power[first].x, tmp);
            }
          for(j=1;j<=run;j++)
            {
              power[first-j].x = power[first+j].x = DBMIN;
              power[first-j].next = power[first+j].next = STOP;
              power[first-j].type = power[first+j].type = FALSE;
            }
          last_but_one=last;
          last = first;
          first = power[first].next;
        }
      else
        {
          int ll;
          if(last == LAST); /* *tone = power[first].next; dpwe */
          else power[last].next = power[first].next;
          ll = first;
          first = power[first].next;
          power[ll].next = STOP;
        }
    }
}

/****************************************************************
*
*        This function groups all the remaining non-tonal
* spectral lines into critical band where they are replaced by
* one single line.
*
****************************************************************/

void noise_label(mask FAR *power, int *noise, g_thres FAR *ltg, float energy[FFT_SIZE])
{
  int i,j, centre, last = LAST;
  double index, weight, sum;
  /* calculate the remaining spectral */
  for(i=0;i<crit_band-1;i++)
    {  /* lines for non-tonal components   */
      for(j=cbound[i],weight = 0.0,sum = DBMIN;j<cbound[i+1];j++)
        {
          if(power[j].type != TONE)
            {
              if(power[j].x != DBMIN)
                {
                  sum = add_db(power[j].x,sum);
                  /* the line below and others under the "MAKE_SENSE" condition are an alternate
                     interpretation of "geometric mean". This approach may make more sense but
                     it has not been tested with hardware. */

#define NOPOW
#ifdef NOPOW
                  weight += CF*energy[j] * (double) (j-cbound[i]) /
                            (double) (cbound[i+1]-cbound[i]);  /* correction */
#else
                  if (CF*energy[j]>0.00001)
                    fprintf(stderr, "[%f %f]  ",pow(10.0,power[j].x/10.0), CF*energy[j]);
                  weight += pow(10.0,power[j].x/10.0) * (double) (j-cbound[i]) /
                            (double) (cbound[i+1]-cbound[i]);  /* correction */
#endif
                  power[j].x = DBMIN;
                }
            }   /*  check to see if the spectral line is low dB, and if  */
        }      /* so replace the center of the critical band, which is */
      /* the center freq. of the noise component              */

      if(sum <= DBMIN)
        centre = (cbound[i+1]+cbound[i]) /2;
      else
        {
          /* fprintf(stderr, "%i [%f %f] -", count++,weight/pow(10.0,0.1*sum), weight*pow(10.0,-0.1*sum)); */
          index = weight*pow(10.0,-0.1*sum);
          centre = cbound[i] + (int) (index * (double) (cbound[i+1]-cbound[i]) );
        }


      /* locate next non-tonal component until finished; */
      /* add to list of non-tonal components             */

      /* Masahiro Iwadare's fix for infinite looping problem? */
      if(power[centre].type == TONE)
        {
          if (power[centre+1].type == TONE)
            {
              centre++;
            }
          else
            centre--;
        }

      if(last == LAST) *noise = centre;
      else
        {
          power[centre].next = LAST;
          power[last].next = centre;
        }
      power[centre].x = sum;
      power[centre].type = NOISE;
      last = centre;
    }
}

/****************************************************************
*
*        This function reduces the number of noise and tonal
* component for further threshold analysis.
*
****************************************************************/

void subsampling(mask FAR power[HAN_SIZE],
                 g_thres FAR *ltg,
                 int *tone, int *noise)
{
  int i, old;

  i = *tone;
  old = STOP;    /* calculate tonal components for */
  while(i!=LAST)
    {           /* reduction of spectral lines    */
      if(power[i].x < ltg[power[i].map].hear)
        {
          power[i].type = FALSE;
          power[i].x = DBMIN;
          if(old == STOP) *tone = power[i].next;
          else power[old].next = power[i].next;
        }
      else old = i;
      i = power[i].next;
    }
  i = *noise;
  old = STOP;    /* calculate non-tonal components for */
  while(i!=LAST)
    {            /* reduction of spectral lines        */
      if(power[i].x < ltg[power[i].map].hear)
        {
          power[i].type = FALSE;
          power[i].x = DBMIN;
          if(old == STOP) *noise = power[i].next;
          else power[old].next = power[i].next;
        }
      else old = i;
      i = power[i].next;
    }
  i = *tone;
  old = STOP;
  while(i != LAST)
    {                              /* if more than one */
      if(power[i].next == LAST)break;             /* tonal component  */
      if(ltg[power[power[i].next].map].bark -     /* is less than .5  */
          ltg[power[i].map].bark < 0.5)
        {          /* bark, take the   */
          if(power[power[i].next].x > power[i].x )
            {/* maximum          */
              if(old == STOP) *tone = power[i].next;
              else power[old].next = power[i].next;
              power[i].type = FALSE;
              power[i].x = DBMIN;
              i = power[i].next;
            }
          else
            {
              power[power[i].next].type = FALSE;
              power[power[i].next].x = DBMIN;
              power[i].next = power[power[i].next].next;
              old = i;
            }
        }
      else
        {
          old = i;
          i = power[i].next;
        }
    }
}

/****************************************************************
*
*        This function calculates the individual threshold and
* sum with the quiet threshold to find the global threshold.
*
****************************************************************/

/* mainly just changed the way range checking was done MFC Nov 1999 */
void threshold(mask FAR power[HAN_SIZE],
               g_thres FAR *ltg,
               int *tone, int *noise, int bit_rate)
{
  int k, t;
  double dz, tmps, vf;

  for(k=1;k<sub_size;k++)
    {
      ltg[k].x = DBMIN;
      t = *tone;          /* calculate individual masking threshold for */
      while(t != LAST)
        {   /* components in order to find the global     */
          dz = ltg[k].bark-ltg[power[t].map].bark; /* distance of bark value*/
          if(dz >= -3.0 && dz <8.0)
            {
              tmps = -1.525-0.275*ltg[power[t].map].bark - 4.5 + power[t].x;
              /* masking function for lower & upper slopes */
              if(dz<-1)
                vf = 17*(dz+1)-(0.4*power[t].x +6);
              else if(dz<0)
                vf = (0.4 *power[t].x + 6) * dz;
              else if(dz<1)
                vf = (-17*dz);
              else
                vf = -(dz-1) * (17-0.15 *power[t].x) - 17;
              ltg[k].x = add_db(ltg[k].x, tmps+vf);
            }
          t = power[t].next;
        }

      t = *noise;        /* calculate individual masking threshold  */
      while(t != LAST)
        {  /* for non-tonal components to find LTG    */
          dz = ltg[k].bark-ltg[power[t].map].bark; /* distance of bark value */
          if(dz >= -3.0 && dz<8.0)
            {
              tmps = -1.525-0.175*ltg[power[t].map].bark -0.5 + power[t].x;
              /* masking function for lower & upper slopes */
              if(dz<-1)
                vf = 17*(dz+1)-(0.4*power[t].x +6);
              else if(dz<0)
                vf = (0.4 *power[t].x + 6) * dz;
              else if(dz<1)
                vf = (-17*dz);
              else
                vf = -(dz-1) * (17-0.15 *power[t].x) - 17;
              ltg[k].x = add_db(ltg[k].x, tmps+vf);
            }
          t = power[t].next;
        }
      if(bit_rate<96)
        ltg[k].x = add_db(ltg[k].hear, ltg[k].x);
      else
        ltg[k].x = add_db(ltg[k].hear-12.0, ltg[k].x);
    }
}

/****************************************************************
*
*        This function finds the minimum masking threshold and
* return the value to the encoder.
*
****************************************************************/

void minimum_mask(g_thres FAR *ltg, double FAR ltmin[SBLIMIT],int sblimit)
{
  double min;
  int i,j;

  j=1;
  for(i=0;i<sblimit;i++)
    if(j>=sub_size-1)                   /* check subband limit, and       */
      ltmin[i] = ltg[sub_size-1].hear; /* calculate the minimum masking  */
    else
      {                              /* level of LTMIN for each subband*/
        min = ltg[j].x;
        while(ltg[j].line>>4 == i && j < sub_size)
          {
            if(min>ltg[j].x)  min = ltg[j].x;
            j++;
          }
        ltmin[i] = min;
      }
}

/*****************************************************************
*
*        This procedure is called in musicin to pick out the
* smaller of the scalefactor or threshold.
*
*****************************************************************/

void smr(double FAR ltmin[SBLIMIT],
         double FAR spike[SBLIMIT],
         double FAR scale[SBLIMIT],
         int sblimit)
{
  int i;
  double max;

  for(i=0;i<sblimit;i++)
    {                     /* determine the signal   */
      max = 20 * log10(scale[i] * 32768) - 10; /* level for each subband */
      if(spike[i]>max) max = spike[i];         /* for the maximum scale  */
      max -= ltmin[i];                         /* factors                */
      ltmin[i] = max;
    }
}