cinelerra-5.1/mpeg2enc/quantize.c

   1 /* quantize.c, quantization / inverse quantization                          */
   2
   3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
   4
   5 /*
   6  * Disclaimer of Warranty
   7  *
   8  * These software programs are available to the user without any license fee or
   9  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
  10  * any and all warranties, whether express, implied, or statuary, including any
  11  * implied warranties or merchantability or of fitness for a particular
  12  * purpose.  In no event shall the copyright-holder be liable for any
  13  * incidental, punitive, or consequential damages of any kind whatsoever
  14  * arising from the use of these programs.
  15  *
  16  * This disclaimer of warranty extends to the user of these programs and user's
  17  * customers, employees, agents, transferees, successors, and assigns.
  18  *
  19  * The MPEG Software Simulation Group does not represent or warrant that the
  20  * programs furnished hereunder are free of infringement of any third-party
  21  * patents.
  22  *
  23  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
  24  * are subject to royalty fees to patent holders.  Many of these patents are
  25  * general enough such that they are unavoidable regardless of implementation
  26  * design.
  27  *
  28  */
  29
  30 #include "config.h"
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <math.h>
  34 #include <fenv.h>
  35 #include "global.h"
  36 #include "cpu_accel.h"
  37 #include "simd.h"
  38 #include "fastintfns.h"
  39
  40
  41 /* Global function pointers for SIMD-dependent functions */
  42 int (*pquant_non_intra)(pict_data_s *picture, int16_t *src, int16_t *dst,
  43                                                 int mquant, int *nonsat_mquant);
  44 int (*pquant_weight_coeff_sum)(int16_t *blk, uint16_t*i_quant_mat );
  45
  46 /* Local functions pointers for SIMD-dependent functions */
  47
  48 static void (*piquant_non_intra_m1)(int16_t *src, int16_t *dst,  uint16_t *quant_mat);
  49
  50
  51 static int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat );
  52 static void iquant_non_intra_m1(int16_t *src, int16_t *dst, uint16_t *quant_mat);
  53
  54
  55 /*
  56   Initialise quantization routines.
  57   Currently just setting up MMX routines if available...
  58  */
  59
  60 void init_quantizer_hv(int use_sse)
  61 {
  62 #ifdef X86_CPU
  63   int flags;
  64   flags = cpu_accel();
  65   if( (flags & ACCEL_X86_MMX) != 0 ) /* MMX CPU */
  66         {
  67                 if(verbose) fprintf( stderr, "SETTING " );
  68                 if( (flags & ACCEL_X86_3DNOW) != 0 )
  69                 {
  70                         if(verbose) fprintf( stderr, "3DNOW and ");
  71                         pquant_non_intra = quant_non_intra_hv_3dnow;
  72                 }
  73 /*
  74  *              else if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  75  *              {
  76  *                      if(verbose) fprintf( stderr, "SSE and ");
  77  *                      pquant_non_intra = quant_non_intra_hv_sse;
  78  *              }
  79  */
  80                 else
  81                 {
  82                         pquant_non_intra = quant_non_intra_hv;
  83                 }
  84
  85                 if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  86                 {
  87                         if(verbose) fprintf( stderr, "EXTENDED MMX");
  88                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  89                         if( use_sse )
  90                                 piquant_non_intra_m1 = iquant_non_intra_m1_sse;
  91                 }
  92                 else
  93                 {
  94                         if(verbose) fprintf( stderr, "MMX");
  95                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  96                         piquant_non_intra_m1 = iquant_non_intra_m1_mmx;
  97                 }
  98                 if(verbose) fprintf( stderr, " for QUANTIZER!\n");
  99         }
 100   else
 101 #endif
 102         {
 103           pquant_non_intra = quant_non_intra_hv;
 104           pquant_weight_coeff_sum = quant_weight_coeff_sum;
 105           piquant_non_intra_m1 = iquant_non_intra_m1;
 106         }
 107 }
 108
 109 /*
 110  *
 111  * Computes the next quantisation up.  Used to avoid saturation
 112  * in macroblock coefficients - common in MPEG-1 - which causes
 113  * nasty artefacts.
 114  *
 115  * NOTE: Does no range checking...
 116  *
 117  */
 118
 119
 120 int next_larger_quant_hv( pict_data_s *picture, int quant )
 121 {
 122         if( picture->q_scale_type )
 123                 {
 124                         if( map_non_linear_mquant_hv[quant]+1 > 31 )
 125                                 return quant;
 126                         else
 127                                 return non_linear_mquant_table_hv[map_non_linear_mquant_hv[quant]+1];
 128                 }
 129         else
 130                 {
 131                         if( quant+2 > 31 )
 132                                 return quant;
 133                         else
 134                                 return quant+2;
 135                 }
 136
 137 }
 138
 139 /*
 140  * Quantisation for intra blocks using Test Model 5 quantization
 141  *
 142  * this quantizer has a bias of 1/8 stepsize towards zero
 143  * (except for the DC coefficient)
 144  *
 145         PRECONDITION: src dst point to *disinct* memory buffers...
 146                           of block_count *adjact* int16_t[64] arrays...
 147  *
 148  * RETURN: 1 If non-zero coefficients left after quantisaiont 0 otherwise
 149  */
 150
 151 void quant_intra_hv(
 152         pict_data_s *picture,
 153         int16_t *src,
 154         int16_t *dst,
 155         int mquant,
 156         int *nonsat_mquant
 157         )
 158 {
 159   int16_t *psrc,*pbuf;
 160   int i,comp;
 161   int x, y, d;
 162   int clipping;
 163   int clipvalue  = dctsatlim;
 164   uint16_t *quant_mat = intra_q_tbl[mquant] /* intra_q */;
 165
 166
 167   /* Inspired by suggestion by Juan.  Quantize a little harder if we clip...
 168    */
 169
 170   do
 171         {
 172           clipping = 0;
 173           pbuf = dst;
 174           psrc = src;
 175           for( comp = 0; comp<block_count && !clipping; ++comp )
 176           {
 177                 x = psrc[0];
 178                 d = 8>>picture->dc_prec; /* intra_dc_mult */
 179                 pbuf[0] = (x>=0) ? (x+(d>>1))/d : -((-x+(d>>1))/d); /* round(x/d) */
 180
 181
 182                 for (i=1; i<64 ; i++)
 183                   {
 184                         x = psrc[i];
 185                         d = quant_mat[i];
 186                         /* RJ: save one divide operation */
 187                         y = ((abs(x) << 5)+ ((3 * quant_mat[i]) >> 2)) / (quant_mat[i] << 1);
 188                         if ( y > clipvalue )
 189                           {
 190                                 clipping = 1;
 191                                 mquant = next_larger_quant_hv( picture, mquant );
 192                                 quant_mat = intra_q_tbl[mquant];
 193                                 break;
 194                           }
 195
 196                         pbuf[i] = intsamesign(x,y);
 197                   }
 198                 pbuf += 64;
 199                 psrc += 64;
 200           }
 201
 202         } while( clipping );
 203   *nonsat_mquant = mquant;
 204 }
 205
 206
 207 /*
 208  * Quantisation matrix weighted Coefficient sum fixed-point
 209  * integer with low 16 bits fractional...
 210  * To be used for rate control as a measure of dct block
 211  * complexity...
 212  *
 213  */
 214
 215 int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat )
 216 {
 217   int i;
 218   int sum = 0;
 219    for( i = 0; i < 64; i+=2 )
 220         {
 221                 sum += abs((int)blk[i]) * (i_quant_mat[i]) + abs((int)blk[i+1]) * (i_quant_mat[i+1]);
 222         }
 223     return sum;
 224   /* In case you're wondering typical average coeff_sum's for a rather
 225         noisy video are around 20.0.  */
 226 }
 227
 228
 229
 230 /*
 231  * Quantisation for non-intra blocks using Test Model 5 quantization
 232  *
 233  * this quantizer has a bias of 1/8 stepsize towards zero
 234  * (except for the DC coefficient)
 235  *
 236  * A.Stevens 2000: The above comment is nonsense.  Only the intra quantiser does
 237  * this.  This one just truncates with a modest bias of 1/(4*quant_matrix_scale)
 238  * to 1.
 239  *
 240  *      PRECONDITION: src dst point to *disinct* memory buffers...
 241  *                    of block_count *adjacent* int16_t[64] arrays...
 242  *
 243  * RETURN: A bit-mask of block_count bits indicating non-zero blocks (a 1).
 244  *
 245  * TODO: A candidate for use of efficient abs and "intsamesign". If only gcc understood
 246  * PPro conditional moves...
 247  */
 248
 249 int quant_non_intra_hv(
 250                                                    pict_data_s *picture,
 251                                                    int16_t *src, int16_t *dst,
 252                                                    int mquant,
 253                                                    int *nonsat_mquant)
 254 {
 255         int i;
 256         int x, y, d;
 257         int nzflag;
 258         int coeff_count;
 259         int clipvalue  = dctsatlim;
 260         int flags = 0;
 261         int saturated = 0;
 262         uint16_t *quant_mat = inter_q_tbl[mquant]/* inter_q */;
 263
 264         coeff_count = 64*block_count;
 265         flags = 0;
 266         nzflag = 0;
 267         for (i=0; i<coeff_count; ++i)
 268         {
 269 restart:
 270                 if( (i%64) == 0 )
 271                 {
 272                         nzflag = (nzflag<<1) | !!flags;
 273                         flags = 0;
 274
 275                 }
 276                 /* RJ: save one divide operation */
 277
 278                 x = abs( ((int)src[i]) ) /*(src[i] >= 0 ? src[i] : -src[i])*/ ;
 279                 d = (int)quant_mat[(i&63)];
 280                 /* A.Stevens 2000: Given the math of non-intra frame
 281                    quantisation / inverse quantisation I always though the
 282                    funny little foudning factor was bogus.  It seems to be
 283                    the encoder needs less bits if you simply divide!
 284                 */
 285
 286                 y = (x<<4) /  (d) /* (32*x + (d>>1))/(d*2*mquant)*/ ;
 287                 if ( y > clipvalue )
 288                 {
 289                         if( saturated )
 290                         {
 291                                 y = clipvalue;
 292                         }
 293                         else
 294                         {
 295                                 int new_mquant = next_larger_quant_hv( picture, mquant );
 296                                 if( new_mquant != mquant )
 297                                 {
 298                                         mquant = new_mquant;
 299                                         quant_mat = inter_q_tbl[mquant];
 300                                 }
 301                                 else
 302                                 {
 303                                         saturated = 1;
 304                                 }
 305                                 i=0;
 306                                 nzflag =0;
 307                                 goto restart;
 308                         }
 309                 }
 310                 dst[i] = intsamesign(src[i], y) /* (src[i] >= 0 ? y : -y) */;
 311                 flags |= dst[i];
 312         }
 313         nzflag = (nzflag<<1) | !!flags;
 314
 315     *nonsat_mquant = mquant;
 316     return nzflag;
 317 }
 318
 319 /* MPEG-1 inverse quantization */
 320 static void iquant1_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 321 {
 322   int i, val;
 323   uint16_t *quant_mat = intra_q;
 324
 325   dst[0] = src[0] << (3-dc_prec);
 326   for (i=1; i<64; i++)
 327   {
 328     val = (int)(src[i]*quant_mat[i]*mquant)/16;
 329
 330     /* mismatch control */
 331     if ((val&1)==0 && val!=0)
 332       val+= (val>0) ? -1 : 1;
 333
 334     /* saturation */
 335     dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 336   }
 337 }
 338
 339
 340 /* MPEG-2 inverse quantization */
 341 void iquant_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 342 {
 343   int i, val, sum;
 344
 345   if ( mpeg1  )
 346     iquant1_intra(src,dst,dc_prec, mquant);
 347   else
 348   {
 349     sum = dst[0] = src[0] << (3-dc_prec);
 350     for (i=1; i<64; i++)
 351     {
 352       val = (int)(src[i]*intra_q[i]*mquant)/16;
 353       sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 354     }
 355
 356     /* mismatch control */
 357     if ((sum&1)==0)
 358       dst[63]^= 1;
 359   }
 360 }
 361
 362
 363 static void iquant_non_intra_m1(int16_t *src, int16_t *dst,  uint16_t *quant_mat)
 364 {
 365   int i, val;
 366
 367 #ifndef ORIGINAL_CODE
 368
 369   for (i=0; i<64; i++)
 370   {
 371     val = src[i];
 372     if (val!=0)
 373     {
 374       val = (int)((2*val+(val>0 ? 1 : -1))*quant_mat[i])/32;
 375
 376       /* mismatch control */
 377       if ((val&1)==0 && val!=0)
 378         val+= (val>0) ? -1 : 1;
 379     }
 380
 381     /* saturation */
 382      dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 383  }
 384 #else
 385
 386   for (i=0; i<64; i++)
 387   {
 388     val = abs(src[i]);
 389     if (val!=0)
 390     {
 391                 val = ((val+val+1)*quant_mat[i]) >> 5;
 392                 /* mismatch control */
 393                 val -= (~(val&1))&(val!=0);
 394                 val = fastmin(val, 2047); /* Saturation */
 395     }
 396         dst[i] = intsamesign(src[i],val);
 397
 398   }
 399
 400 #endif
 401 }
 402
 403
 404
 405
 406 void iquant_non_intra(int16_t *src, int16_t *dst, int mquant )
 407 {
 408   int i, val, sum;
 409   uint16_t *quant_mat;
 410
 411   if ( mpeg1 )
 412     (*piquant_non_intra_m1)(src,dst,inter_q_tbl[mquant]);
 413   else
 414   {
 415           sum = 0;
 416 #ifdef ORIGINAL_CODE
 417
 418           for (i=0; i<64; i++)
 419           {
 420                   val = src[i];
 421                   if (val!=0)
 422
 423                           val = (int)((2*val+(val>0 ? 1 : -1))*inter_q[i]*mquant)/32;
 424                   sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 425           }
 426 #else
 427           quant_mat = inter_q_tbl[mquant];
 428           for (i=0; i<64; i++)
 429           {
 430                   val = src[i];
 431                   if( val != 0 )
 432                   {
 433                           val = abs(val);
 434                           val = (int)((val+val+1)*quant_mat[i])>>5;
 435                           val = intmin( val, 2047);
 436                           sum += val;
 437                   }
 438                   dst[i] = intsamesign(src[i],val);
 439           }
 440 #endif
 441
 442     /* mismatch control */
 443     if ((sum&1)==0)
 444       dst[63]^= 1;
 445   }
 446 }
 447
 448 void iquantize( pict_data_s *picture )
 449 {
 450         int j,k;
 451         int16_t (*qblocks)[64] = picture->qblocks;
 452         for (k=0; k<mb_per_pict; k++)
 453         {
 454                 if (picture->mbinfo[k].mb_type & MB_INTRA)
 455                         for (j=0; j<block_count; j++)
 456                                 iquant_intra(qblocks[k*block_count+j],
 457                                                          qblocks[k*block_count+j],
 458                                                          cur_picture.dc_prec,
 459                                                          cur_picture.mbinfo[k].mquant);
 460                 else
 461                         for (j=0;j<block_count;j++)
 462                                 iquant_non_intra(qblocks[k*block_count+j],
 463                                                                  qblocks[k*block_count+j],
 464                                                                  cur_picture.mbinfo[k].mquant);
 465         }
 466 }