cinelerra-5.1/mpeg2enc/quantize.c

   1 /* quantize.c, quantization / inverse quantization                          */
   2
   3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
   4
   5 /*
   6  * Disclaimer of Warranty
   7  *
   8  * These software programs are available to the user without any license fee or
   9  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
  10  * any and all warranties, whether express, implied, or statuary, including any
  11  * implied warranties or merchantability or of fitness for a particular
  12  * purpose.  In no event shall the copyright-holder be liable for any
  13  * incidental, punitive, or consequential damages of any kind whatsoever
  14  * arising from the use of these programs.
  15  *
  16  * This disclaimer of warranty extends to the user of these programs and user's
  17  * customers, employees, agents, transferees, successors, and assigns.
  18  *
  19  * The MPEG Software Simulation Group does not represent or warrant that the
  20  * programs furnished hereunder are free of infringement of any third-party
  21  * patents.
  22  *
  23  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
  24  * are subject to royalty fees to patent holders.  Many of these patents are
  25  * general enough such that they are unavoidable regardless of implementation
  26  * design.
  27  *
  28  */
  29
  30 #include "config.h"
  31 #include <stdio.h>
  32 #include <math.h>
  33 #include <fenv.h>
  34 #include "global.h"
  35 #include "cpu_accel.h"
  36 #include "simd.h"
  37 #include "fastintfns.h"
  38
  39
  40 /* Global function pointers for SIMD-dependent functions */
  41 int (*pquant_non_intra)(pict_data_s *picture, int16_t *src, int16_t *dst,
  42                                                 int mquant, int *nonsat_mquant);
  43 int (*pquant_weight_coeff_sum)(int16_t *blk, uint16_t*i_quant_mat );
  44
  45 /* Local functions pointers for SIMD-dependent functions */
  46
  47 static void (*piquant_non_intra_m1)(int16_t *src, int16_t *dst,  uint16_t *quant_mat);
  48
  49
  50 static int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat );
  51 static void iquant_non_intra_m1(int16_t *src, int16_t *dst, uint16_t *quant_mat);
  52
  53
  54 /*
  55   Initialise quantization routines.
  56   Currently just setting up MMX routines if available...
  57  */
  58
  59 void init_quantizer_hv()
  60 {
  61   int flags;
  62   flags = cpu_accel();
  63 #ifdef X86_CPU
  64   if( (flags & ACCEL_X86_MMX) != 0 ) /* MMX CPU */
  65         {
  66                 if(verbose) fprintf( stderr, "SETTING " );
  67                 if( (flags & ACCEL_X86_3DNOW) != 0 )
  68                 {
  69                         if(verbose) fprintf( stderr, "3DNOW and ");
  70                         pquant_non_intra = quant_non_intra_hv_3dnow;
  71                 }
  72 /*
  73  *              else if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  74  *              {
  75  *                      if(verbose) fprintf( stderr, "SSE and ");
  76  *                      pquant_non_intra = quant_non_intra_hv_sse;
  77  *              }
  78  */
  79                 else
  80                 {
  81                         pquant_non_intra = quant_non_intra_hv;
  82                 }
  83
  84                 if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  85                 {
  86                         if(verbose) fprintf( stderr, "EXTENDED MMX");
  87                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  88                         piquant_non_intra_m1 = iquant_non_intra_m1_sse;
  89                 }
  90                 else
  91                 {
  92                         if(verbose) fprintf( stderr, "MMX");
  93                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  94                         piquant_non_intra_m1 = iquant_non_intra_m1_mmx;
  95                 }
  96                 if(verbose) fprintf( stderr, " for QUANTIZER!\n");
  97         }
  98   else
  99 #endif
 100         {
 101           pquant_non_intra = quant_non_intra_hv;
 102           pquant_weight_coeff_sum = quant_weight_coeff_sum;
 103           piquant_non_intra_m1 = iquant_non_intra_m1;
 104         }
 105 }
 106
 107 /*
 108  *
 109  * Computes the next quantisation up.  Used to avoid saturation
 110  * in macroblock coefficients - common in MPEG-1 - which causes
 111  * nasty artefacts.
 112  *
 113  * NOTE: Does no range checking...
 114  *
 115  */
 116
 117
 118 int next_larger_quant_hv( pict_data_s *picture, int quant )
 119 {
 120         if( picture->q_scale_type )
 121                 {
 122                         if( map_non_linear_mquant_hv[quant]+1 > 31 )
 123                                 return quant;
 124                         else
 125                                 return non_linear_mquant_table_hv[map_non_linear_mquant_hv[quant]+1];
 126                 }
 127         else
 128                 {
 129                         if( quant+2 > 31 )
 130                                 return quant;
 131                         else
 132                                 return quant+2;
 133                 }
 134
 135 }
 136
 137 /*
 138  * Quantisation for intra blocks using Test Model 5 quantization
 139  *
 140  * this quantizer has a bias of 1/8 stepsize towards zero
 141  * (except for the DC coefficient)
 142  *
 143         PRECONDITION: src dst point to *disinct* memory buffers...
 144                           of block_count *adjact* int16_t[64] arrays...
 145  *
 146  * RETURN: 1 If non-zero coefficients left after quantisaiont 0 otherwise
 147  */
 148
 149 void quant_intra_hv(
 150         pict_data_s *picture,
 151         int16_t *src,
 152         int16_t *dst,
 153         int mquant,
 154         int *nonsat_mquant
 155         )
 156 {
 157   int16_t *psrc,*pbuf;
 158   int i,comp;
 159   int x, y, d;
 160   int clipping;
 161   int clipvalue  = dctsatlim;
 162   uint16_t *quant_mat = intra_q_tbl[mquant] /* intra_q */;
 163
 164
 165   /* Inspired by suggestion by Juan.  Quantize a little harder if we clip...
 166    */
 167
 168   do
 169         {
 170           clipping = 0;
 171           pbuf = dst;
 172           psrc = src;
 173           for( comp = 0; comp<block_count && !clipping; ++comp )
 174           {
 175                 x = psrc[0];
 176                 d = 8>>picture->dc_prec; /* intra_dc_mult */
 177                 pbuf[0] = (x>=0) ? (x+(d>>1))/d : -((-x+(d>>1))/d); /* round(x/d) */
 178
 179
 180                 for (i=1; i<64 ; i++)
 181                   {
 182                         x = psrc[i];
 183                         d = quant_mat[i];
 184                         /* RJ: save one divide operation */
 185                         y = ((abs(x) << 5)+ ((3 * quant_mat[i]) >> 2)) / (quant_mat[i] << 1);
 186                         if ( y > clipvalue )
 187                           {
 188                                 clipping = 1;
 189                                 mquant = next_larger_quant_hv( picture, mquant );
 190                                 quant_mat = intra_q_tbl[mquant];
 191                                 break;
 192                           }
 193
 194                         pbuf[i] = intsamesign(x,y);
 195                   }
 196                 pbuf += 64;
 197                 psrc += 64;
 198           }
 199
 200         } while( clipping );
 201   *nonsat_mquant = mquant;
 202 }
 203
 204
 205 /*
 206  * Quantisation matrix weighted Coefficient sum fixed-point
 207  * integer with low 16 bits fractional...
 208  * To be used for rate control as a measure of dct block
 209  * complexity...
 210  *
 211  */
 212
 213 int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat )
 214 {
 215   int i;
 216   int sum = 0;
 217    for( i = 0; i < 64; i+=2 )
 218         {
 219                 sum += abs((int)blk[i]) * (i_quant_mat[i]) + abs((int)blk[i+1]) * (i_quant_mat[i+1]);
 220         }
 221     return sum;
 222   /* In case you're wondering typical average coeff_sum's for a rather
 223         noisy video are around 20.0.  */
 224 }
 225
 226
 227
 228 /*
 229  * Quantisation for non-intra blocks using Test Model 5 quantization
 230  *
 231  * this quantizer has a bias of 1/8 stepsize towards zero
 232  * (except for the DC coefficient)
 233  *
 234  * A.Stevens 2000: The above comment is nonsense.  Only the intra quantiser does
 235  * this.  This one just truncates with a modest bias of 1/(4*quant_matrix_scale)
 236  * to 1.
 237  *
 238  *      PRECONDITION: src dst point to *disinct* memory buffers...
 239  *                    of block_count *adjacent* int16_t[64] arrays...
 240  *
 241  * RETURN: A bit-mask of block_count bits indicating non-zero blocks (a 1).
 242  *
 243  * TODO: A candidate for use of efficient abs and "intsamesign". If only gcc understood
 244  * PPro conditional moves...
 245  */
 246
 247 int quant_non_intra_hv(
 248                                                    pict_data_s *picture,
 249                                                    int16_t *src, int16_t *dst,
 250                                                    int mquant,
 251                                                    int *nonsat_mquant)
 252 {
 253         int i;
 254         int x, y, d;
 255         int nzflag;
 256         int coeff_count;
 257         int clipvalue  = dctsatlim;
 258         int flags = 0;
 259         int saturated = 0;
 260         uint16_t *quant_mat = inter_q_tbl[mquant]/* inter_q */;
 261
 262         coeff_count = 64*block_count;
 263         flags = 0;
 264         nzflag = 0;
 265         for (i=0; i<coeff_count; ++i)
 266         {
 267 restart:
 268                 if( (i%64) == 0 )
 269                 {
 270                         nzflag = (nzflag<<1) | !!flags;
 271                         flags = 0;
 272
 273                 }
 274                 /* RJ: save one divide operation */
 275
 276                 x = abs( ((int)src[i]) ) /*(src[i] >= 0 ? src[i] : -src[i])*/ ;
 277                 d = (int)quant_mat[(i&63)];
 278                 /* A.Stevens 2000: Given the math of non-intra frame
 279                    quantisation / inverse quantisation I always though the
 280                    funny little foudning factor was bogus.  It seems to be
 281                    the encoder needs less bits if you simply divide!
 282                 */
 283
 284                 y = (x<<4) /  (d) /* (32*x + (d>>1))/(d*2*mquant)*/ ;
 285                 if ( y > clipvalue )
 286                 {
 287                         if( saturated )
 288                         {
 289                                 y = clipvalue;
 290                         }
 291                         else
 292                         {
 293                                 int new_mquant = next_larger_quant_hv( picture, mquant );
 294                                 if( new_mquant != mquant )
 295                                 {
 296                                         mquant = new_mquant;
 297                                         quant_mat = inter_q_tbl[mquant];
 298                                 }
 299                                 else
 300                                 {
 301                                         saturated = 1;
 302                                 }
 303                                 i=0;
 304                                 nzflag =0;
 305                                 goto restart;
 306                         }
 307                 }
 308                 dst[i] = intsamesign(src[i], y) /* (src[i] >= 0 ? y : -y) */;
 309                 flags |= dst[i];
 310         }
 311         nzflag = (nzflag<<1) | !!flags;
 312
 313     *nonsat_mquant = mquant;
 314     return nzflag;
 315 }
 316
 317 /* MPEG-1 inverse quantization */
 318 static void iquant1_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 319 {
 320   int i, val;
 321   uint16_t *quant_mat = intra_q;
 322
 323   dst[0] = src[0] << (3-dc_prec);
 324   for (i=1; i<64; i++)
 325   {
 326     val = (int)(src[i]*quant_mat[i]*mquant)/16;
 327
 328     /* mismatch control */
 329     if ((val&1)==0 && val!=0)
 330       val+= (val>0) ? -1 : 1;
 331
 332     /* saturation */
 333     dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 334   }
 335 }
 336
 337
 338 /* MPEG-2 inverse quantization */
 339 void iquant_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 340 {
 341   int i, val, sum;
 342
 343   if ( mpeg1  )
 344     iquant1_intra(src,dst,dc_prec, mquant);
 345   else
 346   {
 347     sum = dst[0] = src[0] << (3-dc_prec);
 348     for (i=1; i<64; i++)
 349     {
 350       val = (int)(src[i]*intra_q[i]*mquant)/16;
 351       sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 352     }
 353
 354     /* mismatch control */
 355     if ((sum&1)==0)
 356       dst[63]^= 1;
 357   }
 358 }
 359
 360
 361 static void iquant_non_intra_m1(int16_t *src, int16_t *dst,  uint16_t *quant_mat)
 362 {
 363   int i, val;
 364
 365 #ifndef ORIGINAL_CODE
 366
 367   for (i=0; i<64; i++)
 368   {
 369     val = src[i];
 370     if (val!=0)
 371     {
 372       val = (int)((2*val+(val>0 ? 1 : -1))*quant_mat[i])/32;
 373
 374       /* mismatch control */
 375       if ((val&1)==0 && val!=0)
 376         val+= (val>0) ? -1 : 1;
 377     }
 378
 379     /* saturation */
 380      dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 381  }
 382 #else
 383
 384   for (i=0; i<64; i++)
 385   {
 386     val = abs(src[i]);
 387     if (val!=0)
 388     {
 389                 val = ((val+val+1)*quant_mat[i]) >> 5;
 390                 /* mismatch control */
 391                 val -= (~(val&1))&(val!=0);
 392                 val = fastmin(val, 2047); /* Saturation */
 393     }
 394         dst[i] = intsamesign(src[i],val);
 395
 396   }
 397
 398 #endif
 399 }
 400
 401
 402
 403
 404 void iquant_non_intra(int16_t *src, int16_t *dst, int mquant )
 405 {
 406   int i, val, sum;
 407   uint16_t *quant_mat;
 408
 409   if ( mpeg1 )
 410     (*piquant_non_intra_m1)(src,dst,inter_q_tbl[mquant]);
 411   else
 412   {
 413           sum = 0;
 414 #ifdef ORIGINAL_CODE
 415
 416           for (i=0; i<64; i++)
 417           {
 418                   val = src[i];
 419                   if (val!=0)
 420
 421                           val = (int)((2*val+(val>0 ? 1 : -1))*inter_q[i]*mquant)/32;
 422                   sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 423           }
 424 #else
 425           quant_mat = inter_q_tbl[mquant];
 426           for (i=0; i<64; i++)
 427           {
 428                   val = src[i];
 429                   if( val != 0 )
 430                   {
 431                           val = abs(val);
 432                           val = (int)((val+val+1)*quant_mat[i])>>5;
 433                           val = intmin( val, 2047);
 434                           sum += val;
 435                   }
 436                   dst[i] = intsamesign(src[i],val);
 437           }
 438 #endif
 439
 440     /* mismatch control */
 441     if ((sum&1)==0)
 442       dst[63]^= 1;
 443   }
 444 }
 445
 446 void iquantize( pict_data_s *picture )
 447 {
 448         int j,k;
 449         int16_t (*qblocks)[64] = picture->qblocks;
 450         for (k=0; k<mb_per_pict; k++)
 451         {
 452                 if (picture->mbinfo[k].mb_type & MB_INTRA)
 453                         for (j=0; j<block_count; j++)
 454                                 iquant_intra(qblocks[k*block_count+j],
 455                                                          qblocks[k*block_count+j],
 456                                                          cur_picture.dc_prec,
 457                                                          cur_picture.mbinfo[k].mquant);
 458                 else
 459                         for (j=0;j<block_count;j++)
 460                                 iquant_non_intra(qblocks[k*block_count+j],
 461                                                                  qblocks[k*block_count+j],
 462                                                                  cur_picture.mbinfo[k].mquant);
 463         }
 464 }