cinelerra-5.1/mpeg2enc/quantize.c

   1 /* quantize.c, quantization / inverse quantization                          */
   2
   3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
   4
   5 /*
   6  * Disclaimer of Warranty
   7  *
   8  * These software programs are available to the user without any license fee or
   9  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
  10  * any and all warranties, whether express, implied, or statuary, including any
  11  * implied warranties or merchantability or of fitness for a particular
  12  * purpose.  In no event shall the copyright-holder be liable for any
  13  * incidental, punitive, or consequential damages of any kind whatsoever
  14  * arising from the use of these programs.
  15  *
  16  * This disclaimer of warranty extends to the user of these programs and user's
  17  * customers, employees, agents, transferees, successors, and assigns.
  18  *
  19  * The MPEG Software Simulation Group does not represent or warrant that the
  20  * programs furnished hereunder are free of infringement of any third-party
  21  * patents.
  22  *
  23  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
  24  * are subject to royalty fees to patent holders.  Many of these patents are
  25  * general enough such that they are unavoidable regardless of implementation
  26  * design.
  27  *
  28  */
  29
  30 #include "config.h"
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <math.h>
  34 #include <fenv.h>
  35 #include "global.h"
  36 #include "cpu_accel.h"
  37 #include "simd.h"
  38 #include "fastintfns.h"
  39
  40
  41 /* Global function pointers for SIMD-dependent functions */
  42 int (*pquant_non_intra)(pict_data_s *picture, int16_t *src, int16_t *dst,
  43                                                 int mquant, int *nonsat_mquant);
  44 int (*pquant_weight_coeff_sum)(int16_t *blk, uint16_t*i_quant_mat );
  45
  46 /* Local functions pointers for SIMD-dependent functions */
  47
  48 static void (*piquant_non_intra_m1)(int16_t *src, int16_t *dst,  uint16_t *quant_mat);
  49
  50
  51 static int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat );
  52 static void iquant_non_intra_m1(int16_t *src, int16_t *dst, uint16_t *quant_mat);
  53
  54
  55 /*
  56   Initialise quantization routines.
  57   Currently just setting up MMX routines if available...
  58  */
  59
  60 void init_quantizer_hv()
  61 {
  62 #ifdef X86_CPU
  63   int flags;
  64   flags = cpu_accel();
  65   if( (flags & ACCEL_X86_MMX) != 0 ) /* MMX CPU */
  66         {
  67                 if(verbose) fprintf( stderr, "SETTING " );
  68                 if( (flags & ACCEL_X86_3DNOW) != 0 )
  69                 {
  70                         if(verbose) fprintf( stderr, "3DNOW and ");
  71                         pquant_non_intra = quant_non_intra_hv_3dnow;
  72                 }
  73 /*
  74  *              else if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  75  *              {
  76  *                      if(verbose) fprintf( stderr, "SSE and ");
  77  *                      pquant_non_intra = quant_non_intra_hv_sse;
  78  *              }
  79  */
  80                 else
  81                 {
  82                         pquant_non_intra = quant_non_intra_hv;
  83                 }
  84
  85                 if ( (flags & ACCEL_X86_MMXEXT) != 0 )
  86                 {
  87                         if(verbose) fprintf( stderr, "EXTENDED MMX");
  88                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  89                         piquant_non_intra_m1 = iquant_non_intra_m1_sse;
  90                 }
  91                 else
  92                 {
  93                         if(verbose) fprintf( stderr, "MMX");
  94                         pquant_weight_coeff_sum = quant_weight_coeff_sum_mmx;
  95                         piquant_non_intra_m1 = iquant_non_intra_m1_mmx;
  96                 }
  97                 if(verbose) fprintf( stderr, " for QUANTIZER!\n");
  98         }
  99   else
 100 #endif
 101         {
 102           pquant_non_intra = quant_non_intra_hv;
 103           pquant_weight_coeff_sum = quant_weight_coeff_sum;
 104           piquant_non_intra_m1 = iquant_non_intra_m1;
 105         }
 106 }
 107
 108 /*
 109  *
 110  * Computes the next quantisation up.  Used to avoid saturation
 111  * in macroblock coefficients - common in MPEG-1 - which causes
 112  * nasty artefacts.
 113  *
 114  * NOTE: Does no range checking...
 115  *
 116  */
 117
 118
 119 int next_larger_quant_hv( pict_data_s *picture, int quant )
 120 {
 121         if( picture->q_scale_type )
 122                 {
 123                         if( map_non_linear_mquant_hv[quant]+1 > 31 )
 124                                 return quant;
 125                         else
 126                                 return non_linear_mquant_table_hv[map_non_linear_mquant_hv[quant]+1];
 127                 }
 128         else
 129                 {
 130                         if( quant+2 > 31 )
 131                                 return quant;
 132                         else
 133                                 return quant+2;
 134                 }
 135
 136 }
 137
 138 /*
 139  * Quantisation for intra blocks using Test Model 5 quantization
 140  *
 141  * this quantizer has a bias of 1/8 stepsize towards zero
 142  * (except for the DC coefficient)
 143  *
 144         PRECONDITION: src dst point to *disinct* memory buffers...
 145                           of block_count *adjact* int16_t[64] arrays...
 146  *
 147  * RETURN: 1 If non-zero coefficients left after quantisaiont 0 otherwise
 148  */
 149
 150 void quant_intra_hv(
 151         pict_data_s *picture,
 152         int16_t *src,
 153         int16_t *dst,
 154         int mquant,
 155         int *nonsat_mquant
 156         )
 157 {
 158   int16_t *psrc,*pbuf;
 159   int i,comp;
 160   int x, y, d;
 161   int clipping;
 162   int clipvalue  = dctsatlim;
 163   uint16_t *quant_mat = intra_q_tbl[mquant] /* intra_q */;
 164
 165
 166   /* Inspired by suggestion by Juan.  Quantize a little harder if we clip...
 167    */
 168
 169   do
 170         {
 171           clipping = 0;
 172           pbuf = dst;
 173           psrc = src;
 174           for( comp = 0; comp<block_count && !clipping; ++comp )
 175           {
 176                 x = psrc[0];
 177                 d = 8>>picture->dc_prec; /* intra_dc_mult */
 178                 pbuf[0] = (x>=0) ? (x+(d>>1))/d : -((-x+(d>>1))/d); /* round(x/d) */
 179
 180
 181                 for (i=1; i<64 ; i++)
 182                   {
 183                         x = psrc[i];
 184                         d = quant_mat[i];
 185                         /* RJ: save one divide operation */
 186                         y = ((abs(x) << 5)+ ((3 * quant_mat[i]) >> 2)) / (quant_mat[i] << 1);
 187                         if ( y > clipvalue )
 188                           {
 189                                 clipping = 1;
 190                                 mquant = next_larger_quant_hv( picture, mquant );
 191                                 quant_mat = intra_q_tbl[mquant];
 192                                 break;
 193                           }
 194
 195                         pbuf[i] = intsamesign(x,y);
 196                   }
 197                 pbuf += 64;
 198                 psrc += 64;
 199           }
 200
 201         } while( clipping );
 202   *nonsat_mquant = mquant;
 203 }
 204
 205
 206 /*
 207  * Quantisation matrix weighted Coefficient sum fixed-point
 208  * integer with low 16 bits fractional...
 209  * To be used for rate control as a measure of dct block
 210  * complexity...
 211  *
 212  */
 213
 214 int quant_weight_coeff_sum( int16_t *blk, uint16_t * i_quant_mat )
 215 {
 216   int i;
 217   int sum = 0;
 218    for( i = 0; i < 64; i+=2 )
 219         {
 220                 sum += abs((int)blk[i]) * (i_quant_mat[i]) + abs((int)blk[i+1]) * (i_quant_mat[i+1]);
 221         }
 222     return sum;
 223   /* In case you're wondering typical average coeff_sum's for a rather
 224         noisy video are around 20.0.  */
 225 }
 226
 227
 228
 229 /*
 230  * Quantisation for non-intra blocks using Test Model 5 quantization
 231  *
 232  * this quantizer has a bias of 1/8 stepsize towards zero
 233  * (except for the DC coefficient)
 234  *
 235  * A.Stevens 2000: The above comment is nonsense.  Only the intra quantiser does
 236  * this.  This one just truncates with a modest bias of 1/(4*quant_matrix_scale)
 237  * to 1.
 238  *
 239  *      PRECONDITION: src dst point to *disinct* memory buffers...
 240  *                    of block_count *adjacent* int16_t[64] arrays...
 241  *
 242  * RETURN: A bit-mask of block_count bits indicating non-zero blocks (a 1).
 243  *
 244  * TODO: A candidate for use of efficient abs and "intsamesign". If only gcc understood
 245  * PPro conditional moves...
 246  */
 247
 248 int quant_non_intra_hv(
 249                                                    pict_data_s *picture,
 250                                                    int16_t *src, int16_t *dst,
 251                                                    int mquant,
 252                                                    int *nonsat_mquant)
 253 {
 254         int i;
 255         int x, y, d;
 256         int nzflag;
 257         int coeff_count;
 258         int clipvalue  = dctsatlim;
 259         int flags = 0;
 260         int saturated = 0;
 261         uint16_t *quant_mat = inter_q_tbl[mquant]/* inter_q */;
 262
 263         coeff_count = 64*block_count;
 264         flags = 0;
 265         nzflag = 0;
 266         for (i=0; i<coeff_count; ++i)
 267         {
 268 restart:
 269                 if( (i%64) == 0 )
 270                 {
 271                         nzflag = (nzflag<<1) | !!flags;
 272                         flags = 0;
 273
 274                 }
 275                 /* RJ: save one divide operation */
 276
 277                 x = abs( ((int)src[i]) ) /*(src[i] >= 0 ? src[i] : -src[i])*/ ;
 278                 d = (int)quant_mat[(i&63)];
 279                 /* A.Stevens 2000: Given the math of non-intra frame
 280                    quantisation / inverse quantisation I always though the
 281                    funny little foudning factor was bogus.  It seems to be
 282                    the encoder needs less bits if you simply divide!
 283                 */
 284
 285                 y = (x<<4) /  (d) /* (32*x + (d>>1))/(d*2*mquant)*/ ;
 286                 if ( y > clipvalue )
 287                 {
 288                         if( saturated )
 289                         {
 290                                 y = clipvalue;
 291                         }
 292                         else
 293                         {
 294                                 int new_mquant = next_larger_quant_hv( picture, mquant );
 295                                 if( new_mquant != mquant )
 296                                 {
 297                                         mquant = new_mquant;
 298                                         quant_mat = inter_q_tbl[mquant];
 299                                 }
 300                                 else
 301                                 {
 302                                         saturated = 1;
 303                                 }
 304                                 i=0;
 305                                 nzflag =0;
 306                                 goto restart;
 307                         }
 308                 }
 309                 dst[i] = intsamesign(src[i], y) /* (src[i] >= 0 ? y : -y) */;
 310                 flags |= dst[i];
 311         }
 312         nzflag = (nzflag<<1) | !!flags;
 313
 314     *nonsat_mquant = mquant;
 315     return nzflag;
 316 }
 317
 318 /* MPEG-1 inverse quantization */
 319 static void iquant1_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 320 {
 321   int i, val;
 322   uint16_t *quant_mat = intra_q;
 323
 324   dst[0] = src[0] << (3-dc_prec);
 325   for (i=1; i<64; i++)
 326   {
 327     val = (int)(src[i]*quant_mat[i]*mquant)/16;
 328
 329     /* mismatch control */
 330     if ((val&1)==0 && val!=0)
 331       val+= (val>0) ? -1 : 1;
 332
 333     /* saturation */
 334     dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 335   }
 336 }
 337
 338
 339 /* MPEG-2 inverse quantization */
 340 void iquant_intra(int16_t *src, int16_t *dst, int dc_prec, int mquant)
 341 {
 342   int i, val, sum;
 343
 344   if ( mpeg1  )
 345     iquant1_intra(src,dst,dc_prec, mquant);
 346   else
 347   {
 348     sum = dst[0] = src[0] << (3-dc_prec);
 349     for (i=1; i<64; i++)
 350     {
 351       val = (int)(src[i]*intra_q[i]*mquant)/16;
 352       sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 353     }
 354
 355     /* mismatch control */
 356     if ((sum&1)==0)
 357       dst[63]^= 1;
 358   }
 359 }
 360
 361
 362 static void iquant_non_intra_m1(int16_t *src, int16_t *dst,  uint16_t *quant_mat)
 363 {
 364   int i, val;
 365
 366 #ifndef ORIGINAL_CODE
 367
 368   for (i=0; i<64; i++)
 369   {
 370     val = src[i];
 371     if (val!=0)
 372     {
 373       val = (int)((2*val+(val>0 ? 1 : -1))*quant_mat[i])/32;
 374
 375       /* mismatch control */
 376       if ((val&1)==0 && val!=0)
 377         val+= (val>0) ? -1 : 1;
 378     }
 379
 380     /* saturation */
 381      dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 382  }
 383 #else
 384
 385   for (i=0; i<64; i++)
 386   {
 387     val = abs(src[i]);
 388     if (val!=0)
 389     {
 390                 val = ((val+val+1)*quant_mat[i]) >> 5;
 391                 /* mismatch control */
 392                 val -= (~(val&1))&(val!=0);
 393                 val = fastmin(val, 2047); /* Saturation */
 394     }
 395         dst[i] = intsamesign(src[i],val);
 396
 397   }
 398
 399 #endif
 400 }
 401
 402
 403
 404
 405 void iquant_non_intra(int16_t *src, int16_t *dst, int mquant )
 406 {
 407   int i, val, sum;
 408   uint16_t *quant_mat;
 409
 410   if ( mpeg1 )
 411     (*piquant_non_intra_m1)(src,dst,inter_q_tbl[mquant]);
 412   else
 413   {
 414           sum = 0;
 415 #ifdef ORIGINAL_CODE
 416
 417           for (i=0; i<64; i++)
 418           {
 419                   val = src[i];
 420                   if (val!=0)
 421
 422                           val = (int)((2*val+(val>0 ? 1 : -1))*inter_q[i]*mquant)/32;
 423                   sum+= dst[i] = (val>2047) ? 2047 : ((val<-2048) ? -2048 : val);
 424           }
 425 #else
 426           quant_mat = inter_q_tbl[mquant];
 427           for (i=0; i<64; i++)
 428           {
 429                   val = src[i];
 430                   if( val != 0 )
 431                   {
 432                           val = abs(val);
 433                           val = (int)((val+val+1)*quant_mat[i])>>5;
 434                           val = intmin( val, 2047);
 435                           sum += val;
 436                   }
 437                   dst[i] = intsamesign(src[i],val);
 438           }
 439 #endif
 440
 441     /* mismatch control */
 442     if ((sum&1)==0)
 443       dst[63]^= 1;
 444   }
 445 }
 446
 447 void iquantize( pict_data_s *picture )
 448 {
 449         int j,k;
 450         int16_t (*qblocks)[64] = picture->qblocks;
 451         for (k=0; k<mb_per_pict; k++)
 452         {
 453                 if (picture->mbinfo[k].mb_type & MB_INTRA)
 454                         for (j=0; j<block_count; j++)
 455                                 iquant_intra(qblocks[k*block_count+j],
 456                                                          qblocks[k*block_count+j],
 457                                                          cur_picture.dc_prec,
 458                                                          cur_picture.mbinfo[k].mquant);
 459                 else
 460                         for (j=0;j<block_count;j++)
 461                                 iquant_non_intra(qblocks[k*block_count+j],
 462                                                                  qblocks[k*block_count+j],
 463                                                                  cur_picture.mbinfo[k].mquant);
 464         }
 465 }