cinelerra-5.1/mpeg2enc/transfrm.c

   1 /* transfrm.c,  forward / inverse transformation                            */
   2
   3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
   4
   5 /*
   6  * Disclaimer of Warranty
   7  *
   8  * These software programs are available to the user without any license fee or
   9  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
  10  * any and all warranties, whether express, implied, or statuary, including any
  11  * implied warranties or merchantability or of fitness for a particular
  12  * purpose.  In no event shall the copyright-holder be liable for any
  13  * incidental, punitive, or consequential damages of any kind whatsoever
  14  * arising from the use of these programs.
  15  *
  16  * This disclaimer of warranty extends to the user of these programs and user's
  17  * customers, employees, agents, transferees, successors, and assigns.
  18  *
  19  * The MPEG Software Simulation Group does not represent or warrant that the
  20  * programs furnished hereunder are free of infringement of any third-party
  21  * patents.
  22  *
  23  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
  24  * are subject to royalty fees to patent holders.  Many of these patents are
  25  * general enough such that they are unavoidable regardless of implementation
  26  * design.
  27  *
  28  */
  29
  30 #include "config.h"
  31 #include "global.h"
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <math.h>
  35 #include "cpu_accel.h"
  36
#ifdef X86_CPU
/*
 * x86-only variants of the DCT routines.  They are not defined in this file;
 * init_transform_hv() installs them when cpu_accel() reports ACCEL_X86_MMX.
 */
extern void fdct_mmx( int16_t * blk );
extern void idct_mmx( int16_t * blk, unsigned char *temp );

/*
 * x86-only variants of add_pred() / sub_pred(), installed by
 * init_transform_hv() under the same condition as the routines above.
 */
void add_pred_mmx (uint8_t *pred, uint8_t *cur,
                                   int lx, int16_t *blk);
void sub_pred_mmx (uint8_t *pred, uint8_t *cur,
                                   int lx, int16_t *blk);
#endif

/*
 * Portable DCT routines, not defined in this file.  init_transform_hv()
 * installs them when the MMX variants are not selected.
 */
extern void fdct( int16_t *blk );
extern void idct( int16_t *blk, unsigned char *temp );



/* private prototypes: portable prediction add/subtract, defined further down */
static void add_pred (uint8_t *pred, uint8_t *cur,
                                          int lx, int16_t *blk);
static void sub_pred (uint8_t *pred, uint8_t *cur,
                                          int lx, int16_t *blk);
  57
  58 /*
  59   Pointers to version of transform and prediction manipulation
  60   routines to be used..
  61  */
  62
  63 static void (*pfdct)( int16_t * blk );
  64 static void (*pidct)( int16_t * blk , unsigned char *temp);
  65 static void (*padd_pred) (uint8_t *pred, uint8_t *cur,
  66                                                   int lx, int16_t *blk);
  67 static void (*psub_pred) (uint8_t *pred, uint8_t *cur,
  68                                                   int lx, int16_t *blk);
  69
  70 /*
  71   Initialise DCT transformation routines
  72   Currently just activates MMX routines if available
  73  */
  74
  75
  76 void init_transform_hv()
  77 {
  78 #ifdef X86_CPU
  79         int flags;
  80         flags = cpu_accel();
  81
  82         if( (flags & ACCEL_X86_MMX) ) /* MMX CPU */
  83         {
  84                 if(verbose) fprintf( stderr, "SETTING MMX for TRANSFORM!\n");
  85                 pfdct = fdct_mmx;
  86                 pidct = idct_mmx;
  87                 padd_pred = add_pred_mmx;
  88                 psub_pred = sub_pred_mmx;
  89         }
  90         else
  91 #endif
  92         {
  93                 pfdct = fdct;
  94                 pidct = idct;
  95                 padd_pred = add_pred;
  96                 psub_pred = sub_pred;
  97
  98         }
  99 }
 100
 101 /* add prediction and prediction error, saturate to 0...255 */
 102 static void add_pred(unsigned char *pred,
 103         unsigned char *cur,
 104         int lx,
 105         short *blk)
 106 {
 107         int j;
 108
 109         for (j=0; j<8; j++)
 110         {
 111 /*
 112  *      for (i=0; i<8; i++)
 113  *        cur[i] = clp[blk[i] + pred[i]];
 114  */
 115         cur[0] = clp[blk[0] + pred[0]];
 116         cur[1] = clp[blk[1] + pred[1]];
 117         cur[2] = clp[blk[2] + pred[2]];
 118         cur[3] = clp[blk[3] + pred[3]];
 119         cur[4] = clp[blk[4] + pred[4]];
 120         cur[5] = clp[blk[5] + pred[5]];
 121         cur[6] = clp[blk[6] + pred[6]];
 122         cur[7] = clp[blk[7] + pred[7]];
 123
 124         blk += 8;
 125         cur += lx;
 126         pred += lx;
 127         }
 128 }
 129
 130 /* subtract prediction from block data */
 131 static void sub_pred(unsigned char *pred,
 132         unsigned char *cur,
 133         int lx,
 134         short *blk)
 135 {
 136         int j;
 137
 138         for (j=0; j<8; j++)
 139         {
 140 /*
 141  *      for (i=0; i<8; i++)
 142  *              blk[i] = cur[i] - pred[i];
 143  */
 144         blk[0] = cur[0] - pred[0];
 145         blk[1] = cur[1] - pred[1];
 146         blk[2] = cur[2] - pred[2];
 147         blk[3] = cur[3] - pred[3];
 148         blk[4] = cur[4] - pred[4];
 149         blk[5] = cur[5] - pred[5];
 150         blk[6] = cur[6] - pred[6];
 151         blk[7] = cur[7] - pred[7];
 152
 153         blk += 8;
 154         cur += lx;
 155         pred += lx;
 156         }
 157 }
 158
 159 void transform_engine_loop(transform_engine_t *engine)
 160 {
 161         while(!engine->done)
 162         {
 163                 pthread_mutex_lock(&(engine->input_lock));
 164
 165                 if(!engine->done)
 166                 {
 167                         pict_data_s *picture = engine->picture;
 168                         uint8_t **pred = engine->pred;
 169                         uint8_t **cur = engine->cur;
 170                         mbinfo_s *mbi = picture->mbinfo;
 171                         int16_t (*blocks)[64] = picture->blocks;
 172                         int i, j, i1, j1, k, n, cc, offs, lx;
 173
 174                         k = (engine->start_row / 16) * (width / 16);
 175
 176                         for(j = engine->start_row; j < engine->end_row; j += 16)
 177                         for(i = 0; i < width; i += 16)
 178                         {
 179                                         mbi[k].dctblocks = &blocks[k * block_count];
 180
 181                                 for(n = 0; n < block_count; n++)
 182                                 {
 183 /* color component index */
 184                                         cc = (n < 4) ? 0 : (n & 1) + 1;
 185                                         if(cc == 0)
 186                                         {
 187 /* A.Stevens Jul 2000 Record dct blocks associated with macroblock */
 188 /* We'll use this for quantisation calculations                    */
 189 /* luminance */
 190                                                         if ((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
 191                                                         {
 192 /* field DCT */
 193                                                                 offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
 194                                                                 lx = width << 1;
 195                                                         }
 196                                                         else
 197                                                         {
 198 /* frame DCT */
 199                                                                 offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
 200                                                                 lx = width2;
 201                                                         }
 202
 203                                                         if (picture->pict_struct == BOTTOM_FIELD)
 204                                                                 offs += width;
 205                                         }
 206                                         else
 207                                         {
 208 /* chrominance */
 209 /* scale coordinates */
 210                                                 i1 = (chroma_format == CHROMA444) ? i : i >> 1;
 211                                                 j1 = (chroma_format != CHROMA420) ? j : j >> 1;
 212
 213                                                 if ((picture->pict_struct==FRAME_PICTURE) && mbi[k].dct_type
 214                                                 && (chroma_format!=CHROMA420))
 215                                                 {
 216 /* field DCT */
 217                                                 offs = i1 + (n&8) + chrom_width*(j1+((n&2)>>1));
 218                                                 lx = chrom_width<<1;
 219                                                 }
 220                                                 else
 221                                                 {
 222 /* frame DCT */
 223                                                 offs = i1 + (n&8) + chrom_width2*(j1+((n&2)<<2));
 224                                                 lx = chrom_width2;
 225                                                 }
 226
 227                                                 if(picture->pict_struct==BOTTOM_FIELD)
 228                                                 offs += chrom_width;
 229                                         }
 230
 231                                                 (*psub_pred)(pred[cc]+offs,cur[cc]+offs,lx,
 232                                                                          blocks[k*block_count+n]);
 233                                                 (*pfdct)(blocks[k*block_count+n]);
 234                                 }
 235
 236                                 k++;
 237                         }
 238                 }
 239                 pthread_mutex_unlock(&(engine->output_lock));
 240         }
 241 }
 242
 243 /* subtract prediction and transform prediction error */
 244 void transform(pict_data_s *picture,
 245         uint8_t *pred[], uint8_t *cur[])
 246 {
 247         int i;
 248 /* Start loop */
 249         for(i = 0; i < processors; i++)
 250         {
 251                 transform_engines[i].picture = picture;
 252                 transform_engines[i].pred = pred;
 253                 transform_engines[i].cur = cur;
 254                 pthread_mutex_unlock(&(transform_engines[i].input_lock));
 255         }
 256
 257 /* Wait for completion */
 258         for(i = 0; i < processors; i++)
 259         {
 260                 pthread_mutex_lock(&(transform_engines[i].output_lock));
 261         }
 262 }
 263
 264
 265
 266 void start_transform_engines()
 267 {
 268         int i;
 269         int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
 270         int current_row = 0;
 271         pthread_attr_t  attr;
 272         pthread_mutexattr_t mutex_attr;
 273
 274         pthread_mutexattr_init(&mutex_attr);
 275         pthread_attr_init(&attr);
 276         transform_engines = calloc(1, sizeof(transform_engine_t) * processors);
 277         for(i = 0; i < processors; i++)
 278         {
 279                 transform_engines[i].start_row = current_row * 16;
 280                 current_row += rows_per_processor;
 281                 if(current_row > height2 / 16) current_row = height2 / 16;
 282                 transform_engines[i].end_row = current_row * 16;
 283                 pthread_mutex_init(&(transform_engines[i].input_lock), &mutex_attr);
 284                 pthread_mutex_lock(&(transform_engines[i].input_lock));
 285                 pthread_mutex_init(&(transform_engines[i].output_lock), &mutex_attr);
 286                 pthread_mutex_lock(&(transform_engines[i].output_lock));
 287                 transform_engines[i].done = 0;
 288                 pthread_create(&(transform_engines[i].tid),
 289                         &attr,
 290                         (void*)transform_engine_loop,
 291                         &transform_engines[i]);
 292         }
 293 }
 294
 295 void stop_transform_engines()
 296 {
 297         int i;
 298         for(i = 0; i < processors; i++)
 299         {
 300                 transform_engines[i].done = 1;
 301                 pthread_mutex_unlock(&(transform_engines[i].input_lock));
 302                 pthread_join(transform_engines[i].tid, 0);
 303                 pthread_mutex_destroy(&(transform_engines[i].input_lock));
 304                 pthread_mutex_destroy(&(transform_engines[i].output_lock));
 305         }
 306         free(transform_engines);
 307 }
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317 /* inverse transform prediction error and add prediction */
 318 void itransform_engine_loop(transform_engine_t *engine)
 319 {
 320         while(!engine->done)
 321         {
 322                 pthread_mutex_lock(&(engine->input_lock));
 323
 324                 if(!engine->done)
 325                 {
 326                         pict_data_s *picture = engine->picture;
 327                         uint8_t **pred = engine->pred;
 328                         uint8_t **cur = engine->cur;
 329                         int i, j, i1, j1, k, n, cc, offs, lx;
 330                 mbinfo_s *mbi = picture->mbinfo;
 331 /* Its the quantised / inverse quantised blocks were interested in
 332    for inverse transformation */
 333                         int16_t (*blocks)[64] = picture->qblocks;
 334
 335                         k = (engine->start_row / 16) * (width / 16);
 336
 337                         for(j = engine->start_row; j < engine->end_row; j += 16)
 338                                 for(i = 0; i < width; i += 16)
 339                                 {
 340                                         for(n = 0; n < block_count; n++)
 341                                         {
 342                                         cc = (n < 4) ? 0 : (n & 1) + 1; /* color component index */
 343
 344                                         if(cc == 0)
 345                                         {
 346 /* luminance */
 347                                                 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
 348                                                 {
 349 /* field DCT */
 350                                                         offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
 351                                                         lx = width<<1;
 352                                                 }
 353                                                 else
 354                                                 {
 355 /* frame DCT */
 356                                                         offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
 357                                                         lx = width2;
 358                                                 }
 359
 360                                                 if(picture->pict_struct == BOTTOM_FIELD)
 361                                                 offs += width;
 362                                         }
 363                                         else
 364                                         {
 365 /* chrominance */
 366
 367 /* scale coordinates */
 368                                                 i1 = (chroma_format==CHROMA444) ? i : i>>1;
 369                                                 j1 = (chroma_format!=CHROMA420) ? j : j>>1;
 370
 371                                                 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type
 372                                                         && (chroma_format != CHROMA420))
 373                                                 {
 374 /* field DCT */
 375                                                         offs = i1 + (n & 8) + chrom_width * (j1 + ((n & 2) >> 1));
 376                                                         lx = chrom_width << 1;
 377                                                 }
 378                                                 else
 379                                                 {
 380 /* frame DCT */
 381                                                         offs = i1 + (n&8) + chrom_width2 * (j1 + ((n & 2) << 2));
 382                                                         lx = chrom_width2;
 383                                                 }
 384
 385                                                 if(picture->pict_struct == BOTTOM_FIELD)
 386                                                         offs += chrom_width;
 387                                     }
 388
 389 //pthread_mutex_lock(&test_lock);
 390                                                 (*pidct)(blocks[k*block_count+n], engine->temp);
 391                                                 (*padd_pred)(pred[cc]+offs,cur[cc]+offs,lx,blocks[k*block_count+n]);
 392 //pthread_mutex_unlock(&test_lock);
 393                                         }
 394
 395                                         k++;
 396                                 }
 397                 }
 398                 pthread_mutex_unlock(&(engine->output_lock));
 399         }
 400 }
 401
 402 void itransform(pict_data_s *picture,
 403         uint8_t *pred[], uint8_t *cur[])
 404 {
 405         int i;
 406 /* Start loop */
 407         for(i = 0; i < processors; i++)
 408         {
 409                 itransform_engines[i].picture = picture;
 410                 itransform_engines[i].cur = cur;
 411                 itransform_engines[i].pred = pred;
 412                 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
 413         }
 414
 415 /* Wait for completion */
 416         for(i = 0; i < processors; i++)
 417         {
 418                 pthread_mutex_lock(&(itransform_engines[i].output_lock));
 419         }
 420 }
 421
 422 void start_itransform_engines()
 423 {
 424         int i;
 425         int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
 426         int current_row = 0;
 427         pthread_attr_t  attr;
 428         pthread_mutexattr_t mutex_attr;
 429
 430         pthread_mutexattr_init(&mutex_attr);
 431         pthread_attr_init(&attr);
 432         itransform_engines = calloc(1, sizeof(transform_engine_t) * processors);
 433         for(i = 0; i < processors; i++)
 434         {
 435                 itransform_engines[i].start_row = current_row * 16;
 436                 current_row += rows_per_processor;
 437                 if(current_row > height2 / 16) current_row = height2 / 16;
 438                 itransform_engines[i].end_row = current_row * 16;
 439                 pthread_mutex_init(&(itransform_engines[i].input_lock), &mutex_attr);
 440                 pthread_mutex_lock(&(itransform_engines[i].input_lock));
 441                 pthread_mutex_init(&(itransform_engines[i].output_lock), &mutex_attr);
 442                 pthread_mutex_lock(&(itransform_engines[i].output_lock));
 443                 itransform_engines[i].done = 0;
 444                 pthread_create(&(itransform_engines[i].tid),
 445                         &attr,
 446                         (void*)itransform_engine_loop,
 447                         &itransform_engines[i]);
 448         }
 449 }
 450
 451 void stop_itransform_engines()
 452 {
 453         int i;
 454         for(i = 0; i < processors; i++)
 455         {
 456                 itransform_engines[i].done = 1;
 457                 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
 458                 pthread_join(itransform_engines[i].tid, 0);
 459                 pthread_mutex_destroy(&(itransform_engines[i].input_lock));
 460                 pthread_mutex_destroy(&(itransform_engines[i].output_lock));
 461         }
 462         free(itransform_engines);
 463 }
 464
 465
 466
 467
 468 /*
 469  * select between frame and field DCT
 470  *
 471  * preliminary version: based on inter-field correlation
 472  */
 473
 474 void dct_type_estimation(
 475         pict_data_s *picture,
 476         uint8_t *pred, uint8_t *cur
 477         )
 478 {
 479
 480         struct mbinfo *mbi = picture->mbinfo;
 481
 482         int16_t blk0[128], blk1[128];
 483         int i, j, i0, j0, k, offs, s0, s1, sq0, sq1, s01;
 484         double d, r;
 485
 486         k = 0;
 487
 488         for (j0=0; j0<height2; j0+=16)
 489                 for (i0=0; i0<width; i0+=16)
 490                 {
 491                         if (picture->frame_pred_dct || picture->pict_struct!=FRAME_PICTURE)
 492                                 mbi[k].dct_type = 0;
 493                         else
 494                         {
 495                                 /* interlaced frame picture */
 496                                 /*
 497                                  * calculate prediction error (cur-pred) for top (blk0)
 498                                  * and bottom field (blk1)
 499                                  */
 500                                 for (j=0; j<8; j++)
 501                                 {
 502                                         offs = width*((j<<1)+j0) + i0;
 503                                         for (i=0; i<16; i++)
 504                                         {
 505                                                 blk0[16*j+i] = cur[offs] - pred[offs];
 506                                                 blk1[16*j+i] = cur[offs+width] - pred[offs+width];
 507                                                 offs++;
 508                                         }
 509                                 }
 510                                 /* correlate fields */
 511                                 s0=s1=sq0=sq1=s01=0;
 512
 513                                 for (i=0; i<128; i++)
 514                                 {
 515                                         s0+= blk0[i];
 516                                         sq0+= blk0[i]*blk0[i];
 517                                         s1+= blk1[i];
 518                                         sq1+= blk1[i]*blk1[i];
 519                                         s01+= blk0[i]*blk1[i];
 520                                 }
 521
 522                                 d = (sq0-(s0*s0)/128.0)*(sq1-(s1*s1)/128.0);
 523
 524                                 if (d>0.0)
 525                                 {
 526                                         r = (s01-(s0*s1)/128.0)/sqrt(d);
 527                                         if (r>0.5)
 528                                                 mbi[k].dct_type = 0; /* frame DCT */
 529                                         else
 530                                                 mbi[k].dct_type = 1; /* field DCT */
 531                                 }
 532                                 else
 533                                         mbi[k].dct_type = 1; /* field DCT */
 534                         }
 535                         k++;
 536                 }
 537 }