1 /* transfrm.c, forward / inverse transformation */
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
6 * Disclaimer of Warranty
8 * These software programs are available to the user without any license fee or
9 * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
10 * any and all warranties, whether express, implied, or statuary, including any
11 * implied warranties or merchantability or of fitness for a particular
12 * purpose. In no event shall the copyright-holder be liable for any
13 * incidental, punitive, or consequential damages of any kind whatsoever
14 * arising from the use of these programs.
16 * This disclaimer of warranty extends to the user of these programs and user's
17 * customers, employees, agents, transferees, successors, and assigns.
19 * The MPEG Software Simulation Group does not represent or warrant that the
20 * programs furnished hereunder are free of infringement of any third-party
23 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24 * are subject to royalty fees to patent holders. Many of these patents are
25 * general enough such that they are unavoidable regardless of implementation
35 #include "cpu_accel.h"
/* MMX-named variants of the DCT and prediction routines.  They are only
   declared here; their definitions are not part of this listing. */
38 extern void fdct_mmx( int16_t * blk );
39 extern void idct_mmx( int16_t * blk, unsigned char *temp );
41 void add_pred_mmx (uint8_t *pred, uint8_t *cur,
42 int lx, int16_t *blk);
43 void sub_pred_mmx (uint8_t *pred, uint8_t *cur,
44 int lx, int16_t *blk);
/* Forward and inverse DCT with the same signatures as the MMX variants,
   likewise defined outside this listing. */
47 extern void fdct( int16_t *blk );
48 extern void idct( int16_t *blk, unsigned char *temp );
52 /* private prototypes*/
/* File-local add_pred()/sub_pred().  They share the signature of the *_mmx
   prediction routines, so either flavour fits padd_pred / psub_pred. */
53 static void add_pred (uint8_t *pred, uint8_t *cur,
54 int lx, int16_t *blk);
55 static void sub_pred (uint8_t *pred, uint8_t *cur,
56 int lx, int16_t *blk);
59 Pointers to version of transform and prediction manipulation
/* Function pointers through which the engine loops reach the DCT and
   prediction routines: (*psub_pred) and (*pfdct) are called in
   transform_engine_loop(), (*pidct) and (*padd_pred) in
   itransform_engine_loop().  init_transform_hv() assigns padd_pred and
   psub_pred in the lines shown.
   NOTE(review): the assignments of pfdct and pidct are not visible in this
   listing -- confirm they are set before the engines run. */
63 static void (*pfdct)( int16_t * blk );
64 static void (*pidct)( int16_t * blk , unsigned char *temp);
65 static void (*padd_pred) (uint8_t *pred, uint8_t *cur,
66 int lx, int16_t *blk);
67 static void (*psub_pred) (uint8_t *pred, uint8_t *cur,
68 int lx, int16_t *blk);
71 Initialise DCT transformation routines
72 Currently just activates MMX routines if available
/*
 * Choose the implementations behind the prediction function pointers.
 *
 * In the lines visible here: when the ACCEL_X86_MMX bit is set in `flags`,
 * a notice is written to stderr if `verbose` is non-zero, and padd_pred /
 * psub_pred are pointed at add_pred_mmx / sub_pred_mmx.
 *
 * NOTE(review): the declaration and initialisation of `flags`, any
 * assignment of pfdct / pidct, and any non-MMX fallback branch are not
 * visible in this listing -- confirm against the full file.
 */
76 void init_transform_hv()
82 if( (flags & ACCEL_X86_MMX) ) /* MMX CPU */
84 if(verbose) fprintf( stderr, "SETTING MMX for TRANSFORM!\n");
87 padd_pred = add_pred_mmx;
88 psub_pred = sub_pred_mmx;
101 /* add prediction and prediction error, saturate to 0...255 */
/*
 * For eight consecutive samples, the sum of the prediction error blk[k] and
 * the prediction pred[k] is used as an index into clp[] and the table entry
 * is stored in cur[k].  clp[] is defined outside this listing; per the
 * comment above it performs the saturation to 0...255.
 *
 * The eight statements are the hand-unrolled form of the loop kept in the
 * comment fragment below.
 *
 * NOTE(review): the rest of the parameter list (cur, lx, blk according to
 * the prototype), the loop over rows and the stepping of the three pointers
 * are not visible in this listing.
 */
102 static void add_pred(unsigned char *pred,
112 * for (i=0; i<8; i++)
113 * cur[i] = clp[blk[i] + pred[i]];
115 cur[0] = clp[blk[0] + pred[0]];
116 cur[1] = clp[blk[1] + pred[1]];
117 cur[2] = clp[blk[2] + pred[2]];
118 cur[3] = clp[blk[3] + pred[3]];
119 cur[4] = clp[blk[4] + pred[4]];
120 cur[5] = clp[blk[5] + pred[5]];
121 cur[6] = clp[blk[6] + pred[6]];
122 cur[7] = clp[blk[7] + pred[7]];
130 /* subtract prediction from block data */
/*
 * For eight consecutive samples, stores the difference cur[k] - pred[k]
 * (current sample minus prediction) in blk[k].
 *
 * The eight statements are the hand-unrolled form of the loop kept in the
 * comment fragment below.
 *
 * NOTE(review): the rest of the parameter list (cur, lx, blk according to
 * the prototype), the loop over rows and the stepping of the three pointers
 * are not visible in this listing.
 */
131 static void sub_pred(unsigned char *pred,
141 * for (i=0; i<8; i++)
142 * blk[i] = cur[i] - pred[i];
144 blk[0] = cur[0] - pred[0];
145 blk[1] = cur[1] - pred[1];
146 blk[2] = cur[2] - pred[2];
147 blk[3] = cur[3] - pred[3];
148 blk[4] = cur[4] - pred[4];
149 blk[5] = cur[5] - pred[5];
150 blk[6] = cur[6] - pred[6];
151 blk[7] = cur[7] - pred[7];
/*
 * Thread body of one forward-transform engine; start_transform_engines()
 * passes it to pthread_create() together with the engine's own record.
 *
 * For every 16x16 macroblock in rows [engine->start_row, engine->end_row)
 * and each of its block_count blocks, it computes the offset `offs` of the
 * block inside its colour plane, calls (*psub_pred) on pred[cc] + offs and
 * cur[cc] + offs with line stride lx to fill the block's 64-entry buffer in
 * picture->blocks, and then runs (*pfdct) on that buffer.
 *
 * Synchronisation as visible here: engine->input_lock is acquired before
 * engine->picture / pred / cur are read, and engine->output_lock is released
 * after the macroblock loops.  transform() performs the matching unlock of
 * input_lock and lock of output_lock.
 *
 * NOTE(review): several lines of this function are missing from the listing
 * (the enclosing loop and any test of engine->done, the luminance/chrominance
 * branch heads, the assignments to lx, the bottom-field adjustments and the
 * increment of k) -- confirm against the full file.
 * NOTE(review): output_lock is locked by the thread that runs
 * start_transform_engines() / transform() but unlocked here, in the engine
 * thread.  No mutex type is set on the attribute object in the visible code,
 * and POSIX leaves unlocking a default mutex from a non-owning thread
 * undefined; a semaphore or condition variable would express this hand-off
 * portably.
 */
159 void transform_engine_loop(transform_engine_t *engine)
163 pthread_mutex_lock(&(engine->input_lock));
167 pict_data_s *picture = engine->picture;
168 uint8_t **pred = engine->pred;
169 uint8_t **cur = engine->cur;
170 mbinfo_s *mbi = picture->mbinfo;
171 int16_t (*blocks)[64] = picture->blocks;
172 int i, j, i1, j1, k, n, cc, offs, lx;
/* Index of the first macroblock of this engine's band: width / 16 macroblocks per row. */
174 k = (engine->start_row / 16) * (width / 16);
176 for(j = engine->start_row; j < engine->end_row; j += 16)
177 for(i = 0; i < width; i += 16)
/* Remember where macroblock k's coefficient blocks start. */
179 mbi[k].dctblocks = &blocks[k * block_count];
181 for(n = 0; n < block_count; n++)
183 /* color component index */
/* Blocks 0..3 map to component 0; later blocks alternate between 1 (even n) and 2 (odd n). */
184 cc = (n < 4) ? 0 : (n & 1) + 1;
187 /* A.Stevens Jul 2000 Record dct blocks associated with macroblock */
188 /* We'll use this for quantisation calculations */
/* Offsets based on width / width2 -- presumably the luminance case (branch head not visible).
   With dct_type set in a frame picture, blocks 2 and 3 start one line below blocks 0 and 1;
   otherwise they start eight lines below. */
190 if ((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
193 offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
199 offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
203 if (picture->pict_struct == BOTTOM_FIELD)
209 /* scale coordinates */
/* Horizontal position is halved unless the format is 4:4:4, vertical position only for 4:2:0. */
210 i1 = (chroma_format == CHROMA444) ? i : i >> 1;
211 j1 = (chroma_format != CHROMA420) ? j : j >> 1;
213 if ((picture->pict_struct==FRAME_PICTURE) && mbi[k].dct_type
214 && (chroma_format!=CHROMA420))
217 offs = i1 + (n&8) + chrom_width*(j1+((n&2)>>1));
223 offs = i1 + (n&8) + chrom_width2*(j1+((n&2)<<2));
227 if(picture->pict_struct==BOTTOM_FIELD)
/* Prediction error into the coefficient buffer, then forward DCT in place. */
231 (*psub_pred)(pred[cc]+offs,cur[cc]+offs,lx,
232 blocks[k*block_count+n]);
233 (*pfdct)(blocks[k*block_count+n]);
/* Signal transform(), which waits by locking output_lock. */
239 pthread_mutex_unlock(&(engine->output_lock));
243 /* subtract prediction and transform prediction error */
/*
 * Hands one picture to every forward-transform engine and waits for them.
 *
 *   picture  stored in each engine record and read by transform_engine_loop()
 *   pred     array of prediction planes, indexed by colour component there
 *   cur      array of current-picture planes, indexed the same way
 *
 * Each engine's record is filled in and its input_lock released, which lets
 * the engine thread blocked in transform_engine_loop() proceed.  The second
 * loop then locks every output_lock in turn; the engine loop unlocks it after
 * its band of macroblock rows, so this function returns once all bands are
 * done.
 *
 * NOTE(review): the declaration of i is not visible in this listing.
 */
244 void transform(pict_data_s *picture,
245 uint8_t *pred[], uint8_t *cur[])
249 for(i = 0; i < processors; i++)
251 transform_engines[i].picture = picture;
252 transform_engines[i].pred = pred;
253 transform_engines[i].cur = cur;
254 pthread_mutex_unlock(&(transform_engines[i].input_lock));
257 /* Wait for completion */
258 for(i = 0; i < processors; i++)
260 pthread_mutex_lock(&(transform_engines[i].output_lock));
266 void start_transform_engines()
269 int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
272 pthread_mutexattr_t mutex_attr;
274 pthread_mutexattr_init(&mutex_attr);
275 pthread_attr_init(&attr);
276 transform_engines = calloc(1, sizeof(transform_engine_t) * processors);
277 for(i = 0; i < processors; i++)
279 transform_engines[i].start_row = current_row * 16;
280 current_row += rows_per_processor;
281 if(current_row > height2 / 16) current_row = height2 / 16;
282 transform_engines[i].end_row = current_row * 16;
283 pthread_mutex_init(&(transform_engines[i].input_lock), &mutex_attr);
284 pthread_mutex_lock(&(transform_engines[i].input_lock));
285 pthread_mutex_init(&(transform_engines[i].output_lock), &mutex_attr);
286 pthread_mutex_lock(&(transform_engines[i].output_lock));
287 transform_engines[i].done = 0;
288 pthread_create(&(transform_engines[i].tid),
290 (void*)transform_engine_loop,
291 &transform_engines[i]);
/*
 * Shut down the forward-transform engines and release their resources.
 *
 * For each engine: sets done, releases input_lock so the thread blocked on it
 * in transform_engine_loop() can run, joins the thread, and destroys both
 * mutexes.  The engine array is freed afterwards.
 *
 * NOTE(review): the engine's reaction to `done` is not visible in this
 * listing -- presumably its loop exits when the flag is set; confirm.
 * NOTE(review): as far as the visible code shows, output_lock is still held
 * here (last locked in start_transform_engines() or transform()), and
 * input_lock is re-acquired by the engine thread; POSIX leaves destroying a
 * locked mutex undefined.
 * NOTE(review): transform_engines is left dangling after free(); the
 * declaration of i is not visible in this listing.
 */
295 void stop_transform_engines()
298 for(i = 0; i < processors; i++)
300 transform_engines[i].done = 1;
301 pthread_mutex_unlock(&(transform_engines[i].input_lock));
302 pthread_join(transform_engines[i].tid, 0);
303 pthread_mutex_destroy(&(transform_engines[i].input_lock));
304 pthread_mutex_destroy(&(transform_engines[i].output_lock));
306 free(transform_engines);
317 /* inverse transform prediction error and add prediction */
/*
 * Thread body of one inverse-transform engine; start_itransform_engines()
 * passes it to pthread_create() together with the engine's own record.
 *
 * For every 16x16 macroblock in rows [engine->start_row, engine->end_row)
 * and each of its block_count blocks, it computes the offset `offs` of the
 * block inside its colour plane, runs (*pidct) on the block's 64-entry buffer
 * in picture->qblocks (with engine->temp as the second argument), and then
 * calls (*padd_pred) to combine that buffer with pred[cc] + offs into
 * cur[cc] + offs using line stride lx.
 *
 * Synchronisation as visible here: engine->input_lock is acquired before
 * engine->picture / pred / cur are read, and engine->output_lock is released
 * after the macroblock loops.  itransform() performs the matching unlock of
 * input_lock and lock of output_lock.
 *
 * NOTE(review): several lines of this function are missing from the listing
 * (the enclosing loop and any test of engine->done, the luminance/chrominance
 * branch heads, most assignments to lx, the bottom-field adjustments and the
 * increment of k) -- confirm against the full file.
 * NOTE(review): output_lock is locked by the thread that runs
 * start_itransform_engines() / itransform() but unlocked here, in the engine
 * thread.  No mutex type is set on the attribute object in the visible code,
 * and POSIX leaves unlocking a default mutex from a non-owning thread
 * undefined.
 */
318 void itransform_engine_loop(transform_engine_t *engine)
322 pthread_mutex_lock(&(engine->input_lock));
326 pict_data_s *picture = engine->picture;
327 uint8_t **pred = engine->pred;
328 uint8_t **cur = engine->cur;
329 int i, j, i1, j1, k, n, cc, offs, lx;
330 mbinfo_s *mbi = picture->mbinfo;
331 /* It's the quantised / inverse quantised blocks we're interested in
332 for inverse transformation */
333 int16_t (*blocks)[64] = picture->qblocks;
/* Index of the first macroblock of this engine's band: width / 16 macroblocks per row. */
335 k = (engine->start_row / 16) * (width / 16);
337 for(j = engine->start_row; j < engine->end_row; j += 16)
338 for(i = 0; i < width; i += 16)
340 for(n = 0; n < block_count; n++)
/* Blocks 0..3 map to component 0; later blocks alternate between 1 (even n) and 2 (odd n). */
342 cc = (n < 4) ? 0 : (n & 1) + 1; /* color component index */
/* Offsets based on width / width2 -- presumably the luminance case (branch head not visible).
   With dct_type set in a frame picture, blocks 2 and 3 start one line below blocks 0 and 1;
   otherwise they start eight lines below. */
347 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
350 offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
356 offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
360 if(picture->pict_struct == BOTTOM_FIELD)
367 /* scale coordinates */
/* Horizontal position is halved unless the format is 4:4:4, vertical position only for 4:2:0. */
368 i1 = (chroma_format==CHROMA444) ? i : i>>1;
369 j1 = (chroma_format!=CHROMA420) ? j : j>>1;
371 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type
372 && (chroma_format != CHROMA420))
375 offs = i1 + (n & 8) + chrom_width * (j1 + ((n & 2) >> 1));
/* Twice the chroma line width, so consecutive block rows skip every other picture line. */
376 lx = chrom_width << 1;
381 offs = i1 + (n&8) + chrom_width2 * (j1 + ((n & 2) << 2));
385 if(picture->pict_struct == BOTTOM_FIELD)
389 //pthread_mutex_lock(&test_lock);
/* Inverse DCT in place, then add the prediction and store into the current picture. */
390 (*pidct)(blocks[k*block_count+n], engine->temp);
391 (*padd_pred)(pred[cc]+offs,cur[cc]+offs,lx,blocks[k*block_count+n]);
392 //pthread_mutex_unlock(&test_lock);
/* Signal itransform(), which waits by locking output_lock. */
398 pthread_mutex_unlock(&(engine->output_lock));
/*
 * Hands one picture to every inverse-transform engine and waits for them.
 *
 *   picture  stored in each engine record and read by itransform_engine_loop()
 *   pred     array of prediction planes, indexed by colour component there
 *   cur      array of planes that receive the reconstructed samples
 *
 * Each engine's record is filled in and its input_lock released, which lets
 * the engine thread blocked in itransform_engine_loop() proceed.  The second
 * loop then locks every output_lock in turn; the engine loop unlocks it after
 * its band of macroblock rows, so this function returns once all bands are
 * done.
 *
 * NOTE(review): the declaration of i is not visible in this listing.
 */
402 void itransform(pict_data_s *picture,
403 uint8_t *pred[], uint8_t *cur[])
407 for(i = 0; i < processors; i++)
409 itransform_engines[i].picture = picture;
410 itransform_engines[i].cur = cur;
411 itransform_engines[i].pred = pred;
412 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
415 /* Wait for completion */
416 for(i = 0; i < processors; i++)
418 pthread_mutex_lock(&(itransform_engines[i].output_lock));
422 void start_itransform_engines()
425 int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
428 pthread_mutexattr_t mutex_attr;
430 pthread_mutexattr_init(&mutex_attr);
431 pthread_attr_init(&attr);
432 itransform_engines = calloc(1, sizeof(transform_engine_t) * processors);
433 for(i = 0; i < processors; i++)
435 itransform_engines[i].start_row = current_row * 16;
436 current_row += rows_per_processor;
437 if(current_row > height2 / 16) current_row = height2 / 16;
438 itransform_engines[i].end_row = current_row * 16;
439 pthread_mutex_init(&(itransform_engines[i].input_lock), &mutex_attr);
440 pthread_mutex_lock(&(itransform_engines[i].input_lock));
441 pthread_mutex_init(&(itransform_engines[i].output_lock), &mutex_attr);
442 pthread_mutex_lock(&(itransform_engines[i].output_lock));
443 itransform_engines[i].done = 0;
444 pthread_create(&(itransform_engines[i].tid),
446 (void*)itransform_engine_loop,
447 &itransform_engines[i]);
/*
 * Shut down the inverse-transform engines and release their resources.
 *
 * For each engine: sets done, releases input_lock so the thread blocked on it
 * in itransform_engine_loop() can run, joins the thread, and destroys both
 * mutexes.  The engine array is freed afterwards.
 *
 * NOTE(review): the engine's reaction to `done` is not visible in this
 * listing -- presumably its loop exits when the flag is set; confirm.
 * NOTE(review): as far as the visible code shows, output_lock is still held
 * here (last locked in start_itransform_engines() or itransform()), and
 * input_lock is re-acquired by the engine thread; POSIX leaves destroying a
 * locked mutex undefined.
 * NOTE(review): itransform_engines is left dangling after free(); the
 * declaration of i is not visible in this listing.
 */
451 void stop_itransform_engines()
454 for(i = 0; i < processors; i++)
456 itransform_engines[i].done = 1;
457 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
458 pthread_join(itransform_engines[i].tid, 0);
459 pthread_mutex_destroy(&(itransform_engines[i].input_lock));
460 pthread_mutex_destroy(&(itransform_engines[i].output_lock));
462 free(itransform_engines);
469 * select between frame and field DCT
471 * preliminary version: based on inter-field correlation
/*
 * Sets mbi[k].dct_type (0 = frame DCT, 1 = field DCT) for the macroblocks of
 * a picture, visiting them in 16x16 steps over height2 lines and width
 * columns.
 *
 *   picture  supplies mbinfo, frame_pred_dct and pict_struct
 *   pred     prediction samples, addressed with line stride `width`
 *   cur      current samples, addressed the same way
 *
 * For the interlaced-frame case the prediction error cur - pred of the
 * macroblock is split by line parity into blk0 (even lines) and blk1 (odd
 * lines), 128 samples each, and the two halves are correlated.
 *
 * NOTE(review): this listing omits many lines of the function (the branch
 * taken when frame_pred_dct is set or the picture is not a frame picture,
 * the j / i loops, the initialisation and accumulation of s0 and s1, the
 * declarations of d and r, the tests on d and r that select between the
 * three assignments at the end, and the update of k) and the function
 * continues past the end of the listing -- confirm against the full file.
 */
474 void dct_type_estimation(
475 pict_data_s *picture,
476 uint8_t *pred, uint8_t *cur
480 struct mbinfo *mbi = picture->mbinfo;
482 int16_t blk0[128], blk1[128];
483 int i, j, i0, j0, k, offs, s0, s1, sq0, sq1, s01;
488 for (j0=0; j0<height2; j0+=16)
489 for (i0=0; i0<width; i0+=16)
491 if (picture->frame_pred_dct || picture->pict_struct!=FRAME_PICTURE)
495 /* interlaced frame picture */
497 * calculate prediction error (cur-pred) for top (blk0)
498 * and bottom field (blk1)
/* offs addresses line j0 + 2*j of the macroblock at column i0; offs + width is the line below it. */
502 offs = width*((j<<1)+j0) + i0;
505 blk0[16*j+i] = cur[offs] - pred[offs];
506 blk1[16*j+i] = cur[offs+width] - pred[offs+width];
510 /* correlate fields */
513 for (i=0; i<128; i++)
516 sq0+= blk0[i]*blk0[i];
518 sq1+= blk1[i]*blk1[i];
519 s01+= blk0[i]*blk1[i];
/* d: product of the two halves' sums of squared deviations from their means (128 samples each). */
522 d = (sq0-(s0*s0)/128.0)*(sq1-(s1*s1)/128.0);
/* r: correlation coefficient between blk0 and blk1.
   NOTE(review): requires d > 0; the guard is not visible in this listing. */
526 r = (s01-(s0*s1)/128.0)/sqrt(d);
528 mbi[k].dct_type = 0; /* frame DCT */
530 mbi[k].dct_type = 1; /* field DCT */
533 mbi[k].dct_type = 1; /* field DCT */